RubyGems - lumix - Versions diffs - 0.0.2-java - Mend

lumix 0.0.2-java

Files changed (35) hide show

data/COPYING +18 -0
data/bin/lumix +4 -0
data/bin/lumix-gui +4 -0
data/lib/lumix/base.rb +56 -0
data/lib/lumix/charset.rb +35 -0
data/lib/lumix/cli.rb +96 -0
data/lib/lumix/concordancer.rb +254 -0
data/lib/lumix/corrections.rb +84 -0
data/lib/lumix/fast_search.rb +91 -0
data/lib/lumix/filter.rb +89 -0
data/lib/lumix/gui.rb +148 -0
data/lib/lumix/lookup.rb +105 -0
data/lib/lumix/lookup_filter.rb +43 -0
data/lib/lumix/lookup_search.rb +95 -0
data/lib/lumix/main.rb +7 -0
data/lib/lumix/model/base_models.rb +35 -0
data/lib/lumix/model/maglev_models.rb +42 -0
data/lib/lumix/model/mock_models.rb +46 -0
data/lib/lumix/model/sequel_models.rb +53 -0
data/lib/lumix/proto/lookup.rb +105 -0
data/lib/lumix/proto/lookup_filter.rb +40 -0
data/lib/lumix/proto/lookup_search.rb +81 -0
data/lib/lumix/result_view.rb +93 -0
data/lib/lumix/schema/001_create_tables.rb +35 -0
data/lib/lumix/schema/002_categories.rb +28 -0
data/lib/lumix/schema/003_add_fulltagged.rb +15 -0
data/lib/lumix/schema/004_create_lookup_tables.rb +44 -0
data/lib/lumix/slow_search.rb +104 -0
data/lib/lumix/text_snippet.rb +29 -0
data/lib/lumix/textprocessing.rb +108 -0
data/lib/lumix/thread_pool.rb +127 -0
data/spec/filter_spec.rb +55 -0
data/spec/lookup_spec.rb +70 -0
data/spec/text_snippet_spec.rb +55 -0
metadata +175 -0

data/lib/lumix/thread_pool.rb ADDED Viewed

@@ -0,0 +1,127 @@
+# Ruby Thread Pool
+# ================
+# A thread pool is useful when you wish to do some work in a thread, but do
+# not know how much work you will be doing in advance. Spawning one thread
+# for each task is potentially expensive, as threads are not free.
+#
+# In this case, it might be more beneficial to start a predefined set of
+# threads and then hand off work to them as it becomes available. This is
+# the pure essence of what a thread pool is: an array of threads, all just
+# waiting to do some work for you!
+#
+# Prerequisites
+# -------------
+# We need the [Queue](http://rdoc.info/stdlib/thread/1.9.2/Queue), as our
+# thread pool is largely dependent on it. Thanks to this, the implementation
+# becomes very simple!
+require 'thread'
+# Public Interface
+# ----------------
+# `Pool` is our thread pool class. It will allow us to do three operations:
+#
+# - `.new(size)` creates a thread pool of a given size
+# - `#schedule(*args, &job)` schedules a new job to be executed
+# - `#shutdown` shuts down all threads (after letting them finish working, of course)
+class Pool
+  # ### initialization, or `Pool.new(size)`
+  # Creating a new `Pool` involves a certain amount of work. First, however,
+  # we need to define its `size`. It defines how many threads we will have
+  # working internally.
+  #
+  # Which size is best for you is hard to answer. You do not want it to be
+  # too low, as then you won’t be able to do as many things concurrently.
+  # However, if you make it too high Ruby will spend too much time switching
+  # between threads, and that will also degrade performance!
+  def initialize(size)
+    # Before we do anything else, we need to store some information about
+    # our pool. `@size` is useful later, when we want to shut our pool down,
+    # and `@jobs` is the heart of our pool that allows us to schedule work.
+    @size = size
+    @jobs = Queue.new
+    # #### Creating our pool of threads
+    # Once preparation is done, it’s time to create our pool of threads.
+    # Each thread stores its index in a thread-local variable, in case we
+    # need to know which thread a job is executing in later on.
+    @pool = Array.new(@size) do |i|
+      Thread.new do
+        Thread.current[:id] = i
+        # We start off by defining a `catch` around our worker loop. This
+        # way we’ve provided a method for graceful shutdown of our threads.
+        # Shutting down is merely a `#schedule { throw :exit }` away!
+        catch(:exit) do
+          # The worker thread life-cycle is very simple. We continuously wait
+          # for tasks to be put into our job `Queue`. If the `Queue` is empty,
+          # we will wait until it’s not.
+          loop do
+            # Once we have a piece of work to be done, we will pull out the
+            # information we need and get to work.
+            job, args = @jobs.pop
+            job.call(*args)
+          end
+        end
+      end
+    end
+  end
+  # ### Work scheduling
+  # To schedule a piece of work to be done is to say to the `Pool` that you
+  # want something done.
+  def schedule(*args, &block)
+    # Your given task will not be run immediately; rather, it will be put
+    # into the work `Queue` and executed once a thread is ready to work.
+    @jobs << [block, args]
+  end
+  # ### Graceful shutdown
+  # If you ever wish to close down your application, I took the liberty of
+  # making it easy for you to wait for any currently executing jobs to finish
+  # before you exit.
+  def shutdown
+    # A graceful shutdown involves threads exiting cleanly themselves, and
+    # since we’ve defined a `catch`-handler around the threads’ worker loop
+    # it is simply a matter of throwing `:exit`. Thus, if we throw one `:exit`
+    # for each thread in our pool, they will all exit eventually!
+    @size.times do
+      schedule { throw :exit }
+    end
+    # And now one final thing: wait for our `throw :exit` jobs to be run on
+    # all our worker threads. This call will not return until all worker threads
+    # have exited.
+    @pool.map(&:join)
+  end
+end
+# Demonstration
+# -------------
+# Running this file will display how the thread pool works.
+if $0 == __FILE__
+  # - First, we create a new thread pool with a size of 10. This number is
+  #   lower than our planned amount of work, to show that threads do not
+  #   exit once they have finished a task.
+  p = Pool.new(10)
+  # - Next we simulate some workload by scheduling a large amount of work
+  #   to be done. The actual time taken for each job is randomized. This
+  #   is to demonstrate that even if two tasks are scheduled approximately
+  #   at the same time, the one that takes less time to execute is likely
+  #   to finish before the other one.
+  20.times do |i|
+    p.schedule do
+      sleep rand(4) + 2
+      puts "Job #{i} finished by thread #{Thread.current[:id]}"
+    end
+  end
+  # - Finally, register an `at_exit`-hook that will wait for our thread pool
+  #   to properly shut down before allowing our script to completely exit.
+  at_exit { p.shutdown }
+end

data/spec/filter_spec.rb ADDED Viewed

@@ -0,0 +1,55 @@
+$: << File.expand_path('../../lib', __FILE__)
+# To change this template, choose Tools | Templates
+# and open the template in the editor.
+require 'lumix/filter'
+puts RUBY_PLATFORM
+Add = '|12|3'
+TXT = ("They|PPER3 have|AUXP business|NN uses|VERB3 derp|ADNE too|ADVE " +
+  "Apr|NN 4th|CD 2007|M have|DMKD .|PERIOD").split(' ').map{|e| e + Add }.join(' ') + ' '
+def search(filter)
+  f = Lumix::Filter.new('\|\d+\|\d+', filter)
+  f.scan(TXT).map do |e|
+    # strip out the additional components
+    e.split(' ').map{ |c| c.strip[0..-Add.size-1] }.join(' ')
+  end
+end
+describe Lumix::Filter do
+  it "should find tags" do
+    search('NN').should == %w[business|NN Apr|NN]
+  end
+  it "should find words" do
+    search('"have"').should == %w[have|AUXP have|DMKD]
+  end
+  it "should find word and tag combinations" do
+    search('"have" NN "uses"').should == ['have|AUXP business|NN uses|VERB3']
+  end
+  it "should find wildcard tags" do
+    search('AU*').should == %w[have|AUXP]
+  end
+  it "should find exclusions" do
+    search('A(!UXP DNE)').should == %w[too|ADVE]
+  end
+  it "should find word|tag pairs" do
+    search('"have"|D*').should == %w[have|DMKD]
+  end
+  it "should find unlimited repetitions" do
+    search('(AD*)+').should == ['derp|ADNE too|ADVE']
+  end
+  it "should find limited repetitions" do
+    search('(AD*){3}').should == []
+    search('(AD*){2}').should == ['derp|ADNE too|ADVE']
+  end
+end

data/spec/lookup_spec.rb ADDED Viewed

@@ -0,0 +1,70 @@
+$: << File.expand_path('../../lib', __FILE__)
+# To change this template, choose Tools | Templates
+# and open the template in the editor.
+require 'lumix/lookup_search'
+require 'lumix/model/mock_models'
+puts RUBY_PLATFORM
+TEXT = "They have business uses derp too Apr 4th 2007 have ."
+TAGGED = "They|PPER3 have|AUXP business|NN uses|VERB3 derp|ADNE too|ADVE " +
+  "Apr|NN 4th|CD 2007|M have|DMKD .|PERIOD"
+module Helper
+  def lookup
+    return @lookup if @lookup
+    @lookup = Lumix::LookupSearch.new(nil, nil)
+    @text = TaggedText.create(:id => 0, :filename => "text", :text => TEXT, :tagged => TAGGED)
+    @lookup.link_text(0)
+    @lookup
+  end
+  def search(filter)
+    f = lookup.create_filter(filter)
+    results = []
+    lookup.find(f) do |text, tagged|
+      results << tagged.to_s
+    end
+    results
+  end
+end
+RSpec.configure do |config|
+  config.include Helper
+end
+describe Lumix::LookupFilter do
+  it "should find tags" do
+    search('NN').should == %w[business|NN Apr|NN]
+  end
+  it "should find words" do
+    search('"have"').should == %w[have|AUXP have|DMKD]
+  end
+  it "should find word and tag combinations" do
+    search('"have" NN "uses"').should == ['have|AUXP business|NN uses|VERB3']
+  end
+  it "should find wildcard tags" do
+    search('AU*').should == %w[have|AUXP]
+  end
+  def disabled
+    it "should find exclusions" do
+      search('A(!UXP DNE)').should == %w[too|ADVE]
+    end
+    it "should find word|tag pairs" do
+      search('"have"|D*').should == %w[have|DMKD]
+    end
+    it "should find unlimited repetitions" do
+      search('(AD*)+').should == ['derp|ADNE too|ADVE']
+    end
+    it "should find limited repetitions" do
+      search('(AD*){3}').should == []
+      search('(AD*){2}').should == ['derp|ADNE too|ADVE']
+    end
+  end
+end

data/spec/text_snippet_spec.rb ADDED Viewed

@@ -0,0 +1,55 @@
+$: << File.expand_path('../../lib', __FILE__)
+require 'lumix/text_snippet'
+describe Lumix::TextSnippet do
+  before(:each) do
+  end
+  it "should handle umlauts properly" do
+    ts = create_ts('eins zwei drei vierß öfünfä ßechs sieben acht neun zehn', /öfünfä/)
+    ts.left(3).should == 'zwei drei vierß '
+    ts.to_s.should == 'öfünfä'
+    ts.right(3).should == ' ßechs sieben acht'
+  end
+  it "should handle partial words and umlauts properly" do
+    ts = create_ts('eins zwei drei vierß öfünfä ßechs sieben acht neun zehn', /fünf/)
+    ts.left(3).should == 'zwei drei vierß ö'
+    ts.to_s.should == 'fünf'
+    ts.right(3).should == 'ä ßechs sieben acht'
+  end
+  it "should have dynamic left context" do
+    ts = create_ts('one two three four five six seven eight nine ten', /five/)
+    ts.left(1).should == 'four '
+    ts.left(2).should == 'three four '
+    ts.left(10).should == 'one two three four '
+  end
+  it "should have dynamic right context" do
+    ts = create_ts('one two three four five six seven eight nine ten', /five/)
+    ts.right(1).should == ' six'
+    ts.right(2).should == ' six seven'
+    ts.right(10).should == ' six seven eight nine ten'
+  end
+  it "should work correctly with newlines" do
+    ts = create_ts("one two\n three four five six seven eight\n nine ten", /five/)
+    ts.left(1).should == 'four '
+    ts.right(1).should == ' six'
+  end
+  it "should replace newlines and tabs with spaces" do
+    ts = create_ts("one two three\n four five six\n\t seven eight nine ten", /five/)
+    ts.left(2).should == 'three four '
+    ts.right(2).should == ' six seven'
+  end
+end
+def create_ts(text, re)
+  @count ||= 0
+  m = text.match(re)
+  Lumix::TextSnippet.new "text#{@count += 1}", text, m.begin(0), m.end(0)
+end

metadata ADDED Viewed

@@ -0,0 +1,175 @@
+--- !ruby/object:Gem::Specification
+name: lumix
+version: !ruby/object:Gem::Version
+  prerelease:
+  version: 0.0.2
+platform: java
+authors:
+  - Michael Klaus
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2012-01-09 00:00:00 Z
+dependencies:
+  - !ruby/object:Gem::Dependency
+    name: ffi-icu
+    prerelease: false
+    requirement: &id001 !ruby/object:Gem::Requirement
+      none: false
+      requirements:
+        - - ">="
+          - !ruby/object:Gem::Version
+            version: "0"
+    type: :runtime
+    version_requirements: *id001
+  - !ruby/object:Gem::Dependency
+    name: msgpack-jruby
+    prerelease: false
+    requirement: &id002 !ruby/object:Gem::Requirement
+      none: false
+      requirements:
+        - - ">="
+          - !ruby/object:Gem::Version
+            version: "0"
+    type: :runtime
+    version_requirements: *id002
+  - !ruby/object:Gem::Dependency
+    name: htmlentities
+    prerelease: false
+    requirement: &id003 !ruby/object:Gem::Requirement
+      none: false
+      requirements:
+        - - ">="
+          - !ruby/object:Gem::Version
+            version: "0"
+    type: :runtime
+    version_requirements: *id003
+  - !ruby/object:Gem::Dependency
+    name: sequel
+    prerelease: false
+    requirement: &id004 !ruby/object:Gem::Requirement
+      none: false
+      requirements:
+        - - ">="
+          - !ruby/object:Gem::Version
+            version: "0"
+    type: :runtime
+    version_requirements: *id004
+  - !ruby/object:Gem::Dependency
+    name: savon
+    prerelease: false
+    requirement: &id005 !ruby/object:Gem::Requirement
+      none: false
+      requirements:
+        - - ">="
+          - !ruby/object:Gem::Version
+            version: "0"
+    type: :runtime
+    version_requirements: *id005
+  - !ruby/object:Gem::Dependency
+    name: curb
+    prerelease: false
+    requirement: &id006 !ruby/object:Gem::Requirement
+      none: false
+      requirements:
+        - - ">="
+          - !ruby/object:Gem::Version
+            version: "0"
+    type: :runtime
+    version_requirements: *id006
+  - !ruby/object:Gem::Dependency
+    name: jdbc-postgres
+    prerelease: false
+    requirement: &id007 !ruby/object:Gem::Requirement
+      none: false
+      requirements:
+        - - ">="
+          - !ruby/object:Gem::Version
+            version: "0"
+    type: :runtime
+    version_requirements: *id007
+  - !ruby/object:Gem::Dependency
+    name: sweet
+    prerelease: false
+    requirement: &id008 !ruby/object:Gem::Requirement
+      none: false
+      requirements:
+        - - ">="
+          - !ruby/object:Gem::Version
+            version: "0"
+    type: :runtime
+    version_requirements: *id008
+description: Lumix helps you create and tag a corpus from raw texts, as well as search in it with a simple query language.
+email: Michael.Klaus@gmx.net
+executables:
+  - lumix
+  - lumix-gui
+extensions: []
+extra_rdoc_files: []
+files:
+  - COPYING
+  - bin/lumix-gui
+  - bin/lumix
+  - spec/filter_spec.rb
+  - spec/lookup_spec.rb
+  - spec/text_snippet_spec.rb
+  - lib/lumix/filter.rb
+  - lib/lumix/slow_search.rb
+  - lib/lumix/textprocessing.rb
+  - lib/lumix/fast_search.rb
+  - lib/lumix/lookup_filter.rb
+  - lib/lumix/thread_pool.rb
+  - lib/lumix/corrections.rb
+  - lib/lumix/lookup.rb
+  - lib/lumix/main.rb
+  - lib/lumix/concordancer.rb
+  - lib/lumix/text_snippet.rb
+  - lib/lumix/gui.rb
+  - lib/lumix/result_view.rb
+  - lib/lumix/cli.rb
+  - lib/lumix/lookup_search.rb
+  - lib/lumix/base.rb
+  - lib/lumix/charset.rb
+  - lib/lumix/schema/002_categories.rb
+  - lib/lumix/schema/003_add_fulltagged.rb
+  - lib/lumix/schema/001_create_tables.rb
+  - lib/lumix/schema/004_create_lookup_tables.rb
+  - lib/lumix/proto/lookup_filter.rb
+  - lib/lumix/proto/lookup.rb
+  - lib/lumix/proto/lookup_search.rb
+  - lib/lumix/model/base_models.rb
+  - lib/lumix/model/maglev_models.rb
+  - lib/lumix/model/sequel_models.rb
+  - lib/lumix/model/mock_models.rb
+homepage: http://github.com/QaDeS/lumix
+licenses: []
+post_install_message:
+rdoc_options: []
+require_paths:
+  - lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: "0"
+required_rubygems_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: "0"
+requirements: []
+rubyforge_project:
+rubygems_version: 1.8.9
+signing_key:
+specification_version: 3
+summary: A concordancer for corpus-based linguistic research.
+test_files: []