lumix 0.0.2-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,127 @@
1
+ # Ruby Thread Pool
2
+ # ================
3
+ # A thread pool is useful when you wish to do some work in a thread, but do
4
+ # not know how much work you will be doing in advance. Spawning one thread
5
+ # for each task is potentially expensive, as threads are not free.
6
+ #
7
+ # In this case, it might be more beneficial to start a predefined set of
8
+ # threads and then hand off work to them as it becomes available. This is
9
+ # the pure essence of what a thread pool is: an array of threads, all just
10
+ # waiting to do some work for you!
11
+ #
12
+ # Prerequisites
13
+ # -------------
14
+
15
+ # We need the [Queue](http://rdoc.info/stdlib/thread/1.9.2/Queue), as our
16
+ # thread pool is largely dependent on it. Thanks to this, the implementation
17
+ # becomes very simple!
18
+ require 'thread'
19
+
20
+ # Public Interface
21
+ # ----------------
22
+
23
+ # `Pool` is our thread pool class. It will allow us to do three operations:
24
+ #
25
+ # - `.new(size)` creates a thread pool of a given size
26
+ # - `#schedule(*args, &job)` schedules a new job to be executed
27
+ # - `#shutdown` shuts down all threads (after letting them finish working, of course)
28
+ class Pool
29
+
30
+ # ### initialization, or `Pool.new(size)`
31
+ # Creating a new `Pool` involves a certain amount of work. First, however,
32
+ # we need to define its `size`. It defines how many threads we will have
33
+ # working internally.
34
+ #
35
+ # Which size is best for you is hard to answer. You do not want it to be
36
+ # too low, as then you won’t be able to do as many things concurrently.
37
+ # However, if you make it too high Ruby will spend too much time switching
38
+ # between threads, and that will also degrade performance!
39
+ def initialize(size)
40
+ # Before we do anything else, we need to store some information about
41
+ # our pool. `@size` is useful later, when we want to shut our pool down,
42
+ # and `@jobs` is the heart of our pool that allows us to schedule work.
43
+ @size = size
44
+ @jobs = Queue.new
45
+
46
+ # #### Creating our pool of threads
47
+ # Once preparation is done, it’s time to create our pool of threads.
48
+ # Each thread stores its index in a thread-local variable, in case we
49
+ # need to know which thread a job is executing in later on.
50
+ @pool = Array.new(@size) do |i|
51
+ Thread.new do
52
+ Thread.current[:id] = i
53
+
54
+ # We start off by defining a `catch` around our worker loop. This
55
+ # way we’ve provided a method for graceful shutdown of our threads.
56
+ # Shutting down is merely a `#schedule { throw :exit }` away!
57
+ catch(:exit) do
58
+ # The worker thread life-cycle is very simple. We continuously wait
59
+ # for tasks to be put into our job `Queue`. If the `Queue` is empty,
60
+ # we will wait until it’s not.
61
+ loop do
62
+ # Once we have a piece of work to be done, we will pull out the
63
+ # information we need and get to work.
64
+ job, args = @jobs.pop
65
+ job.call(*args)
66
+ end
67
+ end
68
+ end
69
+ end
70
+ end
71
+
72
+ # ### Work scheduling
73
+
74
+ # To schedule a piece of work to be done is to say to the `Pool` that you
75
+ # want something done.
76
+ def schedule(*args, &block)
77
+ # Your given task will not be run immediately; rather, it will be put
78
+ # into the work `Queue` and executed once a thread is ready to work.
79
+ @jobs << [block, args]
80
+ end
81
+
82
+ # ### Graceful shutdown
83
+
84
+ # If you ever wish to close down your application, I took the liberty of
85
+ # making it easy for you to wait for any currently executing jobs to finish
86
+ # before you exit.
87
+ def shutdown
88
+ # A graceful shutdown involves threads exiting cleanly themselves, and
89
+ # since we’ve defined a `catch`-handler around the threads’ worker loop
90
+ # it is simply a matter of throwing `:exit`. Thus, if we throw one `:exit`
91
+ # for each thread in our pool, they will all exit eventually!
92
+ @size.times do
93
+ schedule { throw :exit }
94
+ end
95
+
96
+ # And now one final thing: wait for our `throw :exit` jobs to be run on
97
+ # all our worker threads. This call will not return until all worker threads
98
+ # have exited.
99
+ @pool.map(&:join)
100
+ end
101
+ end
102
+
103
+ # Demonstration
104
+ # -------------
105
+ # Running this file will display how the thread pool works.
106
+ if $0 == __FILE__
107
+ # - First, we create a new thread pool with a size of 10. This number is
108
+ # lower than our planned amount of work, to show that threads do not
109
+ # exit once they have finished a task.
110
+ p = Pool.new(10)
111
+
112
+ # - Next we simulate some workload by scheduling a large amount of work
113
+ # to be done. The actual time taken for each job is randomized. This
114
+ # is to demonstrate that even if two tasks are scheduled approximately
115
+ # at the same time, the one that takes less time to execute is likely
116
+ # to finish before the other one.
117
+ 20.times do |i|
118
+ p.schedule do
119
+ sleep rand(4) + 2
120
+ puts "Job #{i} finished by thread #{Thread.current[:id]}"
121
+ end
122
+ end
123
+
124
+ # - Finally, register an `at_exit`-hook that will wait for our thread pool
125
+ # to properly shut down before allowing our script to completely exit.
126
+ at_exit { p.shutdown }
127
+ end
@@ -0,0 +1,55 @@
1
+ $: << File.expand_path('../../lib', __FILE__)
2
+ # To change this template, choose Tools | Templates
3
+ # and open the template in the editor.
4
+
5
+ require 'lumix/filter'
6
+
7
+ puts RUBY_PLATFORM
8
+
9
+ Add = '|12|3'
10
+ TXT = ("They|PPER3 have|AUXP business|NN uses|VERB3 derp|ADNE too|ADVE " +
11
+ "Apr|NN 4th|CD 2007|M have|DMKD .|PERIOD").split(' ').map{|e| e + Add }.join(' ') + ' '
12
+
13
+ def search(filter)
14
+ f = Lumix::Filter.new('\|\d+\|\d+', filter)
15
+ f.scan(TXT).map do |e|
16
+ # strip out the additional components
17
+ e.split(' ').map{ |c| c.strip[0..-Add.size-1] }.join(' ')
18
+ end
19
+ end
20
+
21
+ describe Lumix::Filter do
22
+
23
+ it "should find tags" do
24
+ search('NN').should == %w[business|NN Apr|NN]
25
+ end
26
+
27
+ it "should find words" do
28
+ search('"have"').should == %w[have|AUXP have|DMKD]
29
+ end
30
+
31
+ it "should find word and tag combinations" do
32
+ search('"have" NN "uses"').should == ['have|AUXP business|NN uses|VERB3']
33
+ end
34
+
35
+ it "should find wildcard tags" do
36
+ search('AU*').should == %w[have|AUXP]
37
+ end
38
+
39
+ it "should find exclusions" do
40
+ search('A(!UXP DNE)').should == %w[too|ADVE]
41
+ end
42
+
43
+ it "should find word|tag pairs" do
44
+ search('"have"|D*').should == %w[have|DMKD]
45
+ end
46
+
47
+ it "should find unlimited repetitions" do
48
+ search('(AD*)+').should == ['derp|ADNE too|ADVE']
49
+ end
50
+
51
+ it "should find limited repetitions" do
52
+ search('(AD*){3}').should == []
53
+ search('(AD*){2}').should == ['derp|ADNE too|ADVE']
54
+ end
55
+ end
@@ -0,0 +1,70 @@
1
+ $: << File.expand_path('../../lib', __FILE__)
2
+ # To change this template, choose Tools | Templates
3
+ # and open the template in the editor.
4
+
5
+ require 'lumix/lookup_search'
6
+ require 'lumix/model/mock_models'
7
+
8
+ puts RUBY_PLATFORM
9
+
10
+ TEXT = "They have business uses derp too Apr 4th 2007 have ."
11
+ TAGGED = "They|PPER3 have|AUXP business|NN uses|VERB3 derp|ADNE too|ADVE " +
12
+ "Apr|NN 4th|CD 2007|M have|DMKD .|PERIOD"
13
+ module Helper
14
+ def lookup
15
+ return @lookup if @lookup
16
+ @lookup = Lumix::LookupSearch.new(nil, nil)
17
+ @text = TaggedText.create(:id => 0, :filename => "text", :text => TEXT, :tagged => TAGGED)
18
+ @lookup.link_text(0)
19
+ @lookup
20
+ end
21
+
22
+ def search(filter)
23
+ f = lookup.create_filter(filter)
24
+ results = []
25
+ lookup.find(f) do |text, tagged|
26
+ results << tagged.to_s
27
+ end
28
+ results
29
+ end
30
+ end
31
+ RSpec.configure do |config|
32
+ config.include Helper
33
+ end
34
+ describe Lumix::LookupFilter do
35
+
36
+ it "should find tags" do
37
+ search('NN').should == %w[business|NN Apr|NN]
38
+ end
39
+
40
+ it "should find words" do
41
+ search('"have"').should == %w[have|AUXP have|DMKD]
42
+ end
43
+
44
+ it "should find word and tag combinations" do
45
+ search('"have" NN "uses"').should == ['have|AUXP business|NN uses|VERB3']
46
+ end
47
+
48
+ it "should find wildcard tags" do
49
+ search('AU*').should == %w[have|AUXP]
50
+ end
51
+
52
+ def disabled
53
+ it "should find exclusions" do
54
+ search('A(!UXP DNE)').should == %w[too|ADVE]
55
+ end
56
+
57
+ it "should find word|tag pairs" do
58
+ search('"have"|D*').should == %w[have|DMKD]
59
+ end
60
+
61
+ it "should find unlimited repetitions" do
62
+ search('(AD*)+').should == ['derp|ADNE too|ADVE']
63
+ end
64
+
65
+ it "should find limited repetitions" do
66
+ search('(AD*){3}').should == []
67
+ search('(AD*){2}').should == ['derp|ADNE too|ADVE']
68
+ end
69
+ end
70
+ end
@@ -0,0 +1,55 @@
1
+ $: << File.expand_path('../../lib', __FILE__)
2
+
3
+ require 'lumix/text_snippet'
4
+
5
+ describe Lumix::TextSnippet do
6
+ before(:each) do
7
+ end
8
+
9
+ it "should handle umlauts properly" do
10
+ ts = create_ts('eins zwei drei vierß öfünfä ßechs sieben acht neun zehn', /öfünfä/)
11
+ ts.left(3).should == 'zwei drei vierß '
12
+ ts.to_s.should == 'öfünfä'
13
+ ts.right(3).should == ' ßechs sieben acht'
14
+ end
15
+
16
+ it "should handle partial words and umlauts properly" do
17
+ ts = create_ts('eins zwei drei vierß öfünfä ßechs sieben acht neun zehn', /fünf/)
18
+ ts.left(3).should == 'zwei drei vierß ö'
19
+ ts.to_s.should == 'fünf'
20
+ ts.right(3).should == 'ä ßechs sieben acht'
21
+ end
22
+
23
+ it "should have dynamic left context" do
24
+ ts = create_ts('one two three four five six seven eight nine ten', /five/)
25
+ ts.left(1).should == 'four '
26
+ ts.left(2).should == 'three four '
27
+ ts.left(10).should == 'one two three four '
28
+ end
29
+
30
+ it "should have dynamic right context" do
31
+ ts = create_ts('one two three four five six seven eight nine ten', /five/)
32
+ ts.right(1).should == ' six'
33
+ ts.right(2).should == ' six seven'
34
+ ts.right(10).should == ' six seven eight nine ten'
35
+ end
36
+
37
+ it "should work correctly with newlines" do
38
+ ts = create_ts("one two\n three four five six seven eight\n nine ten", /five/)
39
+ ts.left(1).should == 'four '
40
+ ts.right(1).should == ' six'
41
+ end
42
+
43
+ it "should replace newlines and tabs with spaces" do
44
+ ts = create_ts("one two three\n four five six\n\t seven eight nine ten", /five/)
45
+ ts.left(2).should == 'three four '
46
+ ts.right(2).should == ' six seven'
47
+ end
48
+
49
+ end
50
+
51
+ def create_ts(text, re)
52
+ @count ||= 0
53
+ m = text.match(re)
54
+ Lumix::TextSnippet.new "text#{@count += 1}", text, m.begin(0), m.end(0)
55
+ end
metadata ADDED
@@ -0,0 +1,175 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: lumix
3
+ version: !ruby/object:Gem::Version
4
+ prerelease:
5
+ version: 0.0.2
6
+ platform: java
7
+ authors:
8
+ - Michael Klaus
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+
13
+ date: 2012-01-09 00:00:00 Z
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: ffi-icu
17
+ prerelease: false
18
+ requirement: &id001 !ruby/object:Gem::Requirement
19
+ none: false
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0"
24
+ type: :runtime
25
+ version_requirements: *id001
26
+ - !ruby/object:Gem::Dependency
27
+ name: msgpack-jruby
28
+ prerelease: false
29
+ requirement: &id002 !ruby/object:Gem::Requirement
30
+ none: false
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: "0"
35
+ type: :runtime
36
+ version_requirements: *id002
37
+ - !ruby/object:Gem::Dependency
38
+ name: htmlentities
39
+ prerelease: false
40
+ requirement: &id003 !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ">="
44
+ - !ruby/object:Gem::Version
45
+ version: "0"
46
+ type: :runtime
47
+ version_requirements: *id003
48
+ - !ruby/object:Gem::Dependency
49
+ name: sequel
50
+ prerelease: false
51
+ requirement: &id004 !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ version: "0"
57
+ type: :runtime
58
+ version_requirements: *id004
59
+ - !ruby/object:Gem::Dependency
60
+ name: savon
61
+ prerelease: false
62
+ requirement: &id005 !ruby/object:Gem::Requirement
63
+ none: false
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ version: "0"
68
+ type: :runtime
69
+ version_requirements: *id005
70
+ - !ruby/object:Gem::Dependency
71
+ name: curb
72
+ prerelease: false
73
+ requirement: &id006 !ruby/object:Gem::Requirement
74
+ none: false
75
+ requirements:
76
+ - - ">="
77
+ - !ruby/object:Gem::Version
78
+ version: "0"
79
+ type: :runtime
80
+ version_requirements: *id006
81
+ - !ruby/object:Gem::Dependency
82
+ name: jdbc-postgres
83
+ prerelease: false
84
+ requirement: &id007 !ruby/object:Gem::Requirement
85
+ none: false
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: "0"
90
+ type: :runtime
91
+ version_requirements: *id007
92
+ - !ruby/object:Gem::Dependency
93
+ name: sweet
94
+ prerelease: false
95
+ requirement: &id008 !ruby/object:Gem::Requirement
96
+ none: false
97
+ requirements:
98
+ - - ">="
99
+ - !ruby/object:Gem::Version
100
+ version: "0"
101
+ type: :runtime
102
+ version_requirements: *id008
103
+ description: Lumix helps you create and tag a corpus from raw texts, as well as search in it with a simple query language.
104
+ email: Michael.Klaus@gmx.net
105
+ executables:
106
+ - lumix
107
+ - lumix-gui
108
+ extensions: []
109
+
110
+ extra_rdoc_files: []
111
+
112
+ files:
113
+ - COPYING
114
+ - bin/lumix-gui
115
+ - bin/lumix
116
+ - spec/filter_spec.rb
117
+ - spec/lookup_spec.rb
118
+ - spec/text_snippet_spec.rb
119
+ - lib/lumix/filter.rb
120
+ - lib/lumix/slow_search.rb
121
+ - lib/lumix/textprocessing.rb
122
+ - lib/lumix/fast_search.rb
123
+ - lib/lumix/lookup_filter.rb
124
+ - lib/lumix/thread_pool.rb
125
+ - lib/lumix/corrections.rb
126
+ - lib/lumix/lookup.rb
127
+ - lib/lumix/main.rb
128
+ - lib/lumix/concordancer.rb
129
+ - lib/lumix/text_snippet.rb
130
+ - lib/lumix/gui.rb
131
+ - lib/lumix/result_view.rb
132
+ - lib/lumix/cli.rb
133
+ - lib/lumix/lookup_search.rb
134
+ - lib/lumix/base.rb
135
+ - lib/lumix/charset.rb
136
+ - lib/lumix/schema/002_categories.rb
137
+ - lib/lumix/schema/003_add_fulltagged.rb
138
+ - lib/lumix/schema/001_create_tables.rb
139
+ - lib/lumix/schema/004_create_lookup_tables.rb
140
+ - lib/lumix/proto/lookup_filter.rb
141
+ - lib/lumix/proto/lookup.rb
142
+ - lib/lumix/proto/lookup_search.rb
143
+ - lib/lumix/model/base_models.rb
144
+ - lib/lumix/model/maglev_models.rb
145
+ - lib/lumix/model/sequel_models.rb
146
+ - lib/lumix/model/mock_models.rb
147
+ homepage: http://github.org/QaDeS/lumix
148
+ licenses: []
149
+
150
+ post_install_message:
151
+ rdoc_options: []
152
+
153
+ require_paths:
154
+ - lib
155
+ required_ruby_version: !ruby/object:Gem::Requirement
156
+ none: false
157
+ requirements:
158
+ - - ">="
159
+ - !ruby/object:Gem::Version
160
+ version: "0"
161
+ required_rubygems_version: !ruby/object:Gem::Requirement
162
+ none: false
163
+ requirements:
164
+ - - ">="
165
+ - !ruby/object:Gem::Version
166
+ version: "0"
167
+ requirements: []
168
+
169
+ rubyforge_project:
170
+ rubygems_version: 1.8.9
171
+ signing_key:
172
+ specification_version: 3
173
+ summary: A concordancer for corpus-based linguistic research.
174
+ test_files: []
175
+