lumix 0.0.2-java

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,127 @@
1
+ # Ruby Thread Pool
2
+ # ================
3
+ # A thread pool is useful when you wish to do some work in a thread, but do
4
+ # not know how much work you will be doing in advance. Spawning one thread
5
+ # for each task is potentially expensive, as threads are not free.
6
+ #
7
+ # In this case, it might be more beneficial to start a predefined set of
8
+ # threads and then hand off work to them as it becomes available. This is
9
+ # the pure essence of what a thread pool is: an array of threads, all just
10
+ # waiting to do some work for you!
11
+ #
12
+ # Prerequisites
13
+ # -------------
14
+
15
+ # We need the [Queue](http://rdoc.info/stdlib/thread/1.9.2/Queue), as our
16
+ # thread pool is largely dependent on it. Thanks to this, the implementation
17
+ # becomes very simple!
18
+ require 'thread'
19
+
20
+ # Public Interface
21
+ # ----------------
22
+
23
+ # `Pool` is our thread pool class. It will allow us to do three operations:
24
+ #
25
+ # - `.new(size)` creates a thread pool of a given size
26
+ # - `#schedule(*args, &job)` schedules a new job to be executed
27
+ # - `#shutdown` shuts down all threads (after letting them finish working, of course)
28
class Pool

  # ### initialization, or `Pool.new(size)`
  # Creating a new `Pool` involves a certain amount of work. First, however,
  # we need to define its `size`. It defines how many threads we will have
  # working internally.
  #
  # Which size is best for you is hard to answer. You do not want it to be
  # too low, as then you won’t be able to do as many things concurrently.
  # However, if you make it too high Ruby will spend too much time switching
  # between threads, and that will also degrade performance!
  def initialize(size)
    # Before we do anything else, we need to store some information about
    # our pool. `@size` is useful later, when we want to shut our pool down,
    # and `@jobs` is the heart of our pool that allows us to schedule work.
    @size = size
    @jobs = Queue.new

    # #### Creating our pool of threads
    # Once preparation is done, it’s time to create our pool of threads.
    # Each thread stores its index in a thread-local variable, in case we
    # need to know which thread a job is executing in later on.
    @pool = Array.new(@size) do |i|
      Thread.new do
        Thread.current[:id] = i

        # We start off by defining a `catch` around our worker loop. This
        # way we’ve provided a method for graceful shutdown of our threads.
        # Shutting down is merely a `#schedule { throw :exit }` away!
        catch(:exit) do
          # The worker thread life-cycle is very simple. We continuously wait
          # for tasks to be put into our job `Queue`. If the `Queue` is empty,
          # we will wait until it’s not.
          loop do
            # Once we have a piece of work to be done, we will pull out the
            # information we need and get to work.
            job, args = @jobs.pop
            begin
              job.call(*args)
            rescue StandardError => e
              # A failing job must not take its worker down with it: a dead
              # worker would shrink the pool and make `#shutdown` re-raise the
              # job's exception from `Thread#join`. Report the failure and
              # keep serving the queue. `throw :exit` is not an exception, so
              # it still unwinds to the `catch` above.
              warn "Pool: job failed in thread #{i}: #{e.class}: #{e.message}"
            end
          end
        end
      end
    end
  end

  # ### Work scheduling

  # To schedule a piece of work to be done is to say to the `Pool` that you
  # want something done. Any `args` given are passed to the block when it is
  # eventually called. Raises `ArgumentError` when no block is given, since a
  # job without a block could never be executed by a worker.
  def schedule(*args, &block)
    raise ArgumentError, 'Pool#schedule requires a block' unless block

    # Your given task will not be run immediately; rather, it will be put
    # into the work `Queue` and executed once a thread is ready to work.
    @jobs << [block, args]
  end

  # ### Graceful shutdown

  # If you ever wish to close down your application, I took the liberty of
  # making it easy for you to wait for any currently executing jobs to finish
  # before you exit.
  def shutdown
    # A graceful shutdown involves threads exiting cleanly themselves, and
    # since we’ve defined a `catch`-handler around the threads’ worker loop
    # it is simply a matter of throwing `:exit`. Thus, if we throw one `:exit`
    # for each thread in our pool, they will all exit eventually!
    @size.times do
      schedule { throw :exit }
    end

    # And now one final thing: wait for our `throw :exit` jobs to be run on
    # all our worker threads. This call will not return until all worker threads
    # have exited.
    @pool.map(&:join)
  end
end
102
+
103
+ # Demonstration
104
+ # -------------
105
+ # Running this file will display how the thread pool works.
106
# Only run the demonstration when this file is executed directly.
if $0 == __FILE__
  # - Start a pool of 10 workers. That is fewer workers than jobs scheduled
  #   below, which shows that threads do not exit once they have finished a
  #   task.
  pool = Pool.new(10)

  # - Simulate a workload by queueing 20 jobs, each sleeping for a random
  #   time. Even if two tasks are scheduled at approximately the same time,
  #   the one that takes less time to execute is likely to finish first.
  20.times do |job_number|
    pool.schedule do
      sleep(rand(4) + 2)
      puts "Job #{job_number} finished by thread #{Thread.current[:id]}"
    end
  end

  # - Finally, register an `at_exit`-hook that waits for the pool to shut
  #   down properly before the script is allowed to exit completely.
  at_exit { pool.shutdown }
end
@@ -0,0 +1,55 @@
1
+ $: << File.expand_path('../../lib', __FILE__)
2
+ # To change this template, choose Tools | Templates
3
+ # and open the template in the editor.
4
+
5
+ require 'lumix/filter'
6
+
7
+ puts RUBY_PLATFORM
8
+
9
# Suffix appended to every token of the fixture text; `search` strips it
# from the results again.
Add = '|12|3'

# Fixture corpus: each `word|TAG` token followed by `Add` and one space.
TXT = %w[
  They|PPER3 have|AUXP business|NN uses|VERB3 derp|ADNE too|ADVE
  Apr|NN 4th|CD 2007|M have|DMKD .|PERIOD
].map { |token| "#{token}#{Add} " }.join

# Builds a Lumix::Filter for `filter`, scans TXT with it, and returns the
# matches with the `Add` suffix removed from every token.
def search(filter)
  matcher = Lumix::Filter.new('\|\d+\|\d+', filter)
  matcher.scan(TXT).map do |hit|
    # strip out the additional components
    tokens = hit.split(' ')
    tokens.map { |token| token.strip[0...-Add.size] }.join(' ')
  end
end
20
+
21
# Examples for the filter query language, all run against the TXT fixture
# through the `search` helper.
describe Lumix::Filter do
  it "should find tags" do
    search('NN').should == ['business|NN', 'Apr|NN']
  end

  it "should find words" do
    search('"have"').should == ['have|AUXP', 'have|DMKD']
  end

  it "should find word and tag combinations" do
    search('"have" NN "uses"').should == ['have|AUXP business|NN uses|VERB3']
  end

  it "should find wildcard tags" do
    search('AU*').should == ['have|AUXP']
  end

  it "should find exclusions" do
    search('A(!UXP DNE)').should == ['too|ADVE']
  end

  it "should find word|tag pairs" do
    search('"have"|D*').should == ['have|DMKD']
  end

  it "should find unlimited repetitions" do
    search('(AD*)+').should == ['derp|ADNE too|ADVE']
  end

  it "should find limited repetitions" do
    # The fixture holds only two consecutive AD* tags, so {3} finds nothing.
    search('(AD*){3}').should == []
    search('(AD*){2}').should == ['derp|ADNE too|ADVE']
  end
end
@@ -0,0 +1,70 @@
1
+ $: << File.expand_path('../../lib', __FILE__)
2
+ # To change this template, choose Tools | Templates
3
+ # and open the template in the editor.
4
+
5
+ require 'lumix/lookup_search'
6
+ require 'lumix/model/mock_models'
7
+
8
+ puts RUBY_PLATFORM
9
+
10
# Fixture text, plain and with a `word|TAG` annotation on every token.
TEXT = 'They have business uses derp too Apr 4th 2007 have .'
TAGGED = %w[
  They|PPER3 have|AUXP business|NN uses|VERB3 derp|ADNE too|ADVE
  Apr|NN 4th|CD 2007|M have|DMKD .|PERIOD
].join(' ')

# Spec helpers mixed into every example group (see RSpec.configure below).
module Helper
  # Returns the memoized Lumix::LookupSearch. On first use it creates the
  # search object, stores the fixture text as TaggedText 0 and links it.
  def lookup
    unless @lookup
      @lookup = Lumix::LookupSearch.new(nil, nil)
      @text = TaggedText.create(:id => 0, :filename => "text", :text => TEXT, :tagged => TAGGED)
      @lookup.link_text(0)
    end
    @lookup
  end

  # Runs `filter` through the lookup search and returns the tagged part of
  # every hit as a string.
  def search(filter)
    compiled = lookup.create_filter(filter)
    [].tap do |hits|
      lookup.find(compiled) { |_text, tagged| hits << tagged.to_s }
    end
  end
end

RSpec.configure { |config| config.include Helper }
34
# Examples for the lookup-based filter, run against the TEXT/TAGGED fixture
# through Helper#search.
describe Lumix::LookupFilter do
  it "should find tags" do
    search('NN').should == ['business|NN', 'Apr|NN']
  end

  it "should find words" do
    search('"have"').should == ['have|AUXP', 'have|DMKD']
  end

  it "should find word and tag combinations" do
    search('"have" NN "uses"').should == ['have|AUXP business|NN uses|VERB3']
  end

  it "should find wildcard tags" do
    search('AU*').should == ['have|AUXP']
  end

  # The remaining examples sit inside a method body, so they are only
  # registered if `disabled` is called; nothing in this file calls it.
  def disabled
    it "should find exclusions" do
      search('A(!UXP DNE)').should == ['too|ADVE']
    end

    it "should find word|tag pairs" do
      search('"have"|D*').should == ['have|DMKD']
    end

    it "should find unlimited repetitions" do
      search('(AD*)+').should == ['derp|ADNE too|ADVE']
    end

    it "should find limited repetitions" do
      search('(AD*){3}').should == []
      search('(AD*){2}').should == ['derp|ADNE too|ADVE']
    end
  end
end
@@ -0,0 +1,55 @@
1
+ $: << File.expand_path('../../lib', __FILE__)
2
+
3
+ require 'lumix/text_snippet'
4
+
5
# Examples for the left/right context of a TextSnippet. Snippets are built
# with the `create_ts` helper from a text and a regexp locating the match.
describe Lumix::TextSnippet do
  # Fixture sentences shared by several examples.
  german = 'eins zwei drei vierß öfünfä ßechs sieben acht neun zehn'
  english = 'one two three four five six seven eight nine ten'

  before(:each) {}

  it "should handle umlauts properly" do
    snippet = create_ts(german, /öfünfä/)
    snippet.left(3).should == 'zwei drei vierß '
    snippet.to_s.should == 'öfünfä'
    snippet.right(3).should == ' ßechs sieben acht'
  end

  it "should handle partial words and umlauts properly" do
    snippet = create_ts(german, /fünf/)
    snippet.left(3).should == 'zwei drei vierß ö'
    snippet.to_s.should == 'fünf'
    snippet.right(3).should == 'ä ßechs sieben acht'
  end

  it "should have dynamic left context" do
    snippet = create_ts(english, /five/)
    snippet.left(1).should == 'four '
    snippet.left(2).should == 'three four '
    snippet.left(10).should == 'one two three four '
  end

  it "should have dynamic right context" do
    snippet = create_ts(english, /five/)
    snippet.right(1).should == ' six'
    snippet.right(2).should == ' six seven'
    snippet.right(10).should == ' six seven eight nine ten'
  end

  it "should work correctly with newlines" do
    snippet = create_ts("one two\n three four five six seven eight\n nine ten", /five/)
    snippet.left(1).should == 'four '
    snippet.right(1).should == ' six'
  end

  it "should replace newlines and tabs with spaces" do
    snippet = create_ts("one two three\n four five six\n\t seven eight nine ten", /five/)
    snippet.left(2).should == 'three four '
    snippet.right(2).should == ' six seven'
  end
end
50
+
51
# Builds a Lumix::TextSnippet for the first match of `re` in `text`.
# The snippet is named "text<N>", where N comes from a counter kept in
# @count, and is given the start and end offsets of the match.
def create_ts(text, re)
  @count ||= 0
  found = text.match(re)
  label = "text#{@count += 1}"
  Lumix::TextSnippet.new(label, text, *found.offset(0))
end
metadata ADDED
@@ -0,0 +1,175 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: lumix
3
+ version: !ruby/object:Gem::Version
4
+ prerelease:
5
+ version: 0.0.2
6
+ platform: java
7
+ authors:
8
+ - Michael Klaus
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+
13
+ date: 2012-01-09 00:00:00 Z
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: ffi-icu
17
+ prerelease: false
18
+ requirement: &id001 !ruby/object:Gem::Requirement
19
+ none: false
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0"
24
+ type: :runtime
25
+ version_requirements: *id001
26
+ - !ruby/object:Gem::Dependency
27
+ name: msgpack-jruby
28
+ prerelease: false
29
+ requirement: &id002 !ruby/object:Gem::Requirement
30
+ none: false
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: "0"
35
+ type: :runtime
36
+ version_requirements: *id002
37
+ - !ruby/object:Gem::Dependency
38
+ name: htmlentities
39
+ prerelease: false
40
+ requirement: &id003 !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ">="
44
+ - !ruby/object:Gem::Version
45
+ version: "0"
46
+ type: :runtime
47
+ version_requirements: *id003
48
+ - !ruby/object:Gem::Dependency
49
+ name: sequel
50
+ prerelease: false
51
+ requirement: &id004 !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ version: "0"
57
+ type: :runtime
58
+ version_requirements: *id004
59
+ - !ruby/object:Gem::Dependency
60
+ name: savon
61
+ prerelease: false
62
+ requirement: &id005 !ruby/object:Gem::Requirement
63
+ none: false
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ version: "0"
68
+ type: :runtime
69
+ version_requirements: *id005
70
+ - !ruby/object:Gem::Dependency
71
+ name: curb
72
+ prerelease: false
73
+ requirement: &id006 !ruby/object:Gem::Requirement
74
+ none: false
75
+ requirements:
76
+ - - ">="
77
+ - !ruby/object:Gem::Version
78
+ version: "0"
79
+ type: :runtime
80
+ version_requirements: *id006
81
+ - !ruby/object:Gem::Dependency
82
+ name: jdbc-postgres
83
+ prerelease: false
84
+ requirement: &id007 !ruby/object:Gem::Requirement
85
+ none: false
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: "0"
90
+ type: :runtime
91
+ version_requirements: *id007
92
+ - !ruby/object:Gem::Dependency
93
+ name: sweet
94
+ prerelease: false
95
+ requirement: &id008 !ruby/object:Gem::Requirement
96
+ none: false
97
+ requirements:
98
+ - - ">="
99
+ - !ruby/object:Gem::Version
100
+ version: "0"
101
+ type: :runtime
102
+ version_requirements: *id008
103
+ description: Lumix helps you create and tag a corpus from raw texts, as well as search in it with a simple query language.
104
+ email: Michael.Klaus@gmx.net
105
+ executables:
106
+ - lumix
107
+ - lumix-gui
108
+ extensions: []
109
+
110
+ extra_rdoc_files: []
111
+
112
+ files:
113
+ - COPYING
114
+ - bin/lumix-gui
115
+ - bin/lumix
116
+ - spec/filter_spec.rb
117
+ - spec/lookup_spec.rb
118
+ - spec/text_snippet_spec.rb
119
+ - lib/lumix/filter.rb
120
+ - lib/lumix/slow_search.rb
121
+ - lib/lumix/textprocessing.rb
122
+ - lib/lumix/fast_search.rb
123
+ - lib/lumix/lookup_filter.rb
124
+ - lib/lumix/thread_pool.rb
125
+ - lib/lumix/corrections.rb
126
+ - lib/lumix/lookup.rb
127
+ - lib/lumix/main.rb
128
+ - lib/lumix/concordancer.rb
129
+ - lib/lumix/text_snippet.rb
130
+ - lib/lumix/gui.rb
131
+ - lib/lumix/result_view.rb
132
+ - lib/lumix/cli.rb
133
+ - lib/lumix/lookup_search.rb
134
+ - lib/lumix/base.rb
135
+ - lib/lumix/charset.rb
136
+ - lib/lumix/schema/002_categories.rb
137
+ - lib/lumix/schema/003_add_fulltagged.rb
138
+ - lib/lumix/schema/001_create_tables.rb
139
+ - lib/lumix/schema/004_create_lookup_tables.rb
140
+ - lib/lumix/proto/lookup_filter.rb
141
+ - lib/lumix/proto/lookup.rb
142
+ - lib/lumix/proto/lookup_search.rb
143
+ - lib/lumix/model/base_models.rb
144
+ - lib/lumix/model/maglev_models.rb
145
+ - lib/lumix/model/sequel_models.rb
146
+ - lib/lumix/model/mock_models.rb
147
+ homepage: http://github.org/QaDeS/lumix
148
+ licenses: []
149
+
150
+ post_install_message:
151
+ rdoc_options: []
152
+
153
+ require_paths:
154
+ - lib
155
+ required_ruby_version: !ruby/object:Gem::Requirement
156
+ none: false
157
+ requirements:
158
+ - - ">="
159
+ - !ruby/object:Gem::Version
160
+ version: "0"
161
+ required_rubygems_version: !ruby/object:Gem::Requirement
162
+ none: false
163
+ requirements:
164
+ - - ">="
165
+ - !ruby/object:Gem::Version
166
+ version: "0"
167
+ requirements: []
168
+
169
+ rubyforge_project:
170
+ rubygems_version: 1.8.9
171
+ signing_key:
172
+ specification_version: 3
173
+ summary: A concordancer for corpus-based linguistic research.
174
+ test_files: []
175
+