poliqarpr 0.0.8 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/changelog.txt CHANGED
@@ -1,3 +1,8 @@
1
+ 0.1.0
2
+ - synchronization on async call without handler done via internal mutex
3
+ instead of call to STATUS
4
+ - client config moved to external class
5
+
1
6
  0.0.8
2
7
  - Speed optimization: socket puts changed to write
3
8
 
@@ -8,7 +13,7 @@
8
13
 
9
14
  0.0.6
10
15
  - fix: Excerpt#word - the words constituting the matched query
11
- - new: Excerpt#matched, Excerpt#right_context Excerpt#left_context
16
+ - new: Excerpt#matched, Excerpt#right_context Excerpt#left_context
12
17
  return the matched, right context, left context segments respectively
13
18
 
14
19
  0.0.5
@@ -19,16 +24,16 @@
19
24
  - Documentation now points to gemcutter instead of github
20
25
 
21
26
  0.0.4
22
- - ping/pong diagnostics
27
+ - ping/pong diagnostics
23
28
  - server version
24
- - corpus statistics
29
+ - corpus statistics
25
30
  - implementation of asynchronous protocol (not stable)
26
31
 
27
32
 
28
33
  0.0.3
29
34
  - the license of the corpus included
30
- - client rdoc documentation
31
- - support for lemmata retrieval
35
+ - client rdoc documentation
36
+ - support for lemmata retrieval
32
37
  - excerpt now contains segments instead of strings
33
38
  - buffer size setter
34
39
  - default corpus moved to separate plugin (sudo gem install apohllo-poliqarpr-corpus)
@@ -45,6 +50,6 @@
45
50
  - README.txt included in gem
46
51
  - specs included in gem
47
52
 
48
- 0.0.1
53
+ 0.0.1
49
54
  - initial implementation
50
55
  - synchronous querying for terms
data/lib/poliqarpr.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  begin
2
- require 'poliqarpr-corpus'
2
+ require 'poliqarpr-corpus'
3
3
  rescue LoadError
4
4
  # Do nothing, since the default corpus is optional
5
5
  end
@@ -3,61 +3,59 @@ module Poliqarp
3
3
  # Author:: Aleksander Pohl (mailto:apohllo@o2.pl)
4
4
  # License:: MIT License
5
5
  #
6
- # This class is the implementation of the Poliqarp server client.
6
+ # This class is the implementation of the Poliqarp server client.
7
7
  class Client
8
- GROUPS = [:left_context, :left_match, :right_match, :right_context]
9
-
10
- # If debug is turned on, the communication between server and client
8
+ # If debug is turned on, the communication between server and client
11
9
  # is logged to standard output.
12
10
  attr_writer :debug
13
11
 
14
- # The size of the buffer is the maximum number of excerpts which
15
- # are returned for single query.
16
- attr_writer :buffer_size
12
+ # The configuration of the client.
13
+ attr_reader :config
17
14
 
18
- # Creates new poliqarp server client.
19
- #
15
+ # Creates new poliqarp server client.
16
+ #
20
17
  # Parameters:
21
18
  # * +session_name+ the name of the client session. Defaults to "RUBY".
22
19
  # * +debug+ if set to true, all messages sent and received from server
23
20
  # are printed to standard output. Defaults to false.
24
21
  def initialize(session_name="RUBY", debug=false)
25
22
  @session_name = session_name
26
- @left_context = 5
27
- @right_context = 5
28
23
  @debug = debug
29
- @buffer_size = 500000
30
24
  @connector = Connector.new(debug)
25
+ @config = Config.new(self,500000)
31
26
  @answer_queue = Queue.new
27
+ @waiting_mutext = Mutex.new
32
28
  new_session
29
+ config.left_context_size = 5
30
+ config.right_context_size = 5
31
+ config.tags = []
32
+ config.lemmata = []
33
33
  end
34
34
 
35
35
  # A hint about installation of default corpus gem
36
36
  def self.const_missing(const)
37
- if const.to_s =~ /DEFAULT_CORPUS/
37
+ if const.to_s =~ /DEFAULT_CORPUS/
38
38
  raise "You need to install 'apohllo-poliqarpr-corpus' to use the default corpus"
39
39
  end
40
40
  super
41
41
  end
42
42
 
43
- # Creates new session for the client with the name given in constructor.
44
- # If the session was already opened, it is closed.
43
+ # Creates new session for the client with the name given in constructor.
44
+ # If the session was already opened, it is closed.
45
45
  #
46
- # Parameters:
46
+ # Parameters:
47
47
  # * +port+ - the port on which the poliqarpd server is accepting connections (defaults to 4567)
48
48
  def new_session(port=4567)
49
49
  close if @session
50
50
  @connector.open("localhost",port)
51
51
  talk("MAKE-SESSION #{@session_name}")
52
- talk("BUFFER-RESIZE #{@buffer_size}")
52
+ talk("BUFFER-RESIZE #{config.buffer_size}")
53
53
  @session = true
54
- self.tags = {}
55
- self.lemmata = {}
56
54
  end
57
55
 
58
56
  # Closes the opened session.
59
57
  def close
60
- talk "CLOSE-SESSION"
58
+ talk "CLOSE-SESSION"
61
59
  @session = false
62
60
  end
63
61
 
@@ -66,114 +64,36 @@ module Poliqarp
66
64
  talk "CLOSE"
67
65
  end
68
66
 
69
- # Sets the size of the left short context. It must be > 0
70
- #
71
- # The size of the left short context is the number
72
- # of segments displayed in the found excerpts left to the
73
- # matched segment(s).
74
- def left_context=(value)
75
- if correct_context_value?(value)
76
- result = talk("SET left-context-width #{value}")
77
- @left_context = value if result =~ /^R OK/
78
- else
79
- raise "Invalid argument: #{value}. It must be fixnum greater than 0."
80
- end
81
- end
82
-
83
- # Sets the size of the right short context. It must be > 0
84
- #
85
- # The size of the right short context is the number
86
- # of segments displayed in the found excerpts right to the
87
- # matched segment(s).
88
- def right_context=(value)
89
- if correct_context_value?(value)
90
- result = talk("SET right-context-width #{value}")
91
- @right_context = value if result =~ /^R OK/
92
- else
93
- raise "Invalid argument: #{value}. It must be fixnum greater than 0."
94
- end
95
- end
96
-
97
- # Sets the tags' flags. There are four groups of segments
98
- # which the flags apply for:
99
- # * +left_context+
100
- # * +left_match+
101
- # * +right_match+
102
- # * +right_context+
103
- #
104
- # If the flag for given group is set to true, all segments
105
- # in the group are annotated with grammatical tags. E.g.:
106
- # c.find("kot")
107
- # ...
108
- # "kot" tags: "subst:sg:nom:m2"
109
- #
110
- # You can pass :all to turn on flags for all groups
111
- def tags=(options={})
112
- options = set_all_flags if options == :all
113
- @tag_flags = options
114
- flags = ""
115
- GROUPS.each do |flag|
116
- flags << (options[flag] ? "1" : "0")
117
- end
118
- talk("SET retrieve-tags #{flags}")
119
- end
120
-
121
- # Sets the lemmatas' flags. There are four groups of segments
122
- # which the flags apply for:
123
- # * +left_context+
124
- # * +left_match+
125
- # * +right_match+
126
- # * +right_context+
127
- #
128
- # If the flag for given group is set to true, all segments
129
- # in the group are returned with the base form of the lemmata. E.g.:
130
- # c.find("kotu")
131
- # ...
132
- # "kotu" base_form: "kot"
133
- #
134
- # You can pass :all to turn on flags for all groups
135
- def lemmata=(options={})
136
- options = set_all_flags if options == :all
137
- @lemmata_flags = options
138
- flags = ""
139
- GROUPS.each do |flag|
140
- flags << (options[flag] ? "1" : "0")
141
- end
142
- talk("SET retrieve-lemmata #{flags}")
143
- end
144
-
145
67
  # *Asynchronous* Opens the corpus given as +path+. To open the default
146
- # corpus pass +:default+ as the argument.
147
- #
68
+ # corpus pass +:default+ as the argument.
69
+ #
148
70
  # If you don't want to wait until the call is finished, you
149
71
  # have to provide +handler+ for the asynchronous answer.
150
72
  def open_corpus(path, &handler)
151
73
  if path == :default
152
74
  open_corpus(DEFAULT_CORPUS, &handler)
153
75
  else
154
- real_handler = handler || lambda{|msg| @answer_queue.push msg }
155
- talk("OPEN #{path}", :async, &real_handler)
156
- do_wait if handler.nil?
76
+ talk("OPEN #{path}", :async, &handler)
157
77
  end
158
78
  end
159
79
 
160
80
  # Server diagnostics -- the result should be :pong
161
- def ping
81
+ def ping
162
82
  :pong if talk("PING") =~ /PONG/
163
83
  end
164
84
 
165
85
  # Returns server version
166
- def version
86
+ def version
167
87
  talk("VERSION")
168
88
  end
169
89
 
170
90
  # Returns corpus statistics:
171
- # * +:segment_tokens+ the number of segments in the corpus
91
+ # * +:segment_tokens+ the number of segments in the corpus
172
92
  # (two segments which look exactly the same are counted separately)
173
93
  # * +:segment_types+ the number of segment types in the corpus
174
94
  # (two segments which look exactly the same are counted as one type)
175
95
  # * +:lemmata+ the number of lemmata (lexemes) types
176
- # (all forms of inflected word, e.g. 'kot', 'kotu', ...
96
+ # (all forms of inflected word, e.g. 'kot', 'kotu', ...
177
97
  # are treated as one "word" -- lemmata)
178
98
  # * +:tags+ the number of different grammar tags (each combination
179
99
  # of atomic tags is treated as different "tag")
@@ -181,7 +101,7 @@ module Poliqarp
181
101
  stats = {}
182
102
  talk("CORPUS-STATS").split.each_with_index do |value, index|
183
103
  case index
184
- when 1
104
+ when 1
185
105
  stats[:segment_tokens] = value.to_i
186
106
  when 2
187
107
  stats[:segment_types] = value.to_i
@@ -205,7 +125,7 @@ module Poliqarp
205
125
  # (each category has a list of its tags, eg. gender: m1 m2 m3 f n,
206
126
  # means that there are 5 genders: masculine(1,2,3), feminine and neuter)
207
127
  # * +:classes+ enlists grammatical tags used to describe it
208
- # (each class has a list of tags used to describe it, eg. adj: degree
128
+ # (each class has a list of tags used to describe it, eg. adj: degree
209
129
  # gender case number, means that adjectives are described in terms
210
130
  # of degree, gender, case and number)
211
131
  def tagset
@@ -226,14 +146,14 @@ module Poliqarp
226
146
  #
227
147
  # Options:
228
148
  # * +index+ the index of the (only one) result to be returned. The index is relative
229
- # to the beginning of the query result. In normal case you should query the
149
+ # to the beginning of the query result. In normal case you should query the
230
150
  # corpus without specifying the index, to see what results are returned.
231
- # Then you can use the index and the same query to retrieve one result.
151
+ # Then you can use the index and the same query to retrieve one result.
232
152
  # The pair (query, index) is a kind of unique identifier of the excerpt.
233
153
  # * +page_size+ the size of the page of results. If the page size is 0, then
234
154
  # all results are returned on one page. It is ignored if the +index+ option
235
155
  # is present. Defaults to 0.
236
- # * +page_index+ the index of the page of results (the first page has index 1, not 0).
156
+ # * +page_index+ the index of the page of results (the first page has index 1, not 0).
237
157
  # It is ignored if the +index+ option is present. Defaults to 1.
238
158
  def find(query,options={})
239
159
  if options[:index]
@@ -243,11 +163,11 @@ module Poliqarp
243
163
  end
244
164
  end
245
165
 
246
- alias query find
166
+ alias query find
247
167
 
248
168
  # Returns the number of results for given query.
249
169
  def count(query)
250
- count_results(make_query(query))
170
+ count_results(make_query(query))
251
171
  end
252
172
 
253
173
  # Returns the long context of the excerpt which is identified by
@@ -257,13 +177,13 @@ module Poliqarp
257
177
  result = []
258
178
  talk "GET-CONTEXT #{index}"
259
179
  # 1st part
260
- result << read_word
180
+ result << read_word
261
181
  # 2nd part
262
- result << read_word
182
+ result << read_word
263
183
  # 3rd part
264
- result << read_word
184
+ result << read_word
265
185
  # 4th part
266
- result << read_word
186
+ result << read_word
267
187
  result
268
188
  end
269
189
 
@@ -286,19 +206,62 @@ module Poliqarp
286
206
  end
287
207
 
288
208
  protected
209
+ # Set the size of the left context.
210
+ def left_context=(value)
211
+ result = talk("SET left-context-width #{value}")
212
+ unless result =~ /^OK/
213
+ raise "Failed to set left context to #{value}: #{result}"
214
+ end
215
+ end
216
+
217
+ # Set the size of the right context.
218
+ def right_context=(value)
219
+ result = talk("SET right-context-width #{value}")
220
+ unless result =~ /^OK/
221
+ raise "Failed to set right context to #{value}: #{result}"
222
+ end
223
+ end
224
+
225
+ # Sets the 'retrieve-tags' flags.
226
+ def retrieve_tags(flags)
227
+ talk("SET retrieve-tags #{flags}")
228
+ end
229
+
230
+ # Sets the 'retrieve-lemmata' flags.
231
+ def retrieve_lemmata(flags)
232
+ talk("SET retrieve-lemmata #{flags}")
233
+ end
234
+
235
+
289
236
  # Sends a message directly to the server
290
237
  # * +msg+ the message to send
291
238
  # * +mode+ if set to :sync, the method blocks until the message
292
239
  # is received. If :async the method returns immediately.
293
240
  # Default: :sync
294
- # * +handler+ the handler of the assynchronous message.
241
+ # * +handler+ the handler of the assynchronous message.
295
242
  # It is ignored when the mode is set to :sync.
296
243
  def talk(msg, mode = :sync, &handler)
297
244
  puts msg if @debug
298
- @connector.send(msg, mode, &handler)
245
+ if mode == :sync
246
+ @connector.send_message(msg, mode, &handler)
247
+ else
248
+ if handler.nil?
249
+ real_handler = lambda do |msg|
250
+ @answer_queue.push msg
251
+ stop_waiting
252
+ end
253
+ start_waiting
254
+ else
255
+ real_handler = handler
256
+ end
257
+ @connector.send_message(msg, mode, &real_handler)
258
+ if handler.nil?
259
+ do_wait
260
+ end
261
+ end
299
262
  end
300
263
 
301
- # Make query and retrieve many results.
264
+ # Make query and retrieve many results.
302
265
  # * +query+ the query to be sent to the server.
303
266
  # * +options+ see find
304
267
  def find_many(query, options)
@@ -308,7 +271,7 @@ protected
308
271
  answer_offset = page_size * (page_index - 1)
309
272
  if page_size > 0
310
273
  result_count = make_async_query(query,answer_offset)
311
- answers_limit = answer_offset + page_size > result_count ?
274
+ answers_limit = answer_offset + page_size > result_count ?
312
275
  result_count - answer_offset : page_size
313
276
  else
314
277
  # all answers needed -- the call must be synchronous
@@ -321,12 +284,12 @@ protected
321
284
 
322
285
  result = QueryResult.new(page_index, page_count,page_size,self,query)
323
286
  if answers_limit > 0
324
- talk("GET-RESULTS #{answer_offset} #{answer_offset + answers_limit - 1}")
287
+ talk("GET-RESULTS #{answer_offset} #{answer_offset + answers_limit - 1}")
325
288
  answers_limit.times do |answer_index|
326
289
  result << fetch_result(answer_offset + answer_index, query)
327
290
  end
328
291
  end
329
- result
292
+ result
330
293
  end
331
294
 
332
295
  # Make query and retrieve only one result
@@ -334,13 +297,13 @@ protected
334
297
  # * +index+ the index of the answer to be retrieved
335
298
  def find_one(query,index)
336
299
  make_async_query(query,index)
337
- talk("GET-RESULTS #{index} #{index}")
338
- fetch_result(index,query)
300
+ talk("GET-RESULTS #{index} #{index}")
301
+ fetch_result(index,query)
339
302
  end
340
303
 
341
304
  # Fetches one result of the query
342
305
  #
343
- # MAKE-QUERY and GET-RESULTS must be sent to the server before
306
+ # MAKE-QUERY and GET-RESULTS must be sent to the server before
344
307
  # this method is called
345
308
  def fetch_result(index, query)
346
309
  result = Excerpt.new(index, self, query)
@@ -357,15 +320,15 @@ protected
357
320
  segments = []
358
321
  size.times do |segment_index|
359
322
  segment = Segment.new(read_word)
360
- segments << segment
361
- if @lemmata_flags[group] || @tag_flags[group]
323
+ segments << segment
324
+ if config.lemmata.include?(group) || config.tags.include?(group)
362
325
  lemmata_size = read_number()
363
- lemmata_size.times do |lemmata_index|
326
+ lemmata_size.times do |lemmata_index|
364
327
  lemmata = Lemmata.new()
365
- if @lemmata_flags[group]
328
+ if config.lemmata.include?(group)
366
329
  lemmata.base_form = read_word
367
330
  end
368
- if @tag_flags[group]
331
+ if config.tags.include?(group)
369
332
  lemmata.tags = read_word
370
333
  end
371
334
  segment.lemmata << lemmata
@@ -377,7 +340,9 @@ protected
377
340
 
378
341
  # Reads number stored in the message received from the server.
379
342
  def read_number
380
- @connector.read_message.match(/\d+/)[0].to_i
343
+ msg = @connector.read_message
344
+ puts "XXX #{msg}" if @debug
345
+ msg.match(/\d+/)[0].to_i
381
346
  end
382
347
 
383
348
  # Counts number of results for given answer
@@ -387,25 +352,22 @@ protected
387
352
 
388
353
  # *Asynchronous* Sends the query to the server
389
354
  # * +query+ query to send
390
- # * +handler+ if given, the method returns immediately,
355
+ # * +handler+ if given, the method returns immediately,
391
356
  # and the answer is sent to the handler. In this case
392
357
  # the result returned by make_query should be IGNORED!
393
358
  def make_query(query, &handler)
394
359
  if @last_query != query
395
360
  @last_query = query
396
- if handler.nil?
397
- real_handler = lambda { |msg| @answer_queue.push msg }
398
- else
399
- real_handler = handler
400
- end
401
361
  begin
402
362
  talk("MAKE-QUERY #{query}")
403
363
  rescue JobInProgress
404
364
  talk("CANCEL") rescue nil
405
365
  talk("MAKE-QUERY #{query}")
406
366
  end
407
- talk("RUN-QUERY #{@buffer_size}", :async, &real_handler)
408
- @last_result = do_wait if handler.nil?
367
+ result = talk("RUN-QUERY #{config.buffer_size}", :async, &handler)
368
+ if handler.nil?
369
+ @last_result = result
370
+ end
409
371
  end
410
372
  @last_result
411
373
  end
@@ -415,38 +377,55 @@ protected
415
377
  @connector.read_message
416
378
  end
417
379
 
418
- private
380
+ private
381
+ # Wait for the asynchronous answer, if some synchronous query
382
+ # was sent without handler.
419
383
  def do_wait
420
384
  loop {
421
- status = talk("STATUS") rescue break
422
- puts "STATUS: #{status}" if @debug
423
- sleep 0.3
385
+ break unless should_wait?
386
+ puts "WAITING" if @debug
387
+ sleep 0.1
424
388
  }
425
389
  @answer_queue.shift
426
390
  end
427
391
 
428
- def set_all_flags
429
- options = {}
430
- GROUPS.each{|g| options[g] = true}
431
- options
392
+ # Stop waiting for the asynchronous answer.
393
+ def stop_waiting
394
+ @waiting_mutext.synchronize {
395
+ @should_wait = false
396
+ }
397
+ puts "WAITING stopped" if @debug
398
+ end
399
+
400
+ # Check if the thread should still wait for the answer.
401
+ def should_wait?
402
+ should_wait = nil
403
+ @waiting_mutext.synchronize {
404
+ should_wait = @should_wait
405
+ }
406
+ should_wait
432
407
  end
433
-
434
- def correct_context_value?(value)
435
- value.is_a?(Fixnum) && value > 0
408
+
409
+ # Start waiting for the answer.
410
+ def start_waiting
411
+ @waiting_mutext.synchronize {
412
+ @should_wait = true
413
+ }
414
+ puts "WAITING started" if @debug
436
415
  end
437
416
 
438
- def make_async_query(query,answer_offset)
439
- # the handler is empty, since we access the result count through
440
- # BUFFER-STATE call
441
- make_query(query){|msg| }
442
- result_count = 0
443
- begin
417
+ def make_async_query(query,answer_offset)
418
+ start_waiting
419
+ # we access the result count through BUFFER-STATE call
420
+ make_query(query){|msg| stop_waiting}
421
+ result_count = 0
422
+ begin
444
423
  # the result count might be not exact!
445
424
  result_count = talk("BUFFER-STATE").split(" ")[2].to_i
446
- talk("STATUS") rescue break
425
+ break unless should_wait?
447
426
  end while result_count < answer_offset
448
427
  @last_result = "OK #{result_count}"
449
428
  result_count
450
429
  end
451
- end
430
+ end
452
431
  end
@@ -0,0 +1,138 @@
1
+ # vim:encoding=utf-8
2
module Poliqarp
  # Author:: Aleksander Pohl (mailto:apohllo@o2.pl)
  # License:: MIT License
  #
  # This class holds the configuration of the client.
  #
  # Every setter except +buffer_size=+ forwards the new value to the
  # client object passed to the constructor (via +send+, so the client
  # may keep the receiving methods non-public) and then remembers it.
  class Config
    # The groups of segments an excerpt is divided into. The order of the
    # groups defines the order of the flags built by +flags_for+.
    GROUPS = [:left_context, :left_match, :right_match, :right_context].freeze

    # The size of the buffer is the maximum number of excerpts which
    # are returned for single query. The writer only stores the value;
    # it does not contact the client.
    attr_accessor :buffer_size

    # Readers only -- the corresponding writers are defined explicitly
    # below, because they have to notify the client.
    attr_reader :left_context_size, :right_context_size, :tags, :lemmata

    # Creates new configuration.
    #
    # Parameters:
    # * +client+ the object which is notified about the changes. It has to
    #   respond to +left_context=+, +right_context=+, +retrieve_tags+
    #   and +retrieve_lemmata+.
    # * +buffer_size+ the initial size of the buffer.
    def initialize(client,buffer_size)
      @client = client
      @buffer_size = buffer_size
      # Start with empty group lists, so the per-group accessors
      # (e.g. left_context_tags) work before tags=/lemmata= is called.
      @tags = []
      @lemmata = []
    end

    # Sets the size of the left short context. It must be > 0
    #
    # The size of the left short context is the number
    # of segments displayed in the found excerpts left to the
    # matched segment(s).
    #
    # Raises RuntimeError if the +value+ is not an integer greater than 0.
    def left_context_size=(value)
      unless correct_context_value?(value)
        raise "Invalid argument: #{value}. It must be fixnum greater than 0."
      end
      @client.send(:left_context=,value)
      # Stored only after the client accepted the value without raising.
      @left_context_size = value
    end

    # Sets the size of the right short context. It must be > 0
    #
    # The size of the right short context is the number
    # of segments displayed in the found excerpts right to the
    # matched segment(s).
    #
    # Raises RuntimeError if the +value+ is not an integer greater than 0.
    def right_context_size=(value)
      unless correct_context_value?(value)
        raise "Invalid argument: #{value}. It must be fixnum greater than 0."
      end
      @client.send(:right_context=,value)
      # Stored only after the client accepted the value without raising.
      @right_context_size = value
    end

    # Sets the tags' flags. There are four groups of segments
    # which the flags apply for:
    # * +:left_context+
    # * +:left_match+
    # * +:right_match+
    # * +:right_context+
    #
    # If the flag for given group is present, all segments
    # in the group are annotated with grammatical tags. E.g.:
    #  c.find("kot")
    #  ...
    #  "kot" tags: "subst:sg:nom:m2"
    #
    # E.g. config.tags = [:left_context] will retrieve tags
    # only for the left context.
    #
    # You can pass :all to turn on flags for all groups, i.e.
    # config.tags = :all will retrieve tags for all groups.
    def tags=(groups)
      @tags = normalize_groups(groups)
      @client.send(:retrieve_tags, flags_for(@tags))
    end

    # Sets the lemmatas' flags. There are four groups of segments
    # which the flags apply for:
    # * +:left_context+
    # * +:left_match+
    # * +:right_match+
    # * +:right_context+
    #
    # If the flag for given group is present, all segments
    # in the group are returned with the base form of the lemmata. E.g.:
    #  c.find("kotu")
    #  ...
    #  "kotu" base_form: "kot"
    #
    # E.g. config.lemmata = [:left_context] will retrieve lemmata
    # only for the left context.
    #
    # You can pass :all to turn on flags for all groups, i.e.
    # config.lemmata = :all will retrieve lemmata for all groups.
    def lemmata=(groups)
      @lemmata = normalize_groups(groups)
      @client.send(:retrieve_lemmata, flags_for(@lemmata))
    end

    # Allow for accessing and changing individual group tags/lemmata flag,
    # e.g. config.left_context_tags, config.left_context_lemmata = true
    #
    # Each accessor works on the list matching its own type: the
    # *_tags methods on the tags list, the *_lemmata methods on the
    # lemmata list.
    [:tags,:lemmata].each do |type|
      ivar = "@#{type}"
      GROUPS.each do |group|
        # Returns true if the flag of the +type+ is set for the +group+.
        define_method("#{group}_#{type}") do
          instance_variable_get(ivar).include?(group)
        end

        # Turns the flag of the +type+ on or off for the +group+ and
        # sends the updated flags to the client.
        define_method("#{group}_#{type}=") do |value|
          selected = instance_variable_get(ivar)
          if value
            selected << group unless selected.include?(group)
          else
            selected.delete(group)
          end
          @client.send("retrieve_#{type}", flags_for(selected))
        end
      end
    end

    protected
    # Returns true if the +value+ is an integer greater than 0.
    # Integer is used instead of Fixnum, since Fixnum is not defined
    # in recent versions of Ruby.
    def correct_context_value?(value)
      value.is_a?(Integer) && value > 0
    end

    # Converts the list of groups into the string of flags, one character
    # per group in the GROUPS order: "1" if the group is present in
    # +elements+, "0" otherwise.
    def flags_for(elements)
      GROUPS.map{|group| elements.include?(group) ? "1" : "0" }.join
    end

    # Returns a fresh array of groups for the value passed to
    # +tags=+ / +lemmata=+. :all expands to all GROUPS. The copy keeps
    # the per-group setters from modifying the array of the caller.
    def normalize_groups(groups)
      groups == :all ? GROUPS.dup : Array(groups).dup
    end
  end
end
@@ -44,7 +44,7 @@ module Poliqarp
44
44
  @debug = debug
45
45
  end
46
46
 
47
- # Opens connection with poliqarp server which runs
47
+ # Opens connection with poliqarp server which runs
48
48
  # on given +host+ and +port+.
49
49
  def open(host,port)
50
50
  @socket_mutex.synchronize {
@@ -60,17 +60,16 @@ module Poliqarp
60
60
  }
61
61
  end
62
62
 
63
- # Sends message to the poliqarp server. Returns the first synchronous
63
+ # Sends message to the poliqarp server. Returns the first synchronous
64
64
  # answer of the server.
65
65
  # * +message+ the message to send
66
66
  # * +mode+ synchronous (+:sync+) or asynchronous (+:async+)
67
67
  # * +handler+ the handler of the asynchronous message
68
- def send(message, mode, &handler)
68
+ def send_message(message, mode, &handler)
69
69
  puts "send #{mode} #{message}" if @debug
70
70
  if ruby19?
71
71
  massage = message.encode(UTF8)
72
72
  end
73
- #@socket.puts(message)
74
73
  @socket.write(message+"\n")
75
74
  if mode == :async
76
75
  @handler = handler
@@ -79,7 +78,7 @@ module Poliqarp
79
78
  end
80
79
 
81
80
  # Retrieves one message from the server.
82
- # If the message indicates an error, new runtime error
81
+ # If the message indicates an error, new runtime error
83
82
  # containing the error description is returned.
84
83
  def read_message
85
84
  message = @message_queue.shift
@@ -94,7 +93,7 @@ module Poliqarp
94
93
 
95
94
  private
96
95
  def main_loop
97
- @loop = Thread.new {
96
+ @loop = Thread.new {
98
97
  loop {
99
98
  receive
100
99
  # XXX ??? needed
@@ -124,8 +123,8 @@ private
124
123
 
125
124
  def receive_async(message)
126
125
  puts "receive async: #{message}" if @debug
127
- Thread.new{
128
- @handler.call(message)
126
+ Thread.new{
127
+ @handler.call(message)
129
128
  }
130
129
  end
131
130
 
@@ -2,7 +2,7 @@ module Poliqarp
2
2
  # Author:: Aleksander Pohl (mailto:apohllo@o2.pl)
3
3
  # License:: MIT License
4
4
 
5
- # The JobInProgress exception is raised if there was asynchronous call
5
+ # The JobInProgress exception is raised if there was asynchronous call
6
6
  # to the server which hasn't finished, which is interrupted by another
7
7
  # asynchronous call.
8
8
  class JobInProgress < Exception; end
@@ -2,13 +2,13 @@ module Poliqarp
2
2
  # Author:: Aleksander Pohl
3
3
  # License:: MIT License
4
4
  #
5
- # The excerpt class is used to store single result of the query,
5
+ # The excerpt class is used to store single result of the query,
6
6
  # i.e. the excerpt of the corpus which contains the words which
7
- # the corpus was queried for.
7
+ # the corpus was queried for.
8
8
  #
9
9
  # The excerpt is divided into groups, which contain segments,
10
- # which the texts in the corpus were divided for.
11
- # The first group is the left context, the second -- the matched
10
+ # which the texts in the corpus were divided for.
11
+ # The first group is the left context, the second -- the matched
12
12
  # query, and the last -- the right context.
13
13
  class Excerpt
14
14
  attr_reader :index, :base_form, :short_context
@@ -40,7 +40,7 @@ module Poliqarp
40
40
  @short_context[2]
41
41
  end
42
42
 
43
- # Returns the matched query as string
43
+ # Returns the matched query as string
44
44
  def word
45
45
  #@short_context[0].split(/\s+/)[-1]
46
46
  @short_context[1].map{|s| s.to_s}.join("")
@@ -54,7 +54,7 @@ module Poliqarp
54
54
  @short_context.join("")
55
55
  end
56
56
 
57
- # Returns the long context of the query.
57
+ # Returns the long context of the query.
58
58
  def context
59
59
  return @context unless @context.nil?
60
60
  @context = @client.context(@base_form, @index)
@@ -63,7 +63,7 @@ module Poliqarp
63
63
  { :medium => :medium, :style => :styl, :date => :data_wydania,
64
64
  :city => :miejsce_wydania, :publisher => :wydawca, :title => :tytu,
65
65
  :author => :autor}.each do |method, keyword|
66
- define_method method do
66
+ define_method method do
67
67
  self.metadata[keyword]
68
68
  end
69
69
  end
@@ -2,9 +2,9 @@ module Poliqarp
2
2
  # Author:: Aleksander Pohl (mailto:apohllo@o2.pl)
3
3
  # License:: MIT License
4
4
  #
5
- # The query result class is used to paginate results of the
5
+ # The query result class is used to paginate results of the
6
6
  # query. Each query result has information about its context
7
- # (the next and previous page).
7
+ # (the next and previous page).
8
8
  class QueryResult
9
9
  include Enumerable
10
10
 
@@ -51,16 +51,16 @@ module Poliqarp
51
51
  # Returns the previous page of the query result
52
52
  def previous_page
53
53
  if @page > 1
54
- @client.find(@query, :page_size => @page_size,
55
- :page_index => @page - 1)
54
+ @client.find(@query, :page_size => @page_size,
55
+ :page_index => @page - 1)
56
56
  end
57
57
  end
58
58
 
59
59
  # Return the next page of the query result
60
60
  def next_page
61
61
  if @page < @page_count
62
- @client.find(@query, :page_size => @page_size,
63
- :page_index => @page + 1)
62
+ @client.find(@query, :page_size => @page_size,
63
+ :page_index => @page + 1)
64
64
  end
65
65
  end
66
66
 
@@ -1,22 +1,22 @@
1
- module Poliqarp
1
+ module Poliqarp
2
2
  # Author:: Aleksander Pohl (mailto:apohllo@o2.pl)
3
3
  # License:: MIT LICENSE
4
4
  #
5
- # The segment is the smallest meaningful part of the text.
6
- # It may contain many lemmata, since the segments are sometimes
7
- # not disambiguated.
5
+ # The segment is the smallest meaningful part of the text.
6
+ # It may contain many lemmata, since the segments are sometimes
7
+ # not disambiguated.
8
8
  class Segment
9
9
  attr_reader :literal, :lemmata
10
10
 
11
- # Creates new segment. The specified argument is the literal
12
- # (as found in the text) representation of the segment.
11
+ # Creates new segment. The specified argument is the literal
12
+ # (as found in the text) representation of the segment.
13
13
  def initialize(literal)
14
14
  @literal = literal
15
15
  @lemmata = []
16
16
  end
17
17
 
18
18
  # Returns the segment literal
19
- def to_s
19
+ def to_s
20
20
  @literal
21
21
  end
22
22
  end
@@ -3,7 +3,7 @@ module Poliqarp #:nodoc:
3
3
  module Ruby19
4
4
  # Returns true if the Ruby version is at least 1.9.0
5
5
  def ruby19?
6
- RUBY_VERSION.split(".")[0..1].join(".").to_f >= 1.9
6
+ RUBY_VERSION.split(".")[0..1].join(".").to_f >= 1.9
7
7
  end
8
8
  end
9
9
  end
data/poliqarpr.gemspec CHANGED
@@ -1,13 +1,13 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = "poliqarpr"
3
- s.version = "0.0.8"
4
- s.date = "2011-01-12"
3
+ s.version = "0.1.0"
4
+ s.date = "2011-01-17"
5
5
  s.summary = "Ruby client for Poliqarp"
6
6
  s.email = "apohllo@o2.pl"
7
7
  s.homepage = "http://www.github.com/apohllo/poliqarpr"
8
8
  s.description = "Ruby client for Poliqarp (NLP corpus server)"
9
9
  s.authors = ['Aleksander Pohl']
10
- s.files = ["Rakefile", "poliqarpr.gemspec",
10
+ s.files = ["Rakefile", "poliqarpr.gemspec",
11
11
  "changelog.txt", "README.txt" ] + Dir.glob("lib/**/*")
12
12
  s.test_files = Dir.glob("spec/**/*")
13
13
  s.rdoc_options = ["--main", "README.txt"]
data/spec/client.rb CHANGED
@@ -5,17 +5,17 @@ require 'poliqarpr'
5
5
  describe Poliqarp::Client do
6
6
  describe "(general test)" do
7
7
  before(:each) do
8
- @client = Poliqarp::Client.new("TEST")
8
+ @client = Poliqarp::Client.new("TEST1")
9
9
  end
10
-
11
- after(:each) do
10
+
11
+ after(:each) do
12
12
  @client.close
13
13
  end
14
-
14
+
15
15
  it "should allow to open corpus" do
16
16
  @client.open_corpus("/home/fox/local/poliqarp/2.sample.30/sample")
17
17
  end
18
-
18
+
19
19
  it "should allow to open :default corpus" do
20
20
  @client.open_corpus(:default)
21
21
  end
@@ -32,7 +32,7 @@ describe Poliqarp::Client do
32
32
 
33
33
  describe "(with 'sample' corpus)" do
34
34
  before(:all) do
35
- @client = Poliqarp::Client.new("TEST")
35
+ @client = Poliqarp::Client.new("TEST2")
36
36
  @client.open_corpus("/home/fox/local/poliqarp/2.sample.30/sample")
37
37
  end
38
38
 
@@ -40,35 +40,37 @@ describe Poliqarp::Client do
40
40
  @client.close
41
41
  end
42
42
 
43
- it "should allow to set the right context size" do
44
- @client.right_context = 5
43
+ it "should allow to set and get the right context size" do
44
+ @client.config.right_context_size = 5
45
+ @client.config.right_context_size.should == 5
45
46
  end
46
47
 
47
- it "should raise error if the size of right context is not number" do
48
- (proc do
49
- @client.right_context = "a"
48
+ it "should raise error if the size of right context is not number" do
49
+ (proc do
50
+ @client.config.right_context_size = "a"
50
51
  end).should raise_error(RuntimeError)
51
52
  end
52
53
 
53
- it "should rais error if the size of right context is less or equal 0" do
54
- (proc do
55
- @client.right_context = 0
54
+ it "should rais error if the size of right context is less or equal 0" do
55
+ (proc do
56
+ @client.config.right_context_size = 0
56
57
  end).should raise_error(RuntimeError)
57
58
  end
58
59
 
59
- it "should allow to set the left context size" do
60
- @client.right_context = 5
60
+ it "should allow to set and get the left context size" do
61
+ @client.config.left_context_size = 5
62
+ @client.config.left_context_size.should == 5
61
63
  end
62
64
 
63
- it "should raise error if the size of left context is not number" do
64
- (lambda do
65
- @client.left_context = "a"
65
+ it "should raise error if the size of left context is not number" do
66
+ (lambda do
67
+ @client.config.left_context_size = "a"
66
68
  end).should raise_error(RuntimeError)
67
69
  end
68
70
 
69
- it "should rais error if the size of left context is less or equal 0" do
70
- (lambda do
71
- @client.left_context = 0
71
+ it "should rais error if the size of left context is less or equal 0" do
72
+ (lambda do
73
+ @client.config.left_context_size = 0
72
74
  end).should raise_error(RuntimeError)
73
75
  end
74
76
 
@@ -87,7 +89,7 @@ describe Poliqarp::Client do
87
89
  tagset[:classes].should_not == nil
88
90
  end
89
91
 
90
- it "should allow to find 'kot'" do
92
+ it "should allow to find 'kot'" do
91
93
  @client.find("kot").size.should_not == 0
92
94
  end
93
95
 
@@ -129,7 +131,7 @@ describe Poliqarp::Client do
129
131
  end
130
132
 
131
133
  describe("(with index specified in find)") do
132
- before(:each) do
134
+ before(:each) do
133
135
  @result = @client.find("nachalny",:index => 0)
134
136
  end
135
137
 
@@ -146,13 +148,12 @@ describe Poliqarp::Client do
146
148
  end
147
149
  end
148
150
 
149
- describe("(with lemmata flags set to true)") do
151
+ describe("(with lemmata flags set to true)") do
150
152
  before(:all) do
151
- @client.lemmata = {:left_context => true, :right_context => true,
152
- :left_match => true, :right_match => true}
153
+ @client.config.lemmata = [:left_context, :right_context, :left_match, :right_match]
153
154
  end
154
155
 
155
- it "should allow to find 'kotu'" do
156
+ it "should allow to find 'kotu'" do
156
157
  @client.find("kotu").size.should_not == 0
157
158
  end
158
159
 
data/spec/excerpt.rb CHANGED
@@ -25,7 +25,7 @@ describe Poliqarp::Excerpt do
25
25
  @excerpt.index.should_not == nil
26
26
  end
27
27
 
28
- it "should have base form" do
28
+ it "should have base form" do
29
29
  @excerpt.base_form.should_not == nil
30
30
  end
31
31
 
@@ -64,10 +64,10 @@ describe Poliqarp::Excerpt do
64
64
  end
65
65
 
66
66
  it "should have index set to 0" do
67
- @excerpt.index.should == 0
67
+ @excerpt.index.should == 0
68
68
  end
69
69
 
70
- it "should have base form set to 'kot'" do
70
+ it "should have base form set to 'kot'" do
71
71
  @excerpt.base_form.should == "mu za to astronomiczną"
72
72
  end
73
73
 
@@ -96,7 +96,7 @@ describe Poliqarp::Excerpt do
96
96
  it "should have 'city' set to nil" do
97
97
  @excerpt.city.should == nil
98
98
  end
99
-
99
+
100
100
  it "should have one 'publisher' set to 'Wydawnictwo Naukowe Akademii Pedagogicznej'" do
101
101
  @excerpt.publisher.size.should == 1
102
102
  @excerpt.publisher[0].should == "Wydawnictwo W.A.B."
@@ -114,13 +114,13 @@ describe Poliqarp::Excerpt do
114
114
  end
115
115
 
116
116
  describe('first result for "kotu" with lemmatization turned on') do
117
- before(:all) do
118
- @client.lemmata = :all
117
+ before(:all) do
118
+ @client.config.lemmata = :all
119
119
  @client.open_corpus(:default)
120
- @excerpt = @client.find("kotu")[0]
120
+ @excerpt = @client.find("kotu")[0]
121
121
  end
122
122
 
123
- it "should have one lemmata for each segment" do
123
+ it "should have one lemmata for each segment" do
124
124
  @excerpt.short_context.each do |group|
125
125
  group.each do |segment|
126
126
  segment.lemmata.size.should == 1
@@ -134,7 +134,7 @@ describe Poliqarp::Excerpt do
134
134
  end
135
135
  end
136
136
 
137
- it "should contain 'kot' as one of the lemmata" do
137
+ it "should contain 'kot' as one of the lemmata" do
138
138
  @excerpt.short_context.flatten.
139
139
  any?{|s| s.lemmata[0].base_form == "kot"}.should == true
140
140
  end
data/spec/query_result.rb CHANGED
@@ -12,7 +12,7 @@ describe Poliqarp::QueryResult do
12
12
  @client.close
13
13
  end
14
14
 
15
- describe "(for unspecified query)" do
15
+ describe "(for unspecified query)" do
16
16
  before(:all) do
17
17
  @result = @client.find("kita")
18
18
  end
@@ -57,11 +57,11 @@ describe Poliqarp::QueryResult do
57
57
  end
58
58
 
59
59
  describe "(for 'kot' in :default corpus)" do
60
- before(:all) do
60
+ before(:all) do
61
61
  @result = @client.find("kot")
62
62
  end
63
63
 
64
- it "should have size == 6" do
64
+ it "should have size == 6" do
65
65
  @result.size.should == 6
66
66
  end
67
67
 
@@ -83,11 +83,11 @@ describe Poliqarp::QueryResult do
83
83
  end
84
84
 
85
85
  describe "(for 'kot' with page_size set to 5 in :default corpus)" do
86
- before(:all) do
86
+ before(:all) do
87
87
  @result = @client.find("kot", :page_size => 5)
88
88
  end
89
89
 
90
- it "should have size == 5" do
90
+ it "should have size == 5" do
91
91
  @result.size.should == 5
92
92
  end
93
93
 
@@ -109,11 +109,11 @@ describe Poliqarp::QueryResult do
109
109
  end
110
110
 
111
111
  describe "(next for 'kot' with page_size set to 5 in :default corpus)" do
112
- before(:all) do
112
+ before(:all) do
113
113
  @result = @client.find("kot", :page_size => 5).next_page
114
114
  end
115
115
 
116
- it "should have size == 1" do
116
+ it "should have size == 1" do
117
117
  @result.size.should == 1
118
118
  end
119
119
 
metadata CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
4
4
  prerelease: false
5
5
  segments:
6
6
  - 0
7
+ - 1
7
8
  - 0
8
- - 8
9
- version: 0.0.8
9
+ version: 0.1.0
10
10
  platform: ruby
11
11
  authors:
12
12
  - Aleksander Pohl
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2011-01-12 00:00:00 +01:00
17
+ date: 2011-01-17 00:00:00 +01:00
18
18
  default_executable:
19
19
  dependencies: []
20
20
 
@@ -32,6 +32,7 @@ files:
32
32
  - changelog.txt
33
33
  - README.txt
34
34
  - lib/poliqarpr.rb
35
+ - lib/poliqarpr/config.rb
35
36
  - lib/poliqarpr/exceptions.rb
36
37
  - lib/poliqarpr/lemmata.rb
37
38
  - lib/poliqarpr/query_result.rb