poliqarpr 0.0.8 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/changelog.txt CHANGED
@@ -1,3 +1,8 @@
1
+ 0.1.0
2
+ - synchronization on async call without handler done via internal mutex
3
+ instead of call to STATUS
4
+ - client config moved to external class
5
+
1
6
  0.0.8
2
7
  - Speed optimization: socket puts changed to write
3
8
 
@@ -8,7 +13,7 @@
8
13
 
9
14
  0.0.6
10
15
  - fix: Excerpt#word - the words constituting the matched query
11
- - new: Excerpt#matched, Excerpt#right_context Excerpt#left_context
16
+ - new: Excerpt#matched, Excerpt#right_context Excerpt#left_context
12
17
  return the matched, right context, left context segments respectively
13
18
 
14
19
  0.0.5
@@ -19,16 +24,16 @@
19
24
  - Documentation now points to gemcutter instead of github
20
25
 
21
26
  0.0.4
22
- - ping/pong diagnostics
27
+ - ping/pong diagnostics
23
28
  - server version
24
- - corpus statistics
29
+ - corpus statistics
25
30
  - implementation of asynchronous protocol (not stable)
26
31
 
27
32
 
28
33
  0.0.3
29
34
  - the license of the corpus included
30
- - client rdoc documentation
31
- - support for lemmata retrieval
35
+ - client rdoc documentation
36
+ - support for lemmata retrieval
32
37
  - excerpt now contains segments instead of strings
33
38
  - buffer size setter
34
39
  - default corpus moved to separate plugin (sudo gem install apohllo-poliqarpr-corpus)
@@ -45,6 +50,6 @@
45
50
  - README.txt included in gem
46
51
  - specs included in gem
47
52
 
48
- 0.0.1
53
+ 0.0.1
49
54
  - initial implementation
50
55
  - synchronous querying for terms
data/lib/poliqarpr.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  begin
2
- require 'poliqarpr-corpus'
2
+ require 'poliqarpr-corpus'
3
3
  rescue LoadError
4
4
  # Do nothing, since the default corpus is optional
5
5
  end
@@ -3,61 +3,59 @@ module Poliqarp
3
3
  # Author:: Aleksander Pohl (mailto:apohllo@o2.pl)
4
4
  # License:: MIT License
5
5
  #
6
- # This class is the implementation of the Poliqarp server client.
6
+ # This class is the implementation of the Poliqarp server client.
7
7
  class Client
8
- GROUPS = [:left_context, :left_match, :right_match, :right_context]
9
-
10
- # If debug is turned on, the communication between server and client
8
+ # If debug is turned on, the communication between server and client
11
9
  # is logged to standard output.
12
10
  attr_writer :debug
13
11
 
14
- # The size of the buffer is the maximum number of excerpts which
15
- # are returned for single query.
16
- attr_writer :buffer_size
12
+ # The configuration of the client.
13
+ attr_reader :config
17
14
 
18
- # Creates new poliqarp server client.
19
- #
15
+ # Creates new poliqarp server client.
16
+ #
20
17
  # Parameters:
21
18
  # * +session_name+ the name of the client session. Defaults to "RUBY".
22
19
  # * +debug+ if set to true, all messages sent and received from server
23
20
  # are printed to standard output. Defaults to false.
24
21
  def initialize(session_name="RUBY", debug=false)
25
22
  @session_name = session_name
26
- @left_context = 5
27
- @right_context = 5
28
23
  @debug = debug
29
- @buffer_size = 500000
30
24
  @connector = Connector.new(debug)
25
+ @config = Config.new(self,500000)
31
26
  @answer_queue = Queue.new
27
+ @waiting_mutext = Mutex.new
32
28
  new_session
29
+ config.left_context_size = 5
30
+ config.right_context_size = 5
31
+ config.tags = []
32
+ config.lemmata = []
33
33
  end
34
34
 
35
35
  # A hint about installation of default corpus gem
36
36
  def self.const_missing(const)
37
- if const.to_s =~ /DEFAULT_CORPUS/
37
+ if const.to_s =~ /DEFAULT_CORPUS/
38
38
  raise "You need to install 'apohllo-poliqarpr-corpus' to use the default corpus"
39
39
  end
40
40
  super
41
41
  end
42
42
 
43
- # Creates new session for the client with the name given in constructor.
44
- # If the session was already opened, it is closed.
43
+ # Creates new session for the client with the name given in constructor.
44
+ # If the session was already opened, it is closed.
45
45
  #
46
- # Parameters:
46
+ # Parameters:
47
47
  # * +port+ - the port on which the poliqarpd server is accepting connections (defaults to 4567)
48
48
  def new_session(port=4567)
49
49
  close if @session
50
50
  @connector.open("localhost",port)
51
51
  talk("MAKE-SESSION #{@session_name}")
52
- talk("BUFFER-RESIZE #{@buffer_size}")
52
+ talk("BUFFER-RESIZE #{config.buffer_size}")
53
53
  @session = true
54
- self.tags = {}
55
- self.lemmata = {}
56
54
  end
57
55
 
58
56
  # Closes the opened session.
59
57
  def close
60
- talk "CLOSE-SESSION"
58
+ talk "CLOSE-SESSION"
61
59
  @session = false
62
60
  end
63
61
 
@@ -66,114 +64,36 @@ module Poliqarp
66
64
  talk "CLOSE"
67
65
  end
68
66
 
69
- # Sets the size of the left short context. It must be > 0
70
- #
71
- # The size of the left short context is the number
72
- # of segments displayed in the found excerpts left to the
73
- # matched segment(s).
74
- def left_context=(value)
75
- if correct_context_value?(value)
76
- result = talk("SET left-context-width #{value}")
77
- @left_context = value if result =~ /^R OK/
78
- else
79
- raise "Invalid argument: #{value}. It must be fixnum greater than 0."
80
- end
81
- end
82
-
83
- # Sets the size of the right short context. It must be > 0
84
- #
85
- # The size of the right short context is the number
86
- # of segments displayed in the found excerpts right to the
87
- # matched segment(s).
88
- def right_context=(value)
89
- if correct_context_value?(value)
90
- result = talk("SET right-context-width #{value}")
91
- @right_context = value if result =~ /^R OK/
92
- else
93
- raise "Invalid argument: #{value}. It must be fixnum greater than 0."
94
- end
95
- end
96
-
97
- # Sets the tags' flags. There are four groups of segments
98
- # which the flags apply for:
99
- # * +left_context+
100
- # * +left_match+
101
- # * +right_match+
102
- # * +right_context+
103
- #
104
- # If the flag for given group is set to true, all segments
105
- # in the group are annotated with grammatical tags. E.g.:
106
- # c.find("kot")
107
- # ...
108
- # "kot" tags: "subst:sg:nom:m2"
109
- #
110
- # You can pass :all to turn on flags for all groups
111
- def tags=(options={})
112
- options = set_all_flags if options == :all
113
- @tag_flags = options
114
- flags = ""
115
- GROUPS.each do |flag|
116
- flags << (options[flag] ? "1" : "0")
117
- end
118
- talk("SET retrieve-tags #{flags}")
119
- end
120
-
121
- # Sets the lemmatas' flags. There are four groups of segments
122
- # which the flags apply for:
123
- # * +left_context+
124
- # * +left_match+
125
- # * +right_match+
126
- # * +right_context+
127
- #
128
- # If the flag for given group is set to true, all segments
129
- # in the group are returned with the base form of the lemmata. E.g.:
130
- # c.find("kotu")
131
- # ...
132
- # "kotu" base_form: "kot"
133
- #
134
- # You can pass :all to turn on flags for all groups
135
- def lemmata=(options={})
136
- options = set_all_flags if options == :all
137
- @lemmata_flags = options
138
- flags = ""
139
- GROUPS.each do |flag|
140
- flags << (options[flag] ? "1" : "0")
141
- end
142
- talk("SET retrieve-lemmata #{flags}")
143
- end
144
-
145
67
  # *Asynchronous* Opens the corpus given as +path+. To open the default
146
- # corpus pass +:default+ as the argument.
147
- #
68
+ # corpus pass +:default+ as the argument.
69
+ #
148
70
  # If you don't want to wait until the call is finished, you
149
71
  # have to provide +handler+ for the asynchronous answer.
150
72
  def open_corpus(path, &handler)
151
73
  if path == :default
152
74
  open_corpus(DEFAULT_CORPUS, &handler)
153
75
  else
154
- real_handler = handler || lambda{|msg| @answer_queue.push msg }
155
- talk("OPEN #{path}", :async, &real_handler)
156
- do_wait if handler.nil?
76
+ talk("OPEN #{path}", :async, &handler)
157
77
  end
158
78
  end
159
79
 
160
80
  # Server diagnostics -- the result should be :pong
161
- def ping
81
+ def ping
162
82
  :pong if talk("PING") =~ /PONG/
163
83
  end
164
84
 
165
85
  # Returns server version
166
- def version
86
+ def version
167
87
  talk("VERSION")
168
88
  end
169
89
 
170
90
  # Returns corpus statistics:
171
- # * +:segment_tokens+ the number of segments in the corpus
91
+ # * +:segment_tokens+ the number of segments in the corpus
172
92
  # (two segments which look exactly the same are counted separately)
173
93
  # * +:segment_types+ the number of segment types in the corpus
174
94
  # (two segments which look exactly the same are counted as one type)
175
95
  # * +:lemmata+ the number of lemmata (lexemes) types
176
- # (all forms of inflected word, e.g. 'kot', 'kotu', ...
96
+ # (all forms of inflected word, e.g. 'kot', 'kotu', ...
177
97
  # are treated as one "word" -- lemmata)
178
98
  # * +:tags+ the number of different grammar tags (each combination
179
99
  # of atomic tags is treated as different "tag")
@@ -181,7 +101,7 @@ module Poliqarp
181
101
  stats = {}
182
102
  talk("CORPUS-STATS").split.each_with_index do |value, index|
183
103
  case index
184
- when 1
104
+ when 1
185
105
  stats[:segment_tokens] = value.to_i
186
106
  when 2
187
107
  stats[:segment_types] = value.to_i
@@ -205,7 +125,7 @@ module Poliqarp
205
125
  # (each category has a list of its tags, eg. gender: m1 m2 m3 f n,
206
126
  # means that there are 5 genders: masculine(1,2,3), feminine and neuter)
207
127
  # * +:classes+ enlists grammatical tags used to describe it
208
- # (each class has a list of tags used to describe it, eg. adj: degree
128
+ # (each class has a list of tags used to describe it, eg. adj: degree
209
129
  # gender case number, means that adjectives are described in terms
210
130
  # of degree, gender, case and number)
211
131
  def tagset
@@ -226,14 +146,14 @@ module Poliqarp
226
146
  #
227
147
  # Options:
228
148
  # * +index+ the index of the (only one) result to be returned. The index is relative
229
- # to the beginning of the query result. In normal case you should query the
149
+ # to the beginning of the query result. In normal case you should query the
230
150
  # corpus without specifying the index, to see what results are returned.
231
- # Then you can use the index and the same query to retrieve one result.
151
+ # Then you can use the index and the same query to retrieve one result.
232
152
  # The pair (query, index) is a kind of unique identifier of the excerpt.
233
153
  # * +page_size+ the size of the page of results. If the page size is 0, then
234
154
  # all results are returned on one page. It is ignored if the +index+ option
235
155
  # is present. Defaults to 0.
236
- # * +page_index+ the index of the page of results (the first page has index 1, not 0).
156
+ # * +page_index+ the index of the page of results (the first page has index 1, not 0).
237
157
  # It is ignored if the +index+ option is present. Defaults to 1.
238
158
  def find(query,options={})
239
159
  if options[:index]
@@ -243,11 +163,11 @@ module Poliqarp
243
163
  end
244
164
  end
245
165
 
246
- alias query find
166
+ alias query find
247
167
 
248
168
  # Returns the number of results for given query.
249
169
  def count(query)
250
- count_results(make_query(query))
170
+ count_results(make_query(query))
251
171
  end
252
172
 
253
173
  # Returns the long context of the excerpt which is identified by
@@ -257,13 +177,13 @@ module Poliqarp
257
177
  result = []
258
178
  talk "GET-CONTEXT #{index}"
259
179
  # 1st part
260
- result << read_word
180
+ result << read_word
261
181
  # 2nd part
262
- result << read_word
182
+ result << read_word
263
183
  # 3rd part
264
- result << read_word
184
+ result << read_word
265
185
  # 4th part
266
- result << read_word
186
+ result << read_word
267
187
  result
268
188
  end
269
189
 
@@ -286,19 +206,62 @@ module Poliqarp
286
206
  end
287
207
 
288
208
  protected
209
+ # Set the size of the left context.
210
+ def left_context=(value)
211
+ result = talk("SET left-context-width #{value}")
212
+ unless result =~ /^OK/
213
+ raise "Failed to set left context to #{value}: #{result}"
214
+ end
215
+ end
216
+
217
+ # Set the size of the right context.
218
+ def right_context=(value)
219
+ result = talk("SET right-context-width #{value}")
220
+ unless result =~ /^OK/
221
+ raise "Failed to set right context to #{value}: #{result}"
222
+ end
223
+ end
224
+
225
+ # Sets the 'retrieve-tags' flags.
226
+ def retrieve_tags(flags)
227
+ talk("SET retrieve-tags #{flags}")
228
+ end
229
+
230
+ # Sets the 'retrieve-lemmata' flags.
231
+ def retrieve_lemmata(flags)
232
+ talk("SET retrieve-lemmata #{flags}")
233
+ end
234
+
235
+
289
236
  # Sends a message directly to the server
290
237
  # * +msg+ the message to send
291
238
  # * +mode+ if set to :sync, the method block untli the message
292
239
  # is received. If :async the method returns immediately.
293
240
  # Default: :sync
294
- # * +handler+ the handler of the assynchronous message.
241
+ # * +handler+ the handler of the assynchronous message.
295
242
  # It is ignored when the mode is set to :sync.
296
243
  def talk(msg, mode = :sync, &handler)
297
244
  puts msg if @debug
298
- @connector.send(msg, mode, &handler)
245
+ if mode == :sync
246
+ @connector.send_message(msg, mode, &handler)
247
+ else
248
+ if handler.nil?
249
+ real_handler = lambda do |msg|
250
+ @answer_queue.push msg
251
+ stop_waiting
252
+ end
253
+ start_waiting
254
+ else
255
+ real_handler = handler
256
+ end
257
+ @connector.send_message(msg, mode, &real_handler)
258
+ if handler.nil?
259
+ do_wait
260
+ end
261
+ end
299
262
  end
300
263
 
301
- # Make query and retrieve many results.
264
+ # Make query and retrieve many results.
302
265
  # * +query+ the query to be sent to the server.
303
266
  # * +options+ see find
304
267
  def find_many(query, options)
@@ -308,7 +271,7 @@ protected
308
271
  answer_offset = page_size * (page_index - 1)
309
272
  if page_size > 0
310
273
  result_count = make_async_query(query,answer_offset)
311
- answers_limit = answer_offset + page_size > result_count ?
274
+ answers_limit = answer_offset + page_size > result_count ?
312
275
  result_count - answer_offset : page_size
313
276
  else
314
277
  # all answers needed -- the call must be synchronous
@@ -321,12 +284,12 @@ protected
321
284
 
322
285
  result = QueryResult.new(page_index, page_count,page_size,self,query)
323
286
  if answers_limit > 0
324
- talk("GET-RESULTS #{answer_offset} #{answer_offset + answers_limit - 1}")
287
+ talk("GET-RESULTS #{answer_offset} #{answer_offset + answers_limit - 1}")
325
288
  answers_limit.times do |answer_index|
326
289
  result << fetch_result(answer_offset + answer_index, query)
327
290
  end
328
291
  end
329
- result
292
+ result
330
293
  end
331
294
 
332
295
  # Make query and retrieve only one result
@@ -334,13 +297,13 @@ protected
334
297
  # * +index+ the index of the answer to be retrieved
335
298
  def find_one(query,index)
336
299
  make_async_query(query,index)
337
- talk("GET-RESULTS #{index} #{index}")
338
- fetch_result(index,query)
300
+ talk("GET-RESULTS #{index} #{index}")
301
+ fetch_result(index,query)
339
302
  end
340
303
 
341
304
  # Fetches one result of the query
342
305
  #
343
- # MAKE-QUERY and GET-RESULTS must be sent to the server before
306
+ # MAKE-QUERY and GET-RESULTS must be sent to the server before
344
307
  # this method is called
345
308
  def fetch_result(index, query)
346
309
  result = Excerpt.new(index, self, query)
@@ -357,15 +320,15 @@ protected
357
320
  segments = []
358
321
  size.times do |segment_index|
359
322
  segment = Segment.new(read_word)
360
- segments << segment
361
- if @lemmata_flags[group] || @tag_flags[group]
323
+ segments << segment
324
+ if config.lemmata.include?(group) || config.tags.include?(group)
362
325
  lemmata_size = read_number()
363
- lemmata_size.times do |lemmata_index|
326
+ lemmata_size.times do |lemmata_index|
364
327
  lemmata = Lemmata.new()
365
- if @lemmata_flags[group]
328
+ if config.lemmata.include?(group)
366
329
  lemmata.base_form = read_word
367
330
  end
368
- if @tag_flags[group]
331
+ if config.tags.include?(group)
369
332
  lemmata.tags = read_word
370
333
  end
371
334
  segment.lemmata << lemmata
@@ -377,7 +340,9 @@ protected
377
340
 
378
341
  # Reads number stored in the message received from the server.
379
342
  def read_number
380
- @connector.read_message.match(/\d+/)[0].to_i
343
+ msg = @connector.read_message
344
+ puts "XXX #{msg}" if @debug
345
+ msg.match(/\d+/)[0].to_i
381
346
  end
382
347
 
383
348
  # Counts number of results for given answer
@@ -387,25 +352,22 @@ protected
387
352
 
388
353
  # *Asynchronous* Sends the query to the server
389
354
  # * +query+ query to send
390
- # * +handler+ if given, the method returns immediately,
355
+ # * +handler+ if given, the method returns immediately,
391
356
  # and the answer is sent to the handler. In this case
392
357
  # the result returned by make_query should be IGNORED!
393
358
  def make_query(query, &handler)
394
359
  if @last_query != query
395
360
  @last_query = query
396
- if handler.nil?
397
- real_handler = lambda { |msg| @answer_queue.push msg }
398
- else
399
- real_handler = handler
400
- end
401
361
  begin
402
362
  talk("MAKE-QUERY #{query}")
403
363
  rescue JobInProgress
404
364
  talk("CANCEL") rescue nil
405
365
  talk("MAKE-QUERY #{query}")
406
366
  end
407
- talk("RUN-QUERY #{@buffer_size}", :async, &real_handler)
408
- @last_result = do_wait if handler.nil?
367
+ result = talk("RUN-QUERY #{config.buffer_size}", :async, &handler)
368
+ if handler.nil?
369
+ @last_result = result
370
+ end
409
371
  end
410
372
  @last_result
411
373
  end
@@ -415,38 +377,55 @@ protected
415
377
  @connector.read_message
416
378
  end
417
379
 
418
- private
380
+ private
381
+ # Wait for the asynchronous answer, if some synchronous query
382
+ # was sent without handler.
419
383
  def do_wait
420
384
  loop {
421
- status = talk("STATUS") rescue break
422
- puts "STATUS: #{status}" if @debug
423
- sleep 0.3
385
+ break unless should_wait?
386
+ puts "WAITING" if @debug
387
+ sleep 0.1
424
388
  }
425
389
  @answer_queue.shift
426
390
  end
427
391
 
428
- def set_all_flags
429
- options = {}
430
- GROUPS.each{|g| options[g] = true}
431
- options
392
+ # Stop waiting for the asynchronous answer.
393
+ def stop_waiting
394
+ @waiting_mutext.synchronize {
395
+ @should_wait = false
396
+ }
397
+ puts "WAITING stopped" if @debug
398
+ end
399
+
400
+ # Check if the thread should still wait for the answer.
401
+ def should_wait?
402
+ should_wait = nil
403
+ @waiting_mutext.synchronize {
404
+ should_wait = @should_wait
405
+ }
406
+ should_wait
432
407
  end
433
-
434
- def correct_context_value?(value)
435
- value.is_a?(Fixnum) && value > 0
408
+
409
+ # Start waiting for the answer.
410
+ def start_waiting
411
+ @waiting_mutext.synchronize {
412
+ @should_wait = true
413
+ }
414
+ puts "WAITING started" if @debug
436
415
  end
437
416
 
438
- def make_async_query(query,answer_offset)
439
- # the handler is empty, since we access the result count through
440
- # BUFFER-STATE call
441
- make_query(query){|msg| }
442
- result_count = 0
443
- begin
417
+ def make_async_query(query,answer_offset)
418
+ start_waiting
419
+ # we access the result count through BUFFER-STATE call
420
+ make_query(query){|msg| stop_waiting}
421
+ result_count = 0
422
+ begin
444
423
  # the result count might be not exact!
445
424
  result_count = talk("BUFFER-STATE").split(" ")[2].to_i
446
- talk("STATUS") rescue break
425
+ break unless should_wait?
447
426
  end while result_count < answer_offset
448
427
  @last_result = "OK #{result_count}"
449
428
  result_count
450
429
  end
451
- end
430
+ end
452
431
  end
@@ -0,0 +1,138 @@
1
+ # vim:encoding=utf-8
2
+ module Poliqarp
3
+ # Author:: Aleksander Pohl (mailto:apohllo@o2.pl)
4
+ # License:: MIT License
5
+ #
6
+ # This class holds the configuration of the client.
7
+ class Config
8
+ GROUPS = [:left_context, :left_match, :right_match, :right_context]
9
+ # The size of the buffer is the maximum number of excerpts which
10
+ # are returned for single query.
11
+ attr_accessor :buffer_size, :left_context_size, :right_context_size, :tags, :lemmata
12
+
13
+ def initialize(client,buffer_size)
14
+ @client = client
15
+ @buffer_size = buffer_size
16
+ end
17
+
18
+ # Sets the size of the left short context. It must be > 0
19
+ #
20
+ # The size of the left short context is the number
21
+ # of segments displayed in the found excerpts left to the
22
+ # matched segment(s).
23
+ def left_context_size=(value)
24
+ if correct_context_value?(value)
25
+ @client.send(:left_context=,value)
26
+ @left_context_size = value
27
+ else
28
+ raise "Invalid argument: #{value}. It must be fixnum greater than 0."
29
+ end
30
+ end
31
+
32
+ # Sets the size of the right short context. It must be > 0
33
+ #
34
+ # The size of the right short context is the number
35
+ # of segments displayed in the found excerpts right to the
36
+ # matched segment(s).
37
+ def right_context_size=(value)
38
+ if correct_context_value?(value)
39
+ @client.send(:right_context=,value)
40
+ @right_context_size = value
41
+ else
42
+ raise "Invalid argument: #{value}. It must be fixnum greater than 0."
43
+ end
44
+ end
45
+
46
+ # Sets the tags' flags. There are four groups of segments
47
+ # which the flags apply for:
48
+ # * +:left_context+
49
+ # * +:left_match+
50
+ # * +:right_match+
51
+ # * +:right_context+
52
+ #
53
+ # If the flag for given group is present, all segments
54
+ # in the group are annotated with grammatical tags. E.g.:
55
+ # c.find("kot")
56
+ # ...
57
+ # "kot" tags: "subst:sg:nom:m2"
58
+ #
59
+ # E.g. config.tags = [:left_context] will retrieve tags
60
+ # only for the left context.
61
+ #
62
+ # You can pass :all to turn on flags for all groups, i.e.
63
+ # config.tags = :all will retrieve tags for all groups.
64
+ def tags=(groups)
65
+ if groups == :all
66
+ @tags = GROUPS.dup
67
+ else
68
+ @tags = groups
69
+ end
70
+ @client.send(:retrieve_tags, flags_for(@tags))
71
+ end
72
+
73
+ # Sets the lemmatas' flags. There are four groups of segments
74
+ # which the flags apply for:
75
+ # * +left_context+
76
+ # * +left_match+
77
+ # * +right_match+
78
+ # * +right_context+
79
+ #
80
+ # If the flag for given group is present, all segments
81
+ # in the group are returned with the base form of the lemmata. E.g.:
82
+ # c.find("kotu")
83
+ # ...
84
+ # "kotu" base_form: "kot"
85
+ #
86
+ # E.g. config.lemmata = [:left_context] will retrieve lemmata
87
+ # only for the left context.
88
+ #
89
+ # You can pass :all to turn on flags for all groups, i.e.
90
+ # config.lemmata = :all will retrieve lemmata for all groups.
91
+ def lemmata=(groups)
92
+ if groups == :all
93
+ @lemmata = GROUPS.dup
94
+ else
95
+ @lemmata = groups
96
+ end
97
+ @client.send(:retrieve_lemmata, flags_for(@lemmata))
98
+ end
99
+
100
+ # Allow for accessing individual group tags/lemmata flag,
101
+ # e.g. config.left_context_tags, config.left_context_lemmata
102
+ [:tags,:lemmata].each do |type|
103
+ GROUPS.each do |group|
104
+ define_method("#{group}_#{type}".to_sym) do
105
+ @tags.include?(group)
106
+ end
107
+ end
108
+ end
109
+
110
+ # Allow for changing individual group tags/lemmata flag,
111
+ # e.g. config.left_context_tags = true, config.left_context_lemmata = true
112
+ [:tags,:lemmata].each do |type|
113
+ GROUPS.each do |group|
114
+ define_method("#{group}_#{type}=".to_sym) do |value|
115
+ if value
116
+ @tags << group unless @tags.include?(group)
117
+ else
118
+ @tags.delete(group) if @tags.include?(group)
119
+ end
120
+ @client.send("retrieve_#{type}".to_sym, flags_for(@tags))
121
+ end
122
+ end
123
+ end
124
+
125
+ protected
126
+ def correct_context_value?(value)
127
+ value.is_a?(Fixnum) && value > 0
128
+ end
129
+
130
+ def flags_for(elements)
131
+ flags = ""
132
+ GROUPS.each do |flag|
133
+ flags << (elements.include?(flag) ? "1" : "0")
134
+ end
135
+ flags
136
+ end
137
+ end
138
+ end
@@ -44,7 +44,7 @@ module Poliqarp
44
44
  @debug = debug
45
45
  end
46
46
 
47
- # Opens connection with poliqarp server which runs
47
+ # Opens connection with poliqarp server which runs
48
48
  # on given +host+ and +port+.
49
49
  def open(host,port)
50
50
  @socket_mutex.synchronize {
@@ -60,17 +60,16 @@ module Poliqarp
60
60
  }
61
61
  end
62
62
 
63
- # Sends message to the poliqarp server. Returns the first synchronous
63
+ # Sends message to the poliqarp server. Returns the first synchronous
64
64
  # answer of the server.
65
65
  # * +message+ the message to send
66
66
  # * +mode+ synchronous (+:sync+) or asynchronous (+:async+)
67
67
  # * +handler+ the handler of the asynchronous message
68
- def send(message, mode, &handler)
68
+ def send_message(message, mode, &handler)
69
69
  puts "send #{mode} #{message}" if @debug
70
70
  if ruby19?
71
71
  massage = message.encode(UTF8)
72
72
  end
73
- #@socket.puts(message)
74
73
  @socket.write(message+"\n")
75
74
  if mode == :async
76
75
  @handler = handler
@@ -79,7 +78,7 @@ module Poliqarp
79
78
  end
80
79
 
81
80
  # Retrieves one message from the server.
82
- # If the message indicates an error, new runtime error
81
+ # If the message indicates an error, new runtime error
83
82
  # containing the error description is returned.
84
83
  def read_message
85
84
  message = @message_queue.shift
@@ -94,7 +93,7 @@ module Poliqarp
94
93
 
95
94
  private
96
95
  def main_loop
97
- @loop = Thread.new {
96
+ @loop = Thread.new {
98
97
  loop {
99
98
  receive
100
99
  # XXX ??? needed
@@ -124,8 +123,8 @@ private
124
123
 
125
124
  def receive_async(message)
126
125
  puts "receive async: #{message}" if @debug
127
- Thread.new{
128
- @handler.call(message)
126
+ Thread.new{
127
+ @handler.call(message)
129
128
  }
130
129
  end
131
130
 
@@ -2,7 +2,7 @@ module Poliqarp
2
2
  # Author:: Aleksander Pohl (mailto:apohllo@o2.pl)
3
3
  # License:: MIT License
4
4
 
5
- # The JobInProgress exception is raised if there was asynchronous call
5
+ # The JobInProgress exception is raised if there was asynchronous call
6
6
  # to the server which hasn't finished, which is interrupted by another
7
7
  # asynchronous call.
8
8
  class JobInProgress < Exception; end
@@ -2,13 +2,13 @@ module Poliqarp
2
2
  # Author:: Aleksander Pohl
3
3
  # License:: MIT License
4
4
  #
5
- # The excerpt class is used to store single result of the query,
5
+ # The excerpt class is used to store single result of the query,
6
6
  # i.e. the excerpt of the corpus which contains the words which
7
- # the corpus was queried for.
7
+ # the corpus was queried for.
8
8
  #
9
9
  # The excerpt is divided into groups, which contain segments,
10
- # which the texts in the corpus were divided for.
11
- # The first group is the left context, the second -- the matched
10
+ # which the texts in the corpus were divided for.
11
+ # The first group is the left context, the second -- the matched
12
12
  # query, and the last -- the right context.
13
13
  class Excerpt
14
14
  attr_reader :index, :base_form, :short_context
@@ -40,7 +40,7 @@ module Poliqarp
40
40
  @short_context[2]
41
41
  end
42
42
 
43
- # Returns the matched query as string
43
+ # Returns the matched query as string
44
44
  def word
45
45
  #@short_context[0].split(/\s+/)[-1]
46
46
  @short_context[1].map{|s| s.to_s}.join("")
@@ -54,7 +54,7 @@ module Poliqarp
54
54
  @short_context.join("")
55
55
  end
56
56
 
57
- # Returns the long context of the query.
57
+ # Returns the long context of the query.
58
58
  def context
59
59
  return @context unless @context.nil?
60
60
  @context = @client.context(@base_form, @index)
@@ -63,7 +63,7 @@ module Poliqarp
63
63
  { :medium => :medium, :style => :styl, :date => :data_wydania,
64
64
  :city => :miejsce_wydania, :publisher => :wydawca, :title => :tytu,
65
65
  :author => :autor}.each do |method, keyword|
66
- define_method method do
66
+ define_method method do
67
67
  self.metadata[keyword]
68
68
  end
69
69
  end
@@ -2,9 +2,9 @@ module Poliqarp
2
2
  # Author:: Aleksander Pohl (mailto:apohllo@o2.pl)
3
3
  # License:: MIT License
4
4
  #
5
- # The query result class is used to paginate results of the
5
+ # The query result class is used to paginate results of the
6
6
  # query. Each query result has information about its context
7
- # (the next and previous page).
7
+ # (the next and previous page).
8
8
  class QueryResult
9
9
  include Enumerable
10
10
 
@@ -51,16 +51,16 @@ module Poliqarp
51
51
  # Returns the previous page of the query result
52
52
  def previous_page
53
53
  if @page > 1
54
- @client.find(@query, :page_size => @page_size,
55
- :page_index => @page - 1)
54
+ @client.find(@query, :page_size => @page_size,
55
+ :page_index => @page - 1)
56
56
  end
57
57
  end
58
58
 
59
59
  # Return the next page of the query result
60
60
  def next_page
61
61
  if @page < @page_count
62
- @client.find(@query, :page_size => @page_size,
63
- :page_index => @page + 1)
62
+ @client.find(@query, :page_size => @page_size,
63
+ :page_index => @page + 1)
64
64
  end
65
65
  end
66
66
 
@@ -1,22 +1,22 @@
1
- module Poliqarp
1
+ module Poliqarp
2
2
  # Author:: Aleksander Pohl (mailto:apohllo@o2.pl)
3
3
  # License:: MIT LICENSE
4
4
  #
5
- # The segment is the smallest meaningful part of the text.
6
- # It may contain many lemmata, since the segments are sometimes
7
- # not disambiguated.
5
+ # The segment is the smallest meaningful part of the text.
6
+ # It may contain many lemmata, since the segments are sometimes
7
+ # not disambiguated.
8
8
  class Segment
9
9
  attr_reader :literal, :lemmata
10
10
 
11
- # Creates new segment. The specified argument is the literal
12
- # (as found in the text) representation of the segment.
11
+ # Creates new segment. The specified argument is the literal
12
+ # (as found in the text) representation of the segment.
13
13
  def initialize(literal)
14
14
  @literal = literal
15
15
  @lemmata = []
16
16
  end
17
17
 
18
18
  # Returns the segment literal
19
- def to_s
19
+ def to_s
20
20
  @literal
21
21
  end
22
22
  end
@@ -3,7 +3,7 @@ module Poliqarp #:nodoc:
3
3
  module Ruby19
4
4
  # Returns true if the Ruby version is at least 1.9.0
5
5
  def ruby19?
6
- RUBY_VERSION.split(".")[0..1].join(".").to_f >= 1.9
6
+ RUBY_VERSION.split(".")[0..1].join(".").to_f >= 1.9
7
7
  end
8
8
  end
9
9
  end
data/poliqarpr.gemspec CHANGED
@@ -1,13 +1,13 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = "poliqarpr"
3
- s.version = "0.0.8"
4
- s.date = "2011-01-12"
3
+ s.version = "0.1.0"
4
+ s.date = "2011-01-17"
5
5
  s.summary = "Ruby client for Poliqarp"
6
6
  s.email = "apohllo@o2.pl"
7
7
  s.homepage = "http://www.github.com/apohllo/poliqarpr"
8
8
  s.description = "Ruby client for Poliqarp (NLP corpus server)"
9
9
  s.authors = ['Aleksander Pohl']
10
- s.files = ["Rakefile", "poliqarpr.gemspec",
10
+ s.files = ["Rakefile", "poliqarpr.gemspec",
11
11
  "changelog.txt", "README.txt" ] + Dir.glob("lib/**/*")
12
12
  s.test_files = Dir.glob("spec/**/*")
13
13
  s.rdoc_options = ["--main", "README.txt"]
data/spec/client.rb CHANGED
@@ -5,17 +5,17 @@ require 'poliqarpr'
5
5
  describe Poliqarp::Client do
6
6
  describe "(general test)" do
7
7
  before(:each) do
8
- @client = Poliqarp::Client.new("TEST")
8
+ @client = Poliqarp::Client.new("TEST1")
9
9
  end
10
-
11
- after(:each) do
10
+
11
+ after(:each) do
12
12
  @client.close
13
13
  end
14
-
14
+
15
15
  it "should allow to open corpus" do
16
16
  @client.open_corpus("/home/fox/local/poliqarp/2.sample.30/sample")
17
17
  end
18
-
18
+
19
19
  it "should allow to open :default corpus" do
20
20
  @client.open_corpus(:default)
21
21
  end
@@ -32,7 +32,7 @@ describe Poliqarp::Client do
32
32
 
33
33
  describe "(with 'sample' corpus)" do
34
34
  before(:all) do
35
- @client = Poliqarp::Client.new("TEST")
35
+ @client = Poliqarp::Client.new("TEST2")
36
36
  @client.open_corpus("/home/fox/local/poliqarp/2.sample.30/sample")
37
37
  end
38
38
 
@@ -40,35 +40,37 @@ describe Poliqarp::Client do
40
40
  @client.close
41
41
  end
42
42
 
43
- it "should allow to set the right context size" do
44
- @client.right_context = 5
43
+ it "should allow to set and get the right context size" do
44
+ @client.config.right_context_size = 5
45
+ @client.config.right_context_size.should == 5
45
46
  end
46
47
 
47
- it "should raise error if the size of right context is not number" do
48
- (proc do
49
- @client.right_context = "a"
48
+ it "should raise error if the size of right context is not number" do
49
+ (proc do
50
+ @client.config.right_context_size = "a"
50
51
  end).should raise_error(RuntimeError)
51
52
  end
52
53
 
53
- it "should rais error if the size of right context is less or equal 0" do
54
- (proc do
55
- @client.right_context = 0
54
+ it "should rais error if the size of right context is less or equal 0" do
55
+ (proc do
56
+ @client.config.right_context_size = 0
56
57
  end).should raise_error(RuntimeError)
57
58
  end
58
59
 
59
- it "should allow to set the left context size" do
60
- @client.right_context = 5
60
+ it "should allow to set and get the left context size" do
61
+ @client.config.left_context_size = 5
62
+ @client.config.left_context_size.should == 5
61
63
  end
62
64
 
63
- it "should raise error if the size of left context is not number" do
64
- (lambda do
65
- @client.left_context = "a"
65
+ it "should raise error if the size of left context is not number" do
66
+ (lambda do
67
+ @client.config.left_context_size = "a"
66
68
  end).should raise_error(RuntimeError)
67
69
  end
68
70
 
69
- it "should rais error if the size of left context is less or equal 0" do
70
- (lambda do
71
- @client.left_context = 0
71
+ it "should rais error if the size of left context is less or equal 0" do
72
+ (lambda do
73
+ @client.config.left_context_size = 0
72
74
  end).should raise_error(RuntimeError)
73
75
  end
74
76
 
@@ -87,7 +89,7 @@ describe Poliqarp::Client do
87
89
  tagset[:classes].should_not == nil
88
90
  end
89
91
 
90
- it "should allow to find 'kot'" do
92
+ it "should allow to find 'kot'" do
91
93
  @client.find("kot").size.should_not == 0
92
94
  end
93
95
 
@@ -129,7 +131,7 @@ describe Poliqarp::Client do
129
131
  end
130
132
 
131
133
  describe("(with index specified in find)") do
132
- before(:each) do
134
+ before(:each) do
133
135
  @result = @client.find("nachalny",:index => 0)
134
136
  end
135
137
 
@@ -146,13 +148,12 @@ describe Poliqarp::Client do
146
148
  end
147
149
  end
148
150
 
149
- describe("(with lemmata flags set to true)") do
151
+ describe("(with lemmata flags set to true)") do
150
152
  before(:all) do
151
- @client.lemmata = {:left_context => true, :right_context => true,
152
- :left_match => true, :right_match => true}
153
+ @client.config.lemmata = [:left_context, :right_context, :left_match, :right_match]
153
154
  end
154
155
 
155
- it "should allow to find 'kotu'" do
156
+ it "should allow to find 'kotu'" do
156
157
  @client.find("kotu").size.should_not == 0
157
158
  end
158
159
 
data/spec/excerpt.rb CHANGED
@@ -25,7 +25,7 @@ describe Poliqarp::Excerpt do
25
25
  @excerpt.index.should_not == nil
26
26
  end
27
27
 
28
- it "should have base form" do
28
+ it "should have base form" do
29
29
  @excerpt.base_form.should_not == nil
30
30
  end
31
31
 
@@ -64,10 +64,10 @@ describe Poliqarp::Excerpt do
64
64
  end
65
65
 
66
66
  it "should have index set to 0" do
67
- @excerpt.index.should == 0
67
+ @excerpt.index.should == 0
68
68
  end
69
69
 
70
- it "should have base form set to 'kot'" do
70
+ it "should have base form set to 'kot'" do
71
71
  @excerpt.base_form.should == "mu za to astronomiczną"
72
72
  end
73
73
 
@@ -96,7 +96,7 @@ describe Poliqarp::Excerpt do
96
96
  it "should have 'city' set to nil" do
97
97
  @excerpt.city.should == nil
98
98
  end
99
-
99
+
100
100
  it "should have one 'publisher' set to 'Wydawnictwo Naukowe Akademii Pedagogicznej'" do
101
101
  @excerpt.publisher.size.should == 1
102
102
  @excerpt.publisher[0].should == "Wydawnictwo W.A.B."
@@ -114,13 +114,13 @@ describe Poliqarp::Excerpt do
114
114
  end
115
115
 
116
116
  describe('first result for "kotu" with lemmatization turned on') do
117
- before(:all) do
118
- @client.lemmata = :all
117
+ before(:all) do
118
+ @client.config.lemmata = :all
119
119
  @client.open_corpus(:default)
120
- @excerpt = @client.find("kotu")[0]
120
+ @excerpt = @client.find("kotu")[0]
121
121
  end
122
122
 
123
- it "should have one lemmata for each segment" do
123
+ it "should have one lemmata for each segment" do
124
124
  @excerpt.short_context.each do |group|
125
125
  group.each do |segment|
126
126
  segment.lemmata.size.should == 1
@@ -134,7 +134,7 @@ describe Poliqarp::Excerpt do
134
134
  end
135
135
  end
136
136
 
137
- it "should contain 'kot' as one of the lemmata" do
137
+ it "should contain 'kot' as one of the lemmata" do
138
138
  @excerpt.short_context.flatten.
139
139
  any?{|s| s.lemmata[0].base_form == "kot"}.should == true
140
140
  end
data/spec/query_result.rb CHANGED
@@ -12,7 +12,7 @@ describe Poliqarp::QueryResult do
12
12
  @client.close
13
13
  end
14
14
 
15
- describe "(for unspecified query)" do
15
+ describe "(for unspecified query)" do
16
16
  before(:all) do
17
17
  @result = @client.find("kita")
18
18
  end
@@ -57,11 +57,11 @@ describe Poliqarp::QueryResult do
57
57
  end
58
58
 
59
59
  describe "(for 'kot' in :default corpus)" do
60
- before(:all) do
60
+ before(:all) do
61
61
  @result = @client.find("kot")
62
62
  end
63
63
 
64
- it "should have size == 6" do
64
+ it "should have size == 6" do
65
65
  @result.size.should == 6
66
66
  end
67
67
 
@@ -83,11 +83,11 @@ describe Poliqarp::QueryResult do
83
83
  end
84
84
 
85
85
  describe "(for 'kot' with page_size set to 5 in :default corpus)" do
86
- before(:all) do
86
+ before(:all) do
87
87
  @result = @client.find("kot", :page_size => 5)
88
88
  end
89
89
 
90
- it "should have size == 5" do
90
+ it "should have size == 5" do
91
91
  @result.size.should == 5
92
92
  end
93
93
 
@@ -109,11 +109,11 @@ describe Poliqarp::QueryResult do
109
109
  end
110
110
 
111
111
  describe "(next for 'kot' with page_size set to 5 in :default corpus)" do
112
- before(:all) do
112
+ before(:all) do
113
113
  @result = @client.find("kot", :page_size => 5).next_page
114
114
  end
115
115
 
116
- it "should have size == 1" do
116
+ it "should have size == 1" do
117
117
  @result.size.should == 1
118
118
  end
119
119
 
metadata CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
4
4
  prerelease: false
5
5
  segments:
6
6
  - 0
7
+ - 1
7
8
  - 0
8
- - 8
9
- version: 0.0.8
9
+ version: 0.1.0
10
10
  platform: ruby
11
11
  authors:
12
12
  - Aleksander Pohl
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2011-01-12 00:00:00 +01:00
17
+ date: 2011-01-17 00:00:00 +01:00
18
18
  default_executable:
19
19
  dependencies: []
20
20
 
@@ -32,6 +32,7 @@ files:
32
32
  - changelog.txt
33
33
  - README.txt
34
34
  - lib/poliqarpr.rb
35
+ - lib/poliqarpr/config.rb
35
36
  - lib/poliqarpr/exceptions.rb
36
37
  - lib/poliqarpr/lemmata.rb
37
38
  - lib/poliqarpr/query_result.rb