apohllo-poliqarpr 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.txt +10 -2
- data/changelog.txt +7 -0
- data/lib/poliqarpr.rb +4 -6
- data/lib/poliqarpr/client.rb +167 -112
- data/lib/poliqarpr/connector.rb +128 -0
- data/lib/poliqarpr/exceptions.rb +9 -0
- data/poliqarpr.gemspec +3 -1
- data/spec/client.rb +29 -1
- metadata +3 -1
data/README.txt
CHANGED
@@ -11,8 +11,16 @@ Poliqarpr is Ruby client for Poliqarp server.
|
|
11
11
|
|
12
12
|
* built-in pagination of query results
|
13
13
|
* support for lemmatization
|
14
|
-
* asynchronous communication
|
15
|
-
* only partial implementation of server protocol
|
14
|
+
* asynchronous communication might not be stable (100% CPU usage)
|
15
|
+
* only partial implementation of server protocol:
|
16
|
+
** PING, VERSION
|
17
|
+
** MAKE-SESSION, CLOSE-SESSION
|
18
|
+
** OPEN (corpus), CLOSE (corpus)
|
19
|
+
** CORPUS-STATS, GET-TAGSET
|
20
|
+
** STATUS, CANCEL (used internally)
|
21
|
+
** MAKE-QUERY, RUN-QUERY, BUFFER-STATE (used internally)
|
22
|
+
** GET-RESULTS, GET-CONTEXT, METADATA
|
23
|
+
** SET: lemmata, tags
|
16
24
|
|
17
25
|
== SYNOPSIS:
|
18
26
|
|
data/changelog.txt
CHANGED
data/lib/poliqarpr.rb
CHANGED
@@ -1,11 +1,9 @@
|
|
1
|
-
path = File.join(File.dirname(__FILE__), 'poliqarpr')
|
2
|
-
require File.join(path, 'client')
|
3
|
-
require File.join(path, 'query_result')
|
4
|
-
require File.join(path, 'excerpt')
|
5
|
-
require File.join(path, 'segment')
|
6
|
-
require File.join(path, 'lemmata')
|
7
1
|
begin
|
8
2
|
require 'poliqarpr-corpus'
|
9
3
|
rescue LoadError
|
10
4
|
# Do nothing, since the default corpus is optional
|
11
5
|
end
|
6
|
+
|
7
|
+
$LOAD_PATH.unshift File.dirname(__FILE__)
|
8
|
+
Dir.glob(File.join(File.dirname(__FILE__), 'poliqarpr/**.rb')).each { |f| require f }
|
9
|
+
|
data/lib/poliqarpr/client.rb
CHANGED
@@ -1,29 +1,9 @@
|
|
1
|
-
require 'socket'
|
2
1
|
module Poliqarp
|
3
2
|
# Author:: Aleksander Pohl (mailto:apohllo@o2.pl)
|
4
3
|
# License:: MIT License
|
5
4
|
#
|
6
5
|
# This class is the implementation of the Poliqarp server client.
|
7
6
|
class Client
|
8
|
-
ERRORS = {
|
9
|
-
1 => "Incorrect number of arguments",
|
10
|
-
3 => "No session opened",
|
11
|
-
4 => "Cannot create a session for a connection that",
|
12
|
-
5 => "Not enough memory",
|
13
|
-
6 => "Invalid session ID",
|
14
|
-
7 => "Session with this ID is already bound",
|
15
|
-
8 => "Session user ID does not match the argument",
|
16
|
-
10 => "Session already has an open corpus",
|
17
|
-
12 => "System error while opening the corpus",
|
18
|
-
13 => "No corpus opened",
|
19
|
-
14 => "Invalid job ID",
|
20
|
-
15 => "A job is already in progress",
|
21
|
-
16 => "Incorrect query",
|
22
|
-
17 => "Invalid result range",
|
23
|
-
18 => "Incorrect session option",
|
24
|
-
19 => "Invalid session option value",
|
25
|
-
20 => "Invalid sorting criteria"
|
26
|
-
}
|
27
7
|
GROUPS = [:left_context, :left_match, :right_match, :right_context]
|
28
8
|
|
29
9
|
# If debug is turned on, the communication between server and client
|
@@ -46,10 +26,12 @@ module Poliqarp
|
|
46
26
|
@right_context = 5
|
47
27
|
@debug = debug
|
48
28
|
@buffer_size = 500000
|
29
|
+
@connector = Connector.new(debug)
|
30
|
+
@answer_queue = Queue.new
|
49
31
|
new_session
|
50
32
|
end
|
51
33
|
|
52
|
-
# A hint about
|
34
|
+
# A hint about installation of default corpus gem
|
53
35
|
def self.const_missing(const)
|
54
36
|
if const.to_s =~ /DEFAULT_CORPUS/
|
55
37
|
raise "You need to install 'apohllo-poliqarpr-corpus' to use the default corpus"
|
@@ -64,26 +46,25 @@ module Poliqarp
|
|
64
46
|
# * +port+ - the port on which the poliqarpd server is accepting connections (defaults to 4567)
|
65
47
|
def new_session(port=4567)
|
66
48
|
close if @session
|
67
|
-
@
|
68
|
-
talk
|
69
|
-
rcv_sync
|
49
|
+
@connector.open("localhost",port)
|
50
|
+
talk("MAKE-SESSION #{@session_name}")
|
70
51
|
talk("BUFFER-RESIZE #{@buffer_size}")
|
71
|
-
rcv_sync
|
72
52
|
@session = true
|
73
53
|
self.tags = {}
|
74
54
|
self.lemmata = {}
|
75
55
|
end
|
76
56
|
|
77
|
-
# Closes the opened
|
57
|
+
# Closes the opened session.
|
78
58
|
def close
|
79
|
-
#talk "CLOSE"
|
80
|
-
#rcv_sync
|
81
59
|
talk "CLOSE-SESSION"
|
82
|
-
rcv_sync
|
83
|
-
#@socket.close
|
84
60
|
@session = false
|
85
61
|
end
|
86
62
|
|
63
|
+
# Closes the opened corpus.
|
64
|
+
def close_corpus
|
65
|
+
talk "CLOSE"
|
66
|
+
end
|
67
|
+
|
87
68
|
# Sets the size of the left short context. It must be > 0
|
88
69
|
#
|
89
70
|
# The size of the left short context is the number
|
@@ -91,8 +72,7 @@ module Poliqarp
|
|
91
72
|
# matched segment(s).
|
92
73
|
def left_context=(value)
|
93
74
|
if correct_context_value?(value)
|
94
|
-
talk
|
95
|
-
result = rcv_sync
|
75
|
+
result = talk("SET left-context-width #{value}")
|
96
76
|
@left_context = value if result =~ /^R OK/
|
97
77
|
else
|
98
78
|
raise "Invalid argument: #{value}. It must be fixnum greater than 0."
|
@@ -106,8 +86,7 @@ module Poliqarp
|
|
106
86
|
# matched segment(s).
|
107
87
|
def right_context=(value)
|
108
88
|
if correct_context_value?(value)
|
109
|
-
talk
|
110
|
-
result = rcv_sync
|
89
|
+
result = talk("SET right-context-width #{value}")
|
111
90
|
@right_context = value if result =~ /^R OK/
|
112
91
|
else
|
113
92
|
raise "Invalid argument: #{value}. It must be fixnum greater than 0."
|
@@ -135,8 +114,7 @@ module Poliqarp
|
|
135
114
|
GROUPS.each do |flag|
|
136
115
|
flags << (options[flag] ? "1" : "0")
|
137
116
|
end
|
138
|
-
talk
|
139
|
-
rcv_sync
|
117
|
+
talk("SET retrieve-tags #{flags}")
|
140
118
|
end
|
141
119
|
|
142
120
|
# Sets the lemmatas' flags. There are four groups of segments
|
@@ -160,20 +138,87 @@ module Poliqarp
|
|
160
138
|
GROUPS.each do |flag|
|
161
139
|
flags << (options[flag] ? "1" : "0")
|
162
140
|
end
|
163
|
-
talk
|
164
|
-
rcv_sync
|
141
|
+
talk("SET retrieve-lemmata #{flags}")
|
165
142
|
end
|
166
143
|
|
167
|
-
# Opens the corpus given as +path+. To open the default
|
144
|
+
# *Asynchronous* Opens the corpus given as +path+. To open the default
|
168
145
|
# corpus pass +:default+ as the argument.
|
169
|
-
|
146
|
+
#
|
147
|
+
# If you don't want to wait until the call is finished, you
|
148
|
+
# have to provide +handler+ for the asynchronous answer.
|
149
|
+
def open_corpus(path, &handler)
|
170
150
|
if path == :default
|
171
|
-
open_corpus(DEFAULT_CORPUS)
|
151
|
+
open_corpus(DEFAULT_CORPUS, &handler)
|
172
152
|
else
|
173
|
-
|
174
|
-
|
175
|
-
|
153
|
+
real_handler = handler || lambda{|msg| @answer_queue.push msg }
|
154
|
+
talk("OPEN #{path}", :async, &real_handler)
|
155
|
+
do_wait if handler.nil?
|
156
|
+
end
|
157
|
+
end
|
158
|
+
|
159
|
+
# Server diagnostics -- the result should be :pong
|
160
|
+
def ping
|
161
|
+
:pong if talk("PING") =~ /PONG/
|
162
|
+
end
|
163
|
+
|
164
|
+
# Returns server version
|
165
|
+
def version
|
166
|
+
talk("VERSION")
|
167
|
+
end
|
168
|
+
|
169
|
+
# Returns corpus statistics:
|
170
|
+
# * +:segment_tokens+ the number of segments in the corpus
|
171
|
+
# (two segments which look exactly the same are counted separately)
|
172
|
+
# * +:segment_types+ the number of segment types in the corpus
|
173
|
+
# (two segments which look exactly the same are counted as one type)
|
174
|
+
# * +:lemmata+ the number of lemmata (lexemes) types
|
175
|
+
# (all forms of inflected word, e.g. 'kot', 'kotu', ...
|
176
|
+
# are treated as one "word" -- lemmata)
|
177
|
+
# * +:tags+ the number of different grammar tags (each combination
|
178
|
+
# of atomic tags is treated as different "tag")
|
179
|
+
def stats
|
180
|
+
stats = {}
|
181
|
+
talk("CORPUS-STATS").split.each_with_index do |value, index|
|
182
|
+
case index
|
183
|
+
when 1
|
184
|
+
stats[:segment_tokens] = value.to_i
|
185
|
+
when 2
|
186
|
+
stats[:segment_types] = value.to_i
|
187
|
+
when 3
|
188
|
+
stats[:lemmata] = value.to_i
|
189
|
+
when 4
|
190
|
+
stats[:tags] = value.to_i
|
191
|
+
end
|
192
|
+
end
|
193
|
+
stats
|
194
|
+
end
|
195
|
+
|
196
|
+
# TODO
|
197
|
+
def metadata_types
|
198
|
+
raise "Not implemented"
|
199
|
+
end
|
200
|
+
|
201
|
+
# Returns the tag-set used in the corpus.
|
202
|
+
# It is divided into two groups:
|
203
|
+
# * +:categories+ enlists tags belonging to grammatical categories
|
204
|
+
# (each category has a list of its tags, eg. gender: m1 m2 m3 f n,
|
205
|
+
# means that there are 5 genders: masculine(1,2,3), feminine and neuter)
|
206
|
+
# * +:classes+ enlists grammatical tags used to describe it
|
207
|
+
# (each class has a list of tags used to describe it, eg. adj: degree
|
208
|
+
# gender case number, means that adjectives are described in terms
|
209
|
+
# of degree, gender, case and number)
|
210
|
+
def tagset
|
211
|
+
answer = talk("GET-TAGSET")
|
212
|
+
counters = answer.split
|
213
|
+
result = {}
|
214
|
+
[:categories, :classes].each_with_index do |type, type_index|
|
215
|
+
result[type] = {}
|
216
|
+
counters[type_index+1].to_i.times do |index|
|
217
|
+
values = read_word.split
|
218
|
+
result[type][values[0].to_sym] = values[1..-1].map{|v| v.to_sym}
|
219
|
+
end
|
176
220
|
end
|
221
|
+
result
|
177
222
|
end
|
178
223
|
|
179
224
|
# Send the query to the opened corpus.
|
@@ -210,8 +255,6 @@ module Poliqarp
|
|
210
255
|
make_query(query)
|
211
256
|
result = []
|
212
257
|
talk "GET-CONTEXT #{index}"
|
213
|
-
# R OK
|
214
|
-
rcv_sync
|
215
258
|
# 1st part
|
216
259
|
result << read_word
|
217
260
|
# 2nd part
|
@@ -228,11 +271,11 @@ module Poliqarp
|
|
228
271
|
def metadata(query, index)
|
229
272
|
make_query(query)
|
230
273
|
result = {}
|
231
|
-
talk
|
232
|
-
count =
|
274
|
+
answer = talk("METADATA #{index}")
|
275
|
+
count = answer.split(" ")[1].to_i
|
233
276
|
count.times do |index|
|
234
277
|
type = read_word.gsub(/[^a-zA-Z]/,"").to_sym
|
235
|
-
value =
|
278
|
+
value = read_word[2..-1]
|
236
279
|
unless value.nil?
|
237
280
|
result[type] ||= []
|
238
281
|
result[type] << value
|
@@ -244,36 +287,40 @@ module Poliqarp
|
|
244
287
|
protected
|
245
288
|
# Sends a message directly to the server
|
246
289
|
# * +msg+ the message to send
|
247
|
-
|
290
|
+
# * +mode+ if set to :sync, the method blocks until the message
|
291
|
+
# is received. If :async the method returns immediately.
|
292
|
+
# Default: :sync
|
293
|
+
# * +handler+ the handler of the asynchronous message.
|
294
|
+
# It is ignored when the mode is set to :sync.
|
295
|
+
def talk(msg, mode = :sync, &handler)
|
248
296
|
puts msg if @debug
|
249
|
-
@
|
297
|
+
@connector.send(msg, mode, &handler)
|
250
298
|
end
|
251
299
|
|
300
|
+
# Make query and retrieve many results.
|
301
|
+
# * +query+ the query to be sent to the server.
|
302
|
+
# * +options+ see find
|
252
303
|
def find_many(query, options)
|
253
304
|
page_size = (options[:page_size] || 0)
|
254
305
|
page_index = (options[:page_index] || 1)
|
255
|
-
|
256
|
-
#talk("GET-COLUMN-TYPES")
|
257
|
-
#rcv_sync
|
258
|
-
result_count = count_results(answers)
|
306
|
+
|
259
307
|
answer_offset = page_size * (page_index - 1)
|
260
308
|
if page_size > 0
|
309
|
+
result_count = make_async_query(query,answer_offset)
|
261
310
|
answers_limit = answer_offset + page_size > result_count ?
|
262
311
|
result_count - answer_offset : page_size
|
263
312
|
else
|
313
|
+
# all answers needed -- the call must be synchronous
|
314
|
+
result_count = count_results(make_query(query))
|
264
315
|
answers_limit = result_count
|
265
316
|
end
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
end
|
317
|
+
|
318
|
+
page_count = page_size <= 0 ? 1 :
|
319
|
+
result_count / page_size + (result_count % page_size > 0 ? 1 : 0)
|
320
|
+
|
271
321
|
result = QueryResult.new(page_index, page_count,page_size,self,query)
|
272
322
|
if answers_limit > 0
|
273
323
|
talk("GET-RESULTS #{answer_offset} #{answer_offset + answers_limit - 1}")
|
274
|
-
# R OK 1
|
275
|
-
rcv_sync
|
276
|
-
|
277
324
|
answers_limit.times do |answer_index|
|
278
325
|
result << fetch_result(answer_offset + answer_index, query)
|
279
326
|
end
|
@@ -281,17 +328,18 @@ protected
|
|
281
328
|
result
|
282
329
|
end
|
283
330
|
|
331
|
+
# Make query and retrieve only one result
|
332
|
+
# * +query+ the query to be sent to the server
|
333
|
+
# * +index+ the index of the answer to be retrieved
|
284
334
|
def find_one(query,index)
|
285
|
-
|
335
|
+
make_async_query(query,index)
|
286
336
|
talk("GET-RESULTS #{index} #{index}")
|
287
|
-
# R OK 1
|
288
|
-
rcv_sync
|
289
337
|
fetch_result(index,query)
|
290
338
|
end
|
291
339
|
|
292
340
|
# Fetches one result of the query
|
293
|
-
|
294
|
-
# MAKE-QUERY and GET-RESULTS must be
|
341
|
+
#
|
342
|
+
# MAKE-QUERY and GET-RESULTS must be sent to the server before
|
295
343
|
# this method is called
|
296
344
|
def fetch_result(index, query)
|
297
345
|
result = Excerpt.new(index, self, query)
|
@@ -300,18 +348,17 @@ protected
|
|
300
348
|
# XXX
|
301
349
|
#result << read_segments(:right_match)
|
302
350
|
result << read_segments(:right_context)
|
303
|
-
|
304
351
|
result
|
305
352
|
end
|
306
353
|
|
307
354
|
def read_segments(group)
|
308
|
-
size =
|
355
|
+
size = read_number()
|
309
356
|
segments = []
|
310
357
|
size.times do |segment_index|
|
311
358
|
segment = Segment.new(read_word)
|
312
359
|
segments << segment
|
313
360
|
if @lemmata_flags[group] || @tag_flags[group]
|
314
|
-
lemmata_size =
|
361
|
+
lemmata_size = read_number()
|
315
362
|
lemmata_size.times do |lemmata_index|
|
316
363
|
lemmata = Lemmata.new()
|
317
364
|
if @lemmata_flags[group]
|
@@ -327,62 +374,56 @@ protected
|
|
327
374
|
segments
|
328
375
|
end
|
329
376
|
|
330
|
-
|
331
|
-
|
377
|
+
# Reads number stored in the message received from the server.
|
378
|
+
def read_number
|
379
|
+
@connector.read_message.match(/\d+/)[0].to_i
|
332
380
|
end
|
333
381
|
|
382
|
+
# Counts number of results for given answer
|
334
383
|
def count_results(answer)
|
335
|
-
answer.split(" ")[
|
384
|
+
answer.split(" ")[1].to_i
|
336
385
|
end
|
337
386
|
|
338
|
-
|
387
|
+
# *Asynchronous* Sends the query to the server
|
388
|
+
# * +query+ query to send
|
389
|
+
# * +handler+ if given, the method returns immediately,
|
390
|
+
# and the answer is sent to the handler. In this case
|
391
|
+
# the result returned by make_query should be IGNORED!
|
392
|
+
def make_query(query, &handler)
|
339
393
|
if @last_query != query
|
340
394
|
@last_query = query
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
395
|
+
if handler.nil?
|
396
|
+
real_handler = lambda { |msg| @answer_queue.push msg }
|
397
|
+
else
|
398
|
+
real_handler = handler
|
399
|
+
end
|
400
|
+
begin
|
401
|
+
talk("MAKE-QUERY #{query}")
|
402
|
+
rescue JobInProgress
|
403
|
+
talk("CANCEL") rescue nil
|
404
|
+
talk("MAKE-QUERY #{query}")
|
405
|
+
end
|
406
|
+
talk("RUN-QUERY #{@buffer_size}", :async, &real_handler)
|
407
|
+
@last_result = do_wait if handler.nil?
|
345
408
|
end
|
346
|
-
@
|
409
|
+
@last_result
|
347
410
|
end
|
348
411
|
|
412
|
+
# Reads string stored in the last message received from server
|
349
413
|
def read_word
|
350
|
-
|
351
|
-
end
|
352
|
-
|
353
|
-
def read_line
|
354
|
-
line = ""
|
355
|
-
begin
|
356
|
-
chars = @socket.recvfrom(1)
|
357
|
-
line << chars[0]
|
358
|
-
end while chars[0] != "\n"
|
359
|
-
line
|
414
|
+
@connector.read_message
|
360
415
|
end
|
361
416
|
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
raise error_message(result) if result =~ /^R ERR/
|
371
|
-
result
|
372
|
-
# @socket.recvfrom(1024)
|
373
|
-
end
|
374
|
-
|
375
|
-
# XXX
|
376
|
-
def rcv_async
|
377
|
-
begin
|
378
|
-
line = read_line
|
379
|
-
raise error_message(line) if line =~ /^. ERR/
|
380
|
-
puts line if @debug
|
381
|
-
end until line =~ /^M/
|
382
|
-
line
|
417
|
+
private
|
418
|
+
def do_wait
|
419
|
+
loop {
|
420
|
+
status = talk("STATUS") rescue break
|
421
|
+
puts "STATUS: #{status}" if @debug
|
422
|
+
sleep 0.3
|
423
|
+
}
|
424
|
+
@answer_queue.shift
|
383
425
|
end
|
384
426
|
|
385
|
-
private
|
386
427
|
def set_all_flags
|
387
428
|
options = {}
|
388
429
|
GROUPS.each{|g| options[g] = true}
|
@@ -392,5 +433,19 @@ private
|
|
392
433
|
def correct_context_value?(value)
|
393
434
|
value.is_a?(Fixnum) && value > 0
|
394
435
|
end
|
436
|
+
|
437
|
+
def make_async_query(query,answer_offset)
|
438
|
+
# the handler is empty, since we access the result count through
|
439
|
+
# BUFFER-STATE call
|
440
|
+
make_query(query){|msg| }
|
441
|
+
result_count = 0
|
442
|
+
begin
|
443
|
+
# the result count might not be exact!
|
444
|
+
result_count = talk("BUFFER-STATE").split(" ")[2].to_i
|
445
|
+
talk("STATUS") rescue break
|
446
|
+
end while result_count < answer_offset
|
447
|
+
@last_result = "OK #{result_count}"
|
448
|
+
result_count
|
449
|
+
end
|
395
450
|
end
|
396
451
|
end
|
@@ -0,0 +1,128 @@
|
|
1
|
+
require 'socket'
|
2
|
+
|
3
|
+
module Poliqarp
|
4
|
+
# Author:: Aleksander Pohl (mailto:apohllo@o2.pl)
|
5
|
+
# License:: MIT License
|
6
|
+
#
|
7
|
+
# This class holds the TCP connection to the server and is responsible
|
8
|
+
# for dispatching synchronous and asynchronous queries and answers.
|
9
|
+
class Connector
|
10
|
+
|
11
|
+
# Error messages assigned to error codes
|
12
|
+
# (taken from poliqarpd implementation)
|
13
|
+
ERRORS = {
|
14
|
+
1 => "Incorrect number of arguments",
|
15
|
+
3 => "No session opened",
|
16
|
+
4 => "Cannot create a session for a connection that",
|
17
|
+
5 => "Not enough memory",
|
18
|
+
6 => "Invalid session ID",
|
19
|
+
7 => "Session with this ID is already bound",
|
20
|
+
8 => "Session user ID does not match the argument",
|
21
|
+
10 => "Session already has an open corpus",
|
22
|
+
12 => "System error while opening the corpus",
|
23
|
+
13 => "No corpus opened",
|
24
|
+
14 => "Invalid job ID",
|
25
|
+
15 => "A job is already in progress",
|
26
|
+
16 => "Incorrect query",
|
27
|
+
17 => "Invalid result range",
|
28
|
+
18 => "Incorrect session option",
|
29
|
+
19 => "Invalid session option value",
|
30
|
+
20 => "Invalid sorting criteria"
|
31
|
+
}
|
32
|
+
|
33
|
+
# Creates new connector
|
34
|
+
def initialize(debug)
|
35
|
+
@message_queue = Queue.new
|
36
|
+
@socket_mutex = Mutex.new
|
37
|
+
@loop_mutex = Mutex.new
|
38
|
+
@debug = debug
|
39
|
+
end
|
40
|
+
|
41
|
+
# Opens connection with poliqarp server which runs
|
42
|
+
# on given +host+ and +port+.
|
43
|
+
def open(host,port)
|
44
|
+
@socket_mutex.synchronize {
|
45
|
+
@socket = TCPSocket.new(host,port) if @socket.nil?
|
46
|
+
}
|
47
|
+
running = nil
|
48
|
+
@loop_mutex.synchronize {
|
49
|
+
running = @loop_running
|
50
|
+
}
|
51
|
+
main_loop unless running
|
52
|
+
@loop_mutex.synchronize {
|
53
|
+
@loop_running = true
|
54
|
+
}
|
55
|
+
end
|
56
|
+
|
57
|
+
# Sends message to the poliqarp server. Returns the first synchronous
|
58
|
+
# answer of the server.
|
59
|
+
# * +message+ the message to send
|
60
|
+
# * +mode+ synchronous (+:sync+) or asynchronous (+:async+)
|
61
|
+
# * +handler+ the handler of the asynchronous message
|
62
|
+
def send(message, mode, &handler)
|
63
|
+
puts "send #{mode} #{message}" if @debug
|
64
|
+
@socket.puts(message)
|
65
|
+
if mode == :async
|
66
|
+
@handler = handler
|
67
|
+
end
|
68
|
+
read_message
|
69
|
+
end
|
70
|
+
|
71
|
+
# Retrieves one message from the server.
|
72
|
+
# If the message indicates an error, new runtime error
|
73
|
+
# containing the error description is raised.
|
74
|
+
def read_message
|
75
|
+
message = @message_queue.shift
|
76
|
+
if message =~ /^ERR/
|
77
|
+
code = message.match(/\d+/)[0].to_i
|
78
|
+
raise JobInProgress.new() if code == 15
|
79
|
+
raise RuntimeError.new("Poliqarp Error: "+ERRORS[code])
|
80
|
+
else
|
81
|
+
message
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
private
|
86
|
+
def main_loop
|
87
|
+
@loop = Thread.new {
|
88
|
+
loop {
|
89
|
+
receive
|
90
|
+
# XXX ??? needed
|
91
|
+
#sleep 0.001
|
92
|
+
}
|
93
|
+
}
|
94
|
+
end
|
95
|
+
|
96
|
+
def receive
|
97
|
+
result = read_line
|
98
|
+
msg = result[2..-2]
|
99
|
+
if result =~ /^M/
|
100
|
+
receive_async(msg)
|
101
|
+
elsif result
|
102
|
+
receive_sync(msg)
|
103
|
+
end
|
104
|
+
# if nil, nothing was received
|
105
|
+
end
|
106
|
+
|
107
|
+
def receive_sync(message)
|
108
|
+
puts "receive sync: #{message}" if @debug
|
109
|
+
@message_queue << message
|
110
|
+
end
|
111
|
+
|
112
|
+
def receive_async(message)
|
113
|
+
puts "receive async: #{message}" if @debug
|
114
|
+
Thread.new{
|
115
|
+
@handler.call(message)
|
116
|
+
}
|
117
|
+
end
|
118
|
+
|
119
|
+
def read_line
|
120
|
+
line = ""
|
121
|
+
begin
|
122
|
+
chars = @socket.recvfrom(1)
|
123
|
+
line << chars[0]
|
124
|
+
end while chars[0] != "\n"
|
125
|
+
line
|
126
|
+
end
|
127
|
+
end
|
128
|
+
end
|
@@ -0,0 +1,9 @@
|
|
1
|
+
module Poliqarp
|
2
|
+
# Author:: Aleksander Pohl (mailto:apohllo@o2.pl)
|
3
|
+
# License:: MIT License
|
4
|
+
|
5
|
+
# The JobInProgress exception is raised if there was asynchronous call
|
6
|
+
# to the server which hasn't finished, which is interrupted by another
|
7
|
+
# asynchronous call.
|
8
|
+
class JobInProgress < Exception; end
|
9
|
+
end
|
data/poliqarpr.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = "poliqarpr"
|
3
|
-
s.version = "0.0.
|
3
|
+
s.version = "0.0.4"
|
4
4
|
s.date = "2008-12-20"
|
5
5
|
s.summary = "Ruby client for Poliqarp"
|
6
6
|
s.email = "apohllo@o2.pl"
|
@@ -14,6 +14,8 @@ Gem::Specification.new do |s|
|
|
14
14
|
"lib/poliqarpr/excerpt.rb",
|
15
15
|
"lib/poliqarpr/lemmata.rb",
|
16
16
|
"lib/poliqarpr/segment.rb",
|
17
|
+
"lib/poliqarpr/connector.rb",
|
18
|
+
"lib/poliqarpr/exceptions.rb",
|
17
19
|
"README.txt",
|
18
20
|
]
|
19
21
|
s.test_files = [
|
data/spec/client.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
require
|
1
|
+
require 'lib/poliqarpr'
|
2
2
|
|
3
3
|
describe Poliqarp::Client do
|
4
4
|
describe "(general test)" do
|
@@ -17,6 +17,15 @@ describe Poliqarp::Client do
|
|
17
17
|
it "should allow to open :default corpus" do
|
18
18
|
@client.open_corpus(:default)
|
19
19
|
end
|
20
|
+
|
21
|
+
it "should respond to :ping" do
|
22
|
+
@client.ping.should == :pong
|
23
|
+
end
|
24
|
+
|
25
|
+
it "should return server version" do
|
26
|
+
@client.version.should_not == nil
|
27
|
+
end
|
28
|
+
|
20
29
|
end
|
21
30
|
|
22
31
|
describe "(with 'sample' corpus)" do
|
@@ -61,6 +70,21 @@ describe Poliqarp::Client do
|
|
61
70
|
end).should raise_error(RuntimeError)
|
62
71
|
end
|
63
72
|
|
73
|
+
it "should return corpus statistics" do
|
74
|
+
stats = @client.stats
|
75
|
+
stats.size.should == 4
|
76
|
+
[:segment_tokens, :segment_types, :lemmata, :tags].each do |type|
|
77
|
+
stats[type].should_not == nil
|
78
|
+
stats[type].should > 0
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
82
|
+
it "should return the corpus tagset" do
|
83
|
+
tagset = @client.tagset
|
84
|
+
tagset[:categories].should_not == nil
|
85
|
+
tagset[:classes].should_not == nil
|
86
|
+
end
|
87
|
+
|
64
88
|
it "should allow to find 'kot'" do
|
65
89
|
@client.find("kot").size.should_not == 0
|
66
90
|
end
|
@@ -111,6 +135,10 @@ describe Poliqarp::Client do
|
|
111
135
|
@result.should_not respond_to(:[])
|
112
136
|
end
|
113
137
|
|
138
|
+
it "should not be nil" do
|
139
|
+
@result.should_not == nil
|
140
|
+
end
|
141
|
+
|
114
142
|
it "should fetch the same excerpt as in find without index " do
|
115
143
|
@result.to_s.should == @client.find("nachalny")[0].to_s
|
116
144
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: apohllo-poliqarpr
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Aleksander Pohl
|
@@ -31,6 +31,8 @@ files:
|
|
31
31
|
- lib/poliqarpr/excerpt.rb
|
32
32
|
- lib/poliqarpr/lemmata.rb
|
33
33
|
- lib/poliqarpr/segment.rb
|
34
|
+
- lib/poliqarpr/connector.rb
|
35
|
+
- lib/poliqarpr/exceptions.rb
|
34
36
|
- README.txt
|
35
37
|
has_rdoc: true
|
36
38
|
homepage: http://www.apohllo.pl/projekty/poliqarpr
|