poliqarpr 0.0.8 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/changelog.txt +11 -6
- data/lib/poliqarpr.rb +1 -1
- data/lib/poliqarpr/client.rb +137 -158
- data/lib/poliqarpr/config.rb +138 -0
- data/lib/poliqarpr/connector.rb +7 -8
- data/lib/poliqarpr/exceptions.rb +1 -1
- data/lib/poliqarpr/excerpt.rb +7 -7
- data/lib/poliqarpr/query_result.rb +6 -6
- data/lib/poliqarpr/segment.rb +7 -7
- data/lib/poliqarpr/util.rb +1 -1
- data/poliqarpr.gemspec +3 -3
- data/spec/client.rb +29 -28
- data/spec/excerpt.rb +9 -9
- data/spec/query_result.rb +7 -7
- metadata +4 -3
data/changelog.txt
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
0.1.0
|
2
|
+
- synchronization on async call without handler done via internal mutex
|
3
|
+
instead of call to STATUS
|
4
|
+
- client config moved to external class
|
5
|
+
|
1
6
|
0.0.8
|
2
7
|
- Speed optimization: socket puts changed to write
|
3
8
|
|
@@ -8,7 +13,7 @@
|
|
8
13
|
|
9
14
|
0.0.6
|
10
15
|
- fix: Excerpt#word - the words constituting the matched query
|
11
|
-
- new: Excerpt#matched, Excerpt#right_context Excerpt#left_context
|
16
|
+
- new: Excerpt#matched, Excerpt#right_context Excerpt#left_context
|
12
17
|
return the matched, right context, left context segments respectively
|
13
18
|
|
14
19
|
0.0.5
|
@@ -19,16 +24,16 @@
|
|
19
24
|
- Documentation now points to gemcutter instead of github
|
20
25
|
|
21
26
|
0.0.4
|
22
|
-
- ping/pong diagnostics
|
27
|
+
- ping/pong diagnostics
|
23
28
|
- server version
|
24
|
-
- corpus statistics
|
29
|
+
- corpus statistics
|
25
30
|
- implementation of asynchronous protocol (not stable)
|
26
31
|
|
27
32
|
|
28
33
|
0.0.3
|
29
34
|
- the license of the corpus included
|
30
|
-
- client rdoc documentation
|
31
|
-
- support for lemmata retrieval
|
35
|
+
- client rdoc documentation
|
36
|
+
- support for lemmata retrieval
|
32
37
|
- excerpt now contains segments instead of strings
|
33
38
|
- buffer size setter
|
34
39
|
- default corpus moved to separate plugin (sudo gem install apohllo-poliqarpr-corpus)
|
@@ -45,6 +50,6 @@
|
|
45
50
|
- README.txt included in gem
|
46
51
|
- specs included in gem
|
47
52
|
|
48
|
-
0.0.1
|
53
|
+
0.0.1
|
49
54
|
- initial implementation
|
50
55
|
- synchronous querying for terms
|
data/lib/poliqarpr.rb
CHANGED
data/lib/poliqarpr/client.rb
CHANGED
@@ -3,61 +3,59 @@ module Poliqarp
|
|
3
3
|
# Author:: Aleksander Pohl (mailto:apohllo@o2.pl)
|
4
4
|
# License:: MIT License
|
5
5
|
#
|
6
|
-
# This class is the implementation of the Poliqarp server client.
|
6
|
+
# This class is the implementation of the Poliqarp server client.
|
7
7
|
class Client
|
8
|
-
|
9
|
-
|
10
|
-
# If debug is turned on, the communication between server and client
|
8
|
+
# If debug is turned on, the communication between server and client
|
11
9
|
# is logged to standard output.
|
12
10
|
attr_writer :debug
|
13
11
|
|
14
|
-
# The
|
15
|
-
|
16
|
-
attr_writer :buffer_size
|
12
|
+
# The configuration of the client.
|
13
|
+
attr_reader :config
|
17
14
|
|
18
|
-
# Creates new poliqarp server client.
|
19
|
-
#
|
15
|
+
# Creates new poliqarp server client.
|
16
|
+
#
|
20
17
|
# Parameters:
|
21
18
|
# * +session_name+ the name of the client session. Defaults to "RUBY".
|
22
19
|
# * +debug+ if set to true, all messages sent and received from server
|
23
20
|
# are printed to standard output. Defaults to false.
|
24
21
|
def initialize(session_name="RUBY", debug=false)
|
25
22
|
@session_name = session_name
|
26
|
-
@left_context = 5
|
27
|
-
@right_context = 5
|
28
23
|
@debug = debug
|
29
|
-
@buffer_size = 500000
|
30
24
|
@connector = Connector.new(debug)
|
25
|
+
@config = Config.new(self,500000)
|
31
26
|
@answer_queue = Queue.new
|
27
|
+
@waiting_mutext = Mutex.new
|
32
28
|
new_session
|
29
|
+
config.left_context_size = 5
|
30
|
+
config.right_context_size = 5
|
31
|
+
config.tags = []
|
32
|
+
config.lemmata = []
|
33
33
|
end
|
34
34
|
|
35
35
|
# A hint about installation of default corpus gem
|
36
36
|
def self.const_missing(const)
|
37
|
-
if const.to_s =~ /DEFAULT_CORPUS/
|
37
|
+
if const.to_s =~ /DEFAULT_CORPUS/
|
38
38
|
raise "You need to install 'apohllo-poliqarpr-corpus' to use the default corpus"
|
39
39
|
end
|
40
40
|
super
|
41
41
|
end
|
42
42
|
|
43
|
-
# Creates new session for the client with the name given in constructor.
|
44
|
-
# If the session was already opened, it is closed.
|
43
|
+
# Creates new session for the client with the name given in constructor.
|
44
|
+
# If the session was already opened, it is closed.
|
45
45
|
#
|
46
|
-
# Parameters:
|
46
|
+
# Parameters:
|
47
47
|
# * +port+ - the port on which the poliqarpd server is accepting connections (defaults to 4567)
|
48
48
|
def new_session(port=4567)
|
49
49
|
close if @session
|
50
50
|
@connector.open("localhost",port)
|
51
51
|
talk("MAKE-SESSION #{@session_name}")
|
52
|
-
talk("BUFFER-RESIZE #{
|
52
|
+
talk("BUFFER-RESIZE #{config.buffer_size}")
|
53
53
|
@session = true
|
54
|
-
self.tags = {}
|
55
|
-
self.lemmata = {}
|
56
54
|
end
|
57
55
|
|
58
56
|
# Closes the opened session.
|
59
57
|
def close
|
60
|
-
talk "CLOSE-SESSION"
|
58
|
+
talk "CLOSE-SESSION"
|
61
59
|
@session = false
|
62
60
|
end
|
63
61
|
|
@@ -66,114 +64,36 @@ module Poliqarp
|
|
66
64
|
talk "CLOSE"
|
67
65
|
end
|
68
66
|
|
69
|
-
# Sets the size of the left short context. It must be > 0
|
70
|
-
#
|
71
|
-
# The size of the left short context is the number
|
72
|
-
# of segments displayed in the found excerpts left to the
|
73
|
-
# matched segment(s).
|
74
|
-
def left_context=(value)
|
75
|
-
if correct_context_value?(value)
|
76
|
-
result = talk("SET left-context-width #{value}")
|
77
|
-
@left_context = value if result =~ /^R OK/
|
78
|
-
else
|
79
|
-
raise "Invalid argument: #{value}. It must be fixnum greater than 0."
|
80
|
-
end
|
81
|
-
end
|
82
|
-
|
83
|
-
# Sets the size of the right short context. It must be > 0
|
84
|
-
#
|
85
|
-
# The size of the right short context is the number
|
86
|
-
# of segments displayed in the found excerpts right to the
|
87
|
-
# matched segment(s).
|
88
|
-
def right_context=(value)
|
89
|
-
if correct_context_value?(value)
|
90
|
-
result = talk("SET right-context-width #{value}")
|
91
|
-
@right_context = value if result =~ /^R OK/
|
92
|
-
else
|
93
|
-
raise "Invalid argument: #{value}. It must be fixnum greater than 0."
|
94
|
-
end
|
95
|
-
end
|
96
|
-
|
97
|
-
# Sets the tags' flags. There are four groups of segments
|
98
|
-
# which the flags apply for:
|
99
|
-
# * +left_context+
|
100
|
-
# * +left_match+
|
101
|
-
# * +right_match+
|
102
|
-
# * +right_context+
|
103
|
-
#
|
104
|
-
# If the flag for given group is set to true, all segments
|
105
|
-
# in the group are annotated with grammatical tags. E.g.:
|
106
|
-
# c.find("kot")
|
107
|
-
# ...
|
108
|
-
# "kot" tags: "subst:sg:nom:m2"
|
109
|
-
#
|
110
|
-
# You can pass :all to turn on flags for all groups
|
111
|
-
def tags=(options={})
|
112
|
-
options = set_all_flags if options == :all
|
113
|
-
@tag_flags = options
|
114
|
-
flags = ""
|
115
|
-
GROUPS.each do |flag|
|
116
|
-
flags << (options[flag] ? "1" : "0")
|
117
|
-
end
|
118
|
-
talk("SET retrieve-tags #{flags}")
|
119
|
-
end
|
120
|
-
|
121
|
-
# Sets the lemmatas' flags. There are four groups of segments
|
122
|
-
# which the flags apply for:
|
123
|
-
# * +left_context+
|
124
|
-
# * +left_match+
|
125
|
-
# * +right_match+
|
126
|
-
# * +right_context+
|
127
|
-
#
|
128
|
-
# If the flag for given group is set to true, all segments
|
129
|
-
# in the group are returned with the base form of the lemmata. E.g.:
|
130
|
-
# c.find("kotu")
|
131
|
-
# ...
|
132
|
-
# "kotu" base_form: "kot"
|
133
|
-
#
|
134
|
-
# You can pass :all to turn on flags for all groups
|
135
|
-
def lemmata=(options={})
|
136
|
-
options = set_all_flags if options == :all
|
137
|
-
@lemmata_flags = options
|
138
|
-
flags = ""
|
139
|
-
GROUPS.each do |flag|
|
140
|
-
flags << (options[flag] ? "1" : "0")
|
141
|
-
end
|
142
|
-
talk("SET retrieve-lemmata #{flags}")
|
143
|
-
end
|
144
|
-
|
145
67
|
# *Asynchronous* Opens the corpus given as +path+. To open the default
|
146
|
-
# corpus pass +:default+ as the argument.
|
147
|
-
#
|
68
|
+
# corpus pass +:default+ as the argument.
|
69
|
+
#
|
148
70
|
# If you don't want to wait until the call is finished, you
|
149
71
|
# have to provide +handler+ for the asynchronous answer.
|
150
72
|
def open_corpus(path, &handler)
|
151
73
|
if path == :default
|
152
74
|
open_corpus(DEFAULT_CORPUS, &handler)
|
153
75
|
else
|
154
|
-
|
155
|
-
talk("OPEN #{path}", :async, &real_handler)
|
156
|
-
do_wait if handler.nil?
|
76
|
+
talk("OPEN #{path}", :async, &handler)
|
157
77
|
end
|
158
78
|
end
|
159
79
|
|
160
80
|
# Server diagnostics -- the result should be :pong
|
161
|
-
def ping
|
81
|
+
def ping
|
162
82
|
:pong if talk("PING") =~ /PONG/
|
163
83
|
end
|
164
84
|
|
165
85
|
# Returns server version
|
166
|
-
def version
|
86
|
+
def version
|
167
87
|
talk("VERSION")
|
168
88
|
end
|
169
89
|
|
170
90
|
# Returns corpus statistics:
|
171
|
-
# * +:segment_tokens+ the number of segments in the corpus
|
91
|
+
# * +:segment_tokens+ the number of segments in the corpus
|
172
92
|
# (two segments which look exactly the same are counted separately)
|
173
93
|
# * +:segment_types+ the number of segment types in the corpus
|
174
94
|
# (two segments which look exactly the same are counted as one type)
|
175
95
|
# * +:lemmata+ the number of lemmata (lexemes) types
|
176
|
-
# (all forms of inflected word, e.g. 'kot', 'kotu', ...
|
96
|
+
# (all forms of inflected word, e.g. 'kot', 'kotu', ...
|
177
97
|
# are treated as one "word" -- lemmata)
|
178
98
|
# * +:tags+ the number of different grammar tags (each combination
|
179
99
|
# of atomic tags is treated as different "tag")
|
@@ -181,7 +101,7 @@ module Poliqarp
|
|
181
101
|
stats = {}
|
182
102
|
talk("CORPUS-STATS").split.each_with_index do |value, index|
|
183
103
|
case index
|
184
|
-
when 1
|
104
|
+
when 1
|
185
105
|
stats[:segment_tokens] = value.to_i
|
186
106
|
when 2
|
187
107
|
stats[:segment_types] = value.to_i
|
@@ -205,7 +125,7 @@ module Poliqarp
|
|
205
125
|
# (each category has a list of its tags, eg. gender: m1 m2 m3 f n,
|
206
126
|
# means that there are 5 genders: masculine(1,2,3), feminine and neuter)
|
207
127
|
# * +:classes+ enlists grammatical tags used to describe it
|
208
|
-
# (each class has a list of tags used to describe it, eg. adj: degree
|
128
|
+
# (each class has a list of tags used to describe it, eg. adj: degree
|
209
129
|
# gender case number, means that adjectives are described in terms
|
210
130
|
# of degree, gender, case and number)
|
211
131
|
def tagset
|
@@ -226,14 +146,14 @@ module Poliqarp
|
|
226
146
|
#
|
227
147
|
# Options:
|
228
148
|
# * +index+ the index of the (only one) result to be returned. The index is relative
|
229
|
-
# to the beginning of the query result. In normal case you should query the
|
149
|
+
# to the beginning of the query result. In normal case you should query the
|
230
150
|
# corpus without specifying the index, to see what results are returned.
|
231
|
-
# Then you can use the index and the same query to retrieve one result.
|
151
|
+
# Then you can use the index and the same query to retrieve one result.
|
232
152
|
# The pair (query, index) is a kind of unique identifier of the excerpt.
|
233
153
|
# * +page_size+ the size of the page of results. If the page size is 0, then
|
234
154
|
# all results are returned on one page. It is ignored if the +index+ option
|
235
155
|
# is present. Defaults to 0.
|
236
|
-
# * +page_index+ the index of the page of results (the first page has index 1, not 0).
|
156
|
+
# * +page_index+ the index of the page of results (the first page has index 1, not 0).
|
237
157
|
# It is ignored if the +index+ option is present. Defaults to 1.
|
238
158
|
def find(query,options={})
|
239
159
|
if options[:index]
|
@@ -243,11 +163,11 @@ module Poliqarp
|
|
243
163
|
end
|
244
164
|
end
|
245
165
|
|
246
|
-
alias query find
|
166
|
+
alias query find
|
247
167
|
|
248
168
|
# Returns the number of results for given query.
|
249
169
|
def count(query)
|
250
|
-
count_results(make_query(query))
|
170
|
+
count_results(make_query(query))
|
251
171
|
end
|
252
172
|
|
253
173
|
# Returns the long context of the excerpt which is identified by
|
@@ -257,13 +177,13 @@ module Poliqarp
|
|
257
177
|
result = []
|
258
178
|
talk "GET-CONTEXT #{index}"
|
259
179
|
# 1st part
|
260
|
-
result << read_word
|
180
|
+
result << read_word
|
261
181
|
# 2nd part
|
262
|
-
result << read_word
|
182
|
+
result << read_word
|
263
183
|
# 3rd part
|
264
|
-
result << read_word
|
184
|
+
result << read_word
|
265
185
|
# 4th part
|
266
|
-
result << read_word
|
186
|
+
result << read_word
|
267
187
|
result
|
268
188
|
end
|
269
189
|
|
@@ -286,19 +206,62 @@ module Poliqarp
|
|
286
206
|
end
|
287
207
|
|
288
208
|
protected
|
209
|
+
# Set the size of the left context.
|
210
|
+
def left_context=(value)
|
211
|
+
result = talk("SET left-context-width #{value}")
|
212
|
+
unless result =~ /^OK/
|
213
|
+
raise "Failed to set left context to #{value}: #{result}"
|
214
|
+
end
|
215
|
+
end
|
216
|
+
|
217
|
+
# Set the size of the right context.
|
218
|
+
def right_context=(value)
|
219
|
+
result = talk("SET right-context-width #{value}")
|
220
|
+
unless result =~ /^OK/
|
221
|
+
raise "Failed to set right context to #{value}: #{result}"
|
222
|
+
end
|
223
|
+
end
|
224
|
+
|
225
|
+
# Sets the 'retrieve-tags' flags.
|
226
|
+
def retrieve_tags(flags)
|
227
|
+
talk("SET retrieve-tags #{flags}")
|
228
|
+
end
|
229
|
+
|
230
|
+
# Sets the 'retrieve-lemmata' flags.
|
231
|
+
def retrieve_lemmata(flags)
|
232
|
+
talk("SET retrieve-lemmata #{flags}")
|
233
|
+
end
|
234
|
+
|
235
|
+
|
289
236
|
# Sends a message directly to the server
|
290
237
|
# * +msg+ the message to send
|
291
238
|
# * +mode+ if set to :sync, the method blocks until the message
|
292
239
|
# is received. If :async the method returns immediately.
|
293
240
|
# Default: :sync
|
294
|
-
# * +handler+ the handler of the assynchronous message.
|
241
|
+
# * +handler+ the handler of the asynchronous message.
|
295
242
|
# It is ignored when the mode is set to :sync.
|
296
243
|
def talk(msg, mode = :sync, &handler)
|
297
244
|
puts msg if @debug
|
298
|
-
|
245
|
+
if mode == :sync
|
246
|
+
@connector.send_message(msg, mode, &handler)
|
247
|
+
else
|
248
|
+
if handler.nil?
|
249
|
+
real_handler = lambda do |msg|
|
250
|
+
@answer_queue.push msg
|
251
|
+
stop_waiting
|
252
|
+
end
|
253
|
+
start_waiting
|
254
|
+
else
|
255
|
+
real_handler = handler
|
256
|
+
end
|
257
|
+
@connector.send_message(msg, mode, &real_handler)
|
258
|
+
if handler.nil?
|
259
|
+
do_wait
|
260
|
+
end
|
261
|
+
end
|
299
262
|
end
|
300
263
|
|
301
|
-
# Make query and retrieve many results.
|
264
|
+
# Make query and retrieve many results.
|
302
265
|
# * +query+ the query to be sent to the server.
|
303
266
|
# * +options+ see find
|
304
267
|
def find_many(query, options)
|
@@ -308,7 +271,7 @@ protected
|
|
308
271
|
answer_offset = page_size * (page_index - 1)
|
309
272
|
if page_size > 0
|
310
273
|
result_count = make_async_query(query,answer_offset)
|
311
|
-
answers_limit = answer_offset + page_size > result_count ?
|
274
|
+
answers_limit = answer_offset + page_size > result_count ?
|
312
275
|
result_count - answer_offset : page_size
|
313
276
|
else
|
314
277
|
# all answers needed -- the call must be synchronous
|
@@ -321,12 +284,12 @@ protected
|
|
321
284
|
|
322
285
|
result = QueryResult.new(page_index, page_count,page_size,self,query)
|
323
286
|
if answers_limit > 0
|
324
|
-
talk("GET-RESULTS #{answer_offset} #{answer_offset + answers_limit - 1}")
|
287
|
+
talk("GET-RESULTS #{answer_offset} #{answer_offset + answers_limit - 1}")
|
325
288
|
answers_limit.times do |answer_index|
|
326
289
|
result << fetch_result(answer_offset + answer_index, query)
|
327
290
|
end
|
328
291
|
end
|
329
|
-
result
|
292
|
+
result
|
330
293
|
end
|
331
294
|
|
332
295
|
# Make query and retrieve only one result
|
@@ -334,13 +297,13 @@ protected
|
|
334
297
|
# * +index+ the index of the answer to be retrieved
|
335
298
|
def find_one(query,index)
|
336
299
|
make_async_query(query,index)
|
337
|
-
talk("GET-RESULTS #{index} #{index}")
|
338
|
-
fetch_result(index,query)
|
300
|
+
talk("GET-RESULTS #{index} #{index}")
|
301
|
+
fetch_result(index,query)
|
339
302
|
end
|
340
303
|
|
341
304
|
# Fetches one result of the query
|
342
305
|
#
|
343
|
-
# MAKE-QUERY and GET-RESULTS must be sent to the server before
|
306
|
+
# MAKE-QUERY and GET-RESULTS must be sent to the server before
|
344
307
|
# this method is called
|
345
308
|
def fetch_result(index, query)
|
346
309
|
result = Excerpt.new(index, self, query)
|
@@ -357,15 +320,15 @@ protected
|
|
357
320
|
segments = []
|
358
321
|
size.times do |segment_index|
|
359
322
|
segment = Segment.new(read_word)
|
360
|
-
segments << segment
|
361
|
-
if
|
323
|
+
segments << segment
|
324
|
+
if config.lemmata.include?(group) || config.tags.include?(group)
|
362
325
|
lemmata_size = read_number()
|
363
|
-
lemmata_size.times do |lemmata_index|
|
326
|
+
lemmata_size.times do |lemmata_index|
|
364
327
|
lemmata = Lemmata.new()
|
365
|
-
if
|
328
|
+
if config.lemmata.include?(group)
|
366
329
|
lemmata.base_form = read_word
|
367
330
|
end
|
368
|
-
if
|
331
|
+
if config.tags.include?(group)
|
369
332
|
lemmata.tags = read_word
|
370
333
|
end
|
371
334
|
segment.lemmata << lemmata
|
@@ -377,7 +340,9 @@ protected
|
|
377
340
|
|
378
341
|
# Reads number stored in the message received from the server.
|
379
342
|
def read_number
|
380
|
-
@connector.read_message
|
343
|
+
msg = @connector.read_message
|
344
|
+
puts "XXX #{msg}" if @debug
|
345
|
+
msg.match(/\d+/)[0].to_i
|
381
346
|
end
|
382
347
|
|
383
348
|
# Counts number of results for given answer
|
@@ -387,25 +352,22 @@ protected
|
|
387
352
|
|
388
353
|
# *Asynchronous* Sends the query to the server
|
389
354
|
# * +query+ query to send
|
390
|
-
# * +handler+ if given, the method returns immediately,
|
355
|
+
# * +handler+ if given, the method returns immediately,
|
391
356
|
# and the answer is sent to the handler. In this case
|
392
357
|
# the result returned by make_query should be IGNORED!
|
393
358
|
def make_query(query, &handler)
|
394
359
|
if @last_query != query
|
395
360
|
@last_query = query
|
396
|
-
if handler.nil?
|
397
|
-
real_handler = lambda { |msg| @answer_queue.push msg }
|
398
|
-
else
|
399
|
-
real_handler = handler
|
400
|
-
end
|
401
361
|
begin
|
402
362
|
talk("MAKE-QUERY #{query}")
|
403
363
|
rescue JobInProgress
|
404
364
|
talk("CANCEL") rescue nil
|
405
365
|
talk("MAKE-QUERY #{query}")
|
406
366
|
end
|
407
|
-
talk("RUN-QUERY #{
|
408
|
-
|
367
|
+
result = talk("RUN-QUERY #{config.buffer_size}", :async, &handler)
|
368
|
+
if handler.nil?
|
369
|
+
@last_result = result
|
370
|
+
end
|
409
371
|
end
|
410
372
|
@last_result
|
411
373
|
end
|
@@ -415,38 +377,55 @@ protected
|
|
415
377
|
@connector.read_message
|
416
378
|
end
|
417
379
|
|
418
|
-
private
|
380
|
+
private
|
381
|
+
# Wait for the asynchronous answer, if some asynchronous query
|
382
|
+
# was sent without handler.
|
419
383
|
def do_wait
|
420
384
|
loop {
|
421
|
-
|
422
|
-
puts "
|
423
|
-
sleep 0.
|
385
|
+
break unless should_wait?
|
386
|
+
puts "WAITING" if @debug
|
387
|
+
sleep 0.1
|
424
388
|
}
|
425
389
|
@answer_queue.shift
|
426
390
|
end
|
427
391
|
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
392
|
+
# Stop waiting for the asynchronous answer.
|
393
|
+
def stop_waiting
|
394
|
+
@waiting_mutext.synchronize {
|
395
|
+
@should_wait = false
|
396
|
+
}
|
397
|
+
puts "WAITING stopped" if @debug
|
398
|
+
end
|
399
|
+
|
400
|
+
# Check if the thread should still wait for the answer.
|
401
|
+
def should_wait?
|
402
|
+
should_wait = nil
|
403
|
+
@waiting_mutext.synchronize {
|
404
|
+
should_wait = @should_wait
|
405
|
+
}
|
406
|
+
should_wait
|
432
407
|
end
|
433
|
-
|
434
|
-
|
435
|
-
|
408
|
+
|
409
|
+
# Start waiting for the answer.
|
410
|
+
def start_waiting
|
411
|
+
@waiting_mutext.synchronize {
|
412
|
+
@should_wait = true
|
413
|
+
}
|
414
|
+
puts "WAITING started" if @debug
|
436
415
|
end
|
437
416
|
|
438
|
-
def make_async_query(query,answer_offset)
|
439
|
-
|
440
|
-
# BUFFER-STATE call
|
441
|
-
make_query(query){|msg| }
|
442
|
-
result_count = 0
|
443
|
-
begin
|
417
|
+
def make_async_query(query,answer_offset)
|
418
|
+
start_waiting
|
419
|
+
# we access the result count through BUFFER-STATE call
|
420
|
+
make_query(query){|msg| stop_waiting}
|
421
|
+
result_count = 0
|
422
|
+
begin
|
444
423
|
# the result count might be not exact!
|
445
424
|
result_count = talk("BUFFER-STATE").split(" ")[2].to_i
|
446
|
-
|
425
|
+
break unless should_wait?
|
447
426
|
end while result_count < answer_offset
|
448
427
|
@last_result = "OK #{result_count}"
|
449
428
|
result_count
|
450
429
|
end
|
451
|
-
end
|
430
|
+
end
|
452
431
|
end
|
@@ -0,0 +1,138 @@
|
|
1
|
+
# vim:encoding=utf-8
|
2
|
+
module Poliqarp
  # Author:: Aleksander Pohl (mailto:apohllo@o2.pl)
  # License:: MIT License
  #
  # This class holds the configuration of the client. Every setter that
  # changes a server-side option forwards the new value to the client
  # through Object#send, so non-public client methods can be reached too.
  class Config
    # The groups of segments for which tags/lemmata may be retrieved, listed
    # in the order in which flags_for writes their flags.
    GROUPS = [:left_context, :left_match, :right_match, :right_context].freeze

    # The kinds of annotation that can be switched on per group. Each one has
    # a reader/writer of the same name and a matching retrieve_<type> method
    # that is invoked on the client.
    ANNOTATION_TYPES = [:tags, :lemmata].freeze

    # The size of the buffer is the maximum number of excerpts which
    # are returned for single query.
    attr_accessor :buffer_size

    # Current context sizes and the groups selected for tags/lemmata.
    # Their writers are defined explicitly below, because each of them
    # has to notify the client.
    attr_reader :left_context_size, :right_context_size, :tags, :lemmata

    # Creates new configuration.
    #
    # Parameters:
    # * +client+ the client which is notified about every change.
    # * +buffer_size+ the initial size of the result buffer.
    def initialize(client,buffer_size)
      @client = client
      @buffer_size = buffer_size
      # Start with empty selections, so the per-group accessors work
      # before tags= / lemmata= is called for the first time.
      @tags = []
      @lemmata = []
    end

    # Sets the size of the left short context. It must be > 0
    #
    # The size of the left short context is the number
    # of segments displayed in the found excerpts left to the
    # matched segment(s).
    #
    # Raises RuntimeError if +value+ is not a positive integer.
    def left_context_size=(value)
      unless correct_context_value?(value)
        raise "Invalid argument: #{value}. It must be fixnum greater than 0."
      end
      @client.send(:left_context=, value)
      @left_context_size = value
    end

    # Sets the size of the right short context. It must be > 0
    #
    # The size of the right short context is the number
    # of segments displayed in the found excerpts right to the
    # matched segment(s).
    #
    # Raises RuntimeError if +value+ is not a positive integer.
    def right_context_size=(value)
      unless correct_context_value?(value)
        raise "Invalid argument: #{value}. It must be fixnum greater than 0."
      end
      @client.send(:right_context=, value)
      @right_context_size = value
    end

    # Sets the tags' flags. There are four groups of segments
    # which the flags apply for:
    # * +:left_context+
    # * +:left_match+
    # * +:right_match+
    # * +:right_context+
    #
    # If the flag for given group is present, all segments
    # in the group are annotated with grammatical tags. E.g.:
    #  c.find("kot")
    #  ...
    #  "kot" tags: "subst:sg:nom:m2"
    #
    # E.g. config.tags = [:left_context] will retrieve tags
    # only for the left context.
    #
    # You can pass :all to turn on flags for all groups, i.e.
    # config.tags = :all will retrieve tags for all groups.
    def tags=(groups)
      @tags = selection_for(groups)
      @client.send(:retrieve_tags, flags_for(@tags))
    end

    # Sets the lemmatas' flags. There are four groups of segments
    # which the flags apply for:
    # * +:left_context+
    # * +:left_match+
    # * +:right_match+
    # * +:right_context+
    #
    # If the flag for given group is present, all segments
    # in the group are returned with the base form of the lemmata. E.g.:
    #  c.find("kotu")
    #  ...
    #  "kotu" base_form: "kot"
    #
    # E.g. config.lemmata = [:left_context] will retrieve lemmata
    # only for the left context.
    #
    # You can pass :all to turn on flags for all groups, i.e.
    # config.lemmata = :all will retrieve lemmata for all groups.
    def lemmata=(groups)
      @lemmata = selection_for(groups)
      @client.send(:retrieve_lemmata, flags_for(@lemmata))
    end

    ANNOTATION_TYPES.each do |type|
      GROUPS.each do |group|
        # Allow for accessing individual group tags/lemmata flag,
        # e.g. config.left_context_tags, config.left_context_lemmata
        #
        # The flag is read from the selection of the matching type
        # (tags or lemmata), never from the other one.
        define_method("#{group}_#{type}") do
          send(type).include?(group)
        end

        # Allow for changing individual group tags/lemmata flag,
        # e.g. config.left_context_tags = true, config.left_context_lemmata = true
        #
        # A new selection is built and passed to the tags= / lemmata=
        # writer, which stores it and notifies the client.
        define_method("#{group}_#{type}=") do |value|
          selected = send(type)
          updated =
            if value
              selected.include?(group) ? selected : selected + [group]
            else
              selected - [group]
            end
          send("#{type}=", updated)
        end
      end
    end

    protected
    # Returns true if +value+ is an integer greater than 0.
    # Integer is used instead of Fixnum, since Fixnum is no longer
    # defined in current Ruby versions.
    def correct_context_value?(value)
      value.is_a?(Integer) && value > 0
    end

    # Converts the +groups+ passed to tags= / lemmata= into the stored
    # selection. A copy is always stored, so neither the caller's array
    # nor GROUPS is shared with the configuration.
    def selection_for(groups)
      groups == :all ? GROUPS.dup : groups.dup
    end

    # Returns a string with one character per group (in GROUPS order):
    # "1" if the group is included in +elements+, "0" otherwise.
    def flags_for(elements)
      GROUPS.map { |group| elements.include?(group) ? "1" : "0" }.join
    end
  end
end
|
data/lib/poliqarpr/connector.rb
CHANGED
@@ -44,7 +44,7 @@ module Poliqarp
|
|
44
44
|
@debug = debug
|
45
45
|
end
|
46
46
|
|
47
|
-
# Opens connection with poliqarp server which runs
|
47
|
+
# Opens connection with poliqarp server which runs
|
48
48
|
# on given +host+ and +port+.
|
49
49
|
def open(host,port)
|
50
50
|
@socket_mutex.synchronize {
|
@@ -60,17 +60,16 @@ module Poliqarp
|
|
60
60
|
}
|
61
61
|
end
|
62
62
|
|
63
|
-
# Sends message to the poliqarp server. Returns the first synchronous
|
63
|
+
# Sends message to the poliqarp server. Returns the first synchronous
|
64
64
|
# answer of the server.
|
65
65
|
# * +message+ the message to send
|
66
66
|
# * +mode+ synchronous (+:sync+) or asynchronous (+:async+)
|
67
67
|
# * +handler+ the handler of the asynchronous message
|
68
|
-
def
|
68
|
+
def send_message(message, mode, &handler)
|
69
69
|
puts "send #{mode} #{message}" if @debug
|
70
70
|
if ruby19?
|
71
71
|
massage = message.encode(UTF8)
|
72
72
|
end
|
73
|
-
#@socket.puts(message)
|
74
73
|
@socket.write(message+"\n")
|
75
74
|
if mode == :async
|
76
75
|
@handler = handler
|
@@ -79,7 +78,7 @@ module Poliqarp
|
|
79
78
|
end
|
80
79
|
|
81
80
|
# Retrieves one message from the server.
|
82
|
-
# If the message indicates an error, new runtime error
|
81
|
+
# If the message indicates an error, new runtime error
|
83
82
|
# containing the error description is returned.
|
84
83
|
def read_message
|
85
84
|
message = @message_queue.shift
|
@@ -94,7 +93,7 @@ module Poliqarp
|
|
94
93
|
|
95
94
|
private
|
96
95
|
def main_loop
|
97
|
-
@loop = Thread.new {
|
96
|
+
@loop = Thread.new {
|
98
97
|
loop {
|
99
98
|
receive
|
100
99
|
# XXX ??? needed
|
@@ -124,8 +123,8 @@ private
|
|
124
123
|
|
125
124
|
def receive_async(message)
|
126
125
|
puts "receive async: #{message}" if @debug
|
127
|
-
Thread.new{
|
128
|
-
@handler.call(message)
|
126
|
+
Thread.new{
|
127
|
+
@handler.call(message)
|
129
128
|
}
|
130
129
|
end
|
131
130
|
|
data/lib/poliqarpr/exceptions.rb
CHANGED
@@ -2,7 +2,7 @@ module Poliqarp
|
|
2
2
|
# Author:: Aleksander Pohl (mailto:apohllo@o2.pl)
|
3
3
|
# License:: MIT License
|
4
4
|
|
5
|
-
# The JobInProgress exception is raised if there was asynchronous call
|
5
|
+
# The JobInProgress exception is raised if there was asynchronous call
|
6
6
|
# to the server which hasn't finished, which is interrupted by another
|
7
7
|
# asynchronous call.
|
8
8
|
class JobInProgress < Exception; end
|
data/lib/poliqarpr/excerpt.rb
CHANGED
@@ -2,13 +2,13 @@ module Poliqarp
|
|
2
2
|
# Author:: Aleksander Pohl
|
3
3
|
# License:: MIT License
|
4
4
|
#
|
5
|
-
# The excerpt class is used to store single result of the query,
|
5
|
+
# The excerpt class is used to store single result of the query,
|
6
6
|
# i.e. the excerpt of the corpus which contains the words which
|
7
|
-
# the corpus was queried for.
|
7
|
+
# the corpus was queried for.
|
8
8
|
#
|
9
9
|
# The excerpt is divided into groups, which contain segments,
|
10
|
-
# which the texts in the corpus were divided for.
|
11
|
-
# The first group is the left context, the second -- the matched
|
10
|
+
# which the texts in the corpus were divided for.
|
11
|
+
# The first group is the left context, the second -- the matched
|
12
12
|
# query, and the last -- the right context.
|
13
13
|
class Excerpt
|
14
14
|
attr_reader :index, :base_form, :short_context
|
@@ -40,7 +40,7 @@ module Poliqarp
|
|
40
40
|
@short_context[2]
|
41
41
|
end
|
42
42
|
|
43
|
-
# Returns the matched query as string
|
43
|
+
# Returns the matched query as string
|
44
44
|
def word
|
45
45
|
#@short_context[0].split(/\s+/)[-1]
|
46
46
|
@short_context[1].map{|s| s.to_s}.join("")
|
@@ -54,7 +54,7 @@ module Poliqarp
|
|
54
54
|
@short_context.join("")
|
55
55
|
end
|
56
56
|
|
57
|
-
# Returns the long context of the query.
|
57
|
+
# Returns the long context of the query.
|
58
58
|
def context
|
59
59
|
return @context unless @context.nil?
|
60
60
|
@context = @client.context(@base_form, @index)
|
@@ -63,7 +63,7 @@ module Poliqarp
|
|
63
63
|
{ :medium => :medium, :style => :styl, :date => :data_wydania,
|
64
64
|
:city => :miejsce_wydania, :publisher => :wydawca, :title => :tytu,
|
65
65
|
:author => :autor}.each do |method, keyword|
|
66
|
-
define_method method do
|
66
|
+
define_method method do
|
67
67
|
self.metadata[keyword]
|
68
68
|
end
|
69
69
|
end
|
@@ -2,9 +2,9 @@ module Poliqarp
|
|
2
2
|
# Author:: Aleksander Pohl (mailto:apohllo@o2.pl)
|
3
3
|
# License:: MIT License
|
4
4
|
#
|
5
|
-
# The query result class is used to paginate results of the
|
5
|
+
# The query result class is used to paginate results of the
|
6
6
|
# query. Each query result has information about its context
|
7
|
-
# (the next and previous page).
|
7
|
+
# (the next and previous page).
|
8
8
|
class QueryResult
|
9
9
|
include Enumerable
|
10
10
|
|
@@ -51,16 +51,16 @@ module Poliqarp
|
|
51
51
|
# Returns the previous page of the query result
|
52
52
|
def previous_page
|
53
53
|
if @page > 1
|
54
|
-
@client.find(@query, :page_size => @page_size,
|
55
|
-
:page_index => @page - 1)
|
54
|
+
@client.find(@query, :page_size => @page_size,
|
55
|
+
:page_index => @page - 1)
|
56
56
|
end
|
57
57
|
end
|
58
58
|
|
59
59
|
# Return the next page of the query result
|
60
60
|
def next_page
|
61
61
|
if @page < @page_count
|
62
|
-
@client.find(@query, :page_size => @page_size,
|
63
|
-
:page_index => @page + 1)
|
62
|
+
@client.find(@query, :page_size => @page_size,
|
63
|
+
:page_index => @page + 1)
|
64
64
|
end
|
65
65
|
end
|
66
66
|
|
data/lib/poliqarpr/segment.rb
CHANGED
@@ -1,22 +1,22 @@
|
|
1
|
-
module Poliqarp
|
1
|
+
module Poliqarp
|
2
2
|
# Author:: Aleksander Pohl (mailto:apohllo@o2.pl)
|
3
3
|
# License:: MIT LICENSE
|
4
4
|
#
|
5
|
-
# The segment is the smallest meaningful part of the text.
|
6
|
-
# It may contain many lemmata, since the segments are sometimes
|
7
|
-
# not disambiguated.
|
5
|
+
# The segment is the smallest meaningful part of the text.
|
6
|
+
# It may contain many lemmata, since the segments are sometimes
|
7
|
+
# not disambiguated.
|
8
8
|
class Segment
|
9
9
|
attr_reader :literal, :lemmata
|
10
10
|
|
11
|
-
# Creates new segment. The specified argument is the literal
|
12
|
-
# (as found in the text) representation of the segment.
|
11
|
+
# Creates new segment. The specified argument is the literal
|
12
|
+
# (as found in the text) representation of the segment.
|
13
13
|
def initialize(literal)
|
14
14
|
@literal = literal
|
15
15
|
@lemmata = []
|
16
16
|
end
|
17
17
|
|
18
18
|
# Returns the segment literal
|
19
|
-
def to_s
|
19
|
+
def to_s
|
20
20
|
@literal
|
21
21
|
end
|
22
22
|
end
|
data/lib/poliqarpr/util.rb
CHANGED
data/poliqarpr.gemspec
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = "poliqarpr"
|
3
|
-
s.version = "0.0.8"
|
4
|
-
s.date = "2011-01-
|
3
|
+
s.version = "0.1.0"
|
4
|
+
s.date = "2011-01-17"
|
5
5
|
s.summary = "Ruby client for Poliqarp"
|
6
6
|
s.email = "apohllo@o2.pl"
|
7
7
|
s.homepage = "http://www.github.com/apohllo/poliqarpr"
|
8
8
|
s.description = "Ruby client for Poliqarp (NLP corpus server)"
|
9
9
|
s.authors = ['Aleksander Pohl']
|
10
|
-
s.files = ["Rakefile", "poliqarpr.gemspec",
|
10
|
+
s.files = ["Rakefile", "poliqarpr.gemspec",
|
11
11
|
"changelog.txt", "README.txt" ] + Dir.glob("lib/**/*")
|
12
12
|
s.test_files = Dir.glob("spec/**/*")
|
13
13
|
s.rdoc_options = ["--main", "README.txt"]
|
data/spec/client.rb
CHANGED
@@ -5,17 +5,17 @@ require 'poliqarpr'
|
|
5
5
|
describe Poliqarp::Client do
|
6
6
|
describe "(general test)" do
|
7
7
|
before(:each) do
|
8
|
-
@client = Poliqarp::Client.new("
|
8
|
+
@client = Poliqarp::Client.new("TEST1")
|
9
9
|
end
|
10
|
-
|
11
|
-
after(:each) do
|
10
|
+
|
11
|
+
after(:each) do
|
12
12
|
@client.close
|
13
13
|
end
|
14
|
-
|
14
|
+
|
15
15
|
it "should allow to open corpus" do
|
16
16
|
@client.open_corpus("/home/fox/local/poliqarp/2.sample.30/sample")
|
17
17
|
end
|
18
|
-
|
18
|
+
|
19
19
|
it "should allow to open :default corpus" do
|
20
20
|
@client.open_corpus(:default)
|
21
21
|
end
|
@@ -32,7 +32,7 @@ describe Poliqarp::Client do
|
|
32
32
|
|
33
33
|
describe "(with 'sample' corpus)" do
|
34
34
|
before(:all) do
|
35
|
-
@client = Poliqarp::Client.new("
|
35
|
+
@client = Poliqarp::Client.new("TEST2")
|
36
36
|
@client.open_corpus("/home/fox/local/poliqarp/2.sample.30/sample")
|
37
37
|
end
|
38
38
|
|
@@ -40,35 +40,37 @@ describe Poliqarp::Client do
|
|
40
40
|
@client.close
|
41
41
|
end
|
42
42
|
|
43
|
-
it "should allow to set the right context size" do
|
44
|
-
@client.
|
43
|
+
it "should allow to set and get the right context size" do
|
44
|
+
@client.config.right_context_size = 5
|
45
|
+
@client.config.right_context_size.should == 5
|
45
46
|
end
|
46
47
|
|
47
|
-
it "should raise error if the size of right context is not number" do
|
48
|
-
(proc do
|
49
|
-
@client.
|
48
|
+
it "should raise error if the size of right context is not number" do
|
49
|
+
(proc do
|
50
|
+
@client.config.right_context_size = "a"
|
50
51
|
end).should raise_error(RuntimeError)
|
51
52
|
end
|
52
53
|
|
53
|
-
it "should rais error if the size of right context is less or equal 0" do
|
54
|
-
(proc do
|
55
|
-
@client.
|
54
|
+
it "should rais error if the size of right context is less or equal 0" do
|
55
|
+
(proc do
|
56
|
+
@client.config.right_context_size = 0
|
56
57
|
end).should raise_error(RuntimeError)
|
57
58
|
end
|
58
59
|
|
59
|
-
it "should allow to set the left context size" do
|
60
|
-
@client.
|
60
|
+
it "should allow to set and get the left context size" do
|
61
|
+
@client.config.left_context_size = 5
|
62
|
+
@client.config.left_context_size.should == 5
|
61
63
|
end
|
62
64
|
|
63
|
-
it "should raise error if the size of left context is not number" do
|
64
|
-
(lambda do
|
65
|
-
@client.
|
65
|
+
it "should raise error if the size of left context is not number" do
|
66
|
+
(lambda do
|
67
|
+
@client.config.left_context_size = "a"
|
66
68
|
end).should raise_error(RuntimeError)
|
67
69
|
end
|
68
70
|
|
69
|
-
it "should rais error if the size of left context is less or equal 0" do
|
70
|
-
(lambda do
|
71
|
-
@client.
|
71
|
+
it "should rais error if the size of left context is less or equal 0" do
|
72
|
+
(lambda do
|
73
|
+
@client.config.left_context_size = 0
|
72
74
|
end).should raise_error(RuntimeError)
|
73
75
|
end
|
74
76
|
|
@@ -87,7 +89,7 @@ describe Poliqarp::Client do
|
|
87
89
|
tagset[:classes].should_not == nil
|
88
90
|
end
|
89
91
|
|
90
|
-
it "should allow to find 'kot'" do
|
92
|
+
it "should allow to find 'kot'" do
|
91
93
|
@client.find("kot").size.should_not == 0
|
92
94
|
end
|
93
95
|
|
@@ -129,7 +131,7 @@ describe Poliqarp::Client do
|
|
129
131
|
end
|
130
132
|
|
131
133
|
describe("(with index specified in find)") do
|
132
|
-
before(:each) do
|
134
|
+
before(:each) do
|
133
135
|
@result = @client.find("nachalny",:index => 0)
|
134
136
|
end
|
135
137
|
|
@@ -146,13 +148,12 @@ describe Poliqarp::Client do
|
|
146
148
|
end
|
147
149
|
end
|
148
150
|
|
149
|
-
describe("(with lemmata flags set to true)") do
|
151
|
+
describe("(with lemmata flags set to true)") do
|
150
152
|
before(:all) do
|
151
|
-
@client.lemmata =
|
152
|
-
:left_match => true, :right_match => true}
|
153
|
+
@client.config.lemmata = [:left_context, :right_context, :left_match, :right_match]
|
153
154
|
end
|
154
155
|
|
155
|
-
it "should allow to find 'kotu'" do
|
156
|
+
it "should allow to find 'kotu'" do
|
156
157
|
@client.find("kotu").size.should_not == 0
|
157
158
|
end
|
158
159
|
|
data/spec/excerpt.rb
CHANGED
@@ -25,7 +25,7 @@ describe Poliqarp::Excerpt do
|
|
25
25
|
@excerpt.index.should_not == nil
|
26
26
|
end
|
27
27
|
|
28
|
-
it "should have base form" do
|
28
|
+
it "should have base form" do
|
29
29
|
@excerpt.base_form.should_not == nil
|
30
30
|
end
|
31
31
|
|
@@ -64,10 +64,10 @@ describe Poliqarp::Excerpt do
|
|
64
64
|
end
|
65
65
|
|
66
66
|
it "should have index set to 0" do
|
67
|
-
@excerpt.index.should == 0
|
67
|
+
@excerpt.index.should == 0
|
68
68
|
end
|
69
69
|
|
70
|
-
it "should have base form set to 'kot'" do
|
70
|
+
it "should have base form set to 'kot'" do
|
71
71
|
@excerpt.base_form.should == "mu za to astronomiczną"
|
72
72
|
end
|
73
73
|
|
@@ -96,7 +96,7 @@ describe Poliqarp::Excerpt do
|
|
96
96
|
it "should have 'city' set to nil" do
|
97
97
|
@excerpt.city.should == nil
|
98
98
|
end
|
99
|
-
|
99
|
+
|
100
100
|
it "should have one 'publisher' set to 'Wydawnictwo Naukowe Akademii Pedagogicznej'" do
|
101
101
|
@excerpt.publisher.size.should == 1
|
102
102
|
@excerpt.publisher[0].should == "Wydawnictwo W.A.B."
|
@@ -114,13 +114,13 @@ describe Poliqarp::Excerpt do
|
|
114
114
|
end
|
115
115
|
|
116
116
|
describe('first result for "kotu" with lemmatization turned on') do
|
117
|
-
before(:all) do
|
118
|
-
@client.lemmata = :all
|
117
|
+
before(:all) do
|
118
|
+
@client.config.lemmata = :all
|
119
119
|
@client.open_corpus(:default)
|
120
|
-
@excerpt = @client.find("kotu")[0]
|
120
|
+
@excerpt = @client.find("kotu")[0]
|
121
121
|
end
|
122
122
|
|
123
|
-
it "should have one lemmata for each segment" do
|
123
|
+
it "should have one lemmata for each segment" do
|
124
124
|
@excerpt.short_context.each do |group|
|
125
125
|
group.each do |segment|
|
126
126
|
segment.lemmata.size.should == 1
|
@@ -134,7 +134,7 @@ describe Poliqarp::Excerpt do
|
|
134
134
|
end
|
135
135
|
end
|
136
136
|
|
137
|
-
it "should contain 'kot' as one of the lemmata" do
|
137
|
+
it "should contain 'kot' as one of the lemmata" do
|
138
138
|
@excerpt.short_context.flatten.
|
139
139
|
any?{|s| s.lemmata[0].base_form == "kot"}.should == true
|
140
140
|
end
|
data/spec/query_result.rb
CHANGED
@@ -12,7 +12,7 @@ describe Poliqarp::QueryResult do
|
|
12
12
|
@client.close
|
13
13
|
end
|
14
14
|
|
15
|
-
describe "(for unspecified query)" do
|
15
|
+
describe "(for unspecified query)" do
|
16
16
|
before(:all) do
|
17
17
|
@result = @client.find("kita")
|
18
18
|
end
|
@@ -57,11 +57,11 @@ describe Poliqarp::QueryResult do
|
|
57
57
|
end
|
58
58
|
|
59
59
|
describe "(for 'kot' in :default corpus)" do
|
60
|
-
before(:all) do
|
60
|
+
before(:all) do
|
61
61
|
@result = @client.find("kot")
|
62
62
|
end
|
63
63
|
|
64
|
-
it "should have size == 6" do
|
64
|
+
it "should have size == 6" do
|
65
65
|
@result.size.should == 6
|
66
66
|
end
|
67
67
|
|
@@ -83,11 +83,11 @@ describe Poliqarp::QueryResult do
|
|
83
83
|
end
|
84
84
|
|
85
85
|
describe "(for 'kot' with page_size set to 5 in :default corpus)" do
|
86
|
-
before(:all) do
|
86
|
+
before(:all) do
|
87
87
|
@result = @client.find("kot", :page_size => 5)
|
88
88
|
end
|
89
89
|
|
90
|
-
it "should have size == 5" do
|
90
|
+
it "should have size == 5" do
|
91
91
|
@result.size.should == 5
|
92
92
|
end
|
93
93
|
|
@@ -109,11 +109,11 @@ describe Poliqarp::QueryResult do
|
|
109
109
|
end
|
110
110
|
|
111
111
|
describe "(next for 'kot' with page_size set to 5 in :default corpus)" do
|
112
|
-
before(:all) do
|
112
|
+
before(:all) do
|
113
113
|
@result = @client.find("kot", :page_size => 5).next_page
|
114
114
|
end
|
115
115
|
|
116
|
-
it "should have size == 1" do
|
116
|
+
it "should have size == 1" do
|
117
117
|
@result.size.should == 1
|
118
118
|
end
|
119
119
|
|
metadata
CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
6
|
- 0
|
7
|
+
- 1
|
7
8
|
- 0
|
8
|
-
|
9
|
-
version: 0.0.8
|
9
|
+
version: 0.1.0
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Aleksander Pohl
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2011-01-
|
17
|
+
date: 2011-01-17 00:00:00 +01:00
|
18
18
|
default_executable:
|
19
19
|
dependencies: []
|
20
20
|
|
@@ -32,6 +32,7 @@ files:
|
|
32
32
|
- changelog.txt
|
33
33
|
- README.txt
|
34
34
|
- lib/poliqarpr.rb
|
35
|
+
- lib/poliqarpr/config.rb
|
35
36
|
- lib/poliqarpr/exceptions.rb
|
36
37
|
- lib/poliqarpr/lemmata.rb
|
37
38
|
- lib/poliqarpr/query_result.rb
|