poliqarpr 0.0.8 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/changelog.txt +11 -6
- data/lib/poliqarpr.rb +1 -1
- data/lib/poliqarpr/client.rb +137 -158
- data/lib/poliqarpr/config.rb +138 -0
- data/lib/poliqarpr/connector.rb +7 -8
- data/lib/poliqarpr/exceptions.rb +1 -1
- data/lib/poliqarpr/excerpt.rb +7 -7
- data/lib/poliqarpr/query_result.rb +6 -6
- data/lib/poliqarpr/segment.rb +7 -7
- data/lib/poliqarpr/util.rb +1 -1
- data/poliqarpr.gemspec +3 -3
- data/spec/client.rb +29 -28
- data/spec/excerpt.rb +9 -9
- data/spec/query_result.rb +7 -7
- metadata +4 -3
data/changelog.txt
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
0.1.0
|
2
|
+
- synchronization on async call without handler done via internal mutex
|
3
|
+
instead of call to STATUS
|
4
|
+
- client config moved to external class
|
5
|
+
|
1
6
|
0.0.8
|
2
7
|
- Speed optimization: socket puts changed to write
|
3
8
|
|
@@ -8,7 +13,7 @@
|
|
8
13
|
|
9
14
|
0.0.6
|
10
15
|
- fix: Excerpt#word - the words constituting the matched query
|
11
|
-
- new: Excerpt#matched, Excerpt#right_context Excerpt#left_context
|
16
|
+
- new: Excerpt#matched, Excerpt#right_context Excerpt#left_context
|
12
17
|
return the matched, right context, left context segments respectively
|
13
18
|
|
14
19
|
0.0.5
|
@@ -19,16 +24,16 @@
|
|
19
24
|
- Documentation now points to gemcutter instead of github
|
20
25
|
|
21
26
|
0.0.4
|
22
|
-
- ping/pong diagnostics
|
27
|
+
- ping/pong diagnostics
|
23
28
|
- server version
|
24
|
-
- corpus statistics
|
29
|
+
- corpus statistics
|
25
30
|
- implementation of asynchronous protocol (not stable)
|
26
31
|
|
27
32
|
|
28
33
|
0.0.3
|
29
34
|
- the license of the corpus included
|
30
|
-
- client rdoc documentation
|
31
|
-
- support for lemmata retrieval
|
35
|
+
- client rdoc documentation
|
36
|
+
- support for lemmata retrieval
|
32
37
|
- excerpt now contains segments instead of strings
|
33
38
|
- buffer size setter
|
34
39
|
- default corpus moved to separate plugin (sudo gem install apohllo-poliqarpr-corpus)
|
@@ -45,6 +50,6 @@
|
|
45
50
|
- README.txt included in gem
|
46
51
|
- specs included in gem
|
47
52
|
|
48
|
-
0.0.1
|
53
|
+
0.0.1
|
49
54
|
- initial implementation
|
50
55
|
- synchronous querying for terms
|
data/lib/poliqarpr.rb
CHANGED
data/lib/poliqarpr/client.rb
CHANGED
@@ -3,61 +3,59 @@ module Poliqarp
|
|
3
3
|
# Author:: Aleksander Pohl (mailto:apohllo@o2.pl)
|
4
4
|
# License:: MIT License
|
5
5
|
#
|
6
|
-
# This class is the implementation of the Poliqarp server client.
|
6
|
+
# This class is the implementation of the Poliqarp server client.
|
7
7
|
class Client
|
8
|
-
|
9
|
-
|
10
|
-
# If debug is turned on, the communication between server and client
|
8
|
+
# If debug is turned on, the communication between server and client
|
11
9
|
# is logged to standard output.
|
12
10
|
attr_writer :debug
|
13
11
|
|
14
|
-
# The
|
15
|
-
|
16
|
-
attr_writer :buffer_size
|
12
|
+
# The configuration of the client.
|
13
|
+
attr_reader :config
|
17
14
|
|
18
|
-
# Creates new poliqarp server client.
|
19
|
-
#
|
15
|
+
# Creates new poliqarp server client.
|
16
|
+
#
|
20
17
|
# Parameters:
|
21
18
|
# * +session_name+ the name of the client session. Defaults to "RUBY".
|
22
19
|
# * +debug+ if set to true, all messages sent and received from server
|
23
20
|
# are printed to standard output. Defaults to false.
|
24
21
|
def initialize(session_name="RUBY", debug=false)
|
25
22
|
@session_name = session_name
|
26
|
-
@left_context = 5
|
27
|
-
@right_context = 5
|
28
23
|
@debug = debug
|
29
|
-
@buffer_size = 500000
|
30
24
|
@connector = Connector.new(debug)
|
25
|
+
@config = Config.new(self,500000)
|
31
26
|
@answer_queue = Queue.new
|
27
|
+
@waiting_mutext = Mutex.new
|
32
28
|
new_session
|
29
|
+
config.left_context_size = 5
|
30
|
+
config.right_context_size = 5
|
31
|
+
config.tags = []
|
32
|
+
config.lemmata = []
|
33
33
|
end
|
34
34
|
|
35
35
|
# A hint about installation of default corpus gem
|
36
36
|
def self.const_missing(const)
|
37
|
-
if const.to_s =~ /DEFAULT_CORPUS/
|
37
|
+
if const.to_s =~ /DEFAULT_CORPUS/
|
38
38
|
raise "You need to install 'apohllo-poliqarpr-corpus' to use the default corpus"
|
39
39
|
end
|
40
40
|
super
|
41
41
|
end
|
42
42
|
|
43
|
-
# Creates new session for the client with the name given in constructor.
|
44
|
-
# If the session was already opened, it is closed.
|
43
|
+
# Creates new session for the client with the name given in constructor.
|
44
|
+
# If the session was already opened, it is closed.
|
45
45
|
#
|
46
|
-
# Parameters:
|
46
|
+
# Parameters:
|
47
47
|
# * +port+ - the port on which the poliqarpd server is accepting connections (defaults to 4567)
|
48
48
|
def new_session(port=4567)
|
49
49
|
close if @session
|
50
50
|
@connector.open("localhost",port)
|
51
51
|
talk("MAKE-SESSION #{@session_name}")
|
52
|
-
talk("BUFFER-RESIZE #{
|
52
|
+
talk("BUFFER-RESIZE #{config.buffer_size}")
|
53
53
|
@session = true
|
54
|
-
self.tags = {}
|
55
|
-
self.lemmata = {}
|
56
54
|
end
|
57
55
|
|
58
56
|
# Closes the opened session.
|
59
57
|
def close
|
60
|
-
talk "CLOSE-SESSION"
|
58
|
+
talk "CLOSE-SESSION"
|
61
59
|
@session = false
|
62
60
|
end
|
63
61
|
|
@@ -66,114 +64,36 @@ module Poliqarp
|
|
66
64
|
talk "CLOSE"
|
67
65
|
end
|
68
66
|
|
69
|
-
# Sets the size of the left short context. It must be > 0
|
70
|
-
#
|
71
|
-
# The size of the left short context is the number
|
72
|
-
# of segments displayed in the found excerpts left to the
|
73
|
-
# matched segment(s).
|
74
|
-
def left_context=(value)
|
75
|
-
if correct_context_value?(value)
|
76
|
-
result = talk("SET left-context-width #{value}")
|
77
|
-
@left_context = value if result =~ /^R OK/
|
78
|
-
else
|
79
|
-
raise "Invalid argument: #{value}. It must be fixnum greater than 0."
|
80
|
-
end
|
81
|
-
end
|
82
|
-
|
83
|
-
# Sets the size of the right short context. It must be > 0
|
84
|
-
#
|
85
|
-
# The size of the right short context is the number
|
86
|
-
# of segments displayed in the found excerpts right to the
|
87
|
-
# matched segment(s).
|
88
|
-
def right_context=(value)
|
89
|
-
if correct_context_value?(value)
|
90
|
-
result = talk("SET right-context-width #{value}")
|
91
|
-
@right_context = value if result =~ /^R OK/
|
92
|
-
else
|
93
|
-
raise "Invalid argument: #{value}. It must be fixnum greater than 0."
|
94
|
-
end
|
95
|
-
end
|
96
|
-
|
97
|
-
# Sets the tags' flags. There are four groups of segments
|
98
|
-
# which the flags apply for:
|
99
|
-
# * +left_context+
|
100
|
-
# * +left_match+
|
101
|
-
# * +right_match+
|
102
|
-
# * +right_context+
|
103
|
-
#
|
104
|
-
# If the flag for given group is set to true, all segments
|
105
|
-
# in the group are annotated with grammatical tags. E.g.:
|
106
|
-
# c.find("kot")
|
107
|
-
# ...
|
108
|
-
# "kot" tags: "subst:sg:nom:m2"
|
109
|
-
#
|
110
|
-
# You can pass :all to turn on flags for all groups
|
111
|
-
def tags=(options={})
|
112
|
-
options = set_all_flags if options == :all
|
113
|
-
@tag_flags = options
|
114
|
-
flags = ""
|
115
|
-
GROUPS.each do |flag|
|
116
|
-
flags << (options[flag] ? "1" : "0")
|
117
|
-
end
|
118
|
-
talk("SET retrieve-tags #{flags}")
|
119
|
-
end
|
120
|
-
|
121
|
-
# Sets the lemmatas' flags. There are four groups of segments
|
122
|
-
# which the flags apply for:
|
123
|
-
# * +left_context+
|
124
|
-
# * +left_match+
|
125
|
-
# * +right_match+
|
126
|
-
# * +right_context+
|
127
|
-
#
|
128
|
-
# If the flag for given group is set to true, all segments
|
129
|
-
# in the group are returned with the base form of the lemmata. E.g.:
|
130
|
-
# c.find("kotu")
|
131
|
-
# ...
|
132
|
-
# "kotu" base_form: "kot"
|
133
|
-
#
|
134
|
-
# You can pass :all to turn on flags for all groups
|
135
|
-
def lemmata=(options={})
|
136
|
-
options = set_all_flags if options == :all
|
137
|
-
@lemmata_flags = options
|
138
|
-
flags = ""
|
139
|
-
GROUPS.each do |flag|
|
140
|
-
flags << (options[flag] ? "1" : "0")
|
141
|
-
end
|
142
|
-
talk("SET retrieve-lemmata #{flags}")
|
143
|
-
end
|
144
|
-
|
145
67
|
# *Asynchronous* Opens the corpus given as +path+. To open the default
|
146
|
-
# corpus pass +:default+ as the argument.
|
147
|
-
#
|
68
|
+
# corpus pass +:default+ as the argument.
|
69
|
+
#
|
148
70
|
# If you don't want to wait until the call is finished, you
|
149
71
|
# have to provide +handler+ for the asynchronous answer.
|
150
72
|
def open_corpus(path, &handler)
|
151
73
|
if path == :default
|
152
74
|
open_corpus(DEFAULT_CORPUS, &handler)
|
153
75
|
else
|
154
|
-
|
155
|
-
talk("OPEN #{path}", :async, &real_handler)
|
156
|
-
do_wait if handler.nil?
|
76
|
+
talk("OPEN #{path}", :async, &handler)
|
157
77
|
end
|
158
78
|
end
|
159
79
|
|
160
80
|
# Server diagnostics -- the result should be :pong
|
161
|
-
def ping
|
81
|
+
def ping
|
162
82
|
:pong if talk("PING") =~ /PONG/
|
163
83
|
end
|
164
84
|
|
165
85
|
# Returns server version
|
166
|
-
def version
|
86
|
+
def version
|
167
87
|
talk("VERSION")
|
168
88
|
end
|
169
89
|
|
170
90
|
# Returns corpus statistics:
|
171
|
-
# * +:segment_tokens+ the number of segments in the corpus
|
91
|
+
# * +:segment_tokens+ the number of segments in the corpus
|
172
92
|
# (two segments which look exactly the same are counted separately)
|
173
93
|
# * +:segment_types+ the number of segment types in the corpus
|
174
94
|
# (two segments which look exactly the same are counted as one type)
|
175
95
|
# * +:lemmata+ the number of lemmata (lexemes) types
|
176
|
-
# (all forms of inflected word, e.g. 'kot', 'kotu', ...
|
96
|
+
# (all forms of inflected word, e.g. 'kot', 'kotu', ...
|
177
97
|
# are treated as one "word" -- lemmata)
|
178
98
|
# * +:tags+ the number of different grammar tags (each combination
|
179
99
|
# of atomic tags is treated as different "tag")
|
@@ -181,7 +101,7 @@ module Poliqarp
|
|
181
101
|
stats = {}
|
182
102
|
talk("CORPUS-STATS").split.each_with_index do |value, index|
|
183
103
|
case index
|
184
|
-
when 1
|
104
|
+
when 1
|
185
105
|
stats[:segment_tokens] = value.to_i
|
186
106
|
when 2
|
187
107
|
stats[:segment_types] = value.to_i
|
@@ -205,7 +125,7 @@ module Poliqarp
|
|
205
125
|
# (each category has a list of its tags, eg. gender: m1 m2 m3 f n,
|
206
126
|
# means that there are 5 genders: masculine(1,2,3), feminine and neuter)
|
207
127
|
# * +:classes+ enlists grammatical tags used to describe it
|
208
|
-
# (each class has a list of tags used to describe it, eg. adj: degree
|
128
|
+
# (each class has a list of tags used to describe it, eg. adj: degree
|
209
129
|
# gender case number, means that adjectives are described in terms
|
210
130
|
# of degree, gender, case and number)
|
211
131
|
def tagset
|
@@ -226,14 +146,14 @@ module Poliqarp
|
|
226
146
|
#
|
227
147
|
# Options:
|
228
148
|
# * +index+ the index of the (only one) result to be returned. The index is relative
|
229
|
-
# to the beginning of the query result. In normal case you should query the
|
149
|
+
# to the beginning of the query result. In normal case you should query the
|
230
150
|
# corpus without specifying the index, to see what results are returned.
|
231
|
-
# Then you can use the index and the same query to retrieve one result.
|
151
|
+
# Then you can use the index and the same query to retrieve one result.
|
232
152
|
# The pair (query, index) is a kind of unique identifier of the excerpt.
|
233
153
|
# * +page_size+ the size of the page of results. If the page size is 0, then
|
234
154
|
# all results are returned on one page. It is ignored if the +index+ option
|
235
155
|
# is present. Defaults to 0.
|
236
|
-
# * +page_index+ the index of the page of results (the first page has index 1, not 0).
|
156
|
+
# * +page_index+ the index of the page of results (the first page has index 1, not 0).
|
237
157
|
# It is ignored if the +index+ option is present. Defaults to 1.
|
238
158
|
def find(query,options={})
|
239
159
|
if options[:index]
|
@@ -243,11 +163,11 @@ module Poliqarp
|
|
243
163
|
end
|
244
164
|
end
|
245
165
|
|
246
|
-
alias query find
|
166
|
+
alias query find
|
247
167
|
|
248
168
|
# Returns the number of results for given query.
|
249
169
|
def count(query)
|
250
|
-
count_results(make_query(query))
|
170
|
+
count_results(make_query(query))
|
251
171
|
end
|
252
172
|
|
253
173
|
# Returns the long context of the excerpt which is identified by
|
@@ -257,13 +177,13 @@ module Poliqarp
|
|
257
177
|
result = []
|
258
178
|
talk "GET-CONTEXT #{index}"
|
259
179
|
# 1st part
|
260
|
-
result << read_word
|
180
|
+
result << read_word
|
261
181
|
# 2nd part
|
262
|
-
result << read_word
|
182
|
+
result << read_word
|
263
183
|
# 3rd part
|
264
|
-
result << read_word
|
184
|
+
result << read_word
|
265
185
|
# 4th part
|
266
|
-
result << read_word
|
186
|
+
result << read_word
|
267
187
|
result
|
268
188
|
end
|
269
189
|
|
@@ -286,19 +206,62 @@ module Poliqarp
|
|
286
206
|
end
|
287
207
|
|
288
208
|
protected
|
209
|
+
# Set the size of the left context.
|
210
|
+
def left_context=(value)
|
211
|
+
result = talk("SET left-context-width #{value}")
|
212
|
+
unless result =~ /^OK/
|
213
|
+
raise "Failed to set left context to #{value}: #{result}"
|
214
|
+
end
|
215
|
+
end
|
216
|
+
|
217
|
+
# Set the size of the right context.
|
218
|
+
def right_context=(value)
|
219
|
+
result = talk("SET right-context-width #{value}")
|
220
|
+
unless result =~ /^OK/
|
221
|
+
raise "Failed to set right context to #{value}: #{result}"
|
222
|
+
end
|
223
|
+
end
|
224
|
+
|
225
|
+
# Sets the 'retrieve-tags' flags.
|
226
|
+
def retrieve_tags(flags)
|
227
|
+
talk("SET retrieve-tags #{flags}")
|
228
|
+
end
|
229
|
+
|
230
|
+
# Sets the 'retrieve-lemmata' flags.
|
231
|
+
def retrieve_lemmata(flags)
|
232
|
+
talk("SET retrieve-lemmata #{flags}")
|
233
|
+
end
|
234
|
+
|
235
|
+
|
289
236
|
# Sends a message directly to the server
|
290
237
|
# * +msg+ the message to send
|
291
238
|
# * +mode+ if set to :sync, the method blocks until the message
|
292
239
|
# is received. If :async the method returns immediately.
|
293
240
|
# Default: :sync
|
294
|
-
# * +handler+ the handler of the assynchronous message.
|
241
|
+
# * +handler+ the handler of the assynchronous message.
|
295
242
|
# It is ignored when the mode is set to :sync.
|
296
243
|
def talk(msg, mode = :sync, &handler)
|
297
244
|
puts msg if @debug
|
298
|
-
|
245
|
+
if mode == :sync
|
246
|
+
@connector.send_message(msg, mode, &handler)
|
247
|
+
else
|
248
|
+
if handler.nil?
|
249
|
+
real_handler = lambda do |msg|
|
250
|
+
@answer_queue.push msg
|
251
|
+
stop_waiting
|
252
|
+
end
|
253
|
+
start_waiting
|
254
|
+
else
|
255
|
+
real_handler = handler
|
256
|
+
end
|
257
|
+
@connector.send_message(msg, mode, &real_handler)
|
258
|
+
if handler.nil?
|
259
|
+
do_wait
|
260
|
+
end
|
261
|
+
end
|
299
262
|
end
|
300
263
|
|
301
|
-
# Make query and retrieve many results.
|
264
|
+
# Make query and retrieve many results.
|
302
265
|
# * +query+ the query to be sent to the server.
|
303
266
|
# * +options+ see find
|
304
267
|
def find_many(query, options)
|
@@ -308,7 +271,7 @@ protected
|
|
308
271
|
answer_offset = page_size * (page_index - 1)
|
309
272
|
if page_size > 0
|
310
273
|
result_count = make_async_query(query,answer_offset)
|
311
|
-
answers_limit = answer_offset + page_size > result_count ?
|
274
|
+
answers_limit = answer_offset + page_size > result_count ?
|
312
275
|
result_count - answer_offset : page_size
|
313
276
|
else
|
314
277
|
# all answers needed -- the call must be synchronous
|
@@ -321,12 +284,12 @@ protected
|
|
321
284
|
|
322
285
|
result = QueryResult.new(page_index, page_count,page_size,self,query)
|
323
286
|
if answers_limit > 0
|
324
|
-
talk("GET-RESULTS #{answer_offset} #{answer_offset + answers_limit - 1}")
|
287
|
+
talk("GET-RESULTS #{answer_offset} #{answer_offset + answers_limit - 1}")
|
325
288
|
answers_limit.times do |answer_index|
|
326
289
|
result << fetch_result(answer_offset + answer_index, query)
|
327
290
|
end
|
328
291
|
end
|
329
|
-
result
|
292
|
+
result
|
330
293
|
end
|
331
294
|
|
332
295
|
# Make query and retrieve only one result
|
@@ -334,13 +297,13 @@ protected
|
|
334
297
|
# * +index+ the index of the answer to be retrieved
|
335
298
|
def find_one(query,index)
|
336
299
|
make_async_query(query,index)
|
337
|
-
talk("GET-RESULTS #{index} #{index}")
|
338
|
-
fetch_result(index,query)
|
300
|
+
talk("GET-RESULTS #{index} #{index}")
|
301
|
+
fetch_result(index,query)
|
339
302
|
end
|
340
303
|
|
341
304
|
# Fetches one result of the query
|
342
305
|
#
|
343
|
-
# MAKE-QUERY and GET-RESULTS must be sent to the server before
|
306
|
+
# MAKE-QUERY and GET-RESULTS must be sent to the server before
|
344
307
|
# this method is called
|
345
308
|
def fetch_result(index, query)
|
346
309
|
result = Excerpt.new(index, self, query)
|
@@ -357,15 +320,15 @@ protected
|
|
357
320
|
segments = []
|
358
321
|
size.times do |segment_index|
|
359
322
|
segment = Segment.new(read_word)
|
360
|
-
segments << segment
|
361
|
-
if
|
323
|
+
segments << segment
|
324
|
+
if config.lemmata.include?(group) || config.tags.include?(group)
|
362
325
|
lemmata_size = read_number()
|
363
|
-
lemmata_size.times do |lemmata_index|
|
326
|
+
lemmata_size.times do |lemmata_index|
|
364
327
|
lemmata = Lemmata.new()
|
365
|
-
if
|
328
|
+
if config.lemmata.include?(group)
|
366
329
|
lemmata.base_form = read_word
|
367
330
|
end
|
368
|
-
if
|
331
|
+
if config.tags.include?(group)
|
369
332
|
lemmata.tags = read_word
|
370
333
|
end
|
371
334
|
segment.lemmata << lemmata
|
@@ -377,7 +340,9 @@ protected
|
|
377
340
|
|
378
341
|
# Reads number stored in the message received from the server.
|
379
342
|
def read_number
|
380
|
-
@connector.read_message
|
343
|
+
msg = @connector.read_message
|
344
|
+
puts "XXX #{msg}" if @debug
|
345
|
+
msg.match(/\d+/)[0].to_i
|
381
346
|
end
|
382
347
|
|
383
348
|
# Counts number of results for given answer
|
@@ -387,25 +352,22 @@ protected
|
|
387
352
|
|
388
353
|
# *Asynchronous* Sends the query to the server
|
389
354
|
# * +query+ query to send
|
390
|
-
# * +handler+ if given, the method returns immediately,
|
355
|
+
# * +handler+ if given, the method returns immediately,
|
391
356
|
# and the answer is sent to the handler. In this case
|
392
357
|
# the result returned by make_query should be IGNORED!
|
393
358
|
def make_query(query, &handler)
|
394
359
|
if @last_query != query
|
395
360
|
@last_query = query
|
396
|
-
if handler.nil?
|
397
|
-
real_handler = lambda { |msg| @answer_queue.push msg }
|
398
|
-
else
|
399
|
-
real_handler = handler
|
400
|
-
end
|
401
361
|
begin
|
402
362
|
talk("MAKE-QUERY #{query}")
|
403
363
|
rescue JobInProgress
|
404
364
|
talk("CANCEL") rescue nil
|
405
365
|
talk("MAKE-QUERY #{query}")
|
406
366
|
end
|
407
|
-
talk("RUN-QUERY #{
|
408
|
-
|
367
|
+
result = talk("RUN-QUERY #{config.buffer_size}", :async, &handler)
|
368
|
+
if handler.nil?
|
369
|
+
@last_result = result
|
370
|
+
end
|
409
371
|
end
|
410
372
|
@last_result
|
411
373
|
end
|
@@ -415,38 +377,55 @@ protected
|
|
415
377
|
@connector.read_message
|
416
378
|
end
|
417
379
|
|
418
|
-
private
|
380
|
+
private
|
381
|
+
# Wait for the asynchronous answer, if some synchronous query
|
382
|
+
# was sent without handler.
|
419
383
|
def do_wait
|
420
384
|
loop {
|
421
|
-
|
422
|
-
puts "
|
423
|
-
sleep 0.
|
385
|
+
break unless should_wait?
|
386
|
+
puts "WAITING" if @debug
|
387
|
+
sleep 0.1
|
424
388
|
}
|
425
389
|
@answer_queue.shift
|
426
390
|
end
|
427
391
|
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
392
|
+
# Stop waiting for the asynchronous answer.
|
393
|
+
def stop_waiting
|
394
|
+
@waiting_mutext.synchronize {
|
395
|
+
@should_wait = false
|
396
|
+
}
|
397
|
+
puts "WAITING stopped" if @debug
|
398
|
+
end
|
399
|
+
|
400
|
+
# Check if the thread should still wait for the answer.
|
401
|
+
def should_wait?
|
402
|
+
should_wait = nil
|
403
|
+
@waiting_mutext.synchronize {
|
404
|
+
should_wait = @should_wait
|
405
|
+
}
|
406
|
+
should_wait
|
432
407
|
end
|
433
|
-
|
434
|
-
|
435
|
-
|
408
|
+
|
409
|
+
# Start waiting for the answer.
|
410
|
+
def start_waiting
|
411
|
+
@waiting_mutext.synchronize {
|
412
|
+
@should_wait = true
|
413
|
+
}
|
414
|
+
puts "WAITING started" if @debug
|
436
415
|
end
|
437
416
|
|
438
|
-
def make_async_query(query,answer_offset)
|
439
|
-
|
440
|
-
# BUFFER-STATE call
|
441
|
-
make_query(query){|msg| }
|
442
|
-
result_count = 0
|
443
|
-
begin
|
417
|
+
def make_async_query(query,answer_offset)
|
418
|
+
start_waiting
|
419
|
+
# we access the result count through BUFFER-STATE call
|
420
|
+
make_query(query){|msg| stop_waiting}
|
421
|
+
result_count = 0
|
422
|
+
begin
|
444
423
|
# the result count might be not exact!
|
445
424
|
result_count = talk("BUFFER-STATE").split(" ")[2].to_i
|
446
|
-
|
425
|
+
break unless should_wait?
|
447
426
|
end while result_count < answer_offset
|
448
427
|
@last_result = "OK #{result_count}"
|
449
428
|
result_count
|
450
429
|
end
|
451
|
-
end
|
430
|
+
end
|
452
431
|
end
|
@@ -0,0 +1,138 @@
|
|
1
|
+
# vim:encoding=utf-8
|
2
|
+
module Poliqarp
|
3
|
+
# Author:: Aleksander Pohl (mailto:apohllo@o2.pl)
|
4
|
+
# License:: MIT License
|
5
|
+
#
|
6
|
+
# This class holds the configuration of the client.
|
7
|
+
class Config
|
8
|
+
GROUPS = [:left_context, :left_match, :right_match, :right_context]
|
9
|
+
# The size of the buffer is the maximum number of excerpts which
|
10
|
+
# are returned for single query.
|
11
|
+
attr_accessor :buffer_size, :left_context_size, :right_context_size, :tags, :lemmata
|
12
|
+
|
13
|
+
def initialize(client,buffer_size)
|
14
|
+
@client = client
|
15
|
+
@buffer_size = buffer_size
|
16
|
+
end
|
17
|
+
|
18
|
+
# Sets the size of the left short context. It must be > 0
|
19
|
+
#
|
20
|
+
# The size of the left short context is the number
|
21
|
+
# of segments displayed in the found excerpts left to the
|
22
|
+
# matched segment(s).
|
23
|
+
def left_context_size=(value)
|
24
|
+
if correct_context_value?(value)
|
25
|
+
@client.send(:left_context=,value)
|
26
|
+
@left_context_size = value
|
27
|
+
else
|
28
|
+
raise "Invalid argument: #{value}. It must be fixnum greater than 0."
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
# Sets the size of the right short context. It must be > 0
|
33
|
+
#
|
34
|
+
# The size of the right short context is the number
|
35
|
+
# of segments displayed in the found excerpts right to the
|
36
|
+
# matched segment(s).
|
37
|
+
def right_context_size=(value)
|
38
|
+
if correct_context_value?(value)
|
39
|
+
@client.send(:right_context=,value)
|
40
|
+
@right_context_size = value
|
41
|
+
else
|
42
|
+
raise "Invalid argument: #{value}. It must be fixnum greater than 0."
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
# Sets the tags' flags. There are four groups of segments
|
47
|
+
# which the flags apply for:
|
48
|
+
# * +:left_context+
|
49
|
+
# * +:left_match+
|
50
|
+
# * +:right_match+
|
51
|
+
# * +:right_context+
|
52
|
+
#
|
53
|
+
# If the flag for given group is present, all segments
|
54
|
+
# in the group are annotated with grammatical tags. E.g.:
|
55
|
+
# c.find("kot")
|
56
|
+
# ...
|
57
|
+
# "kot" tags: "subst:sg:nom:m2"
|
58
|
+
#
|
59
|
+
# E.g. config.tags = [:left_context] will retrieve tags
|
60
|
+
# only for the left context.
|
61
|
+
#
|
62
|
+
# You can pass :all to turn on flags for all groups, i.e.
|
63
|
+
# config.tags = :all will retrieve tags for all groups.
|
64
|
+
def tags=(groups)
|
65
|
+
if groups == :all
|
66
|
+
@tags = GROUPS.dup
|
67
|
+
else
|
68
|
+
@tags = groups
|
69
|
+
end
|
70
|
+
@client.send(:retrieve_tags, flags_for(@tags))
|
71
|
+
end
|
72
|
+
|
73
|
+
# Sets the lemmatas' flags. There are four groups of segments
|
74
|
+
# which the flags apply for:
|
75
|
+
# * +left_context+
|
76
|
+
# * +left_match+
|
77
|
+
# * +right_match+
|
78
|
+
# * +right_context+
|
79
|
+
#
|
80
|
+
# If the flag for given group is present, all segments
|
81
|
+
# in the group are returned with the base form of the lemmata. E.g.:
|
82
|
+
# c.find("kotu")
|
83
|
+
# ...
|
84
|
+
# "kotu" base_form: "kot"
|
85
|
+
#
|
86
|
+
# E.g. config.lemmata = [:left_context] will retrieve lemmata
|
87
|
+
# only for the left context.
|
88
|
+
#
|
89
|
+
# You can pass :all to turn on flags for all groups, i.e.
|
90
|
+
# config.lemmata = :all will retrieve lemmata for all groups.
|
91
|
+
def lemmata=(groups)
|
92
|
+
if groups == :all
|
93
|
+
@lemmata = GROUPS.dup
|
94
|
+
else
|
95
|
+
@lemmata = groups
|
96
|
+
end
|
97
|
+
@client.send(:retrieve_lemmata, flags_for(@lemmata))
|
98
|
+
end
|
99
|
+
|
100
|
+
# Allow for accessing individual group tags/lemmata flag,
|
101
|
+
# e.g. config.left_context_tags, config.left_context_lemmata
|
102
|
+
[:tags,:lemmata].each do |type|
|
103
|
+
GROUPS.each do |group|
|
104
|
+
define_method("#{group}_#{type}".to_sym) do
|
105
|
+
@tags.include?(group)
|
106
|
+
end
|
107
|
+
end
|
108
|
+
end
|
109
|
+
|
110
|
+
# Allow for changing individual group tags/lemmata flag,
|
111
|
+
# e.g. config.left_context_tags = true, config.left_context_lemmata = true
|
112
|
+
[:tags,:lemmata].each do |type|
|
113
|
+
GROUPS.each do |group|
|
114
|
+
define_method("#{group}_#{type}=".to_sym) do |value|
|
115
|
+
if value
|
116
|
+
@tags << group unless @tags.include?(group)
|
117
|
+
else
|
118
|
+
@tags.delete(group) if @tags.include?(group)
|
119
|
+
end
|
120
|
+
@client.send("retrieve_#{type}".to_sym, flags_for(@tags))
|
121
|
+
end
|
122
|
+
end
|
123
|
+
end
|
124
|
+
|
125
|
+
protected
|
126
|
+
def correct_context_value?(value)
|
127
|
+
value.is_a?(Fixnum) && value > 0
|
128
|
+
end
|
129
|
+
|
130
|
+
def flags_for(elements)
|
131
|
+
flags = ""
|
132
|
+
GROUPS.each do |flag|
|
133
|
+
flags << (elements.include?(flag) ? "1" : "0")
|
134
|
+
end
|
135
|
+
flags
|
136
|
+
end
|
137
|
+
end
|
138
|
+
end
|
data/lib/poliqarpr/connector.rb
CHANGED
@@ -44,7 +44,7 @@ module Poliqarp
|
|
44
44
|
@debug = debug
|
45
45
|
end
|
46
46
|
|
47
|
-
# Opens connection with poliqarp server which runs
|
47
|
+
# Opens connection with poliqarp server which runs
|
48
48
|
# on given +host+ and +port+.
|
49
49
|
def open(host,port)
|
50
50
|
@socket_mutex.synchronize {
|
@@ -60,17 +60,16 @@ module Poliqarp
|
|
60
60
|
}
|
61
61
|
end
|
62
62
|
|
63
|
-
# Sends message to the poliqarp server. Returns the first synchronous
|
63
|
+
# Sends message to the poliqarp server. Returns the first synchronous
|
64
64
|
# answer of the server.
|
65
65
|
# * +message+ the message to send
|
66
66
|
# * +mode+ synchronous (+:sync+) or asynchronous (+:async+)
|
67
67
|
# * +handler+ the handler of the asynchronous message
|
68
|
-
def
|
68
|
+
def send_message(message, mode, &handler)
|
69
69
|
puts "send #{mode} #{message}" if @debug
|
70
70
|
if ruby19?
|
71
71
|
massage = message.encode(UTF8)
|
72
72
|
end
|
73
|
-
#@socket.puts(message)
|
74
73
|
@socket.write(message+"\n")
|
75
74
|
if mode == :async
|
76
75
|
@handler = handler
|
@@ -79,7 +78,7 @@ module Poliqarp
|
|
79
78
|
end
|
80
79
|
|
81
80
|
# Retrieves one message from the server.
|
82
|
-
# If the message indicates an error, new runtime error
|
81
|
+
# If the message indicates an error, new runtime error
|
83
82
|
# containing the error description is returned.
|
84
83
|
def read_message
|
85
84
|
message = @message_queue.shift
|
@@ -94,7 +93,7 @@ module Poliqarp
|
|
94
93
|
|
95
94
|
private
|
96
95
|
def main_loop
|
97
|
-
@loop = Thread.new {
|
96
|
+
@loop = Thread.new {
|
98
97
|
loop {
|
99
98
|
receive
|
100
99
|
# XXX ??? needed
|
@@ -124,8 +123,8 @@ private
|
|
124
123
|
|
125
124
|
def receive_async(message)
|
126
125
|
puts "receive async: #{message}" if @debug
|
127
|
-
Thread.new{
|
128
|
-
@handler.call(message)
|
126
|
+
Thread.new{
|
127
|
+
@handler.call(message)
|
129
128
|
}
|
130
129
|
end
|
131
130
|
|
data/lib/poliqarpr/exceptions.rb
CHANGED
@@ -2,7 +2,7 @@ module Poliqarp
|
|
2
2
|
# Author:: Aleksander Pohl (mailto:apohllo@o2.pl)
|
3
3
|
# License:: MIT License
|
4
4
|
|
5
|
-
# The JobInProgress exception is raised if there was asynchronous call
|
5
|
+
# The JobInProgress exception is raised if there was asynchronous call
|
6
6
|
# to the server which hasn't finished, which is interrupted by another
|
7
7
|
# asynchronous call.
|
8
8
|
class JobInProgress < Exception; end
|
data/lib/poliqarpr/excerpt.rb
CHANGED
@@ -2,13 +2,13 @@ module Poliqarp
|
|
2
2
|
# Author:: Aleksander Pohl
|
3
3
|
# License:: MIT License
|
4
4
|
#
|
5
|
-
# The excerpt class is used to store single result of the query,
|
5
|
+
# The excerpt class is used to store single result of the query,
|
6
6
|
# i.e. the excerpt of the corpus which contains the words which
|
7
|
-
# the corpus was queried for.
|
7
|
+
# the corpus was queried for.
|
8
8
|
#
|
9
9
|
# The excerpt is divided into groups, which contain segments,
|
10
|
-
# which the texts in the corpus were divided for.
|
11
|
-
# The first group is the left context, the second -- the matched
|
10
|
+
# which the texts in the corpus were divided for.
|
11
|
+
# The first group is the left context, the second -- the matched
|
12
12
|
# query, and the last -- the right context.
|
13
13
|
class Excerpt
|
14
14
|
attr_reader :index, :base_form, :short_context
|
@@ -40,7 +40,7 @@ module Poliqarp
|
|
40
40
|
@short_context[2]
|
41
41
|
end
|
42
42
|
|
43
|
-
# Returns the matched query as string
|
43
|
+
# Returns the matched query as string
|
44
44
|
def word
|
45
45
|
#@short_context[0].split(/\s+/)[-1]
|
46
46
|
@short_context[1].map{|s| s.to_s}.join("")
|
@@ -54,7 +54,7 @@ module Poliqarp
|
|
54
54
|
@short_context.join("")
|
55
55
|
end
|
56
56
|
|
57
|
-
# Returns the long context of the query.
|
57
|
+
# Returns the long context of the query.
|
58
58
|
def context
|
59
59
|
return @context unless @context.nil?
|
60
60
|
@context = @client.context(@base_form, @index)
|
@@ -63,7 +63,7 @@ module Poliqarp
|
|
63
63
|
{ :medium => :medium, :style => :styl, :date => :data_wydania,
|
64
64
|
:city => :miejsce_wydania, :publisher => :wydawca, :title => :tytu,
|
65
65
|
:author => :autor}.each do |method, keyword|
|
66
|
-
define_method method do
|
66
|
+
define_method method do
|
67
67
|
self.metadata[keyword]
|
68
68
|
end
|
69
69
|
end
|
@@ -2,9 +2,9 @@ module Poliqarp
|
|
2
2
|
# Author:: Aleksander Pohl (mailto:apohllo@o2.pl)
|
3
3
|
# License:: MIT License
|
4
4
|
#
|
5
|
-
# The query result class is used to paginate results of the
|
5
|
+
# The query result class is used to paginate results of the
|
6
6
|
# query. Each query result has information about its context
|
7
|
-
# (the next and previous page).
|
7
|
+
# (the next and previous page).
|
8
8
|
class QueryResult
|
9
9
|
include Enumerable
|
10
10
|
|
@@ -51,16 +51,16 @@ module Poliqarp
|
|
51
51
|
# Returns the previous page of the query result
|
52
52
|
def previous_page
|
53
53
|
if @page > 1
|
54
|
-
@client.find(@query, :page_size => @page_size,
|
55
|
-
:page_index => @page - 1)
|
54
|
+
@client.find(@query, :page_size => @page_size,
|
55
|
+
:page_index => @page - 1)
|
56
56
|
end
|
57
57
|
end
|
58
58
|
|
59
59
|
# Return the next page of the query result
|
60
60
|
def next_page
|
61
61
|
if @page < @page_count
|
62
|
-
@client.find(@query, :page_size => @page_size,
|
63
|
-
:page_index => @page + 1)
|
62
|
+
@client.find(@query, :page_size => @page_size,
|
63
|
+
:page_index => @page + 1)
|
64
64
|
end
|
65
65
|
end
|
66
66
|
|
data/lib/poliqarpr/segment.rb
CHANGED
@@ -1,22 +1,22 @@
|
|
1
|
-
module Poliqarp
|
1
|
+
module Poliqarp
|
2
2
|
# Author:: Aleksander Pohl (mailto:apohllo@o2.pl)
|
3
3
|
# License:: MIT LICENSE
|
4
4
|
#
|
5
|
-
# The segment is the smallest meaningful part of the text.
|
6
|
-
# It may contain many lemmata, since the segments are sometimes
|
7
|
-
# not disambiguated.
|
5
|
+
# The segment is the smallest meaningful part of the text.
|
6
|
+
# It may contain many lemmata, since the segments are sometimes
|
7
|
+
# not disambiguated.
|
8
8
|
class Segment
|
9
9
|
attr_reader :literal, :lemmata
|
10
10
|
|
11
|
-
# Creates new segment. The specified argument is the literal
|
12
|
-
# (as found in the text) representation of the segment.
|
11
|
+
# Creates new segment. The specified argument is the literal
|
12
|
+
# (as found in the text) representation of the segment.
|
13
13
|
def initialize(literal)
|
14
14
|
@literal = literal
|
15
15
|
@lemmata = []
|
16
16
|
end
|
17
17
|
|
18
18
|
# Returns the segment literal
|
19
|
-
def to_s
|
19
|
+
def to_s
|
20
20
|
@literal
|
21
21
|
end
|
22
22
|
end
|
data/lib/poliqarpr/util.rb
CHANGED
data/poliqarpr.gemspec
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = "poliqarpr"
|
3
|
-
s.version = "0.0
|
4
|
-
s.date = "2011-01-
|
3
|
+
s.version = "0.1.0"
|
4
|
+
s.date = "2011-01-17"
|
5
5
|
s.summary = "Ruby client for Poliqarp"
|
6
6
|
s.email = "apohllo@o2.pl"
|
7
7
|
s.homepage = "http://www.github.com/apohllo/poliqarpr"
|
8
8
|
s.description = "Ruby client for Poliqarp (NLP corpus server)"
|
9
9
|
s.authors = ['Aleksander Pohl']
|
10
|
-
s.files = ["Rakefile", "poliqarpr.gemspec",
|
10
|
+
s.files = ["Rakefile", "poliqarpr.gemspec",
|
11
11
|
"changelog.txt", "README.txt" ] + Dir.glob("lib/**/*")
|
12
12
|
s.test_files = Dir.glob("spec/**/*")
|
13
13
|
s.rdoc_options = ["--main", "README.txt"]
|
data/spec/client.rb
CHANGED
@@ -5,17 +5,17 @@ require 'poliqarpr'
|
|
5
5
|
describe Poliqarp::Client do
|
6
6
|
describe "(general test)" do
|
7
7
|
before(:each) do
|
8
|
-
@client = Poliqarp::Client.new("
|
8
|
+
@client = Poliqarp::Client.new("TEST1")
|
9
9
|
end
|
10
|
-
|
11
|
-
after(:each) do
|
10
|
+
|
11
|
+
after(:each) do
|
12
12
|
@client.close
|
13
13
|
end
|
14
|
-
|
14
|
+
|
15
15
|
it "should allow to open corpus" do
|
16
16
|
@client.open_corpus("/home/fox/local/poliqarp/2.sample.30/sample")
|
17
17
|
end
|
18
|
-
|
18
|
+
|
19
19
|
it "should allow to open :default corpus" do
|
20
20
|
@client.open_corpus(:default)
|
21
21
|
end
|
@@ -32,7 +32,7 @@ describe Poliqarp::Client do
|
|
32
32
|
|
33
33
|
describe "(with 'sample' corpus)" do
|
34
34
|
before(:all) do
|
35
|
-
@client = Poliqarp::Client.new("
|
35
|
+
@client = Poliqarp::Client.new("TEST2")
|
36
36
|
@client.open_corpus("/home/fox/local/poliqarp/2.sample.30/sample")
|
37
37
|
end
|
38
38
|
|
@@ -40,35 +40,37 @@ describe Poliqarp::Client do
|
|
40
40
|
@client.close
|
41
41
|
end
|
42
42
|
|
43
|
-
it "should allow to set the right context size" do
|
44
|
-
@client.
|
43
|
+
it "should allow to set and get the right context size" do
|
44
|
+
@client.config.right_context_size = 5
|
45
|
+
@client.config.right_context_size.should == 5
|
45
46
|
end
|
46
47
|
|
47
|
-
it "should raise error if the size of right context is not number" do
|
48
|
-
(proc do
|
49
|
-
@client.
|
48
|
+
it "should raise error if the size of right context is not number" do
|
49
|
+
(proc do
|
50
|
+
@client.config.right_context_size = "a"
|
50
51
|
end).should raise_error(RuntimeError)
|
51
52
|
end
|
52
53
|
|
53
|
-
it "should rais error if the size of right context is less or equal 0" do
|
54
|
-
(proc do
|
55
|
-
@client.
|
54
|
+
it "should rais error if the size of right context is less or equal 0" do
|
55
|
+
(proc do
|
56
|
+
@client.config.right_context_size = 0
|
56
57
|
end).should raise_error(RuntimeError)
|
57
58
|
end
|
58
59
|
|
59
|
-
it "should allow to set the left context size" do
|
60
|
-
@client.
|
60
|
+
it "should allow to set and get the left context size" do
|
61
|
+
@client.config.left_context_size = 5
|
62
|
+
@client.config.left_context_size.should == 5
|
61
63
|
end
|
62
64
|
|
63
|
-
it "should raise error if the size of left context is not number" do
|
64
|
-
(lambda do
|
65
|
-
@client.
|
65
|
+
it "should raise error if the size of left context is not number" do
|
66
|
+
(lambda do
|
67
|
+
@client.config.left_context_size = "a"
|
66
68
|
end).should raise_error(RuntimeError)
|
67
69
|
end
|
68
70
|
|
69
|
-
it "should rais error if the size of left context is less or equal 0" do
|
70
|
-
(lambda do
|
71
|
-
@client.
|
71
|
+
it "should rais error if the size of left context is less or equal 0" do
|
72
|
+
(lambda do
|
73
|
+
@client.config.left_context_size = 0
|
72
74
|
end).should raise_error(RuntimeError)
|
73
75
|
end
|
74
76
|
|
@@ -87,7 +89,7 @@ describe Poliqarp::Client do
|
|
87
89
|
tagset[:classes].should_not == nil
|
88
90
|
end
|
89
91
|
|
90
|
-
it "should allow to find 'kot'" do
|
92
|
+
it "should allow to find 'kot'" do
|
91
93
|
@client.find("kot").size.should_not == 0
|
92
94
|
end
|
93
95
|
|
@@ -129,7 +131,7 @@ describe Poliqarp::Client do
|
|
129
131
|
end
|
130
132
|
|
131
133
|
describe("(with index specified in find)") do
|
132
|
-
before(:each) do
|
134
|
+
before(:each) do
|
133
135
|
@result = @client.find("nachalny",:index => 0)
|
134
136
|
end
|
135
137
|
|
@@ -146,13 +148,12 @@ describe Poliqarp::Client do
|
|
146
148
|
end
|
147
149
|
end
|
148
150
|
|
149
|
-
describe("(with lemmata flags set to true)") do
|
151
|
+
describe("(with lemmata flags set to true)") do
|
150
152
|
before(:all) do
|
151
|
-
@client.lemmata =
|
152
|
-
:left_match => true, :right_match => true}
|
153
|
+
@client.config.lemmata = [:left_context, :right_context, :left_match, :right_match]
|
153
154
|
end
|
154
155
|
|
155
|
-
it "should allow to find 'kotu'" do
|
156
|
+
it "should allow to find 'kotu'" do
|
156
157
|
@client.find("kotu").size.should_not == 0
|
157
158
|
end
|
158
159
|
|
data/spec/excerpt.rb
CHANGED
@@ -25,7 +25,7 @@ describe Poliqarp::Excerpt do
|
|
25
25
|
@excerpt.index.should_not == nil
|
26
26
|
end
|
27
27
|
|
28
|
-
it "should have base form" do
|
28
|
+
it "should have base form" do
|
29
29
|
@excerpt.base_form.should_not == nil
|
30
30
|
end
|
31
31
|
|
@@ -64,10 +64,10 @@ describe Poliqarp::Excerpt do
|
|
64
64
|
end
|
65
65
|
|
66
66
|
it "should have index set to 0" do
|
67
|
-
@excerpt.index.should == 0
|
67
|
+
@excerpt.index.should == 0
|
68
68
|
end
|
69
69
|
|
70
|
-
it "should have base form set to 'kot'" do
|
70
|
+
it "should have base form set to 'kot'" do
|
71
71
|
@excerpt.base_form.should == "mu za to astronomiczną"
|
72
72
|
end
|
73
73
|
|
@@ -96,7 +96,7 @@ describe Poliqarp::Excerpt do
|
|
96
96
|
it "should have 'city' set to nil" do
|
97
97
|
@excerpt.city.should == nil
|
98
98
|
end
|
99
|
-
|
99
|
+
|
100
100
|
it "should have one 'publisher' set to 'Wydawnictwo Naukowe Akademii Pedagogicznej'" do
|
101
101
|
@excerpt.publisher.size.should == 1
|
102
102
|
@excerpt.publisher[0].should == "Wydawnictwo W.A.B."
|
@@ -114,13 +114,13 @@ describe Poliqarp::Excerpt do
|
|
114
114
|
end
|
115
115
|
|
116
116
|
describe('first result for "kotu" with lemmatization turned on') do
|
117
|
-
before(:all) do
|
118
|
-
@client.lemmata = :all
|
117
|
+
before(:all) do
|
118
|
+
@client.config.lemmata = :all
|
119
119
|
@client.open_corpus(:default)
|
120
|
-
@excerpt = @client.find("kotu")[0]
|
120
|
+
@excerpt = @client.find("kotu")[0]
|
121
121
|
end
|
122
122
|
|
123
|
-
it "should have one lemmata for each segment" do
|
123
|
+
it "should have one lemmata for each segment" do
|
124
124
|
@excerpt.short_context.each do |group|
|
125
125
|
group.each do |segment|
|
126
126
|
segment.lemmata.size.should == 1
|
@@ -134,7 +134,7 @@ describe Poliqarp::Excerpt do
|
|
134
134
|
end
|
135
135
|
end
|
136
136
|
|
137
|
-
it "should contain 'kot' as one of the lemmata" do
|
137
|
+
it "should contain 'kot' as one of the lemmata" do
|
138
138
|
@excerpt.short_context.flatten.
|
139
139
|
any?{|s| s.lemmata[0].base_form == "kot"}.should == true
|
140
140
|
end
|
data/spec/query_result.rb
CHANGED
@@ -12,7 +12,7 @@ describe Poliqarp::QueryResult do
|
|
12
12
|
@client.close
|
13
13
|
end
|
14
14
|
|
15
|
-
describe "(for unspecified query)" do
|
15
|
+
describe "(for unspecified query)" do
|
16
16
|
before(:all) do
|
17
17
|
@result = @client.find("kita")
|
18
18
|
end
|
@@ -57,11 +57,11 @@ describe Poliqarp::QueryResult do
|
|
57
57
|
end
|
58
58
|
|
59
59
|
describe "(for 'kot' in :default corpus)" do
|
60
|
-
before(:all) do
|
60
|
+
before(:all) do
|
61
61
|
@result = @client.find("kot")
|
62
62
|
end
|
63
63
|
|
64
|
-
it "should have size == 6" do
|
64
|
+
it "should have size == 6" do
|
65
65
|
@result.size.should == 6
|
66
66
|
end
|
67
67
|
|
@@ -83,11 +83,11 @@ describe Poliqarp::QueryResult do
|
|
83
83
|
end
|
84
84
|
|
85
85
|
describe "(for 'kot' with page_size set to 5 in :default corpus)" do
|
86
|
-
before(:all) do
|
86
|
+
before(:all) do
|
87
87
|
@result = @client.find("kot", :page_size => 5)
|
88
88
|
end
|
89
89
|
|
90
|
-
it "should have size == 5" do
|
90
|
+
it "should have size == 5" do
|
91
91
|
@result.size.should == 5
|
92
92
|
end
|
93
93
|
|
@@ -109,11 +109,11 @@ describe Poliqarp::QueryResult do
|
|
109
109
|
end
|
110
110
|
|
111
111
|
describe "(next for 'kot' with page_size set to 5 in :default corpus)" do
|
112
|
-
before(:all) do
|
112
|
+
before(:all) do
|
113
113
|
@result = @client.find("kot", :page_size => 5).next_page
|
114
114
|
end
|
115
115
|
|
116
|
-
it "should have size == 1" do
|
116
|
+
it "should have size == 1" do
|
117
117
|
@result.size.should == 1
|
118
118
|
end
|
119
119
|
|
metadata
CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
6
|
- 0
|
7
|
+
- 1
|
7
8
|
- 0
|
8
|
-
|
9
|
-
version: 0.0.8
|
9
|
+
version: 0.1.0
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Aleksander Pohl
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2011-01-
|
17
|
+
date: 2011-01-17 00:00:00 +01:00
|
18
18
|
default_executable:
|
19
19
|
dependencies: []
|
20
20
|
|
@@ -32,6 +32,7 @@ files:
|
|
32
32
|
- changelog.txt
|
33
33
|
- README.txt
|
34
34
|
- lib/poliqarpr.rb
|
35
|
+
- lib/poliqarpr/config.rb
|
35
36
|
- lib/poliqarpr/exceptions.rb
|
36
37
|
- lib/poliqarpr/lemmata.rb
|
37
38
|
- lib/poliqarpr/query_result.rb
|