poliqarpr 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
data/README.txt ADDED
@@ -0,0 +1,107 @@
1
+ = poliqarpr
2
+
3
+ * http://github.com/apohllo/poliqarpr
4
+
5
+ == DESCRIPTION:
6
+
7
+ Poliqarpr is a Ruby client for the Poliqarp server.
8
+
9
+
10
+ == FEATURES/PROBLEMS:
11
+
12
+ * built-in pagination of query results
13
+ * support for lemmatization
14
+ * asynchronous communication might not be stable (100% CPU usage)
15
+ * only partial implementation of server protocol:
16
+ ** PING, VERSION
17
+ ** MAKE-SESSION, CLOSE-SESSION
18
+ ** OPEN (corpus), CLOSE (corpus)
19
+ ** CORPUS-STATS, GET-TAGSET
20
+ ** STATUS, CANCEL (used internally)
21
+ ** MAKE-QUERY, RUN-QUERY, BUFFER-STATE (used internally)
22
+ ** GET-RESULTS, GET-CONTEXT, METADATA
23
+ ** SET: lemmata, tags
24
+
25
+ == SYNOPSIS:
26
+
27
+ Poliqarpr is a Ruby client for the Poliqarp corpus server (see
28
+ http://poliqarp.sourceforge.net/), which is used to store large texts used in
29
+ Natural Language Processing.
30
+
31
+
32
+ == REQUIREMENTS:
33
+
34
+ Poliqarp server (only C implementation http://poliqarp.sourceforge.net/)
35
+
36
+ == INSTALL:
37
+
38
+ You need RubyGems v. 1.2
39
+
40
+ * gem -v
41
+ * 1.2.0 #=> ok
42
+
43
+ You need the gemcutter.org repository to be added to your sources list:
44
+
45
+ * gem sources -a http://gemcutter.org
46
+
47
+ Then you can type:
48
+
49
+ * sudo gem install poliqarpr
50
+
51
+ You can install the optional default corpus (warning: it is distributed under
52
+ different license!):
53
+
54
+ * sudo gem install poliqarpr-corpus
55
+
56
+ == BASIC USAGE:
57
+
58
+ (You need the poliqarpr-corpus to be installed for this to work. See the last
59
+ step of installation process).
60
+
61
+ Require the gem:
62
+
63
+ require 'poliqarpr'
64
+
65
+ Create the server client and open default corpus
66
+
67
+ client = Poliqarp::Client.new
68
+ client.open_corpus :default
69
+
70
+ Query the corpus for given segment
71
+
72
+ result = client.find("kot")
73
+ result[0].to_s
74
+
75
+ Remember to close the client on exit
76
+
77
+ client.close
78
+
79
+
80
+ == LICENSE:
81
+
82
+ (The MIT License)
83
+
84
+ Copyright (c) 2008-2009 Aleksander Pohl
85
+
86
+ Permission is hereby granted, free of charge, to any person obtaining
87
+ a copy of this software and associated documentation files (the
88
+ 'Software'), to deal in the Software without restriction, including
89
+ without limitation the rights to use, copy, modify, merge, publish,
90
+ distribute, sublicense, and/or sell copies of the Software, and to
91
+ permit persons to whom the Software is furnished to do so, subject to
92
+ the following conditions:
93
+
94
+ The above copyright notice and this permission notice shall be
95
+ included in all copies or substantial portions of the Software.
96
+
97
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
98
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
99
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
100
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
101
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
102
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
103
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
104
+
105
+ == FEEDBACK
106
+
107
+ * mailto:apohllo@o2.pl
data/Rakefile ADDED
@@ -0,0 +1,23 @@
1
# Rake tasks for building, installing and removing the poliqarpr gem.
task :default => [:install]

# Name of the gem; shared by all the tasks below.
$gem_name = "poliqarpr"

desc "Build the gem"
task :build do
  sh "gem build #$gem_name.gemspec"
end

desc "Install the library on the local machine"
task :install => :build do
  sh "sudo gem install #$gem_name"
end

desc "Uninstall the library from the local machine"
task :uninstall do
  sh "sudo gem uninstall #$gem_name"
end

desc "Clean"
task :clean do
  # -f keeps the task from failing when no gem file has been built yet
  sh "rm -f #$gem_name*.gem"
end
data/changelog.txt ADDED
@@ -0,0 +1,37 @@
1
+ 0.0.5
2
+ - Bugfix: making parallel query might lead to silent thread death
3
+ - Support for Ruby 1.9 encoding
4
+ - Require path improvements
5
+ - Copyright year correction
6
+ - Documentation now points to gemcutter instead of github
7
+
8
+ 0.0.4
9
+ - ping/pong diagnostics
10
+ - server version
11
+ - corpus statistics
12
+ - implementation of asynchronous protocol (not stable)
13
+
14
+
15
+ 0.0.3
16
+ - the license of the corpus included
17
+ - client rdoc documentation
18
+ - support for lemmata retrieval
19
+ - excerpt now contains segments instead of strings
20
+ - buffer size setter
21
+ - default corpus moved to separate plugin (sudo gem install apohllo-poliqarpr-corpus)
22
+
23
+ 0.0.2
24
+ - query result is full blown class
25
+ - source divided into client, excerpt and query result
26
+ - specs for client, excerpt and query result
27
+ - namespace changed to POLIQARP
28
+ - default corpus included
29
+ - singular results properly fetched
30
+ - valid result for queries containing many words
31
+ - same queries which are run in sequence are called only once
32
+ - README.txt included in gem
33
+ - specs included in gem
34
+
35
+ 0.0.1
36
+ - initial implementation
37
+ - synchronous querying for terms
@@ -0,0 +1,452 @@
1
+ # vim:encoding=utf-8
2
+ module Poliqarp
3
+ # Author:: Aleksander Pohl (mailto:apohllo@o2.pl)
4
+ # License:: MIT License
5
+ #
6
+ # This class is the implementation of the Poliqarp server client.
7
+ class Client
8
+ GROUPS = [:left_context, :left_match, :right_match, :right_context]
9
+
10
+ # If debug is turned on, the communication between server and client
11
+ # is logged to standard output.
12
+ attr_writer :debug
13
+
14
+ # The size of the buffer is the maximum number of excerpts which
15
+ # are returned for single query.
16
+ attr_writer :buffer_size
17
+
18
+ # Creates new poliqarp server client.
19
+ #
20
+ # Parameters:
21
+ # * +session_name+ the name of the client session. Defaults to "RUBY".
22
+ # * +debug+ if set to true, all messages sent and received from server
23
+ # are printed to standard output. Defaults to false.
24
+ def initialize(session_name="RUBY", debug=false)
25
+ @session_name = session_name
26
+ @left_context = 5
27
+ @right_context = 5
28
+ @debug = debug
29
+ @buffer_size = 500000
30
+ @connector = Connector.new(debug)
31
+ @answer_queue = Queue.new
32
+ new_session
33
+ end
34
+
35
# Gives a hint about installing the default corpus gem when a constant
# whose name contains DEFAULT_CORPUS is referenced but not defined.
# Any other missing constant is passed on to the regular lookup.
def self.const_missing(const)
  if const.to_s =~ /DEFAULT_CORPUS/
    # the name matches the gem required by the library and listed in the README
    raise "You need to install 'poliqarpr-corpus' to use the default corpus"
  end
  super
end
42
+
43
+ # Creates new session for the client with the name given in constructor.
44
+ # If the session was already opened, it is closed.
45
+ #
46
+ # Parameters:
47
+ # * +port+ - the port on which the poliqarpd server is accepting connections (defaults to 4567)
48
+ def new_session(port=4567)
49
+ close if @session
50
+ @connector.open("localhost",port)
51
+ talk("MAKE-SESSION #{@session_name}")
52
+ talk("BUFFER-RESIZE #{@buffer_size}")
53
+ @session = true
54
+ self.tags = {}
55
+ self.lemmata = {}
56
+ end
57
+
58
+ # Closes the opened session.
59
+ def close
60
+ talk "CLOSE-SESSION"
61
+ @session = false
62
+ end
63
+
64
+ # Closes the opened corpus.
65
+ def close_corpus
66
+ talk "CLOSE"
67
+ end
68
+
69
+ # Sets the size of the left short context. It must be > 0
70
+ #
71
+ # The size of the left short context is the number
72
+ # of segments displayed in the found excerpts left to the
73
+ # matched segment(s).
74
+ def left_context=(value)
75
+ if correct_context_value?(value)
76
+ result = talk("SET left-context-width #{value}")
77
+ @left_context = value if result =~ /^R OK/
78
+ else
79
+ raise "Invalid argument: #{value}. It must be fixnum greater than 0."
80
+ end
81
+ end
82
+
83
+ # Sets the size of the right short context. It must be > 0
84
+ #
85
+ # The size of the right short context is the number
86
+ # of segments displayed in the found excerpts right to the
87
+ # matched segment(s).
88
+ def right_context=(value)
89
+ if correct_context_value?(value)
90
+ result = talk("SET right-context-width #{value}")
91
+ @right_context = value if result =~ /^R OK/
92
+ else
93
+ raise "Invalid argument: #{value}. It must be fixnum greater than 0."
94
+ end
95
+ end
96
+
97
+ # Sets the tags' flags. There are four groups of segments
98
+ # which the flags apply for:
99
+ # * +left_context+
100
+ # * +left_match+
101
+ # * +right_match+
102
+ # * +right_context+
103
+ #
104
+ # If the flag for given group is set to true, all segments
105
+ # in the group are annotated with grammatical tags. E.g.:
106
+ # c.find("kot")
107
+ # ...
108
+ # "kot" tags: "subst:sg:nom:m2"
109
+ #
110
+ # You can pass :all to turn on flags for all groups
111
+ def tags=(options={})
112
+ options = set_all_flags if options == :all
113
+ @tag_flags = options
114
+ flags = ""
115
+ GROUPS.each do |flag|
116
+ flags << (options[flag] ? "1" : "0")
117
+ end
118
+ talk("SET retrieve-tags #{flags}")
119
+ end
120
+
121
+ # Sets the lemmatas' flags. There are four groups of segments
122
+ # which the flags apply for:
123
+ # * +left_context+
124
+ # * +left_match+
125
+ # * +right_match+
126
+ # * +right_context+
127
+ #
128
+ # If the flag for given group is set to true, all segments
129
+ # in the group are returned with the base form of the lemmata. E.g.:
130
+ # c.find("kotu")
131
+ # ...
132
+ # "kotu" base_form: "kot"
133
+ #
134
+ # You can pass :all to turn on flags for all groups
135
+ def lemmata=(options={})
136
+ options = set_all_flags if options == :all
137
+ @lemmata_flags = options
138
+ flags = ""
139
+ GROUPS.each do |flag|
140
+ flags << (options[flag] ? "1" : "0")
141
+ end
142
+ talk("SET retrieve-lemmata #{flags}")
143
+ end
144
+
145
+ # *Asynchronous* Opens the corpus given as +path+. To open the default
146
+ # corpus pass +:default+ as the argument.
147
+ #
148
+ # If you don't want to wait until the call is finished, you
149
+ # have to provide +handler+ for the asynchronous answer.
150
+ def open_corpus(path, &handler)
151
+ if path == :default
152
+ open_corpus(DEFAULT_CORPUS, &handler)
153
+ else
154
+ real_handler = handler || lambda{|msg| @answer_queue.push msg }
155
+ talk("OPEN #{path}", :async, &real_handler)
156
+ do_wait if handler.nil?
157
+ end
158
+ end
159
+
160
+ # Server diagnostics -- the result should be :pong
161
+ def ping
162
+ :pong if talk("PING") =~ /PONG/
163
+ end
164
+
165
+ # Returns server version
166
+ def version
167
+ talk("VERSION")
168
+ end
169
+
170
# Returns corpus statistics as a hash with the following keys:
# * +:segment_tokens+ the number of segments in the corpus
#   (two segments which look exactly the same are counted separately)
# * +:segment_types+ the number of segment types in the corpus
#   (two segments which look exactly the same are counted as one type)
# * +:lemmata+ the number of lemmata (lexemes) types
#   (all forms of an inflected word, e.g. 'kot', 'kotu', ...
#   are treated as one "word" -- lemmata)
# * +:tags+ the number of different grammar tags (each combination
#   of atomic tags is treated as a different "tag")
#
# A key is present only if the server answer contains its counter.
def stats
  keys = [:segment_tokens, :segment_types, :lemmata, :tags]
  # the first word of the answer is skipped, the next four are the counters
  counters = talk("CORPUS-STATS").split[1, 4] || []
  result = {}
  keys.zip(counters) do |key, counter|
    result[key] = counter.to_i unless counter.nil?
  end
  result
end
196
+
197
+ # TODO
198
+ def metadata_types
199
+ raise "Not implemented"
200
+ end
201
+
202
+ # Returns the tag-set used in the corpus.
203
+ # It is divided into two groups:
204
+ # * +:categories+ enlists tags belonging to grammatical categories
205
+ # (each category has a list of its tags, eg. gender: m1 m2 m3 f n,
206
+ # means that there are 5 genders: masculine(1,2,3), feminine and neuter)
207
+ # * +:classes+ enlists grammatical tags used to describe it
208
+ # (each class has a list of tags used to describe it, eg. adj: degree
209
+ # gender case number, means that adjectives are described in terms
210
+ # of degree, gender, case and number)
211
+ def tagset
212
+ answer = talk("GET-TAGSET")
213
+ counters = answer.split
214
+ result = {}
215
+ [:categories, :classes].each_with_index do |type, type_index|
216
+ result[type] = {}
217
+ counters[type_index+1].to_i.times do |index|
218
+ values = read_word.split
219
+ result[type][values[0].to_sym] = values[1..-1].map{|v| v.to_sym}
220
+ end
221
+ end
222
+ result
223
+ end
224
+
225
+ # Send the query to the opened corpus.
226
+ #
227
+ # Options:
228
+ # * +index+ the index of the (only one) result to be returned. The index is relative
229
+ # to the beginning of the query result. In normal case you should query the
230
+ # corpus without specifying the index, to see what results are returned.
231
+ # Then you can use the index and the same query to retrieve one result.
232
+ # The pair (query, index) is a kind of unique identifier of the excerpt.
233
+ # * +page_size+ the size of the page of results. If the page size is 0, then
234
+ # all results are returned on one page. It is ignored if the +index+ option
235
+ # is present. Defaults to 0.
236
+ # * +page_index+ the index of the page of results (the first page has index 1, not 0).
237
+ # It is ignored if the +index+ option is present. Defaults to 1.
238
+ def find(query,options={})
239
+ if options[:index]
240
+ find_one(query, options[:index])
241
+ else
242
+ find_many(query, options)
243
+ end
244
+ end
245
+
246
+ alias query find
247
+
248
+ # Returns the number of results for given query.
249
+ def count(query)
250
+ count_results(make_query(query))
251
+ end
252
+
253
+ # Returns the long context of the excerpt which is identified by
254
+ # given (query, index) pair.
255
+ def context(query,index)
256
+ make_query(query)
257
+ result = []
258
+ talk "GET-CONTEXT #{index}"
259
+ # 1st part
260
+ result << read_word
261
+ # 2nd part
262
+ result << read_word
263
+ # 3rd part
264
+ result << read_word
265
+ # 4th part
266
+ result << read_word
267
+ result
268
+ end
269
+
270
+ # Returns the metadata of the excerpt which is identified by
271
+ # given (query, index) pair.
272
+ def metadata(query, index)
273
+ make_query(query)
274
+ result = {}
275
+ answer = talk("METADATA #{index}")
276
+ count = answer.split(" ")[1].to_i
277
+ count.times do |index|
278
+ type = read_word.gsub(/[^a-zA-Z]/,"").to_sym
279
+ value = read_word[2..-1]
280
+ unless value.nil?
281
+ result[type] ||= []
282
+ result[type] << value
283
+ end
284
+ end
285
+ result
286
+ end
287
+
288
+ protected
289
+ # Sends a message directly to the server
290
+ # * +msg+ the message to send
291
+ # * +mode+ if set to :sync, the method blocks until the message
292
+ # is received. If :async the method returns immediately.
293
+ # Default: :sync
294
+ # * +handler+ the handler of the asynchronous message.
295
+ # It is ignored when the mode is set to :sync.
296
+ def talk(msg, mode = :sync, &handler)
297
+ puts msg if @debug
298
+ @connector.send(msg, mode, &handler)
299
+ end
300
+
301
+ # Make query and retrieve many results.
302
+ # * +query+ the query to be sent to the server.
303
+ # * +options+ see find
304
+ def find_many(query, options)
305
+ page_size = (options[:page_size] || 0)
306
+ page_index = (options[:page_index] || 1)
307
+
308
+ answer_offset = page_size * (page_index - 1)
309
+ if page_size > 0
310
+ result_count = make_async_query(query,answer_offset)
311
+ answers_limit = answer_offset + page_size > result_count ?
312
+ result_count - answer_offset : page_size
313
+ else
314
+ # all answers needed -- the call must be synchronous
315
+ result_count = count_results(make_query(query))
316
+ answers_limit = result_count
317
+ end
318
+
319
+ page_count = page_size <= 0 ? 1 :
320
+ result_count / page_size + (result_count % page_size > 0 ? 1 : 0)
321
+
322
+ result = QueryResult.new(page_index, page_count,page_size,self,query)
323
+ if answers_limit > 0
324
+ talk("GET-RESULTS #{answer_offset} #{answer_offset + answers_limit - 1}")
325
+ answers_limit.times do |answer_index|
326
+ result << fetch_result(answer_offset + answer_index, query)
327
+ end
328
+ end
329
+ result
330
+ end
331
+
332
+ # Make query and retrieve only one result
333
+ # * +query+ the query to be sent to the server
334
+ # * +index+ the index of the answer to be retrieved
335
+ def find_one(query,index)
336
+ make_async_query(query,index)
337
+ talk("GET-RESULTS #{index} #{index}")
338
+ fetch_result(index,query)
339
+ end
340
+
341
+ # Fetches one result of the query
342
+ #
343
+ # MAKE-QUERY and GET-RESULTS must be sent to the server before
344
+ # this method is called
345
+ def fetch_result(index, query)
346
+ result = Excerpt.new(index, self, query)
347
+ result << read_segments(:left_context)
348
+ result << read_segments(:left_match)
349
+ # XXX
350
+ #result << read_segments(:right_match)
351
+ result << read_segments(:right_context)
352
+ result
353
+ end
354
+
355
+ def read_segments(group)
356
+ size = read_number()
357
+ segments = []
358
+ size.times do |segment_index|
359
+ segment = Segment.new(read_word)
360
+ segments << segment
361
+ if @lemmata_flags[group] || @tag_flags[group]
362
+ lemmata_size = read_number()
363
+ lemmata_size.times do |lemmata_index|
364
+ lemmata = Lemmata.new()
365
+ if @lemmata_flags[group]
366
+ lemmata.base_form = read_word
367
+ end
368
+ if @tag_flags[group]
369
+ read_word
370
+ end
371
+ segment.lemmata << lemmata
372
+ end
373
+ end
374
+ end
375
+ segments
376
+ end
377
+
378
+ # Reads number stored in the message received from the server.
379
+ def read_number
380
+ @connector.read_message.match(/\d+/)[0].to_i
381
+ end
382
+
383
# Extracts the number of results from the server +answer+, i.e. the
# second space-separated word of the answer converted to an integer
# (0 if the word is missing or is not a number).
def count_results(answer)
  _status, counter = answer.split(" ")
  counter.to_i
end
387
+
388
# *Asynchronous* Sends the query to the server
# * +query+ query to send
# * +handler+ if given, the method returns immediately,
#   and the answer is sent to the handler. In this case
#   the result returned by make_query should be IGNORED!
#
# The query is sent only if it differs from the last successfully
# submitted one; otherwise the cached result is returned. If sending the
# query raises an error, the error is propagated and the query is not
# remembered, so the same query is sent again on the next call.
def make_query(query, &handler)
  if @last_query != query
    real_handler = handler || lambda { |msg| @answer_queue.push msg }
    # Forget the previous query first: if anything below raises, a retry
    # of +query+ must reach the server instead of returning a stale result.
    @last_query = nil
    begin
      talk("MAKE-QUERY #{query}")
    rescue JobInProgress
      # a previous job is still running -- cancel it and try once more
      talk("CANCEL") rescue nil
      talk("MAKE-QUERY #{query}")
    end
    talk("RUN-QUERY #{@buffer_size}", :async, &real_handler)
    @last_query = query
    @last_result = do_wait if handler.nil?
  end
  @last_result
end
412
+
413
+ # Reads string stored in the last message received from server
414
+ def read_word
415
+ @connector.read_message
416
+ end
417
+
418
+ private
419
+ def do_wait
420
+ loop {
421
+ status = talk("STATUS") rescue break
422
+ puts "STATUS: #{status}" if @debug
423
+ sleep 0.3
424
+ }
425
+ @answer_queue.shift
426
+ end
427
+
428
+ def set_all_flags
429
+ options = {}
430
+ GROUPS.each{|g| options[g] = true}
431
+ options
432
+ end
433
+
434
# Returns true if +value+ is a valid context width, i.e. an integer
# greater than 0.
def correct_context_value?(value)
  # Integer instead of Fixnum: Fixnum was deprecated in Ruby 2.4 and
  # removed in 3.2, while Integer exists in every Ruby version.
  value.is_a?(Integer) && value > 0
end
437
+
438
+ def make_async_query(query,answer_offset)
439
+ # the handler is empty, since we access the result count through
440
+ # BUFFER-STATE call
441
+ make_query(query){|msg| }
442
+ result_count = 0
443
+ begin
444
+ # the result count might be not exact!
445
+ result_count = talk("BUFFER-STATE").split(" ")[2].to_i
446
+ talk("STATUS") rescue break
447
+ end while result_count < answer_offset
448
+ @last_result = "OK #{result_count}"
449
+ result_count
450
+ end
451
+ end
452
+ end
@@ -0,0 +1,140 @@
1
+ # vim:encoding=utf-8
2
+ require 'socket'
3
+ require 'thread'
4
+ require File.join(File.dirname(__FILE__),'util')
5
+
6
+ module Poliqarp
7
+ # Author:: Aleksander Pohl (mailto:apohllo@o2.pl)
8
+ # License:: MIT License
9
+ #
10
+ # This class holds the TCP connection to the server and is responsible
11
+ # for dispatching synchronous and asynchronous queries and answers.
12
+ class Connector
13
+ include Ruby19
14
+
15
+ # Error messages assigned to error codes
16
+ # (taken from poliqarpd implementation)
17
+ ERRORS = {
18
+ 1 => "Incorrect number of arguments",
19
+ 3 => "No session opened",
20
+ 4 => "Cannot create a session for a connection that",
21
+ 5 => "Not enough memory",
22
+ 6 => "Invalid session ID",
23
+ 7 => "Session with this ID is already bound",
24
+ 8 => "Session user ID does not match the argument",
25
+ 10 => "Session already has an open corpus",
26
+ 12 => "System error while opening the corpus",
27
+ 13 => "No corpus opened",
28
+ 14 => "Invalid job ID",
29
+ 15 => "A job is already in progress",
30
+ 16 => "Incorrect query",
31
+ 17 => "Invalid result range",
32
+ 18 => "Incorrect session option",
33
+ 19 => "Invalid session option value",
34
+ 20 => "Invalid sorting criteria"
35
+ }
36
+
37
+ UTF8 = "utf-8"
38
+
39
+ # Creates new connector
40
+ def initialize(debug)
41
+ @message_queue = Queue.new
42
+ @socket_mutex = Mutex.new
43
+ @loop_mutex = Mutex.new
44
+ @debug = debug
45
+ end
46
+
47
+ # Opens connection with poliqarp server which runs
48
+ # on given +host+ and +port+.
49
+ def open(host,port)
50
+ @socket_mutex.synchronize {
51
+ @socket = TCPSocket.new(host,port) if @socket.nil?
52
+ }
53
+ running = nil
54
+ @loop_mutex.synchronize {
55
+ running = @loop_running
56
+ }
57
+ main_loop unless running
58
+ @loop_mutex.synchronize {
59
+ @loop_running = true
60
+ }
61
+ end
62
+
63
# Sends message to the poliqarp server. Returns the first synchronous
# answer of the server.
# * +message+ the message to send
# * +mode+ synchronous (+:sync+) or asynchronous (+:async+)
# * +handler+ the handler of the asynchronous message
def send(message, mode, &handler)
  puts "send #{mode} #{message}" if @debug
  # transcode the outgoing message to UTF-8 before it is written
  message = message.encode(UTF8) if ruby19?
  @socket.puts(message)
  # remember the handler; it is called when an asynchronous message arrives
  @handler = handler if mode == :async
  read_message
end
79
+
80
# Retrieves one message from the queue of messages received from the server.
# If the message indicates an error, an exception is raised:
# JobInProgress for error code 15, RuntimeError containing the error
# description for any other code.
def read_message
  message = @message_queue.shift
  return message unless message =~ /^ERR/

  # an error line without a numeric code is treated as code 0
  code = message[/\d+/].to_i
  raise JobInProgress.new() if code == 15
  # Codes missing from ERRORS still get a readable RuntimeError instead of
  # a TypeError caused by concatenating a string with nil.
  description = ERRORS[code] || "Unknown error (#{message})"
  raise RuntimeError.new("Poliqarp Error: " + description)
end
93
+
94
+ private
95
+ def main_loop
96
+ @loop = Thread.new {
97
+ loop {
98
+ receive
99
+ # XXX ??? needed
100
+ #sleep 0.001
101
+ }
102
+ }
103
+ end
104
+
105
+ def receive
106
+ result = read_line
107
+ if ruby19?
108
+ result.force_encoding(UTF8)
109
+ end
110
+ msg = result[2..-2]
111
+ if result =~ /^M/
112
+ receive_async(msg)
113
+ elsif result
114
+ receive_sync(msg)
115
+ end
116
+ # if nil, nothing was received
117
+ end
118
+
119
+ def receive_sync(message)
120
+ puts "receive sync: #{message}" if @debug
121
+ @message_queue << message
122
+ end
123
+
124
+ def receive_async(message)
125
+ puts "receive async: #{message}" if @debug
126
+ Thread.new{
127
+ @handler.call(message)
128
+ }
129
+ end
130
+
131
# Reads one line (up to and including "\n") from the socket.
# Raises EOFError when there is nothing more to read, instead of
# looping forever on an exhausted socket.
def read_line
  line = @socket.gets("\n")
  raise EOFError, "connection to the poliqarp server was closed" if line.nil?
  line
end
139
+ end
140
+ end
@@ -0,0 +1,9 @@
1
module Poliqarp
  # Author:: Aleksander Pohl (mailto:apohllo@o2.pl)
  # License:: MIT License

  # The JobInProgress exception is raised when an asynchronous call to the
  # server is issued while a previous asynchronous call has not finished yet.
  #
  # It derives from StandardError (not directly from Exception), so that
  # a plain +rescue+ in client code is able to handle it.
  class JobInProgress < StandardError; end
end
@@ -0,0 +1,63 @@
1
module Poliqarp
  # Author:: Aleksander Pohl
  # License:: MIT License
  #
  # The excerpt class is used to store a single result of the query,
  # i.e. the excerpt of the corpus which contains the words which
  # the corpus was queried for.
  #
  # The excerpt is divided into groups, which contain segments,
  # which the texts in the corpus were divided into.
  # The first group is the left context, the second -- the matched
  # query, and the last -- the right context.
  class Excerpt
    attr_reader :index, :base_form, :short_context

    # Creates new excerpt.
    # * +index+ the index of the excerpt
    # * +client+ the object asked for the long context and the metadata
    # * +base_form+ the query passed to the client together with the index
    def initialize(index, client, base_form)
      @index = index
      @client = client
      @base_form = base_form
      @short_context = []
    end

    # Adds segment group to the excerpt
    def <<(value)
      @short_context << value
    end

    # Returns the matched query (the second group) as string.
    # An empty string is returned if the group is missing.
    def word
      # Array#to_s concatenates the elements only in Ruby 1.8; since 1.9 it
      # is an alias of inspect, so the segments are joined explicitly.
      Array(@short_context[1]).join("")
    end

    alias inflected_form word

    # The string representation of the excerpt is the short
    # context of the query.
    def to_s
      @short_context.join("")
    end

    # Returns the long context of the query. It is fetched from the client
    # on the first call and remembered afterwards.
    def context
      return @context unless @context.nil?
      @context = @client.context(@base_form, @index)
    end

    # Metadata readers: each method returns the value stored under the given
    # keyword in the hash returned by the client's metadata call.
    { :medium => :medium, :style => :styl, :date => :data_wydania,
      :city => :miejsce_wydania, :publisher => :wydawca, :title => :tytu,
      :author => :autor}.each do |method, keyword|
      define_method method do
        self.metadata[keyword]
      end
    end

    protected
    # Returns the metadata of the excerpt. It is fetched from the client
    # on the first call and remembered afterwards.
    def metadata
      return @metadata unless @metadata.nil?
      @metadata = @client.metadata(@base_form, @index)
    end
  end
end
@@ -0,0 +1,11 @@
1
module Poliqarp
  # Author:: Aleksander Pohl (mailto:apohllo@o2.pl)
  # License:: MIT License
  #
  # A lemmata holds the base form of a segment.
  class Lemmata
    # The base form; it is unset (nil) until assigned.
    attr_accessor :base_form
  end
end
@@ -0,0 +1,73 @@
1
module Poliqarp
  # Author:: Aleksander Pohl (mailto:apohllo@o2.pl)
  # License:: MIT License
  #
  # The query result class is used to paginate results of the
  # query. Each query result has information about its context
  # (the next and previous page).
  class QueryResult
    include Enumerable

    attr_accessor :page, :page_count, :query, :page_size

    # Creates new (empty) query result.
    # * +page+ the index of this page
    # * +page_count+ the number of pages
    # * +page_size+ the size of the page
    # * +client+ the object whose +find+ is called to fetch other pages
    # * +query+ the query passed to +find+ when other pages are fetched
    def initialize(page, page_count, page_size, client, query)
      @page = page
      @page_count = page_count
      @page_size = page_size
      @client = client
      @query = query
      @excerpts = []
    end

    # Adds excerpt to the query result
    def <<(excerpt)
      @excerpts << excerpt
    end

    # Allows to iterate over the excerpts stored in the result.
    # Returns an enumerator if no block is given.
    def each(&block)
      return enum_for(:each) unless block_given?
      @excerpts.each(&block)
    end

    # first, last and empty? are delegated to the list of excerpts;
    # optional arguments (e.g. the count for +first+) are passed through.
    [:first, :last, :empty?].each do |method|
      define_method method do |*args|
        @excerpts.send(method, *args)
      end
    end

    # Returns excerpt with given index.
    def [](index)
      @excerpts[index]
    end

    # Two query results are equal iff their page number, page count,
    # query and page size are equal.
    def ==(other)
      return false unless other.is_a? QueryResult
      @page == other.page && @page_count == other.page_count &&
        @query == other.query && @page_size == other.page_size
    end

    # Returns the previous page of the query result
    # or nil if this is the first page.
    def previous_page
      if @page > 1
        @client.find(@query, :page_size => @page_size,
                     :page_index => @page - 1)
      end
    end

    # Returns the next page of the query result
    # or nil if this is the last page.
    def next_page
      if @page < @page_count
        @client.find(@query, :page_size => @page_size,
                     :page_index => @page + 1)
      end
    end

    # Returns the number of excerpts stored in this page (query result)
    def size
      @excerpts.size
    end

  end
end
@@ -0,0 +1,23 @@
1
module Poliqarp
  # Author:: Aleksander Pohl (mailto:apohllo@o2.pl)
  # License:: MIT LICENSE
  #
  # A segment is the smallest meaningful part of the text. Since segments
  # are sometimes not disambiguated, one segment may have many lemmata.
  class Segment
    attr_reader :literal, :lemmata

    # Creates new segment from its +literal+, i.e. the representation
    # of the segment as found in the text. The list of lemmata starts empty.
    def initialize(literal)
      @lemmata = []
      @literal = literal
    end

    # The string representation of the segment is its literal.
    alias to_s literal
  end
end
@@ -0,0 +1,9 @@
1
+ #vim:encoding=utf-8
2
module Poliqarp #:nodoc:
  module Ruby19
    # Tells whether the running interpreter is Ruby 1.9 or newer. Only the
    # major and minor parts of RUBY_VERSION are taken into account.
    def ruby19?
      major_minor = RUBY_VERSION.split(".").first(2)
      major_minor.join(".").to_f >= 1.9
    end
  end
end
data/lib/poliqarpr.rb ADDED
@@ -0,0 +1,9 @@
1
+ begin
2
+ require 'poliqarpr-corpus'
3
+ rescue LoadError
4
+ # Do nothing, since the default corpus is optional
5
+ end
6
+
7
+ $LOAD_PATH.unshift File.dirname(__FILE__)
8
+ Dir.glob(File.join(File.dirname(__FILE__), 'poliqarpr/**.rb')).each { |f| require f }
9
+
data/poliqarpr.gemspec ADDED
@@ -0,0 +1,17 @@
1
+ Gem::Specification.new do |s|
2
+ s.name = "poliqarpr"
3
+ s.version = "0.0.5"
4
+ s.date = "2009-12-10"
5
+ s.summary = "Ruby client for Poliqarp"
6
+ s.email = "apohllo@o2.pl"
7
+ s.homepage = "http://www.github.com/apohllo/poliqarpr"
8
+ s.description = "Ruby client for Poliqarp (NLP corpus server)"
9
+ s.authors = ['Aleksander Pohl']
10
+ s.files = ["Rakefile", "poliqarpr.gemspec",
11
+ "changelog.txt", "README.txt" ] + Dir.glob("lib/**/*")
12
+ s.test_files = Dir.glob("test/**/*")
13
+ s.rdoc_options = ["--main", "README.txt"]
14
+ s.has_rdoc = true
15
+ s.extra_rdoc_files = ["README.txt"]
16
+ end
17
+
metadata ADDED
@@ -0,0 +1,68 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: poliqarpr
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.5
5
+ platform: ruby
6
+ authors:
7
+ - Aleksander Pohl
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-12-10 00:00:00 +01:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description: Ruby client for Poliqarp (NLP corpus server)
17
+ email: apohllo@o2.pl
18
+ executables: []
19
+
20
+ extensions: []
21
+
22
+ extra_rdoc_files:
23
+ - README.txt
24
+ files:
25
+ - Rakefile
26
+ - poliqarpr.gemspec
27
+ - changelog.txt
28
+ - README.txt
29
+ - lib/poliqarpr.rb
30
+ - lib/poliqarpr/exceptions.rb
31
+ - lib/poliqarpr/lemmata.rb
32
+ - lib/poliqarpr/query_result.rb
33
+ - lib/poliqarpr/excerpt.rb
34
+ - lib/poliqarpr/segment.rb
35
+ - lib/poliqarpr/client.rb
36
+ - lib/poliqarpr/util.rb
37
+ - lib/poliqarpr/connector.rb
38
+ has_rdoc: true
39
+ homepage: http://www.github.com/apohllo/poliqarpr
40
+ licenses: []
41
+
42
+ post_install_message:
43
+ rdoc_options:
44
+ - --main
45
+ - README.txt
46
+ require_paths:
47
+ - lib
48
+ required_ruby_version: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: "0"
53
+ version:
54
+ required_rubygems_version: !ruby/object:Gem::Requirement
55
+ requirements:
56
+ - - ">="
57
+ - !ruby/object:Gem::Version
58
+ version: "0"
59
+ version:
60
+ requirements: []
61
+
62
+ rubyforge_project:
63
+ rubygems_version: 1.3.5
64
+ signing_key:
65
+ specification_version: 3
66
+ summary: Ruby client for Poliqarp
67
+ test_files: []
68
+