apohllo-poliqarpr 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (47) hide show
  1. data/README.txt +69 -0
  2. data/Rakefile +23 -0
  3. data/changelog.txt +15 -0
  4. data/corpus/frek.cdf +4 -0
  5. data/corpus/frek.cfg +100 -0
  6. data/corpus/frek.cfg~ +100 -0
  7. data/corpus/frek.meta.cfg +1 -0
  8. data/corpus/frek.meta.lisp +4 -0
  9. data/corpus/frek.poliqarp.base1.image +0 -0
  10. data/corpus/frek.poliqarp.base1.offset +0 -0
  11. data/corpus/frek.poliqarp.base2.image +0 -0
  12. data/corpus/frek.poliqarp.base2.offset +0 -0
  13. data/corpus/frek.poliqarp.chunk.image +0 -0
  14. data/corpus/frek.poliqarp.corpus.image +0 -0
  15. data/corpus/frek.poliqarp.meta-key.image +0 -0
  16. data/corpus/frek.poliqarp.meta-key.offset +0 -0
  17. data/corpus/frek.poliqarp.meta-value.image +0 -0
  18. data/corpus/frek.poliqarp.meta-value.offset +0 -0
  19. data/corpus/frek.poliqarp.meta.image +0 -0
  20. data/corpus/frek.poliqarp.orth.image +0 -0
  21. data/corpus/frek.poliqarp.orth.index.alpha +0 -0
  22. data/corpus/frek.poliqarp.orth.index.atergo +0 -0
  23. data/corpus/frek.poliqarp.orth.offset +0 -0
  24. data/corpus/frek.poliqarp.rindex.amb +0 -0
  25. data/corpus/frek.poliqarp.rindex.amb.offset +0 -0
  26. data/corpus/frek.poliqarp.rindex.disamb +0 -0
  27. data/corpus/frek.poliqarp.rindex.disamb.offset +0 -0
  28. data/corpus/frek.poliqarp.rindex.orth +0 -0
  29. data/corpus/frek.poliqarp.rindex.orth.offset +0 -0
  30. data/corpus/frek.poliqarp.subchunk.image +0 -0
  31. data/corpus/frek.poliqarp.subchunk.item.ch +0 -0
  32. data/corpus/frek.poliqarp.subchunk.offset +0 -0
  33. data/corpus/frek.poliqarp.subpos1.image +0 -0
  34. data/corpus/frek.poliqarp.subpos1.offset +0 -0
  35. data/corpus/frek.poliqarp.subpos2.image +0 -0
  36. data/corpus/frek.poliqarp.subpos2.offset +0 -0
  37. data/corpus/frek.poliqarp.tag.image +0 -0
  38. data/corpus/frek.poliqarp.tag.offset +0 -0
  39. data/lib/poliqarpr.rb +5 -0
  40. data/lib/poliqarpr/client.rb +272 -0
  41. data/lib/poliqarpr/excerpt.rb +47 -0
  42. data/lib/poliqarpr/query_result.rb +59 -0
  43. data/poliqarpr.gemspec +63 -0
  44. data/spec/client.rb +88 -0
  45. data/spec/excerpt.rb +95 -0
  46. data/spec/query_result.rb +134 -0
  47. metadata +98 -0
data/README.txt ADDED
@@ -0,0 +1,69 @@
1
+ = poliqarpr
2
+
3
+ * http://github.com/apohllo/poliqarpr
4
+
5
+ == DESCRIPTION:
6
+
7
+ Poliqarpr is a Ruby client for the Poliqarp server.
8
+
9
+
10
+ == FEATURES/PROBLEMS:
11
+
12
+ * asynchronous communication is implemented in a synchronous manner
13
+ * only partial implementation of server protocol
14
+
15
+ == SYNOPSIS:
16
+
17
+ Poliqarpr is a Ruby client for the Poliqarp corpus server (see
18
+ http://poliqarp.sourceforge.net/), which is used to store large texts used in
19
+ Natural Language Processing.
20
+
21
+
22
+ == REQUIREMENTS:
23
+
24
+ Poliqarp server (only C implementation http://poliqarp.sourceforge.net/)
25
+
26
+ == INSTALL:
27
+
28
+ You need RubyGems v. 1.2
29
+
30
+ * gem -v
31
+ * 1.2.0 #=> ok
32
+
33
+ You need the github.com repository to be added to your sources list:
34
+
35
+ * gem sources -a http://gems.github.com
36
+
37
+ Then you can type:
38
+
39
+ * sudo gem install apohllo-poliqarpr
40
+
41
+
42
+ == LICENSE:
43
+
44
+ (The MIT License)
45
+
46
+ Copyright (c) 2008 Aleksander Pohl
47
+
48
+ Permission is hereby granted, free of charge, to any person obtaining
49
+ a copy of this software and associated documentation files (the
50
+ 'Software'), to deal in the Software without restriction, including
51
+ without limitation the rights to use, copy, modify, merge, publish,
52
+ distribute, sublicense, and/or sell copies of the Software, and to
53
+ permit persons to whom the Software is furnished to do so, subject to
54
+ the following conditions:
55
+
56
+ The above copyright notice and this permission notice shall be
57
+ included in all copies or substantial portions of the Software.
58
+
59
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
60
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
61
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
62
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
63
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
64
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
65
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
66
+
67
+ == FEEDBACK
68
+
69
+ * mailto:apohllo@o2.pl
data/Rakefile ADDED
@@ -0,0 +1,23 @@
1
# Rake tasks for building, installing and removing the gem locally.
# Running plain `rake` builds and installs the gem.
task :default => [:install]

# Base name shared by the gemspec file and the built gem archive.
$gem_name = "poliqarpr"

desc "Build the gem"
task :build do
  sh "gem build #$gem_name.gemspec"
end

desc "Install the library at local machine"
task :install => :build do
  # -l restricts the installation to local gem files (no remote fetch).
  sh "sudo gem install #$gem_name -l"
end

desc "Uninstall the library from local machine"
task :uninstall do
  sh "sudo gem uninstall #$gem_name"
end

desc "Clean"
task :clean do
  # rm_f does not fail when no gem archive has been built yet, so
  # `rake clean` can be run on an already clean tree.
  rm_f FileList["#{$gem_name}*.gem"]
end
data/changelog.txt ADDED
@@ -0,0 +1,15 @@
1
+ 0.0.2
2
+ - query result is full blown class
3
+ - source divided into client, excerpt and query result
4
+ - specs for client, excerpt and query result
5
+ - namespace changed to POLIQARP
6
+ - default corpus included
7
+ - singular results properly fetched
8
+ - valid result for queries containing many words
9
+ - same queries which are run in sequence are called only once
10
+ - README.txt included in gem
11
+ - specs included in gem
12
+
13
+ 0.0.1
14
+ - initial implementation
15
+ - synchronous querying for terms
data/corpus/frek.cdf ADDED
@@ -0,0 +1,4 @@
1
+ version = 1
2
+ endianness = little-endian
3
+ indices = oda
4
+ index-granularity = 1024
data/corpus/frek.cfg ADDED
@@ -0,0 +1,100 @@
1
+ # Config file format for Oasis release
2
+ # Config version 1.0
3
+
4
+ # The new startup section may contain any command normally accepted by the shell
5
+
6
+ [ALIASES]
7
+
8
+ masc = m1|m2|m3
9
+ verb = pact|ppas|winien|praet|bedzie|fin|impt|aglt|ger|imps|inf|pant|pcon
10
+ noun = subst|depr|xxs|ger|ppron12|ppron3
11
+ pron = ppron12|ppron3|siebie
12
+
13
+
14
+ [ATTR]
15
+
16
+ number = sg pl
17
+ case = nom gen dat acc inst loc voc
18
+ gender = m1 m2 m3 f n
19
+ person = pri sec ter
20
+ degree = pos comp sup
21
+ aspect = imperf perf
22
+ negation = aff neg
23
+ accommodability = congr rec
24
+ accentability = akc nakc
25
+ post-prepositionality = npraep praep
26
+ agglutination = agl nagl
27
+ vocalicity = nwok wok
28
+
29
+ # Parts of speech no longer need forward declarations, this was inconvenient and ugly.
30
+ # Also, any attribute may be optional so a declaration such as:
31
+ # foo = [bar] [froz] fred [wilma]
32
+ # should no longer cause problems and ctags with such attributes now parse correctly regardless
33
+ # of presence or absence of any optional attribute
34
+
35
+ [POS]
36
+
37
+ adja =
38
+ adjp =
39
+ conj =
40
+ interp =
41
+ pred =
42
+ xxx =
43
+ adv = degree
44
+ imps = aspect
45
+ inf = aspect
46
+ pant = aspect
47
+ pcon = aspect
48
+ qub = [vocalicity]
49
+ prep = case [vocalicity]
50
+ siebie = case
51
+ subst = number case gender
52
+ depr = number case gender
53
+ xxs = number case gender
54
+ ger = number case gender aspect negation
55
+ ppron12 = number case gender person [accentability]
56
+ ppron3 = number case gender person [accentability] [post-prepositionality]
57
+ num = number case gender [accommodability]
58
+ adj = number case gender degree
59
+ pact = number case gender aspect negation
60
+ ppas = number case gender aspect negation
61
+ winien = number gender aspect
62
+ praet = number gender aspect [agglutination]
63
+ bedzie = number person aspect
64
+ fin = number person aspect
65
+ impt = number person aspect
66
+ aglt = number person aspect vocalicity
67
+ ign =
68
+
69
+ # Named entities replaced old 'special' attributes, name changed mostly because of
70
+ # unification of 'named-thing' handling code into one named-entity thing
71
+ # Entity aliasing allows for any existing entity to be seen under different name
72
+ #
73
+ # FCQP provides four builtin entities:
74
+ # entity-current
75
+ # entity-base
76
+ # entity-tag
77
+ # entity-pos
78
+
79
+ [NAMED-ENTITY]
80
+
81
+ entity-orth = orth
82
+ entity-base = base
83
+ entity-tag = tag
84
+ entity-pos = pos
85
+
86
+ # Old 'aliases' for attribute names
87
+
88
+ pos = flex
89
+ number = numb nmb
90
+ case = cas
91
+ gender = gnd gend
92
+ person = per pers
93
+ degree = deg degr
94
+ aspect = asp
95
+ negation = neg
96
+ accommodability = acco acom acm
97
+ accentability = acce acen acn
98
+ post-prepositionality = ppr ppre
99
+ agglutination = agg aggl
100
+ vocalicity = vcl
data/corpus/frek.cfg~ ADDED
@@ -0,0 +1,100 @@
1
+ # Config file format for Oasis release
2
+ # Config version 1.0
3
+
4
+ # The new startup section may contain any command normally accepted by the shell
5
+
6
+ [STARTUP]
7
+
8
+ /alias masc = m1 m2 m3
9
+ /alias verb = pact ppas winien praet bedzie fin impt aglt ger imps inf pant pcon
10
+ /alias noun = subst depr xxs ger ppron12 ppron3
11
+ /alias pron = ppron12 ppron3 siebie
12
+
13
+
14
+ [ATTR]
15
+
16
+ number = sg pl
17
+ case = nom gen dat acc inst loc voc
18
+ gender = m1 m2 m3 f n
19
+ person = pri sec ter
20
+ degree = pos comp sup
21
+ aspect = imperf perf
22
+ negation = aff neg
23
+ accommodability = congr rec
24
+ accentability = akc nakc
25
+ post-prepositionality = npraep praep
26
+ agglutination = agl nagl
27
+ vocalicity = nwok wok
28
+
29
+ # Parts of speech no longer need forward declarations, this was inconvenient and ugly.
30
+ # Also, any attribute may be optional so a declaration such as:
31
+ # foo = [bar] [froz] fred [wilma]
32
+ # should no longer cause problems and ctags with such attributes now parse correctly regardless
33
+ # of presence or absence of any optional attribute
34
+
35
+ [POS]
36
+
37
+ adja =
38
+ adjp =
39
+ conj =
40
+ interp =
41
+ pred =
42
+ xxx =
43
+ adv = degree
44
+ imps = aspect
45
+ inf = aspect
46
+ pant = aspect
47
+ pcon = aspect
48
+ qub = [vocalicity]
49
+ prep = case [vocalicity]
50
+ siebie = case
51
+ subst = number case gender
52
+ depr = number case gender
53
+ xxs = number case gender
54
+ ger = number case gender aspect negation
55
+ ppron12 = number case gender person [accentability]
56
+ ppron3 = number case gender person [accentability] [post-prepositionality]
57
+ num = number case gender [accommodability]
58
+ adj = number case gender degree
59
+ pact = number case gender aspect negation
60
+ ppas = number case gender aspect negation
61
+ winien = number gender aspect
62
+ praet = number gender aspect [agglutination]
63
+ bedzie = number person aspect
64
+ fin = number person aspect
65
+ impt = number person aspect
66
+ aglt = number person aspect vocalicity
67
+ ign =
68
+
69
+ # Named entities replaced old 'special' attributes, name changed mostly because of
70
+ # unification of 'named-thing' handling code into one named-entity thing
71
+ # Entity aliasing allows for any existing entity to be seen under different name
72
+ #
73
+ # FCQP provides four builtin entities:
74
+ # entity-current
75
+ # entity-base
76
+ # entity-tag
77
+ # entity-pos
78
+
79
+ [NAMED-ENTITY]
80
+
81
+ entity-orth = orth
82
+ entity-base = base
83
+ entity-tag = tag
84
+ entity-pos = pos
85
+
86
+ # Old 'aliases' for attribute names
87
+
88
+ pos = flex
89
+ number = numb nmb
90
+ case = cas
91
+ gender = gnd gend
92
+ person = per pers
93
+ degree = deg degr
94
+ aspect = asp
95
+ negation = neg
96
+ accommodability = acco acom acm
97
+ accentability = acce acen acn
98
+ post-prepositionality = ppr ppre
99
+ agglutination = agg aggl
100
+ vocalicity = vcl
@@ -0,0 +1 @@
1
+ S sample
@@ -0,0 +1,4 @@
1
+ (single "sample"
2
+ "/cesHeader/fileDesc/(sourceDesc/biblFull/)*sourceDesc/biblStruct/monogr/h.title")
3
+
4
+
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
data/lib/poliqarpr.rb ADDED
@@ -0,0 +1,5 @@
1
# Library entry point: loads every component that lives in the
# `poliqarpr` directory next to this file.
base_dir = File.join(File.dirname(__FILE__), 'poliqarpr')
%w[client query_result excerpt].each do |component|
  require File.join(base_dir, component)
end
5
+
@@ -0,0 +1,272 @@
1
require 'socket'

module Poliqarp
  # Line-oriented TCP client for a Poliqarp server.
  #
  # Every command is written to the socket as a single line and the reply is
  # read back before the method returns, so the whole API is synchronous.
  # Replies starting with "R ERR" (or, while waiting for an "M" line, any
  # "<char> ERR" line) are turned into RuntimeError exceptions.
  class Client
    # Path prefix of the corpus bundled with the library.
    DEFAULT_CORPUS = File.join(File.expand_path(File.dirname(__FILE__)), "..", "..", "corpus", "frek")

    # Address the client connects to.
    HOST = "localhost"
    PORT = 4567

    # Messages for the numeric codes carried by error replies.
    ERRORS = {
      1 => "Incorrect number of arguments",
      3 => "No session opened",
      4 => "Cannot create a session for a connection that",
      5 => "Not enough memory",
      6 => "Invalid session ID",
      7 => "Session with this ID is already bound",
      8 => "Session user ID does not match the argument",
      10 => "Session already has an open corpus",
      12 => "System error while opening the corpus",
      13 => "No corpus opened",
      14 => "Invalid job ID",
      15 => "A job is already in progress",
      16 => "Incorrect query",
      17 => "Invalid result range",
      18 => "Incorrect session option",
      19 => "Invalid session option value",
      20 => "Invalid sorting criteria"
    }.freeze

    # Option keys (in wire order) read by #tags= and #lemmata=.
    TAG_FLAGS = [:left_context_tags, :leftM_tags,
                 :rightM_tags, :right_context_tags].freeze
    LEMMATA_FLAGS = [:left_context_lemmata, :leftM_lemmata,
                     :rightM_lemmata, :right_context_lemmata].freeze

    # When set to a true value, every sent and received line is echoed
    # to standard output.
    attr_writer :debug

    # Creates the client and immediately opens a server session.
    #
    # session_name:: name passed to MAKE-SESSION
    # debug::        echo the conversation to standard output when true
    def initialize(session_name="RUBY", debug=false)
      @session_name = session_name
      @left_context = 5
      @right_context = 5
      @debug = debug
      @buffer_size = 500000
      new_session
    end

    # Closes the current session (if any), connects to the server and opens
    # a fresh session with the configured buffer size. Tag and lemmata
    # retrieval are reset to "all off".
    def new_session
      close if @session
      @socket = TCPSocket.new(HOST, PORT)
      talk "MAKE-SESSION #{@session_name}"
      rcv_sync
      talk("BUFFER-RESIZE #{@buffer_size}")
      rcv_sync
      @session = true
      # A query cached for the previous session is unknown to the new one.
      forget_last_query
      self.tags = {}
      self.lemmata = {}
    end

    # Sends one command line to the server.
    def talk(msg)
      puts msg if @debug
      @socket.puts(msg)
    end

    # Closes the session and releases the socket. Calling it when no session
    # is open only makes sure the socket is closed.
    def close
      if @session
        talk "CLOSE-SESSION"
        rcv_sync
      end
    ensure
      # The socket is released even if the server answered with an error,
      # otherwise each new_session would leak one connection.
      @socket.close if @socket && !@socket.closed?
      @session = false
    end

    # Sets the width of the left context. Non-integer values are ignored;
    # the local value changes only when the server confirms with "R OK".
    def left_context=(value)
      return unless value.is_a?(Integer)
      talk "SET left-context-width #{value}"
      @left_context = value if rcv_sync =~ /^R OK/
    end

    # Sets the width of the right context. Non-integer values are ignored;
    # the local value changes only when the server confirms with "R OK".
    def right_context=(value)
      return unless value.is_a?(Integer)
      talk "SET right-context-width #{value}"
      @right_context = value if rcv_sync =~ /^R OK/
    end

    # Sends "SET retrieve-tags" with one 0/1 flag per key of TAG_FLAGS;
    # a flag is "1" when the corresponding option is truthy.
    def tags=(options={})
      talk "SET retrieve-tags #{flag_string(options, TAG_FLAGS)}"
      rcv_sync
    end

    # Sends "SET retrieve-lemmata" with one 0/1 flag per key of
    # LEMMATA_FLAGS; a flag is "1" when the corresponding option is truthy.
    def lemmata=(options={})
      talk "SET retrieve-lemmata #{flag_string(options, LEMMATA_FLAGS)}"
      rcv_sync
    end

    # Opens the corpus stored under +path+; +:default+ selects the bundled
    # corpus (DEFAULT_CORPUS). Returns the final "M" line of the reply.
    def open_corpus(path)
      return open_corpus(DEFAULT_CORPUS) if path == :default

      # The cached answer belongs to the previously opened corpus; the same
      # query text must be executed again against the new one.
      forget_last_query
      talk("OPEN #{path}")
      rcv_sync
      rcv_async
    end

    # Runs +query+. With options[:index] a single Excerpt is returned,
    # otherwise a QueryResult honouring :page_size and :page_index.
    def find(query,options={})
      if options[:index]
        find_one(query, options[:index])
      else
        find_many(query, options)
      end
    end

    alias query find

    # Returns the number of results of +query+.
    def count(query)
      count_results(make_query(query))
    end

    # Returns the four parts of the GET-CONTEXT reply for result number
    # +index+ of +query+, as an array of strings.
    def context(query,index)
      make_query(query)
      talk "GET-CONTEXT #{index}"
      # R OK
      rcv_sync
      Array.new(4) { read_word }
    end

    # Returns the metadata of result number +index+ of +query+ as a hash
    # mapping a symbol (the key with every non-ASCII-letter character
    # removed) to the array of values reported for it.
    def metadata(query, index)
      make_query(query)
      result = {}
      talk "METADATA #{index}"
      count = rcv_sync.split(" ")[2].to_i
      count.times do
        type = read_word.gsub(/[^a-zA-Z]/,"").to_sym
        value = rcv_sync[4..-2]
        # A value line shorter than four characters yields nil; skip it.
        next if value.nil?
        (result[type] ||= []) << value
      end
      result
    end

    protected

    # Builds the 0/1 flag string for the given option keys.
    def flag_string(options, keys)
      keys.map { |key| options[key] ? "1" : "0" }.join
    end

    # Drops the cached query so that the next make_query hits the server.
    def forget_last_query
      @last_query = nil
      @last_query_result = nil
    end

    # Fetches one page of results. A :page_size of 0 (the default) means
    # "everything on a single page"; :page_index is 1-based.
    def find_many(query, options)
      page_size = (options[:page_size] || 0)
      page_index = (options[:page_index] || 1)
      result_count = count_results(make_query(query))
      answer_offset = page_size * (page_index - 1)

      if page_size > 0
        # The last page may hold fewer than page_size results.
        remaining = result_count - answer_offset
        answers_limit = remaining < page_size ? remaining : page_size
        page_count = result_count / page_size + (result_count % page_size > 0 ? 1 : 0)
      else
        answers_limit = result_count
        page_count = 1
      end

      result = QueryResult.new(page_index, page_count,page_size,self,query)
      if answers_limit > 0
        talk("GET-RESULTS #{answer_offset} #{answer_offset + answers_limit - 1}")
        # R OK 1
        rcv_sync
        answers_limit.times do |answer_index|
          result << fetch_result(answer_offset + answer_index, query)
        end
      end
      result
    end

    # Fetches the single result number +index+ of +query+.
    def find_one(query,index)
      make_query(query)
      talk("GET-RESULTS #{index} #{index}")
      # R OK 1
      rcv_sync
      fetch_result(index,query)
    end

    # Fetches one result of the query
    #
    # MAKE-QUERY and GET-RESULTS must be called on server before
    # this method is called
    def fetch_result(index, query)
      result = Excerpt.new(index, self, query)
      # left context, matched query, right context
      3.times { result << read_segments }
      result
    end

    # Reads a header line carrying a segment count, then that many word
    # lines, and returns the words concatenated.
    def read_segments
      size = rcv_sync.match(/\d+/)[0].to_i
      Array.new(size) { read_word }.join("")
    end

    # Extracts the result count (third space-separated field) from +answer+.
    def count_results(answer)
      answer.split(" ")[2].to_i
    end

    # Sends MAKE-QUERY/RUN-QUERY unless +query+ equals the previous query,
    # and returns the final "M" line of the RUN-QUERY reply.
    def make_query(query)
      if @last_query != query
        @last_query = query
        talk("MAKE-QUERY #{query}")
        rcv_sync
        talk("RUN-QUERY #{@buffer_size}")
        @last_query_result = rcv_async
      end
      @last_query_result
    end

    # Reads one line and strips its two-character prefix and the newline.
    def read_word
      rcv_sync[2..-2]
    end

    # Reads one newline-terminated line from the socket.
    #
    # Raises EOFError when the server has closed the connection instead of
    # spinning forever on an exhausted socket.
    def read_line
      line = @socket.gets
      raise EOFError, "Poliqarp Error: connection closed by server" if line.nil?
      line
    end

    # Builds the exception for an error reply. Codes missing from ERRORS
    # (or a reply without any number) get a generic description.
    def error_message(line)
      code = line[/\d+/]
      description = (code && ERRORS[code.to_i]) ||
                    "Unknown error (code #{code || 'missing'})"
      RuntimeError.new("Poliqarp Error: " + description)
    end

    # Reads a single reply line; raises when it starts with "R ERR".
    def rcv_sync
      result = read_line
      puts result if @debug
      raise error_message(result) if result =~ /^R ERR/
      result
    end

    # Reads lines until one starting with "M" arrives and returns it;
    # raises on any "<char> ERR" line met on the way.
    def rcv_async
      loop do
        line = read_line
        raise error_message(line) if line =~ /^. ERR/
        puts line if @debug
        return line if line =~ /^M/
      end
    end
  end
end
@@ -0,0 +1,47 @@
1
module Poliqarp
  # One result of a query: the short context collected segment by segment
  # via #<<, plus the long context and metadata, both fetched from the
  # client on first use.
  class Excerpt
    attr_reader :index, :base_form, :short_context

    # index::     position of this result in the query's result list
    # client::    object answering #context and #metadata for (query, index)
    # base_form:: the query this excerpt was produced for
    def initialize(index, client, base_form)
      @index, @client, @base_form = index, client, base_form
      @short_context = []
    end

    # Appends the next segment of the short context.
    def <<(value)
      @short_context.push(value)
    end

    # The second collected segment as a string ("" when there is none).
    def word
      #@short_context[0].split(/\s+/)[-1]
      matched = @short_context[1]
      matched.to_s
    end

    alias inflected_form word

    # All collected segments concatenated.
    def to_s
      @short_context.join("")
    end

    # Long context of the excerpt; asked from the client once, then cached.
    def context
      if @context.nil?
        @context = @client.context(@base_form, @index)
      else
        @context
      end
    end

    # Reader name => key used in the metadata hash returned by the client.
    readers = { :medium => :medium, :style => :styl, :date => :data_wydania,
                :city => :miejsce_wydania, :publisher => :wydawca, :title => :tytu,
                :author => :autor}
    readers.each_pair do |reader, key|
      define_method(reader) { metadata[key] }
    end

    protected

    # Metadata hash of the excerpt; asked from the client once, then cached.
    def metadata
      if @metadata.nil?
        @metadata = @client.metadata(@base_form, @index)
      else
        @metadata
      end
    end
  end
end
@@ -0,0 +1,59 @@
1
module Poliqarp
  # One page of excerpts returned for a query, together with the paging
  # information needed to ask the client for the neighbouring pages.
  class QueryResult
    include Enumerable

    attr_accessor :page, :page_count, :query, :page_size

    # page::       1-based number of this page
    # page_count:: total number of pages
    # page_size::  requested page size
    # client::     object answering #find(query, options)
    # query::      the query the results belong to
    def initialize(page, page_count, page_size, client, query)
      @page = page
      @page_count = page_count
      @page_size = page_size
      @client = client
      @query = query
      @excerpts = []
    end

    # Appends an excerpt to the page.
    def <<(excerpt)
      @excerpts << excerpt
    end

    # Yields every excerpt in order. Without a block an Enumerator is
    # returned, as is conventional for Enumerable classes, instead of
    # failing with LocalJumpError.
    def each
      return enum_for(:each) unless block_given?
      @excerpts.each{|e| yield e}
    end

    # first, last and empty? are answered by the underlying array.
    [:first, :last, :empty?].each do |method|
      define_method method do
        @excerpts.send(method)
      end
    end

    # Returns the excerpt stored at +index+ on this page.
    def [](index)
      @excerpts[index]
    end

    # Two results are equal when they describe the same page of the same
    # query; the excerpts themselves are not compared.
    def ==(other)
      return false unless other.is_a? QueryResult
      @page == other.page && @page_count == other.page_count &&
        @query == other.query && @page_size == other.page_size
    end

    # Returns the previous page, or nil when this is the first one.
    def previous_page
      if @page > 1
        @client.find(@query, :page_size => @page_size,
                     :page_index => @page - 1)
      end
    end

    # Returns the next page, or nil when this is the last one.
    def next_page
      if @page < @page_count
        @client.find(@query, :page_size => @page_size,
                     :page_index => @page + 1)
      end
    end

    # Number of excerpts on this page.
    def size
      @excerpts.size
    end

  end
end
data/poliqarpr.gemspec ADDED
@@ -0,0 +1,63 @@
1
+ #require 'rake'
2
+
3
# Gem specification of poliqarpr: the library sources, the README and
# changelog, and the bundled "frek" corpus files.
Gem::Specification.new do |s|
  s.name = "poliqarpr"
  s.version = "0.0.2"
  s.date = "2008-12-15"
  s.summary = "Ruby client for Poliqarp"
  s.email = "apohllo@o2.pl"
  s.homepage = "http://www.apohllo.pl/projekty/poliqarpr"
  s.description = "Ruby client for Poliqarp (NLP corpus server)"
  s.authors = ['Aleksander Pohl']
  s.files = ["Rakefile", "poliqarpr.gemspec", 'lib/poliqarpr.rb',
    "changelog.txt",
    "lib/poliqarpr/client.rb",
    "lib/poliqarpr/query_result.rb",
    "lib/poliqarpr/excerpt.rb",
    "README.txt",
    "corpus/frek.cdf",
    "corpus/frek.poliqarp.base1.image",
    "corpus/frek.poliqarp.corpus.image",
    "corpus/frek.poliqarp.meta-value.offset",
    "corpus/frek.poliqarp.rindex.amb",
    "corpus/frek.poliqarp.rindex.orth.offset",
    "corpus/frek.poliqarp.subpos1.offset",
    "corpus/frek.cfg",
    "corpus/frek.poliqarp.base1.offset",
    "corpus/frek.poliqarp.meta.image",
    "corpus/frek.poliqarp.orth.image",
    "corpus/frek.poliqarp.rindex.amb.offset",
    "corpus/frek.poliqarp.subchunk.image",
    "corpus/frek.poliqarp.subpos2.image",
    "corpus/frek.cfg~",
    "corpus/frek.poliqarp.base2.image",
    "corpus/frek.poliqarp.meta-key.image",
    "corpus/frek.poliqarp.orth.index.alpha",
    "corpus/frek.poliqarp.rindex.disamb",
    "corpus/frek.poliqarp.subchunk.item.ch",
    "corpus/frek.poliqarp.subpos2.offset",
    "corpus/frek.meta.cfg",
    "corpus/frek.poliqarp.base2.offset",
    "corpus/frek.poliqarp.meta-key.offset",
    "corpus/frek.poliqarp.orth.index.atergo",
    "corpus/frek.poliqarp.rindex.disamb.offset",
    "corpus/frek.poliqarp.subchunk.offset",
    "corpus/frek.poliqarp.tag.image",
    "corpus/frek.meta.lisp",
    "corpus/frek.poliqarp.chunk.image",
    "corpus/frek.poliqarp.meta-value.image",
    "corpus/frek.poliqarp.orth.offset",
    "corpus/frek.poliqarp.rindex.orth",
    "corpus/frek.poliqarp.subpos1.image",
    "corpus/frek.poliqarp.tag.offset"
  ]
  s.test_files = [
    "spec/client.rb",
    "spec/query_result.rb",
    "spec/excerpt.rb"
  ]
  s.rdoc_options = ["--main", "README.txt"]
  # has_rdoc= is intentionally not set: the attribute is deprecated and
  # calling its writer fails on RubyGems versions that no longer define it.
  # The rdoc options and extra files are enough to describe the docs.
  s.extra_rdoc_files = ["README.txt"]
end
63
+
data/spec/client.rb ADDED
@@ -0,0 +1,88 @@
1
+ require File.join(File.dirname(__FILE__), '..','lib','poliqarpr')
2
+
3
+ describe Poliqarp::Client do
4
+ describe "(general test)" do
5
+ before(:each) do
6
+ @client = Poliqarp::Client.new("TEST")
7
+ end
8
+
9
+ after(:each) do
10
+ @client.close
11
+ end
12
+
13
+ it "should allow to open corpus" do
14
+ @client.open_corpus("/home/fox/local/poliqarp/2.sample.30/sample")
15
+ end
16
+
17
+ it "should allow to open :default corpus" do
18
+ @client.open_corpus(:default)
19
+ end
20
+ end
21
+
22
+ describe "(with 'sample' corpus)" do
23
+ before(:all) do
24
+ @client = Poliqarp::Client.new("TEST")
25
+ @client.open_corpus("/home/fox/local/poliqarp/2.sample.30/sample")
26
+ end
27
+
28
+ after(:all) do
29
+ @client.close
30
+ end
31
+
32
+ it "should allow to find 'kot'" do
33
+ @client.find("kot").size.should_not == 0
34
+ end
35
+
36
+ it "should contain 'kot' in query result for [base=kot]" do
37
+ @client.find("[base=kot]")[0].to_s.should match(/\bkot\b/)
38
+ end
39
+
40
+ it "should allow to find 'Afrodyta [] od" do
41
+ @client.find("Afrodyta [] od").size.should_not == 0
42
+ end
43
+
44
+ it "should contain 'Afrodyta .* od' for 'Afrodyta [] od' query " do
45
+ @client.find("Afrodyta [] od")[0].to_s.should match(/Afrodyta .* od/)
46
+ end
47
+
48
+ it "should return collection for find without index specified" do
49
+ @client.find("kot").should respond_to(:[])
50
+ end
51
+
52
+ it "should allow to query for term occurences" do
53
+ @client.count("kot").should_not == nil
54
+ end
55
+
56
+ it "should return 188 occurences of 'kot'" do
57
+ @client.count("kot").should == 188
58
+ end
59
+
60
+ it "should allow to find first occurence of 'kot'" do
61
+ @client.find("kot",:index => 0).should_not == nil
62
+ end
63
+
64
+ it "should return different results for different queries" do
65
+ @client.find("kot").should_not ==
66
+ @client.find("kita")
67
+ end
68
+
69
+ it "should return same results for same queries" do
70
+ @client.find("kita").should == @client.find("kita")
71
+ end
72
+
73
+ describe("(with index specified in find)") do
74
+ before(:each) do
75
+ @result = @client.find("nachalny",:index => 0)
76
+ end
77
+
78
+ it "should not return collection for find" do
79
+ @result.should_not respond_to(:[])
80
+ end
81
+
82
+ it "should fetch the same excerpt as in find without index " do
83
+ @result.to_s.should == @client.find("nachalny")[0].to_s
84
+ end
85
+ end
86
+ end
87
+
88
+ end
data/spec/excerpt.rb ADDED
@@ -0,0 +1,95 @@
1
+ require File.join(File.dirname(__FILE__), '..','lib','poliqarpr')
2
+
3
+ describe Poliqarp::Excerpt do
4
+ before(:all) do
5
+ @client = Poliqarp::Client.new("TEST")
6
+ end
7
+
8
+ after(:all) do
9
+ @client.close
10
+ end
11
+
12
+ describe "(unspecified excerpt)" do
13
+ before(:all) do
14
+ @client.open_corpus(:default)
15
+ @excerpt = @client.find("kot").first
16
+ end
17
+
18
+ it "should have index" do
19
+ @excerpt.index.should_not == nil
20
+ end
21
+
22
+ it "should have base form" do
23
+ @excerpt.base_form.should_not == nil
24
+ end
25
+
26
+ it "should allow to add short context" do
27
+ @excerpt << "abc"
28
+ end
29
+
30
+ it "should contain the exact form which it was created for" do
31
+ @excerpt.inflected_form.should_not == nil
32
+ end
33
+
34
+ it "should contain the long context of the word" do
35
+ @excerpt.context.should_not == nil
36
+ end
37
+ end
38
+
39
+ describe "(first exceprt for 'kot' in 'sample' corpus)" do
40
+ before(:all) do
41
+ @client.open_corpus("/home/fox/local/poliqarp/2.sample.30/sample")
42
+ @excerpt = @client.find("kot").first
43
+ end
44
+
45
+ it "should have index set to 0" do
46
+ @excerpt.index.should == 0
47
+ end
48
+
49
+ it "should have base form set to 'kot'" do
50
+ @excerpt.base_form.should == "kot"
51
+ end
52
+
53
+ it "should have 'kot' as inflected form " do
54
+ @excerpt.inflected_form.should_not == nil
55
+ end
56
+
57
+ it "should contain the long context of the word" do
58
+ @excerpt.context.to_s.size.should > 10
59
+ end
60
+
61
+ it "should have one 'medium' set to 'książka'" do
62
+ @excerpt.medium.size.should == 1
63
+ @excerpt.medium[0].should == "książka"
64
+ end
65
+
66
it "should have 2 'styles' set to 'naukowo-dydaktyczny' and 'naukowo-humanistyczny'" do
  @excerpt.style.size.should == 2
  # A bare include? call discards its result and can never fail the
  # example, so the membership checks are expressed as expectations.
  @excerpt.style.should include("naukowo-dydaktyczny")
  @excerpt.style.should include("naukowo-humanistyczny")
end
71
+
72
+ it "should have 'date' set to nil" do
73
+ @excerpt.date.should == nil
74
+ end
75
+
76
+ it "should have 'city' set to nil" do
77
+ @excerpt.city.should == nil
78
+ end
79
+
80
+ it "should have one 'publisher' set to 'Wydawnictwo Naukowe Akademii Pedagogicznej'" do
81
+ @excerpt.publisher.size.should == 1
82
+ @excerpt.publisher[0].should == "Wydawnictwo Naukowe Akademii Pedagogicznej"
83
+ end
84
+
85
+ it "should have one 'title' set to 'Wczesne nauczanie języków obcych. Integracja języka obcego z przedmiotami artystycznymi w młodszych klasach szkoły podstawowej'" do
86
+ @excerpt.title.size.should == 1
87
+ @excerpt.title[0].should == "Wczesne nauczanie języków obcych. Integracja języka obcego z przedmiotami artystycznymi w młodszych klasach szkoły podstawowej"
88
+ end
89
+
90
+ it "should have one 'author' set to 'Małgorzata Pamuła'" do
91
+ @excerpt.author.size.should == 1
92
+ @excerpt.author[0].should == "Małgorzata Pamuła"
93
+ end
94
+ end
95
+ end
@@ -0,0 +1,134 @@
1
+ require File.join(File.dirname(__FILE__), '..','lib','poliqarpr')
2
+
3
+ describe Poliqarp::QueryResult do
4
+ before(:all) do
5
+ @client = Poliqarp::Client.new("TEST")
6
+ @client.open_corpus(:default)
7
+ end
8
+
9
+ after(:all) do
10
+ @client.close
11
+ end
12
+
13
+ describe "(for unspecified query)" do
14
+ before(:all) do
15
+ @result = @client.find("kita")
16
+ end
17
+
18
+ it "should not be nil" do
19
+ @result.should_not == nil
20
+ end
21
+
22
+ it "should containt its size" do
23
+ @result.size.should_not == nil
24
+ end
25
+
26
+ it "should be iterable" do
27
+ @result.each do |excerpt|
28
+ excerpt.should_not == nil
29
+ end
30
+ end
31
+
32
+ it "should allow to add excerpt" do
33
+ @result << Poliqarp::Excerpt.new(0,@client, "abc")
34
+ end
35
+
36
+ it "should contain current page" do
37
+ @result.page.should_not == nil
38
+ end
39
+
40
+ it "should contain the page count" do
41
+ @result.page_count.should_not == nil
42
+ end
43
+
44
+ it "should allow to call previous page" do
45
+ @result.previous_page
46
+ end
47
+
48
+ it "should allow to call next page" do
49
+ @result.next_page
50
+ end
51
+
52
+ it "should be the same if the query is the same" do
53
+ @result.should == @client.find("kita")
54
+ end
55
+ end
56
+
57
+ describe "(for 'kot' in :default corpus)" do
58
+ before(:all) do
59
+ @result = @client.find("kot")
60
+ end
61
+
62
+ it "should have size == 6" do
63
+ @result.size.should == 6
64
+ end
65
+
66
+ it "should have page set to 1" do
67
+ @result.page.should == 1
68
+ end
69
+
70
+ it "should contain only one page" do
71
+ @result.page_count.should == 1
72
+ end
73
+
74
+ it "should not have previous page" do
75
+ @result.previous_page.should == nil
76
+ end
77
+
78
+ it "should not have next page" do
79
+ @result.next_page.should == nil
80
+ end
81
+ end
82
+
83
+ describe "(for 'kot' with page_size set to 5 in :default corpus)" do
84
+ before(:all) do
85
+ @result = @client.find("kot", :page_size => 5)
86
+ end
87
+
88
+ it "should have size == 5" do
89
+ @result.size.should == 5
90
+ end
91
+
92
+ it "should have page set to 1" do
93
+ @result.page.should == 1
94
+ end
95
+
96
+ it "should contain 2 pages" do
97
+ @result.page_count.should == 2
98
+ end
99
+
100
+ it "should not have previous page" do
101
+ @result.previous_page.should == nil
102
+ end
103
+
104
+ it "should have next page" do
105
+ @result.next_page.should_not == nil
106
+ end
107
+ end
108
+
109
+ describe "(next for 'kot' with page_size set to 5 in :default corpus)" do
110
+ before(:all) do
111
+ @result = @client.find("kot", :page_size => 5).next_page
112
+ end
113
+
114
+ it "should have size == 1" do
115
+ @result.size.should == 1
116
+ end
117
+
118
+ it "should have page set to 2" do
119
+ @result.page.should == 2
120
+ end
121
+
122
+ it "should contain 2 pages" do
123
+ @result.page_count.should == 2
124
+ end
125
+
126
+ it "should have previous page" do
127
+ @result.previous_page.should_not == nil
128
+ end
129
+
130
+ it "should not have next page" do
131
+ @result.next_page.should == nil
132
+ end
133
+ end
134
+ end
metadata ADDED
@@ -0,0 +1,98 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: apohllo-poliqarpr
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Aleksander Pohl
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2008-12-15 00:00:00 -08:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description: Ruby client for Poliqarp (NLP corpus server)
17
+ email: apohllo@o2.pl
18
+ executables: []
19
+
20
+ extensions: []
21
+
22
+ extra_rdoc_files:
23
+ - README.txt
24
+ files:
25
+ - Rakefile
26
+ - poliqarpr.gemspec
27
+ - lib/poliqarpr.rb
28
+ - changelog.txt
29
+ - lib/poliqarpr/client.rb
30
+ - lib/poliqarpr/query_result.rb
31
+ - lib/poliqarpr/excerpt.rb
32
+ - README.txt
33
+ - corpus/frek.cdf
34
+ - corpus/frek.poliqarp.base1.image
35
+ - corpus/frek.poliqarp.corpus.image
36
+ - corpus/frek.poliqarp.meta-value.offset
37
+ - corpus/frek.poliqarp.rindex.amb
38
+ - corpus/frek.poliqarp.rindex.orth.offset
39
+ - corpus/frek.poliqarp.subpos1.offset
40
+ - corpus/frek.cfg
41
+ - corpus/frek.poliqarp.base1.offset
42
+ - corpus/frek.poliqarp.meta.image
43
+ - corpus/frek.poliqarp.orth.image
44
+ - corpus/frek.poliqarp.rindex.amb.offset
45
+ - corpus/frek.poliqarp.subchunk.image
46
+ - corpus/frek.poliqarp.subpos2.image
47
+ - corpus/frek.cfg~
48
+ - corpus/frek.poliqarp.base2.image
49
+ - corpus/frek.poliqarp.meta-key.image
50
+ - corpus/frek.poliqarp.orth.index.alpha
51
+ - corpus/frek.poliqarp.rindex.disamb
52
+ - corpus/frek.poliqarp.subchunk.item.ch
53
+ - corpus/frek.poliqarp.subpos2.offset
54
+ - corpus/frek.meta.cfg
55
+ - corpus/frek.poliqarp.base2.offset
56
+ - corpus/frek.poliqarp.meta-key.offset
57
+ - corpus/frek.poliqarp.orth.index.atergo
58
+ - corpus/frek.poliqarp.rindex.disamb.offset
59
+ - corpus/frek.poliqarp.subchunk.offset
60
+ - corpus/frek.poliqarp.tag.image
61
+ - corpus/frek.meta.lisp
62
+ - corpus/frek.poliqarp.chunk.image
63
+ - corpus/frek.poliqarp.meta-value.image
64
+ - corpus/frek.poliqarp.orth.offset
65
+ - corpus/frek.poliqarp.rindex.orth
66
+ - corpus/frek.poliqarp.subpos1.image
67
+ - corpus/frek.poliqarp.tag.offset
68
+ has_rdoc: true
69
+ homepage: http://www.apohllo.pl/projekty/poliqarpr
70
+ post_install_message:
71
+ rdoc_options:
72
+ - --main
73
+ - README.txt
74
+ require_paths:
75
+ - lib
76
+ required_ruby_version: !ruby/object:Gem::Requirement
77
+ requirements:
78
+ - - ">="
79
+ - !ruby/object:Gem::Version
80
+ version: "0"
81
+ version:
82
+ required_rubygems_version: !ruby/object:Gem::Requirement
83
+ requirements:
84
+ - - ">="
85
+ - !ruby/object:Gem::Version
86
+ version: "0"
87
+ version:
88
+ requirements: []
89
+
90
+ rubyforge_project:
91
+ rubygems_version: 1.2.0
92
+ signing_key:
93
+ specification_version: 2
94
+ summary: Ruby client for Poliqarp
95
+ test_files:
96
+ - spec/client.rb
97
+ - spec/query_result.rb
98
+ - spec/excerpt.rb