poliqarpr 0.0.5 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.txt CHANGED
@@ -60,7 +60,7 @@ step of installation process).
60
60
 
61
61
  Require the gem:
62
62
 
63
- require 'poliaqarpr'
63
+ require 'poliqarpr'
64
64
 
65
65
  Create the server client and open default corpus
66
66
 
@@ -76,6 +76,8 @@ Remember to close the client on exit
76
76
 
77
77
  client.close
78
78
 
79
+ NOTE: If you wish to run the specs, you need the 'default' and '2.sample.30'
80
+ corpuses.
79
81
 
80
82
  == LICENSE:
81
83
 
data/changelog.txt CHANGED
@@ -1,3 +1,16 @@
1
+ 0.0.8
2
+ - Speed optimization: socket puts changed to write
3
+
4
+ 0.0.7
5
+ - QueryResult#to_a method added
6
+ - Lexeme#tags method added
7
+ - fix: README invalid require
8
+
9
+ 0.0.6
10
+ - fix: Excerpt#word - the words constituting the matched query
11
+ - new: Excerpt#matched, Excerpt#right_context, Excerpt#left_context
12
+ return the matched, right context, left context segments respectively
13
+
1
14
  0.0.5
2
15
  - Bugfix: making parallel query might lead to silent thread death
3
16
  - Support for Ruby 1.9 encoding
@@ -366,7 +366,7 @@ protected
366
366
  lemmata.base_form = read_word
367
367
  end
368
368
  if @tag_flags[group]
369
- read_word
369
+ lemmata.tags = read_word
370
370
  end
371
371
  segment.lemmata << lemmata
372
372
  end
@@ -70,7 +70,8 @@ module Poliqarp
70
70
  if ruby19?
71
71
  massage = message.encode(UTF8)
72
72
  end
73
- @socket.puts(message)
73
+ #@socket.puts(message)
74
+ @socket.write(message+"\n")
74
75
  if mode == :async
75
76
  @handler = handler
76
77
  end
@@ -25,11 +25,25 @@ module Poliqarp
25
25
  @short_context << value
26
26
  end
27
27
 
28
+ # Returns the matched segments
29
+ def matched
30
+ @short_context[1]
31
+ end
32
+
33
+ # Returns the segments of the left short context of the match
34
+ def left_context
35
+ @short_context[0]
36
+ end
37
+
38
+ # Returns the segments of the right short context of the match
39
+ def right_context
40
+ @short_context[2]
41
+ end
28
42
 
29
43
  # Returns the matched query as string
30
44
  def word
31
45
  #@short_context[0].split(/\s+/)[-1]
32
- @short_context[1].to_s
46
+ @short_context[1].map{|s| s.to_s}.join("")
33
47
  end
34
48
 
35
49
  alias inflected_form word
@@ -4,7 +4,7 @@ module Poliqarp
4
4
  #
5
5
  # The lemmata contains the base form of the segment
6
6
  class Lemmata
7
- attr_accessor :base_form
7
+ attr_accessor :base_form, :tags
8
8
  def initialize()
9
9
  end
10
10
  end
@@ -69,5 +69,10 @@ module Poliqarp
69
69
  @excerpts.size
70
70
  end
71
71
 
72
+ # Converts current query result page into an array.
73
+ def to_a
74
+ @excerpts.dup
75
+ end
76
+
72
77
  end
73
78
  end
data/poliqarpr.gemspec CHANGED
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = "poliqarpr"
3
- s.version = "0.0.5"
4
- s.date = "2009-12-10"
3
+ s.version = "0.0.8"
4
+ s.date = "2011-01-12"
5
5
  s.summary = "Ruby client for Poliqarp"
6
6
  s.email = "apohllo@o2.pl"
7
7
  s.homepage = "http://www.github.com/apohllo/poliqarpr"
@@ -9,7 +9,7 @@ Gem::Specification.new do |s|
9
9
  s.authors = ['Aleksander Pohl']
10
10
  s.files = ["Rakefile", "poliqarpr.gemspec",
11
11
  "changelog.txt", "README.txt" ] + Dir.glob("lib/**/*")
12
- s.test_files = Dir.glob("test/**/*")
12
+ s.test_files = Dir.glob("spec/**/*")
13
13
  s.rdoc_options = ["--main", "README.txt"]
14
14
  s.has_rdoc = true
15
15
  s.extra_rdoc_files = ["README.txt"]
data/spec/client.rb ADDED
@@ -0,0 +1,171 @@
1
+ #vim:encoding=utf-8
2
+ $:.unshift("lib")
3
+ require 'poliqarpr'
4
+
5
+ describe Poliqarp::Client do
6
+ describe "(general test)" do
7
+ before(:each) do
8
+ @client = Poliqarp::Client.new("TEST")
9
+ end
10
+
11
+ after(:each) do
12
+ @client.close
13
+ end
14
+
15
+ it "should allow to open corpus" do
16
+ @client.open_corpus("/home/fox/local/poliqarp/2.sample.30/sample")
17
+ end
18
+
19
+ it "should allow to open :default corpus" do
20
+ @client.open_corpus(:default)
21
+ end
22
+
23
+ it "should respond to :ping" do
24
+ @client.ping.should == :pong
25
+ end
26
+
27
+ it "should return server version" do
28
+ @client.version.should_not == nil
29
+ end
30
+
31
+ end
32
+
33
+ describe "(with 'sample' corpus)" do
34
+ before(:all) do
35
+ @client = Poliqarp::Client.new("TEST")
36
+ @client.open_corpus("/home/fox/local/poliqarp/2.sample.30/sample")
37
+ end
38
+
39
+ after(:all) do
40
+ @client.close
41
+ end
42
+
43
+ it "should allow to set the right context size" do
44
+ @client.right_context = 5
45
+ end
46
+
47
+ it "should raise error if the size of right context is not number" do
48
+ (proc do
49
+ @client.right_context = "a"
50
+ end).should raise_error(RuntimeError)
51
+ end
52
+
53
+ it "should rais error if the size of right context is less or equal 0" do
54
+ (proc do
55
+ @client.right_context = 0
56
+ end).should raise_error(RuntimeError)
57
+ end
58
+
59
+ it "should allow to set the left context size" do
60
+ @client.right_context = 5
61
+ end
62
+
63
+ it "should raise error if the size of left context is not number" do
64
+ (lambda do
65
+ @client.left_context = "a"
66
+ end).should raise_error(RuntimeError)
67
+ end
68
+
69
+ it "should rais error if the size of left context is less or equal 0" do
70
+ (lambda do
71
+ @client.left_context = 0
72
+ end).should raise_error(RuntimeError)
73
+ end
74
+
75
+ it "should return corpus statistics" do
76
+ stats = @client.stats
77
+ stats.size.should == 4
78
+ [:segment_tokens, :segment_types, :lemmata, :tags].each do |type|
79
+ stats[type].should_not == nil
80
+ stats[type].should > 0
81
+ end
82
+ end
83
+
84
+ it "should return the corpus tagset" do
85
+ tagset = @client.tagset
86
+ tagset[:categories].should_not == nil
87
+ tagset[:classes].should_not == nil
88
+ end
89
+
90
+ it "should allow to find 'kot'" do
91
+ @client.find("kot").size.should_not == 0
92
+ end
93
+
94
+ it "should contain 'kot' in query result for [base=kot]" do
95
+ @client.find("[base=kot]")[0].to_s.should match(/\bkot\b/)
96
+ end
97
+
98
+ it "should allow to find 'Afrodyta [] od" do
99
+ @client.find("Afrodyta [] od").size.should_not == 0
100
+ end
101
+
102
+ it "should contain 'Afrodyta .* od' for 'Afrodyta [] od' query " do
103
+ @client.find("Afrodyta [] od")[0].to_s.should match(/Afrodyta .* od/)
104
+ end
105
+
106
+ it "should return collection for find without index specified" do
107
+ @client.find("kot").should respond_to(:[])
108
+ end
109
+
110
+ it "should allow to query for term occurences" do
111
+ @client.count("kot").should_not == nil
112
+ end
113
+
114
+ it "should return 188 occurences of 'kot'" do
115
+ @client.count("kot").should == 188
116
+ end
117
+
118
+ it "should allow to find first occurence of 'kot'" do
119
+ @client.find("kot",:index => 0).should_not == nil
120
+ end
121
+
122
+ it "should return different results for different queries" do
123
+ @client.find("kot").should_not ==
124
+ @client.find("kita")
125
+ end
126
+
127
+ it "should return same results for same queries" do
128
+ @client.find("kita").should == @client.find("kita")
129
+ end
130
+
131
+ describe("(with index specified in find)") do
132
+ before(:each) do
133
+ @result = @client.find("nachalny",:index => 0)
134
+ end
135
+
136
+ it "should not return collection for find" do
137
+ @result.should_not respond_to(:[])
138
+ end
139
+
140
+ it "should not be nil" do
141
+ @result.should_not == nil
142
+ end
143
+
144
+ it "should fetch the same excerpt as in find without index " do
145
+ @result.to_s.should == @client.find("nachalny")[0].to_s
146
+ end
147
+ end
148
+
149
+ describe("(with lemmata flags set to true)") do
150
+ before(:all) do
151
+ @client.lemmata = {:left_context => true, :right_context => true,
152
+ :left_match => true, :right_match => true}
153
+ end
154
+
155
+ it "should allow to find 'kotu'" do
156
+ @client.find("kotu").size.should_not == 0
157
+ end
158
+
159
+ it "should contain 'kotu' in query result for 'kotu'" do
160
+ @client.find("kotu")[0].to_s.should match(/\bkotu\b/)
161
+ end
162
+
163
+ it "should contain 'kot' in lemmatized query result for 'kotu'" do
164
+ @client.find("kotu")[0].short_context.flatten.
165
+ map{|e| e.lemmata[0].base_form}.join(" ").should match(/\bkot\b/)
166
+ end
167
+
168
+ end
169
+ end
170
+
171
+ end
data/spec/excerpt.rb ADDED
@@ -0,0 +1,142 @@
1
+ #vim:encoding=utf-8
2
+ $:.unshift("lib")
3
+ require 'poliqarpr'
4
+
5
+ describe Poliqarp::Excerpt do
6
+ before(:all) do
7
+ @client = Poliqarp::Client.new("TEST")
8
+ end
9
+
10
+ after(:all) do
11
+ @client.close
12
+ end
13
+
14
+ describe "(unspecified excerpt)" do
15
+ before(:all) do
16
+ @client.open_corpus(:default)
17
+ @excerpt = @client.find("kot").first
18
+ end
19
+
20
+ after(:all) do
21
+ @client.close_corpus
22
+ end
23
+
24
+ it "should have index" do
25
+ @excerpt.index.should_not == nil
26
+ end
27
+
28
+ it "should have base form" do
29
+ @excerpt.base_form.should_not == nil
30
+ end
31
+
32
+ it "should contain 3 groups in short context" do
33
+ @excerpt.short_context.size.should == 3
34
+ end
35
+
36
+ it "should allow to add segment group" do
37
+ @excerpt << [Poliqarp::Segment.new("abc")]
38
+ end
39
+
40
+
41
+ it "should contain non empty segments in short context" do
42
+ @excerpt.short_context.flatten.each do |segment|
43
+ segment.literal.should_not == nil
44
+ end
45
+ end
46
+
47
+ it "should contain the exact form which it was created for" do
48
+ @excerpt.inflected_form.should_not == nil
49
+ end
50
+
51
+ it "should contain the long context of the word" do
52
+ @excerpt.context.should_not == nil
53
+ end
54
+ end
55
+
56
+ describe "(first exceprt for 'mu za to astronomiczną' in 'sample' corpus)" do
57
+ before(:all) do
58
+ @client.open_corpus("/home/fox/local/poliqarp/2.sample.30/sample")
59
+ @excerpt = @client.find("mu za to astronomiczną").first
60
+ end
61
+
62
+ after(:all) do
63
+ @client.close_corpus
64
+ end
65
+
66
+ it "should have index set to 0" do
67
+ @excerpt.index.should == 0
68
+ end
69
+
70
+ it "should have base form set to 'kot'" do
71
+ @excerpt.base_form.should == "mu za to astronomiczną"
72
+ end
73
+
74
+ it "should have 'kot' as inflected form " do
75
+ @excerpt.inflected_form.should_not == nil
76
+ end
77
+
78
+ it "should contain the long context of the word" do
79
+ @excerpt.context.to_s.size.should > 10
80
+ end
81
+
82
+ it "should have one 'medium' set to 'książka'" do
83
+ @excerpt.medium.size.should == 1
84
+ @excerpt.medium[0].should == "książka"
85
+ end
86
+
87
+ it "should have 2 'styles' set to 'naukowo-dydaktyczny' and 'naukowo-humanistyczny'" do
88
+ @excerpt.style.size.should == 1
89
+ @excerpt.style.include?("naukowo-dydaktyczny")
90
+ end
91
+
92
+ it "should have 'date' set to nil" do
93
+ @excerpt.date.should == nil
94
+ end
95
+
96
+ it "should have 'city' set to nil" do
97
+ @excerpt.city.should == nil
98
+ end
99
+
100
+ it "should have one 'publisher' set to 'Wydawnictwo Naukowe Akademii Pedagogicznej'" do
101
+ @excerpt.publisher.size.should == 1
102
+ @excerpt.publisher[0].should == "Wydawnictwo W.A.B."
103
+ end
104
+
105
+ it "should have one 'title' set to 'Wczesne nauczanie języków obcych. Integracja języka obcego z przedmiotami artystycznymi w młodszych klasach szkoły podstawowej'" do
106
+ @excerpt.title.size.should == 1
107
+ @excerpt.title[0].should == "Modlitwa o deszcz"
108
+ end
109
+
110
+ it "should have one 'author' set to 'Małgorzata Pamuła'" do
111
+ @excerpt.author.size.should == 1
112
+ @excerpt.author[0].should == "Wojciech Jagielski"
113
+ end
114
+ end
115
+
116
+ describe('first result for "kotu" with lemmatization turned on') do
117
+ before(:all) do
118
+ @client.lemmata = :all
119
+ @client.open_corpus(:default)
120
+ @excerpt = @client.find("kotu")[0]
121
+ end
122
+
123
+ it "should have one lemmata for each segment" do
124
+ @excerpt.short_context.each do |group|
125
+ group.each do |segment|
126
+ segment.lemmata.size.should == 1
127
+ end
128
+ end
129
+ end
130
+
131
+ it "should have non-nil lemmata for each segment" do
132
+ @excerpt.short_context.flatten.each do |segment|
133
+ segment.lemmata[0].should_not == nil
134
+ end
135
+ end
136
+
137
+ it "should contain 'kot' as one of the lemmata" do
138
+ @excerpt.short_context.flatten.
139
+ any?{|s| s.lemmata[0].base_form == "kot"}.should == true
140
+ end
141
+ end
142
+ end
@@ -0,0 +1,136 @@
1
+ #vim:encoding=utf-8
2
+ $:.unshift("lib")
3
+ require 'poliqarpr'
4
+
5
+ describe Poliqarp::QueryResult do
6
+ before(:all) do
7
+ @client = Poliqarp::Client.new("TEST")
8
+ @client.open_corpus(:default)
9
+ end
10
+
11
+ after(:all) do
12
+ @client.close
13
+ end
14
+
15
+ describe "(for unspecified query)" do
16
+ before(:all) do
17
+ @result = @client.find("kita")
18
+ end
19
+
20
+ it "should not be nil" do
21
+ @result.should_not == nil
22
+ end
23
+
24
+ it "should containt its size" do
25
+ @result.size.should_not == nil
26
+ end
27
+
28
+ it "should be iterable" do
29
+ @result.each do |excerpt|
30
+ excerpt.should_not == nil
31
+ end
32
+ end
33
+
34
+ it "should allow to add excerpt" do
35
+ @result << Poliqarp::Excerpt.new(0,@client, "abc")
36
+ end
37
+
38
+ it "should contain current page" do
39
+ @result.page.should_not == nil
40
+ end
41
+
42
+ it "should contain the page count" do
43
+ @result.page_count.should_not == nil
44
+ end
45
+
46
+ it "should allow to call previous page" do
47
+ @result.previous_page
48
+ end
49
+
50
+ it "should allow to call next page" do
51
+ @result.next_page
52
+ end
53
+
54
+ it "should be the same if the query is the same" do
55
+ @result.should == @client.find("kita")
56
+ end
57
+ end
58
+
59
+ describe "(for 'kot' in :default corpus)" do
60
+ before(:all) do
61
+ @result = @client.find("kot")
62
+ end
63
+
64
+ it "should have size == 6" do
65
+ @result.size.should == 6
66
+ end
67
+
68
+ it "should have page set to 1" do
69
+ @result.page.should == 1
70
+ end
71
+
72
+ it "should contain only one page" do
73
+ @result.page_count.should == 1
74
+ end
75
+
76
+ it "should not have previous page" do
77
+ @result.previous_page.should == nil
78
+ end
79
+
80
+ it "should not have next page" do
81
+ @result.next_page.should == nil
82
+ end
83
+ end
84
+
85
+ describe "(for 'kot' with page_size set to 5 in :default corpus)" do
86
+ before(:all) do
87
+ @result = @client.find("kot", :page_size => 5)
88
+ end
89
+
90
+ it "should have size == 5" do
91
+ @result.size.should == 5
92
+ end
93
+
94
+ it "should have page set to 1" do
95
+ @result.page.should == 1
96
+ end
97
+
98
+ it "should contain 2 pages" do
99
+ @result.page_count.should == 2
100
+ end
101
+
102
+ it "should not have previous page" do
103
+ @result.previous_page.should == nil
104
+ end
105
+
106
+ it "should have next page" do
107
+ @result.next_page.should_not == nil
108
+ end
109
+ end
110
+
111
+ describe "(next for 'kot' with page_size set to 5 in :default corpus)" do
112
+ before(:all) do
113
+ @result = @client.find("kot", :page_size => 5).next_page
114
+ end
115
+
116
+ it "should have size == 1" do
117
+ @result.size.should == 1
118
+ end
119
+
120
+ it "should have page set to 2" do
121
+ @result.page.should == 2
122
+ end
123
+
124
+ it "should contain 2 pages" do
125
+ @result.page_count.should == 2
126
+ end
127
+
128
+ it "should have previous page" do
129
+ @result.previous_page.should_not == nil
130
+ end
131
+
132
+ it "should not have next page" do
133
+ @result.next_page.should == nil
134
+ end
135
+ end
136
+ end
metadata CHANGED
@@ -1,7 +1,12 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: poliqarpr
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 0
8
+ - 8
9
+ version: 0.0.8
5
10
  platform: ruby
6
11
  authors:
7
12
  - Aleksander Pohl
@@ -9,7 +14,7 @@ autorequire:
9
14
  bindir: bin
10
15
  cert_chain: []
11
16
 
12
- date: 2009-12-10 00:00:00 +01:00
17
+ date: 2011-01-12 00:00:00 +01:00
13
18
  default_executable:
14
19
  dependencies: []
15
20
 
@@ -35,6 +40,9 @@ files:
35
40
  - lib/poliqarpr/client.rb
36
41
  - lib/poliqarpr/util.rb
37
42
  - lib/poliqarpr/connector.rb
43
+ - spec/query_result.rb
44
+ - spec/excerpt.rb
45
+ - spec/client.rb
38
46
  has_rdoc: true
39
47
  homepage: http://www.github.com/apohllo/poliqarpr
40
48
  licenses: []
@@ -46,23 +54,29 @@ rdoc_options:
46
54
  require_paths:
47
55
  - lib
48
56
  required_ruby_version: !ruby/object:Gem::Requirement
57
+ none: false
49
58
  requirements:
50
59
  - - ">="
51
60
  - !ruby/object:Gem::Version
61
+ segments:
62
+ - 0
52
63
  version: "0"
53
- version:
54
64
  required_rubygems_version: !ruby/object:Gem::Requirement
65
+ none: false
55
66
  requirements:
56
67
  - - ">="
57
68
  - !ruby/object:Gem::Version
69
+ segments:
70
+ - 0
58
71
  version: "0"
59
- version:
60
72
  requirements: []
61
73
 
62
74
  rubyforge_project:
63
- rubygems_version: 1.3.5
75
+ rubygems_version: 1.3.7
64
76
  signing_key:
65
77
  specification_version: 3
66
78
  summary: Ruby client for Poliqarp
67
- test_files: []
68
-
79
+ test_files:
80
+ - spec/query_result.rb
81
+ - spec/excerpt.rb
82
+ - spec/client.rb