poliqarpr 0.0.5 → 0.0.8

Sign up to get free protection for your applications and to get access to all the features.
data/README.txt CHANGED
@@ -60,7 +60,7 @@ step of installation process).
60
60
 
61
61
  Require the gem:
62
62
 
63
- require 'poliaqarpr'
63
+ require 'poliqarpr'
64
64
 
65
65
  Create the server client and open default corpus
66
66
 
@@ -76,6 +76,8 @@ Remember to close the client on exit
76
76
 
77
77
  client.close
78
78
 
79
+ NOTE: If you wish to run the specs, you need the 'default' and '2.sample.30'
80
+ corpuses.
79
81
 
80
82
  == LICENSE:
81
83
 
data/changelog.txt CHANGED
@@ -1,3 +1,16 @@
1
+ 0.0.8
2
+ - Speed optimization: socket puts changed to write
3
+
4
+ 0.0.7
5
+ - QueryResult#to_a method added
6
+ - Lexeme#tags method added
7
+ - fix: README invalid require
8
+
9
+ 0.0.6
10
+ - fix: Excerpt#word - the words constituting the matched query
11
+ - new: Excerpt#matched, Excerpt#right_context, Excerpt#left_context
12
+ return the matched, right context, left context segments respectively
13
+
1
14
  0.0.5
2
15
  - Bugfix: making parallel query might lead to silent thread death
3
16
  - Support for Ruby 1.9 encoding
@@ -366,7 +366,7 @@ protected
366
366
  lemmata.base_form = read_word
367
367
  end
368
368
  if @tag_flags[group]
369
- read_word
369
+ lemmata.tags = read_word
370
370
  end
371
371
  segment.lemmata << lemmata
372
372
  end
@@ -70,7 +70,8 @@ module Poliqarp
70
70
  if ruby19?
71
71
  massage = message.encode(UTF8)
72
72
  end
73
- @socket.puts(message)
73
+ #@socket.puts(message)
74
+ @socket.write(message+"\n")
74
75
  if mode == :async
75
76
  @handler = handler
76
77
  end
@@ -25,11 +25,25 @@ module Poliqarp
25
25
  @short_context << value
26
26
  end
27
27
 
28
+ # Returns the matched segments
29
+ def matched
30
+ @short_context[1]
31
+ end
32
+
33
+ # Returns the segments of the left short context of the match
34
+ def left_context
35
+ @short_context[0]
36
+ end
37
+
38
+ # Returns the segments of the right short context of the match
39
+ def right_context
40
+ @short_context[2]
41
+ end
28
42
 
29
43
  # Returns the matched query as string
30
44
  def word
31
45
  #@short_context[0].split(/\s+/)[-1]
32
- @short_context[1].to_s
46
+ @short_context[1].map{|s| s.to_s}.join("")
33
47
  end
34
48
 
35
49
  alias inflected_form word
@@ -4,7 +4,7 @@ module Poliqarp
4
4
  #
5
5
  # The lemmata contains the base form of the segment
6
6
  class Lemmata
7
- attr_accessor :base_form
7
+ attr_accessor :base_form, :tags
8
8
  def initialize()
9
9
  end
10
10
  end
@@ -69,5 +69,10 @@ module Poliqarp
69
69
  @excerpts.size
70
70
  end
71
71
 
72
+ # Converts current query result page into an array.
73
+ def to_a
74
+ @excerpts.dup
75
+ end
76
+
72
77
  end
73
78
  end
data/poliqarpr.gemspec CHANGED
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = "poliqarpr"
3
- s.version = "0.0.5"
4
- s.date = "2009-12-10"
3
+ s.version = "0.0.8"
4
+ s.date = "2011-01-12"
5
5
  s.summary = "Ruby client for Poliqarp"
6
6
  s.email = "apohllo@o2.pl"
7
7
  s.homepage = "http://www.github.com/apohllo/poliqarpr"
@@ -9,7 +9,7 @@ Gem::Specification.new do |s|
9
9
  s.authors = ['Aleksander Pohl']
10
10
  s.files = ["Rakefile", "poliqarpr.gemspec",
11
11
  "changelog.txt", "README.txt" ] + Dir.glob("lib/**/*")
12
- s.test_files = Dir.glob("test/**/*")
12
+ s.test_files = Dir.glob("spec/**/*")
13
13
  s.rdoc_options = ["--main", "README.txt"]
14
14
  s.has_rdoc = true
15
15
  s.extra_rdoc_files = ["README.txt"]
data/spec/client.rb ADDED
@@ -0,0 +1,171 @@
1
+ #vim:encoding=utf-8
2
+ $:.unshift("lib")
3
+ require 'poliqarpr'
4
+
5
+ describe Poliqarp::Client do
6
+ describe "(general test)" do
7
+ before(:each) do
8
+ @client = Poliqarp::Client.new("TEST")
9
+ end
10
+
11
+ after(:each) do
12
+ @client.close
13
+ end
14
+
15
+ it "should allow to open corpus" do
16
+ @client.open_corpus("/home/fox/local/poliqarp/2.sample.30/sample")
17
+ end
18
+
19
+ it "should allow to open :default corpus" do
20
+ @client.open_corpus(:default)
21
+ end
22
+
23
+ it "should respond to :ping" do
24
+ @client.ping.should == :pong
25
+ end
26
+
27
+ it "should return server version" do
28
+ @client.version.should_not == nil
29
+ end
30
+
31
+ end
32
+
33
+ describe "(with 'sample' corpus)" do
34
+ before(:all) do
35
+ @client = Poliqarp::Client.new("TEST")
36
+ @client.open_corpus("/home/fox/local/poliqarp/2.sample.30/sample")
37
+ end
38
+
39
+ after(:all) do
40
+ @client.close
41
+ end
42
+
43
+ it "should allow to set the right context size" do
44
+ @client.right_context = 5
45
+ end
46
+
47
+ it "should raise error if the size of right context is not number" do
48
+ (proc do
49
+ @client.right_context = "a"
50
+ end).should raise_error(RuntimeError)
51
+ end
52
+
53
+ it "should rais error if the size of right context is less or equal 0" do
54
+ (proc do
55
+ @client.right_context = 0
56
+ end).should raise_error(RuntimeError)
57
+ end
58
+
59
+ it "should allow to set the left context size" do
60
+ @client.right_context = 5
61
+ end
62
+
63
+ it "should raise error if the size of left context is not number" do
64
+ (lambda do
65
+ @client.left_context = "a"
66
+ end).should raise_error(RuntimeError)
67
+ end
68
+
69
+ it "should rais error if the size of left context is less or equal 0" do
70
+ (lambda do
71
+ @client.left_context = 0
72
+ end).should raise_error(RuntimeError)
73
+ end
74
+
75
+ it "should return corpus statistics" do
76
+ stats = @client.stats
77
+ stats.size.should == 4
78
+ [:segment_tokens, :segment_types, :lemmata, :tags].each do |type|
79
+ stats[type].should_not == nil
80
+ stats[type].should > 0
81
+ end
82
+ end
83
+
84
+ it "should return the corpus tagset" do
85
+ tagset = @client.tagset
86
+ tagset[:categories].should_not == nil
87
+ tagset[:classes].should_not == nil
88
+ end
89
+
90
+ it "should allow to find 'kot'" do
91
+ @client.find("kot").size.should_not == 0
92
+ end
93
+
94
+ it "should contain 'kot' in query result for [base=kot]" do
95
+ @client.find("[base=kot]")[0].to_s.should match(/\bkot\b/)
96
+ end
97
+
98
+ it "should allow to find 'Afrodyta [] od" do
99
+ @client.find("Afrodyta [] od").size.should_not == 0
100
+ end
101
+
102
+ it "should contain 'Afrodyta .* od' for 'Afrodyta [] od' query " do
103
+ @client.find("Afrodyta [] od")[0].to_s.should match(/Afrodyta .* od/)
104
+ end
105
+
106
+ it "should return collection for find without index specified" do
107
+ @client.find("kot").should respond_to(:[])
108
+ end
109
+
110
+ it "should allow to query for term occurences" do
111
+ @client.count("kot").should_not == nil
112
+ end
113
+
114
+ it "should return 188 occurences of 'kot'" do
115
+ @client.count("kot").should == 188
116
+ end
117
+
118
+ it "should allow to find first occurence of 'kot'" do
119
+ @client.find("kot",:index => 0).should_not == nil
120
+ end
121
+
122
+ it "should return different results for different queries" do
123
+ @client.find("kot").should_not ==
124
+ @client.find("kita")
125
+ end
126
+
127
+ it "should return same results for same queries" do
128
+ @client.find("kita").should == @client.find("kita")
129
+ end
130
+
131
+ describe("(with index specified in find)") do
132
+ before(:each) do
133
+ @result = @client.find("nachalny",:index => 0)
134
+ end
135
+
136
+ it "should not return collection for find" do
137
+ @result.should_not respond_to(:[])
138
+ end
139
+
140
+ it "should not be nil" do
141
+ @result.should_not == nil
142
+ end
143
+
144
+ it "should fetch the same excerpt as in find without index " do
145
+ @result.to_s.should == @client.find("nachalny")[0].to_s
146
+ end
147
+ end
148
+
149
+ describe("(with lemmata flags set to true)") do
150
+ before(:all) do
151
+ @client.lemmata = {:left_context => true, :right_context => true,
152
+ :left_match => true, :right_match => true}
153
+ end
154
+
155
+ it "should allow to find 'kotu'" do
156
+ @client.find("kotu").size.should_not == 0
157
+ end
158
+
159
+ it "should contain 'kotu' in query result for 'kotu'" do
160
+ @client.find("kotu")[0].to_s.should match(/\bkotu\b/)
161
+ end
162
+
163
+ it "should contain 'kot' in lemmatized query result for 'kotu'" do
164
+ @client.find("kotu")[0].short_context.flatten.
165
+ map{|e| e.lemmata[0].base_form}.join(" ").should match(/\bkot\b/)
166
+ end
167
+
168
+ end
169
+ end
170
+
171
+ end
data/spec/excerpt.rb ADDED
@@ -0,0 +1,142 @@
1
+ #vim:encoding=utf-8
2
+ $:.unshift("lib")
3
+ require 'poliqarpr'
4
+
5
+ describe Poliqarp::Excerpt do
6
+ before(:all) do
7
+ @client = Poliqarp::Client.new("TEST")
8
+ end
9
+
10
+ after(:all) do
11
+ @client.close
12
+ end
13
+
14
+ describe "(unspecified excerpt)" do
15
+ before(:all) do
16
+ @client.open_corpus(:default)
17
+ @excerpt = @client.find("kot").first
18
+ end
19
+
20
+ after(:all) do
21
+ @client.close_corpus
22
+ end
23
+
24
+ it "should have index" do
25
+ @excerpt.index.should_not == nil
26
+ end
27
+
28
+ it "should have base form" do
29
+ @excerpt.base_form.should_not == nil
30
+ end
31
+
32
+ it "should contain 3 groups in short context" do
33
+ @excerpt.short_context.size.should == 3
34
+ end
35
+
36
+ it "should allow to add segment group" do
37
+ @excerpt << [Poliqarp::Segment.new("abc")]
38
+ end
39
+
40
+
41
+ it "should contain non empty segments in short context" do
42
+ @excerpt.short_context.flatten.each do |segment|
43
+ segment.literal.should_not == nil
44
+ end
45
+ end
46
+
47
+ it "should contain the exact form which it was created for" do
48
+ @excerpt.inflected_form.should_not == nil
49
+ end
50
+
51
+ it "should contain the long context of the word" do
52
+ @excerpt.context.should_not == nil
53
+ end
54
+ end
55
+
56
+ describe "(first exceprt for 'mu za to astronomiczną' in 'sample' corpus)" do
57
+ before(:all) do
58
+ @client.open_corpus("/home/fox/local/poliqarp/2.sample.30/sample")
59
+ @excerpt = @client.find("mu za to astronomiczną").first
60
+ end
61
+
62
+ after(:all) do
63
+ @client.close_corpus
64
+ end
65
+
66
+ it "should have index set to 0" do
67
+ @excerpt.index.should == 0
68
+ end
69
+
70
+ it "should have base form set to 'kot'" do
71
+ @excerpt.base_form.should == "mu za to astronomiczną"
72
+ end
73
+
74
+ it "should have 'kot' as inflected form " do
75
+ @excerpt.inflected_form.should_not == nil
76
+ end
77
+
78
+ it "should contain the long context of the word" do
79
+ @excerpt.context.to_s.size.should > 10
80
+ end
81
+
82
+ it "should have one 'medium' set to 'książka'" do
83
+ @excerpt.medium.size.should == 1
84
+ @excerpt.medium[0].should == "książka"
85
+ end
86
+
87
+ it "should have 2 'styles' set to 'naukowo-dydaktyczny' and 'naukowo-humanistyczny'" do
88
+ @excerpt.style.size.should == 1
89
+ @excerpt.style.include?("naukowo-dydaktyczny")
90
+ end
91
+
92
+ it "should have 'date' set to nil" do
93
+ @excerpt.date.should == nil
94
+ end
95
+
96
+ it "should have 'city' set to nil" do
97
+ @excerpt.city.should == nil
98
+ end
99
+
100
+ it "should have one 'publisher' set to 'Wydawnictwo Naukowe Akademii Pedagogicznej'" do
101
+ @excerpt.publisher.size.should == 1
102
+ @excerpt.publisher[0].should == "Wydawnictwo W.A.B."
103
+ end
104
+
105
+ it "should have one 'title' set to 'Wczesne nauczanie języków obcych. Integracja języka obcego z przedmiotami artystycznymi w młodszych klasach szkoły podstawowej'" do
106
+ @excerpt.title.size.should == 1
107
+ @excerpt.title[0].should == "Modlitwa o deszcz"
108
+ end
109
+
110
+ it "should have one 'author' set to 'Małgorzata Pamuła'" do
111
+ @excerpt.author.size.should == 1
112
+ @excerpt.author[0].should == "Wojciech Jagielski"
113
+ end
114
+ end
115
+
116
+ describe('first result for "kotu" with lemmatization turned on') do
117
+ before(:all) do
118
+ @client.lemmata = :all
119
+ @client.open_corpus(:default)
120
+ @excerpt = @client.find("kotu")[0]
121
+ end
122
+
123
+ it "should have one lemmata for each segment" do
124
+ @excerpt.short_context.each do |group|
125
+ group.each do |segment|
126
+ segment.lemmata.size.should == 1
127
+ end
128
+ end
129
+ end
130
+
131
+ it "should have non-nil lemmata for each segment" do
132
+ @excerpt.short_context.flatten.each do |segment|
133
+ segment.lemmata[0].should_not == nil
134
+ end
135
+ end
136
+
137
+ it "should contain 'kot' as one of the lemmata" do
138
+ @excerpt.short_context.flatten.
139
+ any?{|s| s.lemmata[0].base_form == "kot"}.should == true
140
+ end
141
+ end
142
+ end
@@ -0,0 +1,136 @@
1
+ #vim:encoding=utf-8
2
+ $:.unshift("lib")
3
+ require 'poliqarpr'
4
+
5
+ describe Poliqarp::QueryResult do
6
+ before(:all) do
7
+ @client = Poliqarp::Client.new("TEST")
8
+ @client.open_corpus(:default)
9
+ end
10
+
11
+ after(:all) do
12
+ @client.close
13
+ end
14
+
15
+ describe "(for unspecified query)" do
16
+ before(:all) do
17
+ @result = @client.find("kita")
18
+ end
19
+
20
+ it "should not be nil" do
21
+ @result.should_not == nil
22
+ end
23
+
24
+ it "should containt its size" do
25
+ @result.size.should_not == nil
26
+ end
27
+
28
+ it "should be iterable" do
29
+ @result.each do |excerpt|
30
+ excerpt.should_not == nil
31
+ end
32
+ end
33
+
34
+ it "should allow to add excerpt" do
35
+ @result << Poliqarp::Excerpt.new(0,@client, "abc")
36
+ end
37
+
38
+ it "should contain current page" do
39
+ @result.page.should_not == nil
40
+ end
41
+
42
+ it "should contain the page count" do
43
+ @result.page_count.should_not == nil
44
+ end
45
+
46
+ it "should allow to call previous page" do
47
+ @result.previous_page
48
+ end
49
+
50
+ it "should allow to call next page" do
51
+ @result.next_page
52
+ end
53
+
54
+ it "should be the same if the query is the same" do
55
+ @result.should == @client.find("kita")
56
+ end
57
+ end
58
+
59
+ describe "(for 'kot' in :default corpus)" do
60
+ before(:all) do
61
+ @result = @client.find("kot")
62
+ end
63
+
64
+ it "should have size == 6" do
65
+ @result.size.should == 6
66
+ end
67
+
68
+ it "should have page set to 1" do
69
+ @result.page.should == 1
70
+ end
71
+
72
+ it "should contain only one page" do
73
+ @result.page_count.should == 1
74
+ end
75
+
76
+ it "should not have previous page" do
77
+ @result.previous_page.should == nil
78
+ end
79
+
80
+ it "should not have next page" do
81
+ @result.next_page.should == nil
82
+ end
83
+ end
84
+
85
+ describe "(for 'kot' with page_size set to 5 in :default corpus)" do
86
+ before(:all) do
87
+ @result = @client.find("kot", :page_size => 5)
88
+ end
89
+
90
+ it "should have size == 5" do
91
+ @result.size.should == 5
92
+ end
93
+
94
+ it "should have page set to 1" do
95
+ @result.page.should == 1
96
+ end
97
+
98
+ it "should contain 2 pages" do
99
+ @result.page_count.should == 2
100
+ end
101
+
102
+ it "should not have previous page" do
103
+ @result.previous_page.should == nil
104
+ end
105
+
106
+ it "should have next page" do
107
+ @result.next_page.should_not == nil
108
+ end
109
+ end
110
+
111
+ describe "(next for 'kot' with page_size set to 5 in :default corpus)" do
112
+ before(:all) do
113
+ @result = @client.find("kot", :page_size => 5).next_page
114
+ end
115
+
116
+ it "should have size == 1" do
117
+ @result.size.should == 1
118
+ end
119
+
120
+ it "should have page set to 2" do
121
+ @result.page.should == 2
122
+ end
123
+
124
+ it "should contain 2 pages" do
125
+ @result.page_count.should == 2
126
+ end
127
+
128
+ it "should have previous page" do
129
+ @result.previous_page.should_not == nil
130
+ end
131
+
132
+ it "should not have next page" do
133
+ @result.next_page.should == nil
134
+ end
135
+ end
136
+ end
metadata CHANGED
@@ -1,7 +1,12 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: poliqarpr
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 0
8
+ - 8
9
+ version: 0.0.8
5
10
  platform: ruby
6
11
  authors:
7
12
  - Aleksander Pohl
@@ -9,7 +14,7 @@ autorequire:
9
14
  bindir: bin
10
15
  cert_chain: []
11
16
 
12
- date: 2009-12-10 00:00:00 +01:00
17
+ date: 2011-01-12 00:00:00 +01:00
13
18
  default_executable:
14
19
  dependencies: []
15
20
 
@@ -35,6 +40,9 @@ files:
35
40
  - lib/poliqarpr/client.rb
36
41
  - lib/poliqarpr/util.rb
37
42
  - lib/poliqarpr/connector.rb
43
+ - spec/query_result.rb
44
+ - spec/excerpt.rb
45
+ - spec/client.rb
38
46
  has_rdoc: true
39
47
  homepage: http://www.github.com/apohllo/poliqarpr
40
48
  licenses: []
@@ -46,23 +54,29 @@ rdoc_options:
46
54
  require_paths:
47
55
  - lib
48
56
  required_ruby_version: !ruby/object:Gem::Requirement
57
+ none: false
49
58
  requirements:
50
59
  - - ">="
51
60
  - !ruby/object:Gem::Version
61
+ segments:
62
+ - 0
52
63
  version: "0"
53
- version:
54
64
  required_rubygems_version: !ruby/object:Gem::Requirement
65
+ none: false
55
66
  requirements:
56
67
  - - ">="
57
68
  - !ruby/object:Gem::Version
69
+ segments:
70
+ - 0
58
71
  version: "0"
59
- version:
60
72
  requirements: []
61
73
 
62
74
  rubyforge_project:
63
- rubygems_version: 1.3.5
75
+ rubygems_version: 1.3.7
64
76
  signing_key:
65
77
  specification_version: 3
66
78
  summary: Ruby client for Poliqarp
67
- test_files: []
68
-
79
+ test_files:
80
+ - spec/query_result.rb
81
+ - spec/excerpt.rb
82
+ - spec/client.rb