search_do 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,100 @@
1
+ = Pure Ruby Interface of Hyper Estraier
2
+
3
+ Hyper Estraier is a full-text search system for communities.
4
+
5
+ == Introduction
6
+
7
+ This is a package implementing the node API of {Hyper Estraier}[http://hyperestraier.sourceforge.net/]. This is a pure ruby package. So, it works on Linux, Mac OS X, Windows, and so on. It does not depend on the core library of Hyper Estraier. Applications are implemented as clients of node servers running on local or remote machines.
8
+
9
+ Though Hyper Estraier itself is released under the terms of the GNU LGPL, this package is released under the terms of a BSD-style license.
10
+
11
+ == Setting
12
+
13
+ Get the package of the latest version of Hyper Estraier.
14
+
15
+ Extract the package, enter the subdirectory `rubypure', and perform the installation.
16
+
17
+ cd rubypure
18
+ ./configure
19
+ make
20
+ su
21
+ make install
22
+
23
+ Each source file of an application program should require the package `estraierpure', and may optionally include the module `EstraierPure'.
24
+
25
+ == Example of Gatherer
26
+
27
+ The following is the simplest implementation of a gatherer.
28
+
29
+ require "estraierpure"
30
+ include EstraierPure
31
+
32
+ # create and configure the node connection object
33
+ node = Node::new
34
+ node.set_url("http://localhost:1978/node/test1")
35
+ node.set_auth("admin", "admin")
36
+
37
+ # create a document object
38
+ doc = Document::new
39
+
40
+ # add attributes to the document object
41
+ doc.add_attr("@uri", "http://estraier.gov/example.txt")
42
+ doc.add_attr("@title", "Over the Rainbow")
43
+
44
+ # add the body text to the document object
45
+ doc.add_text("Somewhere over the rainbow. Way up high.")
46
+ doc.add_text("There's a land that I heard of once in a lullaby.")
47
+
48
+ # register the document object to the node
49
+ unless node.put_doc(doc)
50
+ STDERR.printf("error: %d\n", node.status)
51
+ end
52
+
53
+ == Example of Searcher
54
+
55
+ The following is the simplest implementation of a searcher.
56
+
57
+ require "estraierpure"
58
+ include EstraierPure
59
+
60
+ # create and configure the node connection object
61
+ node = Node::new
62
+ node.set_url("http://localhost:1978/node/test1")
63
+
64
+ # create a search condition object
65
+ cond = Condition::new
66
+
67
+ # set the search phrase to the search condition object
68
+ cond.set_phrase("rainbow AND lullaby")
69
+
70
+ # get the result of search
71
+ nres = node.search(cond, 0);
72
+ if nres
73
+ # for each document in the result
74
+ for i in 0...nres.doc_num
75
+ # get a result document object
76
+ rdoc = nres.get_doc(i)
77
+ # display attributes
78
+ value = rdoc.attr("@uri")
79
+ printf("URI: %s\n", value) if value
80
+ value = rdoc.attr("@title")
81
+ printf("Title: %s\n", value) if value
82
+ # display the snippet text
83
+ printf("%s", rdoc.snippet)
84
+ end
85
+ else
86
+ STDERR.printf("error: %d\n", node.status)
87
+ end
88
+
89
+ == License
90
+
91
+ Copyright (C) 2004-2006 Mikio Hirabayashi
92
+ All rights reserved.
93
+
94
+ Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
95
+
96
+ - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
97
+ - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
98
+ - Neither the name of Mikio Hirabayashi nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
99
+
100
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,52 @@
1
+ namespace :search do
2
+
3
+ desc "Reindexes all model attributes"
4
+ task :reindex do
5
+ rake = fetch(:rake, "rake")
6
+ rails_env = fetch(:rails_env, "production")
7
+ run "cd #{current_path}; #{rake} RAILS_ENV=#{rails_env} search:reindex MODEL=#{ENV['MODEL']}"
8
+ end
9
+
10
+ namespace :node do
11
+
12
+ desc "Create HE node"
13
+ task :create do
14
+ rake = fetch(:rake, "rake")
15
+ rails_env = fetch(:rails_env, "production")
16
+ run "cd #{current_path}; #{rake} RAILS_ENV=#{rails_env} search:node:create MODEL=#{ENV['MODEL']}"
17
+ end
18
+
19
+ desc "Delete HE node"
20
+ task :delete do
21
+ rake = fetch(:rake, "rake")
22
+ rails_env = fetch(:rails_env, "production")
23
+ run "cd #{current_path}; #{rake} RAILS_ENV=#{rails_env} search:node:delete MODEL=#{ENV['MODEL']}"
24
+ end
25
+ end
26
+ end
27
+ namespace :search do
28
+
29
+ desc "Reindexes all model attributes"
30
+ task :reindex do
31
+ rake = fetch(:rake, "rake")
32
+ rails_env = fetch(:rails_env, "production")
33
+ run "cd #{current_path}; #{rake} RAILS_ENV=#{rails_env} search:reindex MODEL=#{ENV['MODEL']}"
34
+ end
35
+
36
+ namespace :node do
37
+
38
+ desc "Create HE node"
39
+ task :create do
40
+ rake = fetch(:rake, "rake")
41
+ rails_env = fetch(:rails_env, "production")
42
+ run "cd #{current_path}; #{rake} RAILS_ENV=#{rails_env} search:node:create MODEL=#{ENV['MODEL']}"
43
+ end
44
+
45
+ desc "Delete HE node"
46
+ task :delete do
47
+ rake = fetch(:rake, "rake")
48
+ rails_env = fetch(:rails_env, "production")
49
+ run "cd #{current_path}; #{rake} RAILS_ENV=#{rails_env} search:node:delete MODEL=#{ENV['MODEL']}"
50
+ end
51
+ end
52
+ end
@@ -0,0 +1,220 @@
1
+ require File.expand_path("../spec_helper", File.dirname(__FILE__))
2
+
3
+ describe SearchDo::Backends::HyperEstraier do
4
+ before do
5
+ @backend = SearchDo::Backends::HyperEstraier.new(Story, ActiveRecord::Base.configurations["test"]["estraier"])
6
+ end
7
+
8
+ describe "without index" do
9
+ before do
10
+ @backend.connection.should_receive(:search).and_return(nil)
11
+ end
12
+
13
+ it "should be empty" do
14
+ @backend.index.should == []
15
+ end
16
+ end
17
+
18
+ describe "include nil to indexed text" do
19
+ it "should not raise error" do
20
+ lambda{
21
+ @backend.add_to_index(['foo', nil], {})
22
+ }.should_not raise_error
23
+ end
24
+ end
25
+
26
+ describe "date searching" do
27
+ before do
28
+ @backend.clear_index!
29
+ @time = Time.local(2008,9,17)
30
+ @backend.add_to_index([@time], 'db_id' => "1", '@uri' => "/Story/1")
31
+ end
32
+
33
+ it "should searchable with time in iso format" do
34
+ @backend.count(@time.iso8601).should > 0
35
+ end
36
+
37
+ it "is ordered through dates" do
38
+ @backend.add_to_index([], 'db_id' => "1", 'read_at'=>Time.now, '@uri' => "/Story/1")
39
+ @backend.add_to_index([], 'db_id' => "2", 'read_at'=>Time.now+1.day, '@uri' => "/Story/2")
40
+ @backend.add_to_index([], 'db_id' => "3", 'read_at'=>Time.now-1.day, '@uri' => "/Story/3")
41
+ @backend.search_all_ids('',:order=>"read_at ASC").should == [3,1,2]
42
+ end
43
+ end
44
+
45
+ describe "raw" do
46
+ before do
47
+ @backend.add_to_index([], 'db_id' => "1", '@uri' => "/Story/1")
48
+ end
49
+
50
+ it "finds a raw document" do
51
+ @backend.raw(1).should_not be_nil
52
+ @backend.raw(1).attr('db_id').should == '1'
53
+ end
54
+ end
55
+
56
+ describe :build_fulltext_condition do
57
+ it "does not use limit for counting" do
58
+ @backend.send(:build_fulltext_condition,'',:count=>true).max.should == -1
59
+ end
60
+
61
+ describe 'parsing attributes' do
62
+ def condition(options)
63
+ @backend.send(:build_fulltext_condition,'something',options)
64
+ end
65
+
66
+ it 'raises on unknown' do
67
+ lambda{condition(:attributes=>1)}.should raise_error(RuntimeError)
68
+ end
69
+
70
+ it "removes blanks" do
71
+ condition(:attributes=>['',' ','a']).attrs.should == ['a']
72
+ end
73
+
74
+ it 'ignores empty' do
75
+ condition(:attributes=>nil).attrs.should == []
76
+ condition(:attributes=>'').attrs.should == []
77
+ end
78
+
79
+ it "adds a always-true condition when search is blank" do
80
+ @backend.send(:build_fulltext_condition,' ').attrs.should_not be_blank
81
+ end
82
+
83
+ it "parses a string" do
84
+ condition(:attributes=>'x y z').attrs.should == ['x y z']
85
+ end
86
+
87
+ it "parses an array" do
88
+ condition(:attributes=>['a b c','d e f']).attrs.should == ['a b c','d e f']
89
+ end
90
+
91
+
92
+ describe 'parsing a hash' do
93
+ before :all do
94
+ Story.columns_hash['popularity'].should be_number
95
+ Story.columns_hash['title'].should be_text
96
+ end
97
+
98
+ it "parses a simple hash" do
99
+ condition(:attributes=>{:a=>'b'}).attrs.should == ['a iSTRINC b']
100
+ end
101
+
102
+ it "parses a blank hash" do
103
+ condition(:attributes=>{:a=>''}).attrs.should == []
104
+ end
105
+
106
+ it "parses a keyless hash" do
107
+ condition(:attributes=>{''=>'wtf'}).attrs.should == []
108
+ end
109
+
110
+ it "parses a number column to number search" do
111
+ condition(:attributes=>{'popularity'=>12}).attrs.should == ['popularity NUMEQ 12']
112
+ end
113
+
114
+ it "parses a string column to string search" do
115
+ condition(:attributes=>{'title'=>12}).attrs.should == ['title iSTRINC 12']
116
+ end
117
+
118
+ it "translates columns" do
119
+ condition(:attributes=>{'id'=>1}).attrs.should == ['db_id NUMEQ 1']
120
+ end
121
+
122
+ it "parses a unknown column to string search" do
123
+ condition(:attributes=>{'xxx'=>'x'}).attrs.should == ['xxx iSTRINC x']
124
+ end
125
+
126
+ it "parses a date or time" do
127
+ pending do
128
+ #raw output: "@mdate" "2008-09-21T09:51:27+02:00"
129
+ raise
130
+ end
131
+ end
132
+ end
133
+ end
134
+
135
+ describe "translate order" do
136
+ def translated_order(order)
137
+ @backend.send(:build_fulltext_condition,'',:order=>order).order
138
+ end
139
+
140
+ it "translates nil to nil" do
141
+ translated_order(nil).should == nil
142
+ end
143
+
144
+ it "translates blank to nil" do
145
+ translated_order("").should == nil
146
+ end
147
+
148
+ it "translates strings to STRA" do
149
+ translated_order('title ASC').should == 'title STRA'
150
+ translated_order('title asc').should == 'title STRA'
151
+ end
152
+
153
+ it "translates strings to STRD" do
154
+ translated_order('title').should == 'title STRD'
155
+ translated_order('title DESC').should == 'title STRD'
156
+ translated_order('title desc').should == 'title STRD'
157
+ end
158
+
159
+ it "translates dates to NUMD" do
160
+ translated_order(:written_on).should == "written_on NUMD"
161
+ end
162
+
163
+ it "translates datetimes to NUM" do
164
+ translated_order("read_at ASC").should == "read_at NUMA"
165
+ end
166
+
167
+ it "translates numbers to NUMD" do
168
+ translated_order(:popularity).should == "popularity NUMD"
169
+ end
170
+
171
+ it "translates numbers with asc to NUMA" do
172
+ translated_order("popularity ASC").should == "popularity NUMA"
173
+ end
174
+
175
+ it "translate non-columns to string" do
176
+ translated_order("paid_at ASC").should == "paid_at STRA"
177
+ end
178
+
179
+ describe "translating rails-terms" do
180
+ before :all do
181
+ class FakeColumn
182
+ def number?;false;end
183
+ def type;:date;end
184
+ end
185
+ Story.columns_hash["created_on"] = FakeColumn.new
186
+ Story.columns_hash["updated_on"] = FakeColumn.new
187
+ end
188
+
189
+ after :all do
190
+ Story.reset_column_information
191
+ end
192
+
193
+ #symbols and desc <-> DESC only need testing once, to see if order values get normalized
194
+ ['updated_at','updated_on',:updated_at,'updated_at DESC','updated_at desc'].each do |order|
195
+ it "translates #{order}" do
196
+ translated_order(order).should == "@mdate NUMD"
197
+ end
198
+ end
199
+ ['created_at','created_on','created_at DESC'].each do |order|
200
+ it "translates #{order}" do
201
+ translated_order(order).should == "@cdate NUMD"
202
+ end
203
+ end
204
+ ['id','id DESC'].each do |order|
205
+ it "translates #{order}" do
206
+ translated_order(order).should == "db_id NUMD"
207
+ end
208
+ end
209
+
210
+ it "does not translate strange things" do
211
+ translated_order('id strange').should == 'id strange'
212
+ end
213
+
214
+ it "does not translate long orders" do
215
+ translated_order('id desc wtf').should == 'id desc wtf'
216
+ end
217
+ end
218
+ end
219
+ end
220
+ end
@@ -0,0 +1,26 @@
1
+ require File.expand_path("../spec_helper", File.dirname(__FILE__))
2
+ require 'search_do/backends/hyper_estraier/estraier_pure_extention'
3
+
4
+
5
+ describe EstraierPure::ResultDocument do
6
+ describe :snippet_a do
7
+ before do
8
+ #result of a search for "i am so blue"
9
+ snip = "...i\ti\n \nam\tam\n \nso\tso\n \nblue\tblue\n test\ni\ti\nng makes me happy\n\n"
10
+ @res = EstraierPure::ResultDocument.new('',{},snip,'')
11
+ end
12
+
13
+ it "returns a snippet_a" do
14
+ @res.snippet_a.should_not be_nil
15
+ end
16
+
17
+ it "transform the snippet to a array of lines" do
18
+ @res.snippet_a.size.should == 10
19
+ end
20
+
21
+ it "highlights the found words" do
22
+ # i ' ' am ' ' so ' ' blue ' test' i ng makes...
23
+ @res.snippet_a.map{|x|x[1]}.should == [true,false,true,false,true,false,true, false, true,false]
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,33 @@
1
+ require File.expand_path("../spec_helper", File.dirname(__FILE__))
2
+
3
+ describe :need_update_index? do
4
+ def needs
5
+ @story.send(:need_update_index?)
6
+ end
7
+
8
+ before do
9
+ @story = Story.new
10
+ end
11
+
12
+ it "does not need update when object is new" do
13
+ needs.should be_false
14
+ end
15
+
16
+ it "needs update when column changes" do
17
+ @story.title = "new value"
18
+ needs.should be_true
19
+ end
20
+
21
+ it "needs update when a indexed non-column changes" do
22
+ pending do
23
+ @story.non_column = "new value"
24
+ needs.should be_true
25
+ end
26
+ end
27
+
28
+ it "does not need update after changes have been cleared" do
29
+ @story.title = "new value"
30
+ @story.send(:clear_changed_attributes)
31
+ needs.should be_false
32
+ end
33
+ end
@@ -0,0 +1,26 @@
1
+ require File.expand_path("../lib/estraier_admin", File.dirname(__FILE__))
2
+
3
+ describe EstraierAdmin do
4
+ before do
5
+ @admin = EstraierAdmin.new(:user=>"hoge",:password=>"foo")
6
+ end
7
+
8
+ it "@config[:user] should == 'hoge'" do
9
+ @admin.instance_eval{@config}[:user].should == 'hoge'
10
+ end
11
+
12
+ it "@config[:password] should == 'foo'" do
13
+ @admin.instance_eval{@config}[:password].should == 'foo'
14
+ end
15
+
16
+ it "should receive :request_or_raise, with {:name=>'piyo', :label=>'piyo', :action=>8} when create_node()" do
17
+ @admin.should_receive(:request_or_raise).with(:name=>'piyo', :label=>'piyo', :action=>8)
18
+ @admin.create_node('piyo')
19
+ end
20
+
21
+ it "should receive :request_or_raise, with {:name=>'piyo', :action=>9, :sure=>1} when delete_node()" do
22
+ @admin.should_receive(:request_or_raise).with(:name=>'piyo', :action=>9, :sure=>1)
23
+ @admin.delete_node('piyo')
24
+ end
25
+ end
26
+
@@ -0,0 +1,27 @@
1
+ sanshiro:
2
+ id: 101
3
+ title: "三四郎"
4
+ body: "
5
+ うとうととして目がさめると女はいつのまにか、隣のじいさんと話を始めている。
6
+ このじいさんはたしかに前の前の駅から乗ったいなか者である。発車まぎわに頓狂
7
+ (とんきょう)な声を出して駆け込んで来て、いきなり肌(はだ)をぬいだと
8
+ 思ったら背中にお灸(きゅう)のあとがいっぱいあったので、三四郎(さんしろう)
9
+ の記憶に残っている。じいさんが汗をふいて、肌を入れて、女の隣に腰をかけたまで
10
+ よく注意して見ていたくらいである。"
11
+ popularity: 10
12
+ updated_at: 2008-04-05 12:34:56
13
+ created_at: 2008-04-05 12:34:56
14
+
15
+ neko:
16
+ id: 102
17
+ title: "吾輩は猫である"
18
+ body: "
19
+ 吾輩(わがはい)は猫である。名前はまだ無い。
20
+ どこで生れたかとんと見当(けんとう)がつかぬ。何でも薄暗いじめじめした所でニャ
21
+ ーニャー泣いていた事だけは記憶している。吾輩はここで始めて人間というものを見た
22
+ 。しかもあとで聞くとそれは書生という人間中で一番獰悪(どうあく)な種族であった
23
+ そうだ。"
24
+ popularity: 15
25
+ updated_at: 2008-04-01 12:34:56
26
+ created_at: 2008-04-01 12:34:56
27
+
@@ -0,0 +1,59 @@
1
+ require File.expand_path("spec_helper", File.dirname(__FILE__))
2
+ require 'search_do/indexer'
3
+
4
+ describe SearchDo::Indexer,"new(AR[created_at updated_at]) / and set some attrs" do
5
+ before do
6
+ @ar_klass = mock("ar_klass")
7
+ @ar_klass.stub!(:column_names).and_return(%[created_at updated_at])
8
+
9
+ @indexer = SearchDo::Indexer.new(@ar_klass, {})
10
+ @indexer.searchable_fields = %w[title body]
11
+ @indexer.attributes_to_store = {:user_id => :user_id}
12
+ @indexer.if_changed = %w[popularity]
13
+ end
14
+
15
+ it "#searchable_fields should == %w[title body]" do
16
+ @indexer.searchable_fields.should == %w[title body]
17
+ end
18
+
19
+ it "#attributes_to_store['user_id'].should == :user_id" do
20
+ @indexer.attributes_to_store['user_id'].should == :user_id
21
+ end
22
+
23
+ it "#observing_fields.should == Set.new(%w[title body user_id popularity])" do
24
+ @indexer.observing_fields.should == Set.new(%w[title body user_id popularity])
25
+ end
26
+
27
+ describe "call '.record_timestamps!'" do
28
+ before do
29
+ @indexer.record_timestamps!
30
+ end
31
+
32
+ it "#attributes_to_store['cdate'] should == 'created_at'" do
33
+ @indexer.attributes_to_store['cdate'].should == 'created_at'
34
+ end
35
+
36
+ it "#attributes_to_store['mdate'] should == 'updated_at'" do
37
+ @indexer.attributes_to_store['mdate'].should == 'updated_at'
38
+ end
39
+
40
+ it "#observing_fields.should include 'updated_at' and 'created_at'" do
41
+ @indexer.observing_fields.should include('created_at')
42
+ @indexer.observing_fields.should include('updated_at')
43
+ end
44
+ end
45
+
46
+ describe "#add_callbacks!() " do
47
+ before do
48
+ @ar_klass.should_receive(:after_update).with(:update_index)
49
+ @ar_klass.should_receive(:after_create).with(:add_to_index)
50
+ @ar_klass.should_receive(:after_save).with(:clear_changed_attributes)
51
+ @ar_klass.should_receive(:after_destroy).with(:remove_from_index)
52
+ end
53
+
54
+ it "adds appropriate callbacks" do
55
+ @indexer.add_callbacks!
56
+ end
57
+ end
58
+ end
59
+