greglu-solr-ruby 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103) hide show
  1. data/CHANGES.yml +50 -0
  2. data/LICENSE.txt +201 -0
  3. data/README +56 -0
  4. data/Rakefile +190 -0
  5. data/examples/delicious_library/dl_importer.rb +60 -0
  6. data/examples/delicious_library/sample_export.txt +164 -0
  7. data/examples/marc/marc_importer.rb +106 -0
  8. data/examples/tang/tang_importer.rb +58 -0
  9. data/lib/solr.rb +21 -0
  10. data/lib/solr/connection.rb +179 -0
  11. data/lib/solr/document.rb +73 -0
  12. data/lib/solr/exception.rb +13 -0
  13. data/lib/solr/field.rb +39 -0
  14. data/lib/solr/importer.rb +19 -0
  15. data/lib/solr/importer/array_mapper.rb +26 -0
  16. data/lib/solr/importer/delimited_file_source.rb +38 -0
  17. data/lib/solr/importer/hpricot_mapper.rb +27 -0
  18. data/lib/solr/importer/mapper.rb +51 -0
  19. data/lib/solr/importer/solr_source.rb +43 -0
  20. data/lib/solr/importer/xpath_mapper.rb +35 -0
  21. data/lib/solr/indexer.rb +52 -0
  22. data/lib/solr/request.rb +26 -0
  23. data/lib/solr/request/add_document.rb +63 -0
  24. data/lib/solr/request/base.rb +36 -0
  25. data/lib/solr/request/commit.rb +31 -0
  26. data/lib/solr/request/delete.rb +50 -0
  27. data/lib/solr/request/dismax.rb +46 -0
  28. data/lib/solr/request/index_info.rb +22 -0
  29. data/lib/solr/request/modify_document.rb +51 -0
  30. data/lib/solr/request/optimize.rb +21 -0
  31. data/lib/solr/request/ping.rb +36 -0
  32. data/lib/solr/request/select.rb +56 -0
  33. data/lib/solr/request/spellcheck.rb +30 -0
  34. data/lib/solr/request/standard.rb +374 -0
  35. data/lib/solr/request/update.rb +23 -0
  36. data/lib/solr/response.rb +27 -0
  37. data/lib/solr/response/add_document.rb +17 -0
  38. data/lib/solr/response/base.rb +42 -0
  39. data/lib/solr/response/commit.rb +17 -0
  40. data/lib/solr/response/delete.rb +13 -0
  41. data/lib/solr/response/dismax.rb +20 -0
  42. data/lib/solr/response/index_info.rb +26 -0
  43. data/lib/solr/response/modify_document.rb +17 -0
  44. data/lib/solr/response/optimize.rb +14 -0
  45. data/lib/solr/response/ping.rb +28 -0
  46. data/lib/solr/response/ruby.rb +42 -0
  47. data/lib/solr/response/select.rb +17 -0
  48. data/lib/solr/response/spellcheck.rb +20 -0
  49. data/lib/solr/response/standard.rb +60 -0
  50. data/lib/solr/response/xml.rb +42 -0
  51. data/lib/solr/solrtasks.rb +27 -0
  52. data/lib/solr/util.rb +32 -0
  53. data/lib/solr/xml.rb +47 -0
  54. data/script/setup.rb +14 -0
  55. data/script/solrshell +18 -0
  56. data/solr-ruby.gemspec +26 -0
  57. data/solr/conf/admin-extra.html +31 -0
  58. data/solr/conf/protwords.txt +21 -0
  59. data/solr/conf/schema.xml +221 -0
  60. data/solr/conf/scripts.conf +24 -0
  61. data/solr/conf/solrconfig.xml +394 -0
  62. data/solr/conf/stopwords.txt +58 -0
  63. data/solr/conf/synonyms.txt +31 -0
  64. data/solr/conf/xslt/example.xsl +132 -0
  65. data/test/conf/admin-extra.html +31 -0
  66. data/test/conf/protwords.txt +21 -0
  67. data/test/conf/schema.xml +237 -0
  68. data/test/conf/scripts.conf +24 -0
  69. data/test/conf/solrconfig.xml +376 -0
  70. data/test/conf/stopwords.txt +58 -0
  71. data/test/conf/synonyms.txt +31 -0
  72. data/test/functional/server_test.rb +218 -0
  73. data/test/functional/test_solr_server.rb +104 -0
  74. data/test/unit/add_document_test.rb +40 -0
  75. data/test/unit/array_mapper_test.rb +37 -0
  76. data/test/unit/changes_yaml_test.rb +21 -0
  77. data/test/unit/commit_test.rb +41 -0
  78. data/test/unit/connection_test.rb +55 -0
  79. data/test/unit/data_mapper_test.rb +75 -0
  80. data/test/unit/delete_test.rb +56 -0
  81. data/test/unit/delimited_file_source_test.rb +29 -0
  82. data/test/unit/dismax_request_test.rb +26 -0
  83. data/test/unit/document_test.rb +69 -0
  84. data/test/unit/field_test.rb +48 -0
  85. data/test/unit/hpricot_mapper_test.rb +44 -0
  86. data/test/unit/hpricot_test_file.xml +26 -0
  87. data/test/unit/indexer_test.rb +57 -0
  88. data/test/unit/modify_document_test.rb +24 -0
  89. data/test/unit/ping_test.rb +51 -0
  90. data/test/unit/request_test.rb +61 -0
  91. data/test/unit/response_test.rb +43 -0
  92. data/test/unit/select_test.rb +25 -0
  93. data/test/unit/solr_mock_base.rb +40 -0
  94. data/test/unit/spellcheck_response_test.rb +26 -0
  95. data/test/unit/spellchecker_request_test.rb +27 -0
  96. data/test/unit/standard_request_test.rb +324 -0
  97. data/test/unit/standard_response_test.rb +174 -0
  98. data/test/unit/suite.rb +16 -0
  99. data/test/unit/tab_delimited.txt +2 -0
  100. data/test/unit/util_test.rb +24 -0
  101. data/test/unit/xpath_mapper_test.rb +38 -0
  102. data/test/unit/xpath_test_file.xml +25 -0
  103. metadata +173 -0
@@ -0,0 +1,31 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ #-----------------------------------------------------------------------
14
+ #some test synonym mappings unlikely to appear in real input text
15
+ aaa => aaaa
16
+ bbb => bbbb1 bbbb2
17
+ ccc => cccc1,cccc2
18
+ a\=>a => b\=>b
19
+ a\,a => b\,b
20
+ fooaaa,baraaa,bazaaa
21
+
22
+ # Some synonym groups specific to this example
23
+ GB,gib,gigabyte,gigabytes
24
+ MB,mib,megabyte,megabytes
25
+ Television, Televisions, TV, TVs
26
+ #notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
27
+ #after us won't split it into two words.
28
+
29
+ # Synonym mappings can be used for spelling correction too
30
+ pixima => pixma
31
+
@@ -0,0 +1,218 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ require 'test/unit'
14
+ require 'solr'
15
+
16
+ class BadRequest < Solr::Request::Standard # standard request whose response format is deliberately unsupported
17
+ def response_format
18
+ :invalid
19
+ end
20
+ end
21
+
22
+ class ServerTest < Test::Unit::TestCase
23
+ include Solr
24
+
25
+ def setup
26
+ @connection = Connection.new("http://localhost:8888/solr", :autocommit => :on)
27
+ clean
28
+ end
29
+
30
+ def test_full_lifecycle
31
+ # make sure autocommit is on
32
+ assert @connection.autocommit
33
+
34
+ # make sure this doc isn't there to begin with
35
+ @connection.delete(123456)
36
+
37
+ # add it
38
+ @connection.add(:id => 123456, :text => 'Borges') # add :some_date => 'NOW/HOUR' to test richer data type handling
39
+ # now = DateTime.now
40
+
41
+ # look for it
42
+ response = @connection.query('Borges')
43
+ assert_equal 1, response.total_hits
44
+ hit = response.hits[0]
45
+ assert_equal '123456', hit['id']
46
+ # assert_equal now.year, hit['whatever_date'].year
47
+
48
+ # look for it via dismax
49
+ response = @connection.search('Borges')
50
+ assert_equal 1, response.total_hits
51
+ assert_equal '123456', response.hits[0]['id']
52
+
53
+ # delete it
54
+ @connection.delete(123456)
55
+
56
+ # make sure it's gone
57
+ response = @connection.query('Borges')
58
+ assert_equal 0, response.total_hits
59
+ end
60
+
61
+ def test_i18n_full_lifecycle
62
+ # make sure autocommit is on
63
+ assert @connection.autocommit
64
+
65
+ # make sure this doc isn't there to begin with
66
+ @connection.delete(123456)
67
+
68
+ # add it
69
+ @connection.add(:id => 123456, :text => 'Åäöêâîôû')
70
+
71
+ # look for it
72
+ response = @connection.query('Åäöêâîôû')
73
+ assert_equal 1, response.total_hits
74
+ assert_equal '123456', response.hits[0]['id']
75
+
76
+ # delete it
77
+ @connection.delete(123456)
78
+
79
+ # make sure it's gone
80
+ response = @connection.query('Åäöêâîôû Öëäïöü')
81
+ assert_equal 0, response.total_hits
82
+ end
83
+
84
+ def test_sorting
85
+ @connection.add(:id => 1, :text => 'aaa woot')
86
+ @connection.add(:id => 2, :text => 'bbb woot')
87
+ @connection.add(:id => 3, :text => 'ccc woot')
88
+ @connection.commit
89
+
90
+ results = @connection.query('woot', :sort => [:id => :descending], :rows => 2)
91
+ assert_equal([3, 2], results.hits.map { |h| h['id'].to_i })
92
+
93
+ results = @connection.search('woot', :sort => [:id => :descending], :rows => 2)
94
+ assert_equal([3, 2], results.hits.map { |h| h['id'].to_i })
95
+
96
+ @connection.delete_by_query("id:1 OR id:2 OR id:3")
97
+ end
98
+
99
+ def test_bad_connection
100
+ conn = Solr::Connection.new 'http://127.0.0.1:9999/invalid'
101
+ begin
102
+ conn.send(Solr::Request::Ping.new)
103
+ flunk "Expected exception not raised"
104
+ rescue ::Exception
105
+ # expected
106
+ assert true
107
+ end
108
+ end
109
+
110
+ def test_bad_url
111
+ conn = Solr::Connection.new 'http://localhost:8888/invalid'
112
+ assert_raise(Net::HTTPServerException) do
113
+ conn.send(Solr::Request::Ping.new)
114
+ end
115
+ end
116
+
117
+ def test_commit
118
+ response = @connection.send(Solr::Request::Commit.new)
119
+ assert response.ok?
120
+ end
121
+
122
+ def test_optimize
123
+ response = @connection.send(Solr::Request::Optimize.new)
124
+ assert response.ok?
125
+ end
126
+
127
+ # TODO: add test_ping back... something seems to have changed with the response, so adjustments are needed.
128
+ # non-critical - if Solr is broken we'll know from other tests!
129
+ # def test_ping
130
+ # assert_equal true, @connection.ping
131
+ # end
132
+
133
+ def test_delete_with_query
134
+ assert_equal true, @connection.delete_by_query('[* TO *]')
135
+ end
136
+
137
+ def test_ping_with_bad_server
138
+ conn = Solr::Connection.new 'http://localhost:8888/invalid'
139
+ assert_equal false, conn.ping
140
+ end
141
+
142
+ def test_invalid_response_format
143
+ request = BadRequest.new(:query => "solr")
144
+ assert_raise(Solr::Exception) do
145
+ @connection.send(request)
146
+ end
147
+ end
148
+
149
+ def test_escaping
150
+ doc = Solr::Document.new :id => 47, :ruby_text => 'puts "ouch!"'
151
+ @connection.add(doc)
152
+ @connection.commit
153
+
154
+ request = Solr::Request::Standard.new :query => 'ouch'
155
+ result = @connection.send(request)
156
+
157
+ assert_match /puts/, result.raw_response
158
+ end
159
+
160
+ def test_add_document
161
+ doc = {:id => 999, :text => 'hi there!'}
162
+ request = Solr::Request::AddDocument.new(doc)
163
+ response = @connection.send(request)
164
+ assert response.status_code == '0'
165
+ end
166
+
167
+ def test_update
168
+ @connection.update(:id => 999, :text => 'update test')
169
+ end
170
+
171
+ def test_no_such_field # adding a document with the :bogus field is expected to raise
172
+ doc = {:id => 999, :bogus => 'foo'}
173
+ request = Solr::Request::AddDocument.new(doc)
174
+ assert_raise(Net::HTTPServerException) do
175
+ @connection.send(request)
176
+ end
177
+ # assert_equal false, response.ok?
178
+ # assert_match "ERROR:unknown field 'bogus'", response.status_message
179
+ end
180
+
181
+ def test_index_info
182
+ doc = {:id => 999, :test_index_facet => 'value'}
183
+ @connection.add(doc)
184
+ ii = Solr::Request::IndexInfo.new
185
+ info = @connection.send(Solr::Request::IndexInfo.new)
186
+ assert info.field_names.include?("id") && info.field_names.include?("test_index_facet")
187
+ assert_equal 1, info.num_docs
188
+ end
189
+
190
+ def test_highlighting
191
+ @connection.add(:id => 1, :title_text => "Apache Solr")
192
+
193
+ request = Solr::Request::Standard.new(:query => 'solr',
194
+ :highlighting => {
195
+ :field_list => ['title_text'],
196
+ :max_snippets => 3,
197
+ :prefix => ">>",
198
+ :suffix => "<<"
199
+ }
200
+ )
201
+
202
+ response = @connection.send(request)
203
+ assert_equal ["Apache >>Solr<<"], response.highlighted(1, :title_text)
204
+ end
205
+
206
+ def test_entities
207
+ @connection.add(:id => 1, :title_text => "&nbsp;")
208
+ response = @connection.query('nbsp')
209
+ assert_equal 1, response.total_hits
210
+ assert_equal '1', response.hits[0]['id']
211
+ end
212
+
213
+ # wipe the index clean
214
+ def clean
215
+ @connection.delete_by_query('*:*')
216
+ end
217
+
218
+ end
@@ -0,0 +1,104 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ # A singleton class for starting/stopping a Solr server for testing purposes
14
+ # The behavior of TestSolrServer can be modified prior to start() by changing
15
+ # port, jetty_home, solr_home, and quiet properties.
16
+
17
+ class TestSolrServer
18
+ require 'singleton'
19
+ include Singleton
20
+ attr_accessor :port, :jetty_home, :solr_home, :quiet
21
+
22
+ # no server process is running yet; defaults are applied in TestSolrServer.wrap
23
+ def initialize
24
+ @pid = nil
25
+ end
26
+
27
+ def self.wrap(params = {})
28
+ error = false
29
+ solr_server = self.instance
30
+ solr_server.quiet = params[:quiet] || true
31
+ solr_server.jetty_home = params[:jetty_home]
32
+ solr_server.solr_home = params[:solr_home]
33
+ solr_server.port = params[:jetty_port] || 8888
34
+ begin
35
+ puts "starting solr server on #{RUBY_PLATFORM}"
36
+ solr_server.start
37
+ sleep params[:startup_wait] || 5
38
+ yield
39
+ rescue
40
+ error = true
41
+ ensure
42
+ puts "stopping solr server"
43
+ solr_server.stop
44
+ end
45
+
46
+ return error
47
+ end
48
+
49
+ def jetty_command # command line that runs start.jar with the configured port and Solr home
50
+ "java -Djetty.port=#{@port} -Dsolr.solr.home=#{@solr_home} -jar start.jar"
51
+ end
52
+
53
+ def start
54
+ puts "jetty_home: #{@jetty_home}"
55
+ puts "solr_home: #{@solr_home}"
56
+ puts "jetty_command: #{jetty_command}"
57
+ platform_specific_start
58
+ end
59
+
60
+ def stop
61
+ platform_specific_stop
62
+ end
63
+
64
+ if RUBY_PLATFORM =~ /mswin32/
65
+ require 'win32/process'
66
+
67
+ # start the solr server
68
+ def platform_specific_start
69
+ Dir.chdir(@jetty_home) do
70
+ @pid = Process.create(
71
+ :app_name => jetty_command,
72
+ :creation_flags => Process::DETACHED_PROCESS,
73
+ :process_inherit => false,
74
+ :thread_inherit => true,
75
+ :cwd => "#{@jetty_home}"
76
+ ).process_id
77
+ end
78
+ end
79
+
80
+ # stop a running solr server
81
+ def platform_specific_stop
82
+ Process.kill(1, @pid)
83
+ Process.wait
84
+ end
85
+ else # Not Windows
86
+ # start the solr server
87
+ def platform_specific_start
88
+ puts self.inspect
89
+ Dir.chdir(@jetty_home) do
90
+ @pid = fork do
91
+ STDERR.close if @quiet
92
+ exec jetty_command
93
+ end
94
+ end
95
+ end
96
+
97
+ # stop a running solr server
98
+ def platform_specific_stop
99
+ Process.kill('TERM', @pid)
100
+ Process.wait
101
+ end
102
+ end
103
+
104
+ end
@@ -0,0 +1,40 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ require 'solr_mock_base'
14
+
15
+ class AddDocumentTest < SolrMockBaseTestCase
16
+
17
+ def test_add_document_response
18
+ conn = Solr::Connection.new('http://localhost:9999/solr')
19
+ set_post_return('<?xml version="1.0" encoding="UTF-8"?><response><lst name="responseHeader"><int name="status">0</int><int name="QTime">2</int></lst></response>')
20
+ doc = {:id => '123', :text => 'Tlon, Uqbar, Orbis Tertius'}
21
+ response = conn.send(Solr::Request::AddDocument.new(doc))
22
+ assert_equal true, response.ok?
23
+ end
24
+
25
+ def test_bad_add_document_response
26
+ conn = Solr::Connection.new('http://localhost:9999/solr')
27
+ set_post_return('<?xml version="1.0" encoding="UTF-8"?><response><lst name="responseHeader"><int name="status">1</int><int name="QTime">2</int></lst></response>')
28
+ doc = {:id => '123', :text => 'Tlon, Uqbar, Orbis Tertius'}
29
+ response = conn.send(Solr::Request::AddDocument.new(doc))
30
+ assert_equal false, response.ok?
31
+ end
32
+
33
+ def test_shorthand
34
+ conn = Solr::Connection.new('http://localhost:9999/solr')
35
+ set_post_return('<?xml version="1.0" encoding="UTF-8"?><response><lst name="responseHeader"><int name="status">0</int><int name="QTime">2</int></lst></response>')
36
+ doc = {:id => '123', :text => 'Tlon, Uqbar, Orbis Tertius'}
37
+ assert_equal true, conn.add(:id => '123', :text => 'Tlon, Uqbar, Orbis Tetius')
38
+ end
39
+
40
+ end
@@ -0,0 +1,37 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ require 'solr'
14
+ require 'test/unit'
15
+
16
+ include Solr::Importer
17
+
18
+ class ArrayMapperTest < Test::Unit::TestCase
19
+ def test_simple
20
+ mapping1 = {:one => "uno"}
21
+ mapping2 = {:two => "dos"}
22
+
23
+ mapper = Solr::Importer::ArrayMapper.new([Mapper.new(mapping1),Mapper.new(mapping2)])
24
+ mapped_data = mapper.map([{},{}])
25
+ assert_equal "uno", mapped_data[:one]
26
+ assert_equal "dos", mapped_data[:two]
27
+ end
28
+
29
+ def test_field_conflict_goes_to_last
30
+ mapping1 = {:same => "uno"}
31
+ mapping2 = {:same => "dos"}
32
+
33
+ mapper = Solr::Importer::ArrayMapper.new([Mapper.new(mapping1),Mapper.new(mapping2)])
34
+ mapped_data = mapper.map([{},{}])
35
+ assert_equal "dos", mapped_data[:same]
36
+ end
37
+ end
@@ -0,0 +1,21 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ require 'test/unit'
14
+
15
+ class ChangesYamlTest < Test::Unit::TestCase
16
+ def test_parse
17
+ change_log = YAML.load_file(File.expand_path(File.dirname(__FILE__)) + "/../../CHANGES.yml")
18
+ assert_equal Date.parse("2007-02-15"), change_log["v0.0.1"]["release_date"]
19
+ assert_equal ["initial release"], change_log["v0.0.1"]["changes"]
20
+ end
21
+ end