acts_as_solr 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (163) hide show
  1. data/.gitignore +8 -0
  2. data/CHANGE_LOG +233 -0
  3. data/FORKED_CHANGES +3 -0
  4. data/LICENSE +19 -0
  5. data/README.markdown +94 -0
  6. data/README.rdoc +84 -0
  7. data/Rakefile +57 -0
  8. data/TESTING_THE_PLUGIN +25 -0
  9. data/VERSION +1 -0
  10. data/acts_as_solr.gemspec +237 -0
  11. data/config/solr.yml +15 -0
  12. data/config/solr_environment.rb +22 -0
  13. data/init.rb +21 -0
  14. data/install.rb +11 -0
  15. data/lib/acts_as_solr.rb +61 -0
  16. data/lib/acts_methods.rb +284 -0
  17. data/lib/class_methods.rb +239 -0
  18. data/lib/common_methods.rb +89 -0
  19. data/lib/deprecation.rb +61 -0
  20. data/lib/instance_methods.rb +181 -0
  21. data/lib/lazy_document.rb +18 -0
  22. data/lib/parser_methods.rb +230 -0
  23. data/lib/search_results.rb +69 -0
  24. data/lib/solr/connection.rb +191 -0
  25. data/lib/solr/document.rb +78 -0
  26. data/lib/solr/exception.rb +13 -0
  27. data/lib/solr/field.rb +39 -0
  28. data/lib/solr/importer/array_mapper.rb +26 -0
  29. data/lib/solr/importer/delimited_file_source.rb +38 -0
  30. data/lib/solr/importer/hpricot_mapper.rb +27 -0
  31. data/lib/solr/importer/mapper.rb +51 -0
  32. data/lib/solr/importer/solr_source.rb +43 -0
  33. data/lib/solr/importer/xpath_mapper.rb +35 -0
  34. data/lib/solr/importer.rb +19 -0
  35. data/lib/solr/indexer.rb +52 -0
  36. data/lib/solr/request/add_document.rb +63 -0
  37. data/lib/solr/request/base.rb +36 -0
  38. data/lib/solr/request/commit.rb +31 -0
  39. data/lib/solr/request/delete.rb +50 -0
  40. data/lib/solr/request/dismax.rb +46 -0
  41. data/lib/solr/request/index_info.rb +22 -0
  42. data/lib/solr/request/modify_document.rb +51 -0
  43. data/lib/solr/request/optimize.rb +21 -0
  44. data/lib/solr/request/ping.rb +36 -0
  45. data/lib/solr/request/select.rb +56 -0
  46. data/lib/solr/request/spellcheck.rb +30 -0
  47. data/lib/solr/request/standard.rb +402 -0
  48. data/lib/solr/request/update.rb +23 -0
  49. data/lib/solr/request.rb +26 -0
  50. data/lib/solr/response/add_document.rb +17 -0
  51. data/lib/solr/response/base.rb +42 -0
  52. data/lib/solr/response/commit.rb +17 -0
  53. data/lib/solr/response/delete.rb +13 -0
  54. data/lib/solr/response/dismax.rb +8 -0
  55. data/lib/solr/response/index_info.rb +26 -0
  56. data/lib/solr/response/modify_document.rb +17 -0
  57. data/lib/solr/response/optimize.rb +14 -0
  58. data/lib/solr/response/ping.rb +28 -0
  59. data/lib/solr/response/ruby.rb +42 -0
  60. data/lib/solr/response/select.rb +17 -0
  61. data/lib/solr/response/spellcheck.rb +20 -0
  62. data/lib/solr/response/standard.rb +64 -0
  63. data/lib/solr/response/xml.rb +42 -0
  64. data/lib/solr/response.rb +27 -0
  65. data/lib/solr/solrtasks.rb +27 -0
  66. data/lib/solr/util.rb +32 -0
  67. data/lib/solr/xml.rb +44 -0
  68. data/lib/solr.rb +21 -0
  69. data/lib/solr_fixtures.rb +13 -0
  70. data/lib/tasks/database.rake +18 -0
  71. data/lib/tasks/solr.rake +137 -0
  72. data/lib/tasks/test.rake +7 -0
  73. data/lib/will_paginate_support.rb +12 -0
  74. data/solr/CHANGES.txt +1207 -0
  75. data/solr/LICENSE.txt +712 -0
  76. data/solr/NOTICE.txt +90 -0
  77. data/solr/etc/jetty.xml +205 -0
  78. data/solr/etc/webdefault.xml +379 -0
  79. data/solr/lib/easymock.jar +0 -0
  80. data/solr/lib/jetty-6.1.3.jar +0 -0
  81. data/solr/lib/jetty-util-6.1.3.jar +0 -0
  82. data/solr/lib/jsp-2.1/ant-1.6.5.jar +0 -0
  83. data/solr/lib/jsp-2.1/core-3.1.1.jar +0 -0
  84. data/solr/lib/jsp-2.1/jsp-2.1.jar +0 -0
  85. data/solr/lib/jsp-2.1/jsp-api-2.1.jar +0 -0
  86. data/solr/lib/servlet-api-2.4.jar +0 -0
  87. data/solr/lib/servlet-api-2.5-6.1.3.jar +0 -0
  88. data/solr/lib/xpp3-1.1.3.4.O.jar +0 -0
  89. data/solr/logs/.empty-dir-for-git +0 -0
  90. data/solr/solr/README.txt +52 -0
  91. data/solr/solr/bin/abc +176 -0
  92. data/solr/solr/bin/abo +176 -0
  93. data/solr/solr/bin/backup +108 -0
  94. data/solr/solr/bin/backupcleaner +142 -0
  95. data/solr/solr/bin/commit +128 -0
  96. data/solr/solr/bin/optimize +129 -0
  97. data/solr/solr/bin/readercycle +129 -0
  98. data/solr/solr/bin/rsyncd-disable +77 -0
  99. data/solr/solr/bin/rsyncd-enable +76 -0
  100. data/solr/solr/bin/rsyncd-start +145 -0
  101. data/solr/solr/bin/rsyncd-stop +105 -0
  102. data/solr/solr/bin/scripts-util +83 -0
  103. data/solr/solr/bin/snapcleaner +148 -0
  104. data/solr/solr/bin/snapinstaller +168 -0
  105. data/solr/solr/bin/snappuller +248 -0
  106. data/solr/solr/bin/snappuller-disable +77 -0
  107. data/solr/solr/bin/snappuller-enable +77 -0
  108. data/solr/solr/bin/snapshooter +109 -0
  109. data/solr/solr/conf/admin-extra.html +31 -0
  110. data/solr/solr/conf/protwords.txt +21 -0
  111. data/solr/solr/conf/schema.xml +126 -0
  112. data/solr/solr/conf/scripts.conf +24 -0
  113. data/solr/solr/conf/solrconfig.xml +458 -0
  114. data/solr/solr/conf/stopwords.txt +57 -0
  115. data/solr/solr/conf/synonyms.txt +31 -0
  116. data/solr/solr/conf/xslt/example.xsl +132 -0
  117. data/solr/solr/conf/xslt/example_atom.xsl +63 -0
  118. data/solr/solr/conf/xslt/example_rss.xsl +62 -0
  119. data/solr/start.jar +0 -0
  120. data/solr/tmp/.empty-dir-for-git +0 -0
  121. data/solr/webapps/solr.war +0 -0
  122. data/test/config/solr.yml +2 -0
  123. data/test/db/connections/mysql/connection.rb +10 -0
  124. data/test/db/connections/sqlite/connection.rb +8 -0
  125. data/test/db/migrate/001_create_books.rb +15 -0
  126. data/test/db/migrate/002_create_movies.rb +12 -0
  127. data/test/db/migrate/003_create_categories.rb +11 -0
  128. data/test/db/migrate/004_create_electronics.rb +16 -0
  129. data/test/db/migrate/005_create_authors.rb +12 -0
  130. data/test/db/migrate/006_create_postings.rb +9 -0
  131. data/test/db/migrate/007_create_posts.rb +13 -0
  132. data/test/db/migrate/008_create_gadgets.rb +11 -0
  133. data/test/fixtures/authors.yml +9 -0
  134. data/test/fixtures/books.yml +13 -0
  135. data/test/fixtures/categories.yml +7 -0
  136. data/test/fixtures/db_definitions/mysql.sql +41 -0
  137. data/test/fixtures/electronics.yml +49 -0
  138. data/test/fixtures/movies.yml +9 -0
  139. data/test/fixtures/postings.yml +10 -0
  140. data/test/functional/acts_as_solr_test.rb +413 -0
  141. data/test/functional/association_indexing_test.rb +37 -0
  142. data/test/functional/faceted_search_test.rb +163 -0
  143. data/test/functional/multi_solr_search_test.rb +51 -0
  144. data/test/models/author.rb +10 -0
  145. data/test/models/book.rb +10 -0
  146. data/test/models/category.rb +8 -0
  147. data/test/models/electronic.rb +21 -0
  148. data/test/models/gadget.rb +9 -0
  149. data/test/models/movie.rb +17 -0
  150. data/test/models/novel.rb +2 -0
  151. data/test/models/post.rb +3 -0
  152. data/test/models/posting.rb +11 -0
  153. data/test/test_helper.rb +51 -0
  154. data/test/unit/acts_methods_shoulda.rb +70 -0
  155. data/test/unit/class_methods_shoulda.rb +90 -0
  156. data/test/unit/common_methods_shoulda.rb +112 -0
  157. data/test/unit/instance_methods_shoulda.rb +326 -0
  158. data/test/unit/lazy_document_shoulda.rb +35 -0
  159. data/test/unit/parser_instance.rb +19 -0
  160. data/test/unit/parser_methods_shoulda.rb +279 -0
  161. data/test/unit/solr_instance.rb +46 -0
  162. data/test/unit/test_helper.rb +26 -0
  163. metadata +259 -0
@@ -0,0 +1,230 @@
1
+ module ActsAsSolr #:nodoc:
2
+ module ParserMethods
3
+ protected
4
+
5
# Method used by most of the ClassMethods when doing a search.
#
# Translates +query+ and the acts_as_solr +options+ into the parameter hash
# for a Solr::Request::Standard and executes it through ActsAsSolr::Post.
#
# query   - query string; "field:value" is rewritten to "field_t:value" and
#           then passed through replace_types.
# options - Hash; only the keys listed in +valid_options+ are accepted.
# models  - optional clause appended verbatim to the query. When nil, the
#           search is restricted with solr_type_condition and the configured
#           primary key field is requested instead of "id".
#
# Returns nil when +query+ is nil or blank, otherwise the value returned by
# ActsAsSolr::Post.execute.
# Raises RuntimeError for unknown option keys; any error raised while
# building or executing the request is re-raised as a RuntimeError.
def parse_query(query=nil, options={}, models=nil)
  valid_options = [:offset, :limit, :facets, :models, :results_format, :order, :scores, :operator, :include, :lazy, :highlight]
  query_options = {}

  return nil if (query.nil? || query.strip == '')

  raise "Invalid parameters: #{(options.keys - valid_options).join(',')}" unless (options.keys - valid_options).empty?
  begin
    Deprecation.validate_query(options)
    query_options[:start] = options[:offset]
    query_options[:rows] = options[:limit]
    query_options[:operator] = options[:operator]

    if options[:highlight] == true
      options[:highlight] = {:fields => "*"}
    end

    if options[:highlight]
      query_options[:highlighting] = {}
      query_options[:highlighting][:field_list] = []
      # [*fields] accepts a single name (such as the "*" default above) as well
      # as a list; calling collect directly on a String fails on Ruby >= 1.9.
      query_options[:highlighting][:field_list] << [*options[:highlight][:fields]].collect {|k| "#{k}_t"} if options[:highlight][:fields]
      query_options[:highlighting][:require_field_match] = options[:highlight][:require_field_match] if options[:highlight][:require_field_match]
      query_options[:highlighting][:max_snippets] = options[:highlight][:max_snippets] if options[:highlight][:max_snippets]
      query_options[:highlighting][:prefix] = options[:highlight][:prefix] if options[:highlight][:prefix]
      query_options[:highlighting][:suffix] = options[:highlight][:suffix] if options[:highlight][:suffix]
    end

    # first steps on the facet parameter processing
    if options[:facets]
      query_options[:facets] = {}
      query_options[:facets][:limit] = -1  # TODO: make this configurable
      query_options[:facets][:sort] = :count if options[:facets][:sort]
      query_options[:facets][:mincount] = 0
      query_options[:facets][:mincount] = 1 if options[:facets][:zeros] == false
      # override the :zeros (it's deprecated anyway) if :mincount exists
      query_options[:facets][:mincount] = options[:facets][:mincount] if options[:facets][:mincount]
      query_options[:facets][:fields] = options[:facets][:fields].collect{|k| "#{k}_facet"} if options[:facets][:fields]
      # Non-destructive sub: sub! returns nil when nothing matches (which used
      # to turn the entry into "") and also modified the caller's strings.
      query_options[:filter_queries] = replace_types([*options[:facets][:browse]].collect{|k| "#{k.sub(/ *: */,"_facet:")}"}) if options[:facets][:browse]
      query_options[:facets][:queries] = replace_types(options[:facets][:query].collect{|k| "#{k.sub(/ *: */,"_t:")}"}) if options[:facets][:query]

      if options[:facets][:dates]
        query_options[:date_facets] = {}
        # if options[:facets][:dates][:fields] exists then :start, :end, and :gap must be there
        if options[:facets][:dates][:fields]
          [:start, :end, :gap].each { |k| raise "#{k} must be present in faceted date query" unless options[:facets][:dates].include?(k) }
          query_options[:date_facets][:fields] = []
          options[:facets][:dates][:fields].each { |f|
            if f.kind_of? Hash
              key = f.keys[0]
              query_options[:date_facets][:fields] << {"#{key}_d" => f[key]}
              validate_date_facet_other_options(f[key][:other]) if f[key][:other]
            else
              query_options[:date_facets][:fields] << "#{f}_d"
            end
          }
        end

        query_options[:date_facets][:start] = options[:facets][:dates][:start] if options[:facets][:dates][:start]
        query_options[:date_facets][:end] = options[:facets][:dates][:end] if options[:facets][:dates][:end]
        query_options[:date_facets][:gap] = options[:facets][:dates][:gap] if options[:facets][:dates][:gap]
        query_options[:date_facets][:hardend] = options[:facets][:dates][:hardend] if options[:facets][:dates][:hardend]
        # The (?!\d) lookahead leaves colons that are followed by a digit untouched.
        query_options[:date_facets][:filter] = replace_types([*options[:facets][:dates][:filter]].collect{|k| "#{k.sub(/ *:(?!\d) */,"_d:")}"}) if options[:facets][:dates][:filter]

        if options[:facets][:dates][:other]
          validate_date_facet_other_options(options[:facets][:dates][:other])
          query_options[:date_facets][:other] = options[:facets][:dates][:other]
        end

      end
    end

    if models.nil?
      # TODO: use a filter query for type, allowing Solr to cache it individually
      models = "AND #{solr_type_condition}"
      field_list = solr_configuration[:primary_key_field]
    else
      field_list = "id"
    end

    query_options[:field_list] = [field_list, 'score']
    query = "(#{query.gsub(/ *: */,"_t:")}) #{models}"
    order = options[:order].split(/\s*,\s*/).collect{|e| e.gsub(/\s+/,'_t ').gsub(/\bscore_t\b/, 'score') }.join(',') if options[:order]
    query_options[:query] = replace_types([query])[0] # TODO adjust replace_types to work with String or Array

    if options[:order]
      # TODO: set the sort parameter instead of the old ;order. style.
      query_options[:query] << ';' << replace_types([order], false)[0]
    end

    ActsAsSolr::Post.execute(Solr::Request::Standard.new(query_options))
  rescue
    raise "There was a problem executing your search: #{$!} in #{$!.backtrace.first}"
  end
end
100
+
101
# Builds the parenthesised clause that limits a search to this class and
# its subclasses, e.g. "(type_field:Book OR type_field:Novel)".
def solr_type_condition
  type_field = solr_configuration[:type_field]
  class_names = [self.name] + subclasses.collect { |subclass| subclass.name }
  clauses = class_names.collect { |class_name| "#{type_field}:#{class_name}" }
  "(#{clauses.join(' OR ')})"
end
106
+
107
# Parses the data returned from Solr into a SearchResults object.
#
# solr_data - the Solr response; this method reads total_hits, hits,
#             highlighting, data and max_score from it. May be nil.
# options   - Hash; :facets, :scores and :format are read here, and the whole
#             hash is also handed to find_objects.
#
# Returns a SearchResults. When +solr_data+ is nil or has no hits, the result
# holds empty :docs and a :total of 0 (plus an empty facet list when facets
# were requested).
def parse_results(solr_data, options = {})
  results = {
    :docs => [],
    :total => 0
  }

  configuration = {
    :format => :objects
  }
  results.update(:facets => {'facet_fields' => []}) if options[:facets]
  return SearchResults.new(results) if (solr_data.nil? || solr_data.total_hits == 0)

  configuration.update(options) if options.is_a?(Hash)

  ids = solr_data.hits.collect {|doc| doc["#{solr_configuration[:primary_key_field]}"]}.flatten

  result = find_objects(ids, options, configuration)

  add_scores(result, solr_data) if configuration[:format] == :objects && options[:scores]

  # Re-key the highlighting data as { numeric id => { field name => fragments } }.
  highlighted = {}
  (solr_data.highlighting || {}).each do |doc_key, snippets|
    per_field = {}
    (snippets || {}).each do |field_name, fragments|
      # Strip only the trailing type suffix ("first_name_t" -> "first_name").
      # Removing every "_segment" collapsed such names to "first", so
      # different fields collided on the same key.
      per_field[field_name.sub(/_[^_]*\z/, "")] = fragments
    end
    # Drop everything up to and including the colon, then take the integer.
    highlighted[doc_key.gsub(/[^:]*:/,"").to_i] = per_field
  end

  results.update(:facets => solr_data.data['facet_counts']) if options[:facets]
  results.update({:docs => result, :total => solr_data.total_hits, :max_score => solr_data.max_score, :query_time => solr_data.data['responseHeader']['QTime']})
  results.update({:highlights=>highlighted})

  sr = SearchResults.new(results)

  # Let records that implement init_solr see the full result hash.
  sr.records.each do |model|
    model.init_solr(results) if model.respond_to?(:init_solr)
  end if sr.records

  sr
end
148
+
149
+
150
# Turns the ids returned by Solr into the requested result format:
# LazyDocument wrappers when :lazy is set (and the format is not :ids),
# records loaded through find and put back into Solr's order when the format
# is :objects, or the raw ids otherwise.
def find_objects(ids, options, configuration)
  wanted_format = configuration[:format]

  if configuration[:lazy] && wanted_format != :ids
    return ids.collect { |id| ActsAsSolr::LazyDocument.new(id, self) }
  end
  return ids unless wanted_format == :objects

  finder_args = {:conditions => ["#{self.table_name}.#{primary_key} in (?)", ids]}
  finder_args[:include] = options[:include] if options[:include]
  reorder(self.find(:all, finder_args), ids)
end
164
+
165
# Reorders the instances keeping the order returned from Solr.
#
# things - records loaded from the database; each must respond to +id+.
# ids    - the ids in the order Solr returned them.
#
# Returns a new Array holding +things+ arranged to match +ids+.
# Raises RuntimeError when the two collections differ in size, or when a
# record's id does not appear in +ids+.
def reorder(things, ids)
  raise "Out of sync! Found #{ids.size} items in index, but only #{things.size} were found in database!" unless things.size == ids.size

  # Index each id's first position once, instead of scanning +ids+ for every record.
  positions = {}
  ids.each_with_index { |id, index| positions[id] = index unless positions.key?(id) }

  ordered_things = Array.new(things.size)
  things.each do |thing|
    position = positions[thing.id]
    # Without this check a missing id surfaced as an obscure TypeError from Array#[]=.
    raise "Out of sync! Record with id #{thing.id.inspect} was found in database but not in the index results!" if position.nil?
    ordered_things[position] = thing
  end
  ordered_things
end
175
+
176
# Replaces the field types based on the types (if any) specified
# on the acts_as_solr call.
#
# Each configured field or include named NAME has its default "NAME_t"
# reference rewritten to "NAME_<type>", where the type comes from
# get_solr_field_type.
#
# strings       - Array of query fragments; it is updated in place and returned.
# include_colon - when true only "NAME_t:" references are rewritten; when
#                 false bare "NAME_t" tokens are rewritten (used for sorting).
def replace_types(strings, include_colon=true)
  suffix = include_colon ? ":" : ""
  # Without the colon the name must end on a word boundary, so that field
  # "name" does not rewrite part of "name_tag_t".
  tail = include_colon ? ":" : "\\b"

  retype = lambda do |solr_name, solr_type|
    field = "#{solr_name}_#{solr_type}#{suffix}"
    # The leading \b stops field "name" from rewriting "first_name_t:";
    # Regexp.escape keeps the configured name from being read as a pattern.
    pattern = /\b#{Regexp.escape(solr_name.to_s)}_t#{tail}/
    strings.each_with_index {|s,i| strings[i] = s.gsub(pattern) { field } }
  end

  if configuration[:solr_fields]
    configuration[:solr_fields].each do |name, options|
      solr_name = options[:as] || name.to_s
      retype.call(solr_name, get_solr_field_type(options[:type]))
    end
  end
  if configuration[:solr_includes]
    configuration[:solr_includes].each do |association, options|
      solr_name = options[:as] || association.to_s.singularize
      retype.call(solr_name, get_solr_field_type(options[:type]))
    end
  end
  strings
end
198
+
199
# Adds the score to each one of the instances found.
#
# results   - the loaded records.
# solr_data - the Solr response; each hit's "score" is read and matched to a
#             record through scorable_record?.
#
# Every matched record gains a singleton +solr_score+ accessor holding its
# score. Hits without a matching record are skipped.
def add_scores(results, solr_data)
  with_score = []
  solr_data.hits.each do |doc|
    with_score.push([doc["score"],
                     results.find {|record| scorable_record?(record, doc) }])
  end
  with_score.each do |score, object|
    # `class << nil` would open NilClass itself and then try to store the
    # score on nil, so unmatched hits must be skipped.
    next if object.nil?
    class << object; attr_accessor :solr_score; end
    object.solr_score = score
  end
end
211
+
212
# Tells whether the Solr hit +doc+ refers to +record+.
#
# When the hit carries the configured primary key field, the comparison is
# made on that value. Otherwise the first entry of the hit's "id" field is
# compared against "ClassName:record_id".
def scorable_record?(record, doc)
  primary_key_value = doc[solr_configuration[:primary_key_field].to_s]
  return record_id(record).to_s == primary_key_value.to_s unless primary_key_value.nil?

  expected_id = "#{record.class.name}:#{record_id(record)}"
  expected_id == doc["id"].first.to_s
end
221
+
222
# Checks the :other setting of a date facet. Accepts a single value or an
# Array of values (anything responding to to_sym) and raises a RuntimeError
# naming every value that is not :after, :all, :before, :between or :none.
# Returns nil when all values are valid.
def validate_date_facet_other_options(options)
  requested = options.kind_of?(Array) ? options : [options]
  unknown = requested.collect { |option| option.to_sym }.reject do |option|
    [:after, :all, :before, :between, :none].include?(option)
  end
  return nil if unknown.empty?

  plural = unknown.size > 1 ? 's' : ''
  raise "Invalid option#{plural} for faceted date's other param: #{unknown.join(', ')}. May only be one of :after, :all, :before, :between, :none"
end
228
+
229
+ end
230
+ end
@@ -0,0 +1,69 @@
1
+ module ActsAsSolr #:nodoc:
2
+
3
# TODO: Possibly looking into hooking it up with Solr::Response::Standard
#
# Read-only wrapper around the hash of search data.
#
#   books = Book.find_by_solr 'ruby'
#
# the above will return a SearchResults instance with these readers:
#
# docs|results|records: will return an array of records found
#
#   books.records.empty?
#   => false
#
# total|num_found|total_hits: will return the total number of records found
#
#   books.total
#   => 2
#
# facets: will return the facets when doing a faceted search
#
# max_score|highest_score: returns the highest score found
#
#   books.max_score
#   => 1.3213213
#
# query_time: returns the value stored under :query_time
#
# highlights: returns the value stored under :highlights
#
class SearchResults
  # solr_data - Hash with any of the keys :docs, :total, :facets,
  #             :max_score, :query_time and :highlights.
  def initialize(solr_data={})
    @solr_data = solr_data
  end

  # Returns an array with the instances. This method
  # is also aliased as docs and records
  def results
    @solr_data[:docs]
  end

  # Returns the total records found. This method is
  # also aliased as num_found and total_hits
  def total
    @solr_data[:total]
  end

  # Returns the facets when doing a faceted search
  def facets
    @solr_data[:facets]
  end

  # Returns the highest score found. This method is
  # also aliased as highest_score
  def max_score
    @solr_data[:max_score]
  end

  # Returns the value stored under :query_time
  def query_time
    @solr_data[:query_time]
  end

  # Returns the value stored under :highlights, or nil when none was
  # stored. Previously that entry had no reader.
  def highlights
    @solr_data[:highlights]
  end

  alias docs results
  alias records results
  alias num_found total
  alias total_hits total
  alias highest_score max_score
end
68
+
69
+ end
@@ -0,0 +1,191 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ require 'net/http'
14
+
15
+ # TODO: add a convenience method to POST a Solr .xml file, like Solr's example post.sh
16
+
17
# HTTP client for a single Solr instance. Wraps a Net::HTTP connection and
# offers shortcuts for the common requests (add, query, commit, delete, ...).
class Solr::Connection
  attr_reader :url, :autocommit, :connection

  # Control characters below 0x20 other than tab (0x09), line feed (0x0A) and
  # carriage return (0x0D); they are stripped from every request body in #post.
  ILLEGAL_XML_CHARS = /[\x00-\x08\x0B\x0C\x0E-\x1F]/

  # create a connection to a solr instance using the url for the solr
  # application context:
  #
  #   conn = Solr::Connection.new("http://example.com:8080/solr")
  #
  # if you would prefer to have all adds/updates autocommitted,
  # use :autocommit => :on
  #
  #   conn = Solr::Connection.new('http://example.com:8080/solr',
  #     :autocommit => :on)
  #
  # opts[:timeout], when given, is used as the connection's read timeout.
  # Raises RuntimeError when +url+ is not an http url.
  def initialize(url="http://localhost:8983/solr", opts={})
    @url = URI.parse(url)
    unless @url.kind_of? URI::HTTP
      raise "invalid http url: #{url}"
    end

    # TODO: Autocommit seems nice at one level, but it currently is confusing because
    # only calls to Connection#add/#update/#delete, though a Connection#send(AddDocument.new(...))
    # does not autocommit.  Maybe #send should check for the request types that require a commit and
    # commit in #send instead of the individual methods?
    @autocommit = opts[:autocommit] == :on

    # Not actually opening the connection yet, just setting up the persistent connection.
    @connection = Net::HTTP.new(@url.host, @url.port)

    @connection.read_timeout = opts[:timeout] if opts[:timeout]
  end

  # add a document to the index. you can pass in either a hash
  #
  #   conn.add(:id => 123, :title => 'Tlon, Uqbar, Orbis Tertius')
  #
  # or a Solr::Document
  #
  #   conn.add(Solr::Document.new(:id => 123, :title => 'On Writing'))
  #
  # true/false will be returned to designate success/failure
  def add(doc)
    request = Solr::Request::AddDocument.new(doc)
    response = send(request)
    commit if @autocommit
    return response.ok?
  end

  # update a document in the index (really just an alias to add)
  def update(doc)
    return add(doc)
  end

  # performs a standard query and returns a Solr::Response::Standard
  #
  #   response = conn.query('borges')
  #
  # alternatively you can pass in a block and iterate over hits
  #
  #   conn.query('borges') do |hit|
  #     puts hit
  #   end
  #
  # options include:
  #
  #   :sort, :default_field, :rows, :filter_queries, :debug_query,
  #   :explain_other, :facets, :highlighting, :mlt,
  #   :operator    => :or / :and
  #   :start       => defaults to 0
  #   :field_list  => array, defaults to ["*", "score"]
  def query(query, options={}, &action)
    # TODO: Shouldn't this return an exception if the Solr status is not ok?  (rather than true/false).
    # merge (not update) so the caller's options hash is left untouched.
    create_and_send_query(Solr::Request::Standard, options.merge(:query => query), &action)
  end

  # performs a dismax search and returns a Solr::Response::Standard
  #
  #   response = conn.search('borges')
  #
  # options are same as query, but also include:
  #
  #   :tie_breaker, :query_fields, :minimum_match, :phrase_fields,
  #   :phrase_slop, :boost_query, :boost_functions
  def search(query, options={}, &action)
    # merge (not update) so the caller's options hash is left untouched.
    create_and_send_query(Solr::Request::Dismax, options.merge(:query => query), &action)
  end

  # sends a commit message to the server
  def commit(options={})
    response = send(Solr::Request::Commit.new(options))
    return response.ok?
  end

  # sends an optimize message to the server
  def optimize
    response = send(Solr::Request::Optimize.new)
    return response.ok?
  end

  # pings the connection and returns true/false if it is alive or not.
  # Any StandardError raised while pinging is reported as false.
  def ping
    begin
      response = send(Solr::Request::Ping.new)
      return response.ok?
    rescue
      return false
    end
  end

  # delete a document from the index using the document id
  def delete(document_id)
    response = send(Solr::Request::Delete.new(:id => document_id))
    commit if @autocommit
    response.ok?
  end

  # delete using a query
  def delete_by_query(query)
    response = send(Solr::Request::Delete.new(:query => query))
    commit if @autocommit
    response.ok?
  end

  # sends an index info request and returns its response
  def info
    send(Solr::Request::IndexInfo.new)
  end

  # send a given Solr::Request and return a RubyResponse or XmlResponse
  # depending on the type of request.
  # NOTE: this shadows Object#send for connection instances.
  def send(request)
    data = post(request)
    Solr::Response::Base.make_response(request, data)
  end

  # send the http post request to solr; for convenience there are shortcuts
  # to some requests: add(), query(), commit(), delete() or send()
  #
  # Returns the response body on HTTP success; otherwise raises through
  # Net::HTTPResponse#error!. Set the DEBUG environment variable to echo the
  # request to stdout.
  def post(request)
    if ENV["DEBUG"]
      puts "POST #{@url.path + "/" + request.handler}"
      puts "-- DATA -------------------"
      puts request.to_s
      puts "-- END DATA ---------------"
    end

    response = @connection.post(@url.path + "/" + request.handler,
                                request.to_s.gsub(ILLEGAL_XML_CHARS, ''),
                                { "Content-Type" => request.content_type })

    case response
    when Net::HTTPSuccess then response.body
    else
      response.error!
    end

  end

  private

  # Builds a request of class +klass+ from +options+ and sends it. Returns the
  # response when no block is given; otherwise passes each hit to the block.
  def create_and_send_query(klass, options = {}, &action)
    request = klass.new(options)
    response = send(request)
    return response unless action
    response.each {|hit| action.call(hit)}
  end

end
189
+
190
+
191
+
@@ -0,0 +1,78 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ require 'solr/xml'
14
+ require 'solr/field'
15
+
16
# An ordered collection of Solr::Field objects with an optional document
# boost. Enumerable iterates over the fields.
class Solr::Document
  include Enumerable
  attr_accessor :boost
  attr_reader :fields

  # Create a new Solr::Document, optionally passing in a hash of
  # key/value pairs for the fields
  #
  #   doc = Solr::Document.new(:creator => 'Jorge Luis Borges')
  def initialize(hash={})
    @fields = []
    self << hash
  end

  # Append a Solr::Field
  #
  #   doc << Solr::Field.new(:creator => 'Jorge Luis Borges')
  #
  # If you are truly lazy you can simply pass in a hash:
  #
  #   doc << {:creator => 'Jorge Luis Borges'}
  #
  # A hash value that responds to each (and is not a String) adds one field
  # per element. Returns the document itself so appends can be chained.
  # Raises RuntimeError for any other argument type.
  def <<(fields)
    case fields
    when Hash
      fields.each_pair do |name,value|
        if value.respond_to?(:each) && !value.is_a?(String)
          value.each {|v| @fields << Solr::Field.new(name => v)}
        else
          @fields << Solr::Field.new(name => value)
        end
      end
    when Solr::Field
      @fields << fields
    else
      raise "must pass in Solr::Field or Hash"
    end
    # Previously the Hash branch returned the argument and the Field branch
    # returned the fields array; returning self makes `doc << a << b` work.
    self
  end

  # shorthand to allow hash lookups
  #   doc['name']
  # Returns the value of the first field with that name, or nil.
  def [](name)
    field = @fields.find {|f| f.name == name.to_s}
    return field.value if field
    return nil
  end

  # shorthand to assign as a hash.
  # Note that this appends a new field; an existing field with the same
  # name is kept.
  def []=(name,value)
    @fields << Solr::Field.new(name => value)
  end

  # convert the Document to a REXML::Element
  def to_xml
    e = Solr::XML::Element.new 'doc'
    e.attributes['boost'] = @boost.to_s if @boost
    @fields.each {|f| e.add_element(f.to_xml)}
    return e
  end

  # Yields each field; any positional arguments are ignored.
  def each(*args, &blk)
    fields.each(&blk)
  end
end
@@ -0,0 +1,13 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
# Error class for the Solr library. Derives from StandardError (rather than
# Exception) so that a plain `rescue` catches it.
class Solr::Exception < StandardError; end
data/lib/solr/field.rb ADDED
@@ -0,0 +1,39 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ require 'solr/xml'
14
+ require 'time'
15
+
16
# A single name/value pair of a Solr document, with an optional boost.
class Solr::Field
  VALID_PARAMS = [:boost]
  attr_accessor :name
  attr_accessor :value
  attr_accessor :boost

  # Accepts an optional <tt>:boost</tt> parameter, used to boost the relevance of a particular field.
  # The first key of +params+ that is not a valid parameter names the field,
  # and its entry supplies the value.
  def initialize(params)
    @boost = params[:boost]
    name_key = params.keys.reject { |key| VALID_PARAMS.include?(key) }.first
    @name = name_key.to_s
    raw_value = params[name_key]
    # Convert any Time values into UTC/XML schema format (which Solr requires).
    @value = if raw_value.respond_to?(:utc)
               raw_value.utc.xmlschema
             else
               raw_value.to_s
             end
  end

  # Builds the <field> element carrying the name, the optional boost and the value.
  def to_xml
    element = Solr::XML::Element.new 'field'
    element.attributes['name'] = @name
    element.attributes['boost'] = @boost.to_s if @boost
    element.text = @value
    element
  end

end
@@ -0,0 +1,26 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+
14
+
15
class Solr::Importer::ArrayMapper < Solr::Importer::Mapper
  # TODO document that initializer takes an array of Mappers [mapper1, mapper2, ... mapperN]

  # TODO: make merge conflict handling configurable.  as is, the last map fields win.
  #
  # Maps every element of +orig_data_array+ with the mapper found at the same
  # index in @mapping and merges the results into a single hash.
  def map(orig_data_array)
    orig_data_array.each_with_index.inject({}) do |merged, (data, index)|
      merged.update(@mapping[index].map(data))
    end
  end
end
@@ -0,0 +1,38 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
# For files with the first line containing field names.
# Currently not designed for enormous files, as all lines are
# read into an array.
class Solr::Importer::DelimitedFileSource
  include Enumerable

  # filename - path of the delimited file.
  # splitter - pattern used to split each line into cells (tab by default).
  def initialize(filename, splitter=/\t/)
    @filename = filename
    @splitter = splitter
  end

  # Yields one row per data line as an array of [header, value] pairs. Each
  # row also answers row[key] with the value stored under the header key.to_s,
  # or nil when there is no such header.
  def each
    lines = IO.readlines(@filename)
    # An empty file has no header line, so there is nothing to yield.
    return if lines.empty?

    headers = lines[0].split(@splitter).collect{|h| h.chomp}

    lines[1..-1].each do |line|
      data = headers.zip(line.split(@splitter).collect{|s| s.chomp})
      def data.[](key)
        pair = self.assoc(key.to_s)
        # assoc returns nil for an unknown header; answer nil instead of failing on nil[1].
        pair && pair[1]
      end

      yield(data)
    end
  end

end