acts_as_solr 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (163)
  1. data/.gitignore +8 -0
  2. data/CHANGE_LOG +233 -0
  3. data/FORKED_CHANGES +3 -0
  4. data/LICENSE +19 -0
  5. data/README.markdown +94 -0
  6. data/README.rdoc +84 -0
  7. data/Rakefile +57 -0
  8. data/TESTING_THE_PLUGIN +25 -0
  9. data/VERSION +1 -0
  10. data/acts_as_solr.gemspec +237 -0
  11. data/config/solr.yml +15 -0
  12. data/config/solr_environment.rb +22 -0
  13. data/init.rb +21 -0
  14. data/install.rb +11 -0
  15. data/lib/acts_as_solr.rb +61 -0
  16. data/lib/acts_methods.rb +284 -0
  17. data/lib/class_methods.rb +239 -0
  18. data/lib/common_methods.rb +89 -0
  19. data/lib/deprecation.rb +61 -0
  20. data/lib/instance_methods.rb +181 -0
  21. data/lib/lazy_document.rb +18 -0
  22. data/lib/parser_methods.rb +230 -0
  23. data/lib/search_results.rb +69 -0
  24. data/lib/solr/connection.rb +191 -0
  25. data/lib/solr/document.rb +78 -0
  26. data/lib/solr/exception.rb +13 -0
  27. data/lib/solr/field.rb +39 -0
  28. data/lib/solr/importer/array_mapper.rb +26 -0
  29. data/lib/solr/importer/delimited_file_source.rb +38 -0
  30. data/lib/solr/importer/hpricot_mapper.rb +27 -0
  31. data/lib/solr/importer/mapper.rb +51 -0
  32. data/lib/solr/importer/solr_source.rb +43 -0
  33. data/lib/solr/importer/xpath_mapper.rb +35 -0
  34. data/lib/solr/importer.rb +19 -0
  35. data/lib/solr/indexer.rb +52 -0
  36. data/lib/solr/request/add_document.rb +63 -0
  37. data/lib/solr/request/base.rb +36 -0
  38. data/lib/solr/request/commit.rb +31 -0
  39. data/lib/solr/request/delete.rb +50 -0
  40. data/lib/solr/request/dismax.rb +46 -0
  41. data/lib/solr/request/index_info.rb +22 -0
  42. data/lib/solr/request/modify_document.rb +51 -0
  43. data/lib/solr/request/optimize.rb +21 -0
  44. data/lib/solr/request/ping.rb +36 -0
  45. data/lib/solr/request/select.rb +56 -0
  46. data/lib/solr/request/spellcheck.rb +30 -0
  47. data/lib/solr/request/standard.rb +402 -0
  48. data/lib/solr/request/update.rb +23 -0
  49. data/lib/solr/request.rb +26 -0
  50. data/lib/solr/response/add_document.rb +17 -0
  51. data/lib/solr/response/base.rb +42 -0
  52. data/lib/solr/response/commit.rb +17 -0
  53. data/lib/solr/response/delete.rb +13 -0
  54. data/lib/solr/response/dismax.rb +8 -0
  55. data/lib/solr/response/index_info.rb +26 -0
  56. data/lib/solr/response/modify_document.rb +17 -0
  57. data/lib/solr/response/optimize.rb +14 -0
  58. data/lib/solr/response/ping.rb +28 -0
  59. data/lib/solr/response/ruby.rb +42 -0
  60. data/lib/solr/response/select.rb +17 -0
  61. data/lib/solr/response/spellcheck.rb +20 -0
  62. data/lib/solr/response/standard.rb +64 -0
  63. data/lib/solr/response/xml.rb +42 -0
  64. data/lib/solr/response.rb +27 -0
  65. data/lib/solr/solrtasks.rb +27 -0
  66. data/lib/solr/util.rb +32 -0
  67. data/lib/solr/xml.rb +44 -0
  68. data/lib/solr.rb +21 -0
  69. data/lib/solr_fixtures.rb +13 -0
  70. data/lib/tasks/database.rake +18 -0
  71. data/lib/tasks/solr.rake +137 -0
  72. data/lib/tasks/test.rake +7 -0
  73. data/lib/will_paginate_support.rb +12 -0
  74. data/solr/CHANGES.txt +1207 -0
  75. data/solr/LICENSE.txt +712 -0
  76. data/solr/NOTICE.txt +90 -0
  77. data/solr/etc/jetty.xml +205 -0
  78. data/solr/etc/webdefault.xml +379 -0
  79. data/solr/lib/easymock.jar +0 -0
  80. data/solr/lib/jetty-6.1.3.jar +0 -0
  81. data/solr/lib/jetty-util-6.1.3.jar +0 -0
  82. data/solr/lib/jsp-2.1/ant-1.6.5.jar +0 -0
  83. data/solr/lib/jsp-2.1/core-3.1.1.jar +0 -0
  84. data/solr/lib/jsp-2.1/jsp-2.1.jar +0 -0
  85. data/solr/lib/jsp-2.1/jsp-api-2.1.jar +0 -0
  86. data/solr/lib/servlet-api-2.4.jar +0 -0
  87. data/solr/lib/servlet-api-2.5-6.1.3.jar +0 -0
  88. data/solr/lib/xpp3-1.1.3.4.O.jar +0 -0
  89. data/solr/logs/.empty-dir-for-git +0 -0
  90. data/solr/solr/README.txt +52 -0
  91. data/solr/solr/bin/abc +176 -0
  92. data/solr/solr/bin/abo +176 -0
  93. data/solr/solr/bin/backup +108 -0
  94. data/solr/solr/bin/backupcleaner +142 -0
  95. data/solr/solr/bin/commit +128 -0
  96. data/solr/solr/bin/optimize +129 -0
  97. data/solr/solr/bin/readercycle +129 -0
  98. data/solr/solr/bin/rsyncd-disable +77 -0
  99. data/solr/solr/bin/rsyncd-enable +76 -0
  100. data/solr/solr/bin/rsyncd-start +145 -0
  101. data/solr/solr/bin/rsyncd-stop +105 -0
  102. data/solr/solr/bin/scripts-util +83 -0
  103. data/solr/solr/bin/snapcleaner +148 -0
  104. data/solr/solr/bin/snapinstaller +168 -0
  105. data/solr/solr/bin/snappuller +248 -0
  106. data/solr/solr/bin/snappuller-disable +77 -0
  107. data/solr/solr/bin/snappuller-enable +77 -0
  108. data/solr/solr/bin/snapshooter +109 -0
  109. data/solr/solr/conf/admin-extra.html +31 -0
  110. data/solr/solr/conf/protwords.txt +21 -0
  111. data/solr/solr/conf/schema.xml +126 -0
  112. data/solr/solr/conf/scripts.conf +24 -0
  113. data/solr/solr/conf/solrconfig.xml +458 -0
  114. data/solr/solr/conf/stopwords.txt +57 -0
  115. data/solr/solr/conf/synonyms.txt +31 -0
  116. data/solr/solr/conf/xslt/example.xsl +132 -0
  117. data/solr/solr/conf/xslt/example_atom.xsl +63 -0
  118. data/solr/solr/conf/xslt/example_rss.xsl +62 -0
  119. data/solr/start.jar +0 -0
  120. data/solr/tmp/.empty-dir-for-git +0 -0
  121. data/solr/webapps/solr.war +0 -0
  122. data/test/config/solr.yml +2 -0
  123. data/test/db/connections/mysql/connection.rb +10 -0
  124. data/test/db/connections/sqlite/connection.rb +8 -0
  125. data/test/db/migrate/001_create_books.rb +15 -0
  126. data/test/db/migrate/002_create_movies.rb +12 -0
  127. data/test/db/migrate/003_create_categories.rb +11 -0
  128. data/test/db/migrate/004_create_electronics.rb +16 -0
  129. data/test/db/migrate/005_create_authors.rb +12 -0
  130. data/test/db/migrate/006_create_postings.rb +9 -0
  131. data/test/db/migrate/007_create_posts.rb +13 -0
  132. data/test/db/migrate/008_create_gadgets.rb +11 -0
  133. data/test/fixtures/authors.yml +9 -0
  134. data/test/fixtures/books.yml +13 -0
  135. data/test/fixtures/categories.yml +7 -0
  136. data/test/fixtures/db_definitions/mysql.sql +41 -0
  137. data/test/fixtures/electronics.yml +49 -0
  138. data/test/fixtures/movies.yml +9 -0
  139. data/test/fixtures/postings.yml +10 -0
  140. data/test/functional/acts_as_solr_test.rb +413 -0
  141. data/test/functional/association_indexing_test.rb +37 -0
  142. data/test/functional/faceted_search_test.rb +163 -0
  143. data/test/functional/multi_solr_search_test.rb +51 -0
  144. data/test/models/author.rb +10 -0
  145. data/test/models/book.rb +10 -0
  146. data/test/models/category.rb +8 -0
  147. data/test/models/electronic.rb +21 -0
  148. data/test/models/gadget.rb +9 -0
  149. data/test/models/movie.rb +17 -0
  150. data/test/models/novel.rb +2 -0
  151. data/test/models/post.rb +3 -0
  152. data/test/models/posting.rb +11 -0
  153. data/test/test_helper.rb +51 -0
  154. data/test/unit/acts_methods_shoulda.rb +70 -0
  155. data/test/unit/class_methods_shoulda.rb +90 -0
  156. data/test/unit/common_methods_shoulda.rb +112 -0
  157. data/test/unit/instance_methods_shoulda.rb +326 -0
  158. data/test/unit/lazy_document_shoulda.rb +35 -0
  159. data/test/unit/parser_instance.rb +19 -0
  160. data/test/unit/parser_methods_shoulda.rb +279 -0
  161. data/test/unit/solr_instance.rb +46 -0
  162. data/test/unit/test_helper.rb +26 -0
  163. metadata +259 -0
@@ -0,0 +1,230 @@
1
+ module ActsAsSolr #:nodoc:
2
+ module ParserMethods
3
+ protected
4
+
5
# Translates a query string plus acts_as_solr options into the option hash
# understood by Solr::Request::Standard and executes the search.
#
# query   - query string; every "field:" occurrence is rewritten to "field_t:"
#           and then passed through replace_types.
# options - Hash; only the keys listed in valid_options are accepted.
# models  - optional type restriction appended verbatim to the query. When nil
#           the restriction is built from solr_type_condition.
#
# Returns nil when the query is nil or blank, otherwise the value returned by
# ActsAsSolr::Post.execute.
# Raises RuntimeError for unknown option keys, and re-raises any StandardError
# raised while building or executing the request as a RuntimeError.
#
# Neither +options+ nor the strings it contains are modified: facet/filter
# strings are rewritten with the non-destructive String#sub (sub! returns nil
# when nothing matched, which used to turn such entries into empty strings).
def parse_query(query=nil, options={}, models=nil)
  valid_options = [:offset, :limit, :facets, :models, :results_format, :order, :scores, :operator, :include, :lazy, :highlight]
  query_options = {}

  return nil if (query.nil? || query.strip == '')

  raise "Invalid parameters: #{(options.keys - valid_options).join(',')}" unless (options.keys - valid_options).empty?
  begin
    Deprecation.validate_query(options)
    query_options[:start] = options[:offset]
    query_options[:rows] = options[:limit]
    query_options[:operator] = options[:operator]

    # :highlight => true is shorthand for highlighting every field; expand it
    # into a local so the caller's hash is left untouched.
    highlight = options[:highlight]
    highlight = {:fields => "*"} if highlight == true

    if highlight
      query_options[:highlighting] = {}
      query_options[:highlighting][:field_list] = []
      # Array() lets :fields be a single name or a list of names.
      query_options[:highlighting][:field_list] << Array(highlight[:fields]).collect {|k| "#{k}_t"} if highlight[:fields]
      query_options[:highlighting][:require_field_match] = highlight[:require_field_match] if highlight[:require_field_match]
      query_options[:highlighting][:max_snippets] = highlight[:max_snippets] if highlight[:max_snippets]
      query_options[:highlighting][:prefix] = highlight[:prefix] if highlight[:prefix]
      query_options[:highlighting][:suffix] = highlight[:suffix] if highlight[:suffix]
    end

    # first steps on the facet parameter processing
    facets = options[:facets]
    if facets
      query_options[:facets] = {}
      query_options[:facets][:limit] = -1  # TODO: make this configurable
      query_options[:facets][:sort] = :count if facets[:sort]
      query_options[:facets][:mincount] = 0
      query_options[:facets][:mincount] = 1 if facets[:zeros] == false
      # override the :zeros (it's deprecated anyway) if :mincount exists
      query_options[:facets][:mincount] = facets[:mincount] if facets[:mincount]
      query_options[:facets][:fields] = facets[:fields].collect{|k| "#{k}_facet"} if facets[:fields]
      query_options[:filter_queries] = replace_types([*facets[:browse]].collect{|k| k.sub(/ *: */,"_facet:")}) if facets[:browse]
      query_options[:facets][:queries] = replace_types(facets[:query].collect{|k| k.sub(/ *: */,"_t:")}) if facets[:query]

      dates = facets[:dates]
      if dates
        query_options[:date_facets] = {}
        # if dates[:fields] exists then :start, :end, and :gap must be there
        if dates[:fields]
          [:start, :end, :gap].each { |k| raise "#{k} must be present in faceted date query" unless dates.include?(k) }
          query_options[:date_facets][:fields] = []
          dates[:fields].each { |f|
            if f.kind_of? Hash
              key = f.keys[0]
              query_options[:date_facets][:fields] << {"#{key}_d" => f[key]}
              validate_date_facet_other_options(f[key][:other]) if f[key][:other]
            else
              query_options[:date_facets][:fields] << "#{f}_d"
            end
          }
        end

        query_options[:date_facets][:start] = dates[:start] if dates[:start]
        query_options[:date_facets][:end] = dates[:end] if dates[:end]
        query_options[:date_facets][:gap] = dates[:gap] if dates[:gap]
        query_options[:date_facets][:hardend] = dates[:hardend] if dates[:hardend]
        # (?!\d) keeps colons that are followed by a digit (e.g. inside timestamps) untouched.
        query_options[:date_facets][:filter] = replace_types([*dates[:filter]].collect{|k| k.sub(/ *:(?!\d) */,"_d:")}) if dates[:filter]

        if dates[:other]
          validate_date_facet_other_options(dates[:other])
          query_options[:date_facets][:other] = dates[:other]
        end

      end
    end

    if models.nil?
      # TODO: use a filter query for type, allowing Solr to cache it individually
      models = "AND #{solr_type_condition}"
      field_list = solr_configuration[:primary_key_field]
    else
      field_list = "id"
    end

    query_options[:field_list] = [field_list, 'score']
    query = "(#{query.gsub(/ *: */,"_t:")}) #{models}"
    order = options[:order].split(/\s*,\s*/).collect{|e| e.gsub(/\s+/,'_t ').gsub(/\bscore_t\b/, 'score') }.join(',') if options[:order]
    query_options[:query] = replace_types([query])[0] # TODO adjust replace_types to work with String or Array

    if options[:order]
      # TODO: set the sort parameter instead of the old ;order. style.
      query_options[:query] << ';' << replace_types([order], false)[0]
    end

    ActsAsSolr::Post.execute(Solr::Request::Standard.new(query_options))
  rescue
    raise "There was a problem executing your search: #{$!} in #{$!.backtrace.first}"
  end
end
100
+
101
# Builds the parenthesised Solr clause that restricts a search to this class
# and its subclasses, e.g. "(type_field:Book OR type_field:Novel)".
def solr_type_condition
  type_field = solr_configuration[:type_field]
  class_names = [name] + subclasses.collect { |subclass| subclass.name }
  clauses = class_names.collect { |class_name| "#{type_field}:#{class_name}" }
  "(" + clauses.join(" OR ") + ")"
end
106
+
107
# Turns a Solr response into a SearchResults object.
#
# solr_data - response object; this method reads total_hits, hits,
#             highlighting, max_score and data from it. May be nil.
# options   - Hash of search options; :facets and :scores are read here and
#             the whole hash is merged over the default {:format => :objects}
#             before being handed to find_objects.
#
# Returns a SearchResults. When solr_data is nil or reports zero hits the
# result only holds empty :docs, :total => 0 and (for faceted searches) an
# empty facet structure.
def parse_results(solr_data, options = {})
  results = {
    :docs => [],
    :total => 0
  }

  configuration = {
    :format => :objects
  }
  results.update(:facets => {'facet_fields' => []}) if options[:facets]
  return SearchResults.new(results) if (solr_data.nil? || solr_data.total_hits == 0)

  configuration.update(options) if options.is_a?(Hash)

  primary_key_field = solr_configuration[:primary_key_field].to_s
  ids = solr_data.hits.collect { |doc| doc[primary_key_field] }.flatten

  result = find_objects(ids, options, configuration)

  add_scores(result, solr_data) if configuration[:format] == :objects && options[:scores]

  # Highlights are keyed by the numeric part of the Solr document key
  # (everything up to and including the last "prefix:" is dropped) and then by
  # field name without its trailing type suffix. Only the last "_suffix" is
  # removed, so "first_name_t" maps to "first_name" rather than "first".
  highlighted = {}
  unless solr_data.highlighting.nil?
    solr_data.highlighting.each do |doc_key, snippets|
      fields = {}
      unless snippets.nil?
        snippets.each { |field_name, fragments| fields[field_name.sub(/_[^_]*\z/, "")] = fragments }
      end
      highlighted[doc_key.gsub(/[^:]*:/, "").to_i] = fields
    end
  end

  results.update(:facets => solr_data.data['facet_counts']) if options[:facets]
  results.update({:docs => result, :total => solr_data.total_hits, :max_score => solr_data.max_score, :query_time => solr_data.data['responseHeader']['QTime']})
  results.update({:highlights => highlighted})

  sr = SearchResults.new(results)

  # Give each returned record the chance to look at the raw results hash.
  if sr.records
    sr.records.each do |model|
      model.init_solr(results) if model.respond_to?(:init_solr)
    end
  end

  sr
end
148
+
149
+
150
# Resolves the ids returned by Solr according to the requested format.
#
# ids           - primary key values in the order Solr returned them.
# options       - search options; only :include is read here.
# configuration - Hash; :lazy and :format are read here.
#
# Returns LazyDocument wrappers when :lazy is set (and the format is not
# :ids), records loaded through find and put back into Solr's order when the
# format is :objects, and the untouched ids otherwise.
def find_objects(ids, options, configuration)
  if configuration[:lazy] && configuration[:format] != :ids
    return ids.collect { |doc_id| ActsAsSolr::LazyDocument.new(doc_id, self) }
  end
  return ids unless configuration[:format] == :objects

  finder_args = {:conditions => ["#{self.table_name}.#{primary_key} in (?)", ids]}
  finder_args[:include] = options[:include] if options[:include]
  reorder(self.find(:all, finder_args), ids)
end
164
+
165
# Reorders the loaded records so they follow the order of the ids returned
# from Solr.
#
# things - records responding to #id.
# ids    - ids in the order Solr returned them.
#
# Returns a new Array with one slot per id.
# Raises RuntimeError when the two collections differ in size, or when a
# record's id does not appear in +ids+ (previously this surfaced as an opaque
# TypeError from indexing the result array with nil).
def reorder(things, ids)
  raise "Out of sync! Found #{ids.size} items in index, but only #{things.size} were found in database!" unless things.size == ids.size

  # Map each id to its first position once instead of scanning ids per record.
  positions = {}
  ids.each_with_index { |id, index| positions[id] = index unless positions.key?(id) }

  ordered_things = Array.new(things.size)
  things.each do |thing|
    position = positions[thing.id]
    raise "Out of sync! Record with id #{thing.id.inspect} was found in database but is not among the ids returned from the index!" if position.nil?
    ordered_things[position] = thing
  end
  ordered_things
end
175
+
176
# Replaces the default "_t" field suffix with the Solr type suffix configured
# for each field or include on the acts_as_solr call.
#
# strings       - Array of query fragments; it is updated in place and also
#                 returned.
# include_colon - when true only "name_t:" occurrences are rewritten, when
#                 false bare "name_t" occurrences are rewritten.
#
# The match is anchored on word boundaries and the configured name is escaped,
# so a field called "name" no longer rewrites the tail of "first_name_t:".
def replace_types(strings, include_colon=true)
  suffix = include_colon ? ":" : ""

  # Collect [solr field name, declared type] pairs from both configuration
  # sections so the substitution itself is written only once.
  typed_fields = []
  if configuration[:solr_fields]
    configuration[:solr_fields].each do |name, options|
      typed_fields << [options[:as] || name.to_s, options[:type]]
    end
  end
  if configuration[:solr_includes]
    configuration[:solr_includes].each do |association, options|
      typed_fields << [options[:as] || association.to_s.singularize, options[:type]]
    end
  end

  typed_fields.each do |solr_name, type|
    field = "#{solr_name}_#{get_solr_field_type(type)}#{suffix}"
    pattern = /\b#{Regexp.escape(solr_name.to_s)}_t\b#{Regexp.escape(suffix)}/
    strings.each_with_index { |s, i| strings[i] = s.gsub(pattern, field) }
  end
  strings
end
198
+
199
# Adds the Solr score to each record that matches one of the hits by giving
# that record a singleton solr_score accessor.
#
# results   - loaded records.
# solr_data - response object whose hits each carry a "score" entry.
#
# Hits without a matching record are skipped; previously the accessor was
# defined on, and assigned to, nil in that case.
#
# Returns an Array of [score, record] pairs for the hits that were matched.
def add_scores(results, solr_data)
  with_score = []
  solr_data.hits.each do |doc|
    record = results.find { |candidate| scorable_record?(candidate, doc) }
    with_score.push([doc["score"], record]) unless record.nil?
  end
  with_score.each do |score, object|
    class << object; attr_accessor :solr_score; end
    object.solr_score = score
  end
end
211
+
212
# Tells whether +record+ is the object described by the Solr hit +doc+.
#
# When the hit carries the configured primary key field, that value is
# compared with record_id(record). Otherwise the hit's "id" entry is compared
# with "ClassName:record_id".
#
# Both values may be a scalar or an Array; only the first element is used.
# A hit that carries neither key never matches (it used to raise
# NoMethodError on nil).
def scorable_record?(record, doc)
  doc_id = doc[solr_configuration[:primary_key_field].to_s]
  if doc_id.nil?
    "#{record.class.name}:#{record_id(record)}" == Array(doc["id"]).first.to_s
  else
    # Array#to_s is "[12]" on Ruby 1.9+, so unwrap before comparing.
    record_id(record).to_s == Array(doc_id).first.to_s
  end
end
221
+
222
# Checks the :other parameter of a date facet.
#
# options - a single value or an Array of values; each is converted with
#           to_sym before being checked.
#
# Returns nil when every value is one of :after, :all, :before, :between or
# :none. Raises RuntimeError naming the offending values otherwise.
def validate_date_facet_other_options(options)
  allowed = [:after, :all, :before, :between, :none]
  requested = options.kind_of?(Array) ? options : [options]
  rejected = requested.collect { |entry| entry.to_sym } - allowed
  return nil if rejected.empty?

  plural = rejected.size > 1 ? 's' : ''
  raise "Invalid option#{plural} for faceted date's other param: #{rejected.join(', ')}. May only be one of :after, :all, :before, :between, :none"
end
228
+
229
+ end
230
+ end
@@ -0,0 +1,69 @@
1
module ActsAsSolr #:nodoc:

  # TODO: Possibly looking into hooking it up with Solr::Response::Standard
  #
  # Read-only wrapper around the hash of search results.
  #
  #   books = Book.find_by_solr 'ruby'
  #
  # the above will return a SearchResults instance with these readers:
  #
  # docs|results|records: will return an array of records found
  #
  #   books.records.empty?
  #   => false
  #
  # total|num_found|total_hits: will return the total number of records found
  #
  #   books.total
  #   => 2
  #
  # facets: will return the facets when doing a faceted search
  #
  # max_score|highest_score: returns the highest score found
  #
  #   books.max_score
  #   => 1.3213213
  #
  # query_time: returns the value stored under :query_time
  #
  # highlights: returns the value stored under :highlights
  #
  # Every reader returns nil when the corresponding key is absent.
  class SearchResults
    # solr_data - Hash with any of the keys :docs, :total, :facets,
    #             :max_score, :query_time and :highlights.
    def initialize(solr_data={})
      @solr_data = solr_data
    end

    # Returns an array with the instances. This method
    # is also aliased as docs and records
    def results
      @solr_data[:docs]
    end

    # Returns the total records found. This method is
    # also aliased as num_found and total_hits
    def total
      @solr_data[:total]
    end

    # Returns the facets when doing a faceted search
    def facets
      @solr_data[:facets]
    end

    # Returns the highest score found. This method is
    # also aliased as highest_score
    def max_score
      @solr_data[:max_score]
    end

    # Returns the value stored under :query_time
    def query_time
      @solr_data[:query_time]
    end

    # Returns the value stored under :highlights
    def highlights
      @solr_data[:highlights]
    end

    alias docs results
    alias records results
    alias num_found total
    alias total_hits total
    alias highest_score max_score
  end

end
@@ -0,0 +1,191 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ require 'net/http'
14
+
15
+ # TODO: add a convenience method to POST a Solr .xml file, like Solr's example post.sh
16
+
17
# HTTP client for one Solr application context. Builds Solr::Request objects,
# posts them and wraps the reply through Solr::Response::Base.make_response.
class Solr::Connection
  attr_reader :url, :autocommit, :connection

  # Control characters below 0x20 other than tab (0x09), line feed (0x0A) and
  # carriage return (0x0D). They are stripped from every request body in #post.
  ILLEGAL_XML_CHARS = /[\x00-\x08\x0B\x0C\x0E-\x1F]/

  # create a connection to a solr instance using the url for the solr
  # application context:
  #
  #   conn = Solr::Connection.new("http://example.com:8080/solr")
  #
  # if you would prefer to have all adds/updates autocommitted,
  # use :autocommit => :on
  #
  #   conn = Solr::Connection.new('http://example.com:8080/solr',
  #     :autocommit => :on)
  #
  # :timeout, when given, is used as the read timeout of the HTTP connection.
  #
  # Raises RuntimeError when the url does not parse as an HTTP url.
  def initialize(url="http://localhost:8983/solr", opts={})
    @url = URI.parse(url)
    unless @url.kind_of? URI::HTTP
      raise "invalid http url: #{url}"
    end

    # TODO: Autocommit seems nice at one level, but it currently is confusing because
    # only calls to Connection#add/#update/#delete, though a Connection#send(AddDocument.new(...))
    # does not autocommit.  Maybe #send should check for the request types that require a commit and
    # commit in #send instead of the individual methods?
    @autocommit = opts[:autocommit] == :on

    # Not actually opening the connection yet, just setting up the persistent connection.
    @connection = Net::HTTP.new(@url.host, @url.port)

    @connection.read_timeout = opts[:timeout] if opts[:timeout]
  end

  # add a document to the index. you can pass in either a hash
  #
  #   conn.add(:id => 123, :title => 'Tlon, Uqbar, Orbis Tertius')
  #
  # or a Solr::Document
  #
  #   conn.add(Solr::Document.new(:id => 123, :title => 'On Writing'))
  #
  # true/false will be returned to designate success/failure
  def add(doc)
    response = send(Solr::Request::AddDocument.new(doc))
    commit if @autocommit
    response.ok?
  end

  # update a document in the index (really just an alias to add)
  def update(doc)
    add(doc)
  end

  # performs a standard query and returns a Solr::Response::Standard
  #
  #   response = conn.query('borges')
  #
  # alternative you can pass in a block and iterate over hits
  #
  #   conn.query('borges') do |hit|
  #     puts hit
  #   end
  #
  # options include:
  #
  #   :sort, :default_field, :rows, :filter_queries, :debug_query,
  #   :explain_other, :facets, :highlighting, :mlt,
  #   :operator         => :or / :and
  #   :start            => defaults to 0
  #   :field_list       => array, defaults to ["*", "score"]
  #
  # The options hash passed in is not modified.
  def query(query, options={}, &action)
    # TODO: Shouldn't this return an exception if the Solr status is not ok?  (rather than true/false).
    create_and_send_query(Solr::Request::Standard, options.merge(:query => query), &action)
  end

  # performs a dismax search and returns a Solr::Response::Standard
  #
  #   response = conn.search('borges')
  #
  # options are same as query, but also include:
  #
  #   :tie_breaker, :query_fields, :minimum_match, :phrase_fields,
  #   :phrase_slop, :boost_query, :boost_functions
  #
  # The options hash passed in is not modified.
  def search(query, options={}, &action)
    create_and_send_query(Solr::Request::Dismax, options.merge(:query => query), &action)
  end

  # sends a commit message to the server
  def commit(options={})
    send(Solr::Request::Commit.new(options)).ok?
  end

  # sends an optimize message to the server
  def optimize
    send(Solr::Request::Optimize.new).ok?
  end

  # pings the connection and returns true/false if it is alive or not;
  # any StandardError raised along the way counts as "not alive"
  def ping
    send(Solr::Request::Ping.new).ok?
  rescue
    false
  end

  # delete a document from the index using the document id
  def delete(document_id)
    response = send(Solr::Request::Delete.new(:id => document_id))
    commit if @autocommit
    response.ok?
  end

  # delete using a query
  def delete_by_query(query)
    response = send(Solr::Request::Delete.new(:query => query))
    commit if @autocommit
    response.ok?
  end

  # sends an index info request and returns its response
  def info
    send(Solr::Request::IndexInfo.new)
  end

  # send a given Solr::Request and return a RubyResponse or XmlResponse
  # depending on the type of request.
  # NOTE: this shadows Object#send on connection instances.
  def send(request)
    data = post(request)
    Solr::Response::Base.make_response(request, data)
  end

  # send the http post request to solr; for convenience there are shortcuts
  # to some requests: add(), query(), commit(), delete() or send()
  #
  # Returns the response body on an HTTP success status; any other status is
  # raised through Net::HTTPResponse#error!.
  def post(request)
    path = @url.path + "/" + request.handler

    if ENV["DEBUG"]
      puts "POST #{path}"
      puts "-- DATA -------------------"
      puts request.to_s
      puts "-- END DATA ---------------"
    end

    response = @connection.post(path,
                                request.to_s.gsub(ILLEGAL_XML_CHARS, ''),
                                { "Content-Type" => request.content_type })

    case response
    when Net::HTTPSuccess then response.body
    else
      response.error!
    end
  end

private

  # Builds a request of the given class from +options+ and sends it. Without a
  # block the response is returned; with a block each hit is yielded to it.
  def create_and_send_query(klass, options = {}, &action)
    request = klass.new(options)
    response = send(request)
    return response unless action
    response.each {|hit| action.call(hit)}
  end

end
189
+
190
+
191
+
@@ -0,0 +1,78 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ require 'solr/xml'
14
+ require 'solr/field'
15
+
16
# An ordered list of Solr::Field objects plus an optional document boost.
class Solr::Document
  include Enumerable
  attr_accessor :boost
  attr_reader :fields

  # Create a new Solr::Document, optionally passing in a hash of
  # key/value pairs for the fields
  #
  #   doc = Solr::Document.new(:creator => 'Jorge Luis Borges')
  def initialize(hash={})
    @fields = []
    self << hash
  end

  # Append a Solr::Field
  #
  #   doc << Solr::Field.new(:creator => 'Jorge Luis Borges')
  #
  # If you are truly lazy you can simply pass in a hash:
  #
  #   doc << {:creator => 'Jorge Luis Borges'}
  #
  # A hash value that responds to each (and is not a String) adds one field
  # per element. Returns the document itself so appends can be chained.
  # Raises RuntimeError for any other kind of argument.
  def <<(fields)
    case fields
    when Hash
      fields.each_pair do |name, value|
        if value.respond_to?(:each) && !value.is_a?(String)
          value.each { |v| @fields << Solr::Field.new(name => v) }
        else
          @fields << Solr::Field.new(name => value)
        end
      end
    when Solr::Field
      @fields << fields
    else
      raise "must pass in Solr::Field or Hash"
    end
    self
  end

  # shorthand to allow hash lookups
  #   doc['name']
  # Returns the value of the first field with that name, or nil.
  def [](name)
    field = @fields.find { |f| f.name == name.to_s }
    field ? field.value : nil
  end

  # shorthand to assign as a hash; appends a new field, existing fields
  # with the same name are kept
  def []=(name, value)
    @fields << Solr::Field.new(name => value)
  end

  # convert the Document to a Solr::XML::Element named 'doc'
  def to_xml
    e = Solr::XML::Element.new 'doc'
    e.attributes['boost'] = @boost.to_s if @boost
    @fields.each { |f| e.add_element(f.to_xml) }
    e
  end

  # Iterates over the fields; positional arguments are ignored.
  def each(*args, &blk)
    fields.each(&blk)
  end
end
@@ -0,0 +1,13 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
# Error class for the Solr library. It derives from StandardError so that
# plain `rescue` clauses catch it; classes derived directly from Exception
# are only caught by an explicit `rescue Exception`.
class Solr::Exception < StandardError; end
data/lib/solr/field.rb ADDED
@@ -0,0 +1,39 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ require 'solr/xml'
14
+ require 'time'
15
+
16
# A single name/value pair of a Solr document, with an optional boost.
class Solr::Field
  VALID_PARAMS = [:boost]
  attr_accessor :name, :value, :boost

  # params - Hash holding one name => value pair and, optionally, a
  #          <tt>:boost</tt> entry used to boost the relevance of the field.
  #
  # The name is stored as a String. Values responding to utc are stored in
  # UTC/XML schema format (which Solr requires); everything else via to_s.
  def initialize(params)
    @boost = params[:boost]
    field_key = params.keys.reject { |key| VALID_PARAMS.include?(key) }.first
    raw_value = params[field_key]
    @name = field_key.to_s
    @value = if raw_value.respond_to?(:utc)
               raw_value.utc.xmlschema
             else
               raw_value.to_s
             end
  end

  # Builds the 'field' Solr::XML::Element for this field.
  def to_xml
    element = Solr::XML::Element.new('field')
    element.attributes['name'] = @name
    element.attributes['boost'] = @boost.to_s if @boost
    element.text = @value
    element
  end

end
@@ -0,0 +1,26 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+
14
+
15
class Solr::Importer::ArrayMapper < Solr::Importer::Mapper
  # TODO document that initializer takes an array of Mappers [mapper1, mapper2, ... mapperN]

  # Maps the i-th element of orig_data_array with the i-th entry of @mapping
  # and merges the resulting hashes into one.
  #
  # TODO: make merge conflict handling configurable.  as is, the last map fields win.
  def map(orig_data_array)
    orig_data_array.each_with_index.inject({}) do |merged, (data, position)|
      merged.merge!(@mapping[position].map(data))
    end
  end
end
@@ -0,0 +1,38 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
# For files with the first line containing field names.
# Currently not designed for enormous files, as all lines are
# read into an array.
class Solr::Importer::DelimitedFileSource
  include Enumerable

  # filename - path of the delimited file.
  # splitter - String or Regexp handed to String#split; defaults to a tab.
  def initialize(filename, splitter=/\t/)
    @filename = filename
    @splitter = splitter
  end

  # Yields one row per data line as an array of [header, value] pairs whose
  # [] looks a value up by header name (symbol or string) and returns nil for
  # an unknown header.
  #
  # An empty file yields nothing. Without a block an enumerator is returned.
  def each
    return enum_for(:each) unless block_given?

    lines = IO.readlines(@filename)
    return if lines.empty?

    headers = lines[0].split(@splitter).collect { |h| h.chomp }

    lines[1..-1].each do |line|
      data = headers.zip(line.split(@splitter).collect { |s| s.chomp })
      def data.[](key)
        pair = assoc(key.to_s)
        pair && pair[1]
      end

      yield(data)
    end
  end

end