jbasdf-muck-solr 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156) hide show
  1. data/CHANGE_LOG +239 -0
  2. data/LICENSE +19 -0
  3. data/README.markdown +118 -0
  4. data/README.rdoc +107 -0
  5. data/Rakefile +99 -0
  6. data/TESTING_THE_PLUGIN +25 -0
  7. data/VERSION.yml +4 -0
  8. data/config/solr.yml +15 -0
  9. data/config/solr_environment.rb +32 -0
  10. data/lib/acts_as_solr/acts_methods.rb +352 -0
  11. data/lib/acts_as_solr/class_methods.rb +236 -0
  12. data/lib/acts_as_solr/common_methods.rb +89 -0
  13. data/lib/acts_as_solr/deprecation.rb +61 -0
  14. data/lib/acts_as_solr/instance_methods.rb +165 -0
  15. data/lib/acts_as_solr/lazy_document.rb +18 -0
  16. data/lib/acts_as_solr/parser_methods.rb +203 -0
  17. data/lib/acts_as_solr/search_results.rb +68 -0
  18. data/lib/acts_as_solr/solr_fixtures.rb +13 -0
  19. data/lib/acts_as_solr/tasks/database.rake +16 -0
  20. data/lib/acts_as_solr/tasks/solr.rake +135 -0
  21. data/lib/acts_as_solr/tasks/test.rake +5 -0
  22. data/lib/acts_as_solr/tasks.rb +10 -0
  23. data/lib/acts_as_solr.rb +65 -0
  24. data/lib/solr/connection.rb +177 -0
  25. data/lib/solr/document.rb +75 -0
  26. data/lib/solr/exception.rb +13 -0
  27. data/lib/solr/field.rb +36 -0
  28. data/lib/solr/importer/array_mapper.rb +26 -0
  29. data/lib/solr/importer/delimited_file_source.rb +38 -0
  30. data/lib/solr/importer/hpricot_mapper.rb +27 -0
  31. data/lib/solr/importer/mapper.rb +51 -0
  32. data/lib/solr/importer/solr_source.rb +41 -0
  33. data/lib/solr/importer/xpath_mapper.rb +35 -0
  34. data/lib/solr/importer.rb +19 -0
  35. data/lib/solr/indexer.rb +52 -0
  36. data/lib/solr/request/add_document.rb +58 -0
  37. data/lib/solr/request/base.rb +36 -0
  38. data/lib/solr/request/commit.rb +29 -0
  39. data/lib/solr/request/delete.rb +48 -0
  40. data/lib/solr/request/dismax.rb +46 -0
  41. data/lib/solr/request/index_info.rb +22 -0
  42. data/lib/solr/request/modify_document.rb +46 -0
  43. data/lib/solr/request/optimize.rb +19 -0
  44. data/lib/solr/request/ping.rb +36 -0
  45. data/lib/solr/request/select.rb +54 -0
  46. data/lib/solr/request/spellcheck.rb +30 -0
  47. data/lib/solr/request/standard.rb +402 -0
  48. data/lib/solr/request/update.rb +23 -0
  49. data/lib/solr/request.rb +26 -0
  50. data/lib/solr/response/add_document.rb +17 -0
  51. data/lib/solr/response/base.rb +42 -0
  52. data/lib/solr/response/commit.rb +15 -0
  53. data/lib/solr/response/delete.rb +13 -0
  54. data/lib/solr/response/dismax.rb +8 -0
  55. data/lib/solr/response/index_info.rb +26 -0
  56. data/lib/solr/response/modify_document.rb +17 -0
  57. data/lib/solr/response/optimize.rb +14 -0
  58. data/lib/solr/response/ping.rb +26 -0
  59. data/lib/solr/response/ruby.rb +42 -0
  60. data/lib/solr/response/select.rb +17 -0
  61. data/lib/solr/response/spellcheck.rb +20 -0
  62. data/lib/solr/response/standard.rb +60 -0
  63. data/lib/solr/response/xml.rb +39 -0
  64. data/lib/solr/response.rb +27 -0
  65. data/lib/solr/solrtasks.rb +27 -0
  66. data/lib/solr/util.rb +32 -0
  67. data/lib/solr/xml.rb +44 -0
  68. data/lib/solr.rb +26 -0
  69. data/solr/CHANGES.txt +1207 -0
  70. data/solr/LICENSE.txt +712 -0
  71. data/solr/NOTICE.txt +90 -0
  72. data/solr/etc/jetty.xml +205 -0
  73. data/solr/etc/webdefault.xml +379 -0
  74. data/solr/lib/easymock.jar +0 -0
  75. data/solr/lib/jetty-6.1.3.jar +0 -0
  76. data/solr/lib/jetty-util-6.1.3.jar +0 -0
  77. data/solr/lib/jsp-2.1/ant-1.6.5.jar +0 -0
  78. data/solr/lib/jsp-2.1/core-3.1.1.jar +0 -0
  79. data/solr/lib/jsp-2.1/jsp-2.1.jar +0 -0
  80. data/solr/lib/jsp-2.1/jsp-api-2.1.jar +0 -0
  81. data/solr/lib/servlet-api-2.4.jar +0 -0
  82. data/solr/lib/servlet-api-2.5-6.1.3.jar +0 -0
  83. data/solr/lib/xpp3-1.1.3.4.O.jar +0 -0
  84. data/solr/solr/README.txt +52 -0
  85. data/solr/solr/bin/abc +176 -0
  86. data/solr/solr/bin/abo +176 -0
  87. data/solr/solr/bin/backup +108 -0
  88. data/solr/solr/bin/backupcleaner +142 -0
  89. data/solr/solr/bin/commit +128 -0
  90. data/solr/solr/bin/optimize +129 -0
  91. data/solr/solr/bin/readercycle +129 -0
  92. data/solr/solr/bin/rsyncd-disable +77 -0
  93. data/solr/solr/bin/rsyncd-enable +76 -0
  94. data/solr/solr/bin/rsyncd-start +145 -0
  95. data/solr/solr/bin/rsyncd-stop +105 -0
  96. data/solr/solr/bin/scripts-util +83 -0
  97. data/solr/solr/bin/snapcleaner +148 -0
  98. data/solr/solr/bin/snapinstaller +168 -0
  99. data/solr/solr/bin/snappuller +248 -0
  100. data/solr/solr/bin/snappuller-disable +77 -0
  101. data/solr/solr/bin/snappuller-enable +77 -0
  102. data/solr/solr/bin/snapshooter +109 -0
  103. data/solr/solr/conf/admin-extra.html +31 -0
  104. data/solr/solr/conf/protwords.txt +21 -0
  105. data/solr/solr/conf/schema.xml +126 -0
  106. data/solr/solr/conf/scripts.conf +24 -0
  107. data/solr/solr/conf/solrconfig.xml +458 -0
  108. data/solr/solr/conf/stopwords.txt +57 -0
  109. data/solr/solr/conf/synonyms.txt +31 -0
  110. data/solr/solr/conf/xslt/example.xsl +132 -0
  111. data/solr/solr/conf/xslt/example_atom.xsl +63 -0
  112. data/solr/solr/conf/xslt/example_rss.xsl +62 -0
  113. data/solr/start.jar +0 -0
  114. data/solr/webapps/solr.war +0 -0
  115. data/test/config/solr.yml +2 -0
  116. data/test/db/connections/mysql/connection.rb +10 -0
  117. data/test/db/connections/sqlite/connection.rb +8 -0
  118. data/test/db/migrate/001_create_books.rb +15 -0
  119. data/test/db/migrate/002_create_movies.rb +12 -0
  120. data/test/db/migrate/003_create_categories.rb +11 -0
  121. data/test/db/migrate/004_create_electronics.rb +16 -0
  122. data/test/db/migrate/005_create_authors.rb +12 -0
  123. data/test/db/migrate/006_create_postings.rb +9 -0
  124. data/test/db/migrate/007_create_posts.rb +13 -0
  125. data/test/db/migrate/008_create_gadgets.rb +11 -0
  126. data/test/fixtures/authors.yml +9 -0
  127. data/test/fixtures/books.yml +13 -0
  128. data/test/fixtures/categories.yml +7 -0
  129. data/test/fixtures/db_definitions/mysql.sql +41 -0
  130. data/test/fixtures/electronics.yml +49 -0
  131. data/test/fixtures/movies.yml +9 -0
  132. data/test/fixtures/postings.yml +10 -0
  133. data/test/functional/acts_as_solr_test.rb +413 -0
  134. data/test/functional/association_indexing_test.rb +37 -0
  135. data/test/functional/faceted_search_test.rb +163 -0
  136. data/test/functional/multi_solr_search_test.rb +57 -0
  137. data/test/models/author.rb +10 -0
  138. data/test/models/book.rb +10 -0
  139. data/test/models/category.rb +8 -0
  140. data/test/models/electronic.rb +25 -0
  141. data/test/models/gadget.rb +9 -0
  142. data/test/models/movie.rb +17 -0
  143. data/test/models/novel.rb +2 -0
  144. data/test/models/post.rb +3 -0
  145. data/test/models/posting.rb +11 -0
  146. data/test/test_helper.rb +54 -0
  147. data/test/unit/acts_methods_shoulda.rb +68 -0
  148. data/test/unit/class_methods_shoulda.rb +85 -0
  149. data/test/unit/common_methods_shoulda.rb +111 -0
  150. data/test/unit/instance_methods_shoulda.rb +318 -0
  151. data/test/unit/lazy_document_shoulda.rb +34 -0
  152. data/test/unit/parser_instance.rb +19 -0
  153. data/test/unit/parser_methods_shoulda.rb +268 -0
  154. data/test/unit/solr_instance.rb +49 -0
  155. data/test/unit/test_helper.rb +24 -0
  156. metadata +241 -0
@@ -0,0 +1,10 @@
1
+ dir = File.dirname(__FILE__)
2
+ require 'rubygems'
3
+ require 'rake'
4
+ require 'net/http'
5
+ require 'active_record'
6
+ require File.expand_path("#{dir}/solr_fixtures")
7
+
8
+ load File.expand_path("#{dir}/tasks/database.rake")
9
+ load File.expand_path("#{dir}/tasks/solr.rake")
10
+ load File.expand_path("#{dir}/tasks/test.rake")
@@ -0,0 +1,65 @@
1
+ # Copyright (c) 2006 Erik Hatcher, Thiago Jackiw
2
+ #
3
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ # of this software and associated documentation files (the "Software"), to deal
5
+ # in the Software without restriction, including without limitation the rights
6
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ # copies of the Software, and to permit persons to whom the Software is
8
+ # furnished to do so, subject to the following conditions:
9
+ #
10
+ # The above copyright notice and this permission notice shall be included in all
11
+ # copies or substantial portions of the Software.
12
+ #
13
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19
+ # SOFTWARE.
20
+
21
+ require 'active_record'
22
+ require 'rexml/document'
23
+ require 'net/http'
24
+ require 'yaml'
25
+ require 'time'
26
+ require 'erb'
27
+ require 'rexml/xpath'
28
+
29
+ require File.dirname(__FILE__) + '/solr'
30
+ require File.dirname(__FILE__) + '/acts_as_solr/acts_methods'
31
+ require File.dirname(__FILE__) + '/acts_as_solr/common_methods'
32
+ require File.dirname(__FILE__) + '/acts_as_solr/parser_methods'
33
+ require File.dirname(__FILE__) + '/acts_as_solr/class_methods'
34
+ require File.dirname(__FILE__) + '/acts_as_solr/instance_methods'
35
+ require File.dirname(__FILE__) + '/acts_as_solr/common_methods'
36
+ require File.dirname(__FILE__) + '/acts_as_solr/deprecation'
37
+ require File.dirname(__FILE__) + '/acts_as_solr/search_results'
38
+ require File.dirname(__FILE__) + '/acts_as_solr/lazy_document'
39
module ActsAsSolr

  # Sends Solr requests to the server configured for the current Rails
  # environment.
  class Post
    # Builds a Solr::Connection and sends +request+ through it.
    #
    # The server URL is read from RAILS_ROOT/config/solr.yml, using the
    # section named after ENV['RAILS_ENV']. When that file does not exist,
    # http://localhost:8982/solr is used.
    #
    # request:: the request object handed to Solr::Connection#send
    # core::    optional core name, appended to the URL as an extra path segment
    #
    # Returns whatever Solr::Connection#send returns.
    # Raises RuntimeError (wrapping the original message) when anything in
    # the lookup or the request fails.
    def self.execute(request, core = nil)
      # Declared up front so the rescue clause can always report it.
      url = nil
      begin
        config_file = "#{RAILS_ROOT}/config/solr.yml"
        # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
        if File.exist?(config_file)
          env_config = YAML.load_file(config_file)[ENV['RAILS_ENV']]
          url = env_config['url']
          # for backwards compatibility
          url ||= "http://#{env_config['host']}:#{env_config['port']}/#{env_config['servlet_path']}"
        else
          url = 'http://localhost:8982/solr'
        end
        url += "/" + core unless core.nil?
        connection = Solr::Connection.new(url)
        return connection.send(request)
      rescue
        raise "Couldn't connect to the Solr server at #{url}. #{$!}"
      end
    end
  end

end
63
+
64
+ # reopen ActiveRecord and include the acts_as_solr method
65
+ ActiveRecord::Base.extend ActsAsSolr::ActsMethods
@@ -0,0 +1,177 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ # TODO: add a convenience method to POST a Solr .xml file, like Solr's example post.sh
14
+
15
# Make sure the namespace exists so this file can also be loaded on its own.
module Solr; end

# HTTP client for one Solr web application. Wraps a Net::HTTP object and
# offers shortcuts (add, query, search, commit, delete, ...) on top of the
# generic #send / #post pair.
class Solr::Connection
  attr_reader :url, :autocommit, :connection

  # create a connection to a solr instance using the url for the solr
  # application context:
  #
  #   conn = Solr::Connection.new("http://example.com:8080/solr")
  #
  # if you would prefer to have all adds/updates autocommitted,
  # use :autocommit => :on
  #
  #   conn = Solr::Connection.new('http://example.com:8080/solr',
  #                               :autocommit => :on)
  #
  # :timeout, when given, becomes the read timeout of the HTTP connection.
  # Raises RuntimeError when the url is not an http(s) URL.
  def initialize(url="http://localhost:8983/solr", opts={})
    @url = URI.parse(url)
    unless @url.kind_of? URI::HTTP
      raise "invalid http url: #{url}"
    end

    # TODO: Autocommit seems nice at one level, but it currently is confusing because
    # only calls to Connection#add/#update/#delete, though a Connection#send(AddDocument.new(...))
    # does not autocommit. Maybe #send should check for the request types that require a commit and
    # commit in #send instead of the individual methods?
    @autocommit = opts[:autocommit] == :on

    # Not actually opening the connection yet, just setting up the persistent connection.
    @connection = Net::HTTP.new(@url.host, @url.port)

    # URI::HTTPS is a subclass of URI::HTTP, so https URLs pass the check
    # above; without TLS they would be spoken to in plain HTTP.
    @connection.use_ssl = true if @url.scheme == 'https'

    @connection.read_timeout = opts[:timeout] if opts[:timeout]
  end

  # add a document to the index. you can pass in either a hash
  #
  #   conn.add(:id => 123, :title => 'Tlon, Uqbar, Orbis Tertius')
  #
  # or a Solr::Document
  #
  #   conn.add(Solr::Document.new(:id => 123, :title => 'On Writing'))
  #
  # true/false will be returned to designate success/failure
  def add(doc)
    request = Solr::Request::AddDocument.new(doc)
    response = send(request)
    commit if @autocommit
    return response.ok?
  end

  # update a document in the index (really just an alias to add)
  def update(doc)
    return add(doc)
  end

  # performs a standard query and returns a Solr::Response::Standard
  #
  #   response = conn.query('borges')
  #
  # alternatively you can pass in a block and iterate over hits
  #
  #   conn.query('borges') do |hit|
  #     puts hit
  #   end
  #
  # options include:
  #
  #   :sort, :default_field, :rows, :filter_queries, :debug_query,
  #   :explain_other, :facets, :highlighting, :mlt,
  #   :operator => :or / :and
  #   :start => defaults to 0
  #   :field_list => array, defaults to ["*", "score"]
  #
  # The options hash passed in is left untouched.
  def query(query, options={}, &action)
    # TODO: Shouldn't this return an exception if the Solr status is not ok? (rather than true/false).
    # merge (not update) so the caller's hash does not gain a :query key.
    create_and_send_query(Solr::Request::Standard, options.merge(:query => query), &action)
  end

  # performs a dismax search and returns a Solr::Response::Standard
  #
  #   response = conn.search('borges')
  #
  # options are same as query, but also include:
  #
  #   :tie_breaker, :query_fields, :minimum_match, :phrase_fields,
  #   :phrase_slop, :boost_query, :boost_functions
  #
  # The options hash passed in is left untouched.
  def search(query, options={}, &action)
    create_and_send_query(Solr::Request::Dismax, options.merge(:query => query), &action)
  end

  # sends a commit message to the server
  def commit(options={})
    response = send(Solr::Request::Commit.new(options))
    return response.ok?
  end

  # sends an optimize message to the server
  def optimize
    response = send(Solr::Request::Optimize.new)
    return response.ok?
  end

  # pings the connection and returns true/false if it is alive or not
  def ping
    begin
      response = send(Solr::Request::Ping.new)
      return response.ok?
    rescue
      return false
    end
  end

  # delete a document from the index using the document id
  def delete(document_id)
    response = send(Solr::Request::Delete.new(:id => document_id))
    commit if @autocommit
    response.ok?
  end

  # delete using a query
  def delete_by_query(query)
    response = send(Solr::Request::Delete.new(:query => query))
    commit if @autocommit
    response.ok?
  end

  # sends an index info request and returns its response object
  def info
    send(Solr::Request::IndexInfo.new)
  end

  # send a given Solr::Request and return a RubyResponse or XmlResponse
  # depending on the type of request
  #
  # NOTE: this deliberately shadows Object#send on connection instances.
  def send(request)
    data = post(request)
    Solr::Response::Base.make_response(request, data)
  end

  # send the http post request to solr; for convenience there are shortcuts
  # to some requests: add(), query(), commit(), delete() or send()
  #
  # Returns the response body on a 2xx answer; otherwise Net::HTTP's
  # error! raises the matching HTTP exception.
  def post(request)
    response = @connection.post(@url.path + "/" + request.handler,
                                request.to_s,
                                { "Content-Type" => request.content_type })

    case response
    when Net::HTTPSuccess then response.body
    else
      response.error!
    end
  end

  private

  # Instantiates +klass+ with +options+ and sends it. Without a block the
  # response is returned; with a block every hit of the response is passed
  # to the block.
  def create_and_send_query(klass, options = {}, &action)
    request = klass.new(options)
    response = send(request)
    return response unless action
    response.each {|hit| action.call(hit)}
  end

end
@@ -0,0 +1,75 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
# Make sure the namespace exists so this file can also be loaded on its own.
module Solr; end

# An ordered list of Solr::Field objects plus an optional document boost.
# Enumerable iterates over the fields.
class Solr::Document
  include Enumerable
  attr_accessor :boost
  attr_reader :fields

  # Create a new Solr::Document, optionally passing in a hash of
  # key/value pairs for the fields
  #
  #   doc = Solr::Document.new(:creator => 'Jorge Luis Borges')
  def initialize(hash={})
    @fields = []
    self << hash
  end

  # Append a Solr::Field
  #
  #   doc << Solr::Field.new(:creator => 'Jorge Luis Borges')
  #
  # If you are truly lazy you can simply pass in a hash:
  #
  #   doc << {:creator => 'Jorge Luis Borges'}
  #
  # A hash value that responds to #each (and is not a String) produces one
  # field per element. Anything other than a Hash or Solr::Field raises a
  # RuntimeError.
  #
  # Returns the document itself so appends can be chained:
  #
  #   doc << {:title => 'Ficciones'} << {:year => 1944}
  def <<(fields)
    case fields
    when Hash
      fields.each_pair do |name,value|
        if value.respond_to?(:each) && !value.is_a?(String)
          value.each {|v| @fields << Solr::Field.new(name => v)}
        else
          @fields << Solr::Field.new(name => value)
        end
      end
    when Solr::Field
      @fields << fields
    else
      raise "must pass in Solr::Field or Hash"
    end
    # Previously the return value was the Hash or the internal Array,
    # depending on the argument type, which broke chaining of hashes.
    self
  end

  # shorthand to allow hash lookups
  #   doc['name']
  #
  # Returns the value of the first field with that name, or nil.
  def [](name)
    field = @fields.find {|f| f.name == name.to_s}
    return field.value if field
    return nil
  end

  # shorthand to assign as a hash; appends a new field, existing fields
  # with the same name are kept
  def []=(name,value)
    @fields << Solr::Field.new(name => value)
  end

  # convert the Document to a Solr::XML::Element named 'doc'
  def to_xml
    e = Solr::XML::Element.new 'doc'
    e.attributes['boost'] = @boost.to_s if @boost
    @fields.each {|f| e.add_element(f.to_xml)}
    return e
  end

  # yields each field in insertion order; extra arguments are ignored
  def each(*args, &blk)
    fields.each(&blk)
  end
end
@@ -0,0 +1,13 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
# Make sure the namespace exists so this file can also be loaded on its own.
module Solr; end

# Base error class of the Solr client. It derives from StandardError (not
# Exception) so that a plain `rescue` clause catches it.
class Solr::Exception < StandardError; end
data/lib/solr/field.rb ADDED
@@ -0,0 +1,36 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
# Make sure the namespace exists so this file can also be loaded on its own.
module Solr; end

# A single name/value pair of a document, with an optional boost.
class Solr::Field
  # Keys of the params hash that are options rather than the field name.
  VALID_PARAMS = [:boost].freeze
  attr_accessor :name
  attr_accessor :value
  attr_accessor :boost

  # Takes a hash with exactly one name => value entry, e.g.
  #
  #   Solr::Field.new(:title => 'Ficciones', :boost => 2.0)
  #
  # Accepts an optional <tt>:boost</tt> parameter, used to boost the relevance of a particular field.
  # The name is stored as a String; the value is stored as a String too.
  def initialize(params)
    @boost = params[:boost]
    name_key = (params.keys - VALID_PARAMS).first
    @name = name_key.to_s
    raw_value = params[name_key]
    # Convert any Time values into UTC/XML schema format (which Solr requires).
    # Time#utc converts its receiver in place, so work on a copy to leave
    # the caller's object in its original zone.
    @value = raw_value.respond_to?(:utc) ? raw_value.dup.utc.xmlschema : raw_value.to_s
  end

  # convert the Field to a Solr::XML::Element named 'field'
  def to_xml
    e = Solr::XML::Element.new 'field'
    e.attributes['name'] = @name
    e.attributes['boost'] = @boost.to_s if @boost
    e.text = @value
    return e
  end

end
@@ -0,0 +1,26 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+
14
+
15
# Applies a list of mappers position by position: the i-th element of the
# incoming array is mapped with the i-th mapper, and all results are merged
# into one hash.
class Solr::Importer::ArrayMapper < Solr::Importer::Mapper
  # TODO document that initializer takes an array of Mappers [mapper1, mapper2, ... mapperN]

  # TODO: make merge conflict handling configurable. as is, the last map fields win.
  def map(orig_data_array)
    orig_data_array.each_with_index.inject({}) do |merged, (data, position)|
      # merge! returns its receiver, so the accumulator carries through.
      merged.merge!(@mapping[position].map(data))
    end
  end
end
@@ -0,0 +1,38 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
# Make sure the namespace exists so this file can also be loaded on its own.
module Solr; module Importer; end; end

# For files with the first line containing field names
# Currently not designed for enormous files, as all lines are
# read into an array
class Solr::Importer::DelimitedFileSource
  include Enumerable

  # Lookup by column name, mixed into every row that #each yields.
  module Row
    # Returns the value stored under +key+ (compared as a String), or nil
    # when the row has no such column.
    def [](key)
      pair = assoc(key.to_s)
      pair && pair[1]
    end
  end

  # filename:: path of the delimited file
  # splitter:: String or Regexp passed to String#split, tab by default
  def initialize(filename, splitter=/\t/)
    @filename = filename
    @splitter = splitter
  end

  # Yields one row per data line. A row is an array of [header, value]
  # pairs whose [] looks values up by header name:
  #
  #   source.each { |row| puts row['title'] }
  #
  # Without a block an Enumerator is returned. An empty file yields nothing.
  def each
    return enum_for(:each) unless block_given?

    lines = IO.readlines(@filename)
    return if lines.empty?

    headers = split_line(lines[0])
    lines[1..-1].each do |line|
      yield headers.zip(split_line(line)).extend(Row)
    end
  end

  private

  # Splits one line into cells and strips the line terminator from each.
  def split_line(line)
    line.split(@splitter).collect { |cell| cell.chomp }
  end

end
@@ -0,0 +1,27 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
begin
  require 'hpricot'

  # Mapper whose field lookups are searches on the given document; each
  # match contributes its inner HTML.
  class Solr::Importer::HpricotMapper < Solr::Importer::Mapper
    def field_data(doc, path)
      matches = doc.search(path.to_s)
      matches.map(&:inner_html)
    end
  end
rescue LoadError # If we can't load hpricot
  # Stand-in defined when hpricot is missing: creating an instance fails.
  class Solr::Importer::HpricotMapper
    def initialize(mapping, options={})
      raise "Hpricot not installed."
    end
  end
end
@@ -0,0 +1,51 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
# Turns a source record into a hash of Solr field values, driven by a
# mapping of solr field name => field mapping.
class Solr::Importer::Mapper
  # mapping:: pairs of solr field name and field mapping (see #mapped_field_value)
  # options:: :stringify_symbols makes Symbol mappings look up String keys
  def initialize(mapping, options={})
    @mapping, @options = mapping, options
  end

  # Reads one raw value from the source record. Subclasses override this
  # to support other record types.
  def field_data(orig_data, field_name)
    orig_data[field_name]
  end

  # Resolves a single field mapping against the source record:
  #
  # String::     used as a literal value
  # Proc::       called with the source record
  # Symbol::     looked up in the source record via #field_data
  # Enumerable:: each element resolved recursively, results flattened
  #
  # Anything else raises a RuntimeError.
  def mapped_field_value(orig_data, field_mapping)
    # String is tested before Enumerable on purpose, as in the original case order.
    return field_mapping if field_mapping.is_a?(String)
    return field_mapping.call(orig_data) if field_mapping.is_a?(Proc) # TODO pass in more context, like self or a function for field_data, etc

    if field_mapping.is_a?(Symbol)
      lookup_key = @options[:stringify_symbols] ? field_mapping.to_s : field_mapping
      field_data(orig_data, lookup_key)
    elsif field_mapping.is_a?(Enumerable)
      resolved = field_mapping.collect do |orig_field_name|
        mapped_field_value(orig_data, orig_field_name)
      end
      resolved.flatten
    else
      raise "Unknown mapping for #{field_mapping}"
    end
  end

  # Builds the mapped hash for one source record; fields whose resolved
  # value is nil or false are left out.
  def map(orig_data)
    @mapping.inject({}) do |result, (solr_name, field_mapping)|
      resolved = mapped_field_value(orig_data, field_mapping)
      result[solr_name] = resolved if resolved
      result
    end
  end

end
@@ -0,0 +1,41 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
# Make sure the namespace exists so this file can also be loaded on its own.
module Solr; module Importer; end; end

# Data source that pages through the results of a query against a Solr
# server and yields every document found.
class Solr::Importer::SolrSource
  # Same mixin as DelimitedFileSource, so both sources offer map/select/etc.
  include Enumerable

  # solr_url::       URL handed to Solr::Connection.new
  # query::          query sent with every page request
  # filter_queries:: optional filter queries sent with every page request
  # options::        :page_size (default 1000) and :field_list (default ["*"])
  def initialize(solr_url, query, filter_queries=nil, options={})
    @connection = Solr::Connection.new(solr_url)
    @query = query
    @filter_queries = filter_queries

    @page_size = options[:page_size] || 1000
    @field_list = options[:field_list] || ["*"]
  end

  # Yields each document of the result set, fetching @page_size documents
  # per request until the reported total_hits is covered.
  # Without a block an Enumerator is returned.
  def each
    return enum_for(:each) unless block_given?

    done = false
    start = 0
    until done do
      # request N documents from a starting point
      request = Solr::Request::Standard.new(:query => @query,
                                            :rows => @page_size,
                                            :start => start,
                                            :field_list => @field_list,
                                            :filter_queries => @filter_queries)
      response = @connection.send(request)
      response.each do |doc|
        yield doc # TODO: perhaps convert to HashWithIndifferentAccess.new(doc), so stringify_keys isn't necessary
      end
      done = start + @page_size >= response.total_hits
      start = start + @page_size
    end
  end
end
@@ -0,0 +1,35 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
begin
  require 'xml/libxml'

  # Mapper whose field lookups are XPath expressions evaluated with
  # doc.find (presumably a libxml document or node - confirm against callers).
  class Solr::Importer::XPathMapper < Solr::Importer::Mapper
    # Returns one entry per node matched by +xpath+: the value of an
    # attribute node, the content of any other XML::Node, and nil for a
    # match that is neither.
    def field_data(doc, xpath)
      doc.find(xpath.to_s).collect do |node|
        case node
        when XML::Attr
          node.value
        when XML::Node
          node.content
        end
      end
    end
  end
rescue LoadError # If we can't load libxml
  # Stand-in defined when libxml is missing: creating an instance fails.
  class Solr::Importer::XPathMapper
    def initialize(mapping, options={})
      raise "libxml not installed"
    end
  end
end
@@ -0,0 +1,19 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ module Solr; module Importer; end; end
14
+ require File.expand_path("#{File.dirname(__FILE__)}/importer/mapper")
15
+ require File.expand_path("#{File.dirname(__FILE__)}/importer/array_mapper")
16
+ require File.expand_path("#{File.dirname(__FILE__)}/importer/delimited_file_source")
17
+ require File.expand_path("#{File.dirname(__FILE__)}/importer/hpricot_mapper")
18
+ require File.expand_path("#{File.dirname(__FILE__)}/importer/xpath_mapper")
19
+ require File.expand_path("#{File.dirname(__FILE__)}/importer/solr_source")
@@ -0,0 +1,52 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
# Pulls records from a data source, maps each one to a document and adds
# the documents to Solr, optionally in batches.
class Solr::Indexer
  attr_reader :solr

  # TODO: document options!
  #
  # Options read here: :solr_url (falls back to ENV["SOLR_URL"], then
  # http://localhost:8983/solr), :buffer_docs (batch size) and :debug
  # (print documents instead of sending them).
  def initialize(data_source, mapper_or_mapping, options={})
    solr_url = options[:solr_url] || ENV["SOLR_URL"] || "http://localhost:8983/solr"
    @solr = Solr::Connection.new(solr_url, options) #TODO - these options contain the solr_url and debug keys also, so tidy up what gets passed

    @data_source = data_source
    # A bare Hash is wrapped in the default mapper; anything else is used as the mapper itself.
    @mapper = if mapper_or_mapping.is_a?(Hash)
      Solr::Importer::Mapper.new(mapper_or_mapping)
    else
      mapper_or_mapping
    end

    @buffer_docs = options[:buffer_docs]
    @debug = options[:debug]
  end

  # Maps and adds every record of the data source, then commits (no commit
  # in debug mode). An optional block receives each record together with
  # its mapped document before the document is queued.
  def index
    pending = []
    @data_source.each do |record|
      document = @mapper.map(record)

      # TODO: check arrity of block, if 3, pass counter as 3rd argument
      yield(record, document) if block_given? # TODO check return of block, if not true then don't index, or perhaps if document.empty?

      pending << document

      # With a batch size configured, wait until the batch is full;
      # without one, every document is sent immediately.
      next if @buffer_docs && pending.size != @buffer_docs

      add_docs(pending)
      pending.clear
    end
    add_docs(pending) unless pending.empty?

    @solr.commit unless @debug
  end

  # Sends the documents to Solr, or prints them when in debug mode.
  # Always returns nil.
  def add_docs(documents)
    if @debug
      puts documents.inspect
    else
      @solr.add(documents)
    end
    nil
  end
end
+ end