greglu-solr-ruby 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGES.yml +50 -0
- data/LICENSE.txt +201 -0
- data/README +56 -0
- data/Rakefile +190 -0
- data/examples/delicious_library/dl_importer.rb +60 -0
- data/examples/delicious_library/sample_export.txt +164 -0
- data/examples/marc/marc_importer.rb +106 -0
- data/examples/tang/tang_importer.rb +58 -0
- data/lib/solr.rb +21 -0
- data/lib/solr/connection.rb +179 -0
- data/lib/solr/document.rb +73 -0
- data/lib/solr/exception.rb +13 -0
- data/lib/solr/field.rb +39 -0
- data/lib/solr/importer.rb +19 -0
- data/lib/solr/importer/array_mapper.rb +26 -0
- data/lib/solr/importer/delimited_file_source.rb +38 -0
- data/lib/solr/importer/hpricot_mapper.rb +27 -0
- data/lib/solr/importer/mapper.rb +51 -0
- data/lib/solr/importer/solr_source.rb +43 -0
- data/lib/solr/importer/xpath_mapper.rb +35 -0
- data/lib/solr/indexer.rb +52 -0
- data/lib/solr/request.rb +26 -0
- data/lib/solr/request/add_document.rb +63 -0
- data/lib/solr/request/base.rb +36 -0
- data/lib/solr/request/commit.rb +31 -0
- data/lib/solr/request/delete.rb +50 -0
- data/lib/solr/request/dismax.rb +46 -0
- data/lib/solr/request/index_info.rb +22 -0
- data/lib/solr/request/modify_document.rb +51 -0
- data/lib/solr/request/optimize.rb +21 -0
- data/lib/solr/request/ping.rb +36 -0
- data/lib/solr/request/select.rb +56 -0
- data/lib/solr/request/spellcheck.rb +30 -0
- data/lib/solr/request/standard.rb +374 -0
- data/lib/solr/request/update.rb +23 -0
- data/lib/solr/response.rb +27 -0
- data/lib/solr/response/add_document.rb +17 -0
- data/lib/solr/response/base.rb +42 -0
- data/lib/solr/response/commit.rb +17 -0
- data/lib/solr/response/delete.rb +13 -0
- data/lib/solr/response/dismax.rb +20 -0
- data/lib/solr/response/index_info.rb +26 -0
- data/lib/solr/response/modify_document.rb +17 -0
- data/lib/solr/response/optimize.rb +14 -0
- data/lib/solr/response/ping.rb +28 -0
- data/lib/solr/response/ruby.rb +42 -0
- data/lib/solr/response/select.rb +17 -0
- data/lib/solr/response/spellcheck.rb +20 -0
- data/lib/solr/response/standard.rb +60 -0
- data/lib/solr/response/xml.rb +42 -0
- data/lib/solr/solrtasks.rb +27 -0
- data/lib/solr/util.rb +32 -0
- data/lib/solr/xml.rb +47 -0
- data/script/setup.rb +14 -0
- data/script/solrshell +18 -0
- data/solr-ruby.gemspec +26 -0
- data/solr/conf/admin-extra.html +31 -0
- data/solr/conf/protwords.txt +21 -0
- data/solr/conf/schema.xml +221 -0
- data/solr/conf/scripts.conf +24 -0
- data/solr/conf/solrconfig.xml +394 -0
- data/solr/conf/stopwords.txt +58 -0
- data/solr/conf/synonyms.txt +31 -0
- data/solr/conf/xslt/example.xsl +132 -0
- data/test/conf/admin-extra.html +31 -0
- data/test/conf/protwords.txt +21 -0
- data/test/conf/schema.xml +237 -0
- data/test/conf/scripts.conf +24 -0
- data/test/conf/solrconfig.xml +376 -0
- data/test/conf/stopwords.txt +58 -0
- data/test/conf/synonyms.txt +31 -0
- data/test/functional/server_test.rb +218 -0
- data/test/functional/test_solr_server.rb +104 -0
- data/test/unit/add_document_test.rb +40 -0
- data/test/unit/array_mapper_test.rb +37 -0
- data/test/unit/changes_yaml_test.rb +21 -0
- data/test/unit/commit_test.rb +41 -0
- data/test/unit/connection_test.rb +55 -0
- data/test/unit/data_mapper_test.rb +75 -0
- data/test/unit/delete_test.rb +56 -0
- data/test/unit/delimited_file_source_test.rb +29 -0
- data/test/unit/dismax_request_test.rb +26 -0
- data/test/unit/document_test.rb +69 -0
- data/test/unit/field_test.rb +48 -0
- data/test/unit/hpricot_mapper_test.rb +44 -0
- data/test/unit/hpricot_test_file.xml +26 -0
- data/test/unit/indexer_test.rb +57 -0
- data/test/unit/modify_document_test.rb +24 -0
- data/test/unit/ping_test.rb +51 -0
- data/test/unit/request_test.rb +61 -0
- data/test/unit/response_test.rb +43 -0
- data/test/unit/select_test.rb +25 -0
- data/test/unit/solr_mock_base.rb +40 -0
- data/test/unit/spellcheck_response_test.rb +26 -0
- data/test/unit/spellchecker_request_test.rb +27 -0
- data/test/unit/standard_request_test.rb +324 -0
- data/test/unit/standard_response_test.rb +174 -0
- data/test/unit/suite.rb +16 -0
- data/test/unit/tab_delimited.txt +2 -0
- data/test/unit/util_test.rb +24 -0
- data/test/unit/xpath_mapper_test.rb +38 -0
- data/test/unit/xpath_test_file.xml +25 -0
- metadata +173 -0
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
2
|
+
# (the "License"); you may not use this file except in compliance with
|
|
3
|
+
# the License. You may obtain a copy of the License at
|
|
4
|
+
#
|
|
5
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
#
|
|
7
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
8
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
9
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
10
|
+
# See the License for the specific language governing permissions and
|
|
11
|
+
# limitations under the License.
|
|
12
|
+
|
|
13
|
+
require 'solr/xml'
|
|
14
|
+
require 'solr/field'
|
|
15
|
+
|
|
16
|
+
# A document to be sent to Solr: an ordered collection of Solr::Field
# objects plus an optional document-level boost.
class Solr::Document
  include Enumerable
  attr_accessor :boost

  # Create a new Solr::Document, optionally passing in a hash of
  # key/value pairs for the fields
  #
  #   doc = Solr::Document.new(:creator => 'Jorge Luis Borges')
  def initialize(hash={})
    @fields = []
    self << hash
  end

  # Yield every Solr::Field of the document in insertion order.
  #
  # Enumerable is mixed into this class, and all of its methods are built on
  # top of #each; without this method calls such as doc.map or doc.to_a
  # raise NoMethodError.
  #
  # Returns an enumerator when no block is given, otherwise self.
  def each(&block)
    return enum_for(:each) unless block
    @fields.each(&block)
    self
  end

  # Append a Solr::Field
  #
  #   doc << Solr::Field.new(:creator => 'Jorge Luis Borges')
  #
  # If you are truly lazy you can simply pass in a hash:
  #
  #   doc << {:creator => 'Jorge Luis Borges'}
  #
  # A hash value that responds to #each (and is not a String) is expanded
  # into one field per element, all sharing the same name.
  #
  # Raises a RuntimeError for any argument that is neither a Hash nor a
  # Solr::Field.
  def <<(fields)
    case fields
    when Hash
      fields.each_pair do |name,value|
        if value.respond_to?(:each) && !value.is_a?(String)
          # multi-valued: one field per element
          value.each {|v| @fields << Solr::Field.new(name => v)}
        else
          @fields << Solr::Field.new(name => value)
        end
      end
    when Solr::Field
      @fields << fields
    else
      raise "must pass in Solr::Field or Hash"
    end
  end

  # shorthand to allow hash lookups
  #   doc['name']
  #
  # Returns the value of the first field whose name equals name.to_s,
  # or nil when no such field exists.
  def [](name)
    wanted = name.to_s
    field = @fields.find {|f| f.name == wanted}
    field && field.value
  end

  # shorthand to assign as a hash
  #
  # Note that this appends a new field; an existing field with the same
  # name is kept, not replaced.
  def []=(name,value)
    @fields << Solr::Field.new(name => value)
  end

  # convert the Document to a Solr::XML::Element named 'doc', carrying the
  # boost attribute (when set) and one child element per field
  def to_xml
    e = Solr::XML::Element.new 'doc'
    e.attributes['boost'] = @boost.to_s if @boost
    @fields.each {|f| e.add_element(f.to_xml)}
    return e
  end
end
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
2
|
+
# (the "License"); you may not use this file except in compliance with
|
|
3
|
+
# the License. You may obtain a copy of the License at
|
|
4
|
+
#
|
|
5
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
#
|
|
7
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
8
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
9
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
10
|
+
# See the License for the specific language governing permissions and
|
|
11
|
+
# limitations under the License.
|
|
12
|
+
|
|
13
|
+
# Error class for the solr-ruby library.
#
# Derives from StandardError rather than directly from Exception so that a
# bare `rescue` (which only handles StandardError and its descendants) also
# handles it. `rescue Solr::Exception` and `rescue Exception` keep working
# exactly as before.
class Solr::Exception < StandardError; end
|
data/lib/solr/field.rb
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
2
|
+
# (the "License"); you may not use this file except in compliance with
|
|
3
|
+
# the License. You may obtain a copy of the License at
|
|
4
|
+
#
|
|
5
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
#
|
|
7
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
8
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
9
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
10
|
+
# See the License for the specific language governing permissions and
|
|
11
|
+
# limitations under the License.
|
|
12
|
+
|
|
13
|
+
require 'solr/xml'
|
|
14
|
+
require 'time'
|
|
15
|
+
|
|
16
|
+
# A single name/value pair of a Solr document, with an optional boost.
class Solr::Field
  # Keys of the constructor hash that are options rather than the field name.
  VALID_PARAMS = [:boost]
  attr_accessor :name
  attr_accessor :value
  attr_accessor :boost

  # Builds a field from a one-entry hash such as <tt>:title => 'Ficciones'</tt>.
  #
  # Accepts an optional <tt>:boost</tt> parameter, used to boost the relevance of a particular field.
  #
  # The first key that is not listed in VALID_PARAMS becomes the field name
  # (stringified); its value is stored as a String.
  def initialize(params)
    @boost = params[:boost]
    name_key = (params.keys - VALID_PARAMS).first
    @name = name_key.to_s
    @value = solr_value(params[name_key])
  end

  # Convert the field to a Solr::XML::Element named 'field' with the name
  # (and boost, when set) as attributes and the value as text.
  def to_xml
    e = Solr::XML::Element.new 'field'
    e.attributes['name'] = @name
    e.attributes['boost'] = @boost.to_s if @boost
    e.text = @value
    return e
  end

  private

  # Convert any Time values into UTC/XML schema format (which Solr requires);
  # everything else is converted with to_s.
  def solr_value(raw)
    return raw.to_s unless raw.respond_to?(:utc)
    # Time#utc converts the receiver in place (and raises on a frozen Time),
    # which would silently alter the caller's object. Prefer getutc, which
    # returns a converted copy, and fall back to utc for time-like objects
    # that do not provide it.
    in_utc = raw.respond_to?(:getutc) ? raw.getutc : raw.utc
    in_utc.xmlschema
  end

end
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
2
|
+
# (the "License"); you may not use this file except in compliance with
|
|
3
|
+
# the License. You may obtain a copy of the License at
|
|
4
|
+
#
|
|
5
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
#
|
|
7
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
8
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
9
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
10
|
+
# See the License for the specific language governing permissions and
|
|
11
|
+
# limitations under the License.
|
|
12
|
+
|
|
13
|
+
# Declare the Solr::Importer namespace up front so the files required
# below can open classes inside it.
module Solr
  module Importer
  end
end
|
|
14
|
+
require 'solr/importer/mapper'
|
|
15
|
+
require 'solr/importer/array_mapper'
|
|
16
|
+
require 'solr/importer/delimited_file_source'
|
|
17
|
+
require 'solr/importer/hpricot_mapper'
|
|
18
|
+
require 'solr/importer/xpath_mapper'
|
|
19
|
+
require 'solr/importer/solr_source'
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
2
|
+
# (the "License"); you may not use this file except in compliance with
|
|
3
|
+
# the License. You may obtain a copy of the License at
|
|
4
|
+
#
|
|
5
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
#
|
|
7
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
8
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
9
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
10
|
+
# See the License for the specific language governing permissions and
|
|
11
|
+
# limitations under the License.
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# Mapper for records that arrive as an array of parts: the part at
# position i is mapped by the i-th entry of the mapping given to the
# constructor, and the per-part results are merged into a single hash.
class Solr::Importer::ArrayMapper < Solr::Importer::Mapper
  # TODO document that initializer takes an array of Mappers [mapper1, mapper2, ... mapperN]

  # TODO: make merge conflict handling configurable. as is, the last map fields win.
  #
  # Returns the merged hash of every part's mapped fields.
  def map(orig_data_array)
    orig_data_array.each_with_index.inject({}) do |merged, (part, position)|
      part_fields = @mapping[position].map(part)
      merged.merge!(part_fields)
    end
  end
end
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
2
|
+
# (the "License"); you may not use this file except in compliance with
|
|
3
|
+
# the License. You may obtain a copy of the License at
|
|
4
|
+
#
|
|
5
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
#
|
|
7
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
8
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
9
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
10
|
+
# See the License for the specific language governing permissions and
|
|
11
|
+
# limitations under the License.
|
|
12
|
+
|
|
13
|
+
# Data source for delimited text files whose first line contains the
# field names.
# Currently not designed for enormous files, as all lines are
# read into an array
class Solr::Importer::DelimitedFileSource
  include Enumerable

  # filename:: path of the file to read
  # splitter:: pattern handed to String#split for every line (a tab by default)
  def initialize(filename, splitter=/\t/)
    @filename = filename
    @splitter = splitter
  end

  # Yields one record per line after the header line.
  #
  # A record is an array of [header, value] pairs whose #[] is redefined to
  # look a value up by header name (the key is stringified first). Looking
  # up a header that is not present returns nil instead of raising, which
  # lets Solr::Importer::Mapper#map skip the field.
  #
  # An empty file yields nothing.
  def each
    lines = IO.readlines(@filename)
    # no header line at all: nothing to iterate over
    return lines if lines.empty?

    headers = split_line(lines.first)

    lines[1..-1].each do |line|
      data = headers.zip(split_line(line))
      def data.[](key)
        pair = assoc(key.to_s)
        pair && pair[1]
      end

      yield(data)
    end
  end

  private

  # Split a raw line into its fields, removing the line terminator that is
  # still attached to the last one.
  def split_line(line)
    line.split(@splitter).collect {|s| s.chomp}
  end

end
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
2
|
+
# (the "License"); you may not use this file except in compliance with
|
|
3
|
+
# the License. You may obtain a copy of the License at
|
|
4
|
+
#
|
|
5
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
#
|
|
7
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
8
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
9
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
10
|
+
# See the License for the specific language governing permissions and
|
|
11
|
+
# limitations under the License.
|
|
12
|
+
|
|
13
|
+
# Mapper whose field mappings are Hpricot search paths. hpricot is an
# optional dependency: when it cannot be loaded a placeholder class is
# defined instead, and constructing it raises.
begin
  require 'hpricot'

  class Solr::Importer::HpricotMapper < Solr::Importer::Mapper
    # Searches +doc+ with +path+ (stringified) and returns the inner_html
    # of every match as an array.
    def field_data(doc, path)
      matches = doc.search(path.to_s)
      matches.map { |element| element.inner_html }
    end
  end
rescue LoadError
  # hpricot could not be required
  class Solr::Importer::HpricotMapper
    # Same signature as Solr::Importer::Mapper#initialize, but always fails.
    def initialize(mapping, options={})
      raise "Hpricot not installed."
    end
  end
end
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
2
|
+
# (the "License"); you may not use this file except in compliance with
|
|
3
|
+
# the License. You may obtain a copy of the License at
|
|
4
|
+
#
|
|
5
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
#
|
|
7
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
8
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
9
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
10
|
+
# See the License for the specific language governing permissions and
|
|
11
|
+
# limitations under the License.
|
|
12
|
+
|
|
13
|
+
# Turns a source record into a hash of Solr field names and values,
# driven by a mapping of Solr field name => field mapping.
class Solr::Importer::Mapper
  # mapping:: pairs of Solr field name and field mapping (see #mapped_field_value)
  # options:: only :stringify_symbols is read by this class
  def initialize(mapping, options={})
    @mapping, @options = mapping, options
  end

  # Reads one raw value out of the source record. Subclasses override this
  # to change how a record is addressed.
  def field_data(orig_data, field_name)
    orig_data[field_name]
  end

  # Resolves a single field mapping against the source record:
  #
  # String::     used as-is, as a literal value
  # Proc::       called with the source record
  # Symbol::     looked up through #field_data (as a String when the
  #              :stringify_symbols option is set)
  # Enumerable:: every element is resolved recursively and the results
  #              are flattened
  #
  # Anything else raises a RuntimeError.
  def mapped_field_value(orig_data, field_mapping)
    case field_mapping
    when String then field_mapping
    when Proc
      # TODO pass in more context, like self or a function for field_data, etc
      field_mapping.call(orig_data)
    when Symbol
      lookup_key = @options[:stringify_symbols] ? field_mapping.to_s : field_mapping
      field_data(orig_data, lookup_key)
    when Enumerable
      resolved = field_mapping.collect { |nested| mapped_field_value(orig_data, nested) }
      resolved.flatten
    else
      raise "Unknown mapping for #{field_mapping}"
    end
  end

  # Applies the whole mapping to the source record and returns the
  # resulting hash. Fields whose value resolves to nil or false are left out.
  def map(orig_data)
    result = {}
    @mapping.each do |solr_name, field_mapping|
      resolved = mapped_field_value(orig_data, field_mapping)
      next unless resolved
      result[solr_name] = resolved
    end
    result
  end
end
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
2
|
+
# (the "License"); you may not use this file except in compliance with
|
|
3
|
+
# the License. You may obtain a copy of the License at
|
|
4
|
+
#
|
|
5
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
#
|
|
7
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
8
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
9
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
10
|
+
# See the License for the specific language governing permissions and
|
|
11
|
+
# limitations under the License.
|
|
12
|
+
|
|
13
|
+
require 'solr'
|
|
14
|
+
|
|
15
|
+
# Data source that iterates over the documents matching a query on a
# Solr server, fetching them one page at a time.
class Solr::Importer::SolrSource
  # #each is defined below, so mix in Enumerable like the other data
  # sources do; this makes map, select, first, etc. available.
  include Enumerable

  # solr_url::       URL handed to Solr::Connection.new
  # query::          query sent with every page request
  # filter_queries:: optional filter queries sent with every page request
  # options::        :page_size  - documents requested per page (default 1000)
  #                  :field_list - fields to fetch (default ["*"])
  def initialize(solr_url, query, filter_queries=nil, options={})
    @connection = Solr::Connection.new(solr_url)
    @query = query
    @filter_queries = filter_queries

    @page_size = options[:page_size] || 1000
    @field_list = options[:field_list] || ["*"]
  end

  # Yields every document of the result set, requesting a new page until
  # the response's total_hits has been covered.
  #
  # Returns an enumerator when called without a block.
  def each
    return enum_for(:each) unless block_given?

    done = false
    start = 0
    until done do
      # request N documents from a starting point
      request = Solr::Request::Standard.new(:query => @query,
                                            :rows => @page_size,
                                            :start => start,
                                            :field_list => @field_list,
                                            :filter_queries => @filter_queries)
      response = @connection.send(request)
      response.each do |doc|
        yield doc  # TODO: perhaps convert to HashWithIndifferentAccess.new(doc), so stringify_keys isn't necessary
      end
      # stop once this page reaches the end of the result set
      done = start + @page_size >= response.total_hits
      start = start + @page_size
    end
  end
end
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
2
|
+
# (the "License"); you may not use this file except in compliance with
|
|
3
|
+
# the License. You may obtain a copy of the License at
|
|
4
|
+
#
|
|
5
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
#
|
|
7
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
8
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
9
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
10
|
+
# See the License for the specific language governing permissions and
|
|
11
|
+
# limitations under the License.
|
|
12
|
+
|
|
13
|
+
# Mapper whose field mappings are XPath expressions evaluated with libxml.
# libxml is an optional dependency: when it cannot be loaded a placeholder
# class is defined instead, and constructing it raises.
begin
  require 'xml/libxml'

  class Solr::Importer::XPathMapper < Solr::Importer::Mapper
    # Evaluates +xpath+ (stringified) with doc.find and returns one entry
    # per matched node: the value of an attribute node, the content of any
    # other XML::Node, and nil for anything else.
    def field_data(doc, xpath)
      doc.find(xpath.to_s).map do |match|
        case match
        when XML::Attr then match.value
        when XML::Node then match.content
        end
      end
    end
  end
rescue LoadError
  # libxml could not be required
  class Solr::Importer::XPathMapper
    # Same signature as Solr::Importer::Mapper#initialize, but always fails.
    def initialize(mapping, options={})
      raise "libxml not installed"
    end
  end
end
|
data/lib/solr/indexer.rb
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
2
|
+
# (the "License"); you may not use this file except in compliance with
|
|
3
|
+
# the License. You may obtain a copy of the License at
|
|
4
|
+
#
|
|
5
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
#
|
|
7
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
8
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
9
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
10
|
+
# See the License for the specific language governing permissions and
|
|
11
|
+
# limitations under the License.
|
|
12
|
+
|
|
13
|
+
# Reads records from a data source, maps each one to a document and adds
# the documents to Solr, optionally in batches.
class Solr::Indexer
  attr_reader :solr

  # TODO: document options!
  #
  # data_source::       object whose #each yields the records to index
  # mapper_or_mapping:: a mapper object, or a Hash that is wrapped in a
  #                     Solr::Importer::Mapper
  # options::           keys read here are :solr_url, :buffer_docs and
  #                     :debug; the whole hash is also passed on to
  #                     Solr::Connection.new
  def initialize(data_source, mapper_or_mapping, options={})
    # explicit option first, then the environment, then the local default
    solr_url = options[:solr_url] || ENV["SOLR_URL"] || "http://localhost:8983/solr"
    # TODO - these options contain the solr_url and debug keys also, so tidy up what gets passed
    @solr = Solr::Connection.new(solr_url, options)

    @data_source = data_source
    @mapper = if mapper_or_mapping.is_a?(Hash)
                Solr::Importer::Mapper.new(mapper_or_mapping)
              else
                mapper_or_mapping
              end

    @buffer_docs = options[:buffer_docs]
    @debug = options[:debug]
  end

  # Maps and adds every record of the data source, then commits unless
  # running in debug mode. When a block is given it is called with each
  # record and its mapped document before the document is queued.
  def index
    pending = []
    @data_source.each do |record|
      document = @mapper.map(record)

      # TODO: check arrity of block, if 3, pass counter as 3rd argument
      # TODO check return of block, if not true then don't index, or perhaps if document.empty?
      yield(record, document) if block_given?

      pending << document

      # flush after every document when unbuffered, otherwise once the
      # batch has reached the configured size
      unless @buffer_docs && pending.size != @buffer_docs
        add_docs(pending)
        pending.clear
      end
    end
    # send whatever is left of the last, incomplete batch
    add_docs(pending) unless pending.empty?

    return if @debug
    @solr.commit
  end

  # Adds the documents to Solr, or only prints them when in debug mode.
  def add_docs(documents)
    if @debug
      puts documents.inspect
    else
      @solr.add(documents)
    end
    nil
  end
end
|
data/lib/solr/request.rb
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
2
|
+
# (the "License"); you may not use this file except in compliance with
|
|
3
|
+
# the License. You may obtain a copy of the License at
|
|
4
|
+
#
|
|
5
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
#
|
|
7
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
8
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
9
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
10
|
+
# See the License for the specific language governing permissions and
|
|
11
|
+
# limitations under the License.
|
|
12
|
+
|
|
13
|
+
# Declare the Solr::Request namespace up front so the files required
# below can open classes inside it.
module Solr
  module Request
  end
end
|
|
14
|
+
require 'solr/request/add_document'
|
|
15
|
+
require 'solr/request/modify_document'
|
|
16
|
+
require 'solr/request/base'
|
|
17
|
+
require 'solr/request/commit'
|
|
18
|
+
require 'solr/request/delete'
|
|
19
|
+
require 'solr/request/ping'
|
|
20
|
+
require 'solr/request/select'
|
|
21
|
+
require 'solr/request/standard'
|
|
22
|
+
require 'solr/request/spellcheck'
|
|
23
|
+
require 'solr/request/dismax'
|
|
24
|
+
require 'solr/request/update'
|
|
25
|
+
require 'solr/request/index_info'
|
|
26
|
+
require 'solr/request/optimize'
|