greglu-solr-ruby 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103) hide show
  1. data/CHANGES.yml +50 -0
  2. data/LICENSE.txt +201 -0
  3. data/README +56 -0
  4. data/Rakefile +190 -0
  5. data/examples/delicious_library/dl_importer.rb +60 -0
  6. data/examples/delicious_library/sample_export.txt +164 -0
  7. data/examples/marc/marc_importer.rb +106 -0
  8. data/examples/tang/tang_importer.rb +58 -0
  9. data/lib/solr.rb +21 -0
  10. data/lib/solr/connection.rb +179 -0
  11. data/lib/solr/document.rb +73 -0
  12. data/lib/solr/exception.rb +13 -0
  13. data/lib/solr/field.rb +39 -0
  14. data/lib/solr/importer.rb +19 -0
  15. data/lib/solr/importer/array_mapper.rb +26 -0
  16. data/lib/solr/importer/delimited_file_source.rb +38 -0
  17. data/lib/solr/importer/hpricot_mapper.rb +27 -0
  18. data/lib/solr/importer/mapper.rb +51 -0
  19. data/lib/solr/importer/solr_source.rb +43 -0
  20. data/lib/solr/importer/xpath_mapper.rb +35 -0
  21. data/lib/solr/indexer.rb +52 -0
  22. data/lib/solr/request.rb +26 -0
  23. data/lib/solr/request/add_document.rb +63 -0
  24. data/lib/solr/request/base.rb +36 -0
  25. data/lib/solr/request/commit.rb +31 -0
  26. data/lib/solr/request/delete.rb +50 -0
  27. data/lib/solr/request/dismax.rb +46 -0
  28. data/lib/solr/request/index_info.rb +22 -0
  29. data/lib/solr/request/modify_document.rb +51 -0
  30. data/lib/solr/request/optimize.rb +21 -0
  31. data/lib/solr/request/ping.rb +36 -0
  32. data/lib/solr/request/select.rb +56 -0
  33. data/lib/solr/request/spellcheck.rb +30 -0
  34. data/lib/solr/request/standard.rb +374 -0
  35. data/lib/solr/request/update.rb +23 -0
  36. data/lib/solr/response.rb +27 -0
  37. data/lib/solr/response/add_document.rb +17 -0
  38. data/lib/solr/response/base.rb +42 -0
  39. data/lib/solr/response/commit.rb +17 -0
  40. data/lib/solr/response/delete.rb +13 -0
  41. data/lib/solr/response/dismax.rb +20 -0
  42. data/lib/solr/response/index_info.rb +26 -0
  43. data/lib/solr/response/modify_document.rb +17 -0
  44. data/lib/solr/response/optimize.rb +14 -0
  45. data/lib/solr/response/ping.rb +28 -0
  46. data/lib/solr/response/ruby.rb +42 -0
  47. data/lib/solr/response/select.rb +17 -0
  48. data/lib/solr/response/spellcheck.rb +20 -0
  49. data/lib/solr/response/standard.rb +60 -0
  50. data/lib/solr/response/xml.rb +42 -0
  51. data/lib/solr/solrtasks.rb +27 -0
  52. data/lib/solr/util.rb +32 -0
  53. data/lib/solr/xml.rb +47 -0
  54. data/script/setup.rb +14 -0
  55. data/script/solrshell +18 -0
  56. data/solr-ruby.gemspec +26 -0
  57. data/solr/conf/admin-extra.html +31 -0
  58. data/solr/conf/protwords.txt +21 -0
  59. data/solr/conf/schema.xml +221 -0
  60. data/solr/conf/scripts.conf +24 -0
  61. data/solr/conf/solrconfig.xml +394 -0
  62. data/solr/conf/stopwords.txt +58 -0
  63. data/solr/conf/synonyms.txt +31 -0
  64. data/solr/conf/xslt/example.xsl +132 -0
  65. data/test/conf/admin-extra.html +31 -0
  66. data/test/conf/protwords.txt +21 -0
  67. data/test/conf/schema.xml +237 -0
  68. data/test/conf/scripts.conf +24 -0
  69. data/test/conf/solrconfig.xml +376 -0
  70. data/test/conf/stopwords.txt +58 -0
  71. data/test/conf/synonyms.txt +31 -0
  72. data/test/functional/server_test.rb +218 -0
  73. data/test/functional/test_solr_server.rb +104 -0
  74. data/test/unit/add_document_test.rb +40 -0
  75. data/test/unit/array_mapper_test.rb +37 -0
  76. data/test/unit/changes_yaml_test.rb +21 -0
  77. data/test/unit/commit_test.rb +41 -0
  78. data/test/unit/connection_test.rb +55 -0
  79. data/test/unit/data_mapper_test.rb +75 -0
  80. data/test/unit/delete_test.rb +56 -0
  81. data/test/unit/delimited_file_source_test.rb +29 -0
  82. data/test/unit/dismax_request_test.rb +26 -0
  83. data/test/unit/document_test.rb +69 -0
  84. data/test/unit/field_test.rb +48 -0
  85. data/test/unit/hpricot_mapper_test.rb +44 -0
  86. data/test/unit/hpricot_test_file.xml +26 -0
  87. data/test/unit/indexer_test.rb +57 -0
  88. data/test/unit/modify_document_test.rb +24 -0
  89. data/test/unit/ping_test.rb +51 -0
  90. data/test/unit/request_test.rb +61 -0
  91. data/test/unit/response_test.rb +43 -0
  92. data/test/unit/select_test.rb +25 -0
  93. data/test/unit/solr_mock_base.rb +40 -0
  94. data/test/unit/spellcheck_response_test.rb +26 -0
  95. data/test/unit/spellchecker_request_test.rb +27 -0
  96. data/test/unit/standard_request_test.rb +324 -0
  97. data/test/unit/standard_response_test.rb +174 -0
  98. data/test/unit/suite.rb +16 -0
  99. data/test/unit/tab_delimited.txt +2 -0
  100. data/test/unit/util_test.rb +24 -0
  101. data/test/unit/xpath_mapper_test.rb +38 -0
  102. data/test/unit/xpath_test_file.xml +25 -0
  103. metadata +173 -0
@@ -0,0 +1,73 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ require 'solr/xml'
14
+ require 'solr/field'
15
+
16
# An ordered collection of Solr::Field objects plus an optional
# document-level boost. The same field name may occur more than once;
# nothing here enforces uniqueness.
class Solr::Document
  include Enumerable
  attr_accessor :boost

  # Create a new Solr::Document, optionally passing in a hash of
  # key/value pairs for the fields
  #
  #   doc = Solr::Document.new(:creator => 'Jorge Luis Borges')
  def initialize(hash={})
    @fields = []
    self << hash
  end

  # Yield every Solr::Field in insertion order.
  #
  # Enumerable is mixed into this class, and Enumerable's methods are all
  # built on +each+; without it they would raise NoMethodError.
  # Returns an Enumerator when called without a block.
  def each
    return to_enum(:each) unless block_given?
    @fields.each { |field| yield field }
    self
  end

  # Append a Solr::Field
  #
  #   doc << Solr::Field.new(:creator => 'Jorge Luis Borges')
  #
  # If you are truly lazy you can simply pass in a hash:
  #
  #   doc << {:creator => 'Jorge Luis Borges'}
  #
  # A hash value that responds to +each+ (and is not a String) is expanded
  # into one field per element, all sharing the same name.
  #
  # Raises a RuntimeError for any other argument type.
  # Returns the document itself so appends can be chained.
  def <<(fields)
    case fields
    when Hash
      fields.each_pair do |name, value|
        if value.respond_to?(:each) && !value.is_a?(String)
          value.each { |v| @fields << Solr::Field.new(name => v) }
        else
          @fields << Solr::Field.new(name => value)
        end
      end
    when Solr::Field
      @fields << fields
    else
      raise "must pass in Solr::Field or Hash"
    end
    self
  end

  # shorthand to allow hash lookups
  #
  #   doc['name']
  #
  # Returns the value of the first field with the given name, or nil when
  # there is none. The name is compared as a String, so symbols work too.
  def [](name)
    wanted = name.to_s
    field = @fields.find { |f| f.name == wanted }
    field && field.value
  end

  # shorthand to assign as a hash
  #
  # Note that this appends a new field; an existing field with the same
  # name is left in place.
  def []=(name, value)
    @fields << Solr::Field.new(name => value)
  end

  # Convert the Document to a Solr::XML::Element named 'doc', carrying the
  # boost attribute (when set) and one child element per field.
  def to_xml
    e = Solr::XML::Element.new 'doc'
    e.attributes['boost'] = @boost.to_s if @boost
    @fields.each { |f| e.add_element(f.to_xml) }
    e
  end
end
@@ -0,0 +1,13 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
# Error class for the Solr namespace.
#
# Derives from StandardError rather than Exception so that a bare +rescue+
# (or <tt>rescue => e</tt>) catches it; those forms only handle StandardError
# and its descendants. <tt>rescue Solr::Exception</tt> and
# <tt>rescue Exception</tt> keep working as before.
class Solr::Exception < StandardError; end
data/lib/solr/field.rb ADDED
@@ -0,0 +1,39 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ require 'solr/xml'
14
+ require 'time'
15
+
16
# A single name/value pair with an optional boost, serialisable to a
# 'field' XML element.
class Solr::Field
  VALID_PARAMS = [:boost]
  attr_accessor :name, :value, :boost

  # Build a field from a hash such as <tt>{:title => 'Ficciones'}</tt>.
  #
  # Accepts an optional <tt>:boost</tt> entry, used to boost the relevance
  # of a particular field. The first key that is not a recognised option is
  # taken as the field name.
  def initialize(params)
    @boost = params[:boost]
    field_key = params.keys.reject { |key| VALID_PARAMS.include?(key) }.first
    raw_value = params[field_key]
    @name = field_key.to_s
    # Time-like values are converted into UTC/XML schema format (which Solr
    # requires); everything else is stringified.
    if raw_value.respond_to?(:utc)
      @value = raw_value.utc.xmlschema
    else
      @value = raw_value.to_s
    end
  end

  # Render the field as a Solr::XML::Element named 'field' with a name
  # attribute, an optional boost attribute, and the value as its text.
  def to_xml
    element = Solr::XML::Element.new('field')
    element.attributes['name'] = @name
    element.attributes['boost'] = @boost.to_s if @boost
    element.text = @value
    element
  end
end
@@ -0,0 +1,19 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
# Declare the Solr::Importer namespace before loading the importer classes:
# each required file reopens it with the compact
# `class Solr::Importer::Name` form, which needs the module to exist already.
# 'mapper' is loaded first because the other mappers subclass
# Solr::Importer::Mapper.
+ module Solr; module Importer; end; end
14
+ require 'solr/importer/mapper'
15
+ require 'solr/importer/array_mapper'
16
+ require 'solr/importer/delimited_file_source'
17
+ require 'solr/importer/hpricot_mapper'
18
+ require 'solr/importer/xpath_mapper'
19
+ require 'solr/importer/solr_source'
@@ -0,0 +1,26 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+
14
+
15
# Combines several mappers into one. The mapping handed to the inherited
# initializer is an array of mappers [mapper1, mapper2, ... mapperN]; the
# element at position i of the incoming data is mapped by the mapper at
# position i.
class Solr::Importer::ArrayMapper < Solr::Importer::Mapper
  # Map each element of +orig_data_array+ with its positional mapper and
  # merge the resulting hashes into one.
  #
  # TODO: make merge conflict handling configurable. as is, the last map fields win.
  def map(orig_data_array)
    orig_data_array.each_with_index.inject({}) do |merged, (data, position)|
      merged.merge!(@mapping[position].map(data))
    end
  end
end
@@ -0,0 +1,38 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
# Data source for delimited text files whose first line contains the field
# names. Every following line is yielded as one record.
#
# Currently not designed for enormous files, as all lines are read into an
# array.
class Solr::Importer::DelimitedFileSource
  include Enumerable

  # Mixed into each yielded row (an array of [header, value] pairs) so that
  # <tt>row['name']</tt> looks a value up by column name. Defined once here
  # instead of redefining a singleton method for every row.
  module Row
    # Returns the value stored under +key+ (compared as a String), or nil
    # when the row has no such column.
    def [](key)
      pair = assoc(key.to_s)
      pair && pair[1]
    end
  end

  # filename:: path of the file to read
  # splitter:: String or Regexp passed to String#split; defaults to a tab
  def initialize(filename, splitter=/\t/)
    @filename = filename
    @splitter = splitter
  end

  # Yield one row per data line. A row is the header names zipped with the
  # line's values, extended with Row for lookup by name.
  #
  # Yields nothing for an empty file. Returns an Enumerator when called
  # without a block.
  def each
    return to_enum(:each) unless block_given?

    lines = IO.readlines(@filename)
    return if lines.empty? # no header line, hence no records

    headers = split_line(lines[0])
    lines[1..-1].each do |line|
      row = headers.zip(split_line(line))
      row.extend(Row)
      yield(row)
    end
  end

  private

  # Split one raw line on the configured splitter and strip the trailing
  # line terminator from each piece.
  def split_line(line)
    line.split(@splitter).collect { |cell| cell.chomp }
  end
end
@@ -0,0 +1,27 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
begin
  require 'hpricot'

  # Mapper whose source records are searched with the mapped path; the
  # field data is the inner HTML of every element the search returns.
  class Solr::Importer::HpricotMapper < Solr::Importer::Mapper
    def field_data(doc, path)
      selector = path.to_s
      doc.search(selector).map { |element| element.inner_html }
    end
  end
rescue LoadError
  # hpricot could not be loaded: define a stand-in whose constructor fails,
  # so the problem only surfaces when the mapper is actually used.
  class Solr::Importer::HpricotMapper
    def initialize(mapping, options={})
      raise "Hpricot not installed."
    end
  end
end
@@ -0,0 +1,51 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
# Translates a source record into a hash keyed by Solr field name,
# following a mapping of solr_name => field mapping.
class Solr::Importer::Mapper
  # mapping:: pairs of solr field name and field mapping (see
  #           mapped_field_value for the accepted mapping kinds)
  # options:: <tt>:stringify_symbols</tt> makes Symbol mappings look up the
  #           source record by String instead of Symbol
  def initialize(mapping, options={})
    @mapping, @options = mapping, options
  end

  # Fetch a raw value from the source record. Subclasses override this to
  # read from other kinds of records.
  def field_data(orig_data, field_name)
    orig_data[field_name]
  end

  # Resolve one field mapping against the source record:
  #
  # String::     used as-is (a literal value)
  # Proc::       called with the source record
  # Symbol::     looked up in the source record via field_data
  # Enumerable:: each element resolved in turn, results flattened
  #
  # Anything else raises a RuntimeError. String is tested before
  # Enumerable, as in the case statement this replaces.
  def mapped_field_value(orig_data, field_mapping)
    if field_mapping.is_a?(String)
      field_mapping
    elsif field_mapping.is_a?(Proc)
      field_mapping.call(orig_data) # TODO pass in more context, like self or a function for field_data, etc
    elsif field_mapping.is_a?(Symbol)
      lookup_key = @options[:stringify_symbols] ? field_mapping.to_s : field_mapping
      field_data(orig_data, lookup_key)
    elsif field_mapping.is_a?(Enumerable)
      resolved = field_mapping.collect do |orig_field_name|
        mapped_field_value(orig_data, orig_field_name)
      end
      resolved.flatten
    else
      raise "Unknown mapping for #{field_mapping}"
    end
  end

  # Build the mapped hash for one source record. Fields whose resolved
  # value is nil or false are left out.
  def map(orig_data)
    @mapping.inject({}) do |mapped_data, (solr_name, field_mapping)|
      value = mapped_field_value(orig_data, field_mapping)
      mapped_data[solr_name] = value if value
      mapped_data
    end
  end
end
@@ -0,0 +1,43 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ require 'solr'
14
+
15
# Data source that reads documents out of a Solr index, fetching them one
# page at a time with Solr::Request::Standard.
class Solr::Importer::SolrSource
  # Mixed in for parity with the other data source in this package, which
  # also builds on +each+.
  include Enumerable

  # solr_url::       URL handed to Solr::Connection.new
  # query::          query passed as <tt>:query</tt> on every request
  # filter_queries:: passed as <tt>:filter_queries</tt> on every request
  # options::        <tt>:page_size</tt> (rows per request, default 1000)
  #                  and <tt>:field_list</tt> (default ["*"])
  #
  # Raises ArgumentError when the page size is not positive, since paging
  # could then never reach the end of the result set.
  def initialize(solr_url, query, filter_queries=nil, options={})
    @connection = Solr::Connection.new(solr_url)
    @query = query
    @filter_queries = filter_queries

    @page_size = options[:page_size] || 1000
    @field_list = options[:field_list] || ["*"]

    raise ArgumentError, "page_size must be positive" unless @page_size > 0
  end

  # Yield every document matching the query, requesting successive pages
  # until the reported total number of hits has been covered.
  #
  # Returns an Enumerator when called without a block.
  def each
    return to_enum(:each) unless block_given?

    done = false
    start = 0
    until done do
      # request N documents from a starting point
      request = Solr::Request::Standard.new(:query => @query,
                                            :rows => @page_size,
                                            :start => start,
                                            :field_list => @field_list,
                                            :filter_queries => @filter_queries)
      response = @connection.send(request)
      response.each do |doc|
        yield doc # TODO: perhaps convert to HashWithIndifferentAccess.new(doc), so stringify_keys isn't necessary
      end
      # Stop once the page just fetched reaches or passes the last hit.
      done = start + @page_size >= response.total_hits
      start = start + @page_size
    end
  end
end
@@ -0,0 +1,35 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
begin
  require 'xml/libxml'

  # Mapper whose source records are queried with the mapped XPath
  # expression; each matching attribute contributes its value and each
  # matching node its content.
  class Solr::Importer::XPathMapper < Solr::Importer::Mapper
    def field_data(doc, xpath)
      expression = xpath.to_s
      doc.find(expression).map do |node|
        # XML::Attr is tested first, as in the case statement this replaces.
        if node.is_a?(XML::Attr)
          node.value
        elsif node.is_a?(XML::Node)
          node.content
        end
      end
    end
  end
rescue LoadError
  # libxml could not be loaded: define a stand-in whose constructor fails,
  # so the problem only surfaces when the mapper is actually used.
  class Solr::Importer::XPathMapper
    def initialize(mapping, options={})
      raise "libxml not installed"
    end
  end
end
@@ -0,0 +1,52 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
# Drives an import: pulls records from a data source, maps each one to a
# document and adds the documents to Solr, optionally in batches.
class Solr::Indexer
  attr_reader :solr

  # data_source::       object whose +each+ yields source records
  # mapper_or_mapping:: a mapper responding to +map+, or a Hash that is
  #                     wrapped in a Solr::Importer::Mapper
  # options::           <tt>:solr_url</tt> (falls back to ENV["SOLR_URL"],
  #                     then http://localhost:8983/solr),
  #                     <tt>:buffer_docs</tt> (batch size; unset means add
  #                     each document immediately) and <tt>:debug</tt>
  #                     (print documents instead of sending them)
  def initialize(data_source, mapper_or_mapping, options={})
    solr_url = options[:solr_url] || ENV["SOLR_URL"] || "http://localhost:8983/solr"
    # TODO - these options contain the solr_url and debug keys also, so tidy up what gets passed
    @solr = Solr::Connection.new(solr_url, options)

    @data_source = data_source
    if mapper_or_mapping.is_a?(Hash)
      @mapper = Solr::Importer::Mapper.new(mapper_or_mapping)
    else
      @mapper = mapper_or_mapping
    end

    @buffer_docs = options[:buffer_docs]
    @debug = options[:debug]
  end

  # Map and add every record from the data source, then commit (unless in
  # debug mode). When a block is given it is called with the source record
  # and the mapped document before the document is queued.
  def index
    pending = []
    @data_source.each do |record|
      document = @mapper.map(record)

      # TODO: check arity of block, if 3, pass counter as 3rd argument
      # TODO check return of block, if not true then don't index, or perhaps if document.empty?
      yield(record, document) if block_given?

      pending << document

      # Keep collecting while batching is enabled and the batch is not full.
      next if @buffer_docs && pending.size != @buffer_docs

      add_docs(pending)
      pending.clear
    end
    add_docs(pending) unless pending.empty?

    @solr.commit unless @debug
  end

  # Send the documents to Solr, or print them when in debug mode.
  # Always returns nil.
  def add_docs(documents)
    if @debug
      puts documents.inspect
    else
      @solr.add(documents)
    end
    nil
  end
end
@@ -0,0 +1,26 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
# Declare the Solr::Request namespace, then load every request type.
# NOTE(review): presumably the required files define their classes as
# `class Solr::Request::Name`, which needs this module to exist first, and
# 'base' may need to load before the files that use it — confirm against
# those files, which are not shown here.
+ module Solr; module Request; end; end
14
+ require 'solr/request/add_document'
15
+ require 'solr/request/modify_document'
16
+ require 'solr/request/base'
17
+ require 'solr/request/commit'
18
+ require 'solr/request/delete'
19
+ require 'solr/request/ping'
20
+ require 'solr/request/select'
21
+ require 'solr/request/standard'
22
+ require 'solr/request/spellcheck'
23
+ require 'solr/request/dismax'
24
+ require 'solr/request/update'
25
+ require 'solr/request/index_info'
26
+ require 'solr/request/optimize'