greglu-solr-ruby 0.0.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (103) hide show
  1. data/CHANGES.yml +50 -0
  2. data/LICENSE.txt +201 -0
  3. data/README +56 -0
  4. data/Rakefile +190 -0
  5. data/examples/delicious_library/dl_importer.rb +60 -0
  6. data/examples/delicious_library/sample_export.txt +164 -0
  7. data/examples/marc/marc_importer.rb +106 -0
  8. data/examples/tang/tang_importer.rb +58 -0
  9. data/lib/solr.rb +21 -0
  10. data/lib/solr/connection.rb +179 -0
  11. data/lib/solr/document.rb +73 -0
  12. data/lib/solr/exception.rb +13 -0
  13. data/lib/solr/field.rb +39 -0
  14. data/lib/solr/importer.rb +19 -0
  15. data/lib/solr/importer/array_mapper.rb +26 -0
  16. data/lib/solr/importer/delimited_file_source.rb +38 -0
  17. data/lib/solr/importer/hpricot_mapper.rb +27 -0
  18. data/lib/solr/importer/mapper.rb +51 -0
  19. data/lib/solr/importer/solr_source.rb +43 -0
  20. data/lib/solr/importer/xpath_mapper.rb +35 -0
  21. data/lib/solr/indexer.rb +52 -0
  22. data/lib/solr/request.rb +26 -0
  23. data/lib/solr/request/add_document.rb +63 -0
  24. data/lib/solr/request/base.rb +36 -0
  25. data/lib/solr/request/commit.rb +31 -0
  26. data/lib/solr/request/delete.rb +50 -0
  27. data/lib/solr/request/dismax.rb +46 -0
  28. data/lib/solr/request/index_info.rb +22 -0
  29. data/lib/solr/request/modify_document.rb +51 -0
  30. data/lib/solr/request/optimize.rb +21 -0
  31. data/lib/solr/request/ping.rb +36 -0
  32. data/lib/solr/request/select.rb +56 -0
  33. data/lib/solr/request/spellcheck.rb +30 -0
  34. data/lib/solr/request/standard.rb +374 -0
  35. data/lib/solr/request/update.rb +23 -0
  36. data/lib/solr/response.rb +27 -0
  37. data/lib/solr/response/add_document.rb +17 -0
  38. data/lib/solr/response/base.rb +42 -0
  39. data/lib/solr/response/commit.rb +17 -0
  40. data/lib/solr/response/delete.rb +13 -0
  41. data/lib/solr/response/dismax.rb +20 -0
  42. data/lib/solr/response/index_info.rb +26 -0
  43. data/lib/solr/response/modify_document.rb +17 -0
  44. data/lib/solr/response/optimize.rb +14 -0
  45. data/lib/solr/response/ping.rb +28 -0
  46. data/lib/solr/response/ruby.rb +42 -0
  47. data/lib/solr/response/select.rb +17 -0
  48. data/lib/solr/response/spellcheck.rb +20 -0
  49. data/lib/solr/response/standard.rb +60 -0
  50. data/lib/solr/response/xml.rb +42 -0
  51. data/lib/solr/solrtasks.rb +27 -0
  52. data/lib/solr/util.rb +32 -0
  53. data/lib/solr/xml.rb +47 -0
  54. data/script/setup.rb +14 -0
  55. data/script/solrshell +18 -0
  56. data/solr-ruby.gemspec +26 -0
  57. data/solr/conf/admin-extra.html +31 -0
  58. data/solr/conf/protwords.txt +21 -0
  59. data/solr/conf/schema.xml +221 -0
  60. data/solr/conf/scripts.conf +24 -0
  61. data/solr/conf/solrconfig.xml +394 -0
  62. data/solr/conf/stopwords.txt +58 -0
  63. data/solr/conf/synonyms.txt +31 -0
  64. data/solr/conf/xslt/example.xsl +132 -0
  65. data/test/conf/admin-extra.html +31 -0
  66. data/test/conf/protwords.txt +21 -0
  67. data/test/conf/schema.xml +237 -0
  68. data/test/conf/scripts.conf +24 -0
  69. data/test/conf/solrconfig.xml +376 -0
  70. data/test/conf/stopwords.txt +58 -0
  71. data/test/conf/synonyms.txt +31 -0
  72. data/test/functional/server_test.rb +218 -0
  73. data/test/functional/test_solr_server.rb +104 -0
  74. data/test/unit/add_document_test.rb +40 -0
  75. data/test/unit/array_mapper_test.rb +37 -0
  76. data/test/unit/changes_yaml_test.rb +21 -0
  77. data/test/unit/commit_test.rb +41 -0
  78. data/test/unit/connection_test.rb +55 -0
  79. data/test/unit/data_mapper_test.rb +75 -0
  80. data/test/unit/delete_test.rb +56 -0
  81. data/test/unit/delimited_file_source_test.rb +29 -0
  82. data/test/unit/dismax_request_test.rb +26 -0
  83. data/test/unit/document_test.rb +69 -0
  84. data/test/unit/field_test.rb +48 -0
  85. data/test/unit/hpricot_mapper_test.rb +44 -0
  86. data/test/unit/hpricot_test_file.xml +26 -0
  87. data/test/unit/indexer_test.rb +57 -0
  88. data/test/unit/modify_document_test.rb +24 -0
  89. data/test/unit/ping_test.rb +51 -0
  90. data/test/unit/request_test.rb +61 -0
  91. data/test/unit/response_test.rb +43 -0
  92. data/test/unit/select_test.rb +25 -0
  93. data/test/unit/solr_mock_base.rb +40 -0
  94. data/test/unit/spellcheck_response_test.rb +26 -0
  95. data/test/unit/spellchecker_request_test.rb +27 -0
  96. data/test/unit/standard_request_test.rb +324 -0
  97. data/test/unit/standard_response_test.rb +174 -0
  98. data/test/unit/suite.rb +16 -0
  99. data/test/unit/tab_delimited.txt +2 -0
  100. data/test/unit/util_test.rb +24 -0
  101. data/test/unit/xpath_mapper_test.rb +38 -0
  102. data/test/unit/xpath_test_file.xml +25 -0
  103. metadata +173 -0
@@ -0,0 +1,73 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ require 'solr/xml'
14
+ require 'solr/field'
15
+
16
# A document to be sent to Solr: an ordered list of Solr::Field objects
# plus an optional document-level boost.
class Solr::Document
  include Enumerable
  attr_accessor :boost

  # Create a new Solr::Document, optionally passing in a hash of
  # key/value pairs for the fields
  #
  #   doc = Solr::Document.new(:creator => 'Jorge Luis Borges')
  def initialize(hash={})
    @fields = []
    self << hash
  end

  # Yield every Solr::Field of the document in insertion order.
  #
  # Enumerable is mixed into this class, and all of its methods are built on
  # top of #each; without this method they would raise NoMethodError.
  # Returns an Enumerator when called without a block.
  def each(&block)
    return enum_for(:each) unless block
    @fields.each(&block)
    self
  end

  # Append a Solr::Field
  #
  #   doc << Solr::Field.new(:creator => 'Jorge Luis Borges')
  #
  # If you are truly lazy you can simply pass in a hash:
  #
  #   doc << {:creator => 'Jorge Luis Borges'}
  #
  # Raises a RuntimeError when given anything other than a Hash or a
  # Solr::Field.
  def <<(fields)
    case fields
    when Hash
      fields.each_pair do |name, value|
        # A non-String value that responds to #each is expanded into one
        # field per element, all sharing the same field name.
        if value.respond_to?(:each) && !value.is_a?(String)
          value.each { |v| @fields << Solr::Field.new(name => v) }
        else
          @fields << Solr::Field.new(name => value)
        end
      end
    when Solr::Field
      @fields << fields
    else
      raise "must pass in Solr::Field or Hash"
    end
  end

  # shorthand to allow hash lookups
  #   doc['name']
  #
  # Returns the value of the first field whose name matches +name+
  # (compared as a String), or nil when there is no such field.
  def [](name)
    wanted = name.to_s
    field = @fields.find { |f| f.name == wanted }
    field && field.value
  end

  # shorthand to assign as a hash
  #
  # Note that this appends a new field; an existing field with the same
  # name is not replaced.
  def []=(name, value)
    @fields << Solr::Field.new(name => value)
  end

  # convert the Document to a REXML::Element
  #
  # The 'boost' attribute is only emitted when a boost has been set.
  def to_xml
    e = Solr::XML::Element.new 'doc'
    e.attributes['boost'] = @boost.to_s if @boost
    @fields.each { |f| e.add_element(f.to_xml) }
    return e
  end
end
@@ -0,0 +1,13 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
# Error class for the Solr namespace.
#
# Derives from StandardError rather than directly from Exception so that a
# plain `rescue` / `rescue => e` clause catches it. It is still an Exception,
# so `rescue Solr::Exception` and `rescue Exception` keep working.
class Solr::Exception < StandardError; end
data/lib/solr/field.rb ADDED
@@ -0,0 +1,39 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ require 'solr/xml'
14
+ require 'time'
15
+
16
# A single name/value pair of a Solr document, with an optional boost.
class Solr::Field
  # Keys of the params hash that are options rather than the field itself.
  VALID_PARAMS = [:boost]
  attr_accessor :name
  attr_accessor :value
  attr_accessor :boost

  # Accepts an optional <tt>:boost</tt> parameter, used to boost the relevance of a particular field.
  #
  # The first key of +params+ that is not listed in VALID_PARAMS is taken as
  # the field name (stored as a String); its value becomes the field value,
  # also stored as a String.
  def initialize(params)
    @boost = params[:boost]
    name_key = (params.keys - VALID_PARAMS).first
    @name = name_key.to_s
    raw_value = params[name_key]
    # Convert any Time values into UTC/XML schema format (which Solr requires).
    # Time#utc converts its receiver in place, so work on a copy to leave the
    # caller's object untouched (and to cope with frozen values).
    @value = raw_value.respond_to?(:utc) ? raw_value.dup.utc.xmlschema : raw_value.to_s
  end

  # Build the <field> element for this field. The 'boost' attribute is only
  # emitted when a boost has been set.
  def to_xml
    e = Solr::XML::Element.new 'field'
    e.attributes['name'] = @name
    e.attributes['boost'] = @boost.to_s if @boost
    e.text = @value
    return e
  end

end
@@ -0,0 +1,19 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ module Solr; module Importer; end; end
14
+ require 'solr/importer/mapper'
15
+ require 'solr/importer/array_mapper'
16
+ require 'solr/importer/delimited_file_source'
17
+ require 'solr/importer/hpricot_mapper'
18
+ require 'solr/importer/xpath_mapper'
19
+ require 'solr/importer/solr_source'
@@ -0,0 +1,26 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+
14
+
15
# Mapper for records that arrive as an array of data items: the item at
# position i is mapped by the mapper stored at position i of the mapping,
# and the per-item results are merged into one hash.
class Solr::Importer::ArrayMapper < Solr::Importer::Mapper
  # TODO document that initializer takes an array of Mappers [mapper1, mapper2, ... mapperN]

  # TODO: make merge conflict handling configurable.  as is, the last map fields win.
  def map(orig_data_array)
    orig_data_array.each_with_index.inject({}) do |combined, (record, position)|
      # Hash#update merges in place and returns the receiver, which becomes
      # the accumulator for the next item; later items overwrite earlier keys.
      combined.update(@mapping[position].map(record))
    end
  end
end
@@ -0,0 +1,38 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
# For files with the first line containing field names
# Currently not designed for enormous files, as all lines are
# read into an array
class Solr::Importer::DelimitedFileSource
  include Enumerable

  # filename:: path of the delimited file to read
  # splitter:: separator handed to String#split for every line (a tab by default)
  def initialize(filename, splitter=/\t/)
    @filename = filename
    @splitter = splitter
  end

  # Yield one record per line after the header line.
  #
  # A record is an array of [header, value] pairs whose #[] is redefined to
  # look a value up by header name (a String or Symbol). Looking up a name
  # that is not among the headers returns nil.
  #
  # Returns an Enumerator when called without a block. An empty file yields
  # nothing.
  def each
    return enum_for(:each) unless block_given?

    lines = IO.readlines(@filename)
    # No header line means no records; without this guard the header split
    # below would be called on nil.
    return if lines.empty?

    headers = lines[0].split(@splitter).collect { |h| h.chomp }

    lines[1..-1].each do |line|
      data = headers.zip(line.split(@splitter).collect { |s| s.chomp })
      def data.[](key)
        # assoc returns nil for an unknown header; answer nil in that case
        # instead of failing on nil[1].
        pair = assoc(key.to_s)
        pair && pair[1]
      end

      yield(data)
    end
  end

end
@@ -0,0 +1,27 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
# Defines Solr::Importer::HpricotMapper. When the hpricot library cannot be
# loaded, a placeholder class is defined instead whose constructor raises.
begin
  require 'hpricot'

  class Solr::Importer::HpricotMapper < Solr::Importer::Mapper
    # Search +doc+ with +path+ (converted to a String) and return the
    # inner HTML of every matching element.
    def field_data(doc, path)
      matches = doc.search(path.to_s)
      matches.map { |element| element.inner_html }
    end
  end
rescue LoadError # If we can't load hpricot
  class Solr::Importer::HpricotMapper
    # Same signature as Solr::Importer::Mapper#initialize; always raises.
    def initialize(mapping, options={})
      raise "Hpricot not installed."
    end
  end
end
@@ -0,0 +1,51 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
# Turns a source record into a hash of Solr field names to values,
# driven by a mapping of solr field name => field mapping.
class Solr::Importer::Mapper
  # mapping:: enumerable of [solr_name, field_mapping] pairs (e.g. a Hash)
  # options:: <tt>:stringify_symbols</tt> makes Symbol mappings look the
  #           source field up by its String name
  def initialize(mapping, options={})
    @mapping = mapping
    @options = options
  end

  # Fetch the raw value named +field_name+ from the source record.
  # Subclasses override this to support other kinds of source data.
  def field_data(orig_data, field_name)
    orig_data[field_name]
  end

  # Resolve a single field mapping against the source record:
  #
  # String::     used as a literal value
  # Proc::       called with the source record
  # Symbol::     looked up in the source record through #field_data
  # Enumerable:: every element resolved recursively, results flattened
  #
  # Anything else raises a RuntimeError.
  def mapped_field_value(orig_data, field_mapping)
    return field_mapping if field_mapping.is_a?(String)

    # TODO pass in more context, like self or a function for field_data, etc
    return field_mapping.call(orig_data) if field_mapping.is_a?(Proc)

    if field_mapping.is_a?(Symbol)
      lookup_key = @options[:stringify_symbols] ? field_mapping.to_s : field_mapping
      return field_data(orig_data, lookup_key)
    end

    if field_mapping.is_a?(Enumerable)
      resolved = field_mapping.map { |nested| mapped_field_value(orig_data, nested) }
      return resolved.flatten
    end

    raise "Unknown mapping for #{field_mapping}"
  end

  # Apply the whole mapping to +orig_data+ and return the resulting hash.
  # Fields whose resolved value is nil or false are left out.
  def map(orig_data)
    @mapping.inject({}) do |result, (solr_name, field_mapping)|
      value = mapped_field_value(orig_data, field_mapping)
      result[solr_name] = value if value
      result
    end
  end
end
@@ -0,0 +1,43 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ require 'solr'
14
+
15
# Data source that reads its records from a Solr index, fetching the
# results of a query one page at a time.
class Solr::Importer::SolrSource
  # #each is defined below, so mix in Enumerable like the other data source
  # (DelimitedFileSource) does.
  include Enumerable

  # solr_url::       URL handed to Solr::Connection.new
  # query::          query sent with every page request
  # filter_queries:: filter queries sent with every page request (optional)
  # options::        <tt>:page_size</tt> (default 1000) and
  #                  <tt>:field_list</tt> (default ["*"])
  def initialize(solr_url, query, filter_queries=nil, options={})
    @connection = Solr::Connection.new(solr_url)
    @query = query
    @filter_queries = filter_queries

    @page_size = options[:page_size] || 1000
    @field_list = options[:field_list] || ["*"]
  end

  # Yield every document matching the query, requesting +page_size+
  # documents at a time until the reported total number of hits is covered.
  #
  # Returns an Enumerator when called without a block.
  def each
    return enum_for(:each) unless block_given?

    done = false
    start = 0
    until done do
      # request N documents from a starting point
      request = Solr::Request::Standard.new(:query => @query,
                                            :rows => @page_size,
                                            :start => start,
                                            :field_list => @field_list,
                                            :filter_queries => @filter_queries)
      response = @connection.send(request)
      response.each do |doc|
        yield doc  # TODO: perhaps convert to HashWithIndifferentAccess.new(doc), so stringify_keys isn't necessary
      end
      # Stop once the page just fetched reaches the end of the result set.
      done = start + @page_size >= response.total_hits
      start = start + @page_size
    end
  end
end
@@ -0,0 +1,35 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
# Defines Solr::Importer::XPathMapper. When libxml cannot be loaded, a
# placeholder class is defined instead whose constructor raises.
begin
  require 'xml/libxml'

  # Mapper whose source field names are XPath expressions evaluated
  # against the source document.
  class Solr::Importer::XPathMapper < Solr::Importer::Mapper
    # Evaluate +xpath+ (converted to a String) on +doc+ and return one entry
    # per result: the value of an attribute, the content of a node, and nil
    # for any other kind of result.
    def field_data(doc, xpath)
      doc.find(xpath.to_s).map do |node|
        case node
        when XML::Attr then node.value
        when XML::Node then node.content
        end
      end
    end
  end
rescue LoadError # If we can't load libxml
  class Solr::Importer::XPathMapper
    # Same signature as Solr::Importer::Mapper#initialize; always raises.
    def initialize(mapping, options={})
      raise "libxml not installed"
    end
  end
end
@@ -0,0 +1,52 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
# Pulls records from a data source, maps each one to a document and adds
# the documents to Solr, optionally in batches.
class Solr::Indexer
  attr_reader :solr

  # TODO: document options!
  #
  # Options read here: <tt>:solr_url</tt> (falls back to ENV["SOLR_URL"],
  # then to "http://localhost:8983/solr"), <tt>:buffer_docs</tt> and
  # <tt>:debug</tt>. A Hash passed as +mapper_or_mapping+ is wrapped in a
  # Solr::Importer::Mapper; anything else is used as the mapper directly.
  def initialize(data_source, mapper_or_mapping, options={})
    solr_url = options[:solr_url] || ENV["SOLR_URL"] || "http://localhost:8983/solr"
    #TODO - these options contain the solr_url and debug keys also, so tidy up what gets passed
    @solr = Solr::Connection.new(solr_url, options)

    @data_source = data_source
    @mapper =
      if mapper_or_mapping.is_a?(Hash)
        Solr::Importer::Mapper.new(mapper_or_mapping)
      else
        mapper_or_mapping
      end

    @buffer_docs = options[:buffer_docs]
    @debug = options[:debug]
  end

  # Map and add every record of the data source, then commit (no commit in
  # debug mode). When a block is given it is called with each record and its
  # mapped document before the document is queued.
  def index
    pending = []
    @data_source.each do |record|
      document = @mapper.map(record)

      # TODO: check arrity of block, if 3, pass counter as 3rd argument
      # TODO check return of block, if not true then don't index, or perhaps if document.empty?
      yield(record, document) if block_given?

      pending << document

      # With a buffer size configured, keep collecting until it is reached;
      # without one, every document is sent on its own.
      next if @buffer_docs && pending.size != @buffer_docs

      add_docs(pending)
      pending.clear
    end
    add_docs(pending) unless pending.empty?

    @solr.commit unless @debug
  end

  # Send +documents+ to Solr, or just print them when in debug mode.
  def add_docs(documents)
    if @debug
      puts documents.inspect
    else
      @solr.add(documents)
    end
    nil
  end
end
@@ -0,0 +1,26 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ module Solr; module Request; end; end
14
+ require 'solr/request/add_document'
15
+ require 'solr/request/modify_document'
16
+ require 'solr/request/base'
17
+ require 'solr/request/commit'
18
+ require 'solr/request/delete'
19
+ require 'solr/request/ping'
20
+ require 'solr/request/select'
21
+ require 'solr/request/standard'
22
+ require 'solr/request/spellcheck'
23
+ require 'solr/request/dismax'
24
+ require 'solr/request/update'
25
+ require 'solr/request/index_info'
26
+ require 'solr/request/optimize'