simple_solr_client 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (136) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +4 -0
  3. data/LICENSE.txt +22 -0
  4. data/README.md +349 -0
  5. data/Rakefile +11 -0
  6. data/lib/simple_solr.rb +42 -0
  7. data/lib/simple_solr/client.rb +139 -0
  8. data/lib/simple_solr/client/core_admin.rb +0 -0
  9. data/lib/simple_solr/core.rb +50 -0
  10. data/lib/simple_solr/core/admin.rb +47 -0
  11. data/lib/simple_solr/core/core_data.rb +51 -0
  12. data/lib/simple_solr/core/index.rb +25 -0
  13. data/lib/simple_solr/core/search.rb +21 -0
  14. data/lib/simple_solr/response/document.rb +45 -0
  15. data/lib/simple_solr/response/generic_response.rb +19 -0
  16. data/lib/simple_solr/response/query_response.rb +54 -0
  17. data/lib/simple_solr/schema.rb +261 -0
  18. data/lib/simple_solr/schema/analysis.rb +58 -0
  19. data/lib/simple_solr/schema/copyfield.rb +42 -0
  20. data/lib/simple_solr/schema/dynamic_field.rb +23 -0
  21. data/lib/simple_solr/schema/field.rb +35 -0
  22. data/lib/simple_solr/schema/field_or_type.rb +112 -0
  23. data/lib/simple_solr/schema/field_type.rb +62 -0
  24. data/lib/simple_solr/schema/matcher.rb +16 -0
  25. data/lib/simple_solr/version.rb +3 -0
  26. data/simple_solr_client.gemspec +39 -0
  27. data/solr_sample_core/conf/_schema_analysis_stopwords_english.json +38 -0
  28. data/solr_sample_core/conf/_schema_analysis_synonyms_english.json +11 -0
  29. data/solr_sample_core/conf/admin-extra.html +24 -0
  30. data/solr_sample_core/conf/admin-extra.menu-bottom.html +25 -0
  31. data/solr_sample_core/conf/admin-extra.menu-top.html +25 -0
  32. data/solr_sample_core/conf/clustering/carrot2/kmeans-attributes.xml +19 -0
  33. data/solr_sample_core/conf/clustering/carrot2/lingo-attributes.xml +24 -0
  34. data/solr_sample_core/conf/clustering/carrot2/stc-attributes.xml +19 -0
  35. data/solr_sample_core/conf/currency.xml +67 -0
  36. data/solr_sample_core/conf/elevate.xml +38 -0
  37. data/solr_sample_core/conf/lang/contractions_ca.txt +8 -0
  38. data/solr_sample_core/conf/lang/contractions_fr.txt +15 -0
  39. data/solr_sample_core/conf/lang/contractions_ga.txt +5 -0
  40. data/solr_sample_core/conf/lang/contractions_it.txt +23 -0
  41. data/solr_sample_core/conf/lang/hyphenations_ga.txt +5 -0
  42. data/solr_sample_core/conf/lang/stemdict_nl.txt +6 -0
  43. data/solr_sample_core/conf/lang/stoptags_ja.txt +420 -0
  44. data/solr_sample_core/conf/lang/stopwords_ar.txt +125 -0
  45. data/solr_sample_core/conf/lang/stopwords_bg.txt +193 -0
  46. data/solr_sample_core/conf/lang/stopwords_ca.txt +220 -0
  47. data/solr_sample_core/conf/lang/stopwords_ckb.txt +136 -0
  48. data/solr_sample_core/conf/lang/stopwords_cz.txt +172 -0
  49. data/solr_sample_core/conf/lang/stopwords_da.txt +110 -0
  50. data/solr_sample_core/conf/lang/stopwords_de.txt +294 -0
  51. data/solr_sample_core/conf/lang/stopwords_el.txt +78 -0
  52. data/solr_sample_core/conf/lang/stopwords_en.txt +54 -0
  53. data/solr_sample_core/conf/lang/stopwords_es.txt +356 -0
  54. data/solr_sample_core/conf/lang/stopwords_eu.txt +99 -0
  55. data/solr_sample_core/conf/lang/stopwords_fa.txt +313 -0
  56. data/solr_sample_core/conf/lang/stopwords_fi.txt +97 -0
  57. data/solr_sample_core/conf/lang/stopwords_fr.txt +186 -0
  58. data/solr_sample_core/conf/lang/stopwords_ga.txt +110 -0
  59. data/solr_sample_core/conf/lang/stopwords_gl.txt +161 -0
  60. data/solr_sample_core/conf/lang/stopwords_hi.txt +235 -0
  61. data/solr_sample_core/conf/lang/stopwords_hu.txt +211 -0
  62. data/solr_sample_core/conf/lang/stopwords_hy.txt +46 -0
  63. data/solr_sample_core/conf/lang/stopwords_id.txt +359 -0
  64. data/solr_sample_core/conf/lang/stopwords_it.txt +303 -0
  65. data/solr_sample_core/conf/lang/stopwords_ja.txt +127 -0
  66. data/solr_sample_core/conf/lang/stopwords_lv.txt +172 -0
  67. data/solr_sample_core/conf/lang/stopwords_nl.txt +119 -0
  68. data/solr_sample_core/conf/lang/stopwords_no.txt +194 -0
  69. data/solr_sample_core/conf/lang/stopwords_pt.txt +253 -0
  70. data/solr_sample_core/conf/lang/stopwords_ro.txt +233 -0
  71. data/solr_sample_core/conf/lang/stopwords_ru.txt +243 -0
  72. data/solr_sample_core/conf/lang/stopwords_sv.txt +133 -0
  73. data/solr_sample_core/conf/lang/stopwords_th.txt +119 -0
  74. data/solr_sample_core/conf/lang/stopwords_tr.txt +212 -0
  75. data/solr_sample_core/conf/lang/userdict_ja.txt +29 -0
  76. data/solr_sample_core/conf/mapping-FoldToASCII.txt +3813 -0
  77. data/solr_sample_core/conf/mapping-ISOLatin1Accent.txt +246 -0
  78. data/solr_sample_core/conf/protwords.txt +21 -0
  79. data/solr_sample_core/conf/schema.xml +62 -0
  80. data/solr_sample_core/conf/scripts.conf +24 -0
  81. data/solr_sample_core/conf/solrconfig.xml +1702 -0
  82. data/solr_sample_core/conf/spellings.txt +2 -0
  83. data/solr_sample_core/conf/stopwords.txt +14 -0
  84. data/solr_sample_core/conf/syn.txt +0 -0
  85. data/solr_sample_core/conf/synonyms.txt +29 -0
  86. data/solr_sample_core/conf/token_fixing_charfilter.txt +110 -0
  87. data/solr_sample_core/conf/update-script.js +53 -0
  88. data/solr_sample_core/conf/velocity/README.txt +101 -0
  89. data/solr_sample_core/conf/velocity/VM_global_library.vm +175 -0
  90. data/solr_sample_core/conf/velocity/browse.vm +33 -0
  91. data/solr_sample_core/conf/velocity/cluster.vm +19 -0
  92. data/solr_sample_core/conf/velocity/cluster_results.vm +31 -0
  93. data/solr_sample_core/conf/velocity/debug.vm +28 -0
  94. data/solr_sample_core/conf/velocity/did_you_mean.vm +9 -0
  95. data/solr_sample_core/conf/velocity/error.vm +11 -0
  96. data/solr_sample_core/conf/velocity/facet_fields.vm +23 -0
  97. data/solr_sample_core/conf/velocity/facet_pivot.vm +12 -0
  98. data/solr_sample_core/conf/velocity/facet_queries.vm +12 -0
  99. data/solr_sample_core/conf/velocity/facet_ranges.vm +23 -0
  100. data/solr_sample_core/conf/velocity/facets.vm +10 -0
  101. data/solr_sample_core/conf/velocity/footer.vm +43 -0
  102. data/solr_sample_core/conf/velocity/head.vm +35 -0
  103. data/solr_sample_core/conf/velocity/header.vm +7 -0
  104. data/solr_sample_core/conf/velocity/hit.vm +25 -0
  105. data/solr_sample_core/conf/velocity/hit_grouped.vm +43 -0
  106. data/solr_sample_core/conf/velocity/hit_plain.vm +25 -0
  107. data/solr_sample_core/conf/velocity/join_doc.vm +20 -0
  108. data/solr_sample_core/conf/velocity/jquery.autocomplete.css +48 -0
  109. data/solr_sample_core/conf/velocity/jquery.autocomplete.js +763 -0
  110. data/solr_sample_core/conf/velocity/layout.vm +24 -0
  111. data/solr_sample_core/conf/velocity/main.css +230 -0
  112. data/solr_sample_core/conf/velocity/mime_type_lists.vm +68 -0
  113. data/solr_sample_core/conf/velocity/pagination_bottom.vm +22 -0
  114. data/solr_sample_core/conf/velocity/pagination_top.vm +29 -0
  115. data/solr_sample_core/conf/velocity/product_doc.vm +32 -0
  116. data/solr_sample_core/conf/velocity/query.vm +42 -0
  117. data/solr_sample_core/conf/velocity/query_form.vm +64 -0
  118. data/solr_sample_core/conf/velocity/query_group.vm +43 -0
  119. data/solr_sample_core/conf/velocity/query_spatial.vm +75 -0
  120. data/solr_sample_core/conf/velocity/results_list.vm +22 -0
  121. data/solr_sample_core/conf/velocity/richtext_doc.vm +153 -0
  122. data/solr_sample_core/conf/velocity/suggest.vm +8 -0
  123. data/solr_sample_core/conf/velocity/tabs.vm +50 -0
  124. data/solr_sample_core/conf/xslt/example.xsl +132 -0
  125. data/solr_sample_core/conf/xslt/example_atom.xsl +67 -0
  126. data/solr_sample_core/conf/xslt/example_rss.xsl +66 -0
  127. data/solr_sample_core/conf/xslt/luke.xsl +337 -0
  128. data/solr_sample_core/conf/xslt/updateXml.xsl +70 -0
  129. data/spec/client_basics_spec.rb +26 -0
  130. data/spec/connect_spec.rb +25 -0
  131. data/spec/core_basics.rb +21 -0
  132. data/spec/index_spec.rb +31 -0
  133. data/spec/load_spec.rb +7 -0
  134. data/spec/minitest_helper.rb +36 -0
  135. data/spec/schema_spec.rb +113 -0
  136. metadata +284 -0
@@ -0,0 +1,58 @@
1
+
2
+ # Figure out how the field type will parse out tokens
3
+ # and change them in the analysis chain. Just calls the
4
+ # provided solr analysis endpoints
5
+ #
6
+ # To be mixed into FieldType
7
+
8
module SimpleSolrClient
  class Schema
    # Ask Solr how a field type tokenizes and transforms a value.
    #
    # Mixed into FieldType. The including object must provide `name`
    # (the fieldType name) and an `@core` that responds to
    # `get(path, params)` and returns the parsed analysis response.
    module Analysis

      # Get the tokens produced by the index-time analysis chain.
      #
      # A position that holds more than one token (e.g. after a synonym
      # filter or a stemmer) is returned as a nested array of all the
      # tokens at that position.
      #
      # @param [String] val the text to analyze
      # @return [Array] one entry per token position; each entry is a
      #   String, or an Array of Strings when several tokens share the
      #   position
      #
      # @example Results when there's a stemmer
      #   field_type.index_tokens "That's Life"
      #   # => [["that's", "that"], "life"]
      def index_tokens(val)
        fieldtype_tokens(val, 'index')
      end

      # Get the tokens produced by the query-time analysis chain.
      # Same return shape as #index_tokens.
      #
      # @param [String] val the text to analyze
      # @return [Array]
      def query_tokens(val)
        fieldtype_tokens(val, 'query')
      end

      private

      # Call the field analysis handler and collect the output of the
      # last stage of the requested chain.
      #
      # https://lucene.apache.org/solr/4_1_0/solr-core/org/apache/solr/handler/FieldAnalysisRequestHandler.html
      #
      # @param [String] val the text to analyze
      # @param [String] type which chain to read: 'index' or 'query'
      # @return [Array] tokens indexed by (position - 1)
      def fieldtype_tokens(val, type)
        params = {
          'analysis.fieldtype'  => name,
          'analysis.fieldvalue' => val,
          'analysis.query'      => val,
        }
        resp   = @core.get('analysis/field', params)
        stages = resp['analysis']['field_types'][name][type]

        # Only the final entry (output of the last analysis stage) is used.
        stages.last.each_with_object([]) do |tok, tokens|
          pos  = tok['position'] - 1 # reported positions are 1-based
          text = tok['text']
          # Array() leaves an existing array alone and wraps a lone string,
          # so a third (or later) token at the same position is appended
          # flat instead of nesting the earlier ones.
          tokens[pos] = tokens[pos] ? Array(tokens[pos]) << text : text
        end
      end
    end
  end
end
@@ -0,0 +1,42 @@
1
+ require 'simple_solr/schema/matcher'
2
+
3
# A single copyField directive: a source field name (or pattern) and
# the destination name (or pattern) it is copied to.
class SimpleSolrClient::Schema::CopyField
  include SimpleSolrClient::Schema::Matcher

  attr_reader :source
  attr_accessor :dest

  # @param [String] source the source field name, optionally a pattern
  #   with a leading '*'
  # @param [String] dest the destination name, optionally containing '*'
  def initialize(source, dest)
    self.source   = source # the writer also derives @matcher
    @dest         = dest
    @dest_matcher = derive_matcher(dest)
  end

  # Set the source and re-derive the matcher used against field names.
  def source=(s)
    @matcher = derive_matcher(s)
    @source  = s
  end

  # What destination name will we get for a matching field name?
  #
  # @param [String] s a concrete field name
  # @return [String, nil] the dest as-is when it has no wildcard;
  #   otherwise the dest with its first '*' replaced by the part of `s`
  #   that the source wildcard matched; nil when `s` does not match or
  #   when the source is not a wildcard pattern (nothing to substitute)
  #
  # @example
  #   CopyField.new('*_t', '*_s').dynamic_name('title_t') # => "title_s"
  def dynamic_name(s)
    return @dest unless @dest =~ /\*/
    # A literal source has no captured wildcard text to put in the dest.
    return nil unless @matcher.is_a?(Regexp)

    m = @matcher.match(s)
    return nil unless m

    # Block form: the captured text is inserted verbatim, with no
    # backslash-escape interpretation of the replacement.
    @dest.sub('*') { m[1] }
  end

  # Build a <copyField> element carrying the source and dest attributes.
  #
  # @param [Nokogiri::XML::Document, nil] doc document that will own the
  #   node; a new empty document is created when nil
  # @return [Nokogiri::XML::Element]
  def to_xml_node(doc = nil)
    doc ||= Nokogiri::XML::Document.new
    cf = Nokogiri::XML::Element.new('copyField', doc)
    cf['source'] = source
    cf['dest']   = dest
    cf
  end
end
@@ -0,0 +1,23 @@
1
+ require 'simple_solr/schema/field'
2
class SimpleSolrClient::Schema
  # A Field whose name is a pattern; it is flagged as dynamic and is
  # serialized as a <dynamicField> element.
  class DynamicField < Field

    def initialize(*args)
      super
      @dynamic = true
    end

    # Create the empty element that the attribute serialization fills in.
    def xml_node(doc)
      Nokogiri::XML::Element.new('dynamicField', doc)
    end

    # What name will we get from a matching thing?
    # Returns the two captured parts of the match joined together,
    # or nil when `s` does not match this field's pattern.
    def dynamic_name(s)
      found = @matcher.match(s)
      return nil unless found

      found[1] << found[2]
    end

  end
end
@@ -0,0 +1,35 @@
1
+ require 'simple_solr/schema/field_or_type'
2
class SimpleSolrClient::Schema
  # A concrete (non-dynamic) schema field. It records the name of its
  # field type and, once resolved against a schema, the type object.
  class Field < Field_or_Type
    include Matcher

    attr_reader :matcher
    attr_accessor :type_name, :type

    def initialize(*args)
      super
      @dynamic = false
    end

    # Create the empty element that the attribute serialization fills in.
    def xml_node(doc)
      Nokogiri::XML::Element.new('field', doc)
    end

    # The actual type can only be looked up in the presence of a
    # particular schema; fetch it by type_name and remember it.
    # Returns self so calls can be chained.
    def resolve_type(schema)
      self.type = schema.field_type(type_name)
      self
    end

    # Changing the name also changes what the field matches, so the
    # matcher object is re-derived on every assignment.
    def name=(n)
      @name    = n
      @matcher = derive_matcher(n)
    end

  end
end
@@ -0,0 +1,112 @@
1
+ # Base class for Field/DynamicField/FieldType
2
module SimpleSolrClient
  class Schema
    # Shared attribute handling for Field/DynamicField/FieldType:
    # hash-style construction, []-style access, equality by name, and
    # serialization of the known attributes to an XML node.
    class Field_or_Type
      # Ruby attribute => Solr attribute name, for plain-text values
      TEXT_ATTR_MAP = {
        :name                   => 'name',
        :type_name              => 'type',
        :precision_step         => 'precisionStep',
        :position_increment_gap => 'positionIncrementGap'
      }.freeze

      # Ruby attribute => Solr attribute name, for boolean values
      BOOL_ATTR_MAP = {
        :stored            => 'stored',
        :indexed           => 'indexed',
        :multi             => 'multiValued',
        :sort_missing_last => 'sortMissingLast'
      }.freeze

      attr_accessor :type_name
      attr_writer :name,
                  :indexed,
                  :stored,
                  :multi,
                  :sort_missing_last,
                  :precision_step,
                  :position_increment_gap

      # Take in a hash, and set anything in it that we recognize.
      # Sloppy from a data point of view, but makes for easy
      # duplication and creation from xml/json.
      #
      # Keys without a matching `key=` writer are skipped. Errors raised
      # by a writer that does exist are no longer swallowed.
      #
      # @param [Hash] h attribute name (String or Symbol) => value
      def initialize(h = {})
        h.each_pair do |k, v|
          # `true` includes private writers, matching what #send can reach
          self[k] = v if respond_to?("#{k}=", true)
        end
      end

      # Build an instance from a hash-like object keyed by the Solr
      # attribute names ('multiValued', 'precisionStep', ...).
      #
      # @param [#[]] h source of the attribute values
      # @return [Field_or_Type] an instance of the receiving class
      def self.new_from_solr_hash(h)
        f = new

        TEXT_ATTR_MAP.merge(BOOL_ATTR_MAP).each_pair do |field, xmlattr|
          f[field] = h[xmlattr]
        end
        # Set the name "manually" to force the matcher
        f.name = h['name']

        f
      end

      # The type that unset attributes are read from. The base class has
      # none; subclasses supply their own reader. Defining it here lets
      # the generated readers below return nil on a bare instance instead
      # of raising NoMethodError.
      def type
        nil
      end

      # Readers for every mapped attribute except type_name: return the
      # locally set value, and when that is nil fall back to the same
      # attribute on the assigned type, if there is one.
      (TEXT_ATTR_MAP.keys + BOOL_ATTR_MAP.keys - [:type_name]).each do |attr|
        define_method(attr) do
          local = instance_variable_get("@#{attr}")
          if local.nil?
            fallback = type
            fallback[attr] if fallback
          else
            local
          end
        end
      end

      # Equal when the names are equal. `other` may be anything that
      # responds to #name, or a bare name to compare against directly.
      def ==(other)
        if other.respond_to? :name
          name == other.name
        else
          name == other
        end
      end

      # Reverse the process to get XML. Only locally set attributes are
      # written; values inherited from the type are not.
      #
      # @param [Nokogiri::XML::Document, nil] doc document that will own
      #   the node; a new empty document is created when nil
      # @return [Nokogiri::XML::Element] the node built by the subclass's
      #   #xml_node, with the set attributes added
      def to_xml_node(doc = nil)
        doc ||= Nokogiri::XML::Document.new
        xml = xml_node(doc)
        TEXT_ATTR_MAP.merge(BOOL_ATTR_MAP).each_pair do |field, xmlattr|
          iv = instance_variable_get("@#{field}")
          # Values may be booleans or numbers (e.g. when built from JSON);
          # write them as strings.
          xml[xmlattr] = iv.to_s unless iv.nil?
        end
        xml
      end

      # Allow access to methods via [], for easy looping
      def [](k)
        send(k.to_sym)
      end

      def []=(k, v)
        send("#{k}=".to_sym, v)
      end

      # Make a hash out of it, for easy feeding back into another call to #new.
      # Keys are the instance variable names without the leading '@'.
      def to_h
        instance_variables.each_with_object({}) do |iv, h|
          h[iv.to_s.sub('@', '')] = instance_variable_get(iv)
        end
      end

    end
  end
end
@@ -0,0 +1,62 @@
1
+ require 'simple_solr/schema/analysis'
2
+
3
+ # A basic field type
4
+ #
5
+ # We don't even try to represent the analysis chain; just store the raw
6
+ # xml
7
+ #
8
+ # We also, in blatant disregard for separation of concerns and encapsulation,
9
+ # put in a place to store a core. This is filled when the fieldtype is added
10
+ # to the schema via add_field_type, so we can have access to the
11
+ # analysis chain.
12
+
13
class SimpleSolrClient::Schema
  # A field type: the shared attributes plus the Solr class name and the
  # raw XML it was defined with. Also holds a `core`, which the mixed-in
  # analysis methods use.
  class FieldType < Field_or_Type
    include SimpleSolrClient::Schema::Analysis

    attr_accessor :xml, :solr_class, :core

    # Same as the parent constructor, additionally copying 'class'
    # into solr_class.
    def self.new_from_solr_hash(h)
      super.tap { |ft| ft.solr_class = h['class'] }
    end

    # Luckily, a nokogiri node can act like a hash, so the first node
    # of the parsed XML is simply handed to #new_from_solr_hash. The
    # raw XML is kept on the instance.
    def self.new_from_xml(xml)
      root = Nokogiri.XML(xml).children.first
      new_from_solr_hash(root).tap { |ft| ft.xml = xml }
    end

    def initialize(*args)
      super
      @xml = nil
    end

    # Make sure the type is never set, so we don't get stuck
    # trying to find a type's "type"
    def type
      nil
    end

    # Create a Nokogiri <fieldType> node carrying the class attribute,
    # with the children of the stored XML's first node moved into it.
    def xml_node(doc)
      node = Nokogiri::XML::Element.new('fieldType', doc)
      node['class'] = solr_class

      root = Nokogiri.XML(xml).children.first
      root.children.each { |child| node.add_child(child) } if root

      node
    end
  end
end
61
+
62
+
@@ -0,0 +1,16 @@
1
+ # Figure out how to match a field name to a copyfield/dynamicfield
2
module SimpleSolrClient
  class Schema
    # Figure out how to match a field name to a copyfield/dynamicfield.
    # The including class is expected to keep the derived matcher in
    # @matcher.
    module Matcher
      # Turn a field name or pattern into something usable with ===.
      #
      # @param [String] src a literal name, or a pattern with a leading '*'
      # @return [Regexp, Object] for a leading-'*' pattern, a Regexp
      #   anchored to the whole string with two captures: (1) the text
      #   matched by the wildcard and (2) the literal suffix. Anything
      #   else is returned unchanged.
      def derive_matcher(src)
        if src =~ /\A\*(.*)/
          suffix = Regexp.escape(Regexp.last_match(1))
          # \z, not \Z: \Z would also accept a name ending in a newline
          Regexp.new("\\A(.*)(#{suffix})\\z")
        else
          src
        end
      end

      # Does `s` match? A Regexp matcher is applied as a pattern; a
      # literal matcher is compared for equality.
      def matches(s)
        @matcher === s
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
# Top-level namespace of the gem.
module SimpleSolrClient
  # Version string of this release.
  VERSION = '0.1.0'
end
@@ -0,0 +1,39 @@
1
# coding: utf-8
# Gem specification for simple_solr_client.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'simple_solr/version'

Gem::Specification.new do |spec|
  spec.name          = "simple_solr_client"
  spec.version       = SimpleSolrClient::VERSION
  spec.authors       = ["Bill Dueber"]
  spec.email         = ["bill@dueber.com"]
  spec.summary       = %q{Interact with a Solr API via JSON}
  spec.homepage      = "https://github.com/billdueber/simple_solr"
  spec.license       = "MIT"

  # Package exactly what git tracks
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]


  # Thread-safe, cross-platform http client
  spec.add_dependency "httpclient"

  # XML parsing and node building, used by the schema classes
  spec.add_dependency 'nokogiri'

  # Only require Oj for MRI/rbx. We'll use stock JSON on jruby.
  # JRUBY_VERSION is the constant JRuby defines; the previous check,
  # `defined? JRUBY`, was never true, so oj was always added and the
  # java platform was never set.
  if defined?(JRUBY_VERSION)
    spec.platform = "java"
  else
    spec.add_dependency 'oj'
  end


  spec.add_development_dependency "bundler", "~> 1.7"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "minitest"
  spec.add_development_dependency 'minitest-reporters'
end
@@ -0,0 +1,38 @@
1
+ {
2
+ "initArgs":{"ignoreCase":true},
3
+ "managedList":[
4
+ "a",
5
+ "an",
6
+ "and",
7
+ "are",
8
+ "as",
9
+ "at",
10
+ "be",
11
+ "but",
12
+ "by",
13
+ "for",
14
+ "if",
15
+ "in",
16
+ "into",
17
+ "is",
18
+ "it",
19
+ "no",
20
+ "not",
21
+ "of",
22
+ "on",
23
+ "or",
24
+ "stopworda",
25
+ "stopwordb",
26
+ "such",
27
+ "that",
28
+ "the",
29
+ "their",
30
+ "then",
31
+ "there",
32
+ "these",
33
+ "they",
34
+ "this",
35
+ "to",
36
+ "was",
37
+ "will",
38
+ "with"]}
@@ -0,0 +1,11 @@
1
+ {
2
+ "initArgs":{
3
+ "ignoreCase":true,
4
+ "format":"solr"
5
+ },
6
+ "managedMap":{
7
+ "GB":["GiB","Gigabyte"],
8
+ "happy":["glad","joyful"],
9
+ "TV":["Television"]
10
+ }
11
+ }