sunspot 0.9.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. data/History.txt +83 -0
  2. data/LICENSE +18 -0
  3. data/README.rdoc +154 -0
  4. data/Rakefile +9 -0
  5. data/TODO +9 -0
  6. data/VERSION.yml +4 -0
  7. data/bin/sunspot-configure-solr +46 -0
  8. data/bin/sunspot-solr +62 -0
  9. data/lib/light_config.rb +40 -0
  10. data/lib/sunspot.rb +469 -0
  11. data/lib/sunspot/adapters.rb +265 -0
  12. data/lib/sunspot/composite_setup.rb +186 -0
  13. data/lib/sunspot/configuration.rb +38 -0
  14. data/lib/sunspot/data_extractor.rb +47 -0
  15. data/lib/sunspot/dsl.rb +3 -0
  16. data/lib/sunspot/dsl/field_query.rb +72 -0
  17. data/lib/sunspot/dsl/fields.rb +86 -0
  18. data/lib/sunspot/dsl/query.rb +59 -0
  19. data/lib/sunspot/dsl/query_facet.rb +31 -0
  20. data/lib/sunspot/dsl/restriction.rb +25 -0
  21. data/lib/sunspot/dsl/scope.rb +193 -0
  22. data/lib/sunspot/dsl/search.rb +30 -0
  23. data/lib/sunspot/facet.rb +16 -0
  24. data/lib/sunspot/facet_data.rb +120 -0
  25. data/lib/sunspot/facet_row.rb +10 -0
  26. data/lib/sunspot/field.rb +157 -0
  27. data/lib/sunspot/field_factory.rb +126 -0
  28. data/lib/sunspot/indexer.rb +123 -0
  29. data/lib/sunspot/instantiated_facet.rb +42 -0
  30. data/lib/sunspot/instantiated_facet_row.rb +22 -0
  31. data/lib/sunspot/query.rb +191 -0
  32. data/lib/sunspot/query/base_query.rb +90 -0
  33. data/lib/sunspot/query/connective.rb +126 -0
  34. data/lib/sunspot/query/dynamic_query.rb +69 -0
  35. data/lib/sunspot/query/field_facet.rb +151 -0
  36. data/lib/sunspot/query/field_query.rb +63 -0
  37. data/lib/sunspot/query/pagination.rb +39 -0
  38. data/lib/sunspot/query/query_facet.rb +73 -0
  39. data/lib/sunspot/query/query_facet_row.rb +19 -0
  40. data/lib/sunspot/query/query_field_facet.rb +13 -0
  41. data/lib/sunspot/query/restriction.rb +233 -0
  42. data/lib/sunspot/query/scope.rb +165 -0
  43. data/lib/sunspot/query/sort.rb +36 -0
  44. data/lib/sunspot/query/sort_composite.rb +33 -0
  45. data/lib/sunspot/schema.rb +165 -0
  46. data/lib/sunspot/search.rb +219 -0
  47. data/lib/sunspot/search/hit.rb +66 -0
  48. data/lib/sunspot/session.rb +201 -0
  49. data/lib/sunspot/setup.rb +271 -0
  50. data/lib/sunspot/type.rb +200 -0
  51. data/lib/sunspot/util.rb +164 -0
  52. data/solr/etc/jetty.xml +212 -0
  53. data/solr/etc/webdefault.xml +379 -0
  54. data/solr/lib/jetty-6.1.3.jar +0 -0
  55. data/solr/lib/jetty-util-6.1.3.jar +0 -0
  56. data/solr/lib/jsp-2.1/ant-1.6.5.jar +0 -0
  57. data/solr/lib/jsp-2.1/core-3.1.1.jar +0 -0
  58. data/solr/lib/jsp-2.1/jsp-2.1.jar +0 -0
  59. data/solr/lib/jsp-2.1/jsp-api-2.1.jar +0 -0
  60. data/solr/lib/servlet-api-2.5-6.1.3.jar +0 -0
  61. data/solr/solr/conf/elevate.xml +36 -0
  62. data/solr/solr/conf/protwords.txt +21 -0
  63. data/solr/solr/conf/schema.xml +50 -0
  64. data/solr/solr/conf/solrconfig.xml +696 -0
  65. data/solr/solr/conf/stopwords.txt +57 -0
  66. data/solr/solr/conf/synonyms.txt +31 -0
  67. data/solr/start.jar +0 -0
  68. data/solr/webapps/solr.war +0 -0
  69. data/spec/api/adapters_spec.rb +33 -0
  70. data/spec/api/build_search_spec.rb +1039 -0
  71. data/spec/api/indexer_spec.rb +311 -0
  72. data/spec/api/query_spec.rb +153 -0
  73. data/spec/api/search_retrieval_spec.rb +362 -0
  74. data/spec/api/session_spec.rb +157 -0
  75. data/spec/api/spec_helper.rb +1 -0
  76. data/spec/api/sunspot_spec.rb +18 -0
  77. data/spec/integration/dynamic_fields_spec.rb +55 -0
  78. data/spec/integration/faceting_spec.rb +169 -0
  79. data/spec/integration/keyword_search_spec.rb +83 -0
  80. data/spec/integration/scoped_search_spec.rb +289 -0
  81. data/spec/integration/spec_helper.rb +1 -0
  82. data/spec/integration/stored_fields_spec.rb +10 -0
  83. data/spec/integration/test_pagination.rb +32 -0
  84. data/spec/mocks/adapters.rb +32 -0
  85. data/spec/mocks/blog.rb +3 -0
  86. data/spec/mocks/comment.rb +19 -0
  87. data/spec/mocks/connection.rb +84 -0
  88. data/spec/mocks/mock_adapter.rb +30 -0
  89. data/spec/mocks/mock_record.rb +48 -0
  90. data/spec/mocks/photo.rb +8 -0
  91. data/spec/mocks/post.rb +73 -0
  92. data/spec/mocks/user.rb +8 -0
  93. data/spec/spec_helper.rb +47 -0
  94. data/tasks/gemspec.rake +25 -0
  95. data/tasks/rcov.rake +28 -0
  96. data/tasks/rdoc.rake +22 -0
  97. data/tasks/schema.rake +19 -0
  98. data/tasks/spec.rake +24 -0
  99. data/tasks/todo.rake +4 -0
  100. data/templates/schema.xml.haml +24 -0
  101. metadata +246 -0
@@ -0,0 +1,30 @@
1
module Sunspot
  module DSL
    #
    # Top-level DSL context: instances of this class are the receiver against
    # which a search block (see the example below) is evaluated. See
    # Sunspot::DSL::Query, Sunspot::DSL::FieldQuery, and Sunspot::DSL::Scope
    # for the full API presented.
    #
    class Search < Query
      #
      # Keeps hold of the search itself (for data accessor lookup) as well as
      # the query that the search exposes.
      #
      def initialize(search) #:nodoc:
        @search, @query = search, search.query
      end

      #
      # Look up the data accessor that loads instances of the given class out
      # of persistent storage. The lookup is delegated to the underlying
      # search. Data accessors are free to implement any extra methods that
      # may be useful in this context.
      #
      # ==== Example
      #
      #   Sunspot.search Post do
      #     data_accessor_for(Post).includes = [:blog, :comments]
      #   end
      #
      def data_accessor_for(clazz)
        @search.data_accessor_for(clazz)
      end
    end
  end
end
@@ -0,0 +1,16 @@
1
module Sunspot
  #
  # Thin wrapper around a facet data object. The name is delegated to the
  # data object, and each (value, count) pair the data object yields is
  # wrapped in a FacetRow.
  #
  class Facet
    def initialize(facet_data)
      @facet_data = facet_data
    end

    #
    # Name reported by the wrapped facet data. Also available as #field_name.
    #
    def name
      @facet_data.name
    end
    alias_method :field_name, :name

    #
    # Rows of this facet: whatever collection the facet data returns after
    # yielding each value/count pair to be wrapped in a FacetRow.
    #
    def rows
      @facet_data.rows do |row_value, row_count|
        FacetRow.new(row_value, row_count)
      end
    end
  end
end
@@ -0,0 +1,120 @@
1
+ require 'enumerator'
2
+
3
module Sunspot
  #
  # Wrappers around raw facet results. Every implementation exposes #name and
  # a #rows method that yields (value, count) pairs to the given block, so the
  # caller decides which row class gets instantiated.
  #
  module FacetData
    #
    # Shared behaviour: access to the (optional) field and value casting.
    #
    class Abstract
      attr_reader :field #:nodoc:

      #
      # The reference configured on the field, or nil if there is no field.
      #
      def reference
        @field.reference if @field
      end

      #
      # Cast a raw value through the field when one is present; without a
      # field the value is returned untouched.
      #
      def cast(value)
        if @field
          @field.cast(value)
        else
          value
        end
      end

      #
      # Value handed to the row block for a raw facet value. Subclasses may
      # override this to build richer values.
      #
      def row_value(value)
        cast(value)
      end
    end

    class FieldFacetData < Abstract
      #
      # facet_values is a flat list alternating value, count, value, count...
      # (it is consumed in slices of two by #rows).
      #
      def initialize(facet_values, field) #:nodoc:
        @facet_values, @field = facet_values, field
      end

      # The name of the field that contains this facet's values
      #
      # ==== Returns
      #
      # Symbol:: The field name
      #
      def name
        @field.name
      end

      # The rows returned for this facet. Each (cast value, count) pair is
      # yielded to the block, and the block's results are collected in the
      # order of the underlying values. The result is memoized, so the block
      # is only invoked on the first call.
      #
      # ==== Returns
      #
      # Array:: Collection of objects built by the block
      #
      def rows
        @rows ||=
          begin
            rows = []
            @facet_values.each_slice(2) do |value, count|
              rows << yield(row_value(value), count)
            end
            rows
          end
      end
    end

    class DateFacetData < FieldFacetData
      #
      # facet_values is a hash of value => count that additionally carries
      # the 'gap', 'start' and 'end' entries. The gap (in seconds) is parsed
      # out and the three metadata entries are dropped before the remaining
      # pairs are flattened for FieldFacetData.
      #
      def initialize(facet_values, field) #:nodoc:
        # Work on a copy so the caller's hash keeps its metadata entries.
        values = facet_values.dup
        @gap = values.delete('gap')[/\+(\d+)SECONDS/, 1].to_i
        %w(start end).each { |key| values.delete(key) }
        super(values.to_a.flatten, field)
      end

      #
      # Get the rows of this date facet. The rows will always be sorted in
      # chronological order (by the start of each row's value range).
      #
      #--
      #
      # The date facet info comes back from Solr as a hash, so we need to sort
      # it manually. FIXME this currently assumes we want to do a "lexical"
      # sort, but we should support count sort as well, even if it's not a
      # common use case.
      #
      def rows(&block)
        super(&block).sort { |a, b| a.value.first <=> b.value.first }
      end

      private

      #
      # A date facet row covers a range that starts at the cast value and
      # extends by the gap.
      #
      def row_value(value)
        range_start = cast(value)
        range_start..(range_start + @gap)
      end
    end

    class QueryFacetData < Abstract
      #
      # row_data maps each row's boolean phrase to its count.
      #
      def initialize(outgoing_query_facet, row_data) #:nodoc:
        @outgoing_query_facet, @row_data = outgoing_query_facet, row_data
        @field = @outgoing_query_facet.field
      end

      #
      # Name of the outgoing query facet this data belongs to.
      #
      def name
        # Must go through the instance variable: no reader is defined.
        @outgoing_query_facet.name
      end

      #
      # Get the rows associated with this query facet. Only rows whose boolean
      # phrase is present in the row data are yielded (as label, count).
      # Returned rows are always ordered by count, highest first, and the
      # result is memoized.
      #
      # ==== Returns
      #
      # Array:: Collection of objects built by the block, ordered by count
      #
      def rows
        @rows ||=
          begin
            rows = []
            @outgoing_query_facet.rows.each do |row|
              row_query = row.to_boolean_phrase
              if @row_data.has_key?(row_query)
                rows << yield(row.label, @row_data[row_query])
              end
            end
            rows.sort! { |x, y| y.count <=> x.count }
          end
      end
    end
  end
end
@@ -0,0 +1,10 @@
1
module Sunspot
  #
  # A single row of a facet: a value paired with its count. Both are exposed
  # read-only.
  #
  class FacetRow
    attr_reader :value, :count

    def initialize(value, count) #:nodoc:
      @value = value
      @count = count
    end
  end
end
@@ -0,0 +1,157 @@
1
module Sunspot
  #
  # Base class for fields: a public name plus a type object to which value
  # conversion and indexed-name generation are delegated.
  #
  class Field #:nodoc:
    attr_accessor :name # The public-facing name of the field
    attr_accessor :type # The Type of the field
    attr_accessor :reference # Model class that the value of this field refers to
    attr_accessor :attributes

    #
    # The name is normalized to a symbol; attributes start out empty.
    #
    def initialize(name, type) #:nodoc:
      @name, @type = name.to_sym, type
      @attributes = {}
    end

    # Convert a value to its representation for Solr indexing. This delegates
    # to the #to_indexed method on the field's type. Arrays are converted
    # element by element.
    #
    # ==== Parameters
    #
    # value<Object>:: Value to convert to Solr representation
    #
    # ==== Returns
    #
    # String:: Solr representation of the object (an Array of them when an
    #          array was given)
    #
    # ==== Raises
    #
    # ArgumentError::
    #   the value is an array, but this field does not allow multiple values
    #
    def to_indexed(value)
      if value.is_a? Array
        if @multiple
          value.map { |val| to_indexed(val) }
        else
          raise ArgumentError, "#{name} is not a multiple-value field, so it cannot index values #{value.inspect}"
        end
      else
        @type.to_indexed(value)
      end
    end

    # Cast the value into the appropriate Ruby class for the field's type
    #
    # ==== Parameters
    #
    # value<String>:: Solr's representation of the value
    #
    # ==== Returns
    #
    # Object:: The cast value
    #
    def cast(value)
      @type.cast(value)
    end

    #
    # Name with which this field is indexed internally. Based on public name and
    # type.
    #
    # ==== Returns
    #
    # String:: Internal name of the field
    #
    def indexed_name
      @type.indexed_name(@name)
    end

    #
    # Whether this field accepts multiple values.
    #
    # ==== Returns
    #
    # Boolean:: True if this field accepts multiple values.
    #
    def multiple?
      !!@multiple
    end
  end

  #
  # FulltextField instances represent fields that are indexed as fulltext.
  # These fields are tokenized in the index, and can have boost applied to
  # them. They also always allow multiple values (since the only downside of
  # allowing multiple values is that it prevents the field from being sortable,
  # and sorting on tokenized fields is nonsensical anyway, there is no reason
  # to do otherwise). FulltextField instances always have the type TextType.
  #
  class FulltextField < Field #:nodoc:
    #
    # The only recognized option is :boost, which is stored in the field's
    # attributes. Any other option raises ArgumentError.
    #
    def initialize(name, options = {})
      super(name, Type::TextType)
      # Consume a private copy so the caller's hash is left intact and can be
      # reused for other fields.
      options = options.dup
      if options.has_key?(:boost)
        @attributes[:boost] = options.delete(:boost)
      end
      @multiple = true
      raise ArgumentError, "Unknown field option #{options.keys.first.inspect} provided for field #{name.inspect}" unless options.empty?
    end
  end

  #
  # AttributeField instances encapsulate non-tokenized attribute data.
  # AttributeFields can have any type except TextType, and can also have
  # a reference (for instantiated facets), optionally allow multiple values
  # (false by default), and can store their values (false by default). All
  # scoping, sorting, and faceting is done with attribute fields.
  #
  class AttributeField < Field #:nodoc:
    #
    # Recognized options are :multiple, :references and :stored; any other
    # option raises ArgumentError.
    #
    # A :references value that responds to #to_sym (symbols, strings) is
    # stored as a symbol; otherwise, if it responds to #name (e.g. a class),
    # its name is stored.
    #
    def initialize(name, type, options = {})
      super(name, type)
      # Consume a private copy so the caller's hash is left intact and can be
      # reused for other fields.
      options = options.dup
      @multiple = !!options.delete(:multiple)
      reference = options.delete(:references)
      # #to_sym is checked first: since Ruby 3.0 symbols respond to #name as
      # well, which would otherwise turn a symbol reference into a string.
      @reference =
        if reference.respond_to?(:to_sym)
          reference.to_sym
        elsif reference.respond_to?(:name)
          reference.name
        end
      @stored = !!options.delete(:stored)
      raise ArgumentError, "Unknown field option #{options.keys.first.inspect} provided for field #{name.inspect}" unless options.empty?
    end

    # The name of the field as it is indexed in Solr. The indexed name
    # contains a suffix that contains information about the type as well as
    # whether the field allows multiple values for a document ('m') and
    # whether its value is stored ('s').
    #
    # ==== Returns
    #
    # String:: The field's indexed name
    #
    def indexed_name
      "#{super}#{'m' if @multiple}#{'s' if @stored}"
    end
  end

  #
  # RandomField instances are used for random sorting.
  #
  class RandomField #:nodoc:
    #
    # Never multiple, but this has to return false so Sunspot doesn't barf
    # when you try to order by it.
    #
    def multiple?
      false
    end

    #
    # Solr uses the dynamic field name as a seed for random, so we randomize the
    # field name accordingly.
    #
    # #XXX I think it's bad to use a random number as a seed. Would it be
    # better to pass in the current timestamp or some such thing?
    #
    def indexed_name
      "random_#{rand(1<<16)}"
    end
  end
end
@@ -0,0 +1,126 @@
1
module Sunspot
  #
  # The FieldFactory module contains classes for generating fields. FieldFactory
  # implementation classes should implement a #build method, although the arity
  # of the method depends on the type of factory. They also must implement a
  # #populate_document method, which extracts field data from a given model and
  # adds it into the RSolr document for indexing.
  #
  module FieldFactory #:nodoc:all
    #
    # Base class for field factories. Sets up the data extractor: a block
    # extractor when a block is given, otherwise an attribute extractor for
    # the :using option (falling back to the field name).
    #
    class Abstract
      attr_reader :name

      #
      # Note that :using is removed from the options hash that is passed in
      # (only when no block is given).
      #
      def initialize(name, options = {}, &block)
        @name = name.to_sym
        @data_extractor =
          if block
            DataExtractor::BlockExtractor.new(&block)
          else
            DataExtractor::AttributeExtractor.new(options.delete(:using) || name)
          end
      end
    end

    #
    # A StaticFieldFactory generates normal static fields. Each factory instance
    # contains an eager-initialized field instance, which is returned by the
    # #build method.
    #
    class Static < Abstract
      #
      # Builds a FulltextField when the type is Type::TextType and an
      # AttributeField otherwise.
      #
      # ==== Raises
      #
      # ArgumentError::
      #   the name contains anything other than letters, numbers, and
      #   underscores
      #
      def initialize(name, type, options = {}, &block)
        # Abstract#initialize removes :using from the hash it is given, so
        # work on a private copy rather than on the caller's hash.
        options = options.dup
        super(name, options, &block)
        # \A and \z anchor the whole string; ^ and $ would accept any
        # multi-line name that merely contains one valid line.
        unless name.to_s =~ /\A\w+\z/
          raise ArgumentError, "Invalid field name #{name}: only letters, numbers, and underscores are allowed."
        end
        @field =
          if type == Type::TextType
            FulltextField.new(name, options)
          else
            AttributeField.new(name, type, options)
          end
      end

      #
      # Return the field instance built by this factory
      #
      def build
        @field
      end

      #
      # Extract the encapsulated field's data from the given model and add it
      # into the RSolr document for indexing. Nothing is added when the
      # extracted value is nil; array values are added one element at a time,
      # each with the field's attributes.
      #
      def populate_document(document, model) #:nodoc:
        value = @data_extractor.value_for(model)
        return if value.nil?
        Array(@field.to_indexed(value)).each do |scalar_value|
          document.add_field(
            @field.indexed_name.to_sym,
            scalar_value, @field.attributes
          )
        end
      end

      #
      # A unique signature identifying this field by name and type.
      #
      def signature
        [@field.name, @field.type]
      end
    end

    #
    # DynamicFieldFactories create dynamic field instances based on dynamic
    # configuration.
    #
    class Dynamic < Abstract
      attr_accessor :name, :type

      def initialize(name, type, options = {}, &block)
        # Abstract#initialize removes :using from the hash it is given, so
        # work on a private copy rather than on the caller's hash.
        options = options.dup
        super(name, options, &block)
        @type, @options = type, options
      end

      #
      # Build a field based on the dynamic name given. The field is named
      # "<factory name>:<dynamic name>" and receives its own copy of the
      # options.
      #
      def build(dynamic_name)
        AttributeField.new("#{@name}:#{dynamic_name}", @type, @options.dup)
      end
      #
      # This alias allows a DynamicFieldFactory to be used in place of a Setup
      # or CompositeSetup instance by query components.
      #
      alias_method :field, :build

      #
      # Generate dynamic fields based on hash returned by data accessor and
      # add the field data to the document. Nothing is added when the
      # extractor returns nil or false.
      #
      def populate_document(document, model)
        values = @data_extractor.value_for(model)
        return unless values
        values.each_pair do |dynamic_name, value|
          field_instance = build(dynamic_name)
          Array(field_instance.to_indexed(value)).each do |scalar_value|
            document.add_field(
              field_instance.indexed_name.to_sym,
              scalar_value
            )
          end
        end
      end

      #
      # Unique signature identifying this dynamic field based on name and type
      #
      def signature
        [@name, @type]
      end
    end
  end
end