kuahyeow-sunspot 0.9.8 → 0.10.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (120) hide show
  1. data/History.txt +38 -0
  2. data/README.rdoc +40 -3
  3. data/TODO +10 -8
  4. data/VERSION.yml +2 -2
  5. data/bin/sunspot-configure-solr +22 -28
  6. data/bin/sunspot-solr +50 -29
  7. data/lib/sunspot.rb +8 -18
  8. data/lib/sunspot/adapters.rb +1 -1
  9. data/lib/sunspot/composite_setup.rb +13 -15
  10. data/lib/sunspot/configuration.rb +21 -0
  11. data/lib/sunspot/data_extractor.rb +3 -0
  12. data/lib/sunspot/dsl.rb +2 -1
  13. data/lib/sunspot/dsl/field_query.rb +33 -6
  14. data/lib/sunspot/dsl/fields.rb +14 -1
  15. data/lib/sunspot/dsl/fulltext.rb +168 -0
  16. data/lib/sunspot/dsl/query.rb +82 -5
  17. data/lib/sunspot/dsl/query_facet.rb +3 -3
  18. data/lib/sunspot/dsl/restriction.rb +7 -7
  19. data/lib/sunspot/dsl/scope.rb +17 -10
  20. data/lib/sunspot/dsl/search.rb +2 -2
  21. data/lib/sunspot/facet.rb +12 -39
  22. data/lib/sunspot/facet_data.rb +169 -0
  23. data/lib/sunspot/facet_row.rb +5 -27
  24. data/lib/sunspot/field.rb +50 -26
  25. data/lib/sunspot/field_factory.rb +15 -0
  26. data/lib/sunspot/indexer.rb +6 -0
  27. data/lib/sunspot/instantiated_facet.rb +7 -6
  28. data/lib/sunspot/instantiated_facet_row.rb +16 -1
  29. data/lib/sunspot/query.rb +2 -187
  30. data/lib/sunspot/query/boost_query.rb +20 -0
  31. data/lib/sunspot/query/connective.rb +98 -35
  32. data/lib/sunspot/query/dismax.rb +73 -0
  33. data/lib/sunspot/query/field_facet.rb +3 -23
  34. data/lib/sunspot/query/fulltext_base_query.rb +47 -0
  35. data/lib/sunspot/query/highlighting.rb +43 -0
  36. data/lib/sunspot/query/local.rb +24 -0
  37. data/lib/sunspot/query/pagination.rb +3 -4
  38. data/lib/sunspot/query/query.rb +93 -0
  39. data/lib/sunspot/query/query_facet.rb +15 -9
  40. data/lib/sunspot/query/query_facet_row.rb +3 -3
  41. data/lib/sunspot/query/query_field_facet.rb +20 -0
  42. data/lib/sunspot/query/restriction.rb +36 -15
  43. data/lib/sunspot/query/scope.rb +3 -159
  44. data/lib/sunspot/query/sort.rb +84 -15
  45. data/lib/sunspot/query/text_field_boost.rb +15 -0
  46. data/lib/sunspot/schema.rb +7 -25
  47. data/lib/sunspot/search.rb +63 -45
  48. data/lib/sunspot/search/highlight.rb +38 -0
  49. data/lib/sunspot/search/hit.rb +50 -3
  50. data/lib/sunspot/session.rb +40 -11
  51. data/lib/sunspot/setup.rb +47 -10
  52. data/lib/sunspot/text_field_setup.rb +29 -0
  53. data/lib/sunspot/type.rb +4 -4
  54. data/lib/sunspot/util.rb +27 -1
  55. data/solr/solr/conf/schema.xml +54 -40
  56. data/solr/solr/conf/solrconfig.xml +30 -0
  57. data/solr/solr/lib/geoapi-nogenerics-2.1-M2.jar +0 -0
  58. data/solr/solr/lib/gt2-referencing-2.3.1.jar +0 -0
  59. data/solr/solr/lib/jsr108-0.01.jar +0 -0
  60. data/solr/solr/lib/locallucene.jar +0 -0
  61. data/solr/solr/lib/localsolr.jar +0 -0
  62. data/spec/api/indexer/attributes_spec.rb +100 -0
  63. data/spec/api/indexer/batch_spec.rb +46 -0
  64. data/spec/api/indexer/dynamic_fields_spec.rb +33 -0
  65. data/spec/api/indexer/fixed_fields_spec.rb +57 -0
  66. data/spec/api/indexer/fulltext_spec.rb +43 -0
  67. data/spec/api/indexer/removal_spec.rb +46 -0
  68. data/spec/api/indexer/spec_helper.rb +1 -0
  69. data/spec/api/indexer_spec.rb +1 -308
  70. data/spec/api/query/connectives_spec.rb +162 -0
  71. data/spec/api/query/dsl_spec.rb +12 -0
  72. data/spec/api/query/dynamic_fields_spec.rb +149 -0
  73. data/spec/api/query/faceting_spec.rb +272 -0
  74. data/spec/api/query/fulltext_spec.rb +193 -0
  75. data/spec/api/query/highlighting_spec.rb +138 -0
  76. data/spec/api/query/local_spec.rb +54 -0
  77. data/spec/api/query/ordering_pagination_spec.rb +95 -0
  78. data/spec/api/query/scope_spec.rb +266 -0
  79. data/spec/api/query/spec_helper.rb +1 -0
  80. data/spec/api/query/text_field_scoping_spec.rb +30 -0
  81. data/spec/api/query/types_spec.rb +20 -0
  82. data/spec/api/search/dynamic_fields_spec.rb +27 -0
  83. data/spec/api/search/faceting_spec.rb +206 -0
  84. data/spec/api/search/highlighting_spec.rb +65 -0
  85. data/spec/api/search/hits_spec.rb +62 -0
  86. data/spec/api/search/results_spec.rb +52 -0
  87. data/spec/api/search/search_spec.rb +23 -0
  88. data/spec/api/search/spec_helper.rb +1 -0
  89. data/spec/api/session_spec.rb +11 -5
  90. data/spec/api/spec_helper.rb +1 -1
  91. data/spec/helpers/indexer_helper.rb +29 -0
  92. data/spec/helpers/query_helper.rb +13 -0
  93. data/spec/helpers/search_helper.rb +78 -0
  94. data/spec/integration/faceting_spec.rb +1 -1
  95. data/spec/integration/highlighting_spec.rb +22 -0
  96. data/spec/integration/keyword_search_spec.rb +65 -0
  97. data/spec/integration/local_search_spec.rb +56 -0
  98. data/spec/integration/scoped_search_spec.rb +15 -1
  99. data/spec/integration/spec_helper.rb +7 -1
  100. data/spec/mocks/connection.rb +24 -2
  101. data/spec/mocks/photo.rb +1 -1
  102. data/spec/mocks/post.rb +5 -3
  103. data/spec/mocks/super_class.rb +2 -0
  104. data/spec/spec_helper.rb +13 -0
  105. data/tasks/gemspec.rake +20 -8
  106. data/tasks/schema.rake +1 -1
  107. data/tasks/spec.rake +1 -1
  108. data/templates/schema.xml.erb +36 -0
  109. metadata +118 -52
  110. data/lib/sunspot/date_facet.rb +0 -36
  111. data/lib/sunspot/date_facet_row.rb +0 -17
  112. data/lib/sunspot/query/base_query.rb +0 -94
  113. data/lib/sunspot/query/dynamic_query.rb +0 -69
  114. data/lib/sunspot/query/field_query.rb +0 -57
  115. data/lib/sunspot/query_facet.rb +0 -33
  116. data/lib/sunspot/query_facet_row.rb +0 -21
  117. data/spec/api/build_search_spec.rb +0 -1018
  118. data/spec/api/query_spec.rb +0 -153
  119. data/spec/api/search_retrieval_spec.rb +0 -335
  120. data/templates/schema.xml.haml +0 -24
@@ -122,5 +122,20 @@ module Sunspot
122
122
  [@name, @type]
123
123
  end
124
124
  end
125
+
126
+ #XXX Right now this doubles as a Field and a FieldFactory - good idea?
127
+ class Coordinates
128
+ def initialize(name)
129
+ @data_extractor = DataExtractor::AttributeExtractor.new(name)
130
+ end
131
+
132
+ def populate_document(document, model)
133
+ if coordinates = @data_extractor.value_for(model)
134
+ coordinates = Util::Coordinates.new(coordinates)
135
+ document.add_field(:lat, coordinates.lat)
136
+ document.add_field(:long, coordinates.lng)
137
+ end
138
+ end
139
+ end
125
140
  end
126
141
  end
@@ -49,10 +49,16 @@ module Sunspot
49
49
  @connection.delete_by_query("type:#{escape(clazz.name)}")
50
50
  end
51
51
 
52
+ #
53
+ # Start batch processing
54
+ #
52
55
  def start_batch
53
56
  @batch = []
54
57
  end
55
58
 
59
+ #
60
+ # Write batch out to Solr and clear it
61
+ #
56
62
  def flush_batch
57
63
  add_documents(@batch)
58
64
  @batch = nil
@@ -4,6 +4,9 @@ module Sunspot
4
4
  # primary key stored in facet rows' values. The rows are hydrated lazily, but
5
5
  # all rows are hydrated the first time #instance is called on any of the rows.
6
6
  #
7
+ # Instantiated facets are possible for fields which are defined with a
8
+ # :references option.
9
+ #
7
10
  # The #rows method returns InstantiatedFacetRow objects.
8
11
  #
9
12
  class InstantiatedFacet < Facet
@@ -15,7 +18,7 @@ module Sunspot
15
18
  #
16
19
  def populate_instances! #:nodoc:
17
20
  ids = rows.map { |row| row.value }
18
- reference_class = Sunspot::Util.full_const_get(@field.reference.to_s)
21
+ reference_class = Sunspot::Util.full_const_get(@facet_data.reference.to_s)
19
22
  accessor = Adapters::DataAccessor.create(reference_class)
20
23
  instance_map = accessor.load_all(ids).inject({}) do |map, instance|
21
24
  map[Adapters::InstanceAdapter.adapt(instance).id] = instance
@@ -26,13 +29,11 @@ module Sunspot
26
29
  end
27
30
  end
28
31
 
29
- private
30
-
31
32
  #
32
- # Override the Facet#new_row method to return an InstantiateFacetRow
33
+ # A collection of InstantiatedFacetRow objects
33
34
  #
34
- def new_row(pair)
35
- InstantiatedFacetRow.new(pair, self)
35
+ def rows
36
+ @facet_data.rows { |value, count| InstantiatedFacetRow.new(value, count, self) }
36
37
  end
37
38
  end
38
39
  end
@@ -1,7 +1,22 @@
1
1
  module Sunspot
2
+ #
3
+ # InstantiatedFacetRow objects represent a single value for an instantiated
4
+ # facet. As well as the usual FacetRow methods, InstantiatedFacetRow objects
5
+ # provide access to the persistent object referenced by the row's value.
6
+ #
2
7
  class InstantiatedFacetRow < FacetRow
3
- attr_writer :instance
8
+ attr_writer :instance #:nodoc:
4
9
 
10
+ def initialize(value, count, facet) #:nodoc:
11
+ super(value, count)
12
+ @facet = facet
13
+ end
14
+
15
+ #
16
+ # Get the persistent object referenced by this row's value. Instances are
17
+ # batch-lazy-loaded, which means that for a given facet, all of the
18
+ # instances are loaded the first time any row's instance is requested.
19
+ #
5
20
  def instance
6
21
  unless defined?(@instance)
7
22
  @facet.populate_instances!
@@ -1,190 +1,5 @@
1
- %w(base_query scope field_query connective dynamic_query field_facet query_facet
2
- query_facet_row pagination restriction sort sort_composite).each do |file|
3
- require File.join(File.dirname(__FILE__), 'query', file)
4
- end
5
-
1
+ %w(connective boost_query dismax field_facet highlighting local pagination restriction query query_facet query_field_facet query_facet_row scope sort sort_composite text_field_boost).each { |file| require(File.join(File.dirname(__FILE__), 'query', file)) }
6
2
  module Sunspot
7
- module Query #:nodoc:
8
- #
9
- # This class encapsulates a query that is to be sent to Solr. The query is
10
- # constructed in the block passed to the Sunspot.search method, using the
11
- # Sunspot::DSL::Query interface. It can also be accessed directly by calling
12
- # #query on a Search object (presumably a not-yet-run one created using
13
- # Sunspot#new_search), which might be more suitable than the DSL when an
14
- # intermediate object has responsibility for building the query dynamically.
15
- #--
16
- # Instances of Query, as well as all of the components it contains, respond to
17
- # the #to_params method, which returns a hash of parameters in the format
18
- # recognized by the solr-ruby API.
19
- #
20
- class Query < FieldQuery
21
- attr_reader :query_facets #:nodoc:
22
-
23
- def initialize(types, setup, configuration) #:nodoc:
24
- @setup = setup
25
- @components = []
26
- @query_facets = {}
27
- @components << @base_query = BaseQuery.new(types, setup)
28
- @components << @pagination = Pagination.new(configuration)
29
- @components << @sort = SortComposite.new
30
- end
31
-
32
- #
33
- # Set the keywords for this query. Keywords are parsed with Solr's dismax
34
- # handler.
35
- #
36
- def keywords=(keywords)
37
- set_keywords(keywords)
38
- end
39
-
40
- #
41
- # Add a component to the query. Used by objects that proxy to the query
42
- # object.
43
- #
44
- # ==== Parameters
45
- #
46
- # component<~to_params>:: Query component to add.
47
- #
48
- def add_component(component) #:nodoc:
49
- @components << component
50
- end
51
-
52
- #
53
- # Sets @start and @rows instance variables using pagination semantics
54
- #
55
- # ==== Parameters
56
- #
57
- # page<Integer>:: Page on which to start
58
- # per_page<Integer>::
59
- # How many rows to display per page. Default taken from
60
- # Sunspot.config.pagination.default_per_page
61
- #
62
- def paginate(page, per_page = nil)
63
- @pagination.page, @pagination.per_page = page, per_page
64
- end
65
-
66
- #
67
- # Add random ordering to the search. This can be added after other
68
- # field-based sorts if desired.
69
- #
70
- def order_by_random
71
- add_sort(Sort.new(RandomField.new))
72
- end
73
-
74
- #
75
- # Representation of this query as solr-ruby parameters. Constructs the hash
76
- # by deep-merging scope and facet parameters, adding in various other
77
- # parameters from instance data.
78
- #
79
- # Note that solr-ruby takes the :q parameter as a separate argument; for
80
- # the sake of consistency, the Query object ignores this fact (the Search
81
- # object extracts it back out).
82
- #
83
- # ==== Returns
84
- #
85
- # Hash:: Representation of query in solr-ruby form
86
- #
87
- def to_params #:nodoc:
88
- params = {}
89
- query_components = []
90
- for component in @components
91
- Util.deep_merge!(params, component.to_params)
92
- end
93
- params
94
- end
95
-
96
- #
97
- # Page that this query will return (used by Sunspot::Search to expose
98
- # pagination)
99
- #
100
- # ==== Returns
101
- #
102
- # Integer:: Page number
103
- #
104
- def page #:nodoc:
105
- @pagination.page
106
- end
107
-
108
- #
109
- # Number of rows per page that this query will return (used by
110
- # Sunspot::Search to expose pagination)
111
- #
112
- # ==== Returns
113
- #
114
- # Integer:: Rows per page
115
- #
116
- def per_page #:nodoc:
117
- @pagination.per_page
118
- end
119
-
120
- #
121
- # Get the query facet with the given name. Used by the Search object to
122
- # match query facet results with the requested query facets.
123
- #
124
- def query_facet(name) #:nodoc:
125
- @query_facets[name.to_sym]
126
- end
127
-
128
- #
129
- # Add a Sort object into this query's sort composite.
130
- #
131
- def add_sort(sort) #:nodoc:
132
- @sort << sort
133
- end
134
-
135
- #
136
- # Set the keywords for this query, along with keyword options. See
137
- # Query::BaseQuery for information on what the options do.
138
- #
139
- def set_keywords(keywords, options = {}) #:nodoc:
140
- @base_query.keywords = keywords
141
- @base_query.keyword_options = options
142
- end
143
-
144
- #
145
- # Pass in search options as a hash. This is not the preferred way of
146
- # building a Sunspot search, but it is made available as experience shows
147
- # Ruby developers like to pass in hashes. Probably nice for quick one-offs
148
- # on the console, anyway.
149
- #
150
- # ==== Options (+options+)
151
- #
152
- # :keywords:: Keyword string for fulltext search
153
- # :conditions::
154
- # Hash of key-value pairs, where keys are field names, and values are one
155
- # of scalar, Array, or Range. Scalars are evaluated as EqualTo
156
- # restrictions; Arrays are AnyOf restrictions, and Ranges are Between
157
- # restrictions.
158
- # :order::
159
- # Order the search results. Either a string or array of strings of the
160
- # form "field_name direction"
161
- # :page::
162
- # Page to use for pagination
163
- # :per_page::
164
- # Number of results to show per page
165
- #
166
- def options=(options) #:nodoc:
167
- if options.has_key?(:keywords)
168
- self.keywords = options[:keywords]
169
- end
170
- if options.has_key?(:conditions)
171
- options[:conditions].each_pair do |field_name, value|
172
- begin
173
- add_shorthand_restriction(field_name, value)
174
- rescue UnrecognizedFieldError
175
- # ignore fields we don't recognize
176
- end
177
- end
178
- end
179
- if options.has_key?(:order)
180
- for order in Array(options[:order])
181
- order_by(*order.split(' '))
182
- end
183
- end
184
- if options.has_key?(:page)
185
- paginate(options[:page], options[:per_page])
186
- end
187
- end
188
- end
3
+ module Query #:nodoc:all
189
4
  end
190
5
  end
@@ -0,0 +1,20 @@
1
+ module Sunspot
2
+ module Query
3
+ #
4
+ # Representation of a BoostQuery, which allows the searcher to specify a
5
+ # scope for which matching documents should have an extra boost. This is
6
+ # essentially a conjunction, with an extra instance variable containing
7
+ # the boost that should be applied.
8
+ #
9
+ class BoostQuery < Connective::Conjunction #:nodoc:
10
+ def initialize(boost)
11
+ super(false)
12
+ @boost = boost
13
+ end
14
+
15
+ def to_boolean_phrase
16
+ "#{super}^#{@boost}"
17
+ end
18
+ end
19
+ end
20
+ end
@@ -1,55 +1,110 @@
1
1
  module Sunspot
2
2
  module Query
3
- module Connective #:nodoc:
3
+ module Connective #:nodoc:all
4
4
  #
5
5
  # Base class for connectives (conjunctions and disjunctions).
6
6
  #
7
- class Abstract < Scope
8
- def initialize(setup, negated = false) #:nodoc:
9
- @setup, @negated = setup, negated
7
+ class Abstract
8
+ def initialize(negated = false) #:nodoc:
9
+ @negated = negated
10
10
  @components = []
11
11
  end
12
12
 
13
13
  #
14
- # Connective as solr params.
14
+ # Add a restriction to the connective.
15
15
  #
16
- def to_params #:nodoc:
17
- { :fq => to_boolean_phrase }
16
+ def add_restriction(field, restriction_type, value, negated = false)
17
+ @components << restriction_type.new(field, value, negated)
18
18
  end
19
19
 
20
20
  #
21
- # Express the connective as a Lucene boolean phrase.
21
+ # Add a shorthand restriction; the restriction type is determined by
22
+ # the value.
22
23
  #
23
- def to_boolean_phrase #:nodoc:
24
- phrase = if @components.length == 1
25
- @components.first.to_boolean_phrase
26
- else
27
- component_phrases = @components.map do |component|
28
- component.to_boolean_phrase
24
+ def add_shorthand_restriction(field, value, negated = false)
25
+ restriction_type =
26
+ case value
27
+ when Array then Restriction::AnyOf
28
+ when Range then Restriction::Between
29
+ else Restriction::EqualTo
29
30
  end
30
- "(#{component_phrases.join(" #{connector} ")})"
31
- end
32
- if negated?
33
- "-#{phrase}"
34
- else
35
- phrase
36
- end
31
+ add_restriction(field, restriction_type, value, negated)
32
+ end
33
+
34
+ #
35
+ # Add a negated restriction. The added restriction will match all
36
+ # documents that do not match the terms of the restriction.
37
+ #
38
+ def add_negated_restriction(field, restriction_type, value)
39
+ add_restriction(field, restriction_type, value, true)
40
+ end
41
+
42
+ #
43
+ # Add a negated shorthand restriction (see add_shorthand_restriction)
44
+ #
45
+ def add_negated_shorthand_restriction(field, value)
46
+ add_shorthand_restriction(field, value, true)
47
+ end
48
+
49
+ #
50
+ # Add a new conjunction and return it.
51
+ #
52
+ def add_conjunction
53
+ add_component(Conjunction.new)
54
+ end
55
+
56
+ #
57
+ # Add a new disjunction and return it.
58
+ #
59
+ def add_disjunction
60
+ add_component(Disjunction.new)
37
61
  end
38
62
 
39
63
  #
40
- # Add a component to the connective. All components must implement the
41
- # #to_boolean_phrase method.
64
+ # Add an arbitrary component to the connective, and return it.
65
+ # The component must respond to #to_boolean_phrase
42
66
  #
43
- def add_component(component) #:nodoc:
67
+ def add_component(component)
44
68
  @components << component
69
+ component
45
70
  end
46
71
 
72
+ #
73
+ # Express the connective as a Lucene boolean phrase.
74
+ #
75
+ def to_boolean_phrase #:nodoc:
76
+ unless @components.empty?
77
+ phrase =
78
+ if @components.length == 1
79
+ @components.first.to_boolean_phrase
80
+ else
81
+ component_phrases = @components.map do |component|
82
+ component.to_boolean_phrase
83
+ end
84
+ "(#{component_phrases.join(" #{connector} ")})"
85
+ end
86
+ if negated?
87
+ "-#{phrase}"
88
+ else
89
+ phrase
90
+ end
91
+ end
92
+ end
93
+
94
+ #
95
+ # Connectives can be negated during the process of denormalization that
96
+ # is performed when a disjunction contains a negated component. This
97
+ # method conforms to the duck type for all boolean query components.
98
+ #
47
99
  def negated?
48
100
  @negated
49
101
  end
50
102
 
103
+ #
104
+ # Returns a new connective that's a negated version of this one.
105
+ #
51
106
  def negate
52
- negated = self.class.new(@setup, !negated?)
107
+ negated = self.class.new(!negated?)
53
108
  for component in @components
54
109
  negated.add_component(component)
55
110
  end
@@ -67,6 +122,9 @@ module Sunspot
67
122
  end
68
123
  end
69
124
 
125
+ #
126
+ # Express this disjunction as a Lucene boolean phrase
127
+ #
70
128
  def to_boolean_phrase
71
129
  if @components.any? { |component| component.negated? }
72
130
  denormalize.to_boolean_phrase
@@ -76,16 +134,8 @@ module Sunspot
76
134
  end
77
135
 
78
136
  #
79
- # Add a conjunction to the disjunction. This overrides the method in
80
- # the Scope class since scopes are implicitly conjunctive and thus
81
- # can return themselves as a conjunction. Inside a disjunction, however,
82
- # a conjunction must explicitly be created.
137
+ # No-op - this is already a disjunction
83
138
  #
84
- def add_conjunction
85
- @components << conjunction = Conjunction.new(setup)
86
- conjunction
87
- end
88
-
89
139
  def add_disjunction
90
140
  self
91
141
  end
@@ -96,8 +146,17 @@ module Sunspot
96
146
  'OR'
97
147
  end
98
148
 
149
+ #
150
+ # If a disjunction contains negated components, it must be
151
+ # "denormalized", because the Lucene parser interprets any negated
152
+ # boolean phrase using AND semantics (this isn't a bug, it's just a
153
+ # subtlety of how Lucene parses queries). So, per De Morgan's law we
154
+ # create a negated conjunction and add to it all of our components,
155
+ # negated themselves, which creates a query whose Lucene semantics are
156
+ # in line with our intentions.
157
+ #
99
158
  def denormalize
100
- denormalized = self.class.inverse.new(@setup, !negated?)
159
+ denormalized = self.class.inverse.new(!negated?)
101
160
  for component in @components
102
161
  denormalized.add_component(component.negate)
103
162
  end
@@ -115,6 +174,10 @@ module Sunspot
115
174
  end
116
175
  end
117
176
 
177
+ def add_conjunction
178
+ self
179
+ end
180
+
118
181
  private
119
182
 
120
183
  def connector