sunspot 0.9.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. data/History.txt +83 -0
  2. data/LICENSE +18 -0
  3. data/README.rdoc +154 -0
  4. data/Rakefile +9 -0
  5. data/TODO +9 -0
  6. data/VERSION.yml +4 -0
  7. data/bin/sunspot-configure-solr +46 -0
  8. data/bin/sunspot-solr +62 -0
  9. data/lib/light_config.rb +40 -0
  10. data/lib/sunspot.rb +469 -0
  11. data/lib/sunspot/adapters.rb +265 -0
  12. data/lib/sunspot/composite_setup.rb +186 -0
  13. data/lib/sunspot/configuration.rb +38 -0
  14. data/lib/sunspot/data_extractor.rb +47 -0
  15. data/lib/sunspot/dsl.rb +3 -0
  16. data/lib/sunspot/dsl/field_query.rb +72 -0
  17. data/lib/sunspot/dsl/fields.rb +86 -0
  18. data/lib/sunspot/dsl/query.rb +59 -0
  19. data/lib/sunspot/dsl/query_facet.rb +31 -0
  20. data/lib/sunspot/dsl/restriction.rb +25 -0
  21. data/lib/sunspot/dsl/scope.rb +193 -0
  22. data/lib/sunspot/dsl/search.rb +30 -0
  23. data/lib/sunspot/facet.rb +16 -0
  24. data/lib/sunspot/facet_data.rb +120 -0
  25. data/lib/sunspot/facet_row.rb +10 -0
  26. data/lib/sunspot/field.rb +157 -0
  27. data/lib/sunspot/field_factory.rb +126 -0
  28. data/lib/sunspot/indexer.rb +123 -0
  29. data/lib/sunspot/instantiated_facet.rb +42 -0
  30. data/lib/sunspot/instantiated_facet_row.rb +22 -0
  31. data/lib/sunspot/query.rb +191 -0
  32. data/lib/sunspot/query/base_query.rb +90 -0
  33. data/lib/sunspot/query/connective.rb +126 -0
  34. data/lib/sunspot/query/dynamic_query.rb +69 -0
  35. data/lib/sunspot/query/field_facet.rb +151 -0
  36. data/lib/sunspot/query/field_query.rb +63 -0
  37. data/lib/sunspot/query/pagination.rb +39 -0
  38. data/lib/sunspot/query/query_facet.rb +73 -0
  39. data/lib/sunspot/query/query_facet_row.rb +19 -0
  40. data/lib/sunspot/query/query_field_facet.rb +13 -0
  41. data/lib/sunspot/query/restriction.rb +233 -0
  42. data/lib/sunspot/query/scope.rb +165 -0
  43. data/lib/sunspot/query/sort.rb +36 -0
  44. data/lib/sunspot/query/sort_composite.rb +33 -0
  45. data/lib/sunspot/schema.rb +165 -0
  46. data/lib/sunspot/search.rb +219 -0
  47. data/lib/sunspot/search/hit.rb +66 -0
  48. data/lib/sunspot/session.rb +201 -0
  49. data/lib/sunspot/setup.rb +271 -0
  50. data/lib/sunspot/type.rb +200 -0
  51. data/lib/sunspot/util.rb +164 -0
  52. data/solr/etc/jetty.xml +212 -0
  53. data/solr/etc/webdefault.xml +379 -0
  54. data/solr/lib/jetty-6.1.3.jar +0 -0
  55. data/solr/lib/jetty-util-6.1.3.jar +0 -0
  56. data/solr/lib/jsp-2.1/ant-1.6.5.jar +0 -0
  57. data/solr/lib/jsp-2.1/core-3.1.1.jar +0 -0
  58. data/solr/lib/jsp-2.1/jsp-2.1.jar +0 -0
  59. data/solr/lib/jsp-2.1/jsp-api-2.1.jar +0 -0
  60. data/solr/lib/servlet-api-2.5-6.1.3.jar +0 -0
  61. data/solr/solr/conf/elevate.xml +36 -0
  62. data/solr/solr/conf/protwords.txt +21 -0
  63. data/solr/solr/conf/schema.xml +50 -0
  64. data/solr/solr/conf/solrconfig.xml +696 -0
  65. data/solr/solr/conf/stopwords.txt +57 -0
  66. data/solr/solr/conf/synonyms.txt +31 -0
  67. data/solr/start.jar +0 -0
  68. data/solr/webapps/solr.war +0 -0
  69. data/spec/api/adapters_spec.rb +33 -0
  70. data/spec/api/build_search_spec.rb +1039 -0
  71. data/spec/api/indexer_spec.rb +311 -0
  72. data/spec/api/query_spec.rb +153 -0
  73. data/spec/api/search_retrieval_spec.rb +362 -0
  74. data/spec/api/session_spec.rb +157 -0
  75. data/spec/api/spec_helper.rb +1 -0
  76. data/spec/api/sunspot_spec.rb +18 -0
  77. data/spec/integration/dynamic_fields_spec.rb +55 -0
  78. data/spec/integration/faceting_spec.rb +169 -0
  79. data/spec/integration/keyword_search_spec.rb +83 -0
  80. data/spec/integration/scoped_search_spec.rb +289 -0
  81. data/spec/integration/spec_helper.rb +1 -0
  82. data/spec/integration/stored_fields_spec.rb +10 -0
  83. data/spec/integration/test_pagination.rb +32 -0
  84. data/spec/mocks/adapters.rb +32 -0
  85. data/spec/mocks/blog.rb +3 -0
  86. data/spec/mocks/comment.rb +19 -0
  87. data/spec/mocks/connection.rb +84 -0
  88. data/spec/mocks/mock_adapter.rb +30 -0
  89. data/spec/mocks/mock_record.rb +48 -0
  90. data/spec/mocks/photo.rb +8 -0
  91. data/spec/mocks/post.rb +73 -0
  92. data/spec/mocks/user.rb +8 -0
  93. data/spec/spec_helper.rb +47 -0
  94. data/tasks/gemspec.rake +25 -0
  95. data/tasks/rcov.rake +28 -0
  96. data/tasks/rdoc.rake +22 -0
  97. data/tasks/schema.rake +19 -0
  98. data/tasks/spec.rake +24 -0
  99. data/tasks/todo.rake +4 -0
  100. data/templates/schema.xml.haml +24 -0
  101. metadata +246 -0
@@ -0,0 +1,123 @@
1
+ module Sunspot
2
+ #
3
+ # This class presents a service for adding, updating, and removing data
4
+ # from the Solr index. An Indexer instance is bound to a single connection;
5
+ # the setup for each model is looked up from the model's class when it is
6
+ # indexed, so one instance can index objects of any configured class.
7
+ #
8
+ class Indexer #:nodoc:
9
+ include RSolr::Char
10
+
11
#
# Create an indexer that issues all of its add and delete requests through
# the given connection.
#
# ==== Parameters
#
# connection<Object>:: connection that receives add/delete requests
#
def initialize(connection)
  @connection = connection
end
14
+
15
#
# Build an indexable document for each given model. When no batch is open
# the documents are sent to the connection right away; otherwise they are
# queued on the current batch until it is flushed.
#
# ==== Parameters
#
# model<Object>::
#   the model to index; anything Array() can coerce, so a single model or
#   an array of models
#
def add(model)
  prepared = Array(model).map { |record| prepare(record) }
  return add_documents(prepared) if @batch.nil?
  @batch.concat(prepared)
end
31
+
32
#
# Remove the given model from the Solr index. The document to delete is
# identified by the index ID that the model's instance adapter reports.
#
# ==== Parameters
#
# model<Object>:: the model whose document should be deleted
#
def remove(model)
  index_id = Adapters::InstanceAdapter.adapt(model).index_id
  @connection.delete_by_id(index_id)
end
38
+
39
#
# Remove a document from the Solr index without needing the model instance:
# the index ID is derived from the class name and the record's ID.
#
# ==== Parameters
#
# class_name<String>:: name of the class the document was indexed under
# id<Object>:: identifier of the record within that class
#
def remove_by_id(class_name, id)
  index_id = Adapters::InstanceAdapter.index_id_for(class_name, id)
  @connection.delete_by_id(index_id)
end
44
+
45
#
# Delete every document whose +type+ field matches the given class from
# Solr. The class name is escaped before being placed in the query.
#
# ==== Parameters
#
# clazz<Class>:: class whose documents should be deleted
#
def remove_all(clazz)
  type_name = escape(clazz.name)
  @connection.delete_by_query("type:#{type_name}")
end
51
+
52
#
# Open a batch. While a batch is open, #add queues prepared documents
# instead of sending them; #flush_batch sends them and closes the batch.
# Any previously queued, unflushed documents are discarded.
#
def start_batch
  @batch = Array.new
end
55
+
56
#
# Send every document queued since #start_batch to the connection and
# close the batch. Does nothing when no batch is open.
#
# The batch is closed even if sending raises, so a failed flush cannot
# leave the indexer silently queueing every later #add; the exception still
# propagates to the caller.
#
# ==== Returns
#
# nil
#
def flush_batch
  add_documents(@batch) unless @batch.nil?
  nil
ensure
  @batch = nil
end
60
+
61
+ private
62
+
63
#
# Convert a model into the document that will be sent to Solr: start from
# the base document (id and type), apply the setup's document boost if it
# supplies one, then let every field factory of the setup add its values.
#
# ==== Parameters
#
# model<Object>:: the model to convert
#
# ==== Returns
#
# Object:: the populated document returned by #document_for
#
def prepare(model)
  document = document_for(model)
  setup = setup_for(model)
  boost = setup.document_boost_for(model)
  document.attrs[:boost] = boost if boost
  setup.all_field_factories.each do |factory|
    factory.populate_document(document, model)
  end
  document
end
77
+
78
#
# Hand a collection of prepared documents to the connection for indexing.
#
# ==== Parameters
#
# documents<Array>:: documents produced by #prepare
#
def add_documents(documents)
  @connection.add(documents)
end
81
+
82
#
# Build the base document for a model. Every indexed document carries an
# +id+ field (the index ID reported by the model's instance adapter) and a
# +type+ field (the names of the classes returned by
# Util.superclasses_for for the model's class).
#
# ==== Parameters
#
# model<Object>:: the model being indexed
#
# ==== Returns
#
# RSolr::Message::Document:: document holding the id and type fields
#
def document_for(model)
  index_id = Adapters::InstanceAdapter.adapt(model).index_id
  type_names = Util.superclasses_for(model.class).map { |klass| klass.name }
  RSolr::Message::Document.new(:id => index_id, :type => type_names)
end
93
+
94
#
# Look up the Setup registered for the class of the given object.
#
# ==== Parameters
#
# object<Object>:: The object whose setup is to be retrieved
#
# ==== Returns
#
# Sunspot::Setup:: The setup for the object's class
#
# ==== Raises
#
# NoSetupError:: if Setup.for returns nothing for the object's class
#
def setup_for(object)
  setup = Setup.for(object.class)
  return setup if setup
  raise(NoSetupError, "Sunspot is not configured for #{object.class.inspect}")
end
108
+
109
+
110
#
# Delete all documents from the Solr index by matching any value of the
# +type+ field.
#
# ==== Parameters
#
# connection<Solr::Connection>::
#   connection to which to send the delete request
#
def self.remove_all(connection)
  connection.delete_by_query('type:[* TO *]')
end
122
+ end
123
+ end
@@ -0,0 +1,42 @@
1
+ module Sunspot
2
+ #
3
+ # InstantiatedFacet instances allow access to a model instance based on a
4
+ # primary key stored in facet rows' values. The rows are hydrated lazily, but
5
+ # all rows are hydrated the first time #instance is called on any of the rows.
6
+ #
7
+ # The #rows method returns InstantiatedFacetRow objects.
8
+ #
9
+ class InstantiatedFacet < Facet
10
#
# Hydrate all rows of the facet in one pass: collect every row value, load
# the matching instances of the facet's reference class through its data
# accessor, and assign each row the instance whose adapter ID equals the
# row's value. Rows with no matching instance are assigned nil.
#
# Loading everything at once lets accessors that can batch-load do so,
# while nothing is loaded at all unless some row's instance is requested.
#
def populate_instances! #:nodoc:
  row_values = rows.map { |row| row.value }
  reference_class = Sunspot::Util.full_const_get(@facet_data.reference.to_s)
  loaded = Adapters::DataAccessor.create(reference_class).load_all(row_values)
  instances_by_id = {}
  loaded.each do |instance|
    instances_by_id[Adapters::InstanceAdapter.adapt(instance).id] = instance
  end
  rows.each { |row| row.instance = instances_by_id[row.value] }
end
28
+
29
#
# Rows of this facet, built by asking the facet data for its rows and
# wrapping each value/count pair in an InstantiatedFacetRow that points
# back at this facet.
#
def rows
  @facet_data.rows do |value, count|
    InstantiatedFacetRow.new(value, count, self)
  end
end
32
+
33
+ private
34
+
35
#
# Override the Facet#new_row method to return an InstantiatedFacetRow.
#
# InstantiatedFacetRow#initialize takes (value, count, facet), so the pair
# is unpacked before the row is built; passing the pair as a single argument
# would raise ArgumentError.
#
# ==== Parameters
#
# pair<Array>:: two-element array, presumably [value, count] -- TODO confirm
#
def new_row(pair)
  value, count = pair
  InstantiatedFacetRow.new(value, count, self)
end
41
+ end
42
+ end
@@ -0,0 +1,22 @@
1
+ module Sunspot
2
+ class InstantiatedFacetRow < FacetRow
3
+ attr_writer :instance
4
+
5
#
# Create a row for the given value and count, remembering the facet it
# belongs to so that instances can later be loaded for all of the facet's
# rows at once.
#
# ==== Parameters
#
# value<Object>:: the facet value for this row
# count<Integer>:: the count reported for that value
# facet<Object>:: the owning facet; must respond to #populate_instances!
#
def initialize(value, count, facet)
  super(value, count)
  @facet = facet
end
9
+
10
#
# The persistent object referenced by this row's value. The first request
# on any row asks the facet to populate the instances of all its rows, so
# one load serves the whole facet. A row that was populated with nil is
# not loaded again, because the check is on whether the instance variable
# has been assigned, not on its value.
#
def instance
  @facet.populate_instances! unless defined?(@instance)
  @instance
end
21
+ end
22
+ end
@@ -0,0 +1,191 @@
1
+ %w(base_query scope field_query connective dynamic_query field_facet query_facet
2
+ query_facet_row query_field_facet pagination restriction sort
3
+ sort_composite).each do |file|
4
+ require File.join(File.dirname(__FILE__), 'query', file)
5
+ end
6
+
7
+ module Sunspot
8
+ module Query #:nodoc:
9
+ #
10
+ # This class encapsulates a query that is to be sent to Solr. The query is
11
+ # constructed in the block passed to the Sunspot.search method, using the
12
+ # Sunspot::DSL::Query interface. It can also be accessed directly by calling
13
+ # #query on a Search object (presumably a not-yet-run one created using
14
+ # Sunspot#new_search), which might be more suitable than the DSL when an
15
+ # intermediate object has responsibility for building the query dynamically.
16
+ #--
17
+ # Instances of Query, as well as all of the components it contains, respond to
18
+ # the #to_params method, which returns a hash of parameters in the format
19
+ # recognized by the solr-ruby API.
20
+ #
21
+ class Query < FieldQuery
22
+ attr_reader :query_facets #:nodoc:
23
+
24
#
# Set up the query with its three built-in components -- base query,
# pagination and sort composite -- registered in that order, plus an empty
# registry of query facets.
#
# ==== Parameters
#
# types<Array>:: types under search, passed on to the base query
# setup<Object>:: setup for those types, passed on to the base query
# configuration<Object>:: configuration passed on to the pagination component
#
def initialize(types, setup, configuration) #:nodoc:
  @setup = setup
  @query_facets = {}
  @base_query = BaseQuery.new(types, setup)
  @pagination = Pagination.new(configuration)
  @sort = SortComposite.new
  @components = [@base_query, @pagination, @sort]
end
32
+
33
#
# Set the keywords for this query, with no keyword options. Equivalent to
# calling #set_keywords with the keywords alone.
#
# ==== Parameters
#
# keywords<String>:: keyword string for the fulltext search
#
def keywords=(keywords)
  set_keywords(keywords)
end
40
+
41
#
# Append a component to the query. Components contribute their parameters
# when the query is rendered with #to_params. Used by objects that proxy to
# the query object.
#
# ==== Parameters
#
# component<~to_params>:: Query component to add.
#
def add_component(component) #:nodoc:
  @components.push(component)
end
52
+
53
#
# Record the requested page and page size on the pagination component.
#
# ==== Parameters
#
# page<Integer>:: Page on which to start
# per_page<Integer>::
#   How many rows to display per page. When omitted, nil is passed on; the
#   pagination component presumably falls back to the configured default
#   (Sunspot.config.pagination.default_per_page) -- confirm in Pagination.
#
# ==== Returns
#
# Array:: the [page, per_page] pair that was assigned
#
def paginate(page, per_page = nil)
  @pagination.page = page
  @pagination.per_page = per_page
  [page, per_page]
end
66
+
67
#
# Add random ordering to the search by appending a sort on a RandomField.
# This can be added after other field-based sorts if desired.
#
def order_by_random
  random_sort = Sort.new(RandomField.new)
  add_sort(random_sort)
end
74
+
75
#
# Representation of this query as a parameter hash. Each component's
# parameters are deep-merged, in the order the components were added, into
# a single hash.
#
# Note that the :q parameter is kept in the hash with everything else; for
# the sake of consistency the Query object does not treat it specially (the
# Search object extracts it back out).
#
# ==== Returns
#
# Hash:: Merged parameters of all components
#
def to_params #:nodoc:
  params = {}
  @components.each do |component|
    Util.deep_merge!(params, component.to_params)
  end
  params
end
96
+
97
#
# Page that this query will return, as reported by the pagination
# component (used by Sunspot::Search to expose pagination).
#
# ==== Returns
#
# Integer:: Page number
#
def page #:nodoc:
  @pagination.page
end
108
+
109
#
# Number of rows per page that this query will return, as reported by the
# pagination component (used by Sunspot::Search to expose pagination).
#
# ==== Returns
#
# Integer:: Rows per page
#
def per_page #:nodoc:
  @pagination.per_page
end
120
+
121
#
# Look up the query facet registered under the given name. The name is
# converted to a symbol first, so strings and symbols are interchangeable.
# Used by the Search object to match query facet results with the
# requested query facets.
#
# ==== Parameters
#
# name<String, Symbol>:: name of the query facet
#
def query_facet(name) #:nodoc:
  facet_key = name.to_sym
  @query_facets[facet_key]
end
128
+
129
#
# Append a Sort object to this query's sort composite.
#
# ==== Parameters
#
# sort<Object>:: the sort to append
#
def add_sort(sort) #:nodoc:
  @sort << sort
end
135
+
136
#
# Set the keywords for this query together with keyword options; both are
# handed to the base query. See Query::BaseQuery for information on what
# the options do.
#
# ==== Parameters
#
# keywords<String>:: keyword string for the fulltext search
# options<Hash>:: keyword options (defaults to an empty hash)
#
def set_keywords(keywords, options = {}) #:nodoc:
  base_query = @base_query
  base_query.keywords = keywords
  base_query.keyword_options = options
end
144
+
145
#
# Build the query from a hash of search options. This is not the preferred
# way of building a Sunspot search, but it is handy for quick one-offs.
# Only keys present in the hash are applied.
#
# ==== Options (+options+)
#
# :keywords:: Keyword string for fulltext search
# :conditions::
#   Hash of field name => value pairs, each added as a shorthand
#   restriction. Values are one of scalar, Array, or Range; the kind of
#   restriction each produces is decided by #add_shorthand_restriction.
#   Fields that raise UnrecognizedFieldError are skipped.
# :order::
#   Order the search results. Either a string or array of strings of the
#   form "field_name direction"
# :page::
#   Page to use for pagination
# :per_page::
#   Number of results to show per page. Only applied when :page is given.
#
def options=(options) #:nodoc:
  self.keywords = options[:keywords] if options.has_key?(:keywords)

  if options.has_key?(:conditions)
    options[:conditions].each_pair do |field_name, value|
      begin
        add_shorthand_restriction(field_name, value)
      rescue UnrecognizedFieldError
        # unknown fields are deliberately skipped rather than failing the search
      end
    end
  end

  if options.has_key?(:order)
    Array(options[:order]).each do |ordering|
      order_by(*ordering.split(' '))
    end
  end

  paginate(options[:page], options[:per_page]) if options.has_key?(:page)
end
189
+ end
190
+ end
191
+ end
@@ -0,0 +1,90 @@
1
+ module Sunspot
2
+ module Query
3
+ #
4
+ # Encapsulates information common to all queries - in particular, keywords
5
+ # and types.
6
+ #
7
+ class BaseQuery #:nodoc:
8
+ include RSolr::Char
9
+
10
+ attr_writer :keywords
11
+
12
#
# Create a base query for the given types.
#
# ==== Parameters
#
# types<Array>:: classes under search; each must respond to #name
# setup<Object>:: setup used to resolve text fields for keyword search
#
def initialize(types, setup)
  @types = types
  @setup = setup
end
15
+
16
#
# Generate params for the base query. Without keywords, the types phrase
# is the whole query (:q). With keywords, the keywords become :q for a
# dismax query over the text fields (:qf), all stored fields plus the score
# are requested (:fl), and the types phrase moves to a filter query (:fq).
#
# ==== Returns
#
# Hash:: parameters contributed by the base query
#
def to_params
  return { :q => types_phrase } unless @keywords

  {
    :q => @keywords,
    :fl => '* score',
    :fq => types_phrase,
    :qf => text_field_names.join(' '),
    :defType => 'dismax'
  }
end
35
+
36
#
# Set keyword options. The only option read is :fields, which names the
# text fields a keyword search should be limited to; when the hash has no
# :fields entry the restriction is cleared. Passing nil leaves the current
# setting untouched.
#
# The options hash is only read, never modified, so callers may reuse it
# for several queries or pass a frozen hash.
#
# ==== Parameters
#
# options<Hash>:: keyword options, or nil
#
def keyword_options=(options)
  @text_field_names = options[:fields] if options
end
44
+
45
+ private
46
+
47
#
# Boolean phrase that restricts results to objects of the type(s) under
# query. If this is an open query (no types specified) it returns a no-op
# phrase matching any value of the type field, because Solr requires that
# the :q parameter not be empty and an empty group ("type:()") is not a
# valid phrase.
#
# ==== Returns
#
# String:: Boolean phrase for type restriction
#
def types_phrase
  type_names = escaped_types
  case type_names.length
  when 0 then 'type:[* TO *]'
  when 1 then "type:#{type_names.first}"
  else "type:(#{type_names * ' OR '})"
  end
end
61
+
62
#
# Names of the types under query, each passed through #escape so that names
# of the form Namespace::Class are safe to embed in a query. The result is
# computed once and cached for the life of this object.
#
# ==== Returns
#
# Array:: escaped type names
#
def escaped_types
  unless @escaped_types
    @escaped_types = @types.map { |type| escape(type.name) }
  end
  @escaped_types
end
69
+
70
#
# Indexed names of the text fields that should be queried in a keyword
# search. If specific fields were requested through the keyword options,
# each is looked up on the setup by its symbolized name; otherwise all of
# the setup's text fields are used.
#
# ==== Returns
#
# Array:: indexed names of the text fields to search
#
def text_field_names
  requested = @text_field_names
  fields =
    if requested
      Array(requested).map { |name| @setup.text_field(name.to_sym) }
    else
      @setup.text_fields
    end
  fields.map { |field| field.indexed_name }
end
88
+ end
89
+ end
90
+ end