sunspot 0.9.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (101) hide show
  1. data/History.txt +83 -0
  2. data/LICENSE +18 -0
  3. data/README.rdoc +154 -0
  4. data/Rakefile +9 -0
  5. data/TODO +9 -0
  6. data/VERSION.yml +4 -0
  7. data/bin/sunspot-configure-solr +46 -0
  8. data/bin/sunspot-solr +62 -0
  9. data/lib/light_config.rb +40 -0
  10. data/lib/sunspot.rb +469 -0
  11. data/lib/sunspot/adapters.rb +265 -0
  12. data/lib/sunspot/composite_setup.rb +186 -0
  13. data/lib/sunspot/configuration.rb +38 -0
  14. data/lib/sunspot/data_extractor.rb +47 -0
  15. data/lib/sunspot/dsl.rb +3 -0
  16. data/lib/sunspot/dsl/field_query.rb +72 -0
  17. data/lib/sunspot/dsl/fields.rb +86 -0
  18. data/lib/sunspot/dsl/query.rb +59 -0
  19. data/lib/sunspot/dsl/query_facet.rb +31 -0
  20. data/lib/sunspot/dsl/restriction.rb +25 -0
  21. data/lib/sunspot/dsl/scope.rb +193 -0
  22. data/lib/sunspot/dsl/search.rb +30 -0
  23. data/lib/sunspot/facet.rb +16 -0
  24. data/lib/sunspot/facet_data.rb +120 -0
  25. data/lib/sunspot/facet_row.rb +10 -0
  26. data/lib/sunspot/field.rb +157 -0
  27. data/lib/sunspot/field_factory.rb +126 -0
  28. data/lib/sunspot/indexer.rb +123 -0
  29. data/lib/sunspot/instantiated_facet.rb +42 -0
  30. data/lib/sunspot/instantiated_facet_row.rb +22 -0
  31. data/lib/sunspot/query.rb +191 -0
  32. data/lib/sunspot/query/base_query.rb +90 -0
  33. data/lib/sunspot/query/connective.rb +126 -0
  34. data/lib/sunspot/query/dynamic_query.rb +69 -0
  35. data/lib/sunspot/query/field_facet.rb +151 -0
  36. data/lib/sunspot/query/field_query.rb +63 -0
  37. data/lib/sunspot/query/pagination.rb +39 -0
  38. data/lib/sunspot/query/query_facet.rb +73 -0
  39. data/lib/sunspot/query/query_facet_row.rb +19 -0
  40. data/lib/sunspot/query/query_field_facet.rb +13 -0
  41. data/lib/sunspot/query/restriction.rb +233 -0
  42. data/lib/sunspot/query/scope.rb +165 -0
  43. data/lib/sunspot/query/sort.rb +36 -0
  44. data/lib/sunspot/query/sort_composite.rb +33 -0
  45. data/lib/sunspot/schema.rb +165 -0
  46. data/lib/sunspot/search.rb +219 -0
  47. data/lib/sunspot/search/hit.rb +66 -0
  48. data/lib/sunspot/session.rb +201 -0
  49. data/lib/sunspot/setup.rb +271 -0
  50. data/lib/sunspot/type.rb +200 -0
  51. data/lib/sunspot/util.rb +164 -0
  52. data/solr/etc/jetty.xml +212 -0
  53. data/solr/etc/webdefault.xml +379 -0
  54. data/solr/lib/jetty-6.1.3.jar +0 -0
  55. data/solr/lib/jetty-util-6.1.3.jar +0 -0
  56. data/solr/lib/jsp-2.1/ant-1.6.5.jar +0 -0
  57. data/solr/lib/jsp-2.1/core-3.1.1.jar +0 -0
  58. data/solr/lib/jsp-2.1/jsp-2.1.jar +0 -0
  59. data/solr/lib/jsp-2.1/jsp-api-2.1.jar +0 -0
  60. data/solr/lib/servlet-api-2.5-6.1.3.jar +0 -0
  61. data/solr/solr/conf/elevate.xml +36 -0
  62. data/solr/solr/conf/protwords.txt +21 -0
  63. data/solr/solr/conf/schema.xml +50 -0
  64. data/solr/solr/conf/solrconfig.xml +696 -0
  65. data/solr/solr/conf/stopwords.txt +57 -0
  66. data/solr/solr/conf/synonyms.txt +31 -0
  67. data/solr/start.jar +0 -0
  68. data/solr/webapps/solr.war +0 -0
  69. data/spec/api/adapters_spec.rb +33 -0
  70. data/spec/api/build_search_spec.rb +1039 -0
  71. data/spec/api/indexer_spec.rb +311 -0
  72. data/spec/api/query_spec.rb +153 -0
  73. data/spec/api/search_retrieval_spec.rb +362 -0
  74. data/spec/api/session_spec.rb +157 -0
  75. data/spec/api/spec_helper.rb +1 -0
  76. data/spec/api/sunspot_spec.rb +18 -0
  77. data/spec/integration/dynamic_fields_spec.rb +55 -0
  78. data/spec/integration/faceting_spec.rb +169 -0
  79. data/spec/integration/keyword_search_spec.rb +83 -0
  80. data/spec/integration/scoped_search_spec.rb +289 -0
  81. data/spec/integration/spec_helper.rb +1 -0
  82. data/spec/integration/stored_fields_spec.rb +10 -0
  83. data/spec/integration/test_pagination.rb +32 -0
  84. data/spec/mocks/adapters.rb +32 -0
  85. data/spec/mocks/blog.rb +3 -0
  86. data/spec/mocks/comment.rb +19 -0
  87. data/spec/mocks/connection.rb +84 -0
  88. data/spec/mocks/mock_adapter.rb +30 -0
  89. data/spec/mocks/mock_record.rb +48 -0
  90. data/spec/mocks/photo.rb +8 -0
  91. data/spec/mocks/post.rb +73 -0
  92. data/spec/mocks/user.rb +8 -0
  93. data/spec/spec_helper.rb +47 -0
  94. data/tasks/gemspec.rake +25 -0
  95. data/tasks/rcov.rake +28 -0
  96. data/tasks/rdoc.rake +22 -0
  97. data/tasks/schema.rake +19 -0
  98. data/tasks/spec.rake +24 -0
  99. data/tasks/todo.rake +4 -0
  100. data/templates/schema.xml.haml +24 -0
  101. metadata +246 -0
@@ -0,0 +1,123 @@
1
module Sunspot
  #
  # This class presents a service for adding, updating, and removing data
  # from the Solr index. An Indexer is constructed with a connection; the
  # Sunspot setup used to build each document is looked up from the class of
  # the model being indexed (see #setup_for).
  #
  class Indexer #:nodoc:
    include RSolr::Char

    #
    # ==== Parameters
    #
    # connection<Object>::
    #   connection that receives the add / delete requests issued by this
    #   indexer
    #
    def initialize(connection)
      @connection = connection
    end

    #
    # Construct a representation of the model for indexing and send it to the
    # connection for indexing. While a batch is open (see #start_batch) the
    # documents are queued instead of being sent.
    #
    # ==== Parameters
    #
    # model<Object>:: the model to index, or a collection of models
    #
    def add(model)
      documents = Array(model).map { |m| prepare(m) }
      if @batch.nil?
        add_documents(documents)
      else
        @batch.concat(documents)
      end
    end

    #
    # Remove the given model from the Solr index
    #
    def remove(model)
      @connection.delete_by_id(Adapters::InstanceAdapter.adapt(model).index_id)
    end

    #
    # Remove the document identified by a class name and primary key, without
    # needing an instance of the model.
    #
    def remove_by_id(class_name, id)
      @connection.delete_by_id(
        Adapters::InstanceAdapter.index_id_for(class_name, id)
      )
    end

    #
    # Delete all documents of the given class from Solr.
    #
    def remove_all(clazz)
      @connection.delete_by_query("type:#{escape(clazz.name)}")
    end

    #
    # Open a batch: subsequent calls to #add queue their documents until
    # #flush_batch is called.
    #
    def start_batch
      @batch = []
    end

    #
    # Send all documents queued since #start_batch and close the batch.
    #
    # The batch is closed *before* the documents are sent, so that a failure
    # in the connection cannot leave the indexer stuck in batch mode (where
    # later #add calls would be queued and never sent). Calling this method
    # when no batch is open is a no-op.
    #
    def flush_batch
      documents, @batch = @batch, nil
      add_documents(documents) if documents
      nil
    end

    private

    #
    # Build the document for a single model: the base id/type document, the
    # optional document boost, and every field contributed by the setup's
    # field factories.
    #
    def prepare(model)
      document = document_for(model)
      setup = setup_for(model)
      if (boost = setup.document_boost_for(model))
        document.attrs[:boost] = boost
      end
      setup.all_field_factories.each do |field_factory|
        field_factory.populate_document(document, model)
      end
      document
    end

    #
    # Send the given documents to the connection.
    #
    def add_documents(documents)
      @connection.add(documents)
    end

    #
    # All indexed documents index and store the +id+ and +type+ fields.
    # This method constructs the document containing those key-value
    # pairs. The +type+ field holds the names of all classes returned by
    # Util.superclasses_for for the model's class.
    #
    def document_for(model)
      RSolr::Message::Document.new(
        :id => Adapters::InstanceAdapter.adapt(model).index_id,
        :type => Util.superclasses_for(model.class).map { |clazz| clazz.name }
      )
    end

    #
    # Get the Setup object for the given object's class.
    #
    # ==== Parameters
    #
    # object<Object>:: The object whose setup is to be retrieved
    #
    # ==== Returns
    #
    # Sunspot::Setup:: The setup for the object's class
    #
    # ==== Raises
    #
    # NoSetupError:: if Setup.for returns nothing for the object's class
    #
    def setup_for(object)
      Setup.for(object.class) || raise(NoSetupError, "Sunspot is not configured for #{object.class.inspect}")
    end


    class <<self
      #
      # Delete all documents from the Solr index
      #
      # ==== Parameters
      #
      # connection<Solr::Connection>::
      #   connection to which to send the delete request
      def remove_all(connection)
        connection.delete_by_query("type:[* TO *]")
      end
    end
  end
end
@@ -0,0 +1,42 @@
1
module Sunspot
  #
  # InstantiatedFacet instances allow access to a model instance based on a
  # primary key stored in facet rows' values. The rows are hydrated lazily, but
  # all rows are hydrated the first time #instance is called on any of the rows.
  #
  # The #rows method returns InstantiatedFacetRow objects.
  #
  class InstantiatedFacet < Facet
    #
    # Hydrate all rows for the facet. For data accessors that can efficiently
    # batch load, this is more efficient than individually lazy-loading
    # instances for each row, but allows us to still stay lazy and not do work
    # in the persistent store if the instances are not needed.
    #
    # Rows whose value has no matching loaded instance get +nil+ assigned, so
    # they are not looked up again.
    #
    def populate_instances! #:nodoc:
      ids = rows.map { |row| row.value }
      reference_class = Sunspot::Util.full_const_get(@facet_data.reference.to_s)
      accessor = Adapters::DataAccessor.create(reference_class)
      # Index the loaded instances by the id reported by their adapter.
      instance_map = accessor.load_all(ids).inject({}) do |map, instance|
        map[Adapters::InstanceAdapter.adapt(instance).id] = instance
        map
      end
      rows.each do |row|
        row.instance = instance_map[row.value]
      end
    end

    #
    # Rows of this facet, as InstantiatedFacetRow objects.
    #
    # The result is memoized: #populate_instances! assigns instances to the
    # objects returned here, so callers must keep seeing the same row objects
    # for the hydrated instances to be visible.
    #
    def rows
      @rows ||= @facet_data.rows { |value, count| InstantiatedFacetRow.new(value, count, self) }
    end

    private

    #
    # Override the Facet#new_row method to return an InstantiatedFacetRow.
    #
    # NOTE(review): assumes +pair+ is a [value, count] pair, matching the
    # arguments #rows passes to InstantiatedFacetRow.new -- confirm against
    # the caller in Facet. The row initializer takes (value, count, facet),
    # so the pair has to be split before being passed on.
    #
    def new_row(pair)
      value, count = pair
      InstantiatedFacetRow.new(value, count, self)
    end
  end
end
@@ -0,0 +1,22 @@
1
module Sunspot
  #
  # A facet row that can resolve its value to a persistent object. The owning
  # facet is kept so that the first request for an instance can trigger a load
  # for every row of that facet at once.
  #
  class InstantiatedFacetRow < FacetRow
    # Assigned by the owning facet when it hydrates its rows.
    attr_writer :instance

    #
    # ==== Parameters
    #
    # value<Object>:: value of this row, passed on to FacetRow
    # count<Object>:: count for this row, passed on to FacetRow
    # facet<Object>:: facet that owns this row and can populate its instance
    #
    def initialize(value, count, facet)
      super(value, count)
      @facet = facet
    end

    #
    # Get the persistent object referenced by this row's value. Instances are
    # batch-lazy-loaded, which means that for a given facet, all of the
    # instances are loaded the first time any row's instance is requested.
    #
    # The check is on whether the instance has ever been assigned, not on
    # whether it is non-nil, so a row that was assigned +nil+ does not trigger
    # another load.
    #
    def instance
      @facet.populate_instances! unless defined?(@instance)
      @instance
    end
  end
end
@@ -0,0 +1,191 @@
1
+ %w(base_query scope field_query connective dynamic_query field_facet query_facet
2
+ query_facet_row query_field_facet pagination restriction sort
3
+ sort_composite).each do |file|
4
+ require File.join(File.dirname(__FILE__), 'query', file)
5
+ end
6
+
7
+ module Sunspot
8
+ module Query #:nodoc:
9
+ #
10
+ # This class encapsulates a query that is to be sent to Solr. The query is
11
+ # constructed in the block passed to the Sunspot.search method, using the
12
+ # Sunspot::DSL::Query interface. It can also be accessed directly by calling
13
+ # #query on a Search object (presumably a not-yet-run one created using
14
+ # Sunspot#new_search), which might be more suitable than the DSL when an
15
+ # intermediate object has responsibility for building the query dynamically.
16
+ #--
17
+ # Instances of Query, as well as all of the components it contains, respond to
18
+ # the #to_params method, which returns a hash of parameters in the format
19
+ # recognized by the solr-ruby API.
20
+ #
21
+ class Query < FieldQuery
22
+ attr_reader :query_facets #:nodoc:
23
+
24
+ def initialize(types, setup, configuration) #:nodoc:
25
+ @setup = setup
26
+ @components = []
27
+ @query_facets = {}
28
+ @components << @base_query = BaseQuery.new(types, setup)
29
+ @components << @pagination = Pagination.new(configuration)
30
+ @components << @sort = SortComposite.new
31
+ end
32
+
33
+ #
34
+ # Set the keywords for this query. Keywords are parsed with Solr's dismax
35
+ # handler.
36
+ #
37
+ def keywords=(keywords)
38
+ set_keywords(keywords)
39
+ end
40
+
41
+ #
42
+ # Add a component to the query. Used by objects that proxy to the query
43
+ # object.
44
+ #
45
+ # ==== Parameters
46
+ #
47
+ # component<~to_params>:: Query component to add.
48
+ #
49
+ def add_component(component) #:nodoc:
50
+ @components << component
51
+ end
52
+
53
+ #
54
+ # Sets @start and @rows instance variables using pagination semantics
55
+ #
56
+ # ==== Parameters
57
+ #
58
+ # page<Integer>:: Page on which to start
59
+ # per_page<Integer>::
60
+ # How many rows to display per page. Default taken from
61
+ # Sunspot.config.pagination.default_per_page
62
+ #
63
+ def paginate(page, per_page = nil)
64
+ @pagination.page, @pagination.per_page = page, per_page
65
+ end
66
+
67
+ #
68
+ # Add random ordering to the search. This can be added after other
69
+ # field-based sorts if desired.
70
+ #
71
+ def order_by_random
72
+ add_sort(Sort.new(RandomField.new))
73
+ end
74
+
75
+ #
76
+ # Representation of this query as solr-ruby parameters. Constructs the hash
77
+ # by deep-merging scope and facet parameters, adding in various other
78
+ # parameters from instance data.
79
+ #
80
+ # Note that solr-ruby takes the :q parameter as a separate argument; for
81
+ # the sake of consistency, the Query object ignores this fact (the Search
82
+ # object extracts it back out).
83
+ #
84
+ # ==== Returns
85
+ #
86
+ # Hash:: Representation of query in solr-ruby form
87
+ #
88
+ def to_params #:nodoc:
89
+ params = {}
90
+ query_components = []
91
+ for component in @components
92
+ Util.deep_merge!(params, component.to_params)
93
+ end
94
+ params
95
+ end
96
+
97
+ #
98
+ # Page that this query will return (used by Sunspot::Search to expose
99
+ # pagination)
100
+ #
101
+ # ==== Returns
102
+ #
103
+ # Integer:: Page number
104
+ #
105
+ def page #:nodoc:
106
+ @pagination.page
107
+ end
108
+
109
+ #
110
+ # Number of rows per page that this query will return (used by
111
+ # Sunspot::Search to expose pagination)
112
+ #
113
+ # ==== Returns
114
+ #
115
+ # Integer:: Rows per page
116
+ #
117
+ def per_page #:nodoc:
118
+ @pagination.per_page
119
+ end
120
+
121
+ #
122
+ # Get the query facet with the given name. Used by the Search object to
123
+ # match query facet results with the requested query facets.
124
+ #
125
+ def query_facet(name) #:nodoc:
126
+ @query_facets[name.to_sym]
127
+ end
128
+
129
+ #
130
+ # Add a Sort object into this query's sort composite.
131
+ #
132
+ def add_sort(sort) #:nodoc:
133
+ @sort << sort
134
+ end
135
+
136
+ #
137
+ # Set the keywords for this query, along with keyword options. See
138
+ # Query::BaseQuery for information on what the options do.
139
+ #
140
+ def set_keywords(keywords, options = {}) #:nodoc:
141
+ @base_query.keywords = keywords
142
+ @base_query.keyword_options = options
143
+ end
144
+
145
+ #
146
+ # Pass in search options as a hash. This is not the preferred way of
147
+ # building a Sunspot search, but it is made available as experience shows
148
+ # Ruby developers like to pass in hashes. Probably nice for quick one-offs
149
+ # on the console, anyway.
150
+ #
151
+ # ==== Options (+options+)
152
+ #
153
+ # :keywords:: Keyword string for fulltext search
154
+ # :conditions::
155
+ # Hash of key-value pairs, where keys are field names, and values are one
156
+ # of scalar, Array, or Range. Scalars are evaluated as EqualTo
157
+ # restrictions; Arrays are AnyOf restrictions, and Ranges are Between
158
+ # restrictions.
159
+ # :order::
160
+ # Order the search results. Either a string or array of strings of the
161
+ # form "field_name direction"
162
+ # :page::
163
+ # Page to use for pagination
164
+ # :per_page::
165
+ # Number of results to show per page
166
+ #
167
+ def options=(options) #:nodoc:
168
+ if options.has_key?(:keywords)
169
+ self.keywords = options[:keywords]
170
+ end
171
+ if options.has_key?(:conditions)
172
+ options[:conditions].each_pair do |field_name, value|
173
+ begin
174
+ add_shorthand_restriction(field_name, value)
175
+ rescue UnrecognizedFieldError
176
+ # ignore fields we don't recognize
177
+ end
178
+ end
179
+ end
180
+ if options.has_key?(:order)
181
+ for order in Array(options[:order])
182
+ order_by(*order.split(' '))
183
+ end
184
+ end
185
+ if options.has_key?(:page)
186
+ paginate(options[:page], options[:per_page])
187
+ end
188
+ end
189
+ end
190
+ end
191
+ end
@@ -0,0 +1,90 @@
1
module Sunspot
  module Query
    #
    # Encapsulates information common to all queries - in particular, keywords
    # and types.
    #
    class BaseQuery #:nodoc:
      include RSolr::Char

      attr_writer :keywords

      #
      # ==== Parameters
      #
      # types<Array>:: classes under query; each must respond to #name
      # setup<Object>:: setup used to resolve the text fields for keyword search
      #
      def initialize(types, setup)
        @types, @setup = types, setup
      end

      #
      # Generate params for the base query. If keywords are specified, build
      # params for a dismax query, request all stored fields plus the score,
      # and put the types in a filter query. If keywords are not specified,
      # put the types query in the q parameter.
      #
      # ==== Returns
      #
      # Hash:: parameters for the base query
      #
      def to_params
        params = {}
        if @keywords
          params[:q] = @keywords
          params[:fl] = '* score'
          params[:fq] = types_phrase
          params[:qf] = text_field_names.join(' ')
          params[:defType] = 'dismax'
        else
          params[:q] = types_phrase
        end
        params
      end

      #
      # Set keyword options. Only the :fields option is read; it names the
      # text field(s) to which the keyword search is restricted.
      #
      # The caller's hash is read but never modified, so the same options
      # hash can safely be reused for several searches.
      #
      def keyword_options=(options)
        if options
          @text_field_names = options[:fields]
        end
      end

      private

      #
      # Boolean phrase that restricts results to objects of the type(s) under
      # query. A single type yields "type:Name"; any other number of types
      # yields a parenthesized list joined with OR.
      #
      # NOTE(review): no special case exists here for an open query (no types
      # specified); an empty type list produces "type:()". Confirm that
      # callers never pass an empty list.
      #
      # ==== Returns
      #
      # String:: Boolean phrase for type restriction
      #
      def types_phrase
        if escaped_types.length == 1 then "type:#{escaped_types.first}"
        else "type:(#{escaped_types * ' OR '})"
        end
      end

      #
      # Names of the types under query, passed through #escape so that names
      # of the form Namespace::Class can be used in the query. #escape is not
      # defined in this class; presumably it comes from RSolr::Char.
      # The result is memoized.
      #
      def escaped_types
        @escaped_types ||=
          @types.map { |type| escape(type.name)}
      end

      #
      # Returns the names of text fields that should be queried in a keyword
      # search. If specific fields are requested, use those; otherwise use the
      # text fields reported by the setup.
      #
      # ==== Returns
      #
      # Array:: indexed names of the text fields to query
      #
      def text_field_names
        text_fields =
          if @text_field_names
            Array(@text_field_names).map do |field_name|
              @setup.text_field(field_name.to_sym)
            end
          else
            @setup.text_fields
          end
        text_fields.map do |text_field|
          text_field.indexed_name
        end
      end
    end
  end
end