xapit 0.2.7 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. data/{CHANGELOG → CHANGELOG.rdoc} +7 -2
  2. data/Gemfile +19 -0
  3. data/LICENSE +4 -4
  4. data/README.rdoc +61 -108
  5. data/Rakefile +11 -10
  6. data/features/facets.feature +93 -82
  7. data/features/finding.feature +196 -138
  8. data/features/indexing.feature +35 -37
  9. data/features/remote_server.feature +10 -0
  10. data/features/step_definitions/xapit_steps.rb +53 -25
  11. data/features/suggestions.feature +20 -14
  12. data/features/support/env.rb +13 -6
  13. data/features/support/xapit_helpers.rb +8 -9
  14. data/lib/generators/xapit/install_generator.rb +14 -0
  15. data/lib/generators/xapit/templates/xapit.ru +6 -0
  16. data/lib/generators/xapit/templates/xapit.yml +11 -0
  17. data/lib/xapit.rb +106 -64
  18. data/lib/xapit/client/collection.rb +150 -0
  19. data/lib/xapit/client/facet.rb +11 -0
  20. data/lib/xapit/client/facet_option.rb +29 -0
  21. data/lib/xapit/client/index_builder.rb +67 -0
  22. data/lib/xapit/client/membership.rb +46 -0
  23. data/lib/xapit/client/model_adapters/abstract_model_adapter.rb +30 -0
  24. data/lib/xapit/client/model_adapters/active_record_adapter.rb +27 -0
  25. data/lib/xapit/client/model_adapters/default_model_adapter.rb +7 -0
  26. data/lib/xapit/client/railtie.rb +18 -0
  27. data/lib/xapit/client/remote_database.rb +21 -0
  28. data/lib/xapit/client/tasks.rb +18 -0
  29. data/lib/xapit/server/app.rb +27 -0
  30. data/lib/xapit/server/database.rb +47 -0
  31. data/lib/xapit/server/indexer.rb +138 -0
  32. data/lib/xapit/server/query.rb +240 -0
  33. data/spec/fixtures/blankdb/flintlock +0 -0
  34. data/spec/fixtures/blankdb/iamchert +1 -0
  35. data/spec/fixtures/blankdb/postlist.DB +0 -0
  36. data/spec/fixtures/blankdb/postlist.baseA +0 -0
  37. data/spec/fixtures/blankdb/record.DB +0 -0
  38. data/spec/fixtures/blankdb/record.baseA +0 -0
  39. data/spec/fixtures/blankdb/termlist.DB +0 -0
  40. data/spec/fixtures/blankdb/termlist.baseA +0 -0
  41. data/spec/fixtures/xapit.ru +13 -0
  42. data/spec/fixtures/xapit.yml +4 -0
  43. data/spec/spec_helper.rb +8 -9
  44. data/spec/support/spec_macros.rb +6 -0
  45. data/spec/{xapit_member.rb → support/xapit_member.rb} +14 -16
  46. data/spec/xapit/client/collection_spec.rb +63 -0
  47. data/spec/xapit/client/facet_option_spec.rb +26 -0
  48. data/spec/xapit/client/facet_spec.rb +13 -0
  49. data/spec/xapit/client/index_builder_spec.rb +66 -0
  50. data/spec/xapit/client/membership_spec.rb +43 -0
  51. data/spec/xapit/client/model_adapters/active_record_adapter_spec.rb +62 -0
  52. data/spec/xapit/client/model_adapters/default_model_adapter_spec.rb +7 -0
  53. data/spec/xapit/client/remote_database_spec.rb +19 -0
  54. data/spec/xapit/server/app_spec.rb +22 -0
  55. data/spec/xapit/server/database_spec.rb +37 -0
  56. data/spec/xapit/server/indexer_spec.rb +82 -0
  57. data/spec/xapit/server/query_spec.rb +43 -0
  58. data/spec/xapit/xapit_spec.rb +28 -0
  59. metadata +124 -93
  60. data/Manifest +0 -60
  61. data/features/sorting.feature +0 -29
  62. data/init.rb +0 -1
  63. data/install.rb +0 -8
  64. data/lib/xapit/adapters/abstract_adapter.rb +0 -47
  65. data/lib/xapit/adapters/active_record_adapter.rb +0 -20
  66. data/lib/xapit/adapters/data_mapper_adapter.rb +0 -10
  67. data/lib/xapit/collection.rb +0 -187
  68. data/lib/xapit/config.rb +0 -84
  69. data/lib/xapit/facet.rb +0 -67
  70. data/lib/xapit/facet_blueprint.rb +0 -59
  71. data/lib/xapit/facet_option.rb +0 -56
  72. data/lib/xapit/index_blueprint.rb +0 -147
  73. data/lib/xapit/indexers/abstract_indexer.rb +0 -116
  74. data/lib/xapit/indexers/classic_indexer.rb +0 -29
  75. data/lib/xapit/indexers/simple_indexer.rb +0 -38
  76. data/lib/xapit/membership.rb +0 -137
  77. data/lib/xapit/query.rb +0 -89
  78. data/lib/xapit/query_parsers/abstract_query_parser.rb +0 -174
  79. data/lib/xapit/query_parsers/classic_query_parser.rb +0 -29
  80. data/lib/xapit/query_parsers/simple_query_parser.rb +0 -75
  81. data/lib/xapit/rake_tasks.rb +0 -13
  82. data/rails_generators/xapit/USAGE +0 -13
  83. data/rails_generators/xapit/templates/setup_xapit.rb +0 -1
  84. data/rails_generators/xapit/templates/xapit.rake +0 -4
  85. data/rails_generators/xapit/xapit_generator.rb +0 -20
  86. data/spec/xapit/adapters/active_record_adapter_spec.rb +0 -31
  87. data/spec/xapit/adapters/data_mapper_adapter_spec.rb +0 -10
  88. data/spec/xapit/collection_spec.rb +0 -176
  89. data/spec/xapit/config_spec.rb +0 -62
  90. data/spec/xapit/facet_blueprint_spec.rb +0 -29
  91. data/spec/xapit/facet_option_spec.rb +0 -80
  92. data/spec/xapit/facet_spec.rb +0 -73
  93. data/spec/xapit/index_blueprint_spec.rb +0 -112
  94. data/spec/xapit/indexers/abstract_indexer_spec.rb +0 -111
  95. data/spec/xapit/indexers/classic_indexer_spec.rb +0 -35
  96. data/spec/xapit/indexers/simple_indexer_spec.rb +0 -69
  97. data/spec/xapit/membership_spec.rb +0 -55
  98. data/spec/xapit/query_parsers/abstract_query_parser_spec.rb +0 -60
  99. data/spec/xapit/query_parsers/classic_query_parser_spec.rb +0 -20
  100. data/spec/xapit/query_parsers/simple_query_parser_spec.rb +0 -86
  101. data/spec/xapit/query_spec.rb +0 -60
  102. data/tasks/spec.rb +0 -9
  103. data/tasks/xapit.rake +0 -1
  104. data/uninstall.rb +0 -5
  105. data/xapit.gemspec +0 -30
@@ -1,29 +0,0 @@
1
- module Xapit
2
- class ClassicIndexer < AbstractIndexer
3
- def index_text_attributes(member, document)
4
- term_generator.document = document
5
- @blueprint.text_attributes.each do |name, options|
6
- content = member.send(name)
7
- if options[:proc]
8
- index_terms(options[:proc].call(content.to_s).reject(&:blank?).map(&:to_s).map(&:downcase), document)
9
- elsif content.kind_of? Array
10
- index_terms(content.reject(&:blank?).map(&:to_s).map(&:downcase), document)
11
- else
12
- term_generator.index_text(content.to_s)
13
- end
14
- end
15
- end
16
-
17
- def term_generator
18
- @term_generator ||= create_term_generator
19
- end
20
-
21
- def create_term_generator
22
- term_generator = Xapian::TermGenerator.new
23
- term_generator.set_flags(Xapian::TermGenerator::FLAG_SPELLING, 0) if Config.spelling?
24
- term_generator.database = database
25
- term_generator.stemmer = Xapian::Stem.new(Config.stemming)
26
- term_generator
27
- end
28
- end
29
- end
@@ -1,38 +0,0 @@
1
- module Xapit
2
- class SimpleIndexer < AbstractIndexer
3
- def index_text_attributes(member, document)
4
- @blueprint.text_attributes.map do |name, options|
5
- terms_for_attribute(member, name, options).each do |term|
6
- document.add_term(term, options[:weight] || 1)
7
- database.add_spelling(term) if Config.spelling?
8
- end
9
- if Config.stemming
10
- stemmed_terms_for_attribute(member, name, options).each do |term|
11
- document.add_term(term, options[:weight] || 1)
12
- end
13
- end
14
- end
15
- end
16
-
17
- def stemmed_terms_for_attribute(member, name, options)
18
- terms_for_attribute(member, name, options).map do |term|
19
- "Z#{stemmer.call(term)}"
20
- end
21
- end
22
-
23
- def terms_for_attribute(member, name, options)
24
- content = member.send(name)
25
- if options[:proc]
26
- options[:proc].call(content.to_s).reject(&:blank?).map(&:to_s).map(&:downcase)
27
- elsif content.kind_of? Array
28
- content.reject(&:blank?).map(&:to_s).map(&:downcase)
29
- else
30
- content.to_s.scan(/\w+/u).map(&:downcase)
31
- end
32
- end
33
-
34
- def stemmer
35
- @stemmer ||= Xapian::Stem.new(Config.stemming)
36
- end
37
- end
38
- end
@@ -1,137 +0,0 @@
1
- module Xapit
2
- # Use "include Xapit::Membership" on a class to allow xapian searching on it. This is automatically included
3
- # in ActiveRecord::Base so you do not need to do anything there.
4
- module Membership
5
- def self.included(base)
6
- base.extend ClassMethods
7
- end
8
-
9
- module ClassMethods
10
- # Simply call "xapit" on a class and pass a block to define the indexed attributes.
11
- #
12
- # class Article < ActiveRecord::Base
13
- # xapit do |index|
14
- # index.text :name, :content
15
- # index.field :category_id
16
- # index.facet :author_name, "Author"
17
- # index.sortable :id, :category_id
18
- # end
19
- # end
20
- #
21
- # First we index "name" and "content" attributes for full text searching. The "category_id" field is indexed for :conditions searching. The "author_name" is indexed as a facet with "Author" being the display name of the facet. See the facets section below for details. Finally the "id" and "category_id" attributes are indexed as sortable attributes so they can be included in the :order option in a search.
22
- #
23
- # Because the indexing happens in Ruby these attributes do not have to be database columns. They can be simple Ruby methods. For example, the "author_name" attribute mentioned above can be defined like this.
24
- #
25
- # def author_name
26
- # author.name
27
- # end
28
- #
29
- # This way you can create a completely custom facet by simply defining your own method
30
- #
31
- # You can also pass any find options to the xapit method to determine what gets indexed and improve performance with eager loading or a different batch size.
32
- #
33
- # xapit(:batch_size => 100, :include => :author, :conditions => { :visible => true })
34
- #
35
- # If you pass in a block you can customize how the text words will be divided (instead of simply by white space).
36
- #
37
- # xapit do |index|
38
- # index.text(:keywords) { |words| words.split(', ') }
39
- # end
40
- #
41
- # You can specify a :weight option to give a text attribute more importance. This will cause search terms matching
42
- # that attribute to have a higher rank. The default weight is 1. Decimal (0.5) weight values are not supported.
43
- #
44
- # index.text :name, :weight => 10
45
- #
46
- def xapit(*args)
47
- @xapit_index_blueprint = IndexBlueprint.new(self, *args)
48
- yield(@xapit_index_blueprint)
49
- include AdditionalMethods
50
- include XapitSync::Membership if defined? XapitSync
51
- end
52
- end
53
-
54
- module AdditionalMethods
55
- def self.included(base)
56
- base.extend ClassMethods
57
- base.send(:attr_accessor, :xapit_relevance) # is there a better way to do this?
58
- end
59
-
60
- # Find similar records to the given model. It takes the same arguments as Membership::AdditionalMethods::ClassMethods#search to further narrow down the results.
61
- def search_similar(*args)
62
- Collection.search_similar(self, *args)
63
- end
64
-
65
- module ClassMethods
66
- # Used to perform a search on a model.
67
- #
68
- # # perform a simple full text search
69
- # @articles = Article.search("phone")
70
- #
71
- # # add pagination if you're using will_paginate
72
- # @articles = Article.search("phone", :per_page => 10, :page => params[:page])
73
- #
74
- # # search based on indexed fields
75
- # @articles = Article.search("phone", :conditions => { :category_id => params[:category_id] })
76
- #
77
- # # search for multiple negative conditions (doesn't match 3, 5, or 8)
78
- # @articles = Article.search(:not_conditions => { :category_id => [3, 5, 8] })
79
- #
80
- # # search for range of conditions by number
81
- # @articles = Article.search(:conditions => { :released_at => 2.years.ago..Time.now })
82
- #
83
- # # manually sort based on any number of indexed fields, sort defaults to most relevant
84
- # @articles = Article.search("phone", :order => [:category_id, :id], :descending => true)
85
- #
86
- # # basic boolean matching is supported
87
- # @articles = Article.search("phone OR fax NOT email")
88
- #
89
- # # field conditions in query string
90
- # @articles = Article.search("priority:3")
91
- #
92
- # # no need to specify first query string when searching all records
93
- # @articles = Article.search(:conditions => { :category_id => params[:category_id] })
94
- #
95
- # # search partial terms with asterisk (only supported at end of term)
96
- # @articles = Article.search("sab*", :conditions => { :name => "Din*" })
97
- #
98
- # # search multiple conditions with OR by passing an array
99
- # @articles = Article.search(:conditions => [{ :category_id => 1 }, { :priority => 2 }])
100
- #
101
- def search(*args)
102
- Collection.new(self, *args)
103
- end
104
-
105
- # The Xapit::IndexBlueprint object used for this class.
106
- def xapit_index_blueprint
107
- @xapit_index_blueprint
108
- end
109
-
110
- # The Xapit::AbstractAdapter used to perform database queries on.
111
- def xapit_adapter
112
- @xapit_adapter ||= begin
113
- adapter_class = AbstractAdapter.subclasses.detect { |a| a.for_class?(self) }
114
- if adapter_class
115
- adapter_class.new(self)
116
- else
117
- raise "Unable to find Xapit adapter for class #{self.name}"
118
- end
119
- end
120
- end
121
-
122
- # Finds a Xapit::FacetBlueprint for the given attribute.
123
- def xapit_facet_blueprint(attribute)
124
- result = xapit_index_blueprint.facets.detect { |f| f.attribute.to_s == attribute.to_s }
125
- raise "Unable to find facet blueprint for #{attribute} on #{name}" if result.nil?
126
- result
127
- end
128
- end
129
- end
130
- end
131
- end
132
-
133
- if defined? ActiveRecord
134
- ActiveRecord::Base.class_eval do
135
- include Xapit::Membership
136
- end
137
- end
@@ -1,89 +0,0 @@
1
- module Xapit
2
- # This class wraps a Xapian::Query for convenience purposes. You will likely not need to use
3
- # this class unless you are trying to query the Xapian database directly.
4
- # You may be looking for Xapit::Collection instead.
5
- class Query
6
- attr_reader :xapian_query
7
-
8
- def initialize(*args)
9
- @xapian_query = build_xapian_query(*args)
10
- end
11
-
12
- def and_query(*args)
13
- merge_query(:and, *args)
14
- end
15
-
16
- def or_query(*args)
17
- merge_query(:or, *args)
18
- end
19
-
20
- def not_query(*args)
21
- merge_query(:not, *args)
22
- end
23
-
24
- def matchset(options = {})
25
- options.reverse_merge! :offset => 0, :sort_descending => false
26
- enquire = Xapian::Enquire.new(Config.database)
27
- if options[:sort_by_values]
28
- sorter = Xapian::MultiValueSorter.new
29
- options[:sort_by_values].each do |sort_value|
30
- sorter.add(sort_value, !!options[:sort_descending])
31
- end
32
- enquire.set_sort_by_key_then_relevance(sorter)
33
- end
34
- enquire.collapse_key = options[:collapse_key] if options[:collapse_key]
35
- enquire.query = @xapian_query
36
- enquire.mset(options[:offset], options[:limit])
37
- end
38
-
39
- def matches(options = {})
40
- matchset(options).matches
41
- end
42
-
43
- def count
44
- # a bit of a hack to get more accurate count estimate
45
- @count ||= matchset(:limit => Config.database.doccount).matches_estimated
46
- end
47
-
48
- private
49
-
50
- def merge_query(operator, *args)
51
- if args.first.blank?
52
- self
53
- else
54
- Xapit::Query.new([@xapian_query, build_xapian_query(*args)], operator)
55
- end
56
- end
57
-
58
- def build_xapian_query(query, operator = :and)
59
- extract_queries(query, operator).inject(nil) do |query, extra_query|
60
- if query
61
- extra_query = extra_query.xapian_query if extra_query.respond_to? :xapian_query
62
- Xapian::Query.new(xapian_operator(operator), query, extra_query)
63
- else
64
- extra_query = extra_query.xapian_query if extra_query.respond_to? :xapian_query
65
- extra_query
66
- end
67
- end
68
- end
69
-
70
- def extract_queries(query, operator)
71
- queries = [query].flatten
72
- terms = queries.select { |q| q.kind_of? String }
73
- if terms.empty?
74
- queries
75
- else
76
- (queries - terms) + [Xapian::Query.new(xapian_operator(operator), terms)]
77
- end
78
- end
79
-
80
- def xapian_operator(operator)
81
- case operator
82
- when :and then Xapian::Query::OP_AND
83
- when :or then Xapian::Query::OP_OR
84
- when :not then Xapian::Query::OP_AND_NOT
85
- else raise "Unknown Xapian operator #{operator}"
86
- end
87
- end
88
- end
89
- end
@@ -1,174 +0,0 @@
1
- module Xapit
2
- class AbstractQueryParser
3
- attr_reader :member_class, :options
4
- attr_writer :base_query
5
- attr_accessor :extra_queries
6
-
7
- def initialize(*args)
8
- @options = args.extract_options!
9
- @member_class = args[0]
10
- @search_text = args[1].to_s
11
- @extra_queries = []
12
- end
13
-
14
- def query
15
- if @extra_queries.blank?
16
- primary_query
17
- else
18
- Query.new([primary_query] + @extra_queries, :or)
19
- end
20
- end
21
-
22
- def primary_query
23
- if (@search_text.split + condition_terms + not_condition_terms + facet_terms).empty?
24
- base_query
25
- else
26
- @query ||= base_query.and_query(xapian_query_from_text(@search_text)).and_query(condition_terms + facet_terms).not_query(not_condition_terms)
27
- end
28
- end
29
-
30
- def current_page
31
- @options[:page] ? @options[:page].to_i : 1
32
- end
33
-
34
- def per_page
35
- @options[:per_page] ? @options[:per_page].to_i : 20
36
- end
37
-
38
- def offset
39
- per_page*(current_page-1)
40
- end
41
-
42
- def sort_by_values
43
- if @options[:order] && @member_class
44
- index = @member_class.xapit_index_blueprint
45
- if @options[:order].kind_of? Array
46
- @options[:order].map do |attribute|
47
- index.position_of_sortable(attribute)
48
- end
49
- else
50
- [index.position_of_sortable(@options[:order])]
51
- end
52
- end
53
- end
54
-
55
- def base_query
56
- @base_query ||= initial_query
57
- end
58
-
59
- def initial_query
60
- Query.new(initial_query_strings, :or)
61
- end
62
-
63
- def initial_query_strings
64
- if classes.empty?
65
- [""]
66
- else
67
- classes.map { |klass| "C#{klass.name}" }
68
- end
69
- end
70
-
71
- def classes
72
- (@options[:classes] || [@member_class]).compact
73
- end
74
-
75
- def condition_terms
76
- parse_conditions(@options[:conditions])
77
- end
78
-
79
- def not_condition_terms
80
- parse_conditions(@options[:not_conditions])
81
- end
82
-
83
- def facet_terms
84
- if @options[:facets]
85
- facet_identifiers.map do |identifier|
86
- "F#{identifier}"
87
- end
88
- else
89
- []
90
- end
91
- end
92
-
93
- def facet_identifiers
94
- @options[:facets].kind_of?(String) ? @options[:facets].split('-') : (@options[:facets] || [])
95
- end
96
-
97
- def spelling_suggestion
98
- raise "Spelling has been disabled. Enable spelling in Xapit.setup." unless Config.spelling?
99
- if [@search_text, *@search_text.scan(/\w+/)].all? { |term| term_suggestion(term).nil? }
100
- nil
101
- else
102
- return term_suggestion(@search_text) unless term_suggestion(@search_text).blank?
103
- @search_text.downcase.gsub(/\w+/) do |term|
104
- term_suggestion(term) || term
105
- end
106
- end
107
- end
108
-
109
- def term_suggestion(term)
110
- suggestion = Config.database.get_spelling_suggestion(term.downcase)
111
- suggestion.blank? ? nil : suggestion
112
- end
113
-
114
- def matchset(options = {})
115
- query.matchset(query_options.merge(options))
116
- end
117
-
118
- def query_options
119
- {
120
- :offset => offset,
121
- :limit => per_page,
122
- :sort_by_values => sort_by_values,
123
- :sort_descending => @options[:descending]
124
- }
125
- end
126
-
127
- private
128
-
129
- def parse_conditions(conditions)
130
- if conditions.kind_of? Array
131
- [Query.new(conditions.map { |hash| Query.new(condition_terms_from_hash(hash)) }, :or)]
132
- elsif conditions.kind_of? Hash
133
- condition_terms_from_hash(conditions)
134
- else
135
- []
136
- end
137
- end
138
-
139
- def condition_terms_from_hash(conditions)
140
- conditions.map do |name, value|
141
- if value.kind_of? Array
142
- Query.new(value.map { |v| condition_term(name, v) }, :or)
143
- else
144
- condition_term(name, value)
145
- end
146
- end.flatten
147
- end
148
-
149
- def condition_term(name, value)
150
- if value.kind_of?(Range) && @member_class
151
- position = @member_class.xapit_index_blueprint.position_of_field(name)
152
- Xapian::Query.new(Xapian::Query::OP_VALUE_RANGE, position, Xapit.serialize_value(value.begin), Xapit.serialize_value(value.end))
153
- elsif value.to_s.ends_with?("*") && value.to_s.strip.length > 2
154
- wildcard_query(value, "X#{name}-")
155
- else
156
- if value.kind_of? Time
157
- value = value.to_i
158
- elsif value.kind_of? Date
159
- value = value.to_time.to_i
160
- end
161
- "X#{name}-#{value.to_s.downcase}"
162
- end
163
- end
164
-
165
- # Expands the wildcard in the term (just at the end) and returns a query
166
- # which will match any term that starts with the given term.
167
- def wildcard_query(term, prefix = "")
168
- full_term = (prefix + term.downcase).sub(/\*$/, '') # remove asterisk at end if it exists
169
- parser = Xapian::QueryParser.new
170
- parser.database = Xapit::Config.database
171
- parser.parse_query(full_term[-1..-1], Xapian::QueryParser::FLAG_PARTIAL, full_term[0..-2])
172
- end
173
- end
174
- end