xapit 0.2.7 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (105) hide show
  1. data/{CHANGELOG → CHANGELOG.rdoc} +7 -2
  2. data/Gemfile +19 -0
  3. data/LICENSE +4 -4
  4. data/README.rdoc +61 -108
  5. data/Rakefile +11 -10
  6. data/features/facets.feature +93 -82
  7. data/features/finding.feature +196 -138
  8. data/features/indexing.feature +35 -37
  9. data/features/remote_server.feature +10 -0
  10. data/features/step_definitions/xapit_steps.rb +53 -25
  11. data/features/suggestions.feature +20 -14
  12. data/features/support/env.rb +13 -6
  13. data/features/support/xapit_helpers.rb +8 -9
  14. data/lib/generators/xapit/install_generator.rb +14 -0
  15. data/lib/generators/xapit/templates/xapit.ru +6 -0
  16. data/lib/generators/xapit/templates/xapit.yml +11 -0
  17. data/lib/xapit.rb +106 -64
  18. data/lib/xapit/client/collection.rb +150 -0
  19. data/lib/xapit/client/facet.rb +11 -0
  20. data/lib/xapit/client/facet_option.rb +29 -0
  21. data/lib/xapit/client/index_builder.rb +67 -0
  22. data/lib/xapit/client/membership.rb +46 -0
  23. data/lib/xapit/client/model_adapters/abstract_model_adapter.rb +30 -0
  24. data/lib/xapit/client/model_adapters/active_record_adapter.rb +27 -0
  25. data/lib/xapit/client/model_adapters/default_model_adapter.rb +7 -0
  26. data/lib/xapit/client/railtie.rb +18 -0
  27. data/lib/xapit/client/remote_database.rb +21 -0
  28. data/lib/xapit/client/tasks.rb +18 -0
  29. data/lib/xapit/server/app.rb +27 -0
  30. data/lib/xapit/server/database.rb +47 -0
  31. data/lib/xapit/server/indexer.rb +138 -0
  32. data/lib/xapit/server/query.rb +240 -0
  33. data/spec/fixtures/blankdb/flintlock +0 -0
  34. data/spec/fixtures/blankdb/iamchert +1 -0
  35. data/spec/fixtures/blankdb/postlist.DB +0 -0
  36. data/spec/fixtures/blankdb/postlist.baseA +0 -0
  37. data/spec/fixtures/blankdb/record.DB +0 -0
  38. data/spec/fixtures/blankdb/record.baseA +0 -0
  39. data/spec/fixtures/blankdb/termlist.DB +0 -0
  40. data/spec/fixtures/blankdb/termlist.baseA +0 -0
  41. data/spec/fixtures/xapit.ru +13 -0
  42. data/spec/fixtures/xapit.yml +4 -0
  43. data/spec/spec_helper.rb +8 -9
  44. data/spec/support/spec_macros.rb +6 -0
  45. data/spec/{xapit_member.rb → support/xapit_member.rb} +14 -16
  46. data/spec/xapit/client/collection_spec.rb +63 -0
  47. data/spec/xapit/client/facet_option_spec.rb +26 -0
  48. data/spec/xapit/client/facet_spec.rb +13 -0
  49. data/spec/xapit/client/index_builder_spec.rb +66 -0
  50. data/spec/xapit/client/membership_spec.rb +43 -0
  51. data/spec/xapit/client/model_adapters/active_record_adapter_spec.rb +62 -0
  52. data/spec/xapit/client/model_adapters/default_model_adapter_spec.rb +7 -0
  53. data/spec/xapit/client/remote_database_spec.rb +19 -0
  54. data/spec/xapit/server/app_spec.rb +22 -0
  55. data/spec/xapit/server/database_spec.rb +37 -0
  56. data/spec/xapit/server/indexer_spec.rb +82 -0
  57. data/spec/xapit/server/query_spec.rb +43 -0
  58. data/spec/xapit/xapit_spec.rb +28 -0
  59. metadata +124 -93
  60. data/Manifest +0 -60
  61. data/features/sorting.feature +0 -29
  62. data/init.rb +0 -1
  63. data/install.rb +0 -8
  64. data/lib/xapit/adapters/abstract_adapter.rb +0 -47
  65. data/lib/xapit/adapters/active_record_adapter.rb +0 -20
  66. data/lib/xapit/adapters/data_mapper_adapter.rb +0 -10
  67. data/lib/xapit/collection.rb +0 -187
  68. data/lib/xapit/config.rb +0 -84
  69. data/lib/xapit/facet.rb +0 -67
  70. data/lib/xapit/facet_blueprint.rb +0 -59
  71. data/lib/xapit/facet_option.rb +0 -56
  72. data/lib/xapit/index_blueprint.rb +0 -147
  73. data/lib/xapit/indexers/abstract_indexer.rb +0 -116
  74. data/lib/xapit/indexers/classic_indexer.rb +0 -29
  75. data/lib/xapit/indexers/simple_indexer.rb +0 -38
  76. data/lib/xapit/membership.rb +0 -137
  77. data/lib/xapit/query.rb +0 -89
  78. data/lib/xapit/query_parsers/abstract_query_parser.rb +0 -174
  79. data/lib/xapit/query_parsers/classic_query_parser.rb +0 -29
  80. data/lib/xapit/query_parsers/simple_query_parser.rb +0 -75
  81. data/lib/xapit/rake_tasks.rb +0 -13
  82. data/rails_generators/xapit/USAGE +0 -13
  83. data/rails_generators/xapit/templates/setup_xapit.rb +0 -1
  84. data/rails_generators/xapit/templates/xapit.rake +0 -4
  85. data/rails_generators/xapit/xapit_generator.rb +0 -20
  86. data/spec/xapit/adapters/active_record_adapter_spec.rb +0 -31
  87. data/spec/xapit/adapters/data_mapper_adapter_spec.rb +0 -10
  88. data/spec/xapit/collection_spec.rb +0 -176
  89. data/spec/xapit/config_spec.rb +0 -62
  90. data/spec/xapit/facet_blueprint_spec.rb +0 -29
  91. data/spec/xapit/facet_option_spec.rb +0 -80
  92. data/spec/xapit/facet_spec.rb +0 -73
  93. data/spec/xapit/index_blueprint_spec.rb +0 -112
  94. data/spec/xapit/indexers/abstract_indexer_spec.rb +0 -111
  95. data/spec/xapit/indexers/classic_indexer_spec.rb +0 -35
  96. data/spec/xapit/indexers/simple_indexer_spec.rb +0 -69
  97. data/spec/xapit/membership_spec.rb +0 -55
  98. data/spec/xapit/query_parsers/abstract_query_parser_spec.rb +0 -60
  99. data/spec/xapit/query_parsers/classic_query_parser_spec.rb +0 -20
  100. data/spec/xapit/query_parsers/simple_query_parser_spec.rb +0 -86
  101. data/spec/xapit/query_spec.rb +0 -60
  102. data/tasks/spec.rb +0 -9
  103. data/tasks/xapit.rake +0 -1
  104. data/uninstall.rb +0 -5
  105. data/xapit.gemspec +0 -30
@@ -1,29 +0,0 @@
1
- module Xapit
2
- class ClassicIndexer < AbstractIndexer
3
- def index_text_attributes(member, document)
4
- term_generator.document = document
5
- @blueprint.text_attributes.each do |name, options|
6
- content = member.send(name)
7
- if options[:proc]
8
- index_terms(options[:proc].call(content.to_s).reject(&:blank?).map(&:to_s).map(&:downcase), document)
9
- elsif content.kind_of? Array
10
- index_terms(content.reject(&:blank?).map(&:to_s).map(&:downcase), document)
11
- else
12
- term_generator.index_text(content.to_s)
13
- end
14
- end
15
- end
16
-
17
- def term_generator
18
- @term_generator ||= create_term_generator
19
- end
20
-
21
- def create_term_generator
22
- term_generator = Xapian::TermGenerator.new
23
- term_generator.set_flags(Xapian::TermGenerator::FLAG_SPELLING, 0) if Config.spelling?
24
- term_generator.database = database
25
- term_generator.stemmer = Xapian::Stem.new(Config.stemming)
26
- term_generator
27
- end
28
- end
29
- end
@@ -1,38 +0,0 @@
1
- module Xapit
2
- class SimpleIndexer < AbstractIndexer
3
- def index_text_attributes(member, document)
4
- @blueprint.text_attributes.map do |name, options|
5
- terms_for_attribute(member, name, options).each do |term|
6
- document.add_term(term, options[:weight] || 1)
7
- database.add_spelling(term) if Config.spelling?
8
- end
9
- if Config.stemming
10
- stemmed_terms_for_attribute(member, name, options).each do |term|
11
- document.add_term(term, options[:weight] || 1)
12
- end
13
- end
14
- end
15
- end
16
-
17
- def stemmed_terms_for_attribute(member, name, options)
18
- terms_for_attribute(member, name, options).map do |term|
19
- "Z#{stemmer.call(term)}"
20
- end
21
- end
22
-
23
- def terms_for_attribute(member, name, options)
24
- content = member.send(name)
25
- if options[:proc]
26
- options[:proc].call(content.to_s).reject(&:blank?).map(&:to_s).map(&:downcase)
27
- elsif content.kind_of? Array
28
- content.reject(&:blank?).map(&:to_s).map(&:downcase)
29
- else
30
- content.to_s.scan(/\w+/u).map(&:downcase)
31
- end
32
- end
33
-
34
- def stemmer
35
- @stemmer ||= Xapian::Stem.new(Config.stemming)
36
- end
37
- end
38
- end
@@ -1,137 +0,0 @@
1
- module Xapit
2
- # Use "include Xapit::Membership" on a class to allow xapian searching on it. This is automatically included
3
- # in ActiveRecord::Base so you do not need to do anything there.
4
- module Membership
5
- def self.included(base)
6
- base.extend ClassMethods
7
- end
8
-
9
- module ClassMethods
10
- # Simply call "xapit" on a class and pass a block to define the indexed attributes.
11
- #
12
- # class Article < ActiveRecord::Base
13
- # xapit do |index|
14
- # index.text :name, :content
15
- # index.field :category_id
16
- # index.facet :author_name, "Author"
17
- # index.sortable :id, :category_id
18
- # end
19
- # end
20
- #
21
- # First we index "name" and "content" attributes for full text searching. The "category_id" field is indexed for :conditions searching. The "author_name" is indexed as a facet with "Author" being the display name of the facet. See the facets section below for details. Finally the "id" and "category_id" attributes are indexed as sortable attributes so they can be included in the :order option in a search.
22
- #
23
- # Because the indexing happens in Ruby, these attributes do not have to be database columns. They can be simple Ruby methods. For example, the "author_name" attribute mentioned above can be defined like this.
24
- #
25
- # def author_name
26
- # author.name
27
- # end
28
- #
29
- # This way you can create a completely custom facet by simply defining your own method.
30
- #
31
- # You can also pass any find options to the xapit method to determine what gets indexed and improve performance with eager loading or a different batch size.
32
- #
33
- # xapit(:batch_size => 100, :include => :author, :conditions => { :visible => true })
34
- #
35
- # If you pass in a block you can customize how the text words will be divided (instead of simply by whitespace).
36
- #
37
- # xapit do |index|
38
- # index.text(:keywords) { |words| words.split(', ') }
39
- # end
40
- #
41
- # You can specify a :weight option to give a text attribute more importance. This will cause search terms matching
42
- # that attribute to have a higher rank. The default weight is 1. Decimal (0.5) weight values are not supported.
43
- #
44
- # index.text :name, :weight => 10
45
- #
46
- def xapit(*args)
47
- @xapit_index_blueprint = IndexBlueprint.new(self, *args)
48
- yield(@xapit_index_blueprint)
49
- include AdditionalMethods
50
- include XapitSync::Membership if defined? XapitSync
51
- end
52
- end
53
-
54
- module AdditionalMethods
55
- def self.included(base)
56
- base.extend ClassMethods
57
- base.send(:attr_accessor, :xapit_relevance) # is there a better way to do this?
58
- end
59
-
60
- # Find records similar to the given model. It takes the same arguments as Membership::AdditionalMethods::ClassMethods#search to further narrow down the results.
61
- def search_similar(*args)
62
- Collection.search_similar(self, *args)
63
- end
64
-
65
- module ClassMethods
66
- # Used to perform a search on a model.
67
- #
68
- # # perform a simple full text search
69
- # @articles = Article.search("phone")
70
- #
71
- # # add pagination if you're using will_paginate
72
- # @articles = Article.search("phone", :per_page => 10, :page => params[:page])
73
- #
74
- # # search based on indexed fields
75
- # @articles = Article.search("phone", :conditions => { :category_id => params[:category_id] })
76
- #
77
- # # search for multiple negative conditions (doesn't match 3, 5, or 8)
78
- # @articles = Article.search(:not_conditions => { :category_id => [3, 5, 8] })
79
- #
80
- # # search for range of conditions by number
81
- # @articles = Article.search(:conditions => { :released_at => 2.years.ago..Time.now })
82
- #
83
- # # manually sort based on any number of indexed fields, sort defaults to most relevant
84
- # @articles = Article.search("phone", :order => [:category_id, :id], :descending => true)
85
- #
86
- # # basic boolean matching is supported
87
- # @articles = Article.search("phone OR fax NOT email")
88
- #
89
- # # field conditions in query string
90
- # @articles = Article.search("priority:3")
91
- #
92
- # # no need to specify first query string when searching all records
93
- # @articles = Article.search(:conditions => { :category_id => params[:category_id] })
94
- #
95
- # # search partial terms with asterisk (only supported at end of term)
96
- # @articles = Article.search("sab*", :conditions => { :name => "Din*" })
97
- #
98
- # # search multiple conditions with OR by passing an array
99
- # @articles = Article.search(:conditions => [{ :category_id => 1 }, { :priority => 2 }])
100
- #
101
- def search(*args)
102
- Collection.new(self, *args)
103
- end
104
-
105
- # The Xapit::IndexBlueprint object used for this class.
106
- def xapit_index_blueprint
107
- @xapit_index_blueprint
108
- end
109
-
110
- # The Xapit::AbstractAdapter used to perform database queries.
111
- def xapit_adapter
112
- @xapit_adapter ||= begin
113
- adapter_class = AbstractAdapter.subclasses.detect { |a| a.for_class?(self) }
114
- if adapter_class
115
- adapter_class.new(self)
116
- else
117
- raise "Unable to find Xapit adapter for class #{self.name}"
118
- end
119
- end
120
- end
121
-
122
- # Finds a Xapit::FacetBlueprint for the given attribute.
123
- def xapit_facet_blueprint(attribute)
124
- result = xapit_index_blueprint.facets.detect { |f| f.attribute.to_s == attribute.to_s }
125
- raise "Unable to find facet blueprint for #{attribute} on #{name}" if result.nil?
126
- result
127
- end
128
- end
129
- end
130
- end
131
- end
132
-
133
- if defined? ActiveRecord
134
- ActiveRecord::Base.class_eval do
135
- include Xapit::Membership
136
- end
137
- end
@@ -1,89 +0,0 @@
1
- module Xapit
2
- # This class wraps a Xapian::Query for convenience purposes. You will likely not need to use
3
- # this class unless you are trying to query the Xapian database directly.
4
- # You may be looking for Xapit::Collection instead.
5
- class Query
6
- attr_reader :xapian_query
7
-
8
- def initialize(*args)
9
- @xapian_query = build_xapian_query(*args)
10
- end
11
-
12
- def and_query(*args)
13
- merge_query(:and, *args)
14
- end
15
-
16
- def or_query(*args)
17
- merge_query(:or, *args)
18
- end
19
-
20
- def not_query(*args)
21
- merge_query(:not, *args)
22
- end
23
-
24
- def matchset(options = {})
25
- options.reverse_merge! :offset => 0, :sort_descending => false
26
- enquire = Xapian::Enquire.new(Config.database)
27
- if options[:sort_by_values]
28
- sorter = Xapian::MultiValueSorter.new
29
- options[:sort_by_values].each do |sort_value|
30
- sorter.add(sort_value, !!options[:sort_descending])
31
- end
32
- enquire.set_sort_by_key_then_relevance(sorter)
33
- end
34
- enquire.collapse_key = options[:collapse_key] if options[:collapse_key]
35
- enquire.query = @xapian_query
36
- enquire.mset(options[:offset], options[:limit])
37
- end
38
-
39
- def matches(options = {})
40
- matchset(options).matches
41
- end
42
-
43
- def count
44
- # a bit of a hack to get more accurate count estimate
45
- @count ||= matchset(:limit => Config.database.doccount).matches_estimated
46
- end
47
-
48
- private
49
-
50
- def merge_query(operator, *args)
51
- if args.first.blank?
52
- self
53
- else
54
- Xapit::Query.new([@xapian_query, build_xapian_query(*args)], operator)
55
- end
56
- end
57
-
58
- def build_xapian_query(query, operator = :and)
59
- extract_queries(query, operator).inject(nil) do |query, extra_query|
60
- if query
61
- extra_query = extra_query.xapian_query if extra_query.respond_to? :xapian_query
62
- Xapian::Query.new(xapian_operator(operator), query, extra_query)
63
- else
64
- extra_query = extra_query.xapian_query if extra_query.respond_to? :xapian_query
65
- extra_query
66
- end
67
- end
68
- end
69
-
70
- def extract_queries(query, operator)
71
- queries = [query].flatten
72
- terms = queries.select { |q| q.kind_of? String }
73
- if terms.empty?
74
- queries
75
- else
76
- (queries - terms) + [Xapian::Query.new(xapian_operator(operator), terms)]
77
- end
78
- end
79
-
80
- def xapian_operator(operator)
81
- case operator
82
- when :and then Xapian::Query::OP_AND
83
- when :or then Xapian::Query::OP_OR
84
- when :not then Xapian::Query::OP_AND_NOT
85
- else raise "Unknown Xapian operator #{operator}"
86
- end
87
- end
88
- end
89
- end
@@ -1,174 +0,0 @@
1
- module Xapit
2
- class AbstractQueryParser
3
- attr_reader :member_class, :options
4
- attr_writer :base_query
5
- attr_accessor :extra_queries
6
-
7
- def initialize(*args)
8
- @options = args.extract_options!
9
- @member_class = args[0]
10
- @search_text = args[1].to_s
11
- @extra_queries = []
12
- end
13
-
14
- def query
15
- if @extra_queries.blank?
16
- primary_query
17
- else
18
- Query.new([primary_query] + @extra_queries, :or)
19
- end
20
- end
21
-
22
- def primary_query
23
- if (@search_text.split + condition_terms + not_condition_terms + facet_terms).empty?
24
- base_query
25
- else
26
- @query ||= base_query.and_query(xapian_query_from_text(@search_text)).and_query(condition_terms + facet_terms).not_query(not_condition_terms)
27
- end
28
- end
29
-
30
- def current_page
31
- @options[:page] ? @options[:page].to_i : 1
32
- end
33
-
34
- def per_page
35
- @options[:per_page] ? @options[:per_page].to_i : 20
36
- end
37
-
38
- def offset
39
- per_page*(current_page-1)
40
- end
41
-
42
- def sort_by_values
43
- if @options[:order] && @member_class
44
- index = @member_class.xapit_index_blueprint
45
- if @options[:order].kind_of? Array
46
- @options[:order].map do |attribute|
47
- index.position_of_sortable(attribute)
48
- end
49
- else
50
- [index.position_of_sortable(@options[:order])]
51
- end
52
- end
53
- end
54
-
55
- def base_query
56
- @base_query ||= initial_query
57
- end
58
-
59
- def initial_query
60
- Query.new(initial_query_strings, :or)
61
- end
62
-
63
- def initial_query_strings
64
- if classes.empty?
65
- [""]
66
- else
67
- classes.map { |klass| "C#{klass.name}" }
68
- end
69
- end
70
-
71
- def classes
72
- (@options[:classes] || [@member_class]).compact
73
- end
74
-
75
- def condition_terms
76
- parse_conditions(@options[:conditions])
77
- end
78
-
79
- def not_condition_terms
80
- parse_conditions(@options[:not_conditions])
81
- end
82
-
83
- def facet_terms
84
- if @options[:facets]
85
- facet_identifiers.map do |identifier|
86
- "F#{identifier}"
87
- end
88
- else
89
- []
90
- end
91
- end
92
-
93
- def facet_identifiers
94
- @options[:facets].kind_of?(String) ? @options[:facets].split('-') : (@options[:facets] || [])
95
- end
96
-
97
- def spelling_suggestion
98
- raise "Spelling has been disabled. Enable spelling in Xapit.setup." unless Config.spelling?
99
- if [@search_text, *@search_text.scan(/\w+/)].all? { |term| term_suggestion(term).nil? }
100
- nil
101
- else
102
- return term_suggestion(@search_text) unless term_suggestion(@search_text).blank?
103
- @search_text.downcase.gsub(/\w+/) do |term|
104
- term_suggestion(term) || term
105
- end
106
- end
107
- end
108
-
109
- def term_suggestion(term)
110
- suggestion = Config.database.get_spelling_suggestion(term.downcase)
111
- suggestion.blank? ? nil : suggestion
112
- end
113
-
114
- def matchset(options = {})
115
- query.matchset(query_options.merge(options))
116
- end
117
-
118
- def query_options
119
- {
120
- :offset => offset,
121
- :limit => per_page,
122
- :sort_by_values => sort_by_values,
123
- :sort_descending => @options[:descending]
124
- }
125
- end
126
-
127
- private
128
-
129
- def parse_conditions(conditions)
130
- if conditions.kind_of? Array
131
- [Query.new(conditions.map { |hash| Query.new(condition_terms_from_hash(hash)) }, :or)]
132
- elsif conditions.kind_of? Hash
133
- condition_terms_from_hash(conditions)
134
- else
135
- []
136
- end
137
- end
138
-
139
- def condition_terms_from_hash(conditions)
140
- conditions.map do |name, value|
141
- if value.kind_of? Array
142
- Query.new(value.map { |v| condition_term(name, v) }, :or)
143
- else
144
- condition_term(name, value)
145
- end
146
- end.flatten
147
- end
148
-
149
- def condition_term(name, value)
150
- if value.kind_of?(Range) && @member_class
151
- position = @member_class.xapit_index_blueprint.position_of_field(name)
152
- Xapian::Query.new(Xapian::Query::OP_VALUE_RANGE, position, Xapit.serialize_value(value.begin), Xapit.serialize_value(value.end))
153
- elsif value.to_s.ends_with?("*") && value.to_s.strip.length > 2
154
- wildcard_query(value, "X#{name}-")
155
- else
156
- if value.kind_of? Time
157
- value = value.to_i
158
- elsif value.kind_of? Date
159
- value = value.to_time.to_i
160
- end
161
- "X#{name}-#{value.to_s.downcase}"
162
- end
163
- end
164
-
165
- # Expands the wildcard in the term (just at the end) and returns a query
166
- # which will match any term that starts with the given term.
167
- def wildcard_query(term, prefix = "")
168
- full_term = (prefix + term.downcase).sub(/\*$/, '') # remove asterisk at end if it exists
169
- parser = Xapian::QueryParser.new
170
- parser.database = Xapit::Config.database
171
- parser.parse_query(full_term[-1..-1], Xapian::QueryParser::FLAG_PARTIAL, full_term[0..-2])
172
- end
173
- end
174
- end