xapit 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (180) hide show
  1. data/LICENSE +20 -0
  2. data/Manifest +178 -0
  3. data/README.rdoc +183 -0
  4. data/Rakefile +15 -0
  5. data/TODO +23 -0
  6. data/features/facets.feature +51 -0
  7. data/features/finding.feature +119 -0
  8. data/features/indexing.feature +41 -0
  9. data/features/step_definitions/common_steps.rb +7 -0
  10. data/features/step_definitions/xapit_steps.rb +117 -0
  11. data/features/support/env.rb +7 -0
  12. data/features/support/xapit_helpers.rb +27 -0
  13. data/init.rb +3 -0
  14. data/install.rb +9 -0
  15. data/lib/xapit.rb +39 -0
  16. data/lib/xapit/collection.rb +165 -0
  17. data/lib/xapit/config.rb +83 -0
  18. data/lib/xapit/facet.rb +59 -0
  19. data/lib/xapit/facet_blueprint.rb +59 -0
  20. data/lib/xapit/facet_option.rb +56 -0
  21. data/lib/xapit/index_blueprint.rb +117 -0
  22. data/lib/xapit/indexers/abstract_indexer.rb +101 -0
  23. data/lib/xapit/indexers/classic_indexer.rb +27 -0
  24. data/lib/xapit/indexers/simple_indexer.rb +31 -0
  25. data/lib/xapit/membership.rb +103 -0
  26. data/lib/xapit/query.rb +62 -0
  27. data/lib/xapit/query_parsers/abstract_query_parser.rb +115 -0
  28. data/lib/xapit/query_parsers/classic_query_parser.rb +19 -0
  29. data/lib/xapit/query_parsers/simple_query_parser.rb +75 -0
  30. data/spec/spec_helper.rb +15 -0
  31. data/spec/tmp/xapdb/flintlock +0 -0
  32. data/spec/tmp/xapdb/iamflint +0 -0
  33. data/spec/tmp/xapdb/postlist.DB +0 -0
  34. data/spec/tmp/xapdb/postlist.baseA +0 -0
  35. data/spec/tmp/xapdb/postlist.baseB +0 -0
  36. data/spec/tmp/xapdb/record.DB +0 -0
  37. data/spec/tmp/xapdb/record.baseA +0 -0
  38. data/spec/tmp/xapdb/record.baseB +0 -0
  39. data/spec/tmp/xapdb/spelling.DB +0 -0
  40. data/spec/tmp/xapdb/spelling.baseA +0 -0
  41. data/spec/tmp/xapdb/spelling.baseB +0 -0
  42. data/spec/tmp/xapdb/termlist.DB +0 -0
  43. data/spec/tmp/xapdb/termlist.baseA +0 -0
  44. data/spec/tmp/xapdb/termlist.baseB +0 -0
  45. data/spec/tmp/xapian_db/flintlock +0 -0
  46. data/spec/tmp/xapian_db/iamflint +0 -0
  47. data/spec/tmp/xapian_db/postlist.DB +0 -0
  48. data/spec/tmp/xapian_db/postlist.baseA +0 -0
  49. data/spec/tmp/xapian_db/record.DB +0 -0
  50. data/spec/tmp/xapian_db/record.baseA +0 -0
  51. data/spec/tmp/xapian_db/termlist.DB +0 -0
  52. data/spec/tmp/xapian_db/termlist.baseA +0 -0
  53. data/spec/tmp/xapiandab/flintlock +0 -0
  54. data/spec/tmp/xapiandab/iamflint +0 -0
  55. data/spec/tmp/xapiandab/postlist.DB +0 -0
  56. data/spec/tmp/xapiandab/postlist.baseA +0 -0
  57. data/spec/tmp/xapiandab/postlist.baseB +0 -0
  58. data/spec/tmp/xapiandab/record.DB +0 -0
  59. data/spec/tmp/xapiandab/record.baseA +0 -0
  60. data/spec/tmp/xapiandab/record.baseB +0 -0
  61. data/spec/tmp/xapiandab/spelling.DB +0 -0
  62. data/spec/tmp/xapiandab/spelling.baseA +0 -0
  63. data/spec/tmp/xapiandab/spelling.baseB +0 -0
  64. data/spec/tmp/xapiandab/termlist.DB +0 -0
  65. data/spec/tmp/xapiandab/termlist.baseA +0 -0
  66. data/spec/tmp/xapiandab/termlist.baseB +0 -0
  67. data/spec/tmp/xapiandatab/flintlock +0 -0
  68. data/spec/tmp/xapiandatab/iamflint +0 -0
  69. data/spec/tmp/xapiandatab/postlist.DB +0 -0
  70. data/spec/tmp/xapiandatab/postlist.baseA +0 -0
  71. data/spec/tmp/xapiandatab/postlist.baseB +0 -0
  72. data/spec/tmp/xapiandatab/record.DB +0 -0
  73. data/spec/tmp/xapiandatab/record.baseA +0 -0
  74. data/spec/tmp/xapiandatab/record.baseB +0 -0
  75. data/spec/tmp/xapiandatab/spelling.DB +0 -0
  76. data/spec/tmp/xapiandatab/spelling.baseA +0 -0
  77. data/spec/tmp/xapiandatab/spelling.baseB +0 -0
  78. data/spec/tmp/xapiandatab/termlist.DB +0 -0
  79. data/spec/tmp/xapiandatab/termlist.baseA +0 -0
  80. data/spec/tmp/xapiandatab/termlist.baseB +0 -0
  81. data/spec/tmp/xapiandataba/flintlock +0 -0
  82. data/spec/tmp/xapiandataba/iamflint +0 -0
  83. data/spec/tmp/xapiandataba/postlist.DB +0 -0
  84. data/spec/tmp/xapiandataba/postlist.baseA +0 -0
  85. data/spec/tmp/xapiandataba/postlist.baseB +0 -0
  86. data/spec/tmp/xapiandataba/record.DB +0 -0
  87. data/spec/tmp/xapiandataba/record.baseA +0 -0
  88. data/spec/tmp/xapiandataba/record.baseB +0 -0
  89. data/spec/tmp/xapiandataba/spelling.DB +0 -0
  90. data/spec/tmp/xapiandataba/spelling.baseA +0 -0
  91. data/spec/tmp/xapiandataba/spelling.baseB +0 -0
  92. data/spec/tmp/xapiandataba/termlist.DB +0 -0
  93. data/spec/tmp/xapiandataba/termlist.baseA +0 -0
  94. data/spec/tmp/xapiandataba/termlist.baseB +0 -0
  95. data/spec/tmp/xapiandatabas/flintlock +0 -0
  96. data/spec/tmp/xapiandatabas/iamflint +0 -0
  97. data/spec/tmp/xapiandatabas/postlist.DB +0 -0
  98. data/spec/tmp/xapiandatabas/postlist.baseA +0 -0
  99. data/spec/tmp/xapiandatabas/record.DB +0 -0
  100. data/spec/tmp/xapiandatabas/record.baseA +0 -0
  101. data/spec/tmp/xapiandatabas/termlist.DB +0 -0
  102. data/spec/tmp/xapiandatabas/termlist.baseA +0 -0
  103. data/spec/tmp/xapiandatb/flintlock +0 -0
  104. data/spec/tmp/xapiandatb/iamflint +0 -0
  105. data/spec/tmp/xapiandatb/postlist.DB +0 -0
  106. data/spec/tmp/xapiandatb/postlist.baseA +0 -0
  107. data/spec/tmp/xapiandatb/postlist.baseB +0 -0
  108. data/spec/tmp/xapiandatb/record.DB +0 -0
  109. data/spec/tmp/xapiandatb/record.baseA +0 -0
  110. data/spec/tmp/xapiandatb/record.baseB +0 -0
  111. data/spec/tmp/xapiandatb/spelling.DB +0 -0
  112. data/spec/tmp/xapiandatb/spelling.baseA +0 -0
  113. data/spec/tmp/xapiandatb/spelling.baseB +0 -0
  114. data/spec/tmp/xapiandatb/termlist.DB +0 -0
  115. data/spec/tmp/xapiandatb/termlist.baseA +0 -0
  116. data/spec/tmp/xapiandatb/termlist.baseB +0 -0
  117. data/spec/tmp/xapiandbase/flintlock +0 -0
  118. data/spec/tmp/xapiandbase/iamflint +0 -0
  119. data/spec/tmp/xapiandbase/postlist.DB +0 -0
  120. data/spec/tmp/xapiandbase/postlist.baseA +0 -0
  121. data/spec/tmp/xapiandbase/postlist.baseB +0 -0
  122. data/spec/tmp/xapiandbase/record.DB +0 -0
  123. data/spec/tmp/xapiandbase/record.baseA +0 -0
  124. data/spec/tmp/xapiandbase/record.baseB +0 -0
  125. data/spec/tmp/xapiandbase/spelling.DB +0 -0
  126. data/spec/tmp/xapiandbase/spelling.baseA +0 -0
  127. data/spec/tmp/xapiandbase/spelling.baseB +0 -0
  128. data/spec/tmp/xapiandbase/termlist.DB +0 -0
  129. data/spec/tmp/xapiandbase/termlist.baseA +0 -0
  130. data/spec/tmp/xapiandbase/termlist.baseB +0 -0
  131. data/spec/xapit/collection_spec.rb +153 -0
  132. data/spec/xapit/config_spec.rb +48 -0
  133. data/spec/xapit/facet_blueprint_spec.rb +29 -0
  134. data/spec/xapit/facet_option_spec.rb +80 -0
  135. data/spec/xapit/facet_spec.rb +73 -0
  136. data/spec/xapit/index_blueprint_spec.rb +60 -0
  137. data/spec/xapit/indexers/abstract_indexer_spec.rb +74 -0
  138. data/spec/xapit/indexers/classic_indexer_spec.rb +26 -0
  139. data/spec/xapit/indexers/simple_indexer_spec.rb +53 -0
  140. data/spec/xapit/membership_spec.rb +39 -0
  141. data/spec/xapit/query_parsers/abstract_query_parser_spec.rb +23 -0
  142. data/spec/xapit/query_parsers/classic_query_parser_spec.rb +15 -0
  143. data/spec/xapit/query_parsers/simple_query_parser_spec.rb +86 -0
  144. data/spec/xapit/query_spec.rb +41 -0
  145. data/spec/xapit_member.rb +32 -0
  146. data/tasks/spec.rb +9 -0
  147. data/tasks/xapit.rake +9 -0
  148. data/tmp/xapiandatabase/flintlock +0 -0
  149. data/tmp/xapiandatabase/iamflint +0 -0
  150. data/tmp/xapiandatabase/postlist.DB +0 -0
  151. data/tmp/xapiandatabase/postlist.baseA +0 -0
  152. data/tmp/xapiandatabase/postlist.baseB +0 -0
  153. data/tmp/xapiandatabase/record.DB +0 -0
  154. data/tmp/xapiandatabase/record.baseA +0 -0
  155. data/tmp/xapiandatabase/record.baseB +0 -0
  156. data/tmp/xapiandatabase/spelling.DB +0 -0
  157. data/tmp/xapiandatabase/spelling.baseA +0 -0
  158. data/tmp/xapiandatabase/spelling.baseB +0 -0
  159. data/tmp/xapiandatabase/termlist.DB +0 -0
  160. data/tmp/xapiandatabase/termlist.baseA +0 -0
  161. data/tmp/xapiandatabase/termlist.baseB +0 -0
  162. data/tmp/xapiandatabase/value.baseB +0 -0
  163. data/tmp/xapiandb/flintlock +0 -0
  164. data/tmp/xapiandb/iamflint +0 -0
  165. data/tmp/xapiandb/postlist.DB +0 -0
  166. data/tmp/xapiandb/postlist.baseA +0 -0
  167. data/tmp/xapiandb/postlist.baseB +0 -0
  168. data/tmp/xapiandb/record.DB +0 -0
  169. data/tmp/xapiandb/record.baseA +0 -0
  170. data/tmp/xapiandb/record.baseB +0 -0
  171. data/tmp/xapiandb/spelling.DB +0 -0
  172. data/tmp/xapiandb/spelling.baseA +0 -0
  173. data/tmp/xapiandb/spelling.baseB +0 -0
  174. data/tmp/xapiandb/termlist.DB +0 -0
  175. data/tmp/xapiandb/termlist.baseA +0 -0
  176. data/tmp/xapiandb/termlist.baseB +0 -0
  177. data/tmp/xapiandb/value.baseB +0 -0
  178. data/uninstall.rb +5 -0
  179. data/xapit.gemspec +30 -0
  180. metadata +257 -0
@@ -0,0 +1,27 @@
1
module Xapit
  # Indexer that hands plain text attributes to a Xapian::TermGenerator instead of
  # splitting them into terms in Ruby.
  class ClassicIndexer < AbstractIndexer
    # Indexes every text attribute listed in the blueprint for +member+ into +document+.
    #
    # Attributes declared with a block (options[:proc]) are split by that block; blank
    # entries are dropped, the rest are stringified and downcased, then passed to
    # index_terms (presumably provided by AbstractIndexer -- not visible here).
    # All other attributes are fed to the term generator, using the attribute's
    # :weight option (default 1) so that weighting behaves the same as in SimpleIndexer.
    def index_text_attributes(member, document)
      term_generator.document = document
      @blueprint.text_attributes.each do |name, options|
        content = member.send(name).to_s
        if options[:proc]
          terms = options[:proc].call(content).reject(&:blank?).map(&:to_s).map(&:downcase)
          index_terms(terms, document)
        else
          term_generator.index_text(content, options[:weight] || 1)
        end
      end
    end

    # Memoized Xapian::TermGenerator shared by all documents indexed through this indexer.
    def term_generator
      @term_generator ||= create_term_generator
    end

    # Builds a term generator bound to the indexer's database.
    # Spelling data is only collected when Config.spelling? is true, and a stemmer is
    # only attached when Config.stemming is set (other parts of the library treat a
    # falsy Config.stemming as "stemming disabled").
    def create_term_generator
      term_generator = Xapian::TermGenerator.new
      term_generator.set_flags(Xapian::TermGenerator::FLAG_SPELLING, 0) if Config.spelling?
      term_generator.database = database
      term_generator.stemmer = Xapian::Stem.new(Config.stemming) if Config.stemming
      term_generator
    end
  end
end
@@ -0,0 +1,31 @@
1
module Xapit
  # Indexer that splits text attributes into terms in Ruby and adds them to the
  # document one by one.
  class SimpleIndexer < AbstractIndexer
    # Adds every term of every text attribute of +member+ to +document+.
    # Each term is added with the attribute's :weight option (default 1) and, when
    # Config.spelling? is true, is also registered as a spelling candidate.
    def index_text_attributes(member, document)
      @blueprint.text_attributes.each do |name, options|
        weight = options[:weight] || 1
        terms_for_attribute(member, name, options).each do |term|
          document.add_term(term, weight)
          database.add_spelling(term) if Config.spelling?
        end
      end
    end

    # Returns the terms for one attribute. When stemming is configured, each raw term
    # is followed by its stemmed form prefixed with "Z"; when Config.stemming is falsy
    # only the raw terms are returned (SimpleQueryParser applies the same guard).
    def terms_for_attribute(member, name, options)
      raw_terms = terms_for_attribute_without_stemming(member, name, options)
      return raw_terms unless Config.stemming
      raw_terms.map { |term| [term, "Z#{stemmer.call(term)}"] }.flatten
    end

    # Returns the downcased raw terms for one attribute.
    # With options[:proc] the block decides how the content is split (blank entries
    # are dropped); otherwise the content is split into runs of word characters.
    def terms_for_attribute_without_stemming(member, name, options)
      content = member.send(name).to_s
      if options[:proc]
        options[:proc].call(content).reject(&:blank?).map(&:to_s).map(&:downcase)
      else
        content.scan(/\w+/u).map(&:downcase)
      end
    end

    # Memoized Xapian stemmer for the configured stemming language.
    def stemmer
      @stemmer ||= Xapian::Stem.new(Config.stemming)
    end
  end
end
@@ -0,0 +1,103 @@
1
module Xapit
  # Use "include Xapit::Membership" on a class to allow xapian searching on it. This is automatically included
  # in ActiveRecord::Base so you do not need to do anything there.
  module Membership
    # Adds the "xapit" class method to any class including this module.
    def self.included(base)
      base.extend ClassMethods
    end

    module ClassMethods
      # Simply call "xapit" on a class and pass a block to define the indexed attributes.
      #
      #   class Article < ActiveRecord::Base
      #     xapit do |index|
      #       index.text :name, :content
      #       index.field :category_id
      #       index.facet :author_name, "Author"
      #       index.sortable :id, :category_id
      #     end
      #   end
      #
      # First we index "name" and "content" attributes for full text searching. The "category_id" field is indexed for :conditions searching. The "author_name" is indexed as a facet with "Author" being the display name of the facet. See the facets section below for details. Finally the "id" and "category_id" attributes are indexed as sortable attributes so they can be included in the :order option in a search.
      #
      # Because the indexing happens in Ruby these attributes do not have to be database columns. They can be simple Ruby methods. For example, the "author_name" attribute mentioned above can be defined like this.
      #
      #   def author_name
      #     author.name
      #   end
      #
      # This way you can create a completely custom facet by simply defining your own method.
      #
      # You can also pass any find options to the xapit method to determine what gets indexed and improve performance with eager loading or a different batch size.
      #
      #   xapit(:batch_size => 100, :include => :author, :conditions => { :visible => true })
      #
      # If you pass in a block you can customize how the text words will be divided (instead of by simply white space).
      #
      #   xapit do |index|
      #     index.text(:keywords) { |words| words.split(', ') }
      #   end
      #
      # You can specify a :weight option to give a text attribute more importance. This will cause search terms matching
      # that attribute to have a higher rank. The default weight is 1. Decimal (0.5) weight values are not supported.
      #
      #   index.text :name, :weight => 10
      #
      # The block is optional: when none is given the blueprint is still created from
      # the passed arguments and the search methods are still mixed in.
      def xapit(*args)
        @xapit_index_blueprint = IndexBlueprint.new(self, *args)
        yield(@xapit_index_blueprint) if block_given?
        include AdditionalMethods
      end
    end

    # Mixed into a class once "xapit" has been called on it.
    module AdditionalMethods
      # Adds the search class methods and an xapit_relevance accessor to the including class.
      def self.included(base)
        base.extend ClassMethods
        base.send(:attr_accessor, :xapit_relevance) # is there a better way to do this?
      end

      # Find similar records to the given model. It takes the same arguments as Membership::AdditionalMethods::ClassMethods#search to further narrow down the results.
      def search_similar(*args)
        Collection.search_similar(self, *args)
      end

      module ClassMethods
        # Used to perform a search on a model.
        #
        #   # perform a simple full text search
        #   @articles = Article.search("phone")
        #
        #   # add pagination if you're using will_paginate
        #   @articles = Article.search("phone", :per_page => 10, :page => params[:page])
        #
        #   # search based on indexed fields
        #   @articles = Article.search("phone", :conditions => { :category_id => params[:category_id] })
        #
        #   # manually sort based on any number of indexed fields, sort defaults to most relevant
        #   @articles = Article.search("phone", :order => [:category_id, :id], :descending => true)
        #
        #   # basic boolean matching is supported
        #   @articles = Article.search("phone or fax not email")
        #
        #   # no need to specify first query string when searching all records
        #   @articles = Article.search(:conditions => { :category_id => params[:category_id] })
        #
        def search(*args)
          Collection.new(self, *args)
        end

        # The Xapit::IndexBlueprint object used for this class.
        def xapit_index_blueprint
          @xapit_index_blueprint
        end

        # Finds a Xapit::FacetBlueprint for the given attribute (compared by string form).
        # Raises a RuntimeError when the class has no facet for that attribute.
        def xapit_facet_blueprint(attribute)
          result = xapit_index_blueprint.facets.detect { |f| f.attribute.to_s == attribute.to_s }
          raise "Unable to find facet blueprint for #{attribute} on #{name}" if result.nil?
          result
        end
      end
    end
  end
end
@@ -0,0 +1,62 @@
1
module Xapit
  # This class wraps a Xapian::Query for convenience purposes. You will likely not need to use
  # this class unless you are trying to query the Xapian database directly.
  # You may be looking for Xapit::Collection instead.
  class Query
    attr_reader :default_options, :xapian_query

    # +query+ may be a Xapian::Query, a single term, or an array of terms (AND-ed together).
    def initialize(query)
      @xapian_query = build_xapian_query(query)
      @default_options = { :offset => 0, :sort_descending => false }
    end

    # Narrows the wrapped query to records also matching +query+. Blank input is ignored.
    # Returns self so calls can be chained.
    def and_query(query)
      combine_with(Xapian::Query::OP_AND, query)
    end

    # Widens the wrapped query to records matching +query+ as well. Blank input is ignored.
    # Returns self so calls can be chained.
    def or_query(query)
      combine_with(Xapian::Query::OP_OR, query)
    end

    # Excludes records matching +query+ from the wrapped query. Blank input is ignored.
    # Returns self so calls can be chained.
    def not_query(query)
      combine_with(Xapian::Query::OP_AND_NOT, query)
    end

    # Runs the query against Config.database and returns the resulting match set.
    #
    # Recognized options (missing ones fall back to default_options):
    #   :offset, :limit    - passed to Xapian::Enquire#mset
    #   :sort_by_values    - value slots to sort by, before relevance
    #   :sort_descending   - direction used for every sort value
    #   :collapse_key      - value slot used to collapse duplicates
    #
    # The hash passed in by the caller is left untouched.
    def matchset(options = {})
      options = default_options.merge(options)
      enquire = Xapian::Enquire.new(Config.database)
      if options[:sort_by_values]
        sorter = Xapian::MultiValueSorter.new
        options[:sort_by_values].each do |sort_value|
          sorter.add(sort_value, !!options[:sort_descending])
        end
        enquire.set_sort_by_key_then_relevance(sorter)
      end
      enquire.collapse_key = options[:collapse_key] if options[:collapse_key]
      enquire.query = @xapian_query
      enquire.mset(options[:offset], options[:limit])
    end

    # Returns the matches of the match set produced with +options+ (see matchset).
    def matches(options = {})
      matchset(options).matches
    end

    # Estimated number of matching records.
    def count
      # a bit of a hack to get more accurate count estimate
      matchset(:limit => Config.database.doccount).matches_estimated
    end

    private

    # Replaces the wrapped query with "current <operator> query" unless +query+ is blank.
    def combine_with(operator, query)
      unless query.blank?
        @xapian_query = Xapian::Query.new(operator, @xapian_query, build_xapian_query(query))
      end
      self
    end

    # Passes Xapian::Query objects through; anything else is treated as one or more
    # terms that must all match.
    def build_xapian_query(query)
      if query.kind_of? Xapian::Query
        query
      else
        Xapian::Query.new(Xapian::Query::OP_AND, [query].flatten)
      end
    end
  end
end
@@ -0,0 +1,115 @@
1
module Xapit
  # Shared behaviour for the query parsers: turns search text plus options
  # (:page, :per_page, :order, :descending, :conditions, :facets, :classes) into a
  # Xapit::Query. Subclasses supply xapian_query_from_text.
  class AbstractQueryParser
    DEFAULT_PER_PAGE = 20

    attr_reader :member_class
    attr_writer :base_query

    # Arguments: member class (may be nil), search text, and a trailing options hash.
    def initialize(*args)
      @options = args.extract_options!
      @member_class = args[0]
      @search_text = args[1].to_s
    end

    # The query to run. Without search text, conditions or facets this is just the
    # base query; otherwise the base query narrowed by the text and the extra terms.
    def query
      extra_terms = condition_terms + facet_terms
      if @search_text.split.empty? && extra_terms.empty?
        base_query
      else
        @query ||= base_query.and_query(xapian_query_from_text(@search_text)).and_query(extra_terms)
      end
    end

    # Requested page number, never lower than 1. Values such as "", "0" or a negative
    # number (easily produced by request params) would otherwise yield a negative offset.
    def current_page
      return 1 unless @options[:page]
      [@options[:page].to_i, 1].max
    end

    # Number of records per page (defaults to 20).
    def per_page
      @options[:per_page] ? @options[:per_page].to_i : DEFAULT_PER_PAGE
    end

    # Index of the first record of the current page.
    def offset
      per_page * (current_page - 1)
    end

    # Sortable positions for the attribute(s) given in :order, or nil when no order
    # or no member class is present.
    def sort_by_values
      order = @options[:order]
      return nil unless order && @member_class
      index = @member_class.xapit_index_blueprint
      attributes = order.kind_of?(Array) ? order : [order]
      attributes.map { |attribute| index.sortable_position_for(attribute) }
    end

    # Memoized starting query; can be replaced through base_query=.
    def base_query
      @base_query ||= initial_query
    end

    # Builds the starting query (any of the searched classes) and stores the paging
    # and sorting settings as its default options.
    def initial_query
      query = Query.new(Xapian::Query.new(Xapian::Query::OP_OR, initial_query_strings))
      query.default_options[:offset] = offset
      query.default_options[:limit] = per_page
      query.default_options[:sort_by_values] = sort_by_values
      query.default_options[:sort_descending] = @options[:descending]
      query
    end

    # Class terms ("C" + class name) for the searched classes, or a single empty
    # term when no class restriction applies.
    def initial_query_strings
      if classes.empty?
        [""]
      else
        classes.map { |klass| "C#{klass.name}" }
      end
    end

    # Classes to search: the :classes option, else the member class (nils removed).
    def classes
      (@options[:classes] || [@member_class]).compact
    end

    # Terms for the :conditions option in the form "X<name>-<value>". Time and Date
    # values are converted to integer timestamps; everything is downcased.
    def condition_terms
      return [] unless @options[:conditions]
      @options[:conditions].map do |name, value|
        if value.kind_of? Time
          value = value.to_i
        elsif value.kind_of? Date
          value = value.to_time.to_i
        end
        "X#{name}-#{value.to_s.downcase}"
      end
    end

    # Terms for the :facets option in the form "F<identifier>".
    def facet_terms
      return [] unless @options[:facets]
      facet_identifiers.map { |identifier| "F#{identifier}" }
    end

    # Facet identifiers, given either as an array or as one "-" separated string.
    def facet_identifiers
      @options[:facets].kind_of?(String) ? @options[:facets].split('-') : (@options[:facets] || [])
    end

    # Returns the downcased search text with every word replaced by its spelling
    # suggestion where one exists, or nil when no word has a suggestion.
    # Raises when spelling is disabled. Each word is looked up only once.
    def spelling_suggestion
      raise "Spelling has been disabled. Enable spelling in Xapit::Config.setup." unless Config.spelling?
      found_suggestion = false
      corrected = @search_text.downcase.gsub(/\w+/) do |term|
        suggestion = Config.database.get_spelling_suggestion(term)
        if suggestion.blank?
          term
        else
          found_suggestion = true
          suggestion
        end
      end
      found_suggestion ? corrected : nil
    end
  end
end
@@ -0,0 +1,19 @@
1
module Xapit
  # Query parser that delegates search text parsing to Xapian::QueryParser.
  class ClassicQueryParser < AbstractQueryParser
    # Parses +text+ with the Xapian query parser and returns the resulting query.
    def xapian_query_from_text(text)
      xapian_parser.parse_query(text)
    end

    # Memoized Xapian::QueryParser instance.
    def xapian_parser
      @xapian_parser ||= build_xapian_parser
    end

    # Builds a Xapian::QueryParser that combines words with AND by default.
    # Stemming (STEM_SOME) is only enabled when Config.stemming is set, matching the
    # guard SimpleQueryParser applies before stemming.
    def build_xapian_parser
      parser = Xapian::QueryParser.new
      if Config.stemming
        parser.stemmer = Xapian::Stem.new(Config.stemming)
        parser.stemming_strategy = Xapian::QueryParser::STEM_SOME
      end
      parser.default_op = Xapian::Query::OP_AND
      parser
    end
  end
end
@@ -0,0 +1,75 @@
1
module Xapit
  # Query parser that understands plain words plus the keywords "or" and "not".
  #
  # Search text is first parsed into nested instructions such as
  #   [:or, "Zfoo", [:and, "Zbar", [:not, "Zbaz"]]]
  # which are then converted into a Xapian::Query.
  class SimpleQueryParser < AbstractQueryParser
    # Converts parsed instructions (default: the parsed search text) into a Xapian::Query.
    #
    # An instruction is either a single term (String) or an array whose first element
    # is :and, :or or :not followed by terms and/or nested instructions. Terms at one
    # level are combined with the level's operator (:or => OR, anything else => AND);
    # nested :not instructions are subtracted with AND_NOT.
    # Returns nil when the instructions contain no terms at all.
    def xapian_query(instructions = nil)
      instructions ||= parsed
      instructions = [:and, instructions] if instructions.kind_of? String
      operator = (instructions.first == :or ? Xapian::Query::OP_OR : Xapian::Query::OP_AND)
      operands = instructions[1..-1]
      words = operands.select { |i| i.kind_of? String }
      query = Xapian::Query.new(operator, words) unless words.empty?
      operands.select { |i| i.kind_of? Array }.each do |sub_instructions|
        negated = sub_instructions.first == :not
        sub_query = xapian_query(sub_instructions)
        if query
          query = Xapian::Query.new(negated ? Xapian::Query::OP_AND_NOT : operator, query, sub_query)
        elsif negated
          # Nothing to subtract from yet (e.g. "not email"): subtract from the
          # match-all query so the negation is not silently dropped.
          query = Xapian::Query.new(Xapian::Query::OP_AND_NOT, Xapian::Query.new(""), sub_query)
        else
          query = sub_query
        end
      end
      query
    end

    # Instructions for the parser's own (downcased) search text.
    def parsed
      parse(@search_text.downcase)
    end

    # Xapian query for arbitrary +text+ (downcased before parsing).
    def xapian_query_from_text(text)
      xapian_query(parse(text.downcase))
    end

    private

    # Turns text into instructions:
    #   an Array           => [:and, *elements]
    #   "a or b"           => [:or, parse("a"), parse("b")]
    #   "a not b"          => [:and, term(a), [:not, term(b)]]
    #   "a"                => term(a)   (blank text is returned unchanged)
    def parse(text)
      return [:and, *text] if text.kind_of? Array
      text = text.strip
      if text =~ /\sor\s/ui
        [:or, *text.split(/\s+or\s+/ui).map { |t| parse(t) }]
      elsif text =~ /\s+/u
        [:and, *text.scan(/(?:\bnot\s+)?[^\s]+/ui).map { |word| parse_word(word) }]
      elsif text.blank?
        text
      else
        stem(text)
      end
    end

    # Converts one scanned word, optionally prefixed by "not ", into a term or a
    # [:not, term] instruction.
    def parse_word(word)
      if word =~ /^not\s/ui
        [:not, stem(word.sub(/^not\s+/ui, ''))]
      else
        stem(word)
      end
    end

    # Returns the "Z"-prefixed stemmed term when stemming is configured, else the word itself.
    def stem(word)
      Config.stemming ? "Z" + stemmer.call(word) : word
    end

    # Memoized Xapian stemmer for the configured stemming language.
    def stemmer
      @stemmer ||= Xapian::Stem.new(Config.stemming)
    end
  end
end
@@ -0,0 +1,15 @@
1
# Shared spec setup: loads the spec framework, the library under test and the
# XapitMember test model, then resets state before every example.
require 'rubygems'
require 'spec'
require 'active_support'
require 'fileutils'

# Directory containing this helper; all other paths are resolved relative to it.
spec_dir = File.dirname(__FILE__)

require spec_dir + '/../lib/xapit'
require spec_dir + '/xapit_member'

Spec::Runner.configure do |config|
  config.mock_with :rr

  # Every example runs against a freshly removed database under spec/tmp and
  # without any leftover XapitMember records.
  config.before(:each) do
    Xapit::Config.setup(:database_path => spec_dir + '/tmp/xapiandb')
    Xapit::Config.remove_database
    XapitMember.delete_all
  end
end