xapit 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180) hide show
  1. data/LICENSE +20 -0
  2. data/Manifest +178 -0
  3. data/README.rdoc +183 -0
  4. data/Rakefile +15 -0
  5. data/TODO +23 -0
  6. data/features/facets.feature +51 -0
  7. data/features/finding.feature +119 -0
  8. data/features/indexing.feature +41 -0
  9. data/features/step_definitions/common_steps.rb +7 -0
  10. data/features/step_definitions/xapit_steps.rb +117 -0
  11. data/features/support/env.rb +7 -0
  12. data/features/support/xapit_helpers.rb +27 -0
  13. data/init.rb +3 -0
  14. data/install.rb +9 -0
  15. data/lib/xapit.rb +39 -0
  16. data/lib/xapit/collection.rb +165 -0
  17. data/lib/xapit/config.rb +83 -0
  18. data/lib/xapit/facet.rb +59 -0
  19. data/lib/xapit/facet_blueprint.rb +59 -0
  20. data/lib/xapit/facet_option.rb +56 -0
  21. data/lib/xapit/index_blueprint.rb +117 -0
  22. data/lib/xapit/indexers/abstract_indexer.rb +101 -0
  23. data/lib/xapit/indexers/classic_indexer.rb +27 -0
  24. data/lib/xapit/indexers/simple_indexer.rb +31 -0
  25. data/lib/xapit/membership.rb +103 -0
  26. data/lib/xapit/query.rb +62 -0
  27. data/lib/xapit/query_parsers/abstract_query_parser.rb +115 -0
  28. data/lib/xapit/query_parsers/classic_query_parser.rb +19 -0
  29. data/lib/xapit/query_parsers/simple_query_parser.rb +75 -0
  30. data/spec/spec_helper.rb +15 -0
  31. data/spec/tmp/xapdb/flintlock +0 -0
  32. data/spec/tmp/xapdb/iamflint +0 -0
  33. data/spec/tmp/xapdb/postlist.DB +0 -0
  34. data/spec/tmp/xapdb/postlist.baseA +0 -0
  35. data/spec/tmp/xapdb/postlist.baseB +0 -0
  36. data/spec/tmp/xapdb/record.DB +0 -0
  37. data/spec/tmp/xapdb/record.baseA +0 -0
  38. data/spec/tmp/xapdb/record.baseB +0 -0
  39. data/spec/tmp/xapdb/spelling.DB +0 -0
  40. data/spec/tmp/xapdb/spelling.baseA +0 -0
  41. data/spec/tmp/xapdb/spelling.baseB +0 -0
  42. data/spec/tmp/xapdb/termlist.DB +0 -0
  43. data/spec/tmp/xapdb/termlist.baseA +0 -0
  44. data/spec/tmp/xapdb/termlist.baseB +0 -0
  45. data/spec/tmp/xapian_db/flintlock +0 -0
  46. data/spec/tmp/xapian_db/iamflint +0 -0
  47. data/spec/tmp/xapian_db/postlist.DB +0 -0
  48. data/spec/tmp/xapian_db/postlist.baseA +0 -0
  49. data/spec/tmp/xapian_db/record.DB +0 -0
  50. data/spec/tmp/xapian_db/record.baseA +0 -0
  51. data/spec/tmp/xapian_db/termlist.DB +0 -0
  52. data/spec/tmp/xapian_db/termlist.baseA +0 -0
  53. data/spec/tmp/xapiandab/flintlock +0 -0
  54. data/spec/tmp/xapiandab/iamflint +0 -0
  55. data/spec/tmp/xapiandab/postlist.DB +0 -0
  56. data/spec/tmp/xapiandab/postlist.baseA +0 -0
  57. data/spec/tmp/xapiandab/postlist.baseB +0 -0
  58. data/spec/tmp/xapiandab/record.DB +0 -0
  59. data/spec/tmp/xapiandab/record.baseA +0 -0
  60. data/spec/tmp/xapiandab/record.baseB +0 -0
  61. data/spec/tmp/xapiandab/spelling.DB +0 -0
  62. data/spec/tmp/xapiandab/spelling.baseA +0 -0
  63. data/spec/tmp/xapiandab/spelling.baseB +0 -0
  64. data/spec/tmp/xapiandab/termlist.DB +0 -0
  65. data/spec/tmp/xapiandab/termlist.baseA +0 -0
  66. data/spec/tmp/xapiandab/termlist.baseB +0 -0
  67. data/spec/tmp/xapiandatab/flintlock +0 -0
  68. data/spec/tmp/xapiandatab/iamflint +0 -0
  69. data/spec/tmp/xapiandatab/postlist.DB +0 -0
  70. data/spec/tmp/xapiandatab/postlist.baseA +0 -0
  71. data/spec/tmp/xapiandatab/postlist.baseB +0 -0
  72. data/spec/tmp/xapiandatab/record.DB +0 -0
  73. data/spec/tmp/xapiandatab/record.baseA +0 -0
  74. data/spec/tmp/xapiandatab/record.baseB +0 -0
  75. data/spec/tmp/xapiandatab/spelling.DB +0 -0
  76. data/spec/tmp/xapiandatab/spelling.baseA +0 -0
  77. data/spec/tmp/xapiandatab/spelling.baseB +0 -0
  78. data/spec/tmp/xapiandatab/termlist.DB +0 -0
  79. data/spec/tmp/xapiandatab/termlist.baseA +0 -0
  80. data/spec/tmp/xapiandatab/termlist.baseB +0 -0
  81. data/spec/tmp/xapiandataba/flintlock +0 -0
  82. data/spec/tmp/xapiandataba/iamflint +0 -0
  83. data/spec/tmp/xapiandataba/postlist.DB +0 -0
  84. data/spec/tmp/xapiandataba/postlist.baseA +0 -0
  85. data/spec/tmp/xapiandataba/postlist.baseB +0 -0
  86. data/spec/tmp/xapiandataba/record.DB +0 -0
  87. data/spec/tmp/xapiandataba/record.baseA +0 -0
  88. data/spec/tmp/xapiandataba/record.baseB +0 -0
  89. data/spec/tmp/xapiandataba/spelling.DB +0 -0
  90. data/spec/tmp/xapiandataba/spelling.baseA +0 -0
  91. data/spec/tmp/xapiandataba/spelling.baseB +0 -0
  92. data/spec/tmp/xapiandataba/termlist.DB +0 -0
  93. data/spec/tmp/xapiandataba/termlist.baseA +0 -0
  94. data/spec/tmp/xapiandataba/termlist.baseB +0 -0
  95. data/spec/tmp/xapiandatabas/flintlock +0 -0
  96. data/spec/tmp/xapiandatabas/iamflint +0 -0
  97. data/spec/tmp/xapiandatabas/postlist.DB +0 -0
  98. data/spec/tmp/xapiandatabas/postlist.baseA +0 -0
  99. data/spec/tmp/xapiandatabas/record.DB +0 -0
  100. data/spec/tmp/xapiandatabas/record.baseA +0 -0
  101. data/spec/tmp/xapiandatabas/termlist.DB +0 -0
  102. data/spec/tmp/xapiandatabas/termlist.baseA +0 -0
  103. data/spec/tmp/xapiandatb/flintlock +0 -0
  104. data/spec/tmp/xapiandatb/iamflint +0 -0
  105. data/spec/tmp/xapiandatb/postlist.DB +0 -0
  106. data/spec/tmp/xapiandatb/postlist.baseA +0 -0
  107. data/spec/tmp/xapiandatb/postlist.baseB +0 -0
  108. data/spec/tmp/xapiandatb/record.DB +0 -0
  109. data/spec/tmp/xapiandatb/record.baseA +0 -0
  110. data/spec/tmp/xapiandatb/record.baseB +0 -0
  111. data/spec/tmp/xapiandatb/spelling.DB +0 -0
  112. data/spec/tmp/xapiandatb/spelling.baseA +0 -0
  113. data/spec/tmp/xapiandatb/spelling.baseB +0 -0
  114. data/spec/tmp/xapiandatb/termlist.DB +0 -0
  115. data/spec/tmp/xapiandatb/termlist.baseA +0 -0
  116. data/spec/tmp/xapiandatb/termlist.baseB +0 -0
  117. data/spec/tmp/xapiandbase/flintlock +0 -0
  118. data/spec/tmp/xapiandbase/iamflint +0 -0
  119. data/spec/tmp/xapiandbase/postlist.DB +0 -0
  120. data/spec/tmp/xapiandbase/postlist.baseA +0 -0
  121. data/spec/tmp/xapiandbase/postlist.baseB +0 -0
  122. data/spec/tmp/xapiandbase/record.DB +0 -0
  123. data/spec/tmp/xapiandbase/record.baseA +0 -0
  124. data/spec/tmp/xapiandbase/record.baseB +0 -0
  125. data/spec/tmp/xapiandbase/spelling.DB +0 -0
  126. data/spec/tmp/xapiandbase/spelling.baseA +0 -0
  127. data/spec/tmp/xapiandbase/spelling.baseB +0 -0
  128. data/spec/tmp/xapiandbase/termlist.DB +0 -0
  129. data/spec/tmp/xapiandbase/termlist.baseA +0 -0
  130. data/spec/tmp/xapiandbase/termlist.baseB +0 -0
  131. data/spec/xapit/collection_spec.rb +153 -0
  132. data/spec/xapit/config_spec.rb +48 -0
  133. data/spec/xapit/facet_blueprint_spec.rb +29 -0
  134. data/spec/xapit/facet_option_spec.rb +80 -0
  135. data/spec/xapit/facet_spec.rb +73 -0
  136. data/spec/xapit/index_blueprint_spec.rb +60 -0
  137. data/spec/xapit/indexers/abstract_indexer_spec.rb +74 -0
  138. data/spec/xapit/indexers/classic_indexer_spec.rb +26 -0
  139. data/spec/xapit/indexers/simple_indexer_spec.rb +53 -0
  140. data/spec/xapit/membership_spec.rb +39 -0
  141. data/spec/xapit/query_parsers/abstract_query_parser_spec.rb +23 -0
  142. data/spec/xapit/query_parsers/classic_query_parser_spec.rb +15 -0
  143. data/spec/xapit/query_parsers/simple_query_parser_spec.rb +86 -0
  144. data/spec/xapit/query_spec.rb +41 -0
  145. data/spec/xapit_member.rb +32 -0
  146. data/tasks/spec.rb +9 -0
  147. data/tasks/xapit.rake +9 -0
  148. data/tmp/xapiandatabase/flintlock +0 -0
  149. data/tmp/xapiandatabase/iamflint +0 -0
  150. data/tmp/xapiandatabase/postlist.DB +0 -0
  151. data/tmp/xapiandatabase/postlist.baseA +0 -0
  152. data/tmp/xapiandatabase/postlist.baseB +0 -0
  153. data/tmp/xapiandatabase/record.DB +0 -0
  154. data/tmp/xapiandatabase/record.baseA +0 -0
  155. data/tmp/xapiandatabase/record.baseB +0 -0
  156. data/tmp/xapiandatabase/spelling.DB +0 -0
  157. data/tmp/xapiandatabase/spelling.baseA +0 -0
  158. data/tmp/xapiandatabase/spelling.baseB +0 -0
  159. data/tmp/xapiandatabase/termlist.DB +0 -0
  160. data/tmp/xapiandatabase/termlist.baseA +0 -0
  161. data/tmp/xapiandatabase/termlist.baseB +0 -0
  162. data/tmp/xapiandatabase/value.baseB +0 -0
  163. data/tmp/xapiandb/flintlock +0 -0
  164. data/tmp/xapiandb/iamflint +0 -0
  165. data/tmp/xapiandb/postlist.DB +0 -0
  166. data/tmp/xapiandb/postlist.baseA +0 -0
  167. data/tmp/xapiandb/postlist.baseB +0 -0
  168. data/tmp/xapiandb/record.DB +0 -0
  169. data/tmp/xapiandb/record.baseA +0 -0
  170. data/tmp/xapiandb/record.baseB +0 -0
  171. data/tmp/xapiandb/spelling.DB +0 -0
  172. data/tmp/xapiandb/spelling.baseA +0 -0
  173. data/tmp/xapiandb/spelling.baseB +0 -0
  174. data/tmp/xapiandb/termlist.DB +0 -0
  175. data/tmp/xapiandb/termlist.baseA +0 -0
  176. data/tmp/xapiandb/termlist.baseB +0 -0
  177. data/tmp/xapiandb/value.baseB +0 -0
  178. data/uninstall.rb +5 -0
  179. data/xapit.gemspec +30 -0
  180. metadata +257 -0
@@ -0,0 +1,27 @@
1
+ module Xapit
2
+ class ClassicIndexer < AbstractIndexer
3
+ def index_text_attributes(member, document)
4
+ term_generator.document = document
5
+ @blueprint.text_attributes.each do |name, options|
6
+ content = member.send(name).to_s
7
+ if options[:proc]
8
+ index_terms(options[:proc].call(content).reject(&:blank?).map(&:to_s).map(&:downcase), document)
9
+ else
10
+ term_generator.index_text(content)
11
+ end
12
+ end
13
+ end
14
+
15
+ def term_generator
16
+ @term_generator ||= create_term_generator
17
+ end
18
+
19
+ def create_term_generator
20
+ term_generator = Xapian::TermGenerator.new
21
+ term_generator.set_flags(Xapian::TermGenerator::FLAG_SPELLING, 0) if Config.spelling?
22
+ term_generator.database = database
23
+ term_generator.stemmer = Xapian::Stem.new(Config.stemming)
24
+ term_generator
25
+ end
26
+ end
27
+ end
@@ -0,0 +1,31 @@
1
+ module Xapit
2
+ class SimpleIndexer < AbstractIndexer
3
+ def index_text_attributes(member, document)
4
+ @blueprint.text_attributes.map do |name, options|
5
+ terms_for_attribute(member, name, options).each do |term|
6
+ document.add_term(term, options[:weight] || 1)
7
+ database.add_spelling(term) if Config.spelling?
8
+ end
9
+ end
10
+ end
11
+
12
+ def terms_for_attribute(member, name, options)
13
+ terms_for_attribute_without_stemming(member, name, options).map do |term|
14
+ [term, "Z#{stemmer.call(term)}"]
15
+ end.flatten
16
+ end
17
+
18
+ def terms_for_attribute_without_stemming(member, name, options)
19
+ content = member.send(name).to_s
20
+ if options[:proc]
21
+ options[:proc].call(content).reject(&:blank?).map(&:to_s).map(&:downcase)
22
+ else
23
+ content.scan(/\w+/u).map(&:downcase)
24
+ end
25
+ end
26
+
27
+ def stemmer
28
+ @stemmer ||= Xapian::Stem.new(Config.stemming)
29
+ end
30
+ end
31
+ end
@@ -0,0 +1,103 @@
1
+ module Xapit
2
+ # Use "include Xapit::Membership" on a class to allow xapian searching on it. This is automatically included
3
+ # in ActiveRecord::Base so you do not need to do anything there.
4
+ module Membership
5
+ def self.included(base)
6
+ base.extend ClassMethods
7
+ end
8
+
9
+ module ClassMethods
10
+ # Simply call "xapit" on a class and pass a block to define the indexed attributes.
11
+ #
12
+ # class Article < ActiveRecord::Base
13
+ # xapit do |index|
14
+ # index.text :name, :content
15
+ # index.field :category_id
16
+ # index.facet :author_name, "Author"
17
+ # index.sortable :id, :category_id
18
+ # end
19
+ # end
20
+ #
21
+ # First we index "name" and "content" attributes for full text searching. The "category_id" field is indexed for :conditions searching. The "author_name" is indexed as a facet with "Author" being the display name of the facet. See the facets section below for details. Finally the "id" and "category_id" attributes are indexed as sortable attributes so they can be included in the :order option in a search.
22
+ #
23
+ # Because the indexing happens in Ruby these attributes do not have to be database columns. They can be simple Ruby methods. For example, the "author_name" attribute mentioned above can be defined like this.
24
+ #
25
+ # def author_name
26
+ # author.name
27
+ # end
28
+ #
29
+ # This way you can create a completely custom facet by simply defining your own method.
30
+ #
31
+ # You can also pass any find options to the xapit method to determine what gets indexed and improve performance with eager loading or a different batch size.
32
+ #
33
+ # xapit(:batch_size => 100, :include => :author, :conditions => { :visible => true })
34
+ #
35
+ # If you pass in a block you can customize how the text words will be divided (instead of simply by whitespace).
36
+ #
37
+ # xapit do |index|
38
+ # index.text(:keywords) { |words| words.split(', ') }
39
+ # end
40
+ #
41
+ # You can specify a :weight option to give a text attribute more importance. This will cause search terms matching
42
+ # that attribute to have a higher rank. The default weight is 1. Decimal (0.5) weight values are not supported.
43
+ #
44
+ # index.text :name, :weight => 10
45
+ #
46
+ def xapit(*args)
47
+ @xapit_index_blueprint = IndexBlueprint.new(self, *args)
48
+ yield(@xapit_index_blueprint)
49
+ include AdditionalMethods
50
+ end
51
+ end
52
+
53
+ module AdditionalMethods
54
+ def self.included(base)
55
+ base.extend ClassMethods
56
+ base.send(:attr_accessor, :xapit_relevance) # is there a better way to do this?
57
+ end
58
+
59
+ # Find records similar to the given model. It takes the same arguments as Membership::AdditionalMethods::ClassMethods#search to further narrow down the results.
60
+ def search_similar(*args)
61
+ Collection.search_similar(self, *args)
62
+ end
63
+
64
+ module ClassMethods
65
+ # Used to perform a search on a model.
66
+ #
67
+ # # perform a simple full text search
68
+ # @articles = Article.search("phone")
69
+ #
70
+ # # add pagination if you're using will_paginate
71
+ # @articles = Article.search("phone", :per_page => 10, :page => params[:page])
72
+ #
73
+ # # search based on indexed fields
74
+ # @articles = Article.search("phone", :conditions => { :category_id => params[:category_id] })
75
+ #
76
+ # # manually sort based on any number of indexed fields, sort defaults to most relevant
77
+ # @articles = Article.search("phone", :order => [:category_id, :id], :descending => true)
78
+ #
79
+ # # basic boolean matching is supported
80
+ # @articles = Article.search("phone or fax not email")
81
+ #
82
+ # # no need to specify first query string when searching all records
83
+ # @articles = Article.search(:conditions => { :category_id => params[:category_id] })
84
+ #
85
+ def search(*args)
86
+ Collection.new(self, *args)
87
+ end
88
+
89
+ # The Xapit::IndexBlueprint object used for this class.
90
+ def xapit_index_blueprint
91
+ @xapit_index_blueprint
92
+ end
93
+
94
+ # Finds a Xapit::FacetBlueprint for the given attribute.
95
+ def xapit_facet_blueprint(attribute)
96
+ result = xapit_index_blueprint.facets.detect { |f| f.attribute.to_s == attribute.to_s }
97
+ raise "Unable to find facet blueprint for #{attribute} on #{name}" if result.nil?
98
+ result
99
+ end
100
+ end
101
+ end
102
+ end
103
+ end
@@ -0,0 +1,62 @@
1
+ module Xapit
2
+ # This class wraps a Xapian::Query for convenience purposes. You will likely not need to use
3
+ # this class unless you are trying to query the Xapian database directly.
4
+ # You may be looking for Xapit::Collection instead.
5
+ class Query
6
+ attr_reader :default_options, :xapian_query
7
+
8
+ def initialize(query)
9
+ @xapian_query = build_xapian_query(query)
10
+ @default_options = { :offset => 0, :sort_descending => false }
11
+ end
12
+
13
+ def and_query(query)
14
+ @xapian_query = Xapian::Query.new(Xapian::Query::OP_AND, @xapian_query, build_xapian_query(query)) unless query.blank?
15
+ self
16
+ end
17
+
18
+ def or_query(query)
19
+ @xapian_query = Xapian::Query.new(Xapian::Query::OP_OR, @xapian_query, build_xapian_query(query)) unless query.blank?
20
+ self
21
+ end
22
+
23
+ def not_query(query)
24
+ @xapian_query = Xapian::Query.new(Xapian::Query::OP_AND_NOT, @xapian_query, build_xapian_query(query)) unless query.blank?
25
+ self
26
+ end
27
+
28
+ def matchset(options = {})
29
+ options.reverse_merge!(default_options)
30
+ enquire = Xapian::Enquire.new(Config.database)
31
+ if options[:sort_by_values]
32
+ sorter = Xapian::MultiValueSorter.new
33
+ options[:sort_by_values].each do |sort_value|
34
+ sorter.add(sort_value, !!options[:sort_descending])
35
+ end
36
+ enquire.set_sort_by_key_then_relevance(sorter)
37
+ end
38
+ enquire.collapse_key = options[:collapse_key] if options[:collapse_key]
39
+ enquire.query = @xapian_query
40
+ enquire.mset(options[:offset], options[:limit])
41
+ end
42
+
43
+ def matches(options = {})
44
+ matchset(options).matches
45
+ end
46
+
47
+ def count
48
+ # a bit of a hack to get a more accurate count estimate
49
+ matchset(:limit => Config.database.doccount).matches_estimated
50
+ end
51
+
52
+ private
53
+
54
+ def build_xapian_query(query)
55
+ if query.kind_of? Xapian::Query
56
+ query
57
+ else
58
+ Xapian::Query.new(Xapian::Query::OP_AND, [query].flatten)
59
+ end
60
+ end
61
+ end
62
+ end
@@ -0,0 +1,115 @@
1
+ module Xapit
2
+ class AbstractQueryParser
3
+ attr_reader :member_class
4
+ attr_writer :base_query
5
+
6
+ def initialize(*args)
7
+ @options = args.extract_options!
8
+ @member_class = args[0]
9
+ @search_text = args[1].to_s
10
+ end
11
+
12
+ def query
13
+ if (@search_text.split + condition_terms + facet_terms).empty?
14
+ base_query
15
+ else
16
+ @query ||= base_query.and_query(xapian_query_from_text(@search_text)).and_query(condition_terms + facet_terms)
17
+ end
18
+ end
19
+
20
+ def current_page
21
+ @options[:page] ? @options[:page].to_i : 1
22
+ end
23
+
24
+ def per_page
25
+ @options[:per_page] ? @options[:per_page].to_i : 20
26
+ end
27
+
28
+ def offset
29
+ per_page*(current_page-1)
30
+ end
31
+
32
+ def sort_by_values
33
+ if @options[:order] && @member_class
34
+ index = @member_class.xapit_index_blueprint
35
+ if @options[:order].kind_of? Array
36
+ @options[:order].map do |attribute|
37
+ index.sortable_position_for(attribute)
38
+ end
39
+ else
40
+ [index.sortable_position_for(@options[:order])]
41
+ end
42
+ end
43
+ end
44
+
45
+ def base_query
46
+ @base_query ||= initial_query
47
+ end
48
+
49
+ def initial_query
50
+ query = Query.new(Xapian::Query.new(Xapian::Query::OP_OR, initial_query_strings))
51
+ query.default_options[:offset] = offset
52
+ query.default_options[:limit] = per_page
53
+ query.default_options[:sort_by_values] = sort_by_values
54
+ query.default_options[:sort_descending] = @options[:descending]
55
+ query
56
+ end
57
+
58
+ def initial_query_strings
59
+ if classes.empty?
60
+ [""]
61
+ else
62
+ classes.map { |klass| "C#{klass.name}" }
63
+ end
64
+ end
65
+
66
+ def classes
67
+ (@options[:classes] || [@member_class]).compact
68
+ end
69
+
70
+ def condition_terms
71
+ if @options[:conditions]
72
+ @options[:conditions].map do |name, value|
73
+ if value.kind_of? Time
74
+ value = value.to_i
75
+ elsif value.kind_of? Date
76
+ value = value.to_time.to_i
77
+ end
78
+ "X#{name}-#{value.to_s.downcase}"
79
+ end
80
+ else
81
+ []
82
+ end
83
+ end
84
+
85
+ def facet_terms
86
+ if @options[:facets]
87
+ facet_identifiers.map do |identifier|
88
+ "F#{identifier}"
89
+ end
90
+ else
91
+ []
92
+ end
93
+ end
94
+
95
+ def facet_identifiers
96
+ @options[:facets].kind_of?(String) ? @options[:facets].split('-') : (@options[:facets] || [])
97
+ end
98
+
99
+ def spelling_suggestion
100
+ raise "Spelling has been disabled. Enable spelling in Xapit::Config.setup." unless Config.spelling?
101
+ if @search_text.downcase.scan(/\w+/).all? { |term| Config.database.get_spelling_suggestion(term).empty? }
102
+ nil
103
+ else
104
+ @search_text.downcase.gsub(/\w+/) do |term|
105
+ suggestion = Config.database.get_spelling_suggestion(term)
106
+ if suggestion.blank?
107
+ term
108
+ else
109
+ suggestion
110
+ end
111
+ end
112
+ end
113
+ end
114
+ end
115
+ end
@@ -0,0 +1,19 @@
1
+ module Xapit
2
+ class ClassicQueryParser < AbstractQueryParser
3
+ def xapian_query_from_text(text)
4
+ xapian_parser.parse_query(text)
5
+ end
6
+
7
+ def xapian_parser
8
+ @xapian_parser ||= build_xapian_parser
9
+ end
10
+
11
+ def build_xapian_parser
12
+ parser = Xapian::QueryParser.new
13
+ parser.stemmer = Xapian::Stem.new(Config.stemming)
14
+ parser.stemming_strategy = Xapian::QueryParser::STEM_SOME
15
+ parser.default_op = Xapian::Query::OP_AND
16
+ parser
17
+ end
18
+ end
19
+ end
@@ -0,0 +1,75 @@
1
+ module Xapit
2
+ class SimpleQueryParser < AbstractQueryParser
3
+ # REFACTORME this is a bit complex for one method...
4
+ def xapian_query(instructions = nil)
5
+ instructions ||= parsed
6
+ instructions = [:add, instructions] if instructions.kind_of? String
7
+ operator = (instructions.first == :or ? Xapian::Query::OP_OR : Xapian::Query::OP_AND)
8
+ words = instructions[1..-1].select { |i| i.kind_of? String }
9
+ query = Xapian::Query.new(operator, words) unless words.empty?
10
+ instructions[1..-1].select { |i| i.kind_of? Array }.each do |sub_instructions|
11
+ if sub_instructions.first == :not
12
+ sub_operator = Xapian::Query::OP_AND_NOT
13
+ else
14
+ sub_operator = operator
15
+ end
16
+ if query
17
+ query = Xapian::Query.new(sub_operator, query, xapian_query(sub_instructions))
18
+ else
19
+ query = xapian_query(sub_instructions)
20
+ end
21
+ end
22
+ query
23
+ end
24
+
25
+ def parsed
26
+ parse(@search_text.downcase)
27
+ end
28
+
29
+ def xapian_query_from_text(text)
30
+ xapian_query(parse(text.downcase))
31
+ end
32
+
33
+ private
34
+
35
+
36
+ def parse(text)
37
+ if text.kind_of? Array
38
+ [:and, *text]
39
+ else
40
+ text = text.strip
41
+ if text =~ /\sor\s/ui
42
+ [:or, *text.split(/\s+or\s+/ui).map { |t| parse(t) }]
43
+ elsif text =~ /\s+/u
44
+ words = text.scan(/(?:\bnot\s+)?[^\s]+/ui)
45
+ words.map! do |word|
46
+ if Config.stemming
47
+ if word =~ /^not\s/ui
48
+ [:not, "Z" + stemmer.call(word.sub(/^not\s+/ui, ''))]
49
+ else
50
+ "Z" + stemmer.call(word)
51
+ end
52
+ else
53
+ if word =~ /^not\s/ui
54
+ [:not, word.sub(/^not\s+/ui, '')]
55
+ else
56
+ word
57
+ end
58
+ end
59
+ end
60
+ [:and, *words]
61
+ else
62
+ if Config.stemming && !text.blank?
63
+ "Z" + stemmer.call(text)
64
+ else
65
+ text
66
+ end
67
+ end
68
+ end
69
+ end
70
+
71
+ def stemmer
72
+ @stemmer ||= Xapian::Stem.new(Config.stemming)
73
+ end
74
+ end
75
+ end
@@ -0,0 +1,15 @@
1
+ require 'rubygems'
2
+ require 'spec'
3
+ require 'active_support'
4
+ require 'fileutils'
5
+ require File.dirname(__FILE__) + '/../lib/xapit'
6
+ require File.dirname(__FILE__) + '/xapit_member'
7
+
8
+ Spec::Runner.configure do |config|
9
+ config.mock_with :rr
10
+ config.before(:each) do
11
+ Xapit::Config.setup(:database_path => File.dirname(__FILE__) + '/tmp/xapiandb')
12
+ Xapit::Config.remove_database
13
+ XapitMember.delete_all
14
+ end
15
+ end