xapit 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +20 -0
- data/Manifest +178 -0
- data/README.rdoc +183 -0
- data/Rakefile +15 -0
- data/TODO +23 -0
- data/features/facets.feature +51 -0
- data/features/finding.feature +119 -0
- data/features/indexing.feature +41 -0
- data/features/step_definitions/common_steps.rb +7 -0
- data/features/step_definitions/xapit_steps.rb +117 -0
- data/features/support/env.rb +7 -0
- data/features/support/xapit_helpers.rb +27 -0
- data/init.rb +3 -0
- data/install.rb +9 -0
- data/lib/xapit.rb +39 -0
- data/lib/xapit/collection.rb +165 -0
- data/lib/xapit/config.rb +83 -0
- data/lib/xapit/facet.rb +59 -0
- data/lib/xapit/facet_blueprint.rb +59 -0
- data/lib/xapit/facet_option.rb +56 -0
- data/lib/xapit/index_blueprint.rb +117 -0
- data/lib/xapit/indexers/abstract_indexer.rb +101 -0
- data/lib/xapit/indexers/classic_indexer.rb +27 -0
- data/lib/xapit/indexers/simple_indexer.rb +31 -0
- data/lib/xapit/membership.rb +103 -0
- data/lib/xapit/query.rb +62 -0
- data/lib/xapit/query_parsers/abstract_query_parser.rb +115 -0
- data/lib/xapit/query_parsers/classic_query_parser.rb +19 -0
- data/lib/xapit/query_parsers/simple_query_parser.rb +75 -0
- data/spec/spec_helper.rb +15 -0
- data/spec/tmp/xapdb/flintlock +0 -0
- data/spec/tmp/xapdb/iamflint +0 -0
- data/spec/tmp/xapdb/postlist.DB +0 -0
- data/spec/tmp/xapdb/postlist.baseA +0 -0
- data/spec/tmp/xapdb/postlist.baseB +0 -0
- data/spec/tmp/xapdb/record.DB +0 -0
- data/spec/tmp/xapdb/record.baseA +0 -0
- data/spec/tmp/xapdb/record.baseB +0 -0
- data/spec/tmp/xapdb/spelling.DB +0 -0
- data/spec/tmp/xapdb/spelling.baseA +0 -0
- data/spec/tmp/xapdb/spelling.baseB +0 -0
- data/spec/tmp/xapdb/termlist.DB +0 -0
- data/spec/tmp/xapdb/termlist.baseA +0 -0
- data/spec/tmp/xapdb/termlist.baseB +0 -0
- data/spec/tmp/xapian_db/flintlock +0 -0
- data/spec/tmp/xapian_db/iamflint +0 -0
- data/spec/tmp/xapian_db/postlist.DB +0 -0
- data/spec/tmp/xapian_db/postlist.baseA +0 -0
- data/spec/tmp/xapian_db/record.DB +0 -0
- data/spec/tmp/xapian_db/record.baseA +0 -0
- data/spec/tmp/xapian_db/termlist.DB +0 -0
- data/spec/tmp/xapian_db/termlist.baseA +0 -0
- data/spec/tmp/xapiandab/flintlock +0 -0
- data/spec/tmp/xapiandab/iamflint +0 -0
- data/spec/tmp/xapiandab/postlist.DB +0 -0
- data/spec/tmp/xapiandab/postlist.baseA +0 -0
- data/spec/tmp/xapiandab/postlist.baseB +0 -0
- data/spec/tmp/xapiandab/record.DB +0 -0
- data/spec/tmp/xapiandab/record.baseA +0 -0
- data/spec/tmp/xapiandab/record.baseB +0 -0
- data/spec/tmp/xapiandab/spelling.DB +0 -0
- data/spec/tmp/xapiandab/spelling.baseA +0 -0
- data/spec/tmp/xapiandab/spelling.baseB +0 -0
- data/spec/tmp/xapiandab/termlist.DB +0 -0
- data/spec/tmp/xapiandab/termlist.baseA +0 -0
- data/spec/tmp/xapiandab/termlist.baseB +0 -0
- data/spec/tmp/xapiandatab/flintlock +0 -0
- data/spec/tmp/xapiandatab/iamflint +0 -0
- data/spec/tmp/xapiandatab/postlist.DB +0 -0
- data/spec/tmp/xapiandatab/postlist.baseA +0 -0
- data/spec/tmp/xapiandatab/postlist.baseB +0 -0
- data/spec/tmp/xapiandatab/record.DB +0 -0
- data/spec/tmp/xapiandatab/record.baseA +0 -0
- data/spec/tmp/xapiandatab/record.baseB +0 -0
- data/spec/tmp/xapiandatab/spelling.DB +0 -0
- data/spec/tmp/xapiandatab/spelling.baseA +0 -0
- data/spec/tmp/xapiandatab/spelling.baseB +0 -0
- data/spec/tmp/xapiandatab/termlist.DB +0 -0
- data/spec/tmp/xapiandatab/termlist.baseA +0 -0
- data/spec/tmp/xapiandatab/termlist.baseB +0 -0
- data/spec/tmp/xapiandataba/flintlock +0 -0
- data/spec/tmp/xapiandataba/iamflint +0 -0
- data/spec/tmp/xapiandataba/postlist.DB +0 -0
- data/spec/tmp/xapiandataba/postlist.baseA +0 -0
- data/spec/tmp/xapiandataba/postlist.baseB +0 -0
- data/spec/tmp/xapiandataba/record.DB +0 -0
- data/spec/tmp/xapiandataba/record.baseA +0 -0
- data/spec/tmp/xapiandataba/record.baseB +0 -0
- data/spec/tmp/xapiandataba/spelling.DB +0 -0
- data/spec/tmp/xapiandataba/spelling.baseA +0 -0
- data/spec/tmp/xapiandataba/spelling.baseB +0 -0
- data/spec/tmp/xapiandataba/termlist.DB +0 -0
- data/spec/tmp/xapiandataba/termlist.baseA +0 -0
- data/spec/tmp/xapiandataba/termlist.baseB +0 -0
- data/spec/tmp/xapiandatabas/flintlock +0 -0
- data/spec/tmp/xapiandatabas/iamflint +0 -0
- data/spec/tmp/xapiandatabas/postlist.DB +0 -0
- data/spec/tmp/xapiandatabas/postlist.baseA +0 -0
- data/spec/tmp/xapiandatabas/record.DB +0 -0
- data/spec/tmp/xapiandatabas/record.baseA +0 -0
- data/spec/tmp/xapiandatabas/termlist.DB +0 -0
- data/spec/tmp/xapiandatabas/termlist.baseA +0 -0
- data/spec/tmp/xapiandatb/flintlock +0 -0
- data/spec/tmp/xapiandatb/iamflint +0 -0
- data/spec/tmp/xapiandatb/postlist.DB +0 -0
- data/spec/tmp/xapiandatb/postlist.baseA +0 -0
- data/spec/tmp/xapiandatb/postlist.baseB +0 -0
- data/spec/tmp/xapiandatb/record.DB +0 -0
- data/spec/tmp/xapiandatb/record.baseA +0 -0
- data/spec/tmp/xapiandatb/record.baseB +0 -0
- data/spec/tmp/xapiandatb/spelling.DB +0 -0
- data/spec/tmp/xapiandatb/spelling.baseA +0 -0
- data/spec/tmp/xapiandatb/spelling.baseB +0 -0
- data/spec/tmp/xapiandatb/termlist.DB +0 -0
- data/spec/tmp/xapiandatb/termlist.baseA +0 -0
- data/spec/tmp/xapiandatb/termlist.baseB +0 -0
- data/spec/tmp/xapiandbase/flintlock +0 -0
- data/spec/tmp/xapiandbase/iamflint +0 -0
- data/spec/tmp/xapiandbase/postlist.DB +0 -0
- data/spec/tmp/xapiandbase/postlist.baseA +0 -0
- data/spec/tmp/xapiandbase/postlist.baseB +0 -0
- data/spec/tmp/xapiandbase/record.DB +0 -0
- data/spec/tmp/xapiandbase/record.baseA +0 -0
- data/spec/tmp/xapiandbase/record.baseB +0 -0
- data/spec/tmp/xapiandbase/spelling.DB +0 -0
- data/spec/tmp/xapiandbase/spelling.baseA +0 -0
- data/spec/tmp/xapiandbase/spelling.baseB +0 -0
- data/spec/tmp/xapiandbase/termlist.DB +0 -0
- data/spec/tmp/xapiandbase/termlist.baseA +0 -0
- data/spec/tmp/xapiandbase/termlist.baseB +0 -0
- data/spec/xapit/collection_spec.rb +153 -0
- data/spec/xapit/config_spec.rb +48 -0
- data/spec/xapit/facet_blueprint_spec.rb +29 -0
- data/spec/xapit/facet_option_spec.rb +80 -0
- data/spec/xapit/facet_spec.rb +73 -0
- data/spec/xapit/index_blueprint_spec.rb +60 -0
- data/spec/xapit/indexers/abstract_indexer_spec.rb +74 -0
- data/spec/xapit/indexers/classic_indexer_spec.rb +26 -0
- data/spec/xapit/indexers/simple_indexer_spec.rb +53 -0
- data/spec/xapit/membership_spec.rb +39 -0
- data/spec/xapit/query_parsers/abstract_query_parser_spec.rb +23 -0
- data/spec/xapit/query_parsers/classic_query_parser_spec.rb +15 -0
- data/spec/xapit/query_parsers/simple_query_parser_spec.rb +86 -0
- data/spec/xapit/query_spec.rb +41 -0
- data/spec/xapit_member.rb +32 -0
- data/tasks/spec.rb +9 -0
- data/tasks/xapit.rake +9 -0
- data/tmp/xapiandatabase/flintlock +0 -0
- data/tmp/xapiandatabase/iamflint +0 -0
- data/tmp/xapiandatabase/postlist.DB +0 -0
- data/tmp/xapiandatabase/postlist.baseA +0 -0
- data/tmp/xapiandatabase/postlist.baseB +0 -0
- data/tmp/xapiandatabase/record.DB +0 -0
- data/tmp/xapiandatabase/record.baseA +0 -0
- data/tmp/xapiandatabase/record.baseB +0 -0
- data/tmp/xapiandatabase/spelling.DB +0 -0
- data/tmp/xapiandatabase/spelling.baseA +0 -0
- data/tmp/xapiandatabase/spelling.baseB +0 -0
- data/tmp/xapiandatabase/termlist.DB +0 -0
- data/tmp/xapiandatabase/termlist.baseA +0 -0
- data/tmp/xapiandatabase/termlist.baseB +0 -0
- data/tmp/xapiandatabase/value.baseB +0 -0
- data/tmp/xapiandb/flintlock +0 -0
- data/tmp/xapiandb/iamflint +0 -0
- data/tmp/xapiandb/postlist.DB +0 -0
- data/tmp/xapiandb/postlist.baseA +0 -0
- data/tmp/xapiandb/postlist.baseB +0 -0
- data/tmp/xapiandb/record.DB +0 -0
- data/tmp/xapiandb/record.baseA +0 -0
- data/tmp/xapiandb/record.baseB +0 -0
- data/tmp/xapiandb/spelling.DB +0 -0
- data/tmp/xapiandb/spelling.baseA +0 -0
- data/tmp/xapiandb/spelling.baseB +0 -0
- data/tmp/xapiandb/termlist.DB +0 -0
- data/tmp/xapiandb/termlist.baseA +0 -0
- data/tmp/xapiandb/termlist.baseB +0 -0
- data/tmp/xapiandb/value.baseB +0 -0
- data/uninstall.rb +5 -0
- data/xapit.gemspec +30 -0
- metadata +257 -0
@@ -0,0 +1,27 @@
|
|
1
|
+
module Xapit
  # Indexer that hands free text to a Xapian::TermGenerator instead of
  # splitting it into terms in Ruby.
  class ClassicIndexer < AbstractIndexer
    # Indexes every text attribute of the blueprint into +document+.
    #
    # member::   object being indexed; each attribute name is sent to it and
    #            the result is converted with +to_s+.
    # document:: document the term generator is pointed at before indexing.
    #
    # Attributes declared with a block (options[:proc]) are split by that
    # block; blank pieces are dropped, the rest are downcased and passed to
    # +index_terms+ (not defined in this class). All other attributes are fed
    # to the term generator as free text.
    def index_text_attributes(member, document)
      term_generator.document = document
      @blueprint.text_attributes.each do |name, options|
        content = member.send(name).to_s
        if options[:proc]
          index_terms(options[:proc].call(content).reject(&:blank?).map(&:to_s).map(&:downcase), document)
        else
          # Pass the :weight option (default 1) so weighted text attributes
          # are not silently indexed with the default weight.
          term_generator.index_text(content, options[:weight] || 1)
        end
      end
    end

    # Memoized term generator shared by all documents indexed by this indexer.
    def term_generator
      @term_generator ||= create_term_generator
    end

    # Builds a Xapian::TermGenerator bound to +database+ and using the stemmer
    # named by Config.stemming. The spelling flag is set only when
    # Config.spelling? is true.
    def create_term_generator
      term_generator = Xapian::TermGenerator.new
      term_generator.set_flags(Xapian::TermGenerator::FLAG_SPELLING, 0) if Config.spelling?
      term_generator.database = database
      term_generator.stemmer = Xapian::Stem.new(Config.stemming)
      term_generator
    end
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module Xapit
  # Indexer that splits text into terms in Ruby and adds them to the document
  # one by one.
  class SimpleIndexer < AbstractIndexer
    # Adds the terms of every text attribute of the blueprint to +document+,
    # using the attribute's :weight option (default 1). Each term is also
    # registered as a spelling target when Config.spelling? is true.
    #
    # Called for its side effects; iterates with +each+ rather than building
    # a throw-away array.
    def index_text_attributes(member, document)
      @blueprint.text_attributes.each do |name, options|
        weight = options[:weight] || 1
        terms_for_attribute(member, name, options).each do |term|
          document.add_term(term, weight)
          database.add_spelling(term) if Config.spelling?
        end
      end
    end

    # Returns the terms for one attribute. When stemming is configured every
    # term is followed by its "Z"-prefixed stem; otherwise only the plain
    # terms are returned.
    def terms_for_attribute(member, name, options)
      terms = terms_for_attribute_without_stemming(member, name, options)
      # Without a stemming language there is nothing to build a stemmer from;
      # SimpleQueryParser likewise only emits "Z" terms when Config.stemming
      # is set.
      return terms unless Config.stemming
      terms.map do |term|
        [term, "Z#{stemmer.call(term)}"]
      end.flatten
    end

    # Returns the downcased terms of one attribute. An attribute declared with
    # a block (options[:proc]) is split by that block and blank pieces are
    # dropped; otherwise the text is split into runs of word characters.
    def terms_for_attribute_without_stemming(member, name, options)
      content = member.send(name).to_s
      if options[:proc]
        options[:proc].call(content).reject(&:blank?).map(&:to_s).map(&:downcase)
      else
        content.scan(/\w+/u).map(&:downcase)
      end
    end

    # Memoized Xapian stemmer for the configured language.
    def stemmer
      @stemmer ||= Xapian::Stem.new(Config.stemming)
    end
  end
end
|
@@ -0,0 +1,103 @@
|
|
1
|
+
module Xapit
  # Use "include Xapit::Membership" on a class to allow xapian searching on it. This is automatically included
  # in ActiveRecord::Base so you do not need to do anything there.
  module Membership
    def self.included(base)
      base.extend ClassMethods
    end

    module ClassMethods
      # Simply call "xapit" on a class and pass a block to define the indexed attributes.
      #
      #   class Article < ActiveRecord::Base
      #     xapit do |index|
      #       index.text :name, :content
      #       index.field :category_id
      #       index.facet :author_name, "Author"
      #       index.sortable :id, :category_id
      #     end
      #   end
      #
      # First we index "name" and "content" attributes for full text searching. The "category_id" field is indexed
      # for :conditions searching. The "author_name" is indexed as a facet with "Author" being the display name of
      # the facet. See the facets section below for details. Finally the "id" and "category_id" attributes are
      # indexed as sortable attributes so they can be included in the :order option in a search.
      #
      # Because the indexing happens in Ruby these attributes do not have to be database columns. They can be
      # simple Ruby methods. For example, the "author_name" attribute mentioned above can be defined like this.
      #
      #   def author_name
      #     author.name
      #   end
      #
      # This way you can create a completely custom facet by simply defining your own method.
      #
      # You can also pass any find options to the xapit method to determine what gets indexed and improve
      # performance with eager loading or a different batch size.
      #
      #   xapit(:batch_size => 100, :include => :author, :conditions => { :visible => true })
      #
      # If you pass a block to index.text you can customize how the text words will be divided (instead of
      # simply by white space).
      #
      #   xapit do |index|
      #     index.text(:keywords) { |words| words.split(', ') }
      #   end
      #
      # You can specify a :weight option to give a text attribute more importance. This will cause search terms
      # matching that attribute to have a higher rank. The default weight is 1. Decimal (0.5) weight values are
      # not supported.
      #
      #   index.text :name, :weight => 10
      #
      # The block is optional: when none is given the blueprint is still created from the arguments and the
      # search methods are still mixed in.
      def xapit(*args)
        @xapit_index_blueprint = IndexBlueprint.new(self, *args)
        # A bare "yield" raises LocalJumpError when no block is given.
        yield(@xapit_index_blueprint) if block_given?
        include AdditionalMethods
      end
    end

    # Mixed into a class once "xapit" has been called on it.
    module AdditionalMethods
      def self.included(base)
        base.extend ClassMethods
        base.send(:attr_accessor, :xapit_relevance) # is there a better way to do this?
      end

      # Find similar records to the given model. It takes the same arguments as
      # Membership::AdditionalMethods::ClassMethods#search to further narrow down the results.
      def search_similar(*args)
        Collection.search_similar(self, *args)
      end

      module ClassMethods
        # Used to perform a search on a model.
        #
        #   # perform a simple full text search
        #   @articles = Article.search("phone")
        #
        #   # add pagination if you're using will_paginate
        #   @articles = Article.search("phone", :per_page => 10, :page => params[:page])
        #
        #   # search based on indexed fields
        #   @articles = Article.search("phone", :conditions => { :category_id => params[:category_id] })
        #
        #   # manually sort based on any number of indexed fields, sort defaults to most relevant
        #   @articles = Article.search("phone", :order => [:category_id, :id], :descending => true)
        #
        #   # basic boolean matching is supported
        #   @articles = Article.search("phone or fax not email")
        #
        #   # no need to specify first query string when searching all records
        #   @articles = Article.search(:conditions => { :category_id => params[:category_id] })
        #
        def search(*args)
          Collection.new(self, *args)
        end

        # The Xapit::IndexBlueprint object used for this class.
        def xapit_index_blueprint
          @xapit_index_blueprint
        end

        # Finds a Xapit::FacetBlueprint for the given attribute (compared by string value).
        # Raises a RuntimeError when the index blueprint has no facet for that attribute.
        def xapit_facet_blueprint(attribute)
          result = xapit_index_blueprint.facets.detect { |f| f.attribute.to_s == attribute.to_s }
          raise "Unable to find facet blueprint for #{attribute} on #{name}" if result.nil?
          result
        end
      end
    end
  end
end
|
data/lib/xapit/query.rb
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
module Xapit
  # This class wraps a Xapian::Query for convenience purposes. You will likely not need to use
  # this class unless you are trying to query the Xapian database directly.
  # You may be looking for Xapit::Collection instead.
  class Query
    attr_reader :default_options, :xapian_query

    # query:: either a Xapian::Query (used as is) or a term / array of terms,
    #         which are combined with OP_AND.
    def initialize(query)
      @xapian_query = build_xapian_query(query)
      @default_options = { :offset => 0, :sort_descending => false }
    end

    # Narrows this query to results that also match +query+. A blank +query+
    # leaves the query unchanged. Returns self so calls can be chained.
    def and_query(query)
      combine(Xapian::Query::OP_AND, query)
    end

    # Widens this query to results that match it or +query+. A blank +query+
    # leaves the query unchanged. Returns self so calls can be chained.
    def or_query(query)
      combine(Xapian::Query::OP_OR, query)
    end

    # Excludes results matching +query+. A blank +query+ leaves the query
    # unchanged. Returns self so calls can be chained.
    def not_query(query)
      combine(Xapian::Query::OP_AND_NOT, query)
    end

    # Runs the query against Config.database and returns the value of
    # Xapian::Enquire#mset.
    #
    # Recognised options (missing keys fall back to +default_options+):
    # :offset, :limit, :sort_by_values, :sort_descending and :collapse_key.
    # The hash passed in is not modified.
    def matchset(options = {})
      # Fill in defaults on a copy; the caller's values win on conflict and the
      # caller's hash is left untouched.
      options = options.merge(default_options) { |_key, given, _default| given }
      enquire = Xapian::Enquire.new(Config.database)
      if options[:sort_by_values]
        sorter = Xapian::MultiValueSorter.new
        options[:sort_by_values].each do |sort_value|
          sorter.add(sort_value, !!options[:sort_descending])
        end
        enquire.set_sort_by_key_then_relevance(sorter)
      end
      enquire.collapse_key = options[:collapse_key] if options[:collapse_key]
      enquire.query = @xapian_query
      enquire.mset(options[:offset], options[:limit])
    end

    # Returns the matches of the match set produced by +matchset+.
    def matches(options = {})
      matchset(options).matches
    end

    # Estimated number of matching documents.
    def count
      # a bit of a hack to get more accurate count estimate
      matchset(:limit => Config.database.doccount).matches_estimated
    end

    private

    # Replaces the wrapped query with "current <operator> query" unless
    # +query+ is blank, then returns self.
    def combine(operator, query)
      @xapian_query = Xapian::Query.new(operator, @xapian_query, build_xapian_query(query)) unless query.blank?
      self
    end

    # Returns +query+ unchanged when it already is a Xapian::Query, otherwise
    # builds an OP_AND query from the given term(s).
    def build_xapian_query(query)
      if query.kind_of? Xapian::Query
        query
      else
        Xapian::Query.new(Xapian::Query::OP_AND, [query].flatten)
      end
    end
  end
end
|
@@ -0,0 +1,115 @@
|
|
1
|
+
module Xapit
  # Shared behaviour of the query parsers: turns search text plus an options
  # hash into a Xapit::Query. Subclasses supply +xapian_query_from_text+.
  class AbstractQueryParser
    attr_reader :member_class
    attr_writer :base_query

    # Arguments: member class (may be nil), search text, and a trailing
    # options hash (:page, :per_page, :order, :descending, :classes,
    # :conditions, :facets).
    def initialize(*args)
      @options = args.extract_options!
      @member_class = args[0]
      @search_text = args[1].to_s
    end

    # Returns the base query narrowed by the search text, conditions and
    # facets. When there is no text and no condition/facet term the base query
    # is returned as is.
    def query
      terms = condition_terms + facet_terms
      if (@search_text.split + terms).empty?
        base_query
      else
        # Memoized because and_query modifies the base query in place.
        @query ||= base_query.and_query(xapian_query_from_text(@search_text)).and_query(terms)
      end
    end

    # Requested page as an integer, 1 when none is given. Values below 1
    # (including non-numeric strings, which convert to 0) are treated as the
    # first page so that +offset+ can never become negative.
    def current_page
      page = @options[:page] ? @options[:page].to_i : 1
      page < 1 ? 1 : page
    end

    # Number of results per page, 20 when not given.
    def per_page
      @options[:per_page] ? @options[:per_page].to_i : 20
    end

    # Number of results to skip for the current page.
    def offset
      per_page*(current_page-1)
    end

    # Sortable positions for the :order option (a single attribute or an array
    # of attributes), looked up on the member class's index blueprint. Returns
    # nil when there is no :order option or no member class.
    def sort_by_values
      if @options[:order] && @member_class
        index = @member_class.xapit_index_blueprint
        if @options[:order].kind_of? Array
          @options[:order].map do |attribute|
            index.sortable_position_for(attribute)
          end
        else
          [index.sortable_position_for(@options[:order])]
        end
      end
    end

    # The query everything else is combined with; can be replaced through
    # base_query=.
    def base_query
      @base_query ||= initial_query
    end

    # Builds a Query matching any of +initial_query_strings+ and copies the
    # paging and sorting settings into its default options.
    def initial_query
      query = Query.new(Xapian::Query.new(Xapian::Query::OP_OR, initial_query_strings))
      query.default_options[:offset] = offset
      query.default_options[:limit] = per_page
      query.default_options[:sort_by_values] = sort_by_values
      query.default_options[:sort_descending] = @options[:descending]
      query
    end

    # One "C<ClassName>" term per searched class, or a single empty term when
    # no class is given.
    def initial_query_strings
      if classes.empty?
        [""]
      else
        classes.map { |klass| "C#{klass.name}" }
      end
    end

    # Classes to search: the :classes option or the member class, without nils.
    def classes
      (@options[:classes] || [@member_class]).compact
    end

    # One "X<name>-<value>" term per entry of the :conditions option. Time and
    # Date values are converted to integer seconds; every value is downcased.
    def condition_terms
      if @options[:conditions]
        @options[:conditions].map do |name, value|
          if value.kind_of? Time
            value = value.to_i
          elsif value.kind_of? Date
            value = value.to_time.to_i
          end
          "X#{name}-#{value.to_s.downcase}"
        end
      else
        []
      end
    end

    # One "F<identifier>" term per facet identifier.
    def facet_terms
      if @options[:facets]
        facet_identifiers.map do |identifier|
          "F#{identifier}"
        end
      else
        []
      end
    end

    # Facet identifiers from the :facets option: a String is split on "-",
    # anything else is returned as given (empty array when missing).
    def facet_identifiers
      @options[:facets].kind_of?(String) ? @options[:facets].split('-') : (@options[:facets] || [])
    end

    # Returns the downcased search text with each word replaced by the
    # database's spelling suggestion where one exists, or nil when no word has
    # a suggestion. Raises when spelling is disabled in the configuration.
    def spelling_suggestion
      raise "Spelling has been disabled. Enable spelling in Xapit::Config.setup." unless Config.spelling?
      any_suggestion = false
      # Single pass: each word is looked up in the database only once.
      corrected = @search_text.downcase.gsub(/\w+/) do |term|
        suggestion = Config.database.get_spelling_suggestion(term)
        any_suggestion = true unless suggestion.empty?
        suggestion.blank? ? term : suggestion
      end
      any_suggestion ? corrected : nil
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module Xapit
  # Query parser that delegates parsing of the search text to a
  # Xapian::QueryParser.
  class ClassicQueryParser < AbstractQueryParser
    # Parses +text+ with the shared Xapian parser and returns the result.
    def xapian_query_from_text(text)
      xapian_parser.parse_query(text)
    end

    # Xapian parser instance, built on first use and reused afterwards.
    def xapian_parser
      @xapian_parser = build_xapian_parser unless @xapian_parser
      @xapian_parser
    end

    # Creates a Xapian::QueryParser with the configured stemmer, the STEM_SOME
    # stemming strategy and OP_AND as the default operator.
    def build_xapian_parser
      Xapian::QueryParser.new.tap do |query_parser|
        query_parser.stemmer = Xapian::Stem.new(Config.stemming)
        query_parser.stemming_strategy = Xapian::QueryParser::STEM_SOME
        query_parser.default_op = Xapian::Query::OP_AND
      end
    end
  end
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
module Xapit
  # Query parser implemented in Ruby. It understands "or" between groups of
  # words and "not" in front of a word; all other words are ANDed.
  class SimpleQueryParser < AbstractQueryParser
    # Converts parsed instructions into a Xapian::Query.
    #
    # instructions:: a single term (String) or an array whose first element is
    #                the operator (:and, :or or :not) followed by terms
    #                (Strings) and nested instruction arrays. Defaults to
    #                +parsed+.
    #
    # Plain terms are combined with the array's operator first; nested arrays
    # are then attached one by one. A nested :not array is attached with
    # OP_AND_NOT. Returns nil when the instructions contain no terms at all.
    def xapian_query(instructions = nil)
      instructions ||= parsed
      instructions = [:and, instructions] if instructions.kind_of? String
      operator = (instructions.first == :or ? Xapian::Query::OP_OR : Xapian::Query::OP_AND)
      operands = instructions[1..-1]
      words = operands.select { |i| i.kind_of? String }
      query = Xapian::Query.new(operator, words) unless words.empty?
      operands.select { |i| i.kind_of? Array }.each do |sub_instructions|
        sub_query = xapian_query(sub_instructions)
        if sub_instructions.first == :not
          # With nothing to subtract from (e.g. the search "not foo"), subtract
          # from all documents; using the sub query on its own would match the
          # very documents that were meant to be excluded.
          query = Xapian::Query.new(Xapian::Query::OP_AND_NOT, query || match_all_query, sub_query)
        elsif query
          query = Xapian::Query.new(operator, query, sub_query)
        else
          query = sub_query
        end
      end
      query
    end

    # Instructions for the parser's own (downcased) search text.
    def parsed
      parse(@search_text.downcase)
    end

    # Builds a Xapian::Query from arbitrary text (downcased before parsing).
    def xapian_query_from_text(text)
      xapian_query(parse(text.downcase))
    end

    private

    # Turns text into instructions for +xapian_query+:
    # * an Array is wrapped as [:and, *items];
    # * text containing " or " becomes [:or, ...] with each side parsed
    #   separately;
    # * text with several words becomes [:and, ...], where a word preceded by
    #   "not" becomes [:not, term];
    # * a single word is returned as a term.
    # Terms are stemmed and "Z"-prefixed when stemming is configured.
    def parse(text)
      return [:and, *text] if text.kind_of? Array
      text = text.strip
      if text =~ /\sor\s/ui
        [:or, *text.split(/\s+or\s+/ui).map { |t| parse(t) }]
      elsif text =~ /\s+/u
        words = text.scan(/(?:\bnot\s+)?[^\s]+/ui).map do |word|
          if word =~ /^not\s/ui
            [:not, stem_term(word.sub(/^not\s+/ui, ''))]
          else
            stem_term(word)
          end
        end
        [:and, *words]
      else
        stem_term(text)
      end
    end

    # Returns the "Z"-prefixed stem of +term+ when stemming is configured and
    # the term is not blank; otherwise returns the term unchanged.
    def stem_term(term)
      if Config.stemming && !term.blank?
        "Z" + stemmer.call(term)
      else
        term
      end
    end

    # Query on the empty term, which Xapian treats as matching every document.
    def match_all_query
      Xapian::Query.new("")
    end

    # Memoized Xapian stemmer for the configured language.
    def stemmer
      @stemmer ||= Xapian::Stem.new(Config.stemming)
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
# Spec bootstrap: loads the test dependencies, the library under test and the
# in-memory member class used by the specs.
spec_root = File.dirname(__FILE__)

%w[rubygems spec active_support fileutils].each { |library| require library }
require spec_root + '/../lib/xapit'
require spec_root + '/xapit_member'

Spec::Runner.configure do |config|
  config.mock_with :rr

  # Every example starts with a freshly configured, removed database and no
  # XapitMember records.
  config.before(:each) do
    Xapit::Config.setup(:database_path => spec_root + '/tmp/xapiandb')
    Xapit::Config.remove_database
    XapitMember.delete_all
  end
end
|