xapit 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +20 -0
- data/Manifest +178 -0
- data/README.rdoc +183 -0
- data/Rakefile +15 -0
- data/TODO +23 -0
- data/features/facets.feature +51 -0
- data/features/finding.feature +119 -0
- data/features/indexing.feature +41 -0
- data/features/step_definitions/common_steps.rb +7 -0
- data/features/step_definitions/xapit_steps.rb +117 -0
- data/features/support/env.rb +7 -0
- data/features/support/xapit_helpers.rb +27 -0
- data/init.rb +3 -0
- data/install.rb +9 -0
- data/lib/xapit.rb +39 -0
- data/lib/xapit/collection.rb +165 -0
- data/lib/xapit/config.rb +83 -0
- data/lib/xapit/facet.rb +59 -0
- data/lib/xapit/facet_blueprint.rb +59 -0
- data/lib/xapit/facet_option.rb +56 -0
- data/lib/xapit/index_blueprint.rb +117 -0
- data/lib/xapit/indexers/abstract_indexer.rb +101 -0
- data/lib/xapit/indexers/classic_indexer.rb +27 -0
- data/lib/xapit/indexers/simple_indexer.rb +31 -0
- data/lib/xapit/membership.rb +103 -0
- data/lib/xapit/query.rb +62 -0
- data/lib/xapit/query_parsers/abstract_query_parser.rb +115 -0
- data/lib/xapit/query_parsers/classic_query_parser.rb +19 -0
- data/lib/xapit/query_parsers/simple_query_parser.rb +75 -0
- data/spec/spec_helper.rb +15 -0
- data/spec/tmp/xapdb/flintlock +0 -0
- data/spec/tmp/xapdb/iamflint +0 -0
- data/spec/tmp/xapdb/postlist.DB +0 -0
- data/spec/tmp/xapdb/postlist.baseA +0 -0
- data/spec/tmp/xapdb/postlist.baseB +0 -0
- data/spec/tmp/xapdb/record.DB +0 -0
- data/spec/tmp/xapdb/record.baseA +0 -0
- data/spec/tmp/xapdb/record.baseB +0 -0
- data/spec/tmp/xapdb/spelling.DB +0 -0
- data/spec/tmp/xapdb/spelling.baseA +0 -0
- data/spec/tmp/xapdb/spelling.baseB +0 -0
- data/spec/tmp/xapdb/termlist.DB +0 -0
- data/spec/tmp/xapdb/termlist.baseA +0 -0
- data/spec/tmp/xapdb/termlist.baseB +0 -0
- data/spec/tmp/xapian_db/flintlock +0 -0
- data/spec/tmp/xapian_db/iamflint +0 -0
- data/spec/tmp/xapian_db/postlist.DB +0 -0
- data/spec/tmp/xapian_db/postlist.baseA +0 -0
- data/spec/tmp/xapian_db/record.DB +0 -0
- data/spec/tmp/xapian_db/record.baseA +0 -0
- data/spec/tmp/xapian_db/termlist.DB +0 -0
- data/spec/tmp/xapian_db/termlist.baseA +0 -0
- data/spec/tmp/xapiandab/flintlock +0 -0
- data/spec/tmp/xapiandab/iamflint +0 -0
- data/spec/tmp/xapiandab/postlist.DB +0 -0
- data/spec/tmp/xapiandab/postlist.baseA +0 -0
- data/spec/tmp/xapiandab/postlist.baseB +0 -0
- data/spec/tmp/xapiandab/record.DB +0 -0
- data/spec/tmp/xapiandab/record.baseA +0 -0
- data/spec/tmp/xapiandab/record.baseB +0 -0
- data/spec/tmp/xapiandab/spelling.DB +0 -0
- data/spec/tmp/xapiandab/spelling.baseA +0 -0
- data/spec/tmp/xapiandab/spelling.baseB +0 -0
- data/spec/tmp/xapiandab/termlist.DB +0 -0
- data/spec/tmp/xapiandab/termlist.baseA +0 -0
- data/spec/tmp/xapiandab/termlist.baseB +0 -0
- data/spec/tmp/xapiandatab/flintlock +0 -0
- data/spec/tmp/xapiandatab/iamflint +0 -0
- data/spec/tmp/xapiandatab/postlist.DB +0 -0
- data/spec/tmp/xapiandatab/postlist.baseA +0 -0
- data/spec/tmp/xapiandatab/postlist.baseB +0 -0
- data/spec/tmp/xapiandatab/record.DB +0 -0
- data/spec/tmp/xapiandatab/record.baseA +0 -0
- data/spec/tmp/xapiandatab/record.baseB +0 -0
- data/spec/tmp/xapiandatab/spelling.DB +0 -0
- data/spec/tmp/xapiandatab/spelling.baseA +0 -0
- data/spec/tmp/xapiandatab/spelling.baseB +0 -0
- data/spec/tmp/xapiandatab/termlist.DB +0 -0
- data/spec/tmp/xapiandatab/termlist.baseA +0 -0
- data/spec/tmp/xapiandatab/termlist.baseB +0 -0
- data/spec/tmp/xapiandataba/flintlock +0 -0
- data/spec/tmp/xapiandataba/iamflint +0 -0
- data/spec/tmp/xapiandataba/postlist.DB +0 -0
- data/spec/tmp/xapiandataba/postlist.baseA +0 -0
- data/spec/tmp/xapiandataba/postlist.baseB +0 -0
- data/spec/tmp/xapiandataba/record.DB +0 -0
- data/spec/tmp/xapiandataba/record.baseA +0 -0
- data/spec/tmp/xapiandataba/record.baseB +0 -0
- data/spec/tmp/xapiandataba/spelling.DB +0 -0
- data/spec/tmp/xapiandataba/spelling.baseA +0 -0
- data/spec/tmp/xapiandataba/spelling.baseB +0 -0
- data/spec/tmp/xapiandataba/termlist.DB +0 -0
- data/spec/tmp/xapiandataba/termlist.baseA +0 -0
- data/spec/tmp/xapiandataba/termlist.baseB +0 -0
- data/spec/tmp/xapiandatabas/flintlock +0 -0
- data/spec/tmp/xapiandatabas/iamflint +0 -0
- data/spec/tmp/xapiandatabas/postlist.DB +0 -0
- data/spec/tmp/xapiandatabas/postlist.baseA +0 -0
- data/spec/tmp/xapiandatabas/record.DB +0 -0
- data/spec/tmp/xapiandatabas/record.baseA +0 -0
- data/spec/tmp/xapiandatabas/termlist.DB +0 -0
- data/spec/tmp/xapiandatabas/termlist.baseA +0 -0
- data/spec/tmp/xapiandatb/flintlock +0 -0
- data/spec/tmp/xapiandatb/iamflint +0 -0
- data/spec/tmp/xapiandatb/postlist.DB +0 -0
- data/spec/tmp/xapiandatb/postlist.baseA +0 -0
- data/spec/tmp/xapiandatb/postlist.baseB +0 -0
- data/spec/tmp/xapiandatb/record.DB +0 -0
- data/spec/tmp/xapiandatb/record.baseA +0 -0
- data/spec/tmp/xapiandatb/record.baseB +0 -0
- data/spec/tmp/xapiandatb/spelling.DB +0 -0
- data/spec/tmp/xapiandatb/spelling.baseA +0 -0
- data/spec/tmp/xapiandatb/spelling.baseB +0 -0
- data/spec/tmp/xapiandatb/termlist.DB +0 -0
- data/spec/tmp/xapiandatb/termlist.baseA +0 -0
- data/spec/tmp/xapiandatb/termlist.baseB +0 -0
- data/spec/tmp/xapiandbase/flintlock +0 -0
- data/spec/tmp/xapiandbase/iamflint +0 -0
- data/spec/tmp/xapiandbase/postlist.DB +0 -0
- data/spec/tmp/xapiandbase/postlist.baseA +0 -0
- data/spec/tmp/xapiandbase/postlist.baseB +0 -0
- data/spec/tmp/xapiandbase/record.DB +0 -0
- data/spec/tmp/xapiandbase/record.baseA +0 -0
- data/spec/tmp/xapiandbase/record.baseB +0 -0
- data/spec/tmp/xapiandbase/spelling.DB +0 -0
- data/spec/tmp/xapiandbase/spelling.baseA +0 -0
- data/spec/tmp/xapiandbase/spelling.baseB +0 -0
- data/spec/tmp/xapiandbase/termlist.DB +0 -0
- data/spec/tmp/xapiandbase/termlist.baseA +0 -0
- data/spec/tmp/xapiandbase/termlist.baseB +0 -0
- data/spec/xapit/collection_spec.rb +153 -0
- data/spec/xapit/config_spec.rb +48 -0
- data/spec/xapit/facet_blueprint_spec.rb +29 -0
- data/spec/xapit/facet_option_spec.rb +80 -0
- data/spec/xapit/facet_spec.rb +73 -0
- data/spec/xapit/index_blueprint_spec.rb +60 -0
- data/spec/xapit/indexers/abstract_indexer_spec.rb +74 -0
- data/spec/xapit/indexers/classic_indexer_spec.rb +26 -0
- data/spec/xapit/indexers/simple_indexer_spec.rb +53 -0
- data/spec/xapit/membership_spec.rb +39 -0
- data/spec/xapit/query_parsers/abstract_query_parser_spec.rb +23 -0
- data/spec/xapit/query_parsers/classic_query_parser_spec.rb +15 -0
- data/spec/xapit/query_parsers/simple_query_parser_spec.rb +86 -0
- data/spec/xapit/query_spec.rb +41 -0
- data/spec/xapit_member.rb +32 -0
- data/tasks/spec.rb +9 -0
- data/tasks/xapit.rake +9 -0
- data/tmp/xapiandatabase/flintlock +0 -0
- data/tmp/xapiandatabase/iamflint +0 -0
- data/tmp/xapiandatabase/postlist.DB +0 -0
- data/tmp/xapiandatabase/postlist.baseA +0 -0
- data/tmp/xapiandatabase/postlist.baseB +0 -0
- data/tmp/xapiandatabase/record.DB +0 -0
- data/tmp/xapiandatabase/record.baseA +0 -0
- data/tmp/xapiandatabase/record.baseB +0 -0
- data/tmp/xapiandatabase/spelling.DB +0 -0
- data/tmp/xapiandatabase/spelling.baseA +0 -0
- data/tmp/xapiandatabase/spelling.baseB +0 -0
- data/tmp/xapiandatabase/termlist.DB +0 -0
- data/tmp/xapiandatabase/termlist.baseA +0 -0
- data/tmp/xapiandatabase/termlist.baseB +0 -0
- data/tmp/xapiandatabase/value.baseB +0 -0
- data/tmp/xapiandb/flintlock +0 -0
- data/tmp/xapiandb/iamflint +0 -0
- data/tmp/xapiandb/postlist.DB +0 -0
- data/tmp/xapiandb/postlist.baseA +0 -0
- data/tmp/xapiandb/postlist.baseB +0 -0
- data/tmp/xapiandb/record.DB +0 -0
- data/tmp/xapiandb/record.baseA +0 -0
- data/tmp/xapiandb/record.baseB +0 -0
- data/tmp/xapiandb/spelling.DB +0 -0
- data/tmp/xapiandb/spelling.baseA +0 -0
- data/tmp/xapiandb/spelling.baseB +0 -0
- data/tmp/xapiandb/termlist.DB +0 -0
- data/tmp/xapiandb/termlist.baseA +0 -0
- data/tmp/xapiandb/termlist.baseB +0 -0
- data/tmp/xapiandb/value.baseB +0 -0
- data/uninstall.rb +5 -0
- data/xapit.gemspec +30 -0
- metadata +257 -0
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
module Xapit
  # Indexer that hands plain text attributes to a Xapian::TermGenerator, which
  # takes care of tokenising, stemming and (when enabled) spelling data.
  class ClassicIndexer < AbstractIndexer
    # Indexes every text attribute declared on the blueprint into +document+.
    #
    # member   - the object whose attributes are read (via +send+).
    # document - the Xapian document that receives the terms.
    #
    # Attributes declared with a block (options[:proc]) are split by that block;
    # blank entries are dropped, the rest are downcased and passed to
    # index_terms (not defined in this class; presumably inherited from
    # AbstractIndexer). All other attributes are fed to the term generator
    # using the attribute's :weight option (default 1), matching SimpleIndexer.
    def index_text_attributes(member, document)
      term_generator.document = document
      @blueprint.text_attributes.each do |name, options|
        content = member.send(name).to_s
        if options[:proc]
          index_terms(options[:proc].call(content).reject(&:blank?).map(&:to_s).map(&:downcase), document)
        else
          # Pass the weight through as the within-document frequency increment
          # so the :weight option is not silently ignored.
          term_generator.index_text(content, options[:weight] || 1)
        end
      end
    end

    # Memoized term generator shared by all documents handled by this indexer.
    def term_generator
      @term_generator ||= create_term_generator
    end

    # Builds a Xapian::TermGenerator bound to the indexer's database.
    # Spelling data is generated only when Config.spelling? is true, and a
    # stemmer is attached only when a stemming language is configured.
    def create_term_generator
      term_generator = Xapian::TermGenerator.new
      term_generator.set_flags(Xapian::TermGenerator::FLAG_SPELLING, 0) if Config.spelling?
      term_generator.database = database
      term_generator.stemmer = Xapian::Stem.new(Config.stemming) if Config.stemming
      term_generator
    end
  end
end
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
module Xapit
  # Indexer that tokenises text in Ruby and adds the resulting terms to the
  # Xapian document directly, without using Xapian::TermGenerator.
  class SimpleIndexer < AbstractIndexer
    # Adds the terms of every text attribute to +document+.
    #
    # member   - the object whose attributes are read.
    # document - the Xapian document that receives the terms.
    #
    # Each term is added with the attribute's :weight option (default 1). When
    # spelling is enabled only the plain words are added to the spelling
    # dictionary; "Z"-prefixed stems are skipped so that they can never be
    # offered back to a user as a spelling suggestion.
    def index_text_attributes(member, document)
      @blueprint.text_attributes.each do |name, options|
        weight = options[:weight] || 1
        terms_for_attribute(member, name, options).each do |term|
          document.add_term(term, weight)
          database.add_spelling(term) if Config.spelling? && !stemmed_term?(term)
        end
      end
    end

    # Returns the terms for one attribute: every plain term followed by its
    # "Z"-prefixed stem. When no stemming language is configured only the
    # plain terms are returned (SimpleQueryParser applies the same guard).
    def terms_for_attribute(member, name, options)
      plain_terms = terms_for_attribute_without_stemming(member, name, options)
      return plain_terms unless Config.stemming
      plain_terms.map { |term| [term, "Z#{stemmer.call(term)}"] }.flatten
    end

    # Returns the downcased terms of one attribute without any stems.
    # With options[:proc] the block decides how the content is split and blank
    # entries are dropped; otherwise the content is split into word characters.
    def terms_for_attribute_without_stemming(member, name, options)
      content = member.send(name).to_s
      if options[:proc]
        options[:proc].call(content).reject(&:blank?).map(&:to_s).map(&:downcase)
      else
        content.scan(/\w+/u).map(&:downcase)
      end
    end

    # Memoized stemmer for the configured stemming language.
    def stemmer
      @stemmer ||= Xapian::Stem.new(Config.stemming)
    end

    private

    # True for the "Z"-prefixed stems produced by terms_for_attribute. Plain
    # terms are always downcased, so they never start with an uppercase "Z".
    def stemmed_term?(term)
      term[0, 1] == "Z"
    end
  end
end
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
module Xapit
  # Use "include Xapit::Membership" on a class to allow xapian searching on it. This is automatically included
  # in ActiveRecord::Base so you do not need to do anything there.
  module Membership
    # Adds the "xapit" class macro to any class that includes this module.
    def self.included(base)
      base.extend ClassMethods
    end

    module ClassMethods
      # Simply call "xapit" on a class and pass a block to define the indexed attributes.
      #
      #   class Article < ActiveRecord::Base
      #     xapit do |index|
      #       index.text :name, :content
      #       index.field :category_id
      #       index.facet :author_name, "Author"
      #       index.sortable :id, :category_id
      #     end
      #   end
      #
      # First we index "name" and "content" attributes for full text searching. The "category_id" field is indexed
      # for :conditions searching. The "author_name" is indexed as a facet with "Author" being the display name of
      # the facet. See the facets section below for details. Finally the "id" and "category_id" attributes are
      # indexed as sortable attributes so they can be included in the :order option in a search.
      #
      # Because the indexing happens in Ruby these attributes do not have to be database columns. They can be
      # simple Ruby methods. For example, the "author_name" attribute mentioned above can be defined like this.
      #
      #   def author_name
      #     author.name
      #   end
      #
      # This way you can create a completely custom facet by simply defining your own method.
      #
      # You can also pass any find options to the xapit method to determine what gets indexed and improve
      # performance with eager loading or a different batch size. The block is optional in this form.
      #
      #   xapit(:batch_size => 100, :include => :author, :conditions => { :visible => true })
      #
      # If you pass a block to index.text you can customize how the text words will be divided (instead of by
      # simply white space).
      #
      #   xapit do |index|
      #     index.text(:keywords) { |words| words.split(', ') }
      #   end
      #
      # You can specify a :weight option to give a text attribute more importance. This will cause search terms
      # matching that attribute to have a higher rank. The default weight is 1. Decimal (0.5) weight values are
      # not supported.
      #
      #   index.text :name, :weight => 10
      #
      def xapit(*args)
        @xapit_index_blueprint = IndexBlueprint.new(self, *args)
        # The block is optional so that an options-only call does not raise LocalJumpError.
        yield(@xapit_index_blueprint) if block_given?
        include AdditionalMethods
      end
    end

    # Instance and class methods that become available once "xapit" has been called on a class.
    module AdditionalMethods
      # Adds the search class methods and an xapit_relevance accessor to the including class.
      def self.included(base)
        base.extend ClassMethods
        base.send(:attr_accessor, :xapit_relevance) # is there a better way to do this?
      end

      # Find similar records to the given model. It takes the same arguments as
      # Membership::AdditionalMethods::ClassMethods#search to further narrow down the results.
      def search_similar(*args)
        Collection.search_similar(self, *args)
      end

      module ClassMethods
        # Used to perform a search on a model.
        #
        #   # perform a simple full text search
        #   @articles = Article.search("phone")
        #
        #   # add pagination if you're using will_paginate
        #   @articles = Article.search("phone", :per_page => 10, :page => params[:page])
        #
        #   # search based on indexed fields
        #   @articles = Article.search("phone", :conditions => { :category_id => params[:category_id] })
        #
        #   # manually sort based on any number of indexed fields, sort defaults to most relevant
        #   @articles = Article.search("phone", :order => [:category_id, :id], :descending => true)
        #
        #   # basic boolean matching is supported
        #   @articles = Article.search("phone or fax not email")
        #
        #   # no need to specify first query string when searching all records
        #   @articles = Article.search(:conditions => { :category_id => params[:category_id] })
        #
        def search(*args)
          Collection.new(self, *args)
        end

        # The Xapit::IndexBlueprint object used for this class.
        def xapit_index_blueprint
          @xapit_index_blueprint
        end

        # Finds a Xapit::FacetBlueprint for the given attribute (String or Symbol).
        # Raises a RuntimeError when the class has no facet for that attribute.
        def xapit_facet_blueprint(attribute)
          result = xapit_index_blueprint.facets.detect { |f| f.attribute.to_s == attribute.to_s }
          raise "Unable to find facet blueprint for #{attribute} on #{name}" if result.nil?
          result
        end
      end
    end
  end
end
|
data/lib/xapit/query.rb
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
module Xapit
  # This class wraps a Xapian::Query for convenience purposes. You will likely not need to use
  # this class unless you are trying to query the Xapian database directly.
  # You may be looking for Xapit::Collection instead.
  class Query
    attr_reader :default_options, :xapian_query

    # query - a Xapian::Query, a single term, or an array of terms (terms are ANDed together).
    def initialize(query)
      @xapian_query = build_xapian_query(query)
      @default_options = { :offset => 0, :sort_descending => false }
    end

    # Narrows the current query: both the current query and +query+ must match.
    # A blank +query+ leaves the current query untouched. Returns self for chaining.
    def and_query(query)
      combine(Xapian::Query::OP_AND, query)
    end

    # Widens the current query: either the current query or +query+ may match.
    # A blank +query+ leaves the current query untouched. Returns self for chaining.
    def or_query(query)
      combine(Xapian::Query::OP_OR, query)
    end

    # Excludes documents matching +query+ from the current query.
    # A blank +query+ leaves the current query untouched. Returns self for chaining.
    def not_query(query)
      combine(Xapian::Query::OP_AND_NOT, query)
    end

    # Runs the query against Config.database and returns the value of Xapian::Enquire#mset.
    #
    # options - overrides for default_options:
    #           :offset          - index of the first match to return.
    #           :limit           - maximum number of matches; falls back to the
    #                              database document count when missing.
    #           :sort_by_values  - value slots to sort by before relevance.
    #           :sort_descending - passed to the sorter for every slot.
    #           :collapse_key    - value slot used to collapse matches.
    #
    # The hash passed by the caller is not modified.
    def matchset(options = {})
      options = default_options.merge(options)
      enquire = Xapian::Enquire.new(Config.database)
      if options[:sort_by_values]
        sorter = Xapian::MultiValueSorter.new
        options[:sort_by_values].each do |sort_value|
          sorter.add(sort_value, !!options[:sort_descending])
        end
        enquire.set_sort_by_key_then_relevance(sorter)
      end
      enquire.collapse_key = options[:collapse_key] if options[:collapse_key]
      enquire.query = @xapian_query
      enquire.mset(options[:offset], options[:limit] || Config.database.doccount)
    end

    # Returns the matches of the match set for the given options (see matchset).
    def matches(options = {})
      matchset(options).matches
    end

    # Returns the estimated number of matching documents.
    def count
      # a bit of a hack to get more accurate count estimate
      matchset(:limit => Config.database.doccount).matches_estimated
    end

    private

    # Joins the current query with +query+ using +operator+ unless +query+ is blank.
    def combine(operator, query)
      @xapian_query = Xapian::Query.new(operator, @xapian_query, build_xapian_query(query)) unless query.blank?
      self
    end

    # Returns +query+ itself when it already is a Xapian::Query; otherwise builds
    # an AND query from the given term or (possibly nested) array of terms.
    def build_xapian_query(query)
      if query.kind_of? Xapian::Query
        query
      else
        Xapian::Query.new(Xapian::Query::OP_AND, [query].flatten)
      end
    end
  end
end
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
module Xapit
  # Base class for the query parsers. It turns the search text and the
  # :conditions, :facets, :order, :page and :per_page options into a
  # Xapit::Query. Subclasses must implement xapian_query_from_text(text).
  class AbstractQueryParser
    attr_reader :member_class
    attr_writer :base_query

    # args - [member_class, search_text, options]; the trailing options hash
    #        is optional and the search text is converted with to_s.
    def initialize(*args)
      @options = args.extract_options!
      @member_class = args[0]
      @search_text = args[1].to_s
    end

    # Returns the Xapit::Query for this search. When there is no search text and
    # there are no condition or facet terms the base query is returned as is.
    def query
      terms = condition_terms + facet_terms
      return base_query if @search_text.split.empty? && terms.empty?
      @query ||= base_query.and_query(xapian_query_from_text(@search_text)).and_query(terms)
    end

    # The requested page as an Integer, defaulting to 1. Values below 1 are
    # treated as the first page so that offset can never become negative.
    def current_page
      page = @options[:page] ? @options[:page].to_i : 1
      page < 1 ? 1 : page
    end

    # The number of results per page as an Integer, defaulting to 20.
    def per_page
      @options[:per_page] ? @options[:per_page].to_i : 20
    end

    # Index of the first result of the current page.
    def offset
      per_page * (current_page - 1)
    end

    # Maps the :order option (one attribute or an array of attributes) to the
    # sortable positions of the member class blueprint. Returns nil when no
    # :order option or no member class is given.
    def sort_by_values
      return unless @options[:order] && @member_class
      index = @member_class.xapit_index_blueprint
      order = @options[:order]
      attributes = order.kind_of?(Array) ? order : [order]
      attributes.map { |attribute| index.sortable_position_for(attribute) }
    end

    # Memoized starting query; can be replaced through base_query=.
    def base_query
      @base_query ||= initial_query
    end

    # Builds the starting query, which matches any of initial_query_strings, and
    # stores the paging and sorting options as the query's default options.
    def initial_query
      query = Query.new(Xapian::Query.new(Xapian::Query::OP_OR, initial_query_strings))
      query.default_options[:offset] = offset
      query.default_options[:limit] = per_page
      query.default_options[:sort_by_values] = sort_by_values
      query.default_options[:sort_descending] = @options[:descending]
      query
    end

    # One "C<ClassName>" term per searched class, or [""] when no class is given.
    def initial_query_strings
      if classes.empty?
        [""]
      else
        classes.map { |klass| "C#{klass.name}" }
      end
    end

    # The classes to search: the :classes option, or the member class if present.
    def classes
      (@options[:classes] || [@member_class]).compact
    end

    # One "X<name>-<value>" term per entry of the :conditions option. Time and
    # Date values are converted to integer timestamps; every value is downcased.
    def condition_terms
      return [] unless @options[:conditions]
      @options[:conditions].map do |name, value|
        value = case value
                when Time then value.to_i
                when Date then value.to_time.to_i
                else value
                end
        "X#{name}-#{value.to_s.downcase}"
      end
    end

    # One "F<identifier>" term per facet identifier.
    def facet_terms
      facet_identifiers.map { |identifier| "F#{identifier}" }
    end

    # The facet identifiers from the :facets option. A String is split on "-";
    # anything else is returned as given (an empty array when the option is unset).
    def facet_identifiers
      @options[:facets].kind_of?(String) ? @options[:facets].split('-') : (@options[:facets] || [])
    end

    # Returns the downcased search text with each word replaced by its spelling
    # suggestion, or nil when the database has no suggestion for any word.
    # Each word is looked up once. Raises when spelling is disabled.
    def spelling_suggestion
      raise "Spelling has been disabled. Enable spelling in Xapit::Config.setup." unless Config.spelling?
      corrected = false
      suggested_text = @search_text.downcase.gsub(/\w+/) do |term|
        suggestion = Config.database.get_spelling_suggestion(term)
        if suggestion.blank?
          term
        else
          corrected = true
          suggestion
        end
      end
      corrected ? suggested_text : nil
    end
  end
end
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
module Xapit
  # Query parser that delegates the parsing of the search text to
  # Xapian::QueryParser.
  class ClassicQueryParser < AbstractQueryParser
    # Parses +text+ with the memoized Xapian query parser and returns its result.
    def xapian_query_from_text(text)
      xapian_parser.parse_query(text)
    end

    # Memoized Xapian::QueryParser instance.
    def xapian_parser
      @xapian_parser ||= build_xapian_parser
    end

    # Builds a Xapian::QueryParser that ANDs terms by default. A stemmer is
    # attached only when a stemming language is configured; SimpleQueryParser
    # applies the same guard.
    def build_xapian_parser
      parser = Xapian::QueryParser.new
      parser.stemmer = Xapian::Stem.new(Config.stemming) if Config.stemming
      parser.stemming_strategy = Xapian::QueryParser::STEM_SOME
      parser.default_op = Xapian::Query::OP_AND
      parser
    end
  end
end
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
module Xapit
  # Query parser implemented in Ruby. It understands "or" between groups of
  # words and "not" in front of a word; all other words are ANDed together.
  class SimpleQueryParser < AbstractQueryParser
    # Converts parsed instructions into a Xapian::Query.
    #
    # instructions - a single term (String) or an array whose first element is
    #                the operator (:or, :and or :not) followed by terms and
    #                nested instruction arrays. Defaults to the parsed search text.
    #
    # Returns nil when the instructions contain no terms at all.
    def xapian_query(instructions = nil)
      instructions ||= parsed
      instructions = [:and, instructions] if instructions.kind_of? String
      operator = (instructions.first == :or ? Xapian::Query::OP_OR : Xapian::Query::OP_AND)
      operands = instructions[1..-1]
      words = operands.select { |i| i.kind_of? String }
      query = Xapian::Query.new(operator, words) unless words.empty?
      operands.select { |i| i.kind_of? Array }.each do |sub_instructions|
        sub_query = xapian_query(sub_instructions)
        next if sub_query.nil? # nothing to combine, e.g. an empty group
        negated = sub_instructions.first == :not
        if query
          query = Xapian::Query.new(negated ? Xapian::Query::OP_AND_NOT : operator, query, sub_query)
        elsif negated
          # A negation with nothing in front of it ("not foo") has to exclude
          # from "everything"; the empty term is Xapian's match-all query.
          query = Xapian::Query.new(Xapian::Query::OP_AND_NOT, Xapian::Query.new(""), sub_query)
        else
          query = sub_query
        end
      end
      query
    end

    # The instructions for the downcased search text.
    def parsed
      parse(@search_text.downcase)
    end

    # Builds a Xapian::Query from the given search text.
    def xapian_query_from_text(text)
      xapian_query(parse(text.downcase))
    end

    private

    # Turns search text into instructions for xapian_query:
    #
    #   "a or b c" -> [:or, term(a), [:and, term(b), term(c)]]
    #   "a not b"  -> [:and, term(a), [:not, term(b)]]
    #   "a"        -> term(a)
    #
    # where term(x) is the "Z"-prefixed stem when stemming is enabled. An array
    # is wrapped as [:and, *array] without further processing.
    def parse(text)
      return [:and, *text] if text.kind_of? Array
      text = text.strip
      if text =~ /\sor\s/ui
        [:or, *text.split(/\s+or\s+/ui).map { |t| parse(t) }]
      elsif text =~ /\s+/u
        words = text.scan(/(?:\bnot\s+)?[^\s]+/ui).map do |word|
          if word =~ /^not\s/ui
            [:not, stem_term(word.sub(/^not\s+/ui, ''))]
          else
            stem_term(word)
          end
        end
        [:and, *words]
      else
        text.blank? ? text : stem_term(text)
      end
    end

    # Returns the "Z"-prefixed stem of +word+, or +word+ itself when stemming is disabled.
    def stem_term(word)
      Config.stemming ? "Z" + stemmer.call(word) : word
    end

    # Memoized stemmer for the configured stemming language.
    def stemmer
      @stemmer ||= Xapian::Stem.new(Config.stemming)
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
require 'rubygems'
|
|
2
|
+
require 'spec'
|
|
3
|
+
require 'active_support'
|
|
4
|
+
require 'fileutils'
|
|
5
|
+
require File.dirname(__FILE__) + '/../lib/xapit'
|
|
6
|
+
require File.dirname(__FILE__) + '/xapit_member'
|
|
7
|
+
|
|
8
|
+
Spec::Runner.configure do |config|
  # Use the rr mocking framework.
  config.mock_with :rr

  # Before every example: point Xapit at the tmp/xapiandb directory next to
  # this file, remove the database and delete all XapitMember records.
  config.before(:each) do
    database_path = File.dirname(__FILE__) + '/tmp/xapiandb'
    Xapit::Config.setup(:database_path => database_path)
    Xapit::Config.remove_database
    XapitMember.delete_all
  end
end
|