xapian_db 1.2.5 → 1.2.5.1
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.md +7 -0
- data/README.rdoc +12 -0
- data/lib/type_codec.rb +7 -4
- data/lib/xapian_db/config.rb +23 -1
- data/lib/xapian_db/indexer.rb +12 -1
- data/lib/xapian_db/query_parser.rb +1 -1
- data/lib/xapian_db/railtie.rb +21 -11
- metadata +2 -2
data/CHANGELOG.md
CHANGED
data/README.rdoc
CHANGED
@@ -83,6 +83,14 @@ You can override these defaults by placing a config file named 'xapian_db.yml' i
|
|
83
83
|
database: db/xapian_db/production
|
84
84
|
<<: *defaults
|
85
85
|
|
86
|
+
==== Available options
|
87
|
+
|
88
|
+
- adapter: :active_record|:datamapper, default: :active_record
|
89
|
+
- language: any ISO language code, default: :none (activates spelling corrections, stemmer and stop words if an ISO language code is set)
|
90
|
+
- term_min_length: <n>, default: 1 (do not index terms shorter than n)
|
91
|
+
- enable_phrase_search: true|false, default: false (see the xapian docs for an intro to phrase searching)
|
92
|
+
- term_splitter_count: <n>, default: 0 (see chapter Term Splitting)
|
93
|
+
|
86
94
|
If you do not configure settings for an environment in this file, xapian_db applies the defaults.
|
87
95
|
|
88
96
|
=== Configure an index blueprint
|
@@ -370,6 +378,10 @@ XapianDb serializes objects to xapian documents using YAML by default. This way,
|
|
370
378
|
|
371
379
|
However, dates need special handling to support date range queries. To support date range queries and allow the addition of other custom data types in the future, XapianDb uses a simple, extensible mechanism to serialize / deserialize your objects. An example of how to extend this mechanism is provided in examples/custom_serialization.rb.
|
372
380
|
|
381
|
+
== Term Splitting
|
382
|
+
|
383
|
+
If you want to build a realtime search that shows results while the user types, you might experience very poor performance and a huge memory load for the first typed characters (1*, 12*...). XapianDb allows you to configure the term_splitter_count to avoid this. If you configure a term_splitter_count of e.g. 2, the term "test" will get indexed with "t", "te" and "test". Now you can apply the "*" only for search terms that are longer than the configured term_splitter_count, resulting in much better performance and a lower memory footprint.
|
384
|
+
|
373
385
|
== Production setup
|
374
386
|
|
375
387
|
Since Xapian allows only one database instance to write to the index, the default setup of XapianDb will not work
|
data/lib/type_codec.rb
CHANGED
@@ -130,10 +130,13 @@ module XapianDb
|
|
130
130
|
# @param [Integer, BigDecimal, Float] number a number object to encode
|
131
131
|
# @return [String] the encoded number
|
132
132
|
def self.encode(number)
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
133
|
+
case number.class.name
|
134
|
+
when "Fixnum", "Float"
|
135
|
+
Xapian::sortable_serialise number
|
136
|
+
when "BigDecimal"
|
137
|
+
Xapian::sortable_serialise number.to_f
|
138
|
+
else
|
139
|
+
raise ArgumentError.new "#{number} was expected to be a number"
|
137
140
|
end
|
138
141
|
end
|
139
142
|
|
data/lib/xapian_db/config.rb
CHANGED
@@ -53,13 +53,21 @@ module XapianDb
|
|
53
53
|
@config.instance_variable_get("@_term_min_length") || 1
|
54
54
|
end
|
55
55
|
|
56
|
+
def phrase_search_enabled?
|
57
|
+
@config.instance_variable_get("@_phrase_search_enabled") || false
|
58
|
+
end
|
59
|
+
|
60
|
+
def term_splitter_count
|
61
|
+
@config.instance_variable_get("@_term_splitter_count") || 0
|
62
|
+
end
|
56
63
|
end
|
57
64
|
|
58
65
|
# ---------------------------------------------------------------------------------
|
59
66
|
# DSL methods
|
60
67
|
# ---------------------------------------------------------------------------------
|
61
68
|
|
62
|
-
attr_reader :_database, :_adapter, :_writer, :_beanstalk_daemon, :_resque_queue, :_stemmer, :_stopper, :_term_min_length
|
69
|
+
attr_reader :_database, :_adapter, :_writer, :_beanstalk_daemon, :_resque_queue, :_stemmer, :_stopper, :_term_min_length,
|
70
|
+
:_phrase_search_enabled, :_term_splitter_count
|
63
71
|
|
64
72
|
# Set the global database to use
|
65
73
|
# @param [String] path The path to the database. Either apply a file system path or :memory
|
@@ -144,6 +152,20 @@ module XapianDb
|
|
144
152
|
def term_min_length(length)
|
145
153
|
@_term_min_length = length
|
146
154
|
end
|
155
|
+
|
156
|
+
# Enable phrase search support ("search this exact sentence")
|
157
|
+
def enable_phrase_search
|
158
|
+
@_phrase_search_enabled = true
|
159
|
+
end
|
160
|
+
|
161
|
+
# Disable phrase search support ("search this exact sentence")
|
162
|
+
def disable_phrase_search
|
163
|
+
@_phrase_search_enabled = false
|
164
|
+
end
|
165
|
+
|
166
|
+
def term_splitter_count(count)
|
167
|
+
@_term_splitter_count = count
|
168
|
+
end
|
147
169
|
end
|
148
170
|
|
149
171
|
end
|
data/lib/xapian_db/indexer.rb
CHANGED
@@ -86,6 +86,7 @@ module XapianDb
|
|
86
86
|
values = get_values_to_index_from obj
|
87
87
|
values.each do |value|
|
88
88
|
terms = value.to_s.downcase
|
89
|
+
terms = split(terms) if XapianDb::Config.term_splitter_count > 0
|
89
90
|
# Add value with field name
|
90
91
|
term_generator.index_text(terms, options.weight, "X#{method.upcase}") if options.prefixed
|
91
92
|
# Add value without field name
|
@@ -115,6 +116,16 @@ module XapianDb
|
|
115
116
|
obj.to_s.nil? ? [] : [obj]
|
116
117
|
end
|
117
118
|
|
118
|
-
|
119
|
+
private
|
120
|
+
|
121
|
+
def split(terms)
|
122
|
+
splitted_terms = []
|
123
|
+
terms.split(" ").each do |term|
|
124
|
+
(1..XapianDb::Config.term_splitter_count).each { |i| splitted_terms << term[0...i] }
|
125
|
+
splitted_terms << term
|
126
|
+
end
|
127
|
+
splitted_terms.join " "
|
128
|
+
end
|
119
129
|
|
130
|
+
end
|
120
131
|
end
|
@@ -21,7 +21,7 @@ module XapianDb
|
|
21
21
|
@query_flags |= Xapian::QueryParser::FLAG_BOOLEAN # enable boolean operators
|
22
22
|
@query_flags |= Xapian::QueryParser::FLAG_BOOLEAN_ANY_CASE # enable case insensitive boolean operators
|
23
23
|
@query_flags |= Xapian::QueryParser::FLAG_SPELLING_CORRECTION # enable spelling corrections
|
24
|
-
@query_flags |= Xapian::QueryParser::FLAG_PHRASE
|
24
|
+
@query_flags |= Xapian::QueryParser::FLAG_PHRASE if XapianDb::Config.phrase_search_enabled?
|
25
25
|
end
|
26
26
|
|
27
27
|
# Parse an expression
|
data/lib/xapian_db/railtie.rb
CHANGED
@@ -51,6 +51,12 @@ module XapianDb
|
|
51
51
|
config.resque_queue @resque_queue
|
52
52
|
config.language @language.try(:to_sym)
|
53
53
|
config.term_min_length @term_min_length
|
54
|
+
if @enable_phrase_search
|
55
|
+
config.enable_phrase_search
|
56
|
+
else
|
57
|
+
config.disable_phrase_search
|
58
|
+
end
|
59
|
+
config.term_splitter_count @term_splitter_count
|
54
60
|
end
|
55
61
|
|
56
62
|
end
|
@@ -66,22 +72,26 @@ module XapianDb
|
|
66
72
|
|
67
73
|
# use the config options from the config file
|
68
74
|
def self.configure_from(env_config)
|
69
|
-
@database_path
|
70
|
-
@adapter
|
71
|
-
@writer
|
72
|
-
@
|
73
|
-
@resque_queue
|
74
|
-
@language
|
75
|
-
@term_min_length
|
75
|
+
@database_path = env_config["database"] || ":memory:"
|
76
|
+
@adapter = env_config["adapter"] || :active_record
|
77
|
+
@writer = env_config["writer"] || :direct
|
78
|
+
@beanstalk_daemon_url = env_config["beanstalk_daemon"]
|
79
|
+
@resque_queue = env_config["resque_queue"]
|
80
|
+
@language = env_config["language"]
|
81
|
+
@term_min_length = env_config["term_min_length"]
|
82
|
+
@enable_phrase_search = env_config["enable_phrase_search"] == true
|
83
|
+
@term_splitter_count = env_config["term_splitter_count"] || 0
|
76
84
|
end
|
77
85
|
|
78
86
|
# set default config options
|
79
87
|
def self.configure_defaults
|
80
88
|
Rails.env == "test" ? @database_path = ":memory:" : @database_path = "db/xapian_db/#{Rails.env}"
|
81
|
-
@adapter
|
82
|
-
@writer
|
83
|
-
@beanstalk_daemon
|
84
|
-
@term_min_length
|
89
|
+
@adapter = :active_record
|
90
|
+
@writer = :direct
|
91
|
+
@beanstalk_daemon = nil
|
92
|
+
@term_min_length = 1
|
93
|
+
@enable_phrase_search = false
|
94
|
+
@term_splitter_count = 0
|
85
95
|
end
|
86
96
|
|
87
97
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: xapian_db
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.5
|
4
|
+
version: 1.2.5.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-01-
|
12
|
+
date: 2013-01-07 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: daemons
|