xapian_db 1.2.5 → 1.2.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.md +7 -0
- data/README.rdoc +12 -0
- data/lib/type_codec.rb +7 -4
- data/lib/xapian_db/config.rb +23 -1
- data/lib/xapian_db/indexer.rb +12 -1
- data/lib/xapian_db/query_parser.rb +1 -1
- data/lib/xapian_db/railtie.rb +21 -11
- metadata +2 -2
data/CHANGELOG.md
CHANGED
data/README.rdoc
CHANGED
@@ -83,6 +83,14 @@ You can override these defaults by placing a config file named 'xapian_db.yml' i
|
|
83
83
|
database: db/xapian_db/production
|
84
84
|
<<: *defaults
|
85
85
|
|
86
|
+
==== Available options
|
87
|
+
|
88
|
+
- adapter: :active_record|:datamapper, default: :active_record
|
89
|
+
- language: any iso language code, default: :none (activates spelling corrections, stemmer and stop words if an iso language code is set)
|
90
|
+
- term_min_length: <n>, default: 1 (do not index terms shorter than n)
|
91
|
+
- enable_phrase_search: true|false, default: false (see the xapian docs for an intro to phrase searching)
|
92
|
+
- term_splitter_count: <n>, default: 0 (see chapter Term Splitting)
|
93
|
+
|
86
94
|
If you do not configure settings for an environment in this file, xapian_db applies the defaults.
|
87
95
|
|
88
96
|
=== Configure an index blueprint
|
@@ -370,6 +378,10 @@ XapianDb serializes objects to xapian documents using YAML by default. This way,
|
|
370
378
|
|
371
379
|
However, dates need special handling to support date range queries. To support date range queries and allow the addition of other custom data types in the future, XapianDb uses a simple, extensible mechanism to serialize / deserialize your objects. An example on how to extend this mechanism is provided in examples/custom_serialization.rb.
|
372
380
|
|
381
|
+
== Term Splitting
|
382
|
+
|
383
|
+
If you want to build a realtime search showing results while the user types, you might experience very poor performance and a huge memory load for the first typed characters (1*, 12*...). XapianDb allows you to configure the term_splitter_count to avoid this. If you configure a term_splitter_count of e.g. 2, the term "test" will get indexed with "t", "te" and "test". Now you can apply the "*" wildcard only to search terms that are longer than the configured term_splitter_count, resulting in much better performance and a lower memory footprint.
|
384
|
+
|
373
385
|
== Production setup
|
374
386
|
|
375
387
|
Since Xapian allows only one database instance to write to the index, the default setup of XapianDb will not work
|
data/lib/type_codec.rb
CHANGED
@@ -130,10 +130,13 @@ module XapianDb
|
|
130
130
|
# @param [Integer, BigDecimal, Float] number a number object to encode
|
131
131
|
# @return [String] the encoded number
|
132
132
|
def self.encode(number)
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
133
|
+
case number.class.name
|
134
|
+
when "Fixnum", "Float"
|
135
|
+
Xapian::sortable_serialise number
|
136
|
+
when "BigDecimal"
|
137
|
+
Xapian::sortable_serialise number.to_f
|
138
|
+
else
|
139
|
+
raise ArgumentError.new "#{number} was expected to be a number"
|
137
140
|
end
|
138
141
|
end
|
139
142
|
|
data/lib/xapian_db/config.rb
CHANGED
@@ -53,13 +53,21 @@ module XapianDb
|
|
53
53
|
@config.instance_variable_get("@_term_min_length") || 1
|
54
54
|
end
|
55
55
|
|
56
|
+
def phrase_search_enabled?
|
57
|
+
@config.instance_variable_get("@_phrase_search_enabled") || false
|
58
|
+
end
|
59
|
+
|
60
|
+
def term_splitter_count
|
61
|
+
@config.instance_variable_get("@_term_splitter_count") || 0
|
62
|
+
end
|
56
63
|
end
|
57
64
|
|
58
65
|
# ---------------------------------------------------------------------------------
|
59
66
|
# DSL methods
|
60
67
|
# ---------------------------------------------------------------------------------
|
61
68
|
|
62
|
-
attr_reader :_database, :_adapter, :_writer, :_beanstalk_daemon, :_resque_queue, :_stemmer, :_stopper, :_term_min_length
|
69
|
+
attr_reader :_database, :_adapter, :_writer, :_beanstalk_daemon, :_resque_queue, :_stemmer, :_stopper, :_term_min_length,
|
70
|
+
:_phrase_search_enabled, :_term_splitter_count
|
63
71
|
|
64
72
|
# Set the global database to use
|
65
73
|
# @param [String] path The path to the database. Either apply a file system path or :memory
|
@@ -144,6 +152,20 @@ module XapianDb
|
|
144
152
|
def term_min_length(length)
|
145
153
|
@_term_min_length = length
|
146
154
|
end
|
155
|
+
|
156
|
+
# Enable phrase search support ("search this exact sentence")
|
157
|
+
def enable_phrase_search
|
158
|
+
@_phrase_search_enabled = true
|
159
|
+
end
|
160
|
+
|
161
|
+
# Disable phrase search support ("search this exact sentence")
|
162
|
+
def disable_phrase_search
|
163
|
+
@_phrase_search_enabled = false
|
164
|
+
end
|
165
|
+
|
166
|
+
def term_splitter_count(count)
|
167
|
+
@_term_splitter_count = count
|
168
|
+
end
|
147
169
|
end
|
148
170
|
|
149
171
|
end
|
data/lib/xapian_db/indexer.rb
CHANGED
@@ -86,6 +86,7 @@ module XapianDb
|
|
86
86
|
values = get_values_to_index_from obj
|
87
87
|
values.each do |value|
|
88
88
|
terms = value.to_s.downcase
|
89
|
+
terms = split(terms) if XapianDb::Config.term_splitter_count > 0
|
89
90
|
# Add value with field name
|
90
91
|
term_generator.index_text(terms, options.weight, "X#{method.upcase}") if options.prefixed
|
91
92
|
# Add value without field name
|
@@ -115,6 +116,16 @@ module XapianDb
|
|
115
116
|
obj.to_s.nil? ? [] : [obj]
|
116
117
|
end
|
117
118
|
|
118
|
-
|
119
|
+
private
|
120
|
+
|
121
|
+
def split(terms)
|
122
|
+
splitted_terms = []
|
123
|
+
terms.split(" ").each do |term|
|
124
|
+
(1..XapianDb::Config.term_splitter_count).each { |i| splitted_terms << term[0...i] }
|
125
|
+
splitted_terms << term
|
126
|
+
end
|
127
|
+
splitted_terms.join " "
|
128
|
+
end
|
119
129
|
|
130
|
+
end
|
120
131
|
end
|
@@ -21,7 +21,7 @@ module XapianDb
|
|
21
21
|
@query_flags |= Xapian::QueryParser::FLAG_BOOLEAN # enable boolean operators
|
22
22
|
@query_flags |= Xapian::QueryParser::FLAG_BOOLEAN_ANY_CASE # enable case insensitive boolean operators
|
23
23
|
@query_flags |= Xapian::QueryParser::FLAG_SPELLING_CORRECTION # enable spelling corrections
|
24
|
-
@query_flags |= Xapian::QueryParser::FLAG_PHRASE
|
24
|
+
@query_flags |= Xapian::QueryParser::FLAG_PHRASE if XapianDb::Config.phrase_search_enabled?
|
25
25
|
end
|
26
26
|
|
27
27
|
# Parse an expression
|
data/lib/xapian_db/railtie.rb
CHANGED
@@ -51,6 +51,12 @@ module XapianDb
|
|
51
51
|
config.resque_queue @resque_queue
|
52
52
|
config.language @language.try(:to_sym)
|
53
53
|
config.term_min_length @term_min_length
|
54
|
+
if @enable_phrase_search
|
55
|
+
config.enable_phrase_search
|
56
|
+
else
|
57
|
+
config.disable_phrase_search
|
58
|
+
end
|
59
|
+
config.term_splitter_count @term_splitter_count
|
54
60
|
end
|
55
61
|
|
56
62
|
end
|
@@ -66,22 +72,26 @@ module XapianDb
|
|
66
72
|
|
67
73
|
# use the config options from the config file
|
68
74
|
def self.configure_from(env_config)
|
69
|
-
@database_path
|
70
|
-
@adapter
|
71
|
-
@writer
|
72
|
-
@
|
73
|
-
@resque_queue
|
74
|
-
@language
|
75
|
-
@term_min_length
|
75
|
+
@database_path = env_config["database"] || ":memory:"
|
76
|
+
@adapter = env_config["adapter"] || :active_record
|
77
|
+
@writer = env_config["writer"] || :direct
|
78
|
+
@beanstalk_daemon_url = env_config["beanstalk_daemon"]
|
79
|
+
@resque_queue = env_config["resque_queue"]
|
80
|
+
@language = env_config["language"]
|
81
|
+
@term_min_length = env_config["term_min_length"]
|
82
|
+
@enable_phrase_search = env_config["enable_phrase_search"] == true
|
83
|
+
@term_splitter_count = env_config["term_splitter_count"] || 0
|
76
84
|
end
|
77
85
|
|
78
86
|
# set default config options
|
79
87
|
def self.configure_defaults
|
80
88
|
Rails.env == "test" ? @database_path = ":memory:" : @database_path = "db/xapian_db/#{Rails.env}"
|
81
|
-
@adapter
|
82
|
-
@writer
|
83
|
-
@beanstalk_daemon
|
84
|
-
@term_min_length
|
89
|
+
@adapter = :active_record
|
90
|
+
@writer = :direct
|
91
|
+
@beanstalk_daemon = nil
|
92
|
+
@term_min_length = 1
|
93
|
+
@enable_phrase_search = false
|
94
|
+
@term_splitter_count = 0
|
85
95
|
end
|
86
96
|
|
87
97
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: xapian_db
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.5
|
4
|
+
version: 1.2.5.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-01-
|
12
|
+
date: 2013-01-07 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: daemons
|