xapian_db 1.2.5.1 → 1.3
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.md +5 -1
- data/README.rdoc +8 -4
- data/lib/generators/templates/beanstalk_worker +3 -3
- data/lib/type_codec.rb +59 -18
- data/lib/xapian_db/database.rb +1 -1
- data/lib/xapian_db/document_blueprint.rb +7 -2
- data/lib/xapian_db/index_writers/beanstalk_writer.rb +4 -3
- data/lib/xapian_db.rb +1 -1
- metadata +6 -3
data/CHANGELOG.md
CHANGED
@@ -1,4 +1,8 @@
|
|
1
|
-
##1.
|
1
|
+
##1.3 (February 19th, 2013)
|
2
|
+
|
3
|
+
Breaking Changes:
|
4
|
+
- changed the internal serialization format for attributes; the new default is string, new codecs were added. YAML is not supported
|
5
|
+
anymore, although you could implement a YAML-Codec on your own if you need it (see examples/custom_serialization.rb)
|
2
6
|
|
3
7
|
Changes:
|
4
8
|
|
data/README.rdoc
CHANGED
@@ -4,7 +4,10 @@
|
|
4
4
|
|
5
5
|
== Important Information
|
6
6
|
|
7
|
-
|
7
|
+
Version 1.3 no longer supports YAML serialization for attributes. If you don't specify type information for an attribute (like attribute :birth, as: :date) in your blueprint,
|
8
|
+
the attribute gets stored as a string. The new preferred type for complex attributes is JSON. Why the change? We were experiencing an unacceptable performance hit when we switched from Syck to Psych. The new philosophy for blueprints is therefore explicit type information. lib/type_codec.rb contains the most common codecs and, of course, you can add your own (see examples/custom_serialization.rb).
|
9
|
+
|
10
|
+
<b>Please note: You may want to fine-tune your blueprints and you MUST rebuild your xapian index when switching to version 1.3</b>
|
8
11
|
|
9
12
|
== What's in the box?
|
10
13
|
|
@@ -139,13 +142,14 @@ You may add a filter expression to exclude objects from the index. This is handy
|
|
139
142
|
blueprint.ignore_if {active == false}
|
140
143
|
end
|
141
144
|
|
142
|
-
You can add a type information to an attribute. As of now the special types :string, :date, :date_time and :number are supported (and required for range queries):
|
145
|
+
You can add type information to an attribute (the default format is string). As of now the special types :string, :date, :date_time and :number are supported (and required for range queries):
|
143
146
|
|
144
147
|
XapianDb::DocumentBlueprint.setup(:Person) do |blueprint|
|
145
148
|
blueprint.attribute :age, :as => :number
|
146
149
|
blueprint.attribute :date_of_birth, :as => :date
|
147
150
|
blueprint.attribute :name, :as => :string
|
148
151
|
blueprint.attribute :updated_at, :as => :date_time
|
152
|
+
blueprint.attribute :address, :as => :json
|
149
153
|
end
|
150
154
|
|
151
155
|
If you don't need field searches for an attribute, turn off the prefixed option (makes your index smaller and more efficient):
|
@@ -165,7 +169,7 @@ one more class that is not stored in the database, but you want it to be indexed
|
|
165
169
|
If you use associations in your blueprints, it might be a good idea to specify a base query to speed up rebuild_xapian_index calls (avoiding 1+n queries):
|
166
170
|
|
167
171
|
XapianDb::DocumentBlueprint.setup(:Person) do |blueprint|
|
168
|
-
blueprint.index :addresses
|
172
|
+
blueprint.index :addresses, as: :json
|
169
173
|
blueprint.base_query { |p| p.includes(:addresses) }
|
170
174
|
end
|
171
175
|
|
@@ -374,7 +378,7 @@ you can use the auto_indexing_disabled method with a block and rebuild the whole
|
|
374
378
|
|
375
379
|
== Add your own serializers for special objects
|
376
380
|
|
377
|
-
XapianDb serializes objects to xapian documents
|
381
|
+
XapianDb serializes objects to xapian documents as strings by default.
|
378
382
|
|
379
383
|
However, dates need special handling to support date range queries. To support date range queries and allow the addition of other custom data types in the future, XapianDb uses a simple, extensible mechanism to serialize / deserialize your objects. An example of how to extend this mechanism is provided in examples/custom_serialization.rb.
|
380
384
|
|
@@ -5,7 +5,7 @@ require "bundler/setup"
|
|
5
5
|
require 'rails'
|
6
6
|
require 'daemons'
|
7
7
|
require "xapian_db"
|
8
|
-
require '
|
8
|
+
require 'json'
|
9
9
|
require 'rails/railtie' # execute xapian_db railtie
|
10
10
|
|
11
11
|
# daemon script to manage the beanstalk worker daemon
|
@@ -33,7 +33,7 @@ module XapianDb
|
|
33
33
|
loop do
|
34
34
|
job = beanstalk.reserve
|
35
35
|
begin
|
36
|
-
params =
|
36
|
+
params = JSON.parse(job.body).symbolize_keys!
|
37
37
|
Rails.logger.info "XapianDb beanstalk worker: executing task #{params}"
|
38
38
|
Rails.logger.flush
|
39
39
|
task = params.delete :task
|
@@ -74,4 +74,4 @@ Daemons.run_proc('beanstalk_worker', daemon_options) do
|
|
74
74
|
Dir.chdir dir
|
75
75
|
require File.expand_path(File.join(File.dirname(__FILE__), '..', 'config', 'environment'))
|
76
76
|
XapianDb::BeanstalkWorker.new.process_requests
|
77
|
-
end
|
77
|
+
end
|
data/lib/type_codec.rb
CHANGED
@@ -24,31 +24,27 @@ module XapianDb
|
|
24
24
|
end
|
25
25
|
end
|
26
26
|
|
27
|
-
class
|
27
|
+
class JsonCodec
|
28
28
|
|
29
|
-
# Encode an object to its
|
29
|
+
# Encode an object to its json representation
|
30
30
|
# @param [Object] object an object to encode
|
31
|
-
# @return [String] the
|
31
|
+
# @return [String] the json string
|
32
32
|
def self.encode(object)
|
33
33
|
begin
|
34
|
-
|
35
|
-
object.attributes.to_yaml
|
36
|
-
else
|
37
|
-
object.to_yaml
|
38
|
-
end
|
34
|
+
object.to_json
|
39
35
|
rescue NoMethodError
|
40
|
-
raise ArgumentError.new "#{object} does not support
|
36
|
+
raise ArgumentError.new "#{object} does not support json serialization"
|
41
37
|
end
|
42
38
|
end
|
43
39
|
|
44
|
-
# Decode an object from a
|
45
|
-
# @param [String]
|
46
|
-
# @return [
|
47
|
-
def self.decode(
|
40
|
+
# Decode an object from a json string
|
41
|
+
# @param [String] json_string a json string representing the object
|
42
|
+
# @return [Hash] a ruby hash
|
43
|
+
def self.decode(json_string)
|
48
44
|
begin
|
49
|
-
|
45
|
+
JSON.parse json_string
|
50
46
|
rescue TypeError
|
51
|
-
raise ArgumentError.new "'#{
|
47
|
+
raise ArgumentError.new "'#{json_string}' cannot be parsed"
|
52
48
|
end
|
53
49
|
end
|
54
50
|
end
|
@@ -66,7 +62,24 @@ module XapianDb
|
|
66
62
|
# @param [String] string a string
|
67
63
|
# @return [String] the string
|
68
64
|
def self.decode(string)
|
69
|
-
string
|
65
|
+
string.force_encoding("UTF-8")
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
class BooleanCodec
|
70
|
+
|
71
|
+
# Encode a boolean value to a string
|
72
|
+
# @param [Object] value a value to encode
|
73
|
+
# @return [String] the string
|
74
|
+
def self.encode(value)
|
75
|
+
value.to_s
|
76
|
+
end
|
77
|
+
|
78
|
+
# Decode a string representing a boolean
|
79
|
+
# @param [String] string a string
|
80
|
+
# @return [Boolean] the boolean value
|
81
|
+
def self.decode(string)
|
82
|
+
string == "true"
|
70
83
|
end
|
71
84
|
end
|
72
85
|
|
@@ -127,11 +140,11 @@ module XapianDb
|
|
127
140
|
class NumberCodec
|
128
141
|
|
129
142
|
# Encode a number to a sortable string
|
130
|
-
# @param [Integer, BigDecimal, Float] number a number object to encode
|
143
|
+
# @param [Integer, BigDecimal, Bignum, Float] number a number object to encode
|
131
144
|
# @return [String] the encoded number
|
132
145
|
def self.encode(number)
|
133
146
|
case number.class.name
|
134
|
-
when "Fixnum", "Float"
|
147
|
+
when "Fixnum", "Float", "Bignum"
|
135
148
|
Xapian::sortable_serialise number
|
136
149
|
when "BigDecimal"
|
137
150
|
Xapian::sortable_serialise number.to_f
|
@@ -152,5 +165,33 @@ module XapianDb
|
|
152
165
|
end
|
153
166
|
end
|
154
167
|
|
168
|
+
class IntegerCodec
|
169
|
+
|
170
|
+
# Encode an integer to a sortable string
|
171
|
+
# @param [Integer] number an integer to encode
|
172
|
+
# @return [String] the encoded integer
|
173
|
+
def self.encode(number)
|
174
|
+
return nil if number.nil?
|
175
|
+
case number.class.name
|
176
|
+
when "Fixnum"
|
177
|
+
Xapian::sortable_serialise number
|
178
|
+
else
|
179
|
+
raise ArgumentError.new "#{number} was expected to be an integer"
|
180
|
+
end
|
181
|
+
end
|
182
|
+
|
183
|
+
# Decode a string to an integer
|
184
|
+
# @param [String] encoded_integer a string representing an integer
|
185
|
+
# @return [Integer] the decoded integer
|
186
|
+
def self.decode(encoded_integer)
|
187
|
+
begin
|
188
|
+
return nil if encoded_integer.nil? || encoded_integer.to_s.strip == ""
|
189
|
+
Xapian::sortable_unserialise(encoded_integer).to_i
|
190
|
+
rescue TypeError
|
191
|
+
raise ArgumentError.new "#{encoded_integer} cannot be unserialized"
|
192
|
+
end
|
193
|
+
end
|
194
|
+
end
|
195
|
+
|
155
196
|
end
|
156
197
|
end
|
data/lib/xapian_db/database.rb
CHANGED
@@ -137,7 +137,7 @@ module XapianDb
|
|
137
137
|
enquiry.collapse_key = value_number
|
138
138
|
facets = {}
|
139
139
|
enquiry.mset(0, size).matches.each do |match|
|
140
|
-
facet_value =
|
140
|
+
facet_value = match.document.value(value_number)
|
141
141
|
# We must add 1 to the collapse_count since collapse_count means
|
142
142
|
# "how many other matches are there?"
|
143
143
|
facets[facet_value] = match.collapse_count + 1
|
@@ -52,6 +52,11 @@ module XapianDb
|
|
52
52
|
@attributes = @blueprints.values.map { |blueprint| blueprint.attribute_names}.flatten.compact.uniq.sort || []
|
53
53
|
end
|
54
54
|
|
55
|
+
# reset the blueprint setup
|
56
|
+
def reset
|
57
|
+
@blueprints = {}
|
58
|
+
end
|
59
|
+
|
55
60
|
# is a blueprint configured for the given name?
|
56
61
|
# @return [Boolean]
|
57
62
|
def configured?(name)
|
@@ -311,7 +316,7 @@ module XapianDb
|
|
311
316
|
def attribute(name, options={}, &block)
|
312
317
|
raise ArgumentError.new("You cannot use #{name} as an attribute name since it is a reserved method name of Xapian::Document") if reserved_method_name?(name)
|
313
318
|
do_not_index = options.delete(:index) == false
|
314
|
-
@type_map[name] = (options.delete(:as) || :
|
319
|
+
@type_map[name] = (options.delete(:as) || :string)
|
315
320
|
|
316
321
|
if block_given?
|
317
322
|
@attributes_hash[name] = {:block => block}.merge(options)
|
@@ -328,7 +333,7 @@ module XapianDb
|
|
328
333
|
attributes.each do |attr|
|
329
334
|
raise ArgumentError.new("You cannot use #{attr} as an attribute name since it is a reserved method name of Xapian::Document") if reserved_method_name?(attr)
|
330
335
|
@attributes_hash[attr] = {}
|
331
|
-
@type_map[attr] = :
|
336
|
+
@type_map[attr] = :string
|
332
337
|
self.index attr
|
333
338
|
end
|
334
339
|
end
|
@@ -7,6 +7,7 @@
|
|
7
7
|
# @author Gernot Kogler
|
8
8
|
|
9
9
|
require 'beanstalk-client'
|
10
|
+
require 'json'
|
10
11
|
|
11
12
|
module XapianDb
|
12
13
|
module IndexWriters
|
@@ -18,19 +19,19 @@ module XapianDb
|
|
18
19
|
# Update an object in the index
|
19
20
|
# @param [Object] obj An instance of a class with a blueprint configuration
|
20
21
|
def index(obj, commit=true)
|
21
|
-
beanstalk.put( {:task => "index_task", :class => obj.class.name, :id => obj.id }.
|
22
|
+
beanstalk.put( {:task => "index_task", :class => obj.class.name, :id => obj.id }.to_json )
|
22
23
|
end
|
23
24
|
|
24
25
|
# Remove an object from the index
|
25
26
|
# @param [String] xapian_id The document id
|
26
27
|
def delete_doc_with(xapian_id, commit=true)
|
27
|
-
beanstalk.put( { :task => "delete_doc_task", :xapian_id => xapian_id }.
|
28
|
+
beanstalk.put( { :task => "delete_doc_task", :xapian_id => xapian_id }.to_json )
|
28
29
|
end
|
29
30
|
|
30
31
|
# Reindex all objects of a given class
|
31
32
|
# @param [Class] klass The class to reindex
|
32
33
|
def reindex_class(klass, options={})
|
33
|
-
beanstalk.put( { :task => "reindex_class_task", :class => klass.name }.
|
34
|
+
beanstalk.put( { :task => "reindex_class_task", :class => klass.name }.to_json )
|
34
35
|
end
|
35
36
|
|
36
37
|
def beanstalk
|
data/lib/xapian_db.rb
CHANGED
@@ -7,7 +7,7 @@
|
|
7
7
|
# @author Gernot Kogler
|
8
8
|
|
9
9
|
require 'xapian'
|
10
|
-
require '
|
10
|
+
require 'json'
|
11
11
|
|
12
12
|
do_not_require = %w(update_stopwords railtie base_adapter generic_adapter active_record_adapter datamapper_adapter
|
13
13
|
beanstalk_writer resque_writer utilities install_generator datamapper)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: xapian_db
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: '1.3'
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-02-19 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: daemons
|
@@ -244,6 +244,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
244
244
|
- - ! '>='
|
245
245
|
- !ruby/object:Gem::Version
|
246
246
|
version: '0'
|
247
|
+
segments:
|
248
|
+
- 0
|
249
|
+
hash: -2176648600874761364
|
247
250
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
248
251
|
none: false
|
249
252
|
requirements:
|
@@ -252,7 +255,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
252
255
|
version: 1.3.6
|
253
256
|
requirements: []
|
254
257
|
rubyforge_project:
|
255
|
-
rubygems_version: 1.8.
|
258
|
+
rubygems_version: 1.8.25
|
256
259
|
signing_key:
|
257
260
|
specification_version: 3
|
258
261
|
summary: Ruby library to use a Xapian db as a key/value store with high performance
|