xapian_db 1.2.5.1 → 1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.md +5 -1
- data/README.rdoc +8 -4
- data/lib/generators/templates/beanstalk_worker +3 -3
- data/lib/type_codec.rb +59 -18
- data/lib/xapian_db/database.rb +1 -1
- data/lib/xapian_db/document_blueprint.rb +7 -2
- data/lib/xapian_db/index_writers/beanstalk_writer.rb +4 -3
- data/lib/xapian_db.rb +1 -1
- metadata +6 -3
data/CHANGELOG.md
CHANGED
@@ -1,4 +1,8 @@
|
|
1
|
-
##1.
|
1
|
+
##1.3 (February 19th, 2013)
|
2
|
+
|
3
|
+
Breaking Changes:
|
4
|
+
- changed the internal serialization format for attributes; the new default is string, new codecs were added. YAML is not supported
|
5
|
+
anymore, although you could implement a YAML-Codec on your own if you need it (see examples/custom_serialization.rb)
|
2
6
|
|
3
7
|
Changes:
|
4
8
|
|
data/README.rdoc
CHANGED
@@ -4,7 +4,10 @@
|
|
4
4
|
|
5
5
|
== Important Information
|
6
6
|
|
7
|
-
|
7
|
+
Version 1.3 does not support YAML serialization for attributes anymore. If you don't specify type information for an attribute (like attribute :birth, as: :date) in your blueprint,
|
8
|
+
the attribute gets stored as a string. The new preferred type for complex attributes is JSON. Why the change? We were experiencing an unacceptable performance hit when we switched from Syck to Psych. The new philosophy for blueprints is therefore explicit type information. lib/type_codec.rb contains the most common codecs and of course, you can add your own (see examples/custom_serialization.rb).
|
9
|
+
|
10
|
+
<b>Please note: You may want to fine tune your blueprints and you MUST rebuild your xapian index when switching to version 1.3</b>
|
8
11
|
|
9
12
|
== What's in the box?
|
10
13
|
|
@@ -139,13 +142,14 @@ You may add a filter expression to exclude objects from the index. This is handy
|
|
139
142
|
blueprint.ignore_if {active == false}
|
140
143
|
end
|
141
144
|
|
142
|
-
You can add a type information to an attribute. As of now the special types :string, :date, :date_time and :number are supported (and required for range queries):
|
145
|
+
You can add a type information to an attribute (default format is string). As of now the special types :string, :date, :date_time and :number are supported (and required for range queries):
|
143
146
|
|
144
147
|
XapianDb::DocumentBlueprint.setup(:Person) do |blueprint|
|
145
148
|
blueprint.attribute :age, :as => :number
|
146
149
|
blueprint.attribute :date_of_birth, :as => :date
|
147
150
|
blueprint.attribute :name, :as => :string
|
148
151
|
blueprint.attribute :updated_at, :as => :date_time
|
152
|
+
blueprint.attribute :address, :as => :json
|
149
153
|
end
|
150
154
|
|
151
155
|
If you don't need field searches for an attribute, turn off the prefixed option (makes your index smaller and more efficient):
|
@@ -165,7 +169,7 @@ one more class that is not stored in the database, but you want it to be indexed
|
|
165
169
|
If you use associations in your blueprints, it might be a good idea to specify a base query to speed up rebuild_xapian_index calls (avoiding 1+n queries):
|
166
170
|
|
167
171
|
XapianDb::DocumentBlueprint.setup(:Person) do |blueprint|
|
168
|
-
blueprint.index :addresses
|
172
|
+
blueprint.index :addresses, as: :json
|
169
173
|
blueprint.base_query { |p| p.includes(:addresses) }
|
170
174
|
end
|
171
175
|
|
@@ -374,7 +378,7 @@ you can use the auto_indexing_disabled method with a block and rebuild the whole
|
|
374
378
|
|
375
379
|
== Add your own serializers for special objects
|
376
380
|
|
377
|
-
XapianDb serializes objects to xapian documents
|
381
|
+
XapianDb serializes objects to xapian documents as strings by default.
|
378
382
|
|
379
383
|
However, dates need special handling to support date range queries. To support date range queries and allow the addition of other custom data types in the future, XapianDb uses a simple, extensible mechanism to serialize / deserialize your objects. An example on how to extend this mechanism is provided in examples/custom_serialization.rb.
|
380
384
|
|
@@ -5,7 +5,7 @@ require "bundler/setup"
|
|
5
5
|
require 'rails'
|
6
6
|
require 'daemons'
|
7
7
|
require "xapian_db"
|
8
|
-
require '
|
8
|
+
require 'json'
|
9
9
|
require 'rails/railtie' # execute xapian_db railtie
|
10
10
|
|
11
11
|
# daemon script to manage the beanstalk worker daemon
|
@@ -33,7 +33,7 @@ module XapianDb
|
|
33
33
|
loop do
|
34
34
|
job = beanstalk.reserve
|
35
35
|
begin
|
36
|
-
params =
|
36
|
+
params = JSON.parse(job.body).symbolize_keys!
|
37
37
|
Rails.logger.info "XapianDb beanstalk worker: executing task #{params}"
|
38
38
|
Rails.logger.flush
|
39
39
|
task = params.delete :task
|
@@ -74,4 +74,4 @@ Daemons.run_proc('beanstalk_worker', daemon_options) do
|
|
74
74
|
Dir.chdir dir
|
75
75
|
require File.expand_path(File.join(File.dirname(__FILE__), '..', 'config', 'environment'))
|
76
76
|
XapianDb::BeanstalkWorker.new.process_requests
|
77
|
-
end
|
77
|
+
end
|
data/lib/type_codec.rb
CHANGED
@@ -24,31 +24,27 @@ module XapianDb
|
|
24
24
|
end
|
25
25
|
end
|
26
26
|
|
27
|
-
class
|
27
|
+
class JsonCodec
|
28
28
|
|
29
|
-
# Encode an object to its
|
29
|
+
# Encode an object to its json representation
|
30
30
|
# @param [Object] object an object to encode
|
31
|
-
# @return [String] the
|
31
|
+
# @return [String] the json string
|
32
32
|
def self.encode(object)
|
33
33
|
begin
|
34
|
-
|
35
|
-
object.attributes.to_yaml
|
36
|
-
else
|
37
|
-
object.to_yaml
|
38
|
-
end
|
34
|
+
object.to_json
|
39
35
|
rescue NoMethodError
|
40
|
-
raise ArgumentError.new "#{object} does not support
|
36
|
+
raise ArgumentError.new "#{object} does not support json serialization"
|
41
37
|
end
|
42
38
|
end
|
43
39
|
|
44
|
-
# Decode an object from a
|
45
|
-
# @param [String]
|
46
|
-
# @return [
|
47
|
-
def self.decode(
|
40
|
+
# Decode an object from a json string
|
41
|
+
# @param [String] json_string a json string representing the object
|
42
|
+
# @return [Hash] a ruby hash
|
43
|
+
def self.decode(json_string)
|
48
44
|
begin
|
49
|
-
|
45
|
+
JSON.parse json_string
|
50
46
|
rescue TypeError
|
51
|
-
raise ArgumentError.new "'#{
|
47
|
+
raise ArgumentError.new "'#{json_string}' cannot be parsed"
|
52
48
|
end
|
53
49
|
end
|
54
50
|
end
|
@@ -66,7 +62,24 @@ module XapianDb
|
|
66
62
|
# @param [String] string a string
|
67
63
|
# @return [String] the string
|
68
64
|
def self.decode(string)
|
69
|
-
string
|
65
|
+
string.force_encoding("UTF-8")
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
class BooleanCodec
|
70
|
+
|
71
|
+
# Encode a boolean value to a string
|
72
|
+
# @param [Object] value a value to encode
|
73
|
+
# @return [String] the string
|
74
|
+
def self.encode(value)
|
75
|
+
value.to_s
|
76
|
+
end
|
77
|
+
|
78
|
+
# Decode a string representing a boolean
|
79
|
+
# @param [String] string a string
|
80
|
+
# @return [Boolean] the boolean value
|
81
|
+
def self.decode(string)
|
82
|
+
string == "true"
|
70
83
|
end
|
71
84
|
end
|
72
85
|
|
@@ -127,11 +140,11 @@ module XapianDb
|
|
127
140
|
class NumberCodec
|
128
141
|
|
129
142
|
# Encode a number to a sortable string
|
130
|
-
# @param [Integer, BigDecimal, Float] number a number object to encode
|
143
|
+
# @param [Integer, BigDecimal, Bignum, Float] number a number object to encode
|
131
144
|
# @return [String] the encoded number
|
132
145
|
def self.encode(number)
|
133
146
|
case number.class.name
|
134
|
-
when "Fixnum", "Float"
|
147
|
+
when "Fixnum", "Float", "Bignum"
|
135
148
|
Xapian::sortable_serialise number
|
136
149
|
when "BigDecimal"
|
137
150
|
Xapian::sortable_serialise number.to_f
|
@@ -152,5 +165,33 @@ module XapianDb
|
|
152
165
|
end
|
153
166
|
end
|
154
167
|
|
168
|
+
class IntegerCodec
|
169
|
+
|
170
|
+
# Encode an integer to a sortable string
|
171
|
+
# @param [Integer] number an integer to encode
|
172
|
+
# @return [String] the encoded integer
|
173
|
+
def self.encode(number)
|
174
|
+
return nil if number.nil?
|
175
|
+
case number.class.name
|
176
|
+
when "Fixnum"
|
177
|
+
Xapian::sortable_serialise number
|
178
|
+
else
|
179
|
+
raise ArgumentError.new "#{number} was expected to be an integer"
|
180
|
+
end
|
181
|
+
end
|
182
|
+
|
183
|
+
# Decode a string to an integer
|
184
|
+
# @param [String] encoded_integer a string representing an integer
|
185
|
+
# @return [Integer] the decoded integer
|
186
|
+
def self.decode(encoded_integer)
|
187
|
+
begin
|
188
|
+
return nil if encoded_integer.nil? || encoded_integer.to_s.strip == ""
|
189
|
+
Xapian::sortable_unserialise(encoded_integer).to_i
|
190
|
+
rescue TypeError
|
191
|
+
raise ArgumentError.new "#{encoded_integer} cannot be unserialized"
|
192
|
+
end
|
193
|
+
end
|
194
|
+
end
|
195
|
+
|
155
196
|
end
|
156
197
|
end
|
data/lib/xapian_db/database.rb
CHANGED
@@ -137,7 +137,7 @@ module XapianDb
|
|
137
137
|
enquiry.collapse_key = value_number
|
138
138
|
facets = {}
|
139
139
|
enquiry.mset(0, size).matches.each do |match|
|
140
|
-
facet_value =
|
140
|
+
facet_value = match.document.value(value_number)
|
141
141
|
# We must add 1 to the collapse_count since collapse_count means
|
142
142
|
# "how many other matches are there?"
|
143
143
|
facets[facet_value] = match.collapse_count + 1
|
@@ -52,6 +52,11 @@ module XapianDb
|
|
52
52
|
@attributes = @blueprints.values.map { |blueprint| blueprint.attribute_names}.flatten.compact.uniq.sort || []
|
53
53
|
end
|
54
54
|
|
55
|
+
# reset the blueprint setup
|
56
|
+
def reset
|
57
|
+
@blueprints = {}
|
58
|
+
end
|
59
|
+
|
55
60
|
# is a blueprint configured for the given name?
|
56
61
|
# @return [Boolean]
|
57
62
|
def configured?(name)
|
@@ -311,7 +316,7 @@ module XapianDb
|
|
311
316
|
def attribute(name, options={}, &block)
|
312
317
|
raise ArgumentError.new("You cannot use #{name} as an attribute name since it is a reserved method name of Xapian::Document") if reserved_method_name?(name)
|
313
318
|
do_not_index = options.delete(:index) == false
|
314
|
-
@type_map[name] = (options.delete(:as) || :
|
319
|
+
@type_map[name] = (options.delete(:as) || :string)
|
315
320
|
|
316
321
|
if block_given?
|
317
322
|
@attributes_hash[name] = {:block => block}.merge(options)
|
@@ -328,7 +333,7 @@ module XapianDb
|
|
328
333
|
attributes.each do |attr|
|
329
334
|
raise ArgumentError.new("You cannot use #{attr} as an attribute name since it is a reserved method name of Xapian::Document") if reserved_method_name?(attr)
|
330
335
|
@attributes_hash[attr] = {}
|
331
|
-
@type_map[attr] = :
|
336
|
+
@type_map[attr] = :string
|
332
337
|
self.index attr
|
333
338
|
end
|
334
339
|
end
|
@@ -7,6 +7,7 @@
|
|
7
7
|
# @author Gernot Kogler
|
8
8
|
|
9
9
|
require 'beanstalk-client'
|
10
|
+
require 'json'
|
10
11
|
|
11
12
|
module XapianDb
|
12
13
|
module IndexWriters
|
@@ -18,19 +19,19 @@ module XapianDb
|
|
18
19
|
# Update an object in the index
|
19
20
|
# @param [Object] obj An instance of a class with a blueprint configuration
|
20
21
|
def index(obj, commit=true)
|
21
|
-
beanstalk.put( {:task => "index_task", :class => obj.class.name, :id => obj.id }.
|
22
|
+
beanstalk.put( {:task => "index_task", :class => obj.class.name, :id => obj.id }.to_json )
|
22
23
|
end
|
23
24
|
|
24
25
|
# Remove an object from the index
|
25
26
|
# @param [String] xapian_id The document id
|
26
27
|
def delete_doc_with(xapian_id, commit=true)
|
27
|
-
beanstalk.put( { :task => "delete_doc_task", :xapian_id => xapian_id }.
|
28
|
+
beanstalk.put( { :task => "delete_doc_task", :xapian_id => xapian_id }.to_json )
|
28
29
|
end
|
29
30
|
|
30
31
|
# Reindex all objects of a given class
|
31
32
|
# @param [Class] klass The class to reindex
|
32
33
|
def reindex_class(klass, options={})
|
33
|
-
beanstalk.put( { :task => "reindex_class_task", :class => klass.name }.
|
34
|
+
beanstalk.put( { :task => "reindex_class_task", :class => klass.name }.to_json )
|
34
35
|
end
|
35
36
|
|
36
37
|
def beanstalk
|
data/lib/xapian_db.rb
CHANGED
@@ -7,7 +7,7 @@
|
|
7
7
|
# @author Gernot Kogler
|
8
8
|
|
9
9
|
require 'xapian'
|
10
|
-
require '
|
10
|
+
require 'json'
|
11
11
|
|
12
12
|
do_not_require = %w(update_stopwords railtie base_adapter generic_adapter active_record_adapter datamapper_adapter
|
13
13
|
beanstalk_writer resque_writer utilities install_generator datamapper)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: xapian_db
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: '1.3'
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-02-19 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: daemons
|
@@ -244,6 +244,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
244
244
|
- - ! '>='
|
245
245
|
- !ruby/object:Gem::Version
|
246
246
|
version: '0'
|
247
|
+
segments:
|
248
|
+
- 0
|
249
|
+
hash: -2176648600874761364
|
247
250
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
248
251
|
none: false
|
249
252
|
requirements:
|
@@ -252,7 +255,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
252
255
|
version: 1.3.6
|
253
256
|
requirements: []
|
254
257
|
rubyforge_project:
|
255
|
-
rubygems_version: 1.8.
|
258
|
+
rubygems_version: 1.8.25
|
256
259
|
signing_key:
|
257
260
|
specification_version: 3
|
258
261
|
summary: Ruby library to use a Xapian db as a key/value store with high performance
|