xapian_db 1.2.4.7 → 1.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGELOG.md CHANGED
@@ -1,4 +1,15 @@
1
- ##1.2.4.7 (Decmber 17th, 2012)
1
+ ##1.2.5 (January 4th, 2013)
2
+
3
+ Changes:
4
+
5
+ - Define the min length a term must have to make it into the index (see README)
6
+ - Define whether a method should be indexed with its method name so you can do field searches (e.g. name:kogler); defaults to true
7
+
8
+ Fixes:
9
+
10
+ - query result creation optimized (lower memory footprint, better performance)
11
+
12
+ ##1.2.4.7 (December 17th, 2012)
2
13
 
3
14
  Fixes:
4
15
 
data/README.rdoc CHANGED
@@ -69,6 +69,7 @@ You can override these defaults by placing a config file named 'xapian_db.yml' i
69
69
  defaults: &defaults
70
70
  adapter: datamapper # Available adapters: :active_record, :datamapper
71
71
  language: de # Global language; can be overridden for specific blueprints
72
+ term_min_length: 2 # Ignore single character terms
72
73
 
73
74
  development:
74
75
  database: db/xapian_db/development
@@ -139,6 +140,12 @@ You can add a type information to an attribute. As of now the special types :str
139
140
  blueprint.attribute :updated_at, :as => :date_time
140
141
  end
141
142
 
143
+ If you don't need field searches for an attribute, turn off the prefixed option (makes your index smaller and more efficient):
144
+
145
+ XapianDb::DocumentBlueprint.setup(:Person) do |blueprint|
146
+ blueprint.attribute :complex_object, prefixed: false
147
+ end
148
+
142
149
  You can override the global adapter configuration in a specific blueprint. Let's say you use ActiveRecord, but you have
143
150
  one more class that is not stored in the database, but you want it to be indexed:
144
151
 
@@ -48,13 +48,18 @@ module XapianDb
48
48
  def resque_queue
49
49
  @config.instance_variable_get("@_resque_queue") || 'xapian_db'
50
50
  end
51
+
52
+ def term_min_length
53
+ @config.instance_variable_get("@_term_min_length") || 1
54
+ end
55
+
51
56
  end
52
57
 
53
58
  # ---------------------------------------------------------------------------------
54
59
  # DSL methods
55
60
  # ---------------------------------------------------------------------------------
56
61
 
57
- attr_reader :_database, :_adapter, :_writer, :_beanstalk_daemon, :_resque_queue, :_stemmer, :_stopper
62
+ attr_reader :_database, :_adapter, :_writer, :_beanstalk_daemon, :_resque_queue, :_stemmer, :_stopper, :_term_min_length
58
63
 
59
64
  # Set the global database to use
60
65
  # @param [String] path The path to the database. Either apply a file system path or :memory
@@ -134,6 +139,11 @@ module XapianDb
134
139
  @_stopper = lang == :none ? nil : XapianDb::Repositories::Stopper.stopper_for(lang)
135
140
  end
136
141
 
142
+ # Set minimum length a term must have to get indexed; 2 is a good value to start
143
+ # @param [Integer] length The minimum length
144
+ def term_min_length(length)
145
+ @_term_min_length = length
146
+ end
137
147
  end
138
148
 
139
149
  end
@@ -357,7 +357,7 @@ module XapianDb
357
357
  # Is it a method name with options?
358
358
  if args.last.is_a? Hash
359
359
  options = args.last
360
- assert_valid_keys options, :weight
360
+ assert_valid_keys options, :weight, :prefixed
361
361
  @indexed_methods_hash[args.first] = IndexOptions.new(options.merge(:block => block))
362
362
  else
363
363
  add_indexes_from args
@@ -398,14 +398,15 @@ module XapianDb
398
398
  # Options for an indexed method
399
399
  class IndexOptions
400
400
 
401
- attr_reader :weight, :block
401
+ attr_reader :weight, :prefixed, :block
402
402
 
403
403
  # Constructor
404
404
  # @param [Hash] options
405
405
  # @option options [Integer] :weight (1) The weight for the indexed value
406
406
  def initialize(options = {})
407
- @weight = options[:weight] || 1
408
- @block = options[:block]
407
+ @weight = options[:weight] || 1
408
+ @prefixed = options[:prefixed].nil? ? true : options[:prefixed]
409
+ @block = options[:block]
409
410
  end
410
411
 
411
412
  end
@@ -85,13 +85,17 @@ module XapianDb
85
85
  unless obj.nil?
86
86
  values = get_values_to_index_from obj
87
87
  values.each do |value|
88
+ terms = value.to_s.downcase
88
89
  # Add value with field name
89
- term_generator.index_text(value.to_s.downcase, options.weight, "X#{method.upcase}")
90
+ term_generator.index_text(terms, options.weight, "X#{method.upcase}") if options.prefixed
90
91
  # Add value without field name
91
- term_generator.index_text(value.to_s.downcase, options.weight)
92
+ term_generator.index_text(terms, options.weight)
92
93
  end
93
94
  end
94
95
  end
96
+
97
+ terms_to_ignore = @xapian_doc.terms.select{ |term| term.term.length < XapianDb::Config.term_min_length }
98
+ terms_to_ignore.each { |term| @xapian_doc.remove_term term.term }
95
99
  end
96
100
 
97
101
  # Get the values to index from an object
@@ -50,6 +50,7 @@ module XapianDb
50
50
  config.beanstalk_daemon_url @beanstalk_daemon
51
51
  config.resque_queue @resque_queue
52
52
  config.language @language.try(:to_sym)
53
+ config.term_min_length @term_min_length
53
54
  end
54
55
 
55
56
  end
@@ -71,6 +72,7 @@ module XapianDb
71
72
  @beanstalk_daemon = env_config["beanstalk_daemon"]
72
73
  @resque_queue = env_config["resque_queue"]
73
74
  @language = env_config["language"]
75
+ @term_min_length = env_config["term_min_length"]
74
76
  end
75
77
 
76
78
  # set default config options
@@ -79,6 +81,7 @@ module XapianDb
79
81
  @adapter = :active_record
80
82
  @writer = :direct
81
83
  @beanstalk_daemon = nil
84
+ @term_min_length = 1
82
85
  end
83
86
 
84
87
  end
@@ -54,8 +54,6 @@ module XapianDb
54
54
  return build_empty_resultset if enquiry.nil?
55
55
  db_size = options.delete :db_size
56
56
  @spelling_suggestion = options.delete :spelling_suggestion
57
- @hits = enquiry.mset(0, db_size).matches_estimated
58
- return build_empty_resultset if @hits == 0
59
57
 
60
58
  limit = options.delete :limit
61
59
  page = options.delete :page
@@ -63,18 +61,21 @@ module XapianDb
63
61
  raise ArgumentError.new "unsupported options for resultset: #{options}" if options.size > 0
64
62
  raise ArgumentError.new "db_size option is required" unless db_size
65
63
 
66
- limit = limit.nil? ? @hits : limit.to_i
64
+ limit = limit.nil? ? db_size : limit.to_i
67
65
  per_page = per_page.nil? ? limit : per_page.to_i
68
66
  page = page.nil? ? 1 : page.to_i
69
67
  offset = (page - 1) * per_page
70
- @total_pages = (limit / per_page.to_f).ceil
71
68
  count = offset + per_page < limit ? per_page : limit - offset
72
- raise ArgumentError.new "page #{@page} does not exist" if @hits > 0 && offset >= limit
73
69
 
74
70
  result_window = enquiry.mset(offset, count)
71
+ @hits = result_window.matches_estimated
72
+ return build_empty_resultset if @hits == 0
73
+ raise ArgumentError.new "page #{@page} does not exist" if @hits > 0 && offset >= limit
74
+
75
75
  self.replace result_window.matches.map{|match| decorate(match).document}
76
+ @total_pages = (limit / per_page.to_f).ceil
76
77
  @current_page = page
77
- @limit_value = per_page
78
+ @limit_value = per_page
78
79
  end
79
80
 
80
81
  # The previous page number
@@ -111,4 +112,4 @@ module XapianDb
111
112
 
112
113
  end
113
114
 
114
- end
115
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: xapian_db
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.4.7
4
+ version: 1.2.5
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-12-17 00:00:00.000000000 Z
12
+ date: 2013-01-05 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: daemons
@@ -244,9 +244,6 @@ required_ruby_version: !ruby/object:Gem::Requirement
244
244
  - - ! '>='
245
245
  - !ruby/object:Gem::Version
246
246
  version: '0'
247
- segments:
248
- - 0
249
- hash: 1251927671395109686
250
247
  required_rubygems_version: !ruby/object:Gem::Requirement
251
248
  none: false
252
249
  requirements: