xapian_db 1.2.4.7 → 1.2.5

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG.md CHANGED
@@ -1,4 +1,15 @@
1
- ##1.2.4.7 (Decmber 17th, 2012)
1
+ ##1.2.5 (January 4th, 2013)
2
+
3
+ Changes:
4
+
5
+ - Define the min length a term must have to make it into the index (see README)
6
+ - define if a method should be indexed with its method name so you can do field searches (e.g. name:kogler); defaults to true
7
+
8
+ Fixes:
9
+
10
+ - query result creation optimized (lower memory footprint, better performance)
11
+
12
+ ##1.2.4.7 (December 17th, 2012)
2
13
 
3
14
  Fixes:
4
15
 
data/README.rdoc CHANGED
@@ -69,6 +69,7 @@ You can override these defaults by placing a config file named 'xapian_db.yml' i
69
69
  defaults: &defaults
70
70
  adapter: datamapper # Available adapters: :active_record, :datamapper
71
71
  language: de # Global language; can be overridden for specific blueprints
72
+ term_min_length: 2 # Ignore single character terms
72
73
 
73
74
  development:
74
75
  database: db/xapian_db/development
@@ -139,6 +140,12 @@ You can add a type information to an attribute. As of now the special types :str
139
140
  blueprint.attribute :updated_at, :as => :date_time
140
141
  end
141
142
 
143
+ If you don't need field searches for an attribute, turn off the prefixed option (makes your index smaller and more efficient):
144
+
145
+ XapianDb::DocumentBlueprint.setup(:Person) do |blueprint|
146
+ blueprint.attribute :complex_object, prefixed: false
147
+ end
148
+
142
149
  You can override the global adapter configuration in a specific blueprint. Let's say you use ActiveRecord, but you have
143
150
  one more class that is not stored in the database, but you want it to be indexed:
144
151
 
@@ -48,13 +48,18 @@ module XapianDb
48
48
  def resque_queue
49
49
  @config.instance_variable_get("@_resque_queue") || 'xapian_db'
50
50
  end
51
+
52
+ def term_min_length
53
+ @config.instance_variable_get("@_term_min_length") || 1
54
+ end
55
+
51
56
  end
52
57
 
53
58
  # ---------------------------------------------------------------------------------
54
59
  # DSL methods
55
60
  # ---------------------------------------------------------------------------------
56
61
 
57
- attr_reader :_database, :_adapter, :_writer, :_beanstalk_daemon, :_resque_queue, :_stemmer, :_stopper
62
+ attr_reader :_database, :_adapter, :_writer, :_beanstalk_daemon, :_resque_queue, :_stemmer, :_stopper, :_term_min_length
58
63
 
59
64
  # Set the global database to use
60
65
  # @param [String] path The path to the database. Either apply a file system path or :memory
@@ -134,6 +139,11 @@ module XapianDb
134
139
  @_stopper = lang == :none ? nil : XapianDb::Repositories::Stopper.stopper_for(lang)
135
140
  end
136
141
 
142
+ # Set minimum length a term must have to get indexed; 2 is a good value to start
143
+ # @param [Integer] length The minimum length
144
+ def term_min_length(length)
145
+ @_term_min_length = length
146
+ end
137
147
  end
138
148
 
139
149
  end
@@ -357,7 +357,7 @@ module XapianDb
357
357
  # Is it a method name with options?
358
358
  if args.last.is_a? Hash
359
359
  options = args.last
360
- assert_valid_keys options, :weight
360
+ assert_valid_keys options, :weight, :prefixed
361
361
  @indexed_methods_hash[args.first] = IndexOptions.new(options.merge(:block => block))
362
362
  else
363
363
  add_indexes_from args
@@ -398,14 +398,15 @@ module XapianDb
398
398
  # Options for an indexed method
399
399
  class IndexOptions
400
400
 
401
- attr_reader :weight, :block
401
+ attr_reader :weight, :prefixed, :block
402
402
 
403
403
  # Constructor
404
404
  # @param [Hash] options
405
405
  # @option options [Integer] :weight (1) The weight for the indexed value
406
406
  def initialize(options = {})
407
- @weight = options[:weight] || 1
408
- @block = options[:block]
407
+ @weight = options[:weight] || 1
408
+ @prefixed = options[:prefixed].nil? ? true : options[:prefixed]
409
+ @block = options[:block]
409
410
  end
410
411
 
411
412
  end
@@ -85,13 +85,17 @@ module XapianDb
85
85
  unless obj.nil?
86
86
  values = get_values_to_index_from obj
87
87
  values.each do |value|
88
+ terms = value.to_s.downcase
88
89
  # Add value with field name
89
- term_generator.index_text(value.to_s.downcase, options.weight, "X#{method.upcase}")
90
+ term_generator.index_text(terms, options.weight, "X#{method.upcase}") if options.prefixed
90
91
  # Add value without field name
91
- term_generator.index_text(value.to_s.downcase, options.weight)
92
+ term_generator.index_text(terms, options.weight)
92
93
  end
93
94
  end
94
95
  end
96
+
97
+ terms_to_ignore = @xapian_doc.terms.select{ |term| term.term.length < XapianDb::Config.term_min_length }
98
+ terms_to_ignore.each { |term| @xapian_doc.remove_term term.term }
95
99
  end
96
100
 
97
101
  # Get the values to index from an object
@@ -50,6 +50,7 @@ module XapianDb
50
50
  config.beanstalk_daemon_url @beanstalk_daemon
51
51
  config.resque_queue @resque_queue
52
52
  config.language @language.try(:to_sym)
53
+ config.term_min_length @term_min_length
53
54
  end
54
55
 
55
56
  end
@@ -71,6 +72,7 @@ module XapianDb
71
72
  @beanstalk_daemon = env_config["beanstalk_daemon"]
72
73
  @resque_queue = env_config["resque_queue"]
73
74
  @language = env_config["language"]
75
+ @term_min_length = env_config["term_min_length"]
74
76
  end
75
77
 
76
78
  # set default config options
@@ -79,6 +81,7 @@ module XapianDb
79
81
  @adapter = :active_record
80
82
  @writer = :direct
81
83
  @beanstalk_daemon = nil
84
+ @term_min_length = 1
82
85
  end
83
86
 
84
87
  end
@@ -54,8 +54,6 @@ module XapianDb
54
54
  return build_empty_resultset if enquiry.nil?
55
55
  db_size = options.delete :db_size
56
56
  @spelling_suggestion = options.delete :spelling_suggestion
57
- @hits = enquiry.mset(0, db_size).matches_estimated
58
- return build_empty_resultset if @hits == 0
59
57
 
60
58
  limit = options.delete :limit
61
59
  page = options.delete :page
@@ -63,18 +61,21 @@ module XapianDb
63
61
  raise ArgumentError.new "unsupported options for resultset: #{options}" if options.size > 0
64
62
  raise ArgumentError.new "db_size option is required" unless db_size
65
63
 
66
- limit = limit.nil? ? @hits : limit.to_i
64
+ limit = limit.nil? ? db_size : limit.to_i
67
65
  per_page = per_page.nil? ? limit : per_page.to_i
68
66
  page = page.nil? ? 1 : page.to_i
69
67
  offset = (page - 1) * per_page
70
- @total_pages = (limit / per_page.to_f).ceil
71
68
  count = offset + per_page < limit ? per_page : limit - offset
72
- raise ArgumentError.new "page #{@page} does not exist" if @hits > 0 && offset >= limit
73
69
 
74
70
  result_window = enquiry.mset(offset, count)
71
+ @hits = result_window.matches_estimated
72
+ return build_empty_resultset if @hits == 0
73
+ raise ArgumentError.new "page #{@page} does not exist" if @hits > 0 && offset >= limit
74
+
75
75
  self.replace result_window.matches.map{|match| decorate(match).document}
76
+ @total_pages = (limit / per_page.to_f).ceil
76
77
  @current_page = page
77
- @limit_value = per_page
78
+ @limit_value = per_page
78
79
  end
79
80
 
80
81
  # The previous page number
@@ -111,4 +112,4 @@ module XapianDb
111
112
 
112
113
  end
113
114
 
114
- end
115
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: xapian_db
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.4.7
4
+ version: 1.2.5
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-12-17 00:00:00.000000000 Z
12
+ date: 2013-01-05 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: daemons
@@ -244,9 +244,6 @@ required_ruby_version: !ruby/object:Gem::Requirement
244
244
  - - ! '>='
245
245
  - !ruby/object:Gem::Version
246
246
  version: '0'
247
- segments:
248
- - 0
249
- hash: 1251927671395109686
250
247
  required_rubygems_version: !ruby/object:Gem::Requirement
251
248
  none: false
252
249
  requirements: