picky 4.13.1 → 4.14.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -28,6 +28,7 @@ module Picky
28
28
  #
29
29
  # Advanced Options:
30
30
  # * source: Use if the category should use a different source.
31
+ # * tokenize: Whether to use the tokenizer (default is true).
31
32
  # * tokenizer: Use a subclass of Tokenizers::Base that implements #tokens_for and #empty_tokens.
32
33
  # * weight: Weights::Logarithmic.new, Weights::Constant.new(int = 0),
33
34
  # Weights::Dynamic.new(&block) or an object that responds
@@ -47,6 +48,7 @@ module Picky
47
48
  # Instantly extracted to raise an error instantly.
48
49
  #
49
50
  @source = Source.from options[:source], true, @index.name
51
+ @tokenize = options[:tokenize] != false
50
52
  @tokenizer = Tokenizer.from options[:indexing], @index.name, name
51
53
  @ranger = options[:ranging] || Range
52
54
 
@@ -78,7 +80,7 @@ module Picky
78
80
  #
79
81
  # TODO Rewrite it such that this does not need to be maintained separately.
80
82
  #
81
- @@known_keys = [:indexing, :partial, :qualifier, :qualifiers, :ranging, :similarity, :source, :weight]
83
+ @@known_keys = [:indexing, :partial, :qualifier, :qualifiers, :ranging, :similarity, :source, :tokenize, :tokenizer, :weight]
82
84
  def warn_if_unknown options
83
85
  warn <<-WARNING if options && (options.keys - @@known_keys).size > 0
84
86
 
@@ -95,8 +95,10 @@ module Picky
95
95
  # If one isn't set on this category, will try the index,
96
96
  # and finally the default index tokenizer.
97
97
  #
98
+ # Will return nil if tokenize is set to false.
99
+ #
98
100
  def tokenizer
99
- @tokenizer || @index.tokenizer
101
+ @tokenizer || @index.tokenizer if @tokenize
100
102
  end
101
103
 
102
104
  # Clears the caches.
@@ -57,10 +57,17 @@ module Picky
57
57
  # For the given id, adds the list of
58
58
  # strings to the index for the given id.
59
59
  #
60
- def add_text id, text, where = :unshift
60
+ def add_text id, text_or_tokens, where = :unshift
61
61
  # text = text.to_sym if @symbols # SYMBOLS.
62
- tokens, _ = tokenizer.tokenize text
62
+ tokens = nil
63
+ if tokenizer
64
+ tokens, _ = tokenizer.tokenize text_or_tokens
65
+ else
66
+ tokens = text_or_tokens
67
+ end
63
68
  tokens.each { |text| add_tokenized_token id.send(key_format), text, where, false }
69
+ rescue NoMethodError
70
+ raise %Q{You probably set tokenize: false on category "#{name}". It will need an Enumerator of previously tokenized tokens.}
64
71
  end
65
72
 
66
73
  #
@@ -58,12 +58,26 @@ module Picky
58
58
  def index_flush objects, file, category, cache, tokenizer
59
59
  comma = ?,
60
60
  newline = ?\n
61
-
62
- objects.each do |object|
63
- tokens, _ = tokenizer.tokenize object.send(category.from) # Note: Originals not needed.
64
- tokens.each do |token_text|
65
- next unless token_text
66
- cache << object.id << comma << token_text << newline
61
+
62
+ # Optimized, therefore duplicate code.
63
+ #
64
+ # TODO Deoptimize?
65
+ #
66
+ if tokenizer
67
+ objects.each do |object|
68
+ tokens, _ = tokenizer.tokenize object.send(category.from) # Note: Originals not needed.
69
+ tokens.each do |token_text|
70
+ next unless token_text
71
+ cache << object.id << comma << token_text << newline
72
+ end
73
+ end
74
+ else
75
+ objects.each do |object|
76
+ tokens = object.send(category.from) # Note: Originals not needed.
77
+ tokens.each do |token_text|
78
+ next unless token_text
79
+ cache << object.id << comma << token_text << newline
80
+ end
67
81
  end
68
82
  end
69
83
 
@@ -54,12 +54,25 @@ module Picky
54
54
  def index_flush datas, file, cache, tokenizer
55
55
  comma = ?,
56
56
  newline = ?\n
57
-
58
- datas.each do |indexed_id, text|
59
- tokens, _ = tokenizer.tokenize text # Note: Originals not needed.
60
- tokens.each do |token_text|
61
- next unless token_text
62
- cache << indexed_id << comma << token_text << newline
57
+
58
+ # Optimized, therefore duplicate code.
59
+ #
60
+ # TODO Deoptimize?
61
+ #
62
+ if tokenizer
63
+ datas.each do |indexed_id, text|
64
+ tokens, _ = tokenizer.tokenize text # Note: Originals not needed.
65
+ tokens.each do |token_text|
66
+ next unless token_text
67
+ cache << indexed_id << comma << token_text << newline
68
+ end
69
+ end
70
+ else
71
+ datas.each do |indexed_id, tokens|
72
+ tokens.each do |token_text|
73
+ next unless token_text
74
+ cache << indexed_id << comma << token_text << newline
75
+ end
63
76
  end
64
77
  end
65
78
 
@@ -0,0 +1,38 @@
1
+ # encoding: utf-8
2
+ #
3
+ require 'spec_helper'
4
+
5
+ describe 'Category#tokenize(false)' do
6
+
7
+ it 'does tokenize' do
8
+ index = Picky::Index.new :thing do
9
+ category :text, tokenize: true
10
+ end
11
+
12
+ thing = Struct.new :id, :text
13
+ # expect do # Does not fail – because #to_s is called on the Array.
14
+ index.add thing.new(1, ['already', 'tokenized'])
15
+ # end.to raise_error
16
+ index.add thing.new(2, 'this should fail')
17
+
18
+ try = Picky::Search.new index
19
+
20
+ try.search('already').ids.should == [] # Not found because ['already', is indexed.
21
+ end
22
+ it 'does not tokenize' do
23
+ index = Picky::Index.new :thing do
24
+ category :text, tokenize: false
25
+ end
26
+
27
+ thing = Struct.new :id, :text
28
+ index.add thing.new(1, ['already', 'tokenized'])
29
+ expect do
30
+ index.add thing.new(2, 'this should fail')
31
+ end.to raise_error('You probably set tokenize: false on category "text". It will need an Enumerator of previously tokenized tokens.')
32
+
33
+ try = Picky::Search.new index
34
+
35
+ try.search('already').ids.should == [1]
36
+ end
37
+
38
+ end
@@ -52,7 +52,7 @@ describe Picky::Category do
52
52
  category.should_receive(:warn).once.with <<-WARNING
53
53
 
54
54
  Warning: Category options {:weights=>:some_weight} for category some_category contain an unknown option.
55
- Working options are: [:indexing, :partial, :qualifier, :qualifiers, :ranging, :similarity, :source, :weight].
55
+ Working options are: [:indexing, :partial, :qualifier, :qualifiers, :ranging, :similarity, :source, :tokenize, :tokenizer, :weight].
56
56
  WARNING
57
57
 
58
58
  category.warn_if_unknown :weights => :some_weight
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: picky
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.13.1
4
+ version: 4.14.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-03-01 00:00:00.000000000 Z
12
+ date: 2013-03-02 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rspec
16
- requirement: &70248447814120 !ruby/object:Gem::Requirement
16
+ requirement: &70147105922620 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,21 +21,21 @@ dependencies:
21
21
  version: '0'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *70248447814120
24
+ version_requirements: *70147105922620
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: picky-client
27
- requirement: &70248447750680 !ruby/object:Gem::Requirement
27
+ requirement: &70147105922020 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
31
31
  - !ruby/object:Gem::Version
32
- version: 4.13.1
32
+ version: 4.14.0
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *70248447750680
35
+ version_requirements: *70147105922020
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: text
38
- requirement: &70248447748820 !ruby/object:Gem::Requirement
38
+ requirement: &70147105921540 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '0'
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *70248447748820
46
+ version_requirements: *70147105921540
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: multi_json
49
- requirement: &70248447747100 !ruby/object:Gem::Requirement
49
+ requirement: &70147105921060 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: '0'
55
55
  type: :runtime
56
56
  prerelease: false
57
- version_requirements: *70248447747100
57
+ version_requirements: *70147105921060
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: activesupport
60
- requirement: &70248447744720 !ruby/object:Gem::Requirement
60
+ requirement: &70147105920460 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ! '>='
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: '3.0'
66
66
  type: :runtime
67
67
  prerelease: false
68
- version_requirements: *70248447744720
68
+ version_requirements: *70147105920460
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: rack_fast_escape
71
- requirement: &70248447728420 !ruby/object:Gem::Requirement
71
+ requirement: &70147105919900 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ! '>='
@@ -76,7 +76,7 @@ dependencies:
76
76
  version: '0'
77
77
  type: :runtime
78
78
  prerelease: false
79
- version_requirements: *70248447728420
79
+ version_requirements: *70147105919900
80
80
  description: Fast Ruby semantic text search engine with comfortable single field interface.
81
81
  email: florian.hanke+picky@gmail.com
82
82
  executables:
@@ -239,6 +239,7 @@ files:
239
239
  - spec/functional/backends/memory_bundle_realtime_spec.rb
240
240
  - spec/functional/backends/memory_json_utf8_spec.rb
241
241
  - spec/functional/backends/memory_spec.rb
242
+ - spec/functional/backends/no_tokenize_spec.rb
242
243
  - spec/functional/backends/redis_bundle_realtime_spec.rb
243
244
  - spec/functional/backends/redis_spec.rb
244
245
  - spec/functional/backends/special_spec.rb
@@ -407,6 +408,7 @@ test_files:
407
408
  - spec/functional/backends/memory_bundle_realtime_spec.rb
408
409
  - spec/functional/backends/memory_json_utf8_spec.rb
409
410
  - spec/functional/backends/memory_spec.rb
411
+ - spec/functional/backends/no_tokenize_spec.rb
410
412
  - spec/functional/backends/redis_bundle_realtime_spec.rb
411
413
  - spec/functional/backends/redis_spec.rb
412
414
  - spec/functional/backends/special_spec.rb