picky 4.13.1 → 4.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -28,6 +28,7 @@ module Picky
28
28
  #
29
29
  # Advanced Options:
30
30
  # * source: Use if the category should use a different source.
31
+ # * tokenize: Whether to use the tokenizer (default is true).
31
32
  # * tokenizer: Use a subclass of Tokenizers::Base that implements #tokens_for and #empty_tokens.
32
33
  # * weight: Weights::Logarithmic.new, Weights::Constant.new(int = 0),
33
34
  # Weights::Dynamic.new(&block) or an object that responds
@@ -47,6 +48,7 @@ module Picky
47
48
  # Instantly extracted to raise an error instantly.
48
49
  #
49
50
  @source = Source.from options[:source], true, @index.name
51
+ @tokenize = options[:tokenize] != false
50
52
  @tokenizer = Tokenizer.from options[:indexing], @index.name, name
51
53
  @ranger = options[:ranging] || Range
52
54
 
@@ -78,7 +80,7 @@ module Picky
78
80
  #
79
81
  # TODO Rewrite it such that this does not need to be maintained separately.
80
82
  #
81
- @@known_keys = [:indexing, :partial, :qualifier, :qualifiers, :ranging, :similarity, :source, :weight]
83
+ @@known_keys = [:indexing, :partial, :qualifier, :qualifiers, :ranging, :similarity, :source, :tokenize, :tokenizer, :weight]
82
84
  def warn_if_unknown options
83
85
  warn <<-WARNING if options && (options.keys - @@known_keys).size > 0
84
86
 
@@ -95,8 +95,10 @@ module Picky
95
95
  # If one isn't set on this category, will try the index,
96
96
  # and finally the default index tokenizer.
97
97
  #
98
+ # Will return nil if tokenize is set to false.
99
+ #
98
100
  def tokenizer
99
- @tokenizer || @index.tokenizer
101
+ @tokenizer || @index.tokenizer if @tokenize
100
102
  end
101
103
 
102
104
  # Clears the caches.
@@ -57,10 +57,17 @@ module Picky
57
57
  # For the given id, adds the list of
58
58
  # strings to the index for the given id.
59
59
  #
60
- def add_text id, text, where = :unshift
60
+ def add_text id, text_or_tokens, where = :unshift
61
61
  # text = text.to_sym if @symbols # SYMBOLS.
62
- tokens, _ = tokenizer.tokenize text
62
+ tokens = nil
63
+ if tokenizer
64
+ tokens, _ = tokenizer.tokenize text_or_tokens
65
+ else
66
+ tokens = text_or_tokens
67
+ end
63
68
  tokens.each { |text| add_tokenized_token id.send(key_format), text, where, false }
69
+ rescue NoMethodError
70
+ raise %Q{You probably set tokenize: false on category "#{name}". It will need an Enumerator of previously tokenized tokens.}
64
71
  end
65
72
 
66
73
  #
@@ -58,12 +58,26 @@ module Picky
58
58
  def index_flush objects, file, category, cache, tokenizer
59
59
  comma = ?,
60
60
  newline = ?\n
61
-
62
- objects.each do |object|
63
- tokens, _ = tokenizer.tokenize object.send(category.from) # Note: Originals not needed.
64
- tokens.each do |token_text|
65
- next unless token_text
66
- cache << object.id << comma << token_text << newline
61
+
62
+ # Optimized, therefore duplicate code.
63
+ #
64
+ # TODO Deoptimize?
65
+ #
66
+ if tokenizer
67
+ objects.each do |object|
68
+ tokens, _ = tokenizer.tokenize object.send(category.from) # Note: Originals not needed.
69
+ tokens.each do |token_text|
70
+ next unless token_text
71
+ cache << object.id << comma << token_text << newline
72
+ end
73
+ end
74
+ else
75
+ objects.each do |object|
76
+ tokens = object.send(category.from) # Note: Already tokenized, used as is.
77
+ tokens.each do |token_text|
78
+ next unless token_text
79
+ cache << object.id << comma << token_text << newline
80
+ end
67
81
  end
68
82
  end
69
83
 
@@ -54,12 +54,25 @@ module Picky
54
54
  def index_flush datas, file, cache, tokenizer
55
55
  comma = ?,
56
56
  newline = ?\n
57
-
58
- datas.each do |indexed_id, text|
59
- tokens, _ = tokenizer.tokenize text # Note: Originals not needed.
60
- tokens.each do |token_text|
61
- next unless token_text
62
- cache << indexed_id << comma << token_text << newline
57
+
58
+ # Optimized, therefore duplicate code.
59
+ #
60
+ # TODO Deoptimize?
61
+ #
62
+ if tokenizer
63
+ datas.each do |indexed_id, text|
64
+ tokens, _ = tokenizer.tokenize text # Note: Originals not needed.
65
+ tokens.each do |token_text|
66
+ next unless token_text
67
+ cache << indexed_id << comma << token_text << newline
68
+ end
69
+ end
70
+ else
71
+ datas.each do |indexed_id, tokens|
72
+ tokens.each do |token_text|
73
+ next unless token_text
74
+ cache << indexed_id << comma << token_text << newline
75
+ end
63
76
  end
64
77
  end
65
78
 
@@ -0,0 +1,38 @@
1
+ # encoding: utf-8
2
+ #
3
+ require 'spec_helper'
4
+
5
+ describe 'Category#tokenize(false)' do
6
+
7
+ it 'does tokenize' do
8
+ index = Picky::Index.new :thing do
9
+ category :text, tokenize: true
10
+ end
11
+
12
+ thing = Struct.new :id, :text
13
+ # expect do # Does not fail – because #to_s is called on the Array.
14
+ index.add thing.new(1, ['already', 'tokenized'])
15
+ # end.to raise_error
16
+ index.add thing.new(2, 'this should fail')
17
+
18
+ try = Picky::Search.new index
19
+
20
+ try.search('already').ids.should == [] # Not found: #to_s is called on the Array before tokenizing, so the token "['already'," is indexed rather than "already".
21
+ end
22
+ it 'does not tokenize' do
23
+ index = Picky::Index.new :thing do
24
+ category :text, tokenize: false
25
+ end
26
+
27
+ thing = Struct.new :id, :text
28
+ index.add thing.new(1, ['already', 'tokenized'])
29
+ expect do
30
+ index.add thing.new(2, 'this should fail')
31
+ end.to raise_error('You probably set tokenize: false on category "text". It will need an Enumerator of previously tokenized tokens.')
32
+
33
+ try = Picky::Search.new index
34
+
35
+ try.search('already').ids.should == [1]
36
+ end
37
+
38
+ end
@@ -52,7 +52,7 @@ describe Picky::Category do
52
52
  category.should_receive(:warn).once.with <<-WARNING
53
53
 
54
54
  Warning: Category options {:weights=>:some_weight} for category some_category contain an unknown option.
55
- Working options are: [:indexing, :partial, :qualifier, :qualifiers, :ranging, :similarity, :source, :weight].
55
+ Working options are: [:indexing, :partial, :qualifier, :qualifiers, :ranging, :similarity, :source, :tokenize, :tokenizer, :weight].
56
56
  WARNING
57
57
 
58
58
  category.warn_if_unknown :weights => :some_weight
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: picky
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.13.1
4
+ version: 4.14.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-03-01 00:00:00.000000000 Z
12
+ date: 2013-03-02 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rspec
16
- requirement: &70248447814120 !ruby/object:Gem::Requirement
16
+ requirement: &70147105922620 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,21 +21,21 @@ dependencies:
21
21
  version: '0'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *70248447814120
24
+ version_requirements: *70147105922620
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: picky-client
27
- requirement: &70248447750680 !ruby/object:Gem::Requirement
27
+ requirement: &70147105922020 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
31
31
  - !ruby/object:Gem::Version
32
- version: 4.13.1
32
+ version: 4.14.0
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *70248447750680
35
+ version_requirements: *70147105922020
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: text
38
- requirement: &70248447748820 !ruby/object:Gem::Requirement
38
+ requirement: &70147105921540 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '0'
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *70248447748820
46
+ version_requirements: *70147105921540
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: multi_json
49
- requirement: &70248447747100 !ruby/object:Gem::Requirement
49
+ requirement: &70147105921060 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: '0'
55
55
  type: :runtime
56
56
  prerelease: false
57
- version_requirements: *70248447747100
57
+ version_requirements: *70147105921060
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: activesupport
60
- requirement: &70248447744720 !ruby/object:Gem::Requirement
60
+ requirement: &70147105920460 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ! '>='
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: '3.0'
66
66
  type: :runtime
67
67
  prerelease: false
68
- version_requirements: *70248447744720
68
+ version_requirements: *70147105920460
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: rack_fast_escape
71
- requirement: &70248447728420 !ruby/object:Gem::Requirement
71
+ requirement: &70147105919900 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ! '>='
@@ -76,7 +76,7 @@ dependencies:
76
76
  version: '0'
77
77
  type: :runtime
78
78
  prerelease: false
79
- version_requirements: *70248447728420
79
+ version_requirements: *70147105919900
80
80
  description: Fast Ruby semantic text search engine with comfortable single field interface.
81
81
  email: florian.hanke+picky@gmail.com
82
82
  executables:
@@ -239,6 +239,7 @@ files:
239
239
  - spec/functional/backends/memory_bundle_realtime_spec.rb
240
240
  - spec/functional/backends/memory_json_utf8_spec.rb
241
241
  - spec/functional/backends/memory_spec.rb
242
+ - spec/functional/backends/no_tokenize_spec.rb
242
243
  - spec/functional/backends/redis_bundle_realtime_spec.rb
243
244
  - spec/functional/backends/redis_spec.rb
244
245
  - spec/functional/backends/special_spec.rb
@@ -407,6 +408,7 @@ test_files:
407
408
  - spec/functional/backends/memory_bundle_realtime_spec.rb
408
409
  - spec/functional/backends/memory_json_utf8_spec.rb
409
410
  - spec/functional/backends/memory_spec.rb
411
+ - spec/functional/backends/no_tokenize_spec.rb
410
412
  - spec/functional/backends/redis_bundle_realtime_spec.rb
411
413
  - spec/functional/backends/redis_spec.rb
412
414
  - spec/functional/backends/special_spec.rb