picky 4.13.1 → 4.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/picky/category.rb +3 -1
- data/lib/picky/category_indexing.rb +3 -1
- data/lib/picky/category_realtime.rb +9 -2
- data/lib/picky/indexers/parallel.rb +20 -6
- data/lib/picky/indexers/serial.rb +19 -6
- data/spec/functional/backends/no_tokenize_spec.rb +38 -0
- data/spec/lib/category_spec.rb +1 -1
- metadata +17 -15
data/lib/picky/category.rb
CHANGED
@@ -28,6 +28,7 @@ module Picky
|
|
28
28
|
#
|
29
29
|
# Advanced Options:
|
30
30
|
# * source: Use if the category should use a different source.
|
31
|
+
# * tokenize: Whether to use the tokenizer (default is true).
|
31
32
|
# * tokenizer: Use a subclass of Tokenizers::Base that implements #tokens_for and #empty_tokens.
|
32
33
|
# * weight: Weights::Logarithmic.new, Weights::Constant.new(int = 0),
|
33
34
|
# Weights::Dynamic.new(&block) or an object that responds
|
@@ -47,6 +48,7 @@ module Picky
|
|
47
48
|
# Instantly extracted to raise an error instantly.
|
48
49
|
#
|
49
50
|
@source = Source.from options[:source], true, @index.name
|
51
|
+
@tokenize = options[:tokenize] != false
|
50
52
|
@tokenizer = Tokenizer.from options[:indexing], @index.name, name
|
51
53
|
@ranger = options[:ranging] || Range
|
52
54
|
|
@@ -78,7 +80,7 @@ module Picky
|
|
78
80
|
#
|
79
81
|
# TODO Rewrite it such that this does not need to be maintained separately.
|
80
82
|
#
|
81
|
-
@@known_keys = [:indexing, :partial, :qualifier, :qualifiers, :ranging, :similarity, :source, :weight]
|
83
|
+
@@known_keys = [:indexing, :partial, :qualifier, :qualifiers, :ranging, :similarity, :source, :tokenize, :tokenizer, :weight]
|
82
84
|
def warn_if_unknown options
|
83
85
|
warn <<-WARNING if options && (options.keys - @@known_keys).size > 0
|
84
86
|
|
data/lib/picky/category_indexing.rb
CHANGED
@@ -95,8 +95,10 @@ module Picky
|
|
95
95
|
# If one isn't set on this category, will try the index,
|
96
96
|
# and finally the default index tokenizer.
|
97
97
|
#
|
98
|
+
# Will return nil if tokenize is set to false.
|
99
|
+
#
|
98
100
|
def tokenizer
|
99
|
-
@tokenizer || @index.tokenizer
|
101
|
+
@tokenizer || @index.tokenizer if @tokenize
|
100
102
|
end
|
101
103
|
|
102
104
|
# Clears the caches.
|
data/lib/picky/category_realtime.rb
CHANGED
@@ -57,10 +57,17 @@ module Picky
|
|
57
57
|
# For the given id, adds the list of
|
58
58
|
# strings to the index for the given id.
|
59
59
|
#
|
60
|
-
def add_text id, text, where = :unshift
|
60
|
+
def add_text id, text_or_tokens, where = :unshift
|
61
61
|
# text = text.to_sym if @symbols # SYMBOLS.
|
62
|
-
tokens, _ = tokenizer.tokenize text
|
62
|
+
tokens = nil
|
63
|
+
if tokenizer
|
64
|
+
tokens, _ = tokenizer.tokenize text_or_tokens
|
65
|
+
else
|
66
|
+
tokens = text_or_tokens
|
67
|
+
end
|
63
68
|
tokens.each { |text| add_tokenized_token id.send(key_format), text, where, false }
|
69
|
+
rescue NoMethodError
|
70
|
+
raise %Q{You probably set tokenize: false on category "#{name}". It will need an Enumerator of previously tokenized tokens.}
|
64
71
|
end
|
65
72
|
|
66
73
|
#
|
data/lib/picky/indexers/parallel.rb
CHANGED
@@ -58,12 +58,26 @@ module Picky
|
|
58
58
|
def index_flush objects, file, category, cache, tokenizer
|
59
59
|
comma = ?,
|
60
60
|
newline = ?\n
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
61
|
+
|
62
|
+
# Optimized, therefore duplicate code.
|
63
|
+
#
|
64
|
+
# TODO Deoptimize?
|
65
|
+
#
|
66
|
+
if tokenizer
|
67
|
+
objects.each do |object|
|
68
|
+
tokens, _ = tokenizer.tokenize object.send(category.from) # Note: Originals not needed.
|
69
|
+
tokens.each do |token_text|
|
70
|
+
next unless token_text
|
71
|
+
cache << object.id << comma << token_text << newline
|
72
|
+
end
|
73
|
+
end
|
74
|
+
else
|
75
|
+
objects.each do |object|
|
76
|
+
tokens = object.send(category.from) # Note: Originals not needed.
|
77
|
+
tokens.each do |token_text|
|
78
|
+
next unless token_text
|
79
|
+
cache << object.id << comma << token_text << newline
|
80
|
+
end
|
67
81
|
end
|
68
82
|
end
|
69
83
|
|
data/lib/picky/indexers/serial.rb
CHANGED
@@ -54,12 +54,25 @@ module Picky
|
|
54
54
|
def index_flush datas, file, cache, tokenizer
|
55
55
|
comma = ?,
|
56
56
|
newline = ?\n
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
57
|
+
|
58
|
+
# Optimized, therefore duplicate code.
|
59
|
+
#
|
60
|
+
# TODO Deoptimize?
|
61
|
+
#
|
62
|
+
if tokenizer
|
63
|
+
datas.each do |indexed_id, text|
|
64
|
+
tokens, _ = tokenizer.tokenize text # Note: Originals not needed.
|
65
|
+
tokens.each do |token_text|
|
66
|
+
next unless token_text
|
67
|
+
cache << indexed_id << comma << token_text << newline
|
68
|
+
end
|
69
|
+
end
|
70
|
+
else
|
71
|
+
datas.each do |indexed_id, tokens|
|
72
|
+
tokens.each do |token_text|
|
73
|
+
next unless token_text
|
74
|
+
cache << indexed_id << comma << token_text << newline
|
75
|
+
end
|
63
76
|
end
|
64
77
|
end
|
65
78
|
|
data/spec/functional/backends/no_tokenize_spec.rb
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
describe 'Category#tokenize(false)' do
|
6
|
+
|
7
|
+
it 'does tokenize' do
|
8
|
+
index = Picky::Index.new :thing do
|
9
|
+
category :text, tokenize: true
|
10
|
+
end
|
11
|
+
|
12
|
+
thing = Struct.new :id, :text
|
13
|
+
# expect do # Does not fail – because #to_s is called on the Array.
|
14
|
+
index.add thing.new(1, ['already', 'tokenized'])
|
15
|
+
# end.to raise_error
|
16
|
+
index.add thing.new(2, 'this should fail')
|
17
|
+
|
18
|
+
try = Picky::Search.new index
|
19
|
+
|
20
|
+
try.search('already').ids.should == [] # Not found because ['already', is indexed.
|
21
|
+
end
|
22
|
+
it 'does not tokenize' do
|
23
|
+
index = Picky::Index.new :thing do
|
24
|
+
category :text, tokenize: false
|
25
|
+
end
|
26
|
+
|
27
|
+
thing = Struct.new :id, :text
|
28
|
+
index.add thing.new(1, ['already', 'tokenized'])
|
29
|
+
expect do
|
30
|
+
index.add thing.new(2, 'this should fail')
|
31
|
+
end.to raise_error('You probably set tokenize: false on category "text". It will need an Enumerator of previously tokenized tokens.')
|
32
|
+
|
33
|
+
try = Picky::Search.new index
|
34
|
+
|
35
|
+
try.search('already').ids.should == [1]
|
36
|
+
end
|
37
|
+
|
38
|
+
end
|
data/spec/lib/category_spec.rb
CHANGED
@@ -52,7 +52,7 @@ describe Picky::Category do
|
|
52
52
|
category.should_receive(:warn).once.with <<-WARNING
|
53
53
|
|
54
54
|
Warning: Category options {:weights=>:some_weight} for category some_category contain an unknown option.
|
55
|
-
Working options are: [:indexing, :partial, :qualifier, :qualifiers, :ranging, :similarity, :source, :weight].
|
55
|
+
Working options are: [:indexing, :partial, :qualifier, :qualifiers, :ranging, :similarity, :source, :tokenize, :tokenizer, :weight].
|
56
56
|
WARNING
|
57
57
|
|
58
58
|
category.warn_if_unknown :weights => :some_weight
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: picky
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.13.1
|
4
|
+
version: 4.14.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-03-
|
12
|
+
date: 2013-03-02 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rspec
|
16
|
-
requirement: &
|
16
|
+
requirement: &70147105922620 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,21 +21,21 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70147105922620
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: picky-client
|
27
|
-
requirement: &
|
27
|
+
requirement: &70147105922020 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
31
31
|
- !ruby/object:Gem::Version
|
32
|
-
version: 4.13.1
|
32
|
+
version: 4.14.0
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *70147105922020
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: text
|
38
|
-
requirement: &
|
38
|
+
requirement: &70147105921540 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>='
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: '0'
|
44
44
|
type: :runtime
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *70147105921540
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: multi_json
|
49
|
-
requirement: &
|
49
|
+
requirement: &70147105921060 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ! '>='
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: '0'
|
55
55
|
type: :runtime
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *70147105921060
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: activesupport
|
60
|
-
requirement: &
|
60
|
+
requirement: &70147105920460 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ! '>='
|
@@ -65,10 +65,10 @@ dependencies:
|
|
65
65
|
version: '3.0'
|
66
66
|
type: :runtime
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *70147105920460
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: rack_fast_escape
|
71
|
-
requirement: &
|
71
|
+
requirement: &70147105919900 !ruby/object:Gem::Requirement
|
72
72
|
none: false
|
73
73
|
requirements:
|
74
74
|
- - ! '>='
|
@@ -76,7 +76,7 @@ dependencies:
|
|
76
76
|
version: '0'
|
77
77
|
type: :runtime
|
78
78
|
prerelease: false
|
79
|
-
version_requirements: *
|
79
|
+
version_requirements: *70147105919900
|
80
80
|
description: Fast Ruby semantic text search engine with comfortable single field interface.
|
81
81
|
email: florian.hanke+picky@gmail.com
|
82
82
|
executables:
|
@@ -239,6 +239,7 @@ files:
|
|
239
239
|
- spec/functional/backends/memory_bundle_realtime_spec.rb
|
240
240
|
- spec/functional/backends/memory_json_utf8_spec.rb
|
241
241
|
- spec/functional/backends/memory_spec.rb
|
242
|
+
- spec/functional/backends/no_tokenize_spec.rb
|
242
243
|
- spec/functional/backends/redis_bundle_realtime_spec.rb
|
243
244
|
- spec/functional/backends/redis_spec.rb
|
244
245
|
- spec/functional/backends/special_spec.rb
|
@@ -407,6 +408,7 @@ test_files:
|
|
407
408
|
- spec/functional/backends/memory_bundle_realtime_spec.rb
|
408
409
|
- spec/functional/backends/memory_json_utf8_spec.rb
|
409
410
|
- spec/functional/backends/memory_spec.rb
|
411
|
+
- spec/functional/backends/no_tokenize_spec.rb
|
410
412
|
- spec/functional/backends/redis_bundle_realtime_spec.rb
|
411
413
|
- spec/functional/backends/redis_spec.rb
|
412
414
|
- spec/functional/backends/special_spec.rb
|