picky 4.31.0 → 4.31.1
Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2b3c54b9a8b3db32d2cfb2ddbc7acd92889ae4bf
|
4
|
+
data.tar.gz: dc1355c15b92450a9b4d5ac2d05cdd1754938e94
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5c8267d3a6d40cb9170d9f068d296dca257c83651db5848c2e8d43c1e8547bb07a57a5a58bf20b8e9d3c3e1e0c5b64564daefa5a0e7cc434df0b2a7fbded6154
|
7
|
+
data.tar.gz: 4a5e283823b8558e5e030cb583b491f5be88669ca6e6b70904a78a5d92e1aeae10183b96e27deea2c816af677bd4038a8fa7251982c684ea0606fe1b9d55d68d
|
data/lib/picky/category.rb
CHANGED
@@ -26,6 +26,7 @@ module Picky
|
|
26
26
|
class Automatic
|
27
27
|
|
28
28
|
def initialize category, options = {}
|
29
|
+
@category = category
|
29
30
|
@exact = category.exact
|
30
31
|
@partial = category.partial
|
31
32
|
@with_partial = options[:partial]
|
@@ -58,13 +59,16 @@ module Picky
|
|
58
59
|
|
59
60
|
def segment text, use_partial = false
|
60
61
|
segments, score = segment_recursively text, use_partial
|
62
|
+
segments.collect!(&:to_s) if @category.symbol_keys?
|
61
63
|
[segments, score && score-text.size+segments.size]
|
62
64
|
end
|
63
65
|
|
64
66
|
# Segments the given text recursively.
|
65
67
|
#
|
66
68
|
def segment_recursively text, use_partial = false
|
69
|
+
text = text.to_sym if @category.symbol_keys?
|
67
70
|
(use_partial ? @partial_memo : @exact_memo)[text] ||= splits(text).inject([[], nil]) do |(current, heaviest), (head, tail)|
|
71
|
+
tail = tail.to_sym if @category.symbol_keys?
|
68
72
|
tail_weight = use_partial ? @partial.weight(tail) : @exact.weight(tail)
|
69
73
|
tail_weight && tail_weight += (tail.size-1)
|
70
74
|
|
@@ -4,117 +4,123 @@ require 'spec_helper'
|
|
4
4
|
|
5
5
|
describe "automatic splitting" do
|
6
6
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
7
|
+
[false, true].each do |sym_keys|
|
8
|
+
context "symbol index? #{sym_keys}" do
|
9
|
+
let(:index) do
|
10
|
+
index = Picky::Index.new :automatic_text_splitting do
|
11
|
+
symbol_keys sym_keys
|
12
|
+
|
13
|
+
indexing removes_characters: /[^a-z\s]/i,
|
14
|
+
stopwords: /\b(in|a)\b/
|
15
|
+
category :text
|
16
|
+
end
|
13
17
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
18
|
+
require 'ostruct'
|
19
|
+
index.add OpenStruct.new(id: 1, text: 'It does rain in Spain. Purple is a new color. Bow to the king.')
|
20
|
+
index.add OpenStruct.new(id: 2, text: 'Rainbow rainbow.')
|
21
|
+
index.add OpenStruct.new(id: 3, text: 'Bow and arrow in Papua New Guinea.')
|
22
|
+
index.add OpenStruct.new(id: 4, text: 'The color purple.')
|
23
|
+
index.add OpenStruct.new(id: 5, text: 'Sun and rain.')
|
24
|
+
index.add OpenStruct.new(id: 6, text: 'The king is in Spain.')
|
21
25
|
|
22
|
-
|
23
|
-
|
26
|
+
index
|
27
|
+
end
|
24
28
|
|
25
|
-
|
26
|
-
|
29
|
+
context 'splitting the text automatically' do
|
30
|
+
let(:automatic_splitter) { Picky::Splitters::Automatic.new index[:text] }
|
27
31
|
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
32
|
+
# It splits the text correctly.
|
33
|
+
#
|
34
|
+
it do
|
35
|
+
automatic_splitter.segment('purplerainbow').should == [
|
36
|
+
['purple', 'rain', 'bow'],
|
37
|
+
2.078999999999999
|
38
|
+
]
|
39
|
+
end
|
40
|
+
end
|
37
41
|
|
38
|
-
|
39
|
-
|
42
|
+
context 'splitting the text automatically' do
|
43
|
+
let(:automatic_splitter) { Picky::Splitters::Automatic.new index[:text] }
|
40
44
|
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
45
|
+
# It splits the text correctly.
|
46
|
+
#
|
47
|
+
it { automatic_splitter.split('purplerainbow').should == ['purple', 'rain', 'bow'] }
|
48
|
+
it { automatic_splitter.split('purplerain').should == ['purple', 'rain'] }
|
49
|
+
it { automatic_splitter.split('purple').should == ['purple'] }
|
46
50
|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
51
|
+
# When it can't, it splits it using the partial index (correctly).
|
52
|
+
#
|
53
|
+
it { automatic_splitter.split('purplerainbo').should == ['purple', 'rain'] }
|
54
|
+
it { automatic_splitter.split('purplerainb').should == ['purple', 'rain'] }
|
55
|
+
#
|
56
|
+
it { automatic_splitter.split('purplerai').should == ['purple'] }
|
57
|
+
it { automatic_splitter.split('purplera').should == ['purple'] }
|
58
|
+
it { automatic_splitter.split('purpler').should == ['purple'] }
|
59
|
+
#
|
60
|
+
it { automatic_splitter.split('purpl').should == [] }
|
61
|
+
it { automatic_splitter.split('purp').should == [] }
|
62
|
+
it { automatic_splitter.split('pur').should == [] }
|
63
|
+
it { automatic_splitter.split('pu').should == [] }
|
64
|
+
it { automatic_splitter.split('p').should == [] }
|
65
|
+
end
|
62
66
|
|
63
|
-
|
64
|
-
|
67
|
+
context 'splitting text automatically (with partial)' do
|
68
|
+
let(:automatic_splitter) { Picky::Splitters::Automatic.new index[:text], partial: true }
|
65
69
|
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
70
|
+
# It splits the text correctly.
|
71
|
+
#
|
72
|
+
it { automatic_splitter.split('purplerainbow').should == ['purple', 'rain', 'bow'] }
|
73
|
+
it { automatic_splitter.split('purplerain').should == ['purple', 'rain'] }
|
74
|
+
it { automatic_splitter.split('purple').should == ['purple'] }
|
71
75
|
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
76
|
+
# Creates the right queries (see below).
|
77
|
+
#
|
78
|
+
it { automatic_splitter.split('colorpurple').should == ['color', 'purple'] }
|
79
|
+
it { automatic_splitter.split('bownew').should == ['bow', 'new'] }
|
80
|
+
it { automatic_splitter.split('spainisking').should == ['spain', 'is', 'king'] }
|
77
81
|
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
82
|
+
# When it can't, it splits it using the partial index (correctly).
|
83
|
+
#
|
84
|
+
it { automatic_splitter.split('purplerainbo').should == ['purple', 'rain', 'bo'] }
|
85
|
+
it { automatic_splitter.split('purplerainb').should == ['purple', 'rain', 'b'] }
|
86
|
+
#
|
87
|
+
it { automatic_splitter.split('purplerai').should == ['purple', 'rai'] }
|
88
|
+
it { automatic_splitter.split('purplera').should == ['purple', 'ra'] }
|
89
|
+
it { automatic_splitter.split('purpler').should == ['purple'] } # No 'r' in partial index.
|
90
|
+
#
|
91
|
+
it { automatic_splitter.split('purpl').should == ['purpl'] }
|
92
|
+
it { automatic_splitter.split('purp').should == ['purp'] }
|
93
|
+
it { automatic_splitter.split('pur').should == [] } # No 'pur' in partial index etc.
|
94
|
+
it { automatic_splitter.split('pu').should == [] }
|
95
|
+
it { automatic_splitter.split('p').should == [] }
|
92
96
|
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
97
|
+
let(:try) do
|
98
|
+
splitter = automatic_splitter
|
99
|
+
Picky::Search.new index do
|
100
|
+
searching splits_text_on: splitter
|
101
|
+
end
|
102
|
+
end
|
99
103
|
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
+
# Should find the one with all parts.
|
105
|
+
#
|
106
|
+
it { try.search('purplerainbow').ids.should == [1] }
|
107
|
+
it { try.search('sunandrain').ids.should == [5] }
|
104
108
|
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
109
|
+
# Common parts are found in multiple examples.
|
110
|
+
#
|
111
|
+
it { try.search('colorpurple').ids.should == [4,1] }
|
112
|
+
it { try.search('bownew').ids.should == [3,1] }
|
113
|
+
it { try.search('spainisking').ids.should == [6,1] }
|
114
|
+
end
|
111
115
|
|
112
|
-
|
113
|
-
|
116
|
+
it 'is fast enough' do
|
117
|
+
automatic_splitter = Picky::Splitters::Automatic.new index[:text]
|
114
118
|
|
115
|
-
|
116
|
-
|
117
|
-
|
119
|
+
performance_of do
|
120
|
+
automatic_splitter.split('purplerainbow')
|
121
|
+
end.should < 0.0002
|
122
|
+
end
|
123
|
+
end
|
118
124
|
end
|
119
125
|
|
120
126
|
end
|