picky 4.31.0 → 4.31.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 2b3c54b9a8b3db32d2cfb2ddbc7acd92889ae4bf
|
|
4
|
+
data.tar.gz: dc1355c15b92450a9b4d5ac2d05cdd1754938e94
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 5c8267d3a6d40cb9170d9f068d296dca257c83651db5848c2e8d43c1e8547bb07a57a5a58bf20b8e9d3c3e1e0c5b64564daefa5a0e7cc434df0b2a7fbded6154
|
|
7
|
+
data.tar.gz: 4a5e283823b8558e5e030cb583b491f5be88669ca6e6b70904a78a5d92e1aeae10183b96e27deea2c816af677bd4038a8fa7251982c684ea0606fe1b9d55d68d
|
data/lib/picky/category.rb
CHANGED
|
@@ -26,6 +26,7 @@ module Picky
|
|
|
26
26
|
class Automatic
|
|
27
27
|
|
|
28
28
|
def initialize category, options = {}
|
|
29
|
+
@category = category
|
|
29
30
|
@exact = category.exact
|
|
30
31
|
@partial = category.partial
|
|
31
32
|
@with_partial = options[:partial]
|
|
@@ -58,13 +59,16 @@ module Picky
|
|
|
58
59
|
|
|
59
60
|
def segment text, use_partial = false
|
|
60
61
|
segments, score = segment_recursively text, use_partial
|
|
62
|
+
segments.collect!(&:to_s) if @category.symbol_keys?
|
|
61
63
|
[segments, score && score-text.size+segments.size]
|
|
62
64
|
end
|
|
63
65
|
|
|
64
66
|
# Segments the given text recursively.
|
|
65
67
|
#
|
|
66
68
|
def segment_recursively text, use_partial = false
|
|
69
|
+
text = text.to_sym if @category.symbol_keys?
|
|
67
70
|
(use_partial ? @partial_memo : @exact_memo)[text] ||= splits(text).inject([[], nil]) do |(current, heaviest), (head, tail)|
|
|
71
|
+
tail = tail.to_sym if @category.symbol_keys?
|
|
68
72
|
tail_weight = use_partial ? @partial.weight(tail) : @exact.weight(tail)
|
|
69
73
|
tail_weight && tail_weight += (tail.size-1)
|
|
70
74
|
|
|
@@ -4,117 +4,123 @@ require 'spec_helper'
|
|
|
4
4
|
|
|
5
5
|
describe "automatic splitting" do
|
|
6
6
|
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
7
|
+
[false, true].each do |sym_keys|
|
|
8
|
+
context "symbol index? #{sym_keys}" do
|
|
9
|
+
let(:index) do
|
|
10
|
+
index = Picky::Index.new :automatic_text_splitting do
|
|
11
|
+
symbol_keys sym_keys
|
|
12
|
+
|
|
13
|
+
indexing removes_characters: /[^a-z\s]/i,
|
|
14
|
+
stopwords: /\b(in|a)\b/
|
|
15
|
+
category :text
|
|
16
|
+
end
|
|
13
17
|
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
18
|
+
require 'ostruct'
|
|
19
|
+
index.add OpenStruct.new(id: 1, text: 'It does rain in Spain. Purple is a new color. Bow to the king.')
|
|
20
|
+
index.add OpenStruct.new(id: 2, text: 'Rainbow rainbow.')
|
|
21
|
+
index.add OpenStruct.new(id: 3, text: 'Bow and arrow in Papua New Guinea.')
|
|
22
|
+
index.add OpenStruct.new(id: 4, text: 'The color purple.')
|
|
23
|
+
index.add OpenStruct.new(id: 5, text: 'Sun and rain.')
|
|
24
|
+
index.add OpenStruct.new(id: 6, text: 'The king is in Spain.')
|
|
21
25
|
|
|
22
|
-
|
|
23
|
-
|
|
26
|
+
index
|
|
27
|
+
end
|
|
24
28
|
|
|
25
|
-
|
|
26
|
-
|
|
29
|
+
context 'splitting the text automatically' do
|
|
30
|
+
let(:automatic_splitter) { Picky::Splitters::Automatic.new index[:text] }
|
|
27
31
|
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
32
|
+
# It splits the text correctly.
|
|
33
|
+
#
|
|
34
|
+
it do
|
|
35
|
+
automatic_splitter.segment('purplerainbow').should == [
|
|
36
|
+
['purple', 'rain', 'bow'],
|
|
37
|
+
2.078999999999999
|
|
38
|
+
]
|
|
39
|
+
end
|
|
40
|
+
end
|
|
37
41
|
|
|
38
|
-
|
|
39
|
-
|
|
42
|
+
context 'splitting the text automatically' do
|
|
43
|
+
let(:automatic_splitter) { Picky::Splitters::Automatic.new index[:text] }
|
|
40
44
|
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
45
|
+
# It splits the text correctly.
|
|
46
|
+
#
|
|
47
|
+
it { automatic_splitter.split('purplerainbow').should == ['purple', 'rain', 'bow'] }
|
|
48
|
+
it { automatic_splitter.split('purplerain').should == ['purple', 'rain'] }
|
|
49
|
+
it { automatic_splitter.split('purple').should == ['purple'] }
|
|
46
50
|
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
51
|
+
# When it can't, it splits it using the partial index (correctly).
|
|
52
|
+
#
|
|
53
|
+
it { automatic_splitter.split('purplerainbo').should == ['purple', 'rain'] }
|
|
54
|
+
it { automatic_splitter.split('purplerainb').should == ['purple', 'rain'] }
|
|
55
|
+
#
|
|
56
|
+
it { automatic_splitter.split('purplerai').should == ['purple'] }
|
|
57
|
+
it { automatic_splitter.split('purplera').should == ['purple'] }
|
|
58
|
+
it { automatic_splitter.split('purpler').should == ['purple'] }
|
|
59
|
+
#
|
|
60
|
+
it { automatic_splitter.split('purpl').should == [] }
|
|
61
|
+
it { automatic_splitter.split('purp').should == [] }
|
|
62
|
+
it { automatic_splitter.split('pur').should == [] }
|
|
63
|
+
it { automatic_splitter.split('pu').should == [] }
|
|
64
|
+
it { automatic_splitter.split('p').should == [] }
|
|
65
|
+
end
|
|
62
66
|
|
|
63
|
-
|
|
64
|
-
|
|
67
|
+
context 'splitting text automatically (with partial)' do
|
|
68
|
+
let(:automatic_splitter) { Picky::Splitters::Automatic.new index[:text], partial: true }
|
|
65
69
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
70
|
+
# It splits the text correctly.
|
|
71
|
+
#
|
|
72
|
+
it { automatic_splitter.split('purplerainbow').should == ['purple', 'rain', 'bow'] }
|
|
73
|
+
it { automatic_splitter.split('purplerain').should == ['purple', 'rain'] }
|
|
74
|
+
it { automatic_splitter.split('purple').should == ['purple'] }
|
|
71
75
|
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
76
|
+
# Creates the right queries (see below).
|
|
77
|
+
#
|
|
78
|
+
it { automatic_splitter.split('colorpurple').should == ['color', 'purple'] }
|
|
79
|
+
it { automatic_splitter.split('bownew').should == ['bow', 'new'] }
|
|
80
|
+
it { automatic_splitter.split('spainisking').should == ['spain', 'is', 'king'] }
|
|
77
81
|
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
82
|
+
# When it can't, it splits it using the partial index (correctly).
|
|
83
|
+
#
|
|
84
|
+
it { automatic_splitter.split('purplerainbo').should == ['purple', 'rain', 'bo'] }
|
|
85
|
+
it { automatic_splitter.split('purplerainb').should == ['purple', 'rain', 'b'] }
|
|
86
|
+
#
|
|
87
|
+
it { automatic_splitter.split('purplerai').should == ['purple', 'rai'] }
|
|
88
|
+
it { automatic_splitter.split('purplera').should == ['purple', 'ra'] }
|
|
89
|
+
it { automatic_splitter.split('purpler').should == ['purple'] } # No 'r' in partial index.
|
|
90
|
+
#
|
|
91
|
+
it { automatic_splitter.split('purpl').should == ['purpl'] }
|
|
92
|
+
it { automatic_splitter.split('purp').should == ['purp'] }
|
|
93
|
+
it { automatic_splitter.split('pur').should == [] } # No 'pur' in partial index etc.
|
|
94
|
+
it { automatic_splitter.split('pu').should == [] }
|
|
95
|
+
it { automatic_splitter.split('p').should == [] }
|
|
92
96
|
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
97
|
+
let(:try) do
|
|
98
|
+
splitter = automatic_splitter
|
|
99
|
+
Picky::Search.new index do
|
|
100
|
+
searching splits_text_on: splitter
|
|
101
|
+
end
|
|
102
|
+
end
|
|
99
103
|
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
+
# Should find the one with all parts.
|
|
105
|
+
#
|
|
106
|
+
it { try.search('purplerainbow').ids.should == [1] }
|
|
107
|
+
it { try.search('sunandrain').ids.should == [5] }
|
|
104
108
|
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
109
|
+
# Common parts are found in multiple examples.
|
|
110
|
+
#
|
|
111
|
+
it { try.search('colorpurple').ids.should == [4,1] }
|
|
112
|
+
it { try.search('bownew').ids.should == [3,1] }
|
|
113
|
+
it { try.search('spainisking').ids.should == [6,1] }
|
|
114
|
+
end
|
|
111
115
|
|
|
112
|
-
|
|
113
|
-
|
|
116
|
+
it 'is fast enough' do
|
|
117
|
+
automatic_splitter = Picky::Splitters::Automatic.new index[:text]
|
|
114
118
|
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
119
|
+
performance_of do
|
|
120
|
+
automatic_splitter.split('purplerainbow')
|
|
121
|
+
end.should < 0.0002
|
|
122
|
+
end
|
|
123
|
+
end
|
|
118
124
|
end
|
|
119
125
|
|
|
120
126
|
end
|