picky 4.19.3 → 4.19.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -56,18 +56,25 @@ module Picky
|
|
|
56
56
|
end
|
|
57
57
|
end
|
|
58
58
|
|
|
59
|
+
def segment text, use_partial = false
|
|
60
|
+
segments, score = segment_recursively text, use_partial
|
|
61
|
+
[segments, score && score-text.size+segments.size]
|
|
62
|
+
end
|
|
63
|
+
|
|
59
64
|
# Segments the given text recursively.
|
|
60
65
|
#
|
|
61
|
-
def segment text, use_partial = false
|
|
66
|
+
def segment_recursively text, use_partial = false
|
|
62
67
|
(use_partial ? @partial_memo : @exact_memo)[text] ||= splits(text).inject([[], nil]) do |(current, heaviest), (head, tail)|
|
|
63
68
|
tail_weight = use_partial ? @partial.weight(tail) : @exact.weight(tail)
|
|
69
|
+
tail_weight && tail_weight += (tail.size-1)
|
|
64
70
|
|
|
65
|
-
segments, head_weight = segment head, use_partial
|
|
71
|
+
segments, head_weight = segment_recursively head, use_partial
|
|
66
72
|
|
|
67
73
|
weight = (head_weight && tail_weight &&
|
|
68
74
|
(head_weight + tail_weight) ||
|
|
69
75
|
tail_weight || head_weight)
|
|
70
|
-
|
|
76
|
+
|
|
77
|
+
if (weight || -1) >= (heaviest || 0)
|
|
71
78
|
[tail_weight ? segments + [tail] : segments, weight]
|
|
72
79
|
else
|
|
73
80
|
[current, heaviest]
|
|
@@ -21,6 +21,19 @@ describe "automatic splitting" do
|
|
|
21
21
|
|
|
22
22
|
index
|
|
23
23
|
end
|
|
24
|
+
|
|
25
|
+
context 'splitting the text automatically' do
|
|
26
|
+
let(:automatic_splitter) { Picky::Splitters::Automatic.new index[:text] }
|
|
27
|
+
|
|
28
|
+
# It splits the text correctly.
|
|
29
|
+
#
|
|
30
|
+
it do
|
|
31
|
+
automatic_splitter.segment('purplerainbow').should == [
|
|
32
|
+
['purple', 'rain', 'bow'],
|
|
33
|
+
2.078999999999999
|
|
34
|
+
]
|
|
35
|
+
end
|
|
36
|
+
end
|
|
24
37
|
|
|
25
38
|
context 'splitting the text automatically' do
|
|
26
39
|
let(:automatic_splitter) { Picky::Splitters::Automatic.new index[:text] }
|
|
@@ -56,6 +69,12 @@ describe "automatic splitting" do
|
|
|
56
69
|
it { automatic_splitter.split('purplerain').should == ['purple', 'rain'] }
|
|
57
70
|
it { automatic_splitter.split('purple').should == ['purple'] }
|
|
58
71
|
|
|
72
|
+
# Creates the right queries (see below).
|
|
73
|
+
#
|
|
74
|
+
it { automatic_splitter.split('colorpurple').should == ['color', 'purple'] }
|
|
75
|
+
it { automatic_splitter.split('bownew').should == ['bow', 'new'] }
|
|
76
|
+
it { automatic_splitter.split('spainisking').should == ['spain', 'is', 'king'] }
|
|
77
|
+
|
|
59
78
|
# When it can't, it splits it using the partial index (correctly).
|
|
60
79
|
#
|
|
61
80
|
it { automatic_splitter.split('purplerainbo').should == ['purple', 'rain', 'bo'] }
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
# encoding: utf-8
|
|
2
|
+
#
|
|
3
|
+
require 'spec_helper'
|
|
4
|
+
|
|
5
|
+
describe "partial automatic splitting with small partials" do
|
|
6
|
+
|
|
7
|
+
let(:index) do
|
|
8
|
+
index = Picky::Index.new :automatic_text_splitting do
|
|
9
|
+
indexing removes_characters: /[^a-z\s]/i,
|
|
10
|
+
stopwords: /\b(in|a)\b/
|
|
11
|
+
category :text,
|
|
12
|
+
partial: Picky::Partial::Postfix.new(from: 1)
|
|
13
|
+
end
|
|
14
|
+
index.replace_from id: 1, text: 'Rainbow'
|
|
15
|
+
index
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
context 'splitting the text automatically' do
|
|
19
|
+
let(:automatic_splitter) {
|
|
20
|
+
Picky::Splitters::Automatic.new index[:text], partial: true }
|
|
21
|
+
|
|
22
|
+
# It splits the text correctly.
|
|
23
|
+
#
|
|
24
|
+
it do
|
|
25
|
+
automatic_splitter.segment('rainbow', true).should == [
|
|
26
|
+
['rainbow'],
|
|
27
|
+
0.0
|
|
28
|
+
]
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
context 'splitting the text automatically' do
|
|
33
|
+
let(:automatic_splitter) { Picky::Splitters::Automatic.new index[:text], partial: true }
|
|
34
|
+
|
|
35
|
+
# It splits the text correctly.
|
|
36
|
+
#
|
|
37
|
+
it { automatic_splitter.split('rainbowrainbow').should == ['rainbow', 'rainbow'] }
|
|
38
|
+
it { automatic_splitter.split('rainbowrain').should == ['rainbow', 'rain'] }
|
|
39
|
+
it { automatic_splitter.split('rain').should == ['rain'] }
|
|
40
|
+
|
|
41
|
+
# When it can't, it splits it using the partial index (correctly).
|
|
42
|
+
#
|
|
43
|
+
it { automatic_splitter.split('r').should == ['r'] }
|
|
44
|
+
it { automatic_splitter.split('rr').should == ['r', 'r'] }
|
|
45
|
+
it { automatic_splitter.split('rrr').should == ['r', 'r', 'r'] }
|
|
46
|
+
it { automatic_splitter.split('rrrr').should == ['r', 'r', 'r', 'r'] }
|
|
47
|
+
|
|
48
|
+
it { automatic_splitter.split('rarara').should == ['ra', 'ra', 'ra'] }
|
|
49
|
+
it { automatic_splitter.split('rainrairar').should == ['rain', 'rai', 'ra', 'r'] }
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: picky
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 4.19.3
|
|
4
|
+
version: 4.19.4
|
|
5
5
|
prerelease:
|
|
6
6
|
platform: ruby
|
|
7
7
|
authors:
|
|
@@ -9,7 +9,7 @@ authors:
|
|
|
9
9
|
autorequire:
|
|
10
10
|
bindir: bin
|
|
11
11
|
cert_chain: []
|
|
12
|
-
date: 2013-12-
|
|
12
|
+
date: 2013-12-07 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
|
14
14
|
- !ruby/object:Gem::Dependency
|
|
15
15
|
name: rspec
|
|
@@ -50,7 +50,7 @@ dependencies:
|
|
|
50
50
|
requirements:
|
|
51
51
|
- - ~>
|
|
52
52
|
- !ruby/object:Gem::Version
|
|
53
|
-
version: 4.19.3
|
|
53
|
+
version: 4.19.4
|
|
54
54
|
type: :development
|
|
55
55
|
prerelease: false
|
|
56
56
|
version_requirements: !ruby/object:Gem::Requirement
|
|
@@ -58,7 +58,7 @@ dependencies:
|
|
|
58
58
|
requirements:
|
|
59
59
|
- - ~>
|
|
60
60
|
- !ruby/object:Gem::Version
|
|
61
|
-
version: 4.19.3
|
|
61
|
+
version: 4.19.4
|
|
62
62
|
- !ruby/object:Gem::Dependency
|
|
63
63
|
name: text
|
|
64
64
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -284,6 +284,7 @@ files:
|
|
|
284
284
|
- spec/functional/allocations_uniq_by_definition_spec.rb
|
|
285
285
|
- spec/functional/arrays_as_ids_spec.rb
|
|
286
286
|
- spec/functional/automatic_segmentation_spec.rb
|
|
287
|
+
- spec/functional/automatic_segmentation_with_small_partials_spec.rb
|
|
287
288
|
- spec/functional/backends/file_spec.rb
|
|
288
289
|
- spec/functional/backends/memory_bundle_realtime_spec.rb
|
|
289
290
|
- spec/functional/backends/memory_json_utf8_spec.rb
|
|
@@ -461,6 +462,7 @@ test_files:
|
|
|
461
462
|
- spec/functional/allocations_uniq_by_definition_spec.rb
|
|
462
463
|
- spec/functional/arrays_as_ids_spec.rb
|
|
463
464
|
- spec/functional/automatic_segmentation_spec.rb
|
|
465
|
+
- spec/functional/automatic_segmentation_with_small_partials_spec.rb
|
|
464
466
|
- spec/functional/backends/file_spec.rb
|
|
465
467
|
- spec/functional/backends/memory_bundle_realtime_spec.rb
|
|
466
468
|
- spec/functional/backends/memory_json_utf8_spec.rb
|