picky 4.19.3 → 4.19.4
Sign up to get free protection for your applications and to get access to all the features.
@@ -56,18 +56,25 @@ module Picky
|
|
56
56
|
end
|
57
57
|
end
|
58
58
|
|
59
|
+
def segment text, use_partial = false
|
60
|
+
segments, score = segment_recursively text, use_partial
|
61
|
+
[segments, score && score-text.size+segments.size]
|
62
|
+
end
|
63
|
+
|
59
64
|
# Segments the given text recursively.
|
60
65
|
#
|
61
|
-
def segment text, use_partial = false
|
66
|
+
def segment_recursively text, use_partial = false
|
62
67
|
(use_partial ? @partial_memo : @exact_memo)[text] ||= splits(text).inject([[], nil]) do |(current, heaviest), (head, tail)|
|
63
68
|
tail_weight = use_partial ? @partial.weight(tail) : @exact.weight(tail)
|
69
|
+
tail_weight && tail_weight += (tail.size-1)
|
64
70
|
|
65
|
-
segments, head_weight =
|
71
|
+
segments, head_weight = segment_recursively head, use_partial
|
66
72
|
|
67
73
|
weight = (head_weight && tail_weight &&
|
68
74
|
(head_weight + tail_weight) ||
|
69
75
|
tail_weight || head_weight)
|
70
|
-
|
76
|
+
|
77
|
+
if (weight || -1) >= (heaviest || 0)
|
71
78
|
[tail_weight ? segments + [tail] : segments, weight]
|
72
79
|
else
|
73
80
|
[current, heaviest]
|
@@ -21,6 +21,19 @@ describe "automatic splitting" do
|
|
21
21
|
|
22
22
|
index
|
23
23
|
end
|
24
|
+
|
25
|
+
context 'splitting the text automatically' do
|
26
|
+
let(:automatic_splitter) { Picky::Splitters::Automatic.new index[:text] }
|
27
|
+
|
28
|
+
# It splits the text correctly.
|
29
|
+
#
|
30
|
+
it do
|
31
|
+
automatic_splitter.segment('purplerainbow').should == [
|
32
|
+
['purple', 'rain', 'bow'],
|
33
|
+
2.078999999999999
|
34
|
+
]
|
35
|
+
end
|
36
|
+
end
|
24
37
|
|
25
38
|
context 'splitting the text automatically' do
|
26
39
|
let(:automatic_splitter) { Picky::Splitters::Automatic.new index[:text] }
|
@@ -56,6 +69,12 @@ describe "automatic splitting" do
|
|
56
69
|
it { automatic_splitter.split('purplerain').should == ['purple', 'rain'] }
|
57
70
|
it { automatic_splitter.split('purple').should == ['purple'] }
|
58
71
|
|
72
|
+
# Creates the right queries (see below).
|
73
|
+
#
|
74
|
+
it { automatic_splitter.split('colorpurple').should == ['color', 'purple'] }
|
75
|
+
it { automatic_splitter.split('bownew').should == ['bow', 'new'] }
|
76
|
+
it { automatic_splitter.split('spainisking').should == ['spain', 'is', 'king'] }
|
77
|
+
|
59
78
|
# When it can't, it splits it using the partial index (correctly).
|
60
79
|
#
|
61
80
|
it { automatic_splitter.split('purplerainbo').should == ['purple', 'rain', 'bo'] }
|
@@ -0,0 +1,52 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
describe "partial automatic splitting with small partials" do
|
6
|
+
|
7
|
+
let(:index) do
|
8
|
+
index = Picky::Index.new :automatic_text_splitting do
|
9
|
+
indexing removes_characters: /[^a-z\s]/i,
|
10
|
+
stopwords: /\b(in|a)\b/
|
11
|
+
category :text,
|
12
|
+
partial: Picky::Partial::Postfix.new(from: 1)
|
13
|
+
end
|
14
|
+
index.replace_from id: 1, text: 'Rainbow'
|
15
|
+
index
|
16
|
+
end
|
17
|
+
|
18
|
+
context 'splitting the text automatically' do
|
19
|
+
let(:automatic_splitter) {
|
20
|
+
Picky::Splitters::Automatic.new index[:text], partial: true }
|
21
|
+
|
22
|
+
# It splits the text correctly.
|
23
|
+
#
|
24
|
+
it do
|
25
|
+
automatic_splitter.segment('rainbow', true).should == [
|
26
|
+
['rainbow'],
|
27
|
+
0.0
|
28
|
+
]
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
context 'splitting the text automatically' do
|
33
|
+
let(:automatic_splitter) { Picky::Splitters::Automatic.new index[:text], partial: true }
|
34
|
+
|
35
|
+
# It splits the text correctly.
|
36
|
+
#
|
37
|
+
it { automatic_splitter.split('rainbowrainbow').should == ['rainbow', 'rainbow'] }
|
38
|
+
it { automatic_splitter.split('rainbowrain').should == ['rainbow', 'rain'] }
|
39
|
+
it { automatic_splitter.split('rain').should == ['rain'] }
|
40
|
+
|
41
|
+
# When it can't, it splits it using the partial index (correctly).
|
42
|
+
#
|
43
|
+
it { automatic_splitter.split('r').should == ['r'] }
|
44
|
+
it { automatic_splitter.split('rr').should == ['r', 'r'] }
|
45
|
+
it { automatic_splitter.split('rrr').should == ['r', 'r', 'r'] }
|
46
|
+
it { automatic_splitter.split('rrrr').should == ['r', 'r', 'r', 'r'] }
|
47
|
+
|
48
|
+
it { automatic_splitter.split('rarara').should == ['ra', 'ra', 'ra'] }
|
49
|
+
it { automatic_splitter.split('rainrairar').should == ['rain', 'rai', 'ra', 'r'] }
|
50
|
+
end
|
51
|
+
|
52
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: picky
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.19.3
|
4
|
+
version: 4.19.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-12-
|
12
|
+
date: 2013-12-07 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rspec
|
@@ -50,7 +50,7 @@ dependencies:
|
|
50
50
|
requirements:
|
51
51
|
- - ~>
|
52
52
|
- !ruby/object:Gem::Version
|
53
|
-
version: 4.19.3
|
53
|
+
version: 4.19.4
|
54
54
|
type: :development
|
55
55
|
prerelease: false
|
56
56
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -58,7 +58,7 @@ dependencies:
|
|
58
58
|
requirements:
|
59
59
|
- - ~>
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: 4.19.3
|
61
|
+
version: 4.19.4
|
62
62
|
- !ruby/object:Gem::Dependency
|
63
63
|
name: text
|
64
64
|
requirement: !ruby/object:Gem::Requirement
|
@@ -284,6 +284,7 @@ files:
|
|
284
284
|
- spec/functional/allocations_uniq_by_definition_spec.rb
|
285
285
|
- spec/functional/arrays_as_ids_spec.rb
|
286
286
|
- spec/functional/automatic_segmentation_spec.rb
|
287
|
+
- spec/functional/automatic_segmentation_with_small_partials_spec.rb
|
287
288
|
- spec/functional/backends/file_spec.rb
|
288
289
|
- spec/functional/backends/memory_bundle_realtime_spec.rb
|
289
290
|
- spec/functional/backends/memory_json_utf8_spec.rb
|
@@ -461,6 +462,7 @@ test_files:
|
|
461
462
|
- spec/functional/allocations_uniq_by_definition_spec.rb
|
462
463
|
- spec/functional/arrays_as_ids_spec.rb
|
463
464
|
- spec/functional/automatic_segmentation_spec.rb
|
465
|
+
- spec/functional/automatic_segmentation_with_small_partials_spec.rb
|
464
466
|
- spec/functional/backends/file_spec.rb
|
465
467
|
- spec/functional/backends/memory_bundle_realtime_spec.rb
|
466
468
|
- spec/functional/backends/memory_json_utf8_spec.rb
|