picky 4.19.3 → 4.19.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -56,18 +56,25 @@ module Picky
56
56
  end
57
57
  end
58
58
 
59
+ def segment text, use_partial = false
60
+ segments, score = segment_recursively text, use_partial
61
+ [segments, score && score-text.size+segments.size]
62
+ end
63
+
59
64
  # Segments the given text recursively.
60
65
  #
61
- def segment text, use_partial = false
66
+ def segment_recursively text, use_partial = false
62
67
  (use_partial ? @partial_memo : @exact_memo)[text] ||= splits(text).inject([[], nil]) do |(current, heaviest), (head, tail)|
63
68
  tail_weight = use_partial ? @partial.weight(tail) : @exact.weight(tail)
69
+ tail_weight && tail_weight += (tail.size-1)
64
70
 
65
- segments, head_weight = segment head
71
+ segments, head_weight = segment_recursively head, use_partial
66
72
 
67
73
  weight = (head_weight && tail_weight &&
68
74
  (head_weight + tail_weight) ||
69
75
  tail_weight || head_weight)
70
- if (weight || -1) > (heaviest || 0)
76
+
77
+ if (weight || -1) >= (heaviest || 0)
71
78
  [tail_weight ? segments + [tail] : segments, weight]
72
79
  else
73
80
  [current, heaviest]
@@ -21,6 +21,19 @@ describe "automatic splitting" do
21
21
 
22
22
  index
23
23
  end
24
+
25
+ context 'splitting the text automatically' do
26
+ let(:automatic_splitter) { Picky::Splitters::Automatic.new index[:text] }
27
+
28
+ # It splits the text correctly.
29
+ #
30
+ it do
31
+ automatic_splitter.segment('purplerainbow').should == [
32
+ ['purple', 'rain', 'bow'],
33
+ 2.078999999999999
34
+ ]
35
+ end
36
+ end
24
37
 
25
38
  context 'splitting the text automatically' do
26
39
  let(:automatic_splitter) { Picky::Splitters::Automatic.new index[:text] }
@@ -56,6 +69,12 @@ describe "automatic splitting" do
56
69
  it { automatic_splitter.split('purplerain').should == ['purple', 'rain'] }
57
70
  it { automatic_splitter.split('purple').should == ['purple'] }
58
71
 
72
+ # Creates the right queries (see below).
73
+ #
74
+ it { automatic_splitter.split('colorpurple').should == ['color', 'purple'] }
75
+ it { automatic_splitter.split('bownew').should == ['bow', 'new'] }
76
+ it { automatic_splitter.split('spainisking').should == ['spain', 'is', 'king'] }
77
+
59
78
  # When it can't split the text using exact matches, it splits it using the partial index (correctly).
60
79
  #
61
80
  it { automatic_splitter.split('purplerainbo').should == ['purple', 'rain', 'bo'] }
@@ -0,0 +1,52 @@
1
+ # encoding: utf-8
2
+ #
3
+ require 'spec_helper'
4
+
5
+ describe "partial automatic splitting with small partials" do
6
+
7
+ let(:index) do
8
+ index = Picky::Index.new :automatic_text_splitting do
9
+ indexing removes_characters: /[^a-z\s]/i,
10
+ stopwords: /\b(in|a)\b/
11
+ category :text,
12
+ partial: Picky::Partial::Postfix.new(from: 1)
13
+ end
14
+ index.replace_from id: 1, text: 'Rainbow'
15
+ index
16
+ end
17
+
18
+ context 'splitting the text automatically' do
19
+ let(:automatic_splitter) {
20
+ Picky::Splitters::Automatic.new index[:text], partial: true }
21
+
22
+ # It splits the text correctly.
23
+ #
24
+ it do
25
+ automatic_splitter.segment('rainbow', true).should == [
26
+ ['rainbow'],
27
+ 0.0
28
+ ]
29
+ end
30
+ end
31
+
32
+ context 'splitting the text automatically' do
33
+ let(:automatic_splitter) { Picky::Splitters::Automatic.new index[:text], partial: true }
34
+
35
+ # It splits the text correctly.
36
+ #
37
+ it { automatic_splitter.split('rainbowrainbow').should == ['rainbow', 'rainbow'] }
38
+ it { automatic_splitter.split('rainbowrain').should == ['rainbow', 'rain'] }
39
+ it { automatic_splitter.split('rain').should == ['rain'] }
40
+
41
+ # When it can't split the text using exact matches, it splits it using the partial index (correctly).
42
+ #
43
+ it { automatic_splitter.split('r').should == ['r'] }
44
+ it { automatic_splitter.split('rr').should == ['r', 'r'] }
45
+ it { automatic_splitter.split('rrr').should == ['r', 'r', 'r'] }
46
+ it { automatic_splitter.split('rrrr').should == ['r', 'r', 'r', 'r'] }
47
+
48
+ it { automatic_splitter.split('rarara').should == ['ra', 'ra', 'ra'] }
49
+ it { automatic_splitter.split('rainrairar').should == ['rain', 'rai', 'ra', 'r'] }
50
+ end
51
+
52
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: picky
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.19.3
4
+ version: 4.19.4
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-12-01 00:00:00.000000000 Z
12
+ date: 2013-12-07 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rspec
@@ -50,7 +50,7 @@ dependencies:
50
50
  requirements:
51
51
  - - ~>
52
52
  - !ruby/object:Gem::Version
53
- version: 4.19.3
53
+ version: 4.19.4
54
54
  type: :development
55
55
  prerelease: false
56
56
  version_requirements: !ruby/object:Gem::Requirement
@@ -58,7 +58,7 @@ dependencies:
58
58
  requirements:
59
59
  - - ~>
60
60
  - !ruby/object:Gem::Version
61
- version: 4.19.3
61
+ version: 4.19.4
62
62
  - !ruby/object:Gem::Dependency
63
63
  name: text
64
64
  requirement: !ruby/object:Gem::Requirement
@@ -284,6 +284,7 @@ files:
284
284
  - spec/functional/allocations_uniq_by_definition_spec.rb
285
285
  - spec/functional/arrays_as_ids_spec.rb
286
286
  - spec/functional/automatic_segmentation_spec.rb
287
+ - spec/functional/automatic_segmentation_with_small_partials_spec.rb
287
288
  - spec/functional/backends/file_spec.rb
288
289
  - spec/functional/backends/memory_bundle_realtime_spec.rb
289
290
  - spec/functional/backends/memory_json_utf8_spec.rb
@@ -461,6 +462,7 @@ test_files:
461
462
  - spec/functional/allocations_uniq_by_definition_spec.rb
462
463
  - spec/functional/arrays_as_ids_spec.rb
463
464
  - spec/functional/automatic_segmentation_spec.rb
465
+ - spec/functional/automatic_segmentation_with_small_partials_spec.rb
464
466
  - spec/functional/backends/file_spec.rb
465
467
  - spec/functional/backends/memory_bundle_realtime_spec.rb
466
468
  - spec/functional/backends/memory_json_utf8_spec.rb