picky 4.19.3 → 4.19.4

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
@@ -56,18 +56,25 @@ module Picky
56
56
  end
57
57
  end
58
58
 
59
+ def segment text, use_partial = false
60
+ segments, score = segment_recursively text, use_partial
61
+ [segments, score && score-text.size+segments.size]
62
+ end
63
+
59
64
  # Segments the given text recursively.
60
65
  #
61
- def segment text, use_partial = false
66
+ def segment_recursively text, use_partial = false
62
67
  (use_partial ? @partial_memo : @exact_memo)[text] ||= splits(text).inject([[], nil]) do |(current, heaviest), (head, tail)|
63
68
  tail_weight = use_partial ? @partial.weight(tail) : @exact.weight(tail)
69
+ tail_weight && tail_weight += (tail.size-1)
64
70
 
65
- segments, head_weight = segment head
71
+ segments, head_weight = segment_recursively head, use_partial
66
72
 
67
73
  weight = (head_weight && tail_weight &&
68
74
  (head_weight + tail_weight) ||
69
75
  tail_weight || head_weight)
70
- if (weight || -1) > (heaviest || 0)
76
+
77
+ if (weight || -1) >= (heaviest || 0)
71
78
  [tail_weight ? segments + [tail] : segments, weight]
72
79
  else
73
80
  [current, heaviest]
@@ -21,6 +21,19 @@ describe "automatic splitting" do
21
21
 
22
22
  index
23
23
  end
24
+
25
+ context 'splitting the text automatically' do
26
+ let(:automatic_splitter) { Picky::Splitters::Automatic.new index[:text] }
27
+
28
+ # It splits the text correctly.
29
+ #
30
+ it do
31
+ automatic_splitter.segment('purplerainbow').should == [
32
+ ['purple', 'rain', 'bow'],
33
+ 2.078999999999999
34
+ ]
35
+ end
36
+ end
24
37
 
25
38
  context 'splitting the text automatically' do
26
39
  let(:automatic_splitter) { Picky::Splitters::Automatic.new index[:text] }
@@ -56,6 +69,12 @@ describe "automatic splitting" do
56
69
  it { automatic_splitter.split('purplerain').should == ['purple', 'rain'] }
57
70
  it { automatic_splitter.split('purple').should == ['purple'] }
58
71
 
72
+ # Creates the right queries (see below).
73
+ #
74
+ it { automatic_splitter.split('colorpurple').should == ['color', 'purple'] }
75
+ it { automatic_splitter.split('bownew').should == ['bow', 'new'] }
76
+ it { automatic_splitter.split('spainisking').should == ['spain', 'is', 'king'] }
77
+
59
78
  # When it can't, it splits it using the partial index (correctly).
60
79
  #
61
80
  it { automatic_splitter.split('purplerainbo').should == ['purple', 'rain', 'bo'] }
@@ -0,0 +1,52 @@
1
+ # encoding: utf-8
2
+ #
3
+ require 'spec_helper'
4
+
5
+ describe "partial automatic splitting with small partials" do
6
+
7
+ let(:index) do
8
+ index = Picky::Index.new :automatic_text_splitting do
9
+ indexing removes_characters: /[^a-z\s]/i,
10
+ stopwords: /\b(in|a)\b/
11
+ category :text,
12
+ partial: Picky::Partial::Postfix.new(from: 1)
13
+ end
14
+ index.replace_from id: 1, text: 'Rainbow'
15
+ index
16
+ end
17
+
18
+ context 'splitting the text automatically' do
19
+ let(:automatic_splitter) {
20
+ Picky::Splitters::Automatic.new index[:text], partial: true }
21
+
22
+ # It splits the text correctly.
23
+ #
24
+ it do
25
+ automatic_splitter.segment('rainbow', true).should == [
26
+ ['rainbow'],
27
+ 0.0
28
+ ]
29
+ end
30
+ end
31
+
32
+ context 'splitting the text automatically' do
33
+ let(:automatic_splitter) { Picky::Splitters::Automatic.new index[:text], partial: true }
34
+
35
+ # It splits the text correctly.
36
+ #
37
+ it { automatic_splitter.split('rainbowrainbow').should == ['rainbow', 'rainbow'] }
38
+ it { automatic_splitter.split('rainbowrain').should == ['rainbow', 'rain'] }
39
+ it { automatic_splitter.split('rain').should == ['rain'] }
40
+
41
+ # When it can't, it splits it using the partial index (correctly).
42
+ #
43
+ it { automatic_splitter.split('r').should == ['r'] }
44
+ it { automatic_splitter.split('rr').should == ['r', 'r'] }
45
+ it { automatic_splitter.split('rrr').should == ['r', 'r', 'r'] }
46
+ it { automatic_splitter.split('rrrr').should == ['r', 'r', 'r', 'r'] }
47
+
48
+ it { automatic_splitter.split('rarara').should == ['ra', 'ra', 'ra'] }
49
+ it { automatic_splitter.split('rainrairar').should == ['rain', 'rai', 'ra', 'r'] }
50
+ end
51
+
52
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: picky
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.19.3
4
+ version: 4.19.4
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-12-01 00:00:00.000000000 Z
12
+ date: 2013-12-07 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rspec
@@ -50,7 +50,7 @@ dependencies:
50
50
  requirements:
51
51
  - - ~>
52
52
  - !ruby/object:Gem::Version
53
- version: 4.19.3
53
+ version: 4.19.4
54
54
  type: :development
55
55
  prerelease: false
56
56
  version_requirements: !ruby/object:Gem::Requirement
@@ -58,7 +58,7 @@ dependencies:
58
58
  requirements:
59
59
  - - ~>
60
60
  - !ruby/object:Gem::Version
61
- version: 4.19.3
61
+ version: 4.19.4
62
62
  - !ruby/object:Gem::Dependency
63
63
  name: text
64
64
  requirement: !ruby/object:Gem::Requirement
@@ -284,6 +284,7 @@ files:
284
284
  - spec/functional/allocations_uniq_by_definition_spec.rb
285
285
  - spec/functional/arrays_as_ids_spec.rb
286
286
  - spec/functional/automatic_segmentation_spec.rb
287
+ - spec/functional/automatic_segmentation_with_small_partials_spec.rb
287
288
  - spec/functional/backends/file_spec.rb
288
289
  - spec/functional/backends/memory_bundle_realtime_spec.rb
289
290
  - spec/functional/backends/memory_json_utf8_spec.rb
@@ -461,6 +462,7 @@ test_files:
461
462
  - spec/functional/allocations_uniq_by_definition_spec.rb
462
463
  - spec/functional/arrays_as_ids_spec.rb
463
464
  - spec/functional/automatic_segmentation_spec.rb
465
+ - spec/functional/automatic_segmentation_with_small_partials_spec.rb
464
466
  - spec/functional/backends/file_spec.rb
465
467
  - spec/functional/backends/memory_bundle_realtime_spec.rb
466
468
  - spec/functional/backends/memory_json_utf8_spec.rb