picky 4.31.0 → 4.31.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1399f4e15ee1e94d7ec6c91ab88b9721702524f6
4
- data.tar.gz: 24e20ab22278fddf14d13a3ce5f22319aac4fe7b
3
+ metadata.gz: 2b3c54b9a8b3db32d2cfb2ddbc7acd92889ae4bf
4
+ data.tar.gz: dc1355c15b92450a9b4d5ac2d05cdd1754938e94
5
5
  SHA512:
6
- metadata.gz: f0948fcb8da05a7174713c5dae9e408eb69808173de4d55dacfba1bdfd024016b63d030293a929973b3c805b29c319afc59bb85959f10609b438c4058fa81897
7
- data.tar.gz: 930e22468a92b53f87fa06c51c96b94e3142d6d33457bafa936bebd65f5e14ab5a089d90f4cafb295005c65e157230b3d4e1e2694cb0a0d801c1798e8cf9837b
6
+ metadata.gz: 5c8267d3a6d40cb9170d9f068d296dca257c83651db5848c2e8d43c1e8547bb07a57a5a58bf20b8e9d3c3e1e0c5b64564daefa5a0e7cc434df0b2a7fbded6154
7
+ data.tar.gz: 4a5e283823b8558e5e030cb583b491f5be88669ca6e6b70904a78a5d92e1aeae10183b96e27deea2c816af677bd4038a8fa7251982c684ea0606fe1b9d55d68d
@@ -62,6 +62,10 @@ module Picky
62
62
 
63
63
  @symbol_keys = options[:symbol_keys] || @index.symbol_keys # SYMBOLS.
64
64
  end
65
+
66
+ def symbol_keys?
67
+ @symbol_keys
68
+ end
65
69
 
66
70
  # TODO I do a lot of helper method calls here. Refactor?
67
71
  #
@@ -26,6 +26,7 @@ module Picky
26
26
  class Automatic
27
27
 
28
28
  def initialize category, options = {}
29
+ @category = category
29
30
  @exact = category.exact
30
31
  @partial = category.partial
31
32
  @with_partial = options[:partial]
@@ -58,13 +59,16 @@ module Picky
58
59
 
59
60
  def segment text, use_partial = false
60
61
  segments, score = segment_recursively text, use_partial
62
+ segments.collect!(&:to_s) if @category.symbol_keys?
61
63
  [segments, score && score-text.size+segments.size]
62
64
  end
63
65
 
64
66
  # Segments the given text recursively.
65
67
  #
66
68
  def segment_recursively text, use_partial = false
69
+ text = text.to_sym if @category.symbol_keys?
67
70
  (use_partial ? @partial_memo : @exact_memo)[text] ||= splits(text).inject([[], nil]) do |(current, heaviest), (head, tail)|
71
+ tail = tail.to_sym if @category.symbol_keys?
68
72
  tail_weight = use_partial ? @partial.weight(tail) : @exact.weight(tail)
69
73
  tail_weight && tail_weight += (tail.size-1)
70
74
 
@@ -4,117 +4,123 @@ require 'spec_helper'
4
4
 
5
5
  describe "automatic splitting" do
6
6
 
7
- let(:index) do
8
- index = Picky::Index.new :automatic_text_splitting do
9
- indexing removes_characters: /[^a-z\s]/i,
10
- stopwords: /\b(in|a)\b/
11
- category :text
12
- end
7
+ [false, true].each do |sym_keys|
8
+ context "symbol index? #{sym_keys}" do
9
+ let(:index) do
10
+ index = Picky::Index.new :automatic_text_splitting do
11
+ symbol_keys sym_keys
12
+
13
+ indexing removes_characters: /[^a-z\s]/i,
14
+ stopwords: /\b(in|a)\b/
15
+ category :text
16
+ end
13
17
 
14
- require 'ostruct'
15
- index.add OpenStruct.new(id: 1, text: 'It does rain in Spain. Purple is a new color. Bow to the king.')
16
- index.add OpenStruct.new(id: 2, text: 'Rainbow rainbow.')
17
- index.add OpenStruct.new(id: 3, text: 'Bow and arrow in Papua New Guinea.')
18
- index.add OpenStruct.new(id: 4, text: 'The color purple.')
19
- index.add OpenStruct.new(id: 5, text: 'Sun and rain.')
20
- index.add OpenStruct.new(id: 6, text: 'The king is in Spain.')
18
+ require 'ostruct'
19
+ index.add OpenStruct.new(id: 1, text: 'It does rain in Spain. Purple is a new color. Bow to the king.')
20
+ index.add OpenStruct.new(id: 2, text: 'Rainbow rainbow.')
21
+ index.add OpenStruct.new(id: 3, text: 'Bow and arrow in Papua New Guinea.')
22
+ index.add OpenStruct.new(id: 4, text: 'The color purple.')
23
+ index.add OpenStruct.new(id: 5, text: 'Sun and rain.')
24
+ index.add OpenStruct.new(id: 6, text: 'The king is in Spain.')
21
25
 
22
- index
23
- end
26
+ index
27
+ end
24
28
 
25
- context 'splitting the text automatically' do
26
- let(:automatic_splitter) { Picky::Splitters::Automatic.new index[:text] }
29
+ context 'splitting the text automatically' do
30
+ let(:automatic_splitter) { Picky::Splitters::Automatic.new index[:text] }
27
31
 
28
- # It splits the text correctly.
29
- #
30
- it do
31
- automatic_splitter.segment('purplerainbow').should == [
32
- ['purple', 'rain', 'bow'],
33
- 2.078999999999999
34
- ]
35
- end
36
- end
32
+ # It splits the text correctly.
33
+ #
34
+ it do
35
+ automatic_splitter.segment('purplerainbow').should == [
36
+ ['purple', 'rain', 'bow'],
37
+ 2.078999999999999
38
+ ]
39
+ end
40
+ end
37
41
 
38
- context 'splitting the text automatically' do
39
- let(:automatic_splitter) { Picky::Splitters::Automatic.new index[:text] }
42
+ context 'splitting the text automatically' do
43
+ let(:automatic_splitter) { Picky::Splitters::Automatic.new index[:text] }
40
44
 
41
- # It splits the text correctly.
42
- #
43
- it { automatic_splitter.split('purplerainbow').should == ['purple', 'rain', 'bow'] }
44
- it { automatic_splitter.split('purplerain').should == ['purple', 'rain'] }
45
- it { automatic_splitter.split('purple').should == ['purple'] }
45
+ # It splits the text correctly.
46
+ #
47
+ it { automatic_splitter.split('purplerainbow').should == ['purple', 'rain', 'bow'] }
48
+ it { automatic_splitter.split('purplerain').should == ['purple', 'rain'] }
49
+ it { automatic_splitter.split('purple').should == ['purple'] }
46
50
 
47
- # When it can't, it splits it using the partial index (correctly).
48
- #
49
- it { automatic_splitter.split('purplerainbo').should == ['purple', 'rain'] }
50
- it { automatic_splitter.split('purplerainb').should == ['purple', 'rain'] }
51
- #
52
- it { automatic_splitter.split('purplerai').should == ['purple'] }
53
- it { automatic_splitter.split('purplera').should == ['purple'] }
54
- it { automatic_splitter.split('purpler').should == ['purple'] }
55
- #
56
- it { automatic_splitter.split('purpl').should == [] }
57
- it { automatic_splitter.split('purp').should == [] }
58
- it { automatic_splitter.split('pur').should == [] }
59
- it { automatic_splitter.split('pu').should == [] }
60
- it { automatic_splitter.split('p').should == [] }
61
- end
51
+ # When it can't, it splits it using the partial index (correctly).
52
+ #
53
+ it { automatic_splitter.split('purplerainbo').should == ['purple', 'rain'] }
54
+ it { automatic_splitter.split('purplerainb').should == ['purple', 'rain'] }
55
+ #
56
+ it { automatic_splitter.split('purplerai').should == ['purple'] }
57
+ it { automatic_splitter.split('purplera').should == ['purple'] }
58
+ it { automatic_splitter.split('purpler').should == ['purple'] }
59
+ #
60
+ it { automatic_splitter.split('purpl').should == [] }
61
+ it { automatic_splitter.split('purp').should == [] }
62
+ it { automatic_splitter.split('pur').should == [] }
63
+ it { automatic_splitter.split('pu').should == [] }
64
+ it { automatic_splitter.split('p').should == [] }
65
+ end
62
66
 
63
- context 'splitting text automatically (with partial)' do
64
- let(:automatic_splitter) { Picky::Splitters::Automatic.new index[:text], partial: true }
67
+ context 'splitting text automatically (with partial)' do
68
+ let(:automatic_splitter) { Picky::Splitters::Automatic.new index[:text], partial: true }
65
69
 
66
- # It splits the text correctly.
67
- #
68
- it { automatic_splitter.split('purplerainbow').should == ['purple', 'rain', 'bow'] }
69
- it { automatic_splitter.split('purplerain').should == ['purple', 'rain'] }
70
- it { automatic_splitter.split('purple').should == ['purple'] }
70
+ # It splits the text correctly.
71
+ #
72
+ it { automatic_splitter.split('purplerainbow').should == ['purple', 'rain', 'bow'] }
73
+ it { automatic_splitter.split('purplerain').should == ['purple', 'rain'] }
74
+ it { automatic_splitter.split('purple').should == ['purple'] }
71
75
 
72
- # Creates the right queries (see below).
73
- #
74
- it { automatic_splitter.split('colorpurple').should == ['color', 'purple'] }
75
- it { automatic_splitter.split('bownew').should == ['bow', 'new'] }
76
- it { automatic_splitter.split('spainisking').should == ['spain', 'is', 'king'] }
76
+ # Creates the right queries (see below).
77
+ #
78
+ it { automatic_splitter.split('colorpurple').should == ['color', 'purple'] }
79
+ it { automatic_splitter.split('bownew').should == ['bow', 'new'] }
80
+ it { automatic_splitter.split('spainisking').should == ['spain', 'is', 'king'] }
77
81
 
78
- # When it can't, it splits it using the partial index (correctly).
79
- #
80
- it { automatic_splitter.split('purplerainbo').should == ['purple', 'rain', 'bo'] }
81
- it { automatic_splitter.split('purplerainb').should == ['purple', 'rain', 'b'] }
82
- #
83
- it { automatic_splitter.split('purplerai').should == ['purple', 'rai'] }
84
- it { automatic_splitter.split('purplera').should == ['purple', 'ra'] }
85
- it { automatic_splitter.split('purpler').should == ['purple'] } # No 'r' in partial index.
86
- #
87
- it { automatic_splitter.split('purpl').should == ['purpl'] }
88
- it { automatic_splitter.split('purp').should == ['purp'] }
89
- it { automatic_splitter.split('pur').should == [] } # No 'pur' in partial index etc.
90
- it { automatic_splitter.split('pu').should == [] }
91
- it { automatic_splitter.split('p').should == [] }
82
+ # When it can't, it splits it using the partial index (correctly).
83
+ #
84
+ it { automatic_splitter.split('purplerainbo').should == ['purple', 'rain', 'bo'] }
85
+ it { automatic_splitter.split('purplerainb').should == ['purple', 'rain', 'b'] }
86
+ #
87
+ it { automatic_splitter.split('purplerai').should == ['purple', 'rai'] }
88
+ it { automatic_splitter.split('purplera').should == ['purple', 'ra'] }
89
+ it { automatic_splitter.split('purpler').should == ['purple'] } # No 'r' in partial index.
90
+ #
91
+ it { automatic_splitter.split('purpl').should == ['purpl'] }
92
+ it { automatic_splitter.split('purp').should == ['purp'] }
93
+ it { automatic_splitter.split('pur').should == [] } # No 'pur' in partial index etc.
94
+ it { automatic_splitter.split('pu').should == [] }
95
+ it { automatic_splitter.split('p').should == [] }
92
96
 
93
- let(:try) do
94
- splitter = automatic_splitter
95
- Picky::Search.new index do
96
- searching splits_text_on: splitter
97
- end
98
- end
97
+ let(:try) do
98
+ splitter = automatic_splitter
99
+ Picky::Search.new index do
100
+ searching splits_text_on: splitter
101
+ end
102
+ end
99
103
 
100
- # Should find the one with all parts.
101
- #
102
- it { try.search('purplerainbow').ids.should == [1] }
103
- it { try.search('sunandrain').ids.should == [5] }
104
+ # Should find the one with all parts.
105
+ #
106
+ it { try.search('purplerainbow').ids.should == [1] }
107
+ it { try.search('sunandrain').ids.should == [5] }
104
108
 
105
- # Common parts are found in multiple examples.
106
- #
107
- it { try.search('colorpurple').ids.should == [4,1] }
108
- it { try.search('bownew').ids.should == [3,1] }
109
- it { try.search('spainisking').ids.should == [6,1] }
110
- end
109
+ # Common parts are found in multiple examples.
110
+ #
111
+ it { try.search('colorpurple').ids.should == [4,1] }
112
+ it { try.search('bownew').ids.should == [3,1] }
113
+ it { try.search('spainisking').ids.should == [6,1] }
114
+ end
111
115
 
112
- it 'is fast enough' do
113
- automatic_splitter = Picky::Splitters::Automatic.new index[:text]
116
+ it 'is fast enough' do
117
+ automatic_splitter = Picky::Splitters::Automatic.new index[:text]
114
118
 
115
- performance_of do
116
- automatic_splitter.split('purplerainbow')
117
- end.should < 0.0002
119
+ performance_of do
120
+ automatic_splitter.split('purplerainbow')
121
+ end.should < 0.0002
122
+ end
123
+ end
118
124
  end
119
125
 
120
126
  end
@@ -75,7 +75,7 @@ describe "Memory optimization" do
75
75
  GC.start
76
76
 
77
77
  # Optimize saves some memory.
78
- (memsize_with_optimized_memory + 2952).should == memsize_with_readded_thing
78
+ (memsize_with_optimized_memory + 2000).should <= memsize_with_readded_thing
79
79
  end
80
80
 
81
81
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: picky
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.31.0
4
+ version: 4.31.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Florian Hanke