picky 4.31.0 → 4.31.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1399f4e15ee1e94d7ec6c91ab88b9721702524f6
4
- data.tar.gz: 24e20ab22278fddf14d13a3ce5f22319aac4fe7b
3
+ metadata.gz: 2b3c54b9a8b3db32d2cfb2ddbc7acd92889ae4bf
4
+ data.tar.gz: dc1355c15b92450a9b4d5ac2d05cdd1754938e94
5
5
  SHA512:
6
- metadata.gz: f0948fcb8da05a7174713c5dae9e408eb69808173de4d55dacfba1bdfd024016b63d030293a929973b3c805b29c319afc59bb85959f10609b438c4058fa81897
7
- data.tar.gz: 930e22468a92b53f87fa06c51c96b94e3142d6d33457bafa936bebd65f5e14ab5a089d90f4cafb295005c65e157230b3d4e1e2694cb0a0d801c1798e8cf9837b
6
+ metadata.gz: 5c8267d3a6d40cb9170d9f068d296dca257c83651db5848c2e8d43c1e8547bb07a57a5a58bf20b8e9d3c3e1e0c5b64564daefa5a0e7cc434df0b2a7fbded6154
7
+ data.tar.gz: 4a5e283823b8558e5e030cb583b491f5be88669ca6e6b70904a78a5d92e1aeae10183b96e27deea2c816af677bd4038a8fa7251982c684ea0606fe1b9d55d68d
@@ -62,6 +62,10 @@ module Picky
62
62
 
63
63
  @symbol_keys = options[:symbol_keys] || @index.symbol_keys # SYMBOLS.
64
64
  end
65
+
66
+ def symbol_keys?
67
+ @symbol_keys
68
+ end
65
69
 
66
70
  # TODO I do a lot of helper method calls here. Refactor?
67
71
  #
@@ -26,6 +26,7 @@ module Picky
26
26
  class Automatic
27
27
 
28
28
  def initialize category, options = {}
29
+ @category = category
29
30
  @exact = category.exact
30
31
  @partial = category.partial
31
32
  @with_partial = options[:partial]
@@ -58,13 +59,16 @@ module Picky
58
59
 
59
60
  def segment text, use_partial = false
60
61
  segments, score = segment_recursively text, use_partial
62
+ segments.collect!(&:to_s) if @category.symbol_keys?
61
63
  [segments, score && score-text.size+segments.size]
62
64
  end
63
65
 
64
66
  # Segments the given text recursively.
65
67
  #
66
68
  def segment_recursively text, use_partial = false
69
+ text = text.to_sym if @category.symbol_keys?
67
70
  (use_partial ? @partial_memo : @exact_memo)[text] ||= splits(text).inject([[], nil]) do |(current, heaviest), (head, tail)|
71
+ tail = tail.to_sym if @category.symbol_keys?
68
72
  tail_weight = use_partial ? @partial.weight(tail) : @exact.weight(tail)
69
73
  tail_weight && tail_weight += (tail.size-1)
70
74
 
@@ -4,117 +4,123 @@ require 'spec_helper'
4
4
 
5
5
  describe "automatic splitting" do
6
6
 
7
- let(:index) do
8
- index = Picky::Index.new :automatic_text_splitting do
9
- indexing removes_characters: /[^a-z\s]/i,
10
- stopwords: /\b(in|a)\b/
11
- category :text
12
- end
7
+ [false, true].each do |sym_keys|
8
+ context "symbol index? #{sym_keys}" do
9
+ let(:index) do
10
+ index = Picky::Index.new :automatic_text_splitting do
11
+ symbol_keys sym_keys
12
+
13
+ indexing removes_characters: /[^a-z\s]/i,
14
+ stopwords: /\b(in|a)\b/
15
+ category :text
16
+ end
13
17
 
14
- require 'ostruct'
15
- index.add OpenStruct.new(id: 1, text: 'It does rain in Spain. Purple is a new color. Bow to the king.')
16
- index.add OpenStruct.new(id: 2, text: 'Rainbow rainbow.')
17
- index.add OpenStruct.new(id: 3, text: 'Bow and arrow in Papua New Guinea.')
18
- index.add OpenStruct.new(id: 4, text: 'The color purple.')
19
- index.add OpenStruct.new(id: 5, text: 'Sun and rain.')
20
- index.add OpenStruct.new(id: 6, text: 'The king is in Spain.')
18
+ require 'ostruct'
19
+ index.add OpenStruct.new(id: 1, text: 'It does rain in Spain. Purple is a new color. Bow to the king.')
20
+ index.add OpenStruct.new(id: 2, text: 'Rainbow rainbow.')
21
+ index.add OpenStruct.new(id: 3, text: 'Bow and arrow in Papua New Guinea.')
22
+ index.add OpenStruct.new(id: 4, text: 'The color purple.')
23
+ index.add OpenStruct.new(id: 5, text: 'Sun and rain.')
24
+ index.add OpenStruct.new(id: 6, text: 'The king is in Spain.')
21
25
 
22
- index
23
- end
26
+ index
27
+ end
24
28
 
25
- context 'splitting the text automatically' do
26
- let(:automatic_splitter) { Picky::Splitters::Automatic.new index[:text] }
29
+ context 'splitting the text automatically' do
30
+ let(:automatic_splitter) { Picky::Splitters::Automatic.new index[:text] }
27
31
 
28
- # It splits the text correctly.
29
- #
30
- it do
31
- automatic_splitter.segment('purplerainbow').should == [
32
- ['purple', 'rain', 'bow'],
33
- 2.078999999999999
34
- ]
35
- end
36
- end
32
+ # It splits the text correctly.
33
+ #
34
+ it do
35
+ automatic_splitter.segment('purplerainbow').should == [
36
+ ['purple', 'rain', 'bow'],
37
+ 2.078999999999999
38
+ ]
39
+ end
40
+ end
37
41
 
38
- context 'splitting the text automatically' do
39
- let(:automatic_splitter) { Picky::Splitters::Automatic.new index[:text] }
42
+ context 'splitting the text automatically' do
43
+ let(:automatic_splitter) { Picky::Splitters::Automatic.new index[:text] }
40
44
 
41
- # It splits the text correctly.
42
- #
43
- it { automatic_splitter.split('purplerainbow').should == ['purple', 'rain', 'bow'] }
44
- it { automatic_splitter.split('purplerain').should == ['purple', 'rain'] }
45
- it { automatic_splitter.split('purple').should == ['purple'] }
45
+ # It splits the text correctly.
46
+ #
47
+ it { automatic_splitter.split('purplerainbow').should == ['purple', 'rain', 'bow'] }
48
+ it { automatic_splitter.split('purplerain').should == ['purple', 'rain'] }
49
+ it { automatic_splitter.split('purple').should == ['purple'] }
46
50
 
47
- # When it can't, it splits it using the partial index (correctly).
48
- #
49
- it { automatic_splitter.split('purplerainbo').should == ['purple', 'rain'] }
50
- it { automatic_splitter.split('purplerainb').should == ['purple', 'rain'] }
51
- #
52
- it { automatic_splitter.split('purplerai').should == ['purple'] }
53
- it { automatic_splitter.split('purplera').should == ['purple'] }
54
- it { automatic_splitter.split('purpler').should == ['purple'] }
55
- #
56
- it { automatic_splitter.split('purpl').should == [] }
57
- it { automatic_splitter.split('purp').should == [] }
58
- it { automatic_splitter.split('pur').should == [] }
59
- it { automatic_splitter.split('pu').should == [] }
60
- it { automatic_splitter.split('p').should == [] }
61
- end
51
+ # When it can't, it splits it using the partial index (correctly).
52
+ #
53
+ it { automatic_splitter.split('purplerainbo').should == ['purple', 'rain'] }
54
+ it { automatic_splitter.split('purplerainb').should == ['purple', 'rain'] }
55
+ #
56
+ it { automatic_splitter.split('purplerai').should == ['purple'] }
57
+ it { automatic_splitter.split('purplera').should == ['purple'] }
58
+ it { automatic_splitter.split('purpler').should == ['purple'] }
59
+ #
60
+ it { automatic_splitter.split('purpl').should == [] }
61
+ it { automatic_splitter.split('purp').should == [] }
62
+ it { automatic_splitter.split('pur').should == [] }
63
+ it { automatic_splitter.split('pu').should == [] }
64
+ it { automatic_splitter.split('p').should == [] }
65
+ end
62
66
 
63
- context 'splitting text automatically (with partial)' do
64
- let(:automatic_splitter) { Picky::Splitters::Automatic.new index[:text], partial: true }
67
+ context 'splitting text automatically (with partial)' do
68
+ let(:automatic_splitter) { Picky::Splitters::Automatic.new index[:text], partial: true }
65
69
 
66
- # It splits the text correctly.
67
- #
68
- it { automatic_splitter.split('purplerainbow').should == ['purple', 'rain', 'bow'] }
69
- it { automatic_splitter.split('purplerain').should == ['purple', 'rain'] }
70
- it { automatic_splitter.split('purple').should == ['purple'] }
70
+ # It splits the text correctly.
71
+ #
72
+ it { automatic_splitter.split('purplerainbow').should == ['purple', 'rain', 'bow'] }
73
+ it { automatic_splitter.split('purplerain').should == ['purple', 'rain'] }
74
+ it { automatic_splitter.split('purple').should == ['purple'] }
71
75
 
72
- # Creates the right queries (see below).
73
- #
74
- it { automatic_splitter.split('colorpurple').should == ['color', 'purple'] }
75
- it { automatic_splitter.split('bownew').should == ['bow', 'new'] }
76
- it { automatic_splitter.split('spainisking').should == ['spain', 'is', 'king'] }
76
+ # Creates the right queries (see below).
77
+ #
78
+ it { automatic_splitter.split('colorpurple').should == ['color', 'purple'] }
79
+ it { automatic_splitter.split('bownew').should == ['bow', 'new'] }
80
+ it { automatic_splitter.split('spainisking').should == ['spain', 'is', 'king'] }
77
81
 
78
- # When it can't, it splits it using the partial index (correctly).
79
- #
80
- it { automatic_splitter.split('purplerainbo').should == ['purple', 'rain', 'bo'] }
81
- it { automatic_splitter.split('purplerainb').should == ['purple', 'rain', 'b'] }
82
- #
83
- it { automatic_splitter.split('purplerai').should == ['purple', 'rai'] }
84
- it { automatic_splitter.split('purplera').should == ['purple', 'ra'] }
85
- it { automatic_splitter.split('purpler').should == ['purple'] } # No 'r' in partial index.
86
- #
87
- it { automatic_splitter.split('purpl').should == ['purpl'] }
88
- it { automatic_splitter.split('purp').should == ['purp'] }
89
- it { automatic_splitter.split('pur').should == [] } # No 'pur' in partial index etc.
90
- it { automatic_splitter.split('pu').should == [] }
91
- it { automatic_splitter.split('p').should == [] }
82
+ # When it can't, it splits it using the partial index (correctly).
83
+ #
84
+ it { automatic_splitter.split('purplerainbo').should == ['purple', 'rain', 'bo'] }
85
+ it { automatic_splitter.split('purplerainb').should == ['purple', 'rain', 'b'] }
86
+ #
87
+ it { automatic_splitter.split('purplerai').should == ['purple', 'rai'] }
88
+ it { automatic_splitter.split('purplera').should == ['purple', 'ra'] }
89
+ it { automatic_splitter.split('purpler').should == ['purple'] } # No 'r' in partial index.
90
+ #
91
+ it { automatic_splitter.split('purpl').should == ['purpl'] }
92
+ it { automatic_splitter.split('purp').should == ['purp'] }
93
+ it { automatic_splitter.split('pur').should == [] } # No 'pur' in partial index etc.
94
+ it { automatic_splitter.split('pu').should == [] }
95
+ it { automatic_splitter.split('p').should == [] }
92
96
 
93
- let(:try) do
94
- splitter = automatic_splitter
95
- Picky::Search.new index do
96
- searching splits_text_on: splitter
97
- end
98
- end
97
+ let(:try) do
98
+ splitter = automatic_splitter
99
+ Picky::Search.new index do
100
+ searching splits_text_on: splitter
101
+ end
102
+ end
99
103
 
100
- # Should find the one with all parts.
101
- #
102
- it { try.search('purplerainbow').ids.should == [1] }
103
- it { try.search('sunandrain').ids.should == [5] }
104
+ # Should find the one with all parts.
105
+ #
106
+ it { try.search('purplerainbow').ids.should == [1] }
107
+ it { try.search('sunandrain').ids.should == [5] }
104
108
 
105
- # Common parts are found in multiple examples.
106
- #
107
- it { try.search('colorpurple').ids.should == [4,1] }
108
- it { try.search('bownew').ids.should == [3,1] }
109
- it { try.search('spainisking').ids.should == [6,1] }
110
- end
109
+ # Common parts are found in multiple examples.
110
+ #
111
+ it { try.search('colorpurple').ids.should == [4,1] }
112
+ it { try.search('bownew').ids.should == [3,1] }
113
+ it { try.search('spainisking').ids.should == [6,1] }
114
+ end
111
115
 
112
- it 'is fast enough' do
113
- automatic_splitter = Picky::Splitters::Automatic.new index[:text]
116
+ it 'is fast enough' do
117
+ automatic_splitter = Picky::Splitters::Automatic.new index[:text]
114
118
 
115
- performance_of do
116
- automatic_splitter.split('purplerainbow')
117
- end.should < 0.0002
119
+ performance_of do
120
+ automatic_splitter.split('purplerainbow')
121
+ end.should < 0.0002
122
+ end
123
+ end
118
124
  end
119
125
 
120
126
  end
@@ -75,7 +75,7 @@ describe "Memory optimization" do
75
75
  GC.start
76
76
 
77
77
  # Optimize saves some memory.
78
- (memsize_with_optimized_memory + 2952).should == memsize_with_readded_thing
78
+ (memsize_with_optimized_memory + 2000).should <= memsize_with_readded_thing
79
79
  end
80
80
 
81
81
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: picky
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.31.0
4
+ version: 4.31.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Florian Hanke