vector_embed 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- YWRiOGI3YTdkMDIwNDljNTA0NDkzOGNiNGVkNThiZjczNjdlNDU3OQ==
4
+ YWJmNzczYzU1MjZlODE4NzcxZGMxZTc0MTgzNjdkNjFjYzc5MjRjNA==
5
5
  data.tar.gz: !binary |-
6
- ZDIxY2Q1NDFjMjkxNDFkOGJkZTk0NTZiMDc4NTgwNjYwZGE5MDI1MQ==
6
+ NDNhZmQwZDNjOWMwNWJlOGM3NTBiOTVmNjIwZGJjM2EzYjdlZWZiMA==
7
7
  !binary "U0hBNTEy":
8
8
  metadata.gz: !binary |-
9
- YzAwOGQ0NjlmNThiYzNmZmQxNDk0ZTY2ZTIxMTA2M2NjMjZiNGE4MGU0NzE3
10
- YTZhNTkzM2ViMWU2M2FhZDE5NTk5YTdkOWU3NGYxYjVjNmJkYmZjNGVhMTU0
11
- ODY1OTkzNmFhNDIwODY4MjUwYTJjODU0ZDgzNWYzNmE1ZTljYzg=
9
+ MDc4NjE0YWQ4ZmYzZWZmNTdhOTM3ZTkxODI0Y2RjNjU4YTAxMjMzMjdlZWQx
10
+ OTUzMDY4NGE4YjFkOWY1OWJiNTVjNWJmYTFkN2I1ZDdhZTY1OWQ0YzRkYjE3
11
+ MDkxZWYzYmQ2NDMyOTIxMzEyZDBkNGMwMjdkNDY1Yzc2YTMyNzk=
12
12
  data.tar.gz: !binary |-
13
- ZjM3OGUyNDk1YzIxMjc5OTgyYmI3ZWY3NGEwYjRjYWY1MTk0MjJhYzhjODU0
14
- NTg0OWYxNWU4ZTQ3YTQ0ZDFlOWVjYzMyYmZhOTE0MjNlMDRjMjMyMDkwNGMy
15
- NDNhMjhkYjUzNGRhZmRhNjliMGFiNzY1NmNiNzk5MDhhOTUyOTU=
13
+ ZmI5MzkzYWRmNzY2NjA0ZjBhY2NmNWIzMWMxZmM3OTU3MTVlY2Q5ZTg0YzJi
14
+ NzUyZDI3MjgzNmJhNDFiYTY0YjRlOTNlNzE4NjYyYjZlMTI5ZDFhMGZhMmY2
15
+ ZmYyZmU5Nzc1NzBmYjhkMmFmMDkxYzdkZjM0NTY2OTZmY2U0ODU=
data/CHANGELOG CHANGED
@@ -1,3 +1,13 @@
1
+ 0.2.0 / 2013-05-14
2
+
3
+ * Breaking changes
4
+
5
+ * Per the whole point of sparse vectors, don't output number features with value 0
6
+
7
+ * Enhancements
8
+
9
+ * More concise number representations per https://github.com/scikit-learn/scikit-learn/pull/1849
10
+
1
11
  0.1.1 / 2013-04-04
2
12
 
3
13
  * Enhancements
@@ -29,11 +29,17 @@ class VectorEmbed
29
29
  when Array
30
30
  memo = []
31
31
  v.each_with_index do |vv, i|
32
- memo << [ parent.index([k, i]), value(vv) ]
32
+ unless (vvv = value(vv)).nil?
33
+ memo << [ parent.index([k, i]), vvv ]
34
+ end
33
35
  end
34
36
  memo
35
37
  else
36
- [ [ parent.index([k]), value(v) ] ]
38
+ if (vv = value(v)).nil?
39
+ []
40
+ else
41
+ [ [ parent.index([k]), value(v) ] ]
42
+ end
37
43
  end
38
44
  end
39
45
  end
@@ -18,7 +18,9 @@ class VectorEmbed
18
18
  else
19
19
  v
20
20
  end
21
- num > 1e10 ? ('%.10e' % num) : num
21
+ if num.nonzero?
22
+ '%.16g' % num
23
+ end
22
24
  end
23
25
  end
24
26
 
@@ -27,7 +29,7 @@ class VectorEmbed
27
29
  when Numeric, JUST_A_NUMBER
28
30
  Number.numify v
29
31
  when NilClass, NULL, SLASH_N
30
- 0
32
+ nil
31
33
  else
32
34
  raise ArgumentError, "Can't embed #{v.inspect} in number feature #{k.inspect}"
33
35
  end
@@ -1,3 +1,3 @@
1
1
  class VectorEmbed
2
- VERSION = "0.1.1"
2
+ VERSION = "0.2.0"
3
3
  end
@@ -122,8 +122,21 @@ describe VectorEmbed do
122
122
  v.line(1, 1 => '9').should == "1 #{l_h('1')}:9"
123
123
  v.line(1, 1 => 5.4).should == "1 #{l_h('1')}:5.4"
124
124
  v.line(1, 1 => '5.4').should == "1 #{l_h('1')}:5.4"
125
- v.line(1, 1 => 9e9).should == "1 #{l_h('1')}:9000000000.0"
126
- v.line(1, 1 => '9e9').should == "1 #{l_h('1')}:9000000000.0"
125
+ v.line(1, 1 => 9e9).should == "1 #{l_h('1')}:9000000000"
126
+ v.line(1, 1 => '9e9').should == "1 #{l_h('1')}:9000000000"
127
+ end
128
+
129
+ it "does not output 0 in number attributes" do
130
+ v = VectorEmbed.new
131
+ v.line(3, 1 => 1)
132
+ v.line(3, 1 => 0).should == "3"
133
+ v.line(3, 1 => '0').should == "3"
134
+ end
135
+
136
+ it "treats nil like zero in number attributes" do
137
+ v = VectorEmbed.new
138
+ v.line(1, 1 => 1)
139
+ v.line(1, 1 => nil).should == v.line(1, 1 => 0)
127
140
  end
128
141
 
129
142
  it "stores strings as m-category attributes" do
@@ -160,10 +173,10 @@ describe VectorEmbed do
160
173
  it "in number mode, treats null as 0" do
161
174
  v = VectorEmbed.new
162
175
  v.line(1, 1 => 9).should == "1 #{l_h('1')}:9"
163
- v.line(1, 1 => nil).should == "1 #{l_h('1')}:0"
164
- v.line(1, 1 => 'null').should == "1 #{l_h('1')}:0"
165
- v.line(1, 1 => 'NULL').should == "1 #{l_h('1')}:0"
166
- v.line(1, 1 => '\N').should == "1 #{l_h('1')}:0"
176
+ v.line(1, 1 => nil).should == v.line(1, 1 => 0)
177
+ v.line(1, 1 => 'null').should == v.line(1, 1 => 0)
178
+ v.line(1, 1 => 'NULL').should == v.line(1, 1 => 0)
179
+ v.line(1, 1 => '\N').should == v.line(1, 1 => 0)
167
180
  end
168
181
 
169
182
  it "doesn't allow embedding boolean in number mode or vice-versa" do
@@ -186,12 +199,12 @@ describe VectorEmbed do
186
199
 
187
200
  it "uses scientific notation for large numbers" do
188
201
  v = VectorEmbed.new
189
- v.line(5, 1 => 8.12e13).should == "5 #{l_h('1')}:8.1200000000e+13"
202
+ v.line(5, 1 => 8.12e27).should == "5 #{l_h('1')}:8.12e+27"
190
203
  end
191
204
 
192
205
  it "detects numbers in strings" do
193
206
  v = VectorEmbed.new
194
- v.line(5, 1 => '8.12e13').should == "5 #{l_h('1')}:8.1200000000e+13"
207
+ v.line(5, 1 => '8.12e13').should == "5 #{l_h('1')}:81200000000000"
195
208
  end
196
209
 
197
210
  it "allows 2 byte n-grams" do
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: vector_embed
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Seamus Abshere
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-04-09 00:00:00.000000000 Z
11
+ date: 2013-05-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: murmurhash3