vector_embed 0.1.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- YWRiOGI3YTdkMDIwNDljNTA0NDkzOGNiNGVkNThiZjczNjdlNDU3OQ==
4
+ YWJmNzczYzU1MjZlODE4NzcxZGMxZTc0MTgzNjdkNjFjYzc5MjRjNA==
5
5
  data.tar.gz: !binary |-
6
- ZDIxY2Q1NDFjMjkxNDFkOGJkZTk0NTZiMDc4NTgwNjYwZGE5MDI1MQ==
6
+ NDNhZmQwZDNjOWMwNWJlOGM3NTBiOTVmNjIwZGJjM2EzYjdlZWZiMA==
7
7
  !binary "U0hBNTEy":
8
8
  metadata.gz: !binary |-
9
- YzAwOGQ0NjlmNThiYzNmZmQxNDk0ZTY2ZTIxMTA2M2NjMjZiNGE4MGU0NzE3
10
- YTZhNTkzM2ViMWU2M2FhZDE5NTk5YTdkOWU3NGYxYjVjNmJkYmZjNGVhMTU0
11
- ODY1OTkzNmFhNDIwODY4MjUwYTJjODU0ZDgzNWYzNmE1ZTljYzg=
9
+ MDc4NjE0YWQ4ZmYzZWZmNTdhOTM3ZTkxODI0Y2RjNjU4YTAxMjMzMjdlZWQx
10
+ OTUzMDY4NGE4YjFkOWY1OWJiNTVjNWJmYTFkN2I1ZDdhZTY1OWQ0YzRkYjE3
11
+ MDkxZWYzYmQ2NDMyOTIxMzEyZDBkNGMwMjdkNDY1Yzc2YTMyNzk=
12
12
  data.tar.gz: !binary |-
13
- ZjM3OGUyNDk1YzIxMjc5OTgyYmI3ZWY3NGEwYjRjYWY1MTk0MjJhYzhjODU0
14
- NTg0OWYxNWU4ZTQ3YTQ0ZDFlOWVjYzMyYmZhOTE0MjNlMDRjMjMyMDkwNGMy
15
- NDNhMjhkYjUzNGRhZmRhNjliMGFiNzY1NmNiNzk5MDhhOTUyOTU=
13
+ ZmI5MzkzYWRmNzY2NjA0ZjBhY2NmNWIzMWMxZmM3OTU3MTVlY2Q5ZTg0YzJi
14
+ NzUyZDI3MjgzNmJhNDFiYTY0YjRlOTNlNzE4NjYyYjZlMTI5ZDFhMGZhMmY2
15
+ ZmYyZmU5Nzc1NzBmYjhkMmFmMDkxYzdkZjM0NTY2OTZmY2U0ODU=
data/CHANGELOG CHANGED
@@ -1,3 +1,13 @@
1
+ 0.2.0 / 2013-05-14
2
+
3
+ * Breaking changes
4
+
5
+ * Per the whole point of sparse vectors, don't output number features with value 0
6
+
7
+ * Enhancements
8
+
9
+ * More concise number representations per https://github.com/scikit-learn/scikit-learn/pull/1849
10
+
1
11
  0.1.1 / 2013-04-04
2
12
 
3
13
  * Enhancements
@@ -29,11 +29,17 @@ class VectorEmbed
29
29
  when Array
30
30
  memo = []
31
31
  v.each_with_index do |vv, i|
32
- memo << [ parent.index([k, i]), value(vv) ]
32
+ unless (vvv = value(vv)).nil?
33
+ memo << [ parent.index([k, i]), vvv ]
34
+ end
33
35
  end
34
36
  memo
35
37
  else
36
- [ [ parent.index([k]), value(v) ] ]
38
+ if (vv = value(v)).nil?
39
+ []
40
+ else
41
+ [ [ parent.index([k]), value(v) ] ]
42
+ end
37
43
  end
38
44
  end
39
45
  end
@@ -18,7 +18,9 @@ class VectorEmbed
18
18
  else
19
19
  v
20
20
  end
21
- num > 1e10 ? ('%.10e' % num) : num
21
+ if num.nonzero?
22
+ '%.16g' % num
23
+ end
22
24
  end
23
25
  end
24
26
 
@@ -27,7 +29,7 @@ class VectorEmbed
27
29
  when Numeric, JUST_A_NUMBER
28
30
  Number.numify v
29
31
  when NilClass, NULL, SLASH_N
30
- 0
32
+ nil
31
33
  else
32
34
  raise ArgumentError, "Can't embed #{v.inspect} in number feature #{k.inspect}"
33
35
  end
@@ -1,3 +1,3 @@
1
1
  class VectorEmbed
2
- VERSION = "0.1.1"
2
+ VERSION = "0.2.0"
3
3
  end
@@ -122,8 +122,21 @@ describe VectorEmbed do
122
122
  v.line(1, 1 => '9').should == "1 #{l_h('1')}:9"
123
123
  v.line(1, 1 => 5.4).should == "1 #{l_h('1')}:5.4"
124
124
  v.line(1, 1 => '5.4').should == "1 #{l_h('1')}:5.4"
125
- v.line(1, 1 => 9e9).should == "1 #{l_h('1')}:9000000000.0"
126
- v.line(1, 1 => '9e9').should == "1 #{l_h('1')}:9000000000.0"
125
+ v.line(1, 1 => 9e9).should == "1 #{l_h('1')}:9000000000"
126
+ v.line(1, 1 => '9e9').should == "1 #{l_h('1')}:9000000000"
127
+ end
128
+
129
+ it "does not output 0 in number attributes" do
130
+ v = VectorEmbed.new
131
+ v.line(3, 1 => 1)
132
+ v.line(3, 1 => 0).should == "3"
133
+ v.line(3, 1 => '0').should == "3"
134
+ end
135
+
136
+ it "treats nil like zero in number attributes" do
137
+ v = VectorEmbed.new
138
+ v.line(1, 1 => 1)
139
+ v.line(1, 1 => nil).should == v.line(1, 1 => 0)
127
140
  end
128
141
 
129
142
  it "stores strings as m-category attributes" do
@@ -160,10 +173,10 @@ describe VectorEmbed do
160
173
  it "in number mode, treats null as 0" do
161
174
  v = VectorEmbed.new
162
175
  v.line(1, 1 => 9).should == "1 #{l_h('1')}:9"
163
- v.line(1, 1 => nil).should == "1 #{l_h('1')}:0"
164
- v.line(1, 1 => 'null').should == "1 #{l_h('1')}:0"
165
- v.line(1, 1 => 'NULL').should == "1 #{l_h('1')}:0"
166
- v.line(1, 1 => '\N').should == "1 #{l_h('1')}:0"
176
+ v.line(1, 1 => nil).should == v.line(1, 1 => 0)
177
+ v.line(1, 1 => 'null').should == v.line(1, 1 => 0)
178
+ v.line(1, 1 => 'NULL').should == v.line(1, 1 => 0)
179
+ v.line(1, 1 => '\N').should == v.line(1, 1 => 0)
167
180
  end
168
181
 
169
182
  it "doesn't allow embedding boolean in number mode or vice-versa" do
@@ -186,12 +199,12 @@ describe VectorEmbed do
186
199
 
187
200
  it "uses scientific notation for large numbers" do
188
201
  v = VectorEmbed.new
189
- v.line(5, 1 => 8.12e13).should == "5 #{l_h('1')}:8.1200000000e+13"
202
+ v.line(5, 1 => 8.12e27).should == "5 #{l_h('1')}:8.12e+27"
190
203
  end
191
204
 
192
205
  it "detects numbers in strings" do
193
206
  v = VectorEmbed.new
194
- v.line(5, 1 => '8.12e13').should == "5 #{l_h('1')}:8.1200000000e+13"
207
+ v.line(5, 1 => '8.12e13').should == "5 #{l_h('1')}:81200000000000"
195
208
  end
196
209
 
197
210
  it "allows 2 byte n-grams" do
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: vector_embed
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Seamus Abshere
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-04-09 00:00:00.000000000 Z
11
+ date: 2013-05-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: murmurhash3