vector_embed 0.1.1 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/CHANGELOG +10 -0
- data/lib/vector_embed/maker.rb +8 -2
- data/lib/vector_embed/maker/number.rb +4 -2
- data/lib/vector_embed/version.rb +1 -1
- data/spec/vector_embed_spec.rb +21 -8
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
YWJmNzczYzU1MjZlODE4NzcxZGMxZTc0MTgzNjdkNjFjYzc5MjRjNA==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
NDNhZmQwZDNjOWMwNWJlOGM3NTBiOTVmNjIwZGJjM2EzYjdlZWZiMA==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
MDc4NjE0YWQ4ZmYzZWZmNTdhOTM3ZTkxODI0Y2RjNjU4YTAxMjMzMjdlZWQx
|
10
|
+
OTUzMDY4NGE4YjFkOWY1OWJiNTVjNWJmYTFkN2I1ZDdhZTY1OWQ0YzRkYjE3
|
11
|
+
MDkxZWYzYmQ2NDMyOTIxMzEyZDBkNGMwMjdkNDY1Yzc2YTMyNzk=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
ZmI5MzkzYWRmNzY2NjA0ZjBhY2NmNWIzMWMxZmM3OTU3MTVlY2Q5ZTg0YzJi
|
14
|
+
NzUyZDI3MjgzNmJhNDFiYTY0YjRlOTNlNzE4NjYyYjZlMTI5ZDFhMGZhMmY2
|
15
|
+
ZmYyZmU5Nzc1NzBmYjhkMmFmMDkxYzdkZjM0NTY2OTZmY2U0ODU=
|
data/CHANGELOG
CHANGED
@@ -1,3 +1,13 @@
|
|
1
|
+
0.2.0 / 2013-05-14
|
2
|
+
|
3
|
+
* Breaking changes
|
4
|
+
|
5
|
+
* Per the whole point of sparse vectors, don't output number features with value 0
|
6
|
+
|
7
|
+
* Enhancements
|
8
|
+
|
9
|
+
* More concise number representations per https://github.com/scikit-learn/scikit-learn/pull/1849
|
10
|
+
|
1
11
|
0.1.1 / 2013-04-04
|
2
12
|
|
3
13
|
* Enhancements
|
data/lib/vector_embed/maker.rb
CHANGED
@@ -29,11 +29,17 @@ class VectorEmbed
|
|
29
29
|
when Array
|
30
30
|
memo = []
|
31
31
|
v.each_with_index do |vv, i|
|
32
|
-
|
32
|
+
unless (vvv = value(vv)).nil?
|
33
|
+
memo << [ parent.index([k, i]), vvv ]
|
34
|
+
end
|
33
35
|
end
|
34
36
|
memo
|
35
37
|
else
|
36
|
-
|
38
|
+
if (vv = value(v)).nil?
|
39
|
+
[]
|
40
|
+
else
|
41
|
+
[ [ parent.index([k]), value(v) ] ]
|
42
|
+
end
|
37
43
|
end
|
38
44
|
end
|
39
45
|
end
|
data/lib/vector_embed/maker/number.rb
CHANGED
@@ -18,7 +18,9 @@ class VectorEmbed
|
|
18
18
|
else
|
19
19
|
v
|
20
20
|
end
|
21
|
-
num
|
21
|
+
if num.nonzero?
|
22
|
+
'%.16g' % num
|
23
|
+
end
|
22
24
|
end
|
23
25
|
end
|
24
26
|
|
@@ -27,7 +29,7 @@ class VectorEmbed
|
|
27
29
|
when Numeric, JUST_A_NUMBER
|
28
30
|
Number.numify v
|
29
31
|
when NilClass, NULL, SLASH_N
|
30
|
-
|
32
|
+
nil
|
31
33
|
else
|
32
34
|
raise ArgumentError, "Can't embed #{v.inspect} in number feature #{k.inspect}"
|
33
35
|
end
|
data/lib/vector_embed/version.rb
CHANGED
data/spec/vector_embed_spec.rb
CHANGED
@@ -122,8 +122,21 @@ describe VectorEmbed do
|
|
122
122
|
v.line(1, 1 => '9').should == "1 #{l_h('1')}:9"
|
123
123
|
v.line(1, 1 => 5.4).should == "1 #{l_h('1')}:5.4"
|
124
124
|
v.line(1, 1 => '5.4').should == "1 #{l_h('1')}:5.4"
|
125
|
-
v.line(1, 1 => 9e9).should == "1 #{l_h('1')}:9000000000
|
126
|
-
v.line(1, 1 => '9e9').should == "1 #{l_h('1')}:9000000000
|
125
|
+
v.line(1, 1 => 9e9).should == "1 #{l_h('1')}:9000000000"
|
126
|
+
v.line(1, 1 => '9e9').should == "1 #{l_h('1')}:9000000000"
|
127
|
+
end
|
128
|
+
|
129
|
+
it "does not output 0 in number attributes" do
|
130
|
+
v = VectorEmbed.new
|
131
|
+
v.line(3, 1 => 1)
|
132
|
+
v.line(3, 1 => 0).should == "3"
|
133
|
+
v.line(3, 1 => '0').should == "3"
|
134
|
+
end
|
135
|
+
|
136
|
+
it "treats nil like zero in number attributes" do
|
137
|
+
v = VectorEmbed.new
|
138
|
+
v.line(1, 1 => 1)
|
139
|
+
v.line(1, 1 => nil).should == v.line(1, 1 => 0)
|
127
140
|
end
|
128
141
|
|
129
142
|
it "stores strings as m-category attributes" do
|
@@ -160,10 +173,10 @@ describe VectorEmbed do
|
|
160
173
|
it "in number mode, treats null as 0" do
|
161
174
|
v = VectorEmbed.new
|
162
175
|
v.line(1, 1 => 9).should == "1 #{l_h('1')}:9"
|
163
|
-
v.line(1, 1 => nil).should ==
|
164
|
-
v.line(1, 1 => 'null').should ==
|
165
|
-
v.line(1, 1 => 'NULL').should ==
|
166
|
-
v.line(1, 1 => '\N').should ==
|
176
|
+
v.line(1, 1 => nil).should == v.line(1, 1 => 0)
|
177
|
+
v.line(1, 1 => 'null').should == v.line(1, 1 => 0)
|
178
|
+
v.line(1, 1 => 'NULL').should == v.line(1, 1 => 0)
|
179
|
+
v.line(1, 1 => '\N').should == v.line(1, 1 => 0)
|
167
180
|
end
|
168
181
|
|
169
182
|
it "doesn't allow embedding boolean in number mode or vice-versa" do
|
@@ -186,12 +199,12 @@ describe VectorEmbed do
|
|
186
199
|
|
187
200
|
it "uses scientific notation for large numbers" do
|
188
201
|
v = VectorEmbed.new
|
189
|
-
v.line(5, 1 => 8.
|
202
|
+
v.line(5, 1 => 8.12e27).should == "5 #{l_h('1')}:8.12e+27"
|
190
203
|
end
|
191
204
|
|
192
205
|
it "detects numbers in strings" do
|
193
206
|
v = VectorEmbed.new
|
194
|
-
v.line(5, 1 => '8.12e13').should == "5 #{l_h('1')}:
|
207
|
+
v.line(5, 1 => '8.12e13').should == "5 #{l_h('1')}:81200000000000"
|
195
208
|
end
|
196
209
|
|
197
210
|
it "allows 2 byte n-grams" do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: vector_embed
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Seamus Abshere
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-
|
11
|
+
date: 2013-05-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: murmurhash3
|