vector_embed 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/CHANGELOG +10 -0
- data/lib/vector_embed/maker.rb +8 -2
- data/lib/vector_embed/maker/number.rb +4 -2
- data/lib/vector_embed/version.rb +1 -1
- data/spec/vector_embed_spec.rb +21 -8
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
YWJmNzczYzU1MjZlODE4NzcxZGMxZTc0MTgzNjdkNjFjYzc5MjRjNA==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
NDNhZmQwZDNjOWMwNWJlOGM3NTBiOTVmNjIwZGJjM2EzYjdlZWZiMA==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
MDc4NjE0YWQ4ZmYzZWZmNTdhOTM3ZTkxODI0Y2RjNjU4YTAxMjMzMjdlZWQx
|
10
|
+
OTUzMDY4NGE4YjFkOWY1OWJiNTVjNWJmYTFkN2I1ZDdhZTY1OWQ0YzRkYjE3
|
11
|
+
MDkxZWYzYmQ2NDMyOTIxMzEyZDBkNGMwMjdkNDY1Yzc2YTMyNzk=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
ZmI5MzkzYWRmNzY2NjA0ZjBhY2NmNWIzMWMxZmM3OTU3MTVlY2Q5ZTg0YzJi
|
14
|
+
NzUyZDI3MjgzNmJhNDFiYTY0YjRlOTNlNzE4NjYyYjZlMTI5ZDFhMGZhMmY2
|
15
|
+
ZmYyZmU5Nzc1NzBmYjhkMmFmMDkxYzdkZjM0NTY2OTZmY2U0ODU=
|
data/CHANGELOG
CHANGED
@@ -1,3 +1,13 @@
|
|
1
|
+
0.2.0 / 2013-05-14
|
2
|
+
|
3
|
+
* Breaking changes
|
4
|
+
|
5
|
+
* Per the whole point of sparse vectors, don't output numbers features with value 0
|
6
|
+
|
7
|
+
* Enhancements
|
8
|
+
|
9
|
+
* More concise number representations per https://github.com/scikit-learn/scikit-learn/pull/1849
|
10
|
+
|
1
11
|
0.1.1 / 2013-04-04
|
2
12
|
|
3
13
|
* Enhancements
|
data/lib/vector_embed/maker.rb
CHANGED
@@ -29,11 +29,17 @@ class VectorEmbed
|
|
29
29
|
when Array
|
30
30
|
memo = []
|
31
31
|
v.each_with_index do |vv, i|
|
32
|
-
|
32
|
+
unless (vvv = value(vv)).nil?
|
33
|
+
memo << [ parent.index([k, i]), vvv ]
|
34
|
+
end
|
33
35
|
end
|
34
36
|
memo
|
35
37
|
else
|
36
|
-
|
38
|
+
if (vv = value(v)).nil?
|
39
|
+
[]
|
40
|
+
else
|
41
|
+
[ [ parent.index([k]), value(v) ] ]
|
42
|
+
end
|
37
43
|
end
|
38
44
|
end
|
39
45
|
end
|
data/lib/vector_embed/maker/number.rb
CHANGED
@@ -18,7 +18,9 @@ class VectorEmbed
|
|
18
18
|
else
|
19
19
|
v
|
20
20
|
end
|
21
|
-
num
|
21
|
+
if num.nonzero?
|
22
|
+
'%.16g' % num
|
23
|
+
end
|
22
24
|
end
|
23
25
|
end
|
24
26
|
|
@@ -27,7 +29,7 @@ class VectorEmbed
|
|
27
29
|
when Numeric, JUST_A_NUMBER
|
28
30
|
Number.numify v
|
29
31
|
when NilClass, NULL, SLASH_N
|
30
|
-
|
32
|
+
nil
|
31
33
|
else
|
32
34
|
raise ArgumentError, "Can't embed #{v.inspect} in number feature #{k.inspect}"
|
33
35
|
end
|
data/lib/vector_embed/version.rb
CHANGED
data/spec/vector_embed_spec.rb
CHANGED
@@ -122,8 +122,21 @@ describe VectorEmbed do
|
|
122
122
|
v.line(1, 1 => '9').should == "1 #{l_h('1')}:9"
|
123
123
|
v.line(1, 1 => 5.4).should == "1 #{l_h('1')}:5.4"
|
124
124
|
v.line(1, 1 => '5.4').should == "1 #{l_h('1')}:5.4"
|
125
|
-
v.line(1, 1 => 9e9).should == "1 #{l_h('1')}:9000000000
|
126
|
-
v.line(1, 1 => '9e9').should == "1 #{l_h('1')}:9000000000
|
125
|
+
v.line(1, 1 => 9e9).should == "1 #{l_h('1')}:9000000000"
|
126
|
+
v.line(1, 1 => '9e9').should == "1 #{l_h('1')}:9000000000"
|
127
|
+
end
|
128
|
+
|
129
|
+
it "does not output 0 in number attributes" do
|
130
|
+
v = VectorEmbed.new
|
131
|
+
v.line(3, 1 => 1)
|
132
|
+
v.line(3, 1 => 0).should == "3"
|
133
|
+
v.line(3, 1 => '0').should == "3"
|
134
|
+
end
|
135
|
+
|
136
|
+
it "treats nil like zero in number attributes" do
|
137
|
+
v = VectorEmbed.new
|
138
|
+
v.line(1, 1 => 1)
|
139
|
+
v.line(1, 1 => nil).should == v.line(1, 1 => 0)
|
127
140
|
end
|
128
141
|
|
129
142
|
it "stores strings as m-category attributes" do
|
@@ -160,10 +173,10 @@ describe VectorEmbed do
|
|
160
173
|
it "in number mode, treats null as 0" do
|
161
174
|
v = VectorEmbed.new
|
162
175
|
v.line(1, 1 => 9).should == "1 #{l_h('1')}:9"
|
163
|
-
v.line(1, 1 => nil).should ==
|
164
|
-
v.line(1, 1 => 'null').should ==
|
165
|
-
v.line(1, 1 => 'NULL').should ==
|
166
|
-
v.line(1, 1 => '\N').should ==
|
176
|
+
v.line(1, 1 => nil).should == v.line(1, 1 => 0)
|
177
|
+
v.line(1, 1 => 'null').should == v.line(1, 1 => 0)
|
178
|
+
v.line(1, 1 => 'NULL').should == v.line(1, 1 => 0)
|
179
|
+
v.line(1, 1 => '\N').should == v.line(1, 1 => 0)
|
167
180
|
end
|
168
181
|
|
169
182
|
it "doesn't allow embedding boolean in number mode or vice-versa" do
|
@@ -186,12 +199,12 @@ describe VectorEmbed do
|
|
186
199
|
|
187
200
|
it "uses scientific notation for large numbers" do
|
188
201
|
v = VectorEmbed.new
|
189
|
-
v.line(5, 1 => 8.
|
202
|
+
v.line(5, 1 => 8.12e27).should == "5 #{l_h('1')}:8.12e+27"
|
190
203
|
end
|
191
204
|
|
192
205
|
it "detects numbers in strings" do
|
193
206
|
v = VectorEmbed.new
|
194
|
-
v.line(5, 1 => '8.12e13').should == "5 #{l_h('1')}:
|
207
|
+
v.line(5, 1 => '8.12e13').should == "5 #{l_h('1')}:81200000000000"
|
195
208
|
end
|
196
209
|
|
197
210
|
it "allows 2 byte n-grams" do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: vector_embed
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Seamus Abshere
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-
|
11
|
+
date: 2013-05-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: murmurhash3
|