ruby-nuggets 0.9.6.pre2 → 0.9.6.pre3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/nuggets/lsi.rb +19 -20
- data/lib/nuggets/version.rb +2 -2
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e661023674b7ff0f06dd711df4d2c67aa7026230
|
4
|
+
data.tar.gz: 727d8002a0c68c8dadf8237b240448fe882efa90
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 59fd315cfa38d34aed7113e06281b2a956251cb6cf49b8ce07a36af341d48bac0d4d94a3e2a5bc0e0fa538397f18e3660b94c982c14a228075872600657f69b7
|
7
|
+
data.tar.gz: e213af8417fae28b29f2f76a7b2a6d31cc47ecba82475436f0e75984174e8e2a0108e16b1325f28a43fc018d901997da259d69c179554df874a802b6f48ad2c8
|
data/lib/nuggets/lsi.rb
CHANGED
@@ -89,21 +89,19 @@ module Nuggets
|
|
89
89
|
# abs:: minimum absolute value to consider
|
90
90
|
# nul:: exclude null values (true or Float)
|
91
91
|
# new:: exclude original terms / only yield new ones
|
92
|
-
def
|
93
|
-
return enum_for(:
|
92
|
+
def each_term(key = nil, options = {})
|
93
|
+
return enum_for(:each_term, key, options) unless block_given?
|
94
94
|
|
95
95
|
min, abs, nul, new = options.values_at(:min, :abs, :nul, :new)
|
96
96
|
nul = DEFAULT_EPSILON if nul == true
|
97
97
|
|
98
|
-
list = @invlist
|
98
|
+
list, norm = @invlist, options[:norm]
|
99
99
|
|
100
100
|
(key ? [self[key]] : docs).each { |doc|
|
101
|
-
if doc &&
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
i += 1
|
106
|
-
yield doc, list[i], v unless (min && v < min) ||
|
101
|
+
if doc && vec = norm ? doc.norm : doc.vector
|
102
|
+
vec.enum_for(:each).with_index { |v, i|
|
103
|
+
yield doc, list[i], v unless v.nan? ||
|
104
|
+
(min && v < min) ||
|
107
105
|
(abs && v.abs < abs) ||
|
108
106
|
(nul && v.abs < nul) ||
|
109
107
|
(new && doc.include?(i))
|
@@ -112,6 +110,10 @@ module Nuggets
|
|
112
110
|
}
|
113
111
|
end
|
114
112
|
|
113
|
+
def each_norm(key = nil, options = {}, &block)
|
114
|
+
each_term(key, options.merge(:norm => true), &block)
|
115
|
+
end
|
116
|
+
|
115
117
|
def related(key, num = 5)
|
116
118
|
if doc = self[key] and norm = doc.norm
|
117
119
|
temp = sort_by { |k, v| -norm * v.norm.col }
|
@@ -158,8 +160,8 @@ module Nuggets
|
|
158
160
|
# MxN matrix, M<N, is not implemented (file svd.c, line 61)
|
159
161
|
u, v, s = matrix(docs, list.size, size = docs.size).SV_decomp
|
160
162
|
|
161
|
-
|
162
|
-
|
163
|
+
(u * reduce(s, options.fetch(:cutoff, DEFAULT_CUTOFF)) * v.trans).
|
164
|
+
enum_for(:each_col).with_index { |c, i| docs[i].vector = c.row }
|
163
165
|
|
164
166
|
size
|
165
167
|
end
|
@@ -175,12 +177,7 @@ module Nuggets
|
|
175
177
|
# k < 1:: keep (at most) this proportion
|
176
178
|
def reduce(s, k, m = s.size)
|
177
179
|
if k && k < m
|
178
|
-
|
179
|
-
k = (m * k).floor if k < 1
|
180
|
-
s[k, m - k] = 0
|
181
|
-
else
|
182
|
-
s.set_zero
|
183
|
-
end
|
180
|
+
k > 0 ? s[k = (k < 1 ? m * k : k).floor, m - k] = 0 : s.set_zero
|
184
181
|
end
|
185
182
|
|
186
183
|
s.to_m_diagonal
|
@@ -207,7 +204,7 @@ module Nuggets
|
|
207
204
|
else alias_method(method, "#{transform ||= :raw}_vector")
|
208
205
|
end
|
209
206
|
|
210
|
-
@transform = transform
|
207
|
+
@transform = transform.to_sym
|
211
208
|
end
|
212
209
|
|
213
210
|
end
|
@@ -245,9 +242,11 @@ module Nuggets
|
|
245
242
|
end
|
246
243
|
|
247
244
|
def tfidf_vector(*args)
|
248
|
-
vec, f
|
245
|
+
vec, f = raw_vector(*args), @freq
|
249
246
|
s, d = vec.sum, @total = args.fetch(1, @total).to_f
|
250
|
-
|
247
|
+
|
248
|
+
vec.enum_for(:map).with_index { |v, i|
|
249
|
+
v > 0 ? ::Math.log(d / f[i]) * v / s : v }
|
251
250
|
end
|
252
251
|
|
253
252
|
self.transform = DEFAULT_TRANSFORM
|
data/lib/nuggets/version.rb
CHANGED
@@ -10,12 +10,12 @@ module Nuggets
|
|
10
10
|
|
11
11
|
# Returns array representation.
|
12
12
|
def to_a
|
13
|
-
[MAJOR, MINOR, TINY]
|
13
|
+
[MAJOR, MINOR, TINY] << 'pre3'
|
14
14
|
end
|
15
15
|
|
16
16
|
# Short-cut for version string.
|
17
17
|
def to_s
|
18
|
-
to_a.join('.')
|
18
|
+
to_a.join('.')
|
19
19
|
end
|
20
20
|
|
21
21
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby-nuggets
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.6.pre2
|
4
|
+
version: 0.9.6.pre3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jens Wille
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-11-
|
11
|
+
date: 2013-11-26 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Some extensions to the Ruby programming language.
|
14
14
|
email: jens.wille@gmail.com
|
@@ -233,7 +233,7 @@ rdoc_options:
|
|
233
233
|
- "--line-numbers"
|
234
234
|
- "--all"
|
235
235
|
- "--title"
|
236
|
-
- ruby-nuggets Application documentation (v0.9.6.pre2)
|
236
|
+
- ruby-nuggets Application documentation (v0.9.6.pre3)
|
237
237
|
- "--main"
|
238
238
|
- README
|
239
239
|
require_paths:
|