ruby-nuggets 0.9.6.pre2 → 0.9.6.pre3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/nuggets/lsi.rb +19 -20
- data/lib/nuggets/version.rb +2 -2
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e661023674b7ff0f06dd711df4d2c67aa7026230
|
4
|
+
data.tar.gz: 727d8002a0c68c8dadf8237b240448fe882efa90
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 59fd315cfa38d34aed7113e06281b2a956251cb6cf49b8ce07a36af341d48bac0d4d94a3e2a5bc0e0fa538397f18e3660b94c982c14a228075872600657f69b7
|
7
|
+
data.tar.gz: e213af8417fae28b29f2f76a7b2a6d31cc47ecba82475436f0e75984174e8e2a0108e16b1325f28a43fc018d901997da259d69c179554df874a802b6f48ad2c8
|
data/lib/nuggets/lsi.rb
CHANGED
@@ -89,21 +89,19 @@ module Nuggets
|
|
89
89
|
# abs:: minimum absolute value to consider
|
90
90
|
# nul:: exclude null values (true or Float)
|
91
91
|
# new:: exclude original terms / only yield new ones
|
92
|
-
def
|
93
|
-
return enum_for(:
|
92
|
+
def each_term(key = nil, options = {})
|
93
|
+
return enum_for(:each_term, key, options) unless block_given?
|
94
94
|
|
95
95
|
min, abs, nul, new = options.values_at(:min, :abs, :nul, :new)
|
96
96
|
nul = DEFAULT_EPSILON if nul == true
|
97
97
|
|
98
|
-
list = @invlist
|
98
|
+
list, norm = @invlist, options[:norm]
|
99
99
|
|
100
100
|
(key ? [self[key]] : docs).each { |doc|
|
101
|
-
if doc &&
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
i += 1
|
106
|
-
yield doc, list[i], v unless (min && v < min) ||
|
101
|
+
if doc && vec = norm ? doc.norm : doc.vector
|
102
|
+
vec.enum_for(:each).with_index { |v, i|
|
103
|
+
yield doc, list[i], v unless v.nan? ||
|
104
|
+
(min && v < min) ||
|
107
105
|
(abs && v.abs < abs) ||
|
108
106
|
(nul && v.abs < nul) ||
|
109
107
|
(new && doc.include?(i))
|
@@ -112,6 +110,10 @@ module Nuggets
|
|
112
110
|
}
|
113
111
|
end
|
114
112
|
|
113
|
+
def each_norm(key = nil, options = {}, &block)
|
114
|
+
each_term(key, options.merge(:norm => true), &block)
|
115
|
+
end
|
116
|
+
|
115
117
|
def related(key, num = 5)
|
116
118
|
if doc = self[key] and norm = doc.norm
|
117
119
|
temp = sort_by { |k, v| -norm * v.norm.col }
|
@@ -158,8 +160,8 @@ module Nuggets
|
|
158
160
|
# MxN matrix, M<N, is not implemented (file svd.c, line 61)
|
159
161
|
u, v, s = matrix(docs, list.size, size = docs.size).SV_decomp
|
160
162
|
|
161
|
-
|
162
|
-
|
163
|
+
(u * reduce(s, options.fetch(:cutoff, DEFAULT_CUTOFF)) * v.trans).
|
164
|
+
enum_for(:each_col).with_index { |c, i| docs[i].vector = c.row }
|
163
165
|
|
164
166
|
size
|
165
167
|
end
|
@@ -175,12 +177,7 @@ module Nuggets
|
|
175
177
|
# k < 1:: keep (at most) this proportion
|
176
178
|
def reduce(s, k, m = s.size)
|
177
179
|
if k && k < m
|
178
|
-
|
179
|
-
k = (m * k).floor if k < 1
|
180
|
-
s[k, m - k] = 0
|
181
|
-
else
|
182
|
-
s.set_zero
|
183
|
-
end
|
180
|
+
k > 0 ? s[k = (k < 1 ? m * k : k).floor, m - k] = 0 : s.set_zero
|
184
181
|
end
|
185
182
|
|
186
183
|
s.to_m_diagonal
|
@@ -207,7 +204,7 @@ module Nuggets
|
|
207
204
|
else alias_method(method, "#{transform ||= :raw}_vector")
|
208
205
|
end
|
209
206
|
|
210
|
-
@transform = transform
|
207
|
+
@transform = transform.to_sym
|
211
208
|
end
|
212
209
|
|
213
210
|
end
|
@@ -245,9 +242,11 @@ module Nuggets
|
|
245
242
|
end
|
246
243
|
|
247
244
|
def tfidf_vector(*args)
|
248
|
-
vec, f
|
245
|
+
vec, f = raw_vector(*args), @freq
|
249
246
|
s, d = vec.sum, @total = args.fetch(1, @total).to_f
|
250
|
-
|
247
|
+
|
248
|
+
vec.enum_for(:map).with_index { |v, i|
|
249
|
+
v > 0 ? ::Math.log(d / f[i]) * v / s : v }
|
251
250
|
end
|
252
251
|
|
253
252
|
self.transform = DEFAULT_TRANSFORM
|
data/lib/nuggets/version.rb
CHANGED
@@ -10,12 +10,12 @@ module Nuggets
|
|
10
10
|
|
11
11
|
# Returns array representation.
|
12
12
|
def to_a
|
13
|
-
[MAJOR, MINOR, TINY]
|
13
|
+
[MAJOR, MINOR, TINY] << 'pre3'
|
14
14
|
end
|
15
15
|
|
16
16
|
# Short-cut for version string.
|
17
17
|
def to_s
|
18
|
-
to_a.join('.')
|
18
|
+
to_a.join('.')
|
19
19
|
end
|
20
20
|
|
21
21
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby-nuggets
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.6.pre2
|
4
|
+
version: 0.9.6.pre3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jens Wille
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-11-
|
11
|
+
date: 2013-11-26 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Some extensions to the Ruby programming language.
|
14
14
|
email: jens.wille@gmail.com
|
@@ -233,7 +233,7 @@ rdoc_options:
|
|
233
233
|
- "--line-numbers"
|
234
234
|
- "--all"
|
235
235
|
- "--title"
|
236
|
-
- ruby-nuggets Application documentation (v0.9.6.pre2)
|
236
|
+
- ruby-nuggets Application documentation (v0.9.6.pre3)
|
237
237
|
- "--main"
|
238
238
|
- README
|
239
239
|
require_paths:
|