lsi4r 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ChangeLog +6 -0
- data/README +5 -3
- data/Rakefile +2 -2
- data/lib/lsi4r.rb +36 -22
- data/lib/lsi4r/doc.rb +10 -6
- data/lib/lsi4r/version.rb +1 -1
- metadata +30 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c34d7f418bac80657aa021566b252720cffa221c
|
4
|
+
data.tar.gz: c4a66f3dda978161ede1c8621973295bcb541de8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: df1f502ca56b19e70f2dfeaaf099223950dae3d17939f5bb716bec7db2b4fd2d1adfbcc78c74c2042c12dee70e616c4a4ca28d3b529dd32e408905cc0795c92c
|
7
|
+
data.tar.gz: 01998370603677234cc9dbc7ff1f18773a16e8923ec7ebb0517567eb8cd3f5a26699dd97e44bfe82172c2cd245ce8356ed85357b0ce2483a8eeca716157ae29e
|
data/ChangeLog
CHANGED
@@ -2,6 +2,12 @@
|
|
2
2
|
|
3
3
|
= Revision history for lsi4r
|
4
4
|
|
5
|
+
== 0.0.2 [unreleased]
|
6
|
+
|
7
|
+
* Fixed Lsi4R::Doc#transform= with regard to non-Symbol/String transforms.
|
8
|
+
* Added Lsi4R#each_vector, extracted from Lsi4R#each_term.
|
9
|
+
* Refactored Lsi4R#related and Lsi4R#related_score.
|
10
|
+
|
5
11
|
== 0.0.1 [2014-04-11]
|
6
12
|
|
7
13
|
* First release (extracted from ruby-nuggets).
|
data/README
CHANGED
@@ -2,20 +2,22 @@
|
|
2
2
|
|
3
3
|
== VERSION
|
4
4
|
|
5
|
-
This documentation refers to lsi4r version 0.0.1
|
5
|
+
This documentation refers to lsi4r version 0.0.2
|
6
6
|
|
7
7
|
|
8
8
|
== DESCRIPTION
|
9
9
|
|
10
10
|
LSI[https://en.wikipedia.org/wiki/Latent_semantic_indexing] processing
|
11
|
-
for Ruby.
|
11
|
+
for Ruby. Requires {GSL bindings}[https://blackwinter.github.com/rb-gsl]
|
12
|
+
for the heavy lifting.
|
12
13
|
|
13
14
|
|
14
15
|
== LINKS
|
15
16
|
|
16
|
-
Documentation:: https://blackwinter.github.
|
17
|
+
Documentation:: https://blackwinter.github.com/lsi4r
|
17
18
|
Source code:: https://github.com/blackwinter/lsi4r
|
18
19
|
RubyGem:: https://rubygems.org/gems/lsi4r
|
20
|
+
Travis CI:: https://travis-ci.org/blackwinter/lsi4r
|
19
21
|
|
20
22
|
|
21
23
|
== AUTHORS
|
data/Rakefile
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
|
1
|
+
require_relative 'lib/lsi4r/version'
|
2
2
|
|
3
3
|
begin
|
4
4
|
require 'hen'
|
@@ -13,7 +13,7 @@ begin
|
|
13
13
|
email: %q{jens.wille@gmail.com},
|
14
14
|
license: %q{AGPL-3.0},
|
15
15
|
homepage: :blackwinter,
|
16
|
-
dependencies: %w[],
|
16
|
+
dependencies: %w[rb-gsl],
|
17
17
|
|
18
18
|
required_ruby_version: '>= 1.9.3'
|
19
19
|
}
|
data/lib/lsi4r.rb
CHANGED
@@ -80,6 +80,16 @@ class Lsi4R
|
|
80
80
|
add(value.object_id, value)
|
81
81
|
end
|
82
82
|
|
83
|
+
def each_vector(key = nil, norm = true)
|
84
|
+
return enum_for(:each_vector, key, norm) unless block_given?
|
85
|
+
|
86
|
+
(key ? [self[key]] : docs).each { |doc|
|
87
|
+
if doc && vec = norm ? doc.norm : doc.vector
|
88
|
+
yield doc, vec
|
89
|
+
end
|
90
|
+
}
|
91
|
+
end
|
92
|
+
|
83
93
|
# min:: minimum value to consider
|
84
94
|
# abs:: minimum absolute value to consider
|
85
95
|
# nul:: exclude null values (true or Float)
|
@@ -90,18 +100,16 @@ class Lsi4R
|
|
90
100
|
min, abs, nul, new = options.values_at(:min, :abs, :nul, :new)
|
91
101
|
nul = DEFAULT_EPSILON if nul == true
|
92
102
|
|
93
|
-
list
|
103
|
+
list = @invlist
|
94
104
|
|
95
|
-
(key
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
}
|
104
|
-
end
|
105
|
+
each_vector(key, options[:norm]) { |doc, vec|
|
106
|
+
vec.enum_for(:each).with_index { |v, i|
|
107
|
+
yield doc, list[i], v unless v.nan? ||
|
108
|
+
(min && v < min) ||
|
109
|
+
(abs && v.abs < abs) ||
|
110
|
+
(nul && v.abs < nul) ||
|
111
|
+
(new && doc.include?(i))
|
112
|
+
}
|
105
113
|
}
|
106
114
|
end
|
107
115
|
|
@@ -110,19 +118,25 @@ class Lsi4R
|
|
110
118
|
end
|
111
119
|
|
112
120
|
def related(key, num = 5)
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
121
|
+
each_vector(key) { |_, vec|
|
122
|
+
tmp, del = block_given? ? yield(vec) :
|
123
|
+
[sort_by { |_, v| -vec * v.norm.col }.map! { |k,| k }]
|
124
|
+
|
125
|
+
tmp.delete(del || key)
|
126
|
+
|
127
|
+
return tmp[0, num]
|
128
|
+
}
|
129
|
+
|
130
|
+
nil
|
118
131
|
end
|
119
132
|
|
120
|
-
def related_score(key, num = 5)
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
133
|
+
def related_score(key, num = 5, threshold = 0)
|
134
|
+
related(key, num) { |vec|
|
135
|
+
[tmp = map { |k, v|
|
136
|
+
score = vec * v.norm.col
|
137
|
+
[k, score] if score > threshold
|
138
|
+
}.compact.sort_by { |_, i| -i }, tmp.assoc(key)]
|
139
|
+
}
|
126
140
|
end
|
127
141
|
|
128
142
|
def build(options = {})
|
data/lib/lsi4r/doc.rb
CHANGED
@@ -43,13 +43,17 @@ class Lsi4R
|
|
43
43
|
def transform=(transform)
|
44
44
|
method = :transformed_vector
|
45
45
|
|
46
|
-
case transform
|
47
|
-
when
|
48
|
-
|
49
|
-
|
46
|
+
@transform = case transform ||= :raw
|
47
|
+
when Symbol, String
|
48
|
+
alias_method(method, "#{transform}_vector")
|
49
|
+
transform.to_sym
|
50
|
+
when Proc, UnboundMethod
|
51
|
+
define_method(method, transform)
|
52
|
+
transform.to_s
|
53
|
+
else
|
54
|
+
raise TypeError, "wrong argument type #{transform.class} " <<
|
55
|
+
'(expected Symbol/String or Proc/UnboundMethod)'
|
50
56
|
end
|
51
|
-
|
52
|
-
@transform = transform.to_sym
|
53
57
|
end
|
54
58
|
|
55
59
|
end
|
data/lib/lsi4r/version.rb
CHANGED
metadata
CHANGED
@@ -1,29 +1,49 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lsi4r
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jens Wille
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-12-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
14
|
+
name: rb-gsl
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '0'
|
20
|
-
type: :
|
20
|
+
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: hen
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0.8'
|
34
|
+
- - ">="
|
35
|
+
- !ruby/object:Gem::Version
|
36
|
+
version: 0.8.1
|
37
|
+
type: :development
|
38
|
+
prerelease: false
|
39
|
+
version_requirements: !ruby/object:Gem::Requirement
|
40
|
+
requirements:
|
41
|
+
- - "~>"
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '0.8'
|
44
|
+
- - ">="
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: 0.8.1
|
27
47
|
- !ruby/object:Gem::Dependency
|
28
48
|
name: rake
|
29
49
|
requirement: !ruby/object:Gem::Requirement
|
@@ -60,13 +80,15 @@ licenses:
|
|
60
80
|
metadata: {}
|
61
81
|
post_install_message: |2+
|
62
82
|
|
63
|
-
lsi4r-0.0.1 [2014-04-11]:
|
83
|
+
lsi4r-0.0.2 [unreleased]:
|
64
84
|
|
65
|
-
* First release (extracted from ruby-nuggets).
|
85
|
+
* Fixed Lsi4R::Doc#transform= with regard to non-Symbol/String transforms.
|
86
|
+
* Added Lsi4R#each_vector, extracted from Lsi4R#each_term.
|
87
|
+
* Refactored Lsi4R#related and Lsi4R#related_score.
|
66
88
|
|
67
89
|
rdoc_options:
|
68
90
|
- "--title"
|
69
|
-
- lsi4r Application documentation (v0.0.1)
|
91
|
+
- lsi4r Application documentation (v0.0.2)
|
70
92
|
- "--charset"
|
71
93
|
- UTF-8
|
72
94
|
- "--line-numbers"
|
@@ -87,7 +109,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
87
109
|
version: '0'
|
88
110
|
requirements: []
|
89
111
|
rubyforge_project:
|
90
|
-
rubygems_version: 2.
|
112
|
+
rubygems_version: 2.4.5
|
91
113
|
signing_key:
|
92
114
|
specification_version: 4
|
93
115
|
summary: Latent semantic indexing for Ruby.
|