lsi4r 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ChangeLog +6 -0
- data/README +5 -3
- data/Rakefile +2 -2
- data/lib/lsi4r.rb +36 -22
- data/lib/lsi4r/doc.rb +10 -6
- data/lib/lsi4r/version.rb +1 -1
- metadata +30 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c34d7f418bac80657aa021566b252720cffa221c
|
4
|
+
data.tar.gz: c4a66f3dda978161ede1c8621973295bcb541de8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: df1f502ca56b19e70f2dfeaaf099223950dae3d17939f5bb716bec7db2b4fd2d1adfbcc78c74c2042c12dee70e616c4a4ca28d3b529dd32e408905cc0795c92c
|
7
|
+
data.tar.gz: 01998370603677234cc9dbc7ff1f18773a16e8923ec7ebb0517567eb8cd3f5a26699dd97e44bfe82172c2cd245ce8356ed85357b0ce2483a8eeca716157ae29e
|
data/ChangeLog
CHANGED
@@ -2,6 +2,12 @@
|
|
2
2
|
|
3
3
|
= Revision history for lsi4r
|
4
4
|
|
5
|
+
== 0.0.2 [unreleased]
|
6
|
+
|
7
|
+
* Fixed Lsi4R::Doc#transform= with regard to non-Symbol/String transforms.
|
8
|
+
* Added Lsi4R#each_vector, extracted from Lsi4R#each_term.
|
9
|
+
* Refactored Lsi4R#related and Lsi4R#related_score.
|
10
|
+
|
5
11
|
== 0.0.1 [2014-04-11]
|
6
12
|
|
7
13
|
* First release (extracted from ruby-nuggets).
|
data/README
CHANGED
@@ -2,20 +2,22 @@
|
|
2
2
|
|
3
3
|
== VERSION
|
4
4
|
|
5
|
-
This documentation refers to lsi4r version 0.0.1
|
5
|
+
This documentation refers to lsi4r version 0.0.2
|
6
6
|
|
7
7
|
|
8
8
|
== DESCRIPTION
|
9
9
|
|
10
10
|
LSI[https://en.wikipedia.org/wiki/Latent_semantic_indexing] processing
|
11
|
-
for Ruby.
|
11
|
+
for Ruby. Requires {GSL bindings}[https://blackwinter.github.com/rb-gsl]
|
12
|
+
for the heavy lifting.
|
12
13
|
|
13
14
|
|
14
15
|
== LINKS
|
15
16
|
|
16
|
-
Documentation:: https://blackwinter.github.
|
17
|
+
Documentation:: https://blackwinter.github.com/lsi4r
|
17
18
|
Source code:: https://github.com/blackwinter/lsi4r
|
18
19
|
RubyGem:: https://rubygems.org/gems/lsi4r
|
20
|
+
Travis CI:: https://travis-ci.org/blackwinter/lsi4r
|
19
21
|
|
20
22
|
|
21
23
|
== AUTHORS
|
data/Rakefile
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
|
1
|
+
require_relative 'lib/lsi4r/version'
|
2
2
|
|
3
3
|
begin
|
4
4
|
require 'hen'
|
@@ -13,7 +13,7 @@ begin
|
|
13
13
|
email: %q{jens.wille@gmail.com},
|
14
14
|
license: %q{AGPL-3.0},
|
15
15
|
homepage: :blackwinter,
|
16
|
-
dependencies: %w[],
|
16
|
+
dependencies: %w[rb-gsl],
|
17
17
|
|
18
18
|
required_ruby_version: '>= 1.9.3'
|
19
19
|
}
|
data/lib/lsi4r.rb
CHANGED
@@ -80,6 +80,16 @@ class Lsi4R
|
|
80
80
|
add(value.object_id, value)
|
81
81
|
end
|
82
82
|
|
83
|
+
def each_vector(key = nil, norm = true)
|
84
|
+
return enum_for(:each_vector, key, norm) unless block_given?
|
85
|
+
|
86
|
+
(key ? [self[key]] : docs).each { |doc|
|
87
|
+
if doc && vec = norm ? doc.norm : doc.vector
|
88
|
+
yield doc, vec
|
89
|
+
end
|
90
|
+
}
|
91
|
+
end
|
92
|
+
|
83
93
|
# min:: minimum value to consider
|
84
94
|
# abs:: minimum absolute value to consider
|
85
95
|
# nul:: exclude null values (true or Float)
|
@@ -90,18 +100,16 @@ class Lsi4R
|
|
90
100
|
min, abs, nul, new = options.values_at(:min, :abs, :nul, :new)
|
91
101
|
nul = DEFAULT_EPSILON if nul == true
|
92
102
|
|
93
|
-
list
|
103
|
+
list = @invlist
|
94
104
|
|
95
|
-
(key
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
}
|
104
|
-
end
|
105
|
+
each_vector(key, options[:norm]) { |doc, vec|
|
106
|
+
vec.enum_for(:each).with_index { |v, i|
|
107
|
+
yield doc, list[i], v unless v.nan? ||
|
108
|
+
(min && v < min) ||
|
109
|
+
(abs && v.abs < abs) ||
|
110
|
+
(nul && v.abs < nul) ||
|
111
|
+
(new && doc.include?(i))
|
112
|
+
}
|
105
113
|
}
|
106
114
|
end
|
107
115
|
|
@@ -110,19 +118,25 @@ class Lsi4R
|
|
110
118
|
end
|
111
119
|
|
112
120
|
def related(key, num = 5)
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
121
|
+
each_vector(key) { |_, vec|
|
122
|
+
tmp, del = block_given? ? yield(vec) :
|
123
|
+
[sort_by { |_, v| -vec * v.norm.col }.map! { |k,| k }]
|
124
|
+
|
125
|
+
tmp.delete(del || key)
|
126
|
+
|
127
|
+
return tmp[0, num]
|
128
|
+
}
|
129
|
+
|
130
|
+
nil
|
118
131
|
end
|
119
132
|
|
120
|
-
def related_score(key, num = 5)
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
133
|
+
def related_score(key, num = 5, threshold = 0)
|
134
|
+
related(key, num) { |vec|
|
135
|
+
[tmp = map { |k, v|
|
136
|
+
score = vec * v.norm.col
|
137
|
+
[k, score] if score > threshold
|
138
|
+
}.compact.sort_by { |_, i| -i }, tmp.assoc(key)]
|
139
|
+
}
|
126
140
|
end
|
127
141
|
|
128
142
|
def build(options = {})
|
data/lib/lsi4r/doc.rb
CHANGED
@@ -43,13 +43,17 @@ class Lsi4R
|
|
43
43
|
def transform=(transform)
|
44
44
|
method = :transformed_vector
|
45
45
|
|
46
|
-
case transform
|
47
|
-
when
|
48
|
-
|
49
|
-
|
46
|
+
@transform = case transform ||= :raw
|
47
|
+
when Symbol, String
|
48
|
+
alias_method(method, "#{transform}_vector")
|
49
|
+
transform.to_sym
|
50
|
+
when Proc, UnboundMethod
|
51
|
+
define_method(method, transform)
|
52
|
+
transform.to_s
|
53
|
+
else
|
54
|
+
raise TypeError, "wrong argument type #{transform.class} " <<
|
55
|
+
'(expected Symbol/String or Proc/UnboundMethod)'
|
50
56
|
end
|
51
|
-
|
52
|
-
@transform = transform.to_sym
|
53
57
|
end
|
54
58
|
|
55
59
|
end
|
data/lib/lsi4r/version.rb
CHANGED
metadata
CHANGED
@@ -1,29 +1,49 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lsi4r
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jens Wille
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-12-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
14
|
+
name: rb-gsl
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '0'
|
20
|
-
type: :
|
20
|
+
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: hen
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0.8'
|
34
|
+
- - ">="
|
35
|
+
- !ruby/object:Gem::Version
|
36
|
+
version: 0.8.1
|
37
|
+
type: :development
|
38
|
+
prerelease: false
|
39
|
+
version_requirements: !ruby/object:Gem::Requirement
|
40
|
+
requirements:
|
41
|
+
- - "~>"
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '0.8'
|
44
|
+
- - ">="
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: 0.8.1
|
27
47
|
- !ruby/object:Gem::Dependency
|
28
48
|
name: rake
|
29
49
|
requirement: !ruby/object:Gem::Requirement
|
@@ -60,13 +80,15 @@ licenses:
|
|
60
80
|
metadata: {}
|
61
81
|
post_install_message: |2+
|
62
82
|
|
63
|
-
lsi4r-0.0.1 [2014-04-11]:
|
83
|
+
lsi4r-0.0.2 [unreleased]:
|
64
84
|
|
65
|
-
* First release (extracted from ruby-nuggets).
|
85
|
+
* Fixed Lsi4R::Doc#transform= with regard to non-Symbol/String transforms.
|
86
|
+
* Added Lsi4R#each_vector, extracted from Lsi4R#each_term.
|
87
|
+
* Refactored Lsi4R#related and Lsi4R#related_score.
|
66
88
|
|
67
89
|
rdoc_options:
|
68
90
|
- "--title"
|
69
|
-
- lsi4r Application documentation (v0.0.1)
|
91
|
+
- lsi4r Application documentation (v0.0.2)
|
70
92
|
- "--charset"
|
71
93
|
- UTF-8
|
72
94
|
- "--line-numbers"
|
@@ -87,7 +109,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
87
109
|
version: '0'
|
88
110
|
requirements: []
|
89
111
|
rubyforge_project:
|
90
|
-
rubygems_version: 2.
|
112
|
+
rubygems_version: 2.4.5
|
91
113
|
signing_key:
|
92
114
|
specification_version: 4
|
93
115
|
summary: Latent semantic indexing for Ruby.
|