lsi4r 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c6c1576781e887d79aacd4b32b66d828974f6876
4
- data.tar.gz: 342f4b9ee69aa05c6b0a221ecae5a92100196e48
3
+ metadata.gz: c34d7f418bac80657aa021566b252720cffa221c
4
+ data.tar.gz: c4a66f3dda978161ede1c8621973295bcb541de8
5
5
  SHA512:
6
- metadata.gz: 53a2e8be5fd96724902b4ce027378734f72febd78110b234ed8e6de9ef4dcec8f6ac72740029d1d287db72d98bd055cda74f9362e3b2f1820155497cbf34a0d7
7
- data.tar.gz: afb8ba85b4a16849577a944b82ee6e957f2173214dd7f2df664af7b5f4f70318b751b5fe7e55c4e2680e223d0ab107ce69a035c9d1a2ada1639d29505cfc83c0
6
+ metadata.gz: df1f502ca56b19e70f2dfeaaf099223950dae3d17939f5bb716bec7db2b4fd2d1adfbcc78c74c2042c12dee70e616c4a4ca28d3b529dd32e408905cc0795c92c
7
+ data.tar.gz: 01998370603677234cc9dbc7ff1f18773a16e8923ec7ebb0517567eb8cd3f5a26699dd97e44bfe82172c2cd245ce8356ed85357b0ce2483a8eeca716157ae29e
data/ChangeLog CHANGED
@@ -2,6 +2,12 @@
2
2
 
3
3
  = Revision history for lsi4r
4
4
 
5
+ == 0.0.2 [unreleased]
6
+
7
+ * Fixed Lsi4R::Doc#transform= with regard to non-Symbol/String transforms.
8
+ * Added Lsi4R#each_vector, extracted from Lsi4R#each_term.
9
+ * Refactored Lsi4R#related and Lsi4R#related_score.
10
+
5
11
  == 0.0.1 [2014-04-11]
6
12
 
7
13
  * First release (extracted from ruby-nuggets).
data/README CHANGED
@@ -2,20 +2,22 @@
2
2
 
3
3
  == VERSION
4
4
 
5
- This documentation refers to lsi4r version 0.0.1
5
+ This documentation refers to lsi4r version 0.0.2
6
6
 
7
7
 
8
8
  == DESCRIPTION
9
9
 
10
10
  LSI[https://en.wikipedia.org/wiki/Latent_semantic_indexing] processing
11
- for Ruby.
11
+ for Ruby. Requires {GSL bindings}[https://blackwinter.github.com/rb-gsl]
12
+ for the heavy lifting.
12
13
 
13
14
 
14
15
  == LINKS
15
16
 
16
- Documentation:: https://blackwinter.github.io/lsi4r/
17
+ Documentation:: https://blackwinter.github.com/lsi4r
17
18
  Source code:: https://github.com/blackwinter/lsi4r
18
19
  RubyGem:: https://rubygems.org/gems/lsi4r
20
+ Travis CI:: https://travis-ci.org/blackwinter/lsi4r
19
21
 
20
22
 
21
23
  == AUTHORS
data/Rakefile CHANGED
@@ -1,4 +1,4 @@
1
- require File.expand_path(%q{../lib/lsi4r/version}, __FILE__)
1
+ require_relative 'lib/lsi4r/version'
2
2
 
3
3
  begin
4
4
  require 'hen'
@@ -13,7 +13,7 @@ begin
13
13
  email: %q{jens.wille@gmail.com},
14
14
  license: %q{AGPL-3.0},
15
15
  homepage: :blackwinter,
16
- dependencies: %w[],
16
+ dependencies: %w[rb-gsl],
17
17
 
18
18
  required_ruby_version: '>= 1.9.3'
19
19
  }
@@ -80,6 +80,16 @@ class Lsi4R
80
80
  add(value.object_id, value)
81
81
  end
82
82
 
83
+ def each_vector(key = nil, norm = true)
84
+ return enum_for(:each_vector, key, norm) unless block_given?
85
+
86
+ (key ? [self[key]] : docs).each { |doc|
87
+ if doc && vec = norm ? doc.norm : doc.vector
88
+ yield doc, vec
89
+ end
90
+ }
91
+ end
92
+
83
93
  # min:: minimum value to consider
84
94
  # abs:: minimum absolute value to consider
85
95
  # nul:: exclude null values (true or Float)
@@ -90,18 +100,16 @@ class Lsi4R
90
100
  min, abs, nul, new = options.values_at(:min, :abs, :nul, :new)
91
101
  nul = DEFAULT_EPSILON if nul == true
92
102
 
93
- list, norm = @invlist, options[:norm]
103
+ list = @invlist
94
104
 
95
- (key ? [self[key]] : docs).each { |doc|
96
- if doc && vec = norm ? doc.norm : doc.vector
97
- vec.enum_for(:each).with_index { |v, i|
98
- yield doc, list[i], v unless v.nan? ||
99
- (min && v < min) ||
100
- (abs && v.abs < abs) ||
101
- (nul && v.abs < nul) ||
102
- (new && doc.include?(i))
103
- }
104
- end
105
+ each_vector(key, options[:norm]) { |doc, vec|
106
+ vec.enum_for(:each).with_index { |v, i|
107
+ yield doc, list[i], v unless v.nan? ||
108
+ (min && v < min) ||
109
+ (abs && v.abs < abs) ||
110
+ (nul && v.abs < nul) ||
111
+ (new && doc.include?(i))
112
+ }
105
113
  }
106
114
  end
107
115
 
@@ -110,19 +118,25 @@ class Lsi4R
110
118
  end
111
119
 
112
120
  def related(key, num = 5)
113
- if doc = self[key] and norm = doc.norm
114
- temp = sort_by { |k, v| -norm * v.norm.col }
115
- temp.map! { |k,| k }.delete(key)
116
- temp[0, num]
117
- end
121
+ each_vector(key) { |_, vec|
122
+ tmp, del = block_given? ? yield(vec) :
123
+ [sort_by { |_, v| -vec * v.norm.col }.map! { |k,| k }]
124
+
125
+ tmp.delete(del || key)
126
+
127
+ return tmp[0, num]
128
+ }
129
+
130
+ nil
118
131
  end
119
132
 
120
- def related_score(key, num = 5)
121
- if doc = self[key] and norm = doc.norm
122
- temp = map { |k, v| [k, norm * v.norm.col] }.sort_by { |_, i| -i }
123
- temp.delete(temp.assoc(key))
124
- temp[0, num]
125
- end
133
+ def related_score(key, num = 5, threshold = 0)
134
+ related(key, num) { |vec|
135
+ [tmp = map { |k, v|
136
+ score = vec * v.norm.col
137
+ [k, score] if score > threshold
138
+ }.compact.sort_by { |_, i| -i }, tmp.assoc(key)]
139
+ }
126
140
  end
127
141
 
128
142
  def build(options = {})
@@ -43,13 +43,17 @@ class Lsi4R
43
43
  def transform=(transform)
44
44
  method = :transformed_vector
45
45
 
46
- case transform
47
- when Proc then define_method(method, &transform)
48
- when UnboundMethod then define_method(method, transform)
49
- else alias_method(method, "#{transform ||= :raw}_vector")
46
+ @transform = case transform ||= :raw
47
+ when Symbol, String
48
+ alias_method(method, "#{transform}_vector")
49
+ transform.to_sym
50
+ when Proc, UnboundMethod
51
+ define_method(method, transform)
52
+ transform.to_s
53
+ else
54
+ raise TypeError, "wrong argument type #{transform.class} " <<
55
+ '(expected Symbol/String or Proc/UnboundMethod)'
50
56
  end
51
-
52
- @transform = transform.to_sym
53
57
  end
54
58
 
55
59
  end
@@ -4,7 +4,7 @@ class Lsi4R
4
4
 
5
5
  MAJOR = 0
6
6
  MINOR = 0
7
- TINY = 1
7
+ TINY = 2
8
8
 
9
9
  class << self
10
10
 
metadata CHANGED
@@ -1,29 +1,49 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lsi4r
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jens Wille
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-04-11 00:00:00.000000000 Z
11
+ date: 2014-12-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: hen
14
+ name: rb-gsl
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
19
  version: '0'
20
- type: :development
20
+ type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: hen
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '0.8'
34
+ - - ">="
35
+ - !ruby/object:Gem::Version
36
+ version: 0.8.1
37
+ type: :development
38
+ prerelease: false
39
+ version_requirements: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - "~>"
42
+ - !ruby/object:Gem::Version
43
+ version: '0.8'
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: 0.8.1
27
47
  - !ruby/object:Gem::Dependency
28
48
  name: rake
29
49
  requirement: !ruby/object:Gem::Requirement
@@ -60,13 +80,15 @@ licenses:
60
80
  metadata: {}
61
81
  post_install_message: |2+
62
82
 
63
- lsi4r-0.0.1 [2014-04-11]:
83
+ lsi4r-0.0.2 [unreleased]:
64
84
 
65
- * First release (extracted from ruby-nuggets).
85
+ * Fixed Lsi4R::Doc#transform= with regard to non-Symbol/String transforms.
86
+ * Added Lsi4R#each_vector, extracted from Lsi4R#each_term.
87
+ * Refactored Lsi4R#related and Lsi4R#related_score.
66
88
 
67
89
  rdoc_options:
68
90
  - "--title"
69
- - lsi4r Application documentation (v0.0.1)
91
+ - lsi4r Application documentation (v0.0.2)
70
92
  - "--charset"
71
93
  - UTF-8
72
94
  - "--line-numbers"
@@ -87,7 +109,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
87
109
  version: '0'
88
110
  requirements: []
89
111
  rubyforge_project:
90
- rubygems_version: 2.2.2
112
+ rubygems_version: 2.4.5
91
113
  signing_key:
92
114
  specification_version: 4
93
115
  summary: Latent semantic indexing for Ruby.