lsi4r 0.0.1 → 0.0.2

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c6c1576781e887d79aacd4b32b66d828974f6876
4
- data.tar.gz: 342f4b9ee69aa05c6b0a221ecae5a92100196e48
3
+ metadata.gz: c34d7f418bac80657aa021566b252720cffa221c
4
+ data.tar.gz: c4a66f3dda978161ede1c8621973295bcb541de8
5
5
  SHA512:
6
- metadata.gz: 53a2e8be5fd96724902b4ce027378734f72febd78110b234ed8e6de9ef4dcec8f6ac72740029d1d287db72d98bd055cda74f9362e3b2f1820155497cbf34a0d7
7
- data.tar.gz: afb8ba85b4a16849577a944b82ee6e957f2173214dd7f2df664af7b5f4f70318b751b5fe7e55c4e2680e223d0ab107ce69a035c9d1a2ada1639d29505cfc83c0
6
+ metadata.gz: df1f502ca56b19e70f2dfeaaf099223950dae3d17939f5bb716bec7db2b4fd2d1adfbcc78c74c2042c12dee70e616c4a4ca28d3b529dd32e408905cc0795c92c
7
+ data.tar.gz: 01998370603677234cc9dbc7ff1f18773a16e8923ec7ebb0517567eb8cd3f5a26699dd97e44bfe82172c2cd245ce8356ed85357b0ce2483a8eeca716157ae29e
data/ChangeLog CHANGED
@@ -2,6 +2,12 @@
2
2
 
3
3
  = Revision history for lsi4r
4
4
 
5
+ == 0.0.2 [unreleased]
6
+
7
+ * Fixed Lsi4R::Doc#transform= with regard to non-Symbol/String transforms.
8
+ * Added Lsi4R#each_vector, extracted from Lsi4R#each_term.
9
+ * Refactored Lsi4R#related and Lsi4R#related_score.
10
+
5
11
  == 0.0.1 [2014-04-11]
6
12
 
7
13
  * First release (extracted from ruby-nuggets).
data/README CHANGED
@@ -2,20 +2,22 @@
2
2
 
3
3
  == VERSION
4
4
 
5
- This documentation refers to lsi4r version 0.0.1
5
+ This documentation refers to lsi4r version 0.0.2
6
6
 
7
7
 
8
8
  == DESCRIPTION
9
9
 
10
10
  LSI[https://en.wikipedia.org/wiki/Latent_semantic_indexing] processing
11
- for Ruby.
11
+ for Ruby. Requires {GSL bindings}[https://blackwinter.github.com/rb-gsl]
12
+ for the heavy lifting.
12
13
 
13
14
 
14
15
  == LINKS
15
16
 
16
- Documentation:: https://blackwinter.github.io/lsi4r/
17
+ Documentation:: https://blackwinter.github.com/lsi4r
17
18
  Source code:: https://github.com/blackwinter/lsi4r
18
19
  RubyGem:: https://rubygems.org/gems/lsi4r
20
+ Travis CI:: https://travis-ci.org/blackwinter/lsi4r
19
21
 
20
22
 
21
23
  == AUTHORS
data/Rakefile CHANGED
@@ -1,4 +1,4 @@
1
- require File.expand_path(%q{../lib/lsi4r/version}, __FILE__)
1
+ require_relative 'lib/lsi4r/version'
2
2
 
3
3
  begin
4
4
  require 'hen'
@@ -13,7 +13,7 @@ begin
13
13
  email: %q{jens.wille@gmail.com},
14
14
  license: %q{AGPL-3.0},
15
15
  homepage: :blackwinter,
16
- dependencies: %w[],
16
+ dependencies: %w[rb-gsl],
17
17
 
18
18
  required_ruby_version: '>= 1.9.3'
19
19
  }
@@ -80,6 +80,16 @@ class Lsi4R
80
80
  add(value.object_id, value)
81
81
  end
82
82
 
83
+ def each_vector(key = nil, norm = true)
84
+ return enum_for(:each_vector, key, norm) unless block_given?
85
+
86
+ (key ? [self[key]] : docs).each { |doc|
87
+ if doc && vec = norm ? doc.norm : doc.vector
88
+ yield doc, vec
89
+ end
90
+ }
91
+ end
92
+
83
93
  # min:: minimum value to consider
84
94
  # abs:: minimum absolute value to consider
85
95
  # nul:: exclude null values (true or Float)
@@ -90,18 +100,16 @@ class Lsi4R
90
100
  min, abs, nul, new = options.values_at(:min, :abs, :nul, :new)
91
101
  nul = DEFAULT_EPSILON if nul == true
92
102
 
93
- list, norm = @invlist, options[:norm]
103
+ list = @invlist
94
104
 
95
- (key ? [self[key]] : docs).each { |doc|
96
- if doc && vec = norm ? doc.norm : doc.vector
97
- vec.enum_for(:each).with_index { |v, i|
98
- yield doc, list[i], v unless v.nan? ||
99
- (min && v < min) ||
100
- (abs && v.abs < abs) ||
101
- (nul && v.abs < nul) ||
102
- (new && doc.include?(i))
103
- }
104
- end
105
+ each_vector(key, options[:norm]) { |doc, vec|
106
+ vec.enum_for(:each).with_index { |v, i|
107
+ yield doc, list[i], v unless v.nan? ||
108
+ (min && v < min) ||
109
+ (abs && v.abs < abs) ||
110
+ (nul && v.abs < nul) ||
111
+ (new && doc.include?(i))
112
+ }
105
113
  }
106
114
  end
107
115
 
@@ -110,19 +118,25 @@ class Lsi4R
110
118
  end
111
119
 
112
120
  def related(key, num = 5)
113
- if doc = self[key] and norm = doc.norm
114
- temp = sort_by { |k, v| -norm * v.norm.col }
115
- temp.map! { |k,| k }.delete(key)
116
- temp[0, num]
117
- end
121
+ each_vector(key) { |_, vec|
122
+ tmp, del = block_given? ? yield(vec) :
123
+ [sort_by { |_, v| -vec * v.norm.col }.map! { |k,| k }]
124
+
125
+ tmp.delete(del || key)
126
+
127
+ return tmp[0, num]
128
+ }
129
+
130
+ nil
118
131
  end
119
132
 
120
- def related_score(key, num = 5)
121
- if doc = self[key] and norm = doc.norm
122
- temp = map { |k, v| [k, norm * v.norm.col] }.sort_by { |_, i| -i }
123
- temp.delete(temp.assoc(key))
124
- temp[0, num]
125
- end
133
+ def related_score(key, num = 5, threshold = 0)
134
+ related(key, num) { |vec|
135
+ [tmp = map { |k, v|
136
+ score = vec * v.norm.col
137
+ [k, score] if score > threshold
138
+ }.compact.sort_by { |_, i| -i }, tmp.assoc(key)]
139
+ }
126
140
  end
127
141
 
128
142
  def build(options = {})
@@ -43,13 +43,17 @@ class Lsi4R
43
43
  def transform=(transform)
44
44
  method = :transformed_vector
45
45
 
46
- case transform
47
- when Proc then define_method(method, &transform)
48
- when UnboundMethod then define_method(method, transform)
49
- else alias_method(method, "#{transform ||= :raw}_vector")
46
+ @transform = case transform ||= :raw
47
+ when Symbol, String
48
+ alias_method(method, "#{transform}_vector")
49
+ transform.to_sym
50
+ when Proc, UnboundMethod
51
+ define_method(method, transform)
52
+ transform.to_s
53
+ else
54
+ raise TypeError, "wrong argument type #{transform.class} " <<
55
+ '(expected Symbol/String or Proc/UnboundMethod)'
50
56
  end
51
-
52
- @transform = transform.to_sym
53
57
  end
54
58
 
55
59
  end
@@ -4,7 +4,7 @@ class Lsi4R
4
4
 
5
5
  MAJOR = 0
6
6
  MINOR = 0
7
- TINY = 1
7
+ TINY = 2
8
8
 
9
9
  class << self
10
10
 
metadata CHANGED
@@ -1,29 +1,49 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lsi4r
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jens Wille
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-04-11 00:00:00.000000000 Z
11
+ date: 2014-12-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: hen
14
+ name: rb-gsl
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
19
  version: '0'
20
- type: :development
20
+ type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: hen
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '0.8'
34
+ - - ">="
35
+ - !ruby/object:Gem::Version
36
+ version: 0.8.1
37
+ type: :development
38
+ prerelease: false
39
+ version_requirements: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - "~>"
42
+ - !ruby/object:Gem::Version
43
+ version: '0.8'
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: 0.8.1
27
47
  - !ruby/object:Gem::Dependency
28
48
  name: rake
29
49
  requirement: !ruby/object:Gem::Requirement
@@ -60,13 +80,15 @@ licenses:
60
80
  metadata: {}
61
81
  post_install_message: |2+
62
82
 
63
- lsi4r-0.0.1 [2014-04-11]:
83
+ lsi4r-0.0.2 [unreleased]:
64
84
 
65
- * First release (extracted from ruby-nuggets).
85
+ * Fixed Lsi4R::Doc#transform= with regard to non-Symbol/String transforms.
86
+ * Added Lsi4R#each_vector, extracted from Lsi4R#each_term.
87
+ * Refactored Lsi4R#related and Lsi4R#related_score.
66
88
 
67
89
  rdoc_options:
68
90
  - "--title"
69
- - lsi4r Application documentation (v0.0.1)
91
+ - lsi4r Application documentation (v0.0.2)
70
92
  - "--charset"
71
93
  - UTF-8
72
94
  - "--line-numbers"
@@ -87,7 +109,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
87
109
  version: '0'
88
110
  requirements: []
89
111
  rubyforge_project:
90
- rubygems_version: 2.2.2
112
+ rubygems_version: 2.4.5
91
113
  signing_key:
92
114
  specification_version: 4
93
115
  summary: Latent semantic indexing for Ruby.