ruby-svd 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/lsa.rb +37 -0
- data/lib/ruby-svd.rb +1 -1
- data/lib/svd_matrix.rb +2 -2
- metadata +3 -2
data/lib/lsa.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
require 'svd_matrix'
|
2
|
+
|
3
|
+
# Compares new vectors against the columns of a decomposed matrix using
# only the first two dimensions of the decomposition.
class LSA
  # The three values returned by +matrix.decompose(2)+, in order.
  # NOTE(review): presumably the U, S and V factors of an SVD reduced to two
  # dimensions -- confirm against SVDMatrix#decompose.
  attr_accessor :u, :s, :v

  # matrix:: any object responding to +decompose(dimensions)+ and returning
  #          three values; the way they are used below implies matrix-like
  #          objects (+row_size+, +inverse+, +[row, col]+, multiplication).
  def initialize(matrix)
    @u, @s, @v = matrix.decompose(2)
  end

  # Return a distance (cosine similarity) between a new vector
  # and all the clusters (columns) used in the original matrix.
  #
  # values:: array of numbers; its size must equal the number of rows in +u+.
  #
  # Returns an array of [index, similarity] pairs, one per row of +v+,
  # sorted by similarity in descending order.
  #
  # Raises RuntimeError ("Unsupported vector length") when +values+ has the
  # wrong size.
  def classify_vector(values)
    raise "Unsupported vector length" unless values.size == @u.row_size

    # Project the new vector into the reduced space.
    position = Matrix.row_vector(values) * @u * @s.inverse
    x = position[0, 0]
    y = position[0, 1]

    # Only the first two coordinates of each row of V are compared, matching
    # the two dimensions requested from decompose.
    results = Array.new(@v.row_size) do |index|
      [index, cosine_similarity(x, y, @v[index, 0], @v[index, 1])]
    end

    results.sort { |a, b| b[1] <=> a[1] }
  end

  # Determines the cosine similarity between two 2D points.
  #
  # Returns 0 when either point is the origin (the similarity is undefined
  # there); otherwise the dot product divided by the product of magnitudes.
  def cosine_similarity(x1, y1, x2, y2)
    dp = (x1 * x2) + (y1 * y2)
    mag1 = Math.sqrt((x1 ** 2) + (y1 ** 2))
    mag2 = Math.sqrt((x2 ** 2) + (y2 ** 2))
    return 0 if mag1 == 0 || mag2 == 0

    dp / (mag1 * mag2)
  end
end
|
data/lib/ruby-svd.rb
CHANGED
data/lib/svd_matrix.rb
CHANGED
@@ -41,11 +41,11 @@ class SVDMatrix < Matrix
|
|
41
41
|
|
42
42
|
# recompose U matrix
|
43
43
|
u = SVDMatrix.new(row_size, reduce_dimensions_to || column_size)
|
44
|
-
row_size.times {|i| u.set_row(i, u_array.slice!(0,
|
44
|
+
row_size.times {|i| u.set_row(i, u_array.slice!(0, column_size)[0...reduce_dimensions_to])}
|
45
45
|
|
46
46
|
# recompose V matrix
|
47
47
|
v = SVDMatrix.new(column_size, reduce_dimensions_to || column_size)
|
48
|
-
column_size.times {|i| v.set_row(i, v_array.slice!(0,
|
48
|
+
column_size.times {|i| v.set_row(i, v_array.slice!(0, column_size)[0...reduce_dimensions_to])}
|
49
49
|
|
50
50
|
# diagonalise W array as a matrix
|
51
51
|
if reduce_dimensions_to
|
metadata
CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
6
|
- 0
|
7
|
-
-
|
7
|
+
- 4
|
8
8
|
- 0
|
9
|
-
version: 0.
|
9
|
+
version: 0.4.0
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Will Cannings
|
@@ -32,6 +32,7 @@ files:
|
|
32
32
|
- ext/nrutil.h
|
33
33
|
- ext/svd.c
|
34
34
|
- ext/svd.h
|
35
|
+
- lib/lsa.rb
|
35
36
|
- lib/ruby-svd.rb
|
36
37
|
- lib/svd_matrix.rb
|
37
38
|
- LICENSE
|