josephwilk-rsemantic 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +8 -3
- data/lib/semantic/search.rb +1 -1
- data/lib/semantic/transform/lsa_transform.rb +13 -9
- data/lib/semantic/version.rb +1 -1
- data/lib/semantic.rb +4 -0
- data/rsemantic.gemspec +2 -2
- data/spec/semantic/search_spec.rb +37 -1
- metadata +2 -2
data/History.txt
CHANGED
@@ -1,4 +1,9 @@
|
|
1
|
-
== 0.
|
1
|
+
== 0.1.1
|
2
2
|
|
3
|
-
|
4
|
-
*
|
3
|
+
= Bugs
|
4
|
+
* fixed a bug where verbose mode was getting stuck at INFO level and would never change (Joseph Wilk)
|
5
|
+
|
6
|
+
== 0.1.0
|
7
|
+
|
8
|
+
* Changed internal representation of vector space. Using columns as documents and rows as terms. This is more consistent with LSA research papers. (Joseph Wilk)
|
9
|
+
* Wrap DMatrix in VectorSpace::Model, allowing us to store keywords with the matrix and get pretty output (Joseph Wilk)
|
data/lib/semantic/search.rb
CHANGED
@@ -2,7 +2,7 @@ module Semantic
|
|
2
2
|
class Search
|
3
3
|
|
4
4
|
def initialize(documents, options={})
|
5
|
-
Semantic.logger.level = Logger::INFO
|
5
|
+
Semantic.logger.level = options[:verbose] ? Logger::INFO : Logger::ERROR
|
6
6
|
|
7
7
|
@builder = VectorSpace::Builder.new(options)
|
8
8
|
@matrix_transformer = MatrixTransformer.new(options)
|
@@ -4,30 +4,34 @@ module Semantic
|
|
4
4
|
|
5
5
|
class << self
|
6
6
|
|
7
|
-
|
8
|
-
# Reduce the dimension of sigma by specified factor producing sigma'.
|
9
|
-
# Then dot product the matrices: U . SIGMA' . VT = MATRIX'
|
10
|
-
def transform(matrix, dimensions=1)
|
7
|
+
def transform(matrix, number_of_dimensions_to_reduce = 1)
|
11
8
|
columns = matrix.num_columns
|
12
9
|
|
13
10
|
if dimensions <= columns: #Its a valid reduction
|
14
11
|
|
15
12
|
u, sigma, vt = matrix.singular_value_decomposition
|
16
13
|
|
17
|
-
|
18
|
-
for index in ((columns-dimensions)...columns)
|
19
|
-
sigma[index,index]=0
|
20
|
-
end
|
14
|
+
sigma_prime = reduce_dimensions(number_of_dimensions_to_reduce, sigma)
|
21
15
|
|
22
16
|
#Reconstruct MATRIX' and Save transform
|
23
|
-
|
17
|
+
matrix_prime = u * sigma_prime * vt
|
24
18
|
|
25
19
|
else
|
26
20
|
raise Exception, "dimension reduction cannot be greater than %s" % rows
|
27
21
|
end
|
28
22
|
|
23
|
+
matrix_prime
|
24
|
+
end
|
25
|
+
|
26
|
+
private
|
27
|
+
def reduce_dimensions(number_of_dimensions_to_reduce, matrix)
|
28
|
+
columns = matrix.num_columns
|
29
|
+
for index in ((columns-number_of_dimensions_to_reduce)...columns)
|
30
|
+
matrix[index,index] = 0
|
31
|
+
end
|
29
32
|
matrix
|
30
33
|
end
|
34
|
+
|
31
35
|
end
|
32
36
|
end
|
33
37
|
end
|
data/lib/semantic/version.rb
CHANGED
data/lib/semantic.rb
CHANGED
data/rsemantic.gemspec
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = %q{rsemantic}
|
3
|
-
s.version = "0.1.0"
|
3
|
+
s.version = "0.1.1"
|
4
4
|
|
5
5
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
6
6
|
s.authors = ["Joseph Wilk"]
|
7
|
-
s.date = %q{2008-11-
|
7
|
+
s.date = %q{2008-11-14}
|
8
8
|
s.description = %q{A document vector search with flexible matrix transforms. Currently supports Latent semantic analysis and Term frequency - inverse document frequency}
|
9
9
|
s.email = ["josephwilk@joesniff.co.uk"]
|
10
10
|
s.extra_rdoc_files = ["History.txt", "Manifest.txt", "README.txt", "TODO.txt"]
|
@@ -18,7 +18,7 @@ module Semantic
|
|
18
18
|
end
|
19
19
|
|
20
20
|
def vector_space_model(stubs = {})
|
21
|
-
@vector_space_model ||= VectorSpace::Model.new(Linalg::DMatrix.rows([[0,1],[1,0]]),
|
21
|
+
@vector_space_model ||= VectorSpace::Model.new(Linalg::DMatrix.rows([[0,1],[1,0]]), {})
|
22
22
|
end
|
23
23
|
|
24
24
|
def matrix(array)
|
@@ -89,5 +89,41 @@ module Semantic
|
|
89
89
|
|
90
90
|
end
|
91
91
|
|
92
|
+
describe "logging" do
|
93
|
+
|
94
|
+
before(:each) do
|
95
|
+
@out = StringIO.new
|
96
|
+
Semantic.logger = Logger.new(@out)
|
97
|
+
end
|
98
|
+
|
99
|
+
it "should set info level if in verbose mode" do
|
100
|
+
VectorSpace::Builder.stub!(:new).and_return(mock_builder)
|
101
|
+
mock_builder.stub!(:build_document_matrix).and_return(vector_space_model)
|
102
|
+
|
103
|
+
Search.new(['test'], :verbose => true)
|
104
|
+
|
105
|
+
Semantic.logger.level.should == Logger::INFO
|
106
|
+
end
|
107
|
+
|
108
|
+
it "should set error level if not in verbose mode" do
|
109
|
+
VectorSpace::Builder.stub!(:new).and_return(mock_builder)
|
110
|
+
mock_builder.stub!(:build_document_matrix).and_return(vector_space_model)
|
111
|
+
|
112
|
+
Search.new(['test'], :verbose => false)
|
113
|
+
|
114
|
+
Semantic.logger.level.should == Logger::ERROR
|
115
|
+
end
|
116
|
+
|
117
|
+
it "should default to error level if verbose is not specified" do
|
118
|
+
VectorSpace::Builder.stub!(:new).and_return(mock_builder)
|
119
|
+
mock_builder.stub!(:build_document_matrix).and_return(vector_space_model)
|
120
|
+
|
121
|
+
Search.new(['test'])
|
122
|
+
|
123
|
+
Semantic.logger.level.should == Logger::ERROR
|
124
|
+
end
|
125
|
+
|
126
|
+
end
|
127
|
+
|
92
128
|
end
|
93
129
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: josephwilk-rsemantic
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Joseph Wilk
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2008-11-
|
12
|
+
date: 2008-11-14 00:00:00 -08:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|