rsemantic 0.1.3 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- data/{README.txt → README.md} +19 -10
- data/lib/semantic.rb +8 -5
- data/lib/semantic/compare.rb +4 -1
- data/lib/semantic/corpus.rb +61 -0
- data/lib/semantic/document.rb +39 -0
- data/lib/semantic/matrix_transformer.rb +4 -5
- data/lib/semantic/parser.rb +22 -10
- data/lib/semantic/search.rb +22 -16
- data/lib/semantic/search_result.rb +16 -0
- data/lib/semantic/transform/lsa_transform.rb +47 -22
- data/lib/semantic/transform/tf_idf_transform.rb +12 -23
- data/lib/semantic/vector_space/builder.rb +29 -22
- data/lib/semantic/vector_space/model.rb +14 -13
- data/lib/semantic/version.rb +1 -1
- data/lib/tasks/rspec.rake +13 -0
- metadata +75 -107
- data/Manifest.txt +0 -38
- data/Rakefile +0 -9
- data/config/hoe.rb +0 -69
- data/config/requirements.rb +0 -15
- data/gem_tasks/deployment.rake +0 -34
- data/gem_tasks/environment.rake +0 -7
- data/gem_tasks/examples.rake +0 -29
- data/gem_tasks/fix_cr_lf.rake +0 -10
- data/gem_tasks/gemspec.rake +0 -6
- data/gem_tasks/rspec.rake +0 -33
- data/gem_tasks/website.rake +0 -17
- data/rsemantic.gemspec +0 -41
- data/spec/semantic/compare_spec.rb +0 -16
- data/spec/semantic/matrix_transformer_spec.rb +0 -51
- data/spec/semantic/parser_spec.rb +0 -34
- data/spec/semantic/search_spec.rb +0 -129
- data/spec/semantic/transform/lsa_transform_spec.rb +0 -59
- data/spec/semantic/transform/tf_idf_transform_spec.rb +0 -35
- data/spec/semantic/vector_space/builder_spec.rb +0 -44
- data/spec/semantic/vector_space/model_spec.rb +0 -22
- data/spec/spec.opts +0 -2
- data/spec/spec_helper.rb +0 -7
@@ -1,59 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__) + '/../../spec_helper'
|
2
|
-
|
3
|
-
module Semantic
|
4
|
-
describe Transform::LSA do
|
5
|
-
|
6
|
-
tiny_matrix = Linalg::DMatrix.columns([[0.0, 1.0, 0.0],
|
7
|
-
[1.0, 0.0, 1.0]])
|
8
|
-
|
9
|
-
u = Linalg::DMatrix.rows([[1,0],
|
10
|
-
[0,1]])
|
11
|
-
|
12
|
-
vt = Linalg::DMatrix.rows([[1,0,0],
|
13
|
-
[1,0,0],
|
14
|
-
[1,0,0]])
|
15
|
-
|
16
|
-
sigma = Linalg::DMatrix.rows([[1,0,0],
|
17
|
-
[0,1,0]])
|
18
|
-
|
19
|
-
describe "latent semantic analysis transform" do
|
20
|
-
|
21
|
-
it "should use svd on matrix" do
|
22
|
-
matrix = Linalg::DMatrix.columns([[0.0, 1.0, 0.0],
|
23
|
-
[1.0, 0.0, 1.0]])
|
24
|
-
|
25
|
-
matrix.should_receive(:singular_value_decomposition).and_return([u, sigma, vt])
|
26
|
-
|
27
|
-
Linalg::DMatrix.stub!(:columns).and_return(matrix)
|
28
|
-
|
29
|
-
Transform::LSA.transform(matrix)
|
30
|
-
end
|
31
|
-
|
32
|
-
it "should reduce the noise in the sigma matrix" do
|
33
|
-
matrix = Linalg::DMatrix.columns([[0.0, 1.0, 0.0],
|
34
|
-
[1.0, 0.0, 1.0]])
|
35
|
-
|
36
|
-
matrix.stub!(:singular_value_decomposition).and_return([u, sigma, vt])
|
37
|
-
Linalg::DMatrix.stub!(:columns).and_return(matrix)
|
38
|
-
|
39
|
-
sigma.should_receive(:[]=).with(0,0,0)
|
40
|
-
sigma.should_receive(:[]=).with(1,1,0)
|
41
|
-
|
42
|
-
Transform::LSA.transform(matrix, 2)
|
43
|
-
end
|
44
|
-
|
45
|
-
it "should prevent reducing dimensions greater than the matrixes own dimensions" do
|
46
|
-
lambda { Transform::LSA.transform tiny_matrix, 100 }.should raise_error(Exception)
|
47
|
-
end
|
48
|
-
|
49
|
-
it "should transform LSA matrix" do
|
50
|
-
transformed_matrix = Transform::LSA.transform tiny_matrix
|
51
|
-
|
52
|
-
#TODO: better way to compare result matrix
|
53
|
-
transformed_matrix.to_s.should == Linalg::DMatrix.columns([[0,0,0],[1,0,1]]).to_s
|
54
|
-
end
|
55
|
-
|
56
|
-
end
|
57
|
-
|
58
|
-
end
|
59
|
-
end
|
@@ -1,35 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__) + '/../../spec_helper'
|
2
|
-
|
3
|
-
module Semantic
|
4
|
-
describe Transform::TFIDF do
|
5
|
-
|
6
|
-
def matrix(matrix)
|
7
|
-
Linalg::DMatrix.rows(matrix)
|
8
|
-
end
|
9
|
-
|
10
|
-
tiny_matrix = Linalg::DMatrix.rows([[0.0, 1.0, 0.0],
|
11
|
-
[1.0, 0.0, 1.0]])
|
12
|
-
|
13
|
-
describe "term frequency / inverse document frequency transform" do
|
14
|
-
|
15
|
-
it "should find the number of times each term occurs" do
|
16
|
-
Transform::TFIDF.should_receive(:number_of_documents_with_term).with(0, matrix([[1]])).and_return(2)
|
17
|
-
|
18
|
-
Transform::TFIDF.transform(matrix([[1]]))
|
19
|
-
end
|
20
|
-
|
21
|
-
it "should ignore counting terms with 0 weighting" do
|
22
|
-
Transform::TFIDF.should_not_receive(:number_of_documents_with_term)
|
23
|
-
|
24
|
-
Transform::TFIDF.transform(matrix([[0,0],[0,0]]))
|
25
|
-
end
|
26
|
-
|
27
|
-
it "should calculate term frequency * inverse document freuency" do
|
28
|
-
transformed_matrix = Transform::TFIDF.transform matrix([[1,1],[0,1]])
|
29
|
-
|
30
|
-
transformed_matrix.to_s.should == Linalg::DMatrix.columns([[0, 0],[0, 0.346574]]).to_s
|
31
|
-
end
|
32
|
-
|
33
|
-
end
|
34
|
-
end
|
35
|
-
end
|
@@ -1,44 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__) + '/../../spec_helper'
|
2
|
-
|
3
|
-
module Semantic
|
4
|
-
module VectorSpace
|
5
|
-
describe Builder do
|
6
|
-
|
7
|
-
def mock_parser
|
8
|
-
@parser ||= mock("Parser")
|
9
|
-
end
|
10
|
-
|
11
|
-
def documents
|
12
|
-
['nipon','ichiban']
|
13
|
-
end
|
14
|
-
|
15
|
-
|
16
|
-
describe "building query vector" do
|
17
|
-
|
18
|
-
it "should build vector from string" do
|
19
|
-
builder = Builder.new
|
20
|
-
builder.should_receive(:build_vector).with("query string")
|
21
|
-
|
22
|
-
builder.build_query_vector(["query","string"])
|
23
|
-
end
|
24
|
-
|
25
|
-
it "should generate a valid vector" do
|
26
|
-
builder = Builder.new
|
27
|
-
builder.build_document_matrix(["query string"])
|
28
|
-
query = builder.build_query_vector(["query","string"])
|
29
|
-
|
30
|
-
query.should == Linalg::DMatrix.columns([[1,1]])
|
31
|
-
end
|
32
|
-
|
33
|
-
it "should generate empty vector when terms are not in document matrix" do
|
34
|
-
builder = Builder.new
|
35
|
-
builder.build_document_matrix(["string"])
|
36
|
-
query = builder.build_query_vector(["not-in-document"])
|
37
|
-
|
38
|
-
query.should == Linalg::DMatrix.columns([[0]])
|
39
|
-
end
|
40
|
-
|
41
|
-
end
|
42
|
-
end
|
43
|
-
end
|
44
|
-
end
|
@@ -1,22 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__) + '/../../spec_helper'
|
2
|
-
|
3
|
-
module Semantic
|
4
|
-
module VectorSpace
|
5
|
-
|
6
|
-
describe Model do
|
7
|
-
|
8
|
-
it "should output a DMatrix as a pretty string" do
|
9
|
-
model = Model.new(Linalg::DMatrix.columns([[0.11111,0.66666],[0.33333, 0.001]]), {})
|
10
|
-
|
11
|
-
model.to_s.should include("[ +0.11 +0.33 ]\n[ +0.67 +0.00 ]\n")
|
12
|
-
end
|
13
|
-
|
14
|
-
it "should output keywords for the matrix rows" do
|
15
|
-
model = Model.new(Linalg::DMatrix.columns([[0]]), {'shiva' => 0})
|
16
|
-
|
17
|
-
model.to_s.should include("shiva [ +0.00 ]")
|
18
|
-
end
|
19
|
-
|
20
|
-
end
|
21
|
-
end
|
22
|
-
end
|
data/spec/spec.opts
DELETED