rsemantic 0.1.3 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (38)
  1. data/{README.txt → README.md} +19 -10
  2. data/lib/semantic.rb +8 -5
  3. data/lib/semantic/compare.rb +4 -1
  4. data/lib/semantic/corpus.rb +61 -0
  5. data/lib/semantic/document.rb +39 -0
  6. data/lib/semantic/matrix_transformer.rb +4 -5
  7. data/lib/semantic/parser.rb +22 -10
  8. data/lib/semantic/search.rb +22 -16
  9. data/lib/semantic/search_result.rb +16 -0
  10. data/lib/semantic/transform/lsa_transform.rb +47 -22
  11. data/lib/semantic/transform/tf_idf_transform.rb +12 -23
  12. data/lib/semantic/vector_space/builder.rb +29 -22
  13. data/lib/semantic/vector_space/model.rb +14 -13
  14. data/lib/semantic/version.rb +1 -1
  15. data/lib/tasks/rspec.rake +13 -0
  16. metadata +75 -107
  17. data/Manifest.txt +0 -38
  18. data/Rakefile +0 -9
  19. data/config/hoe.rb +0 -69
  20. data/config/requirements.rb +0 -15
  21. data/gem_tasks/deployment.rake +0 -34
  22. data/gem_tasks/environment.rake +0 -7
  23. data/gem_tasks/examples.rake +0 -29
  24. data/gem_tasks/fix_cr_lf.rake +0 -10
  25. data/gem_tasks/gemspec.rake +0 -6
  26. data/gem_tasks/rspec.rake +0 -33
  27. data/gem_tasks/website.rake +0 -17
  28. data/rsemantic.gemspec +0 -41
  29. data/spec/semantic/compare_spec.rb +0 -16
  30. data/spec/semantic/matrix_transformer_spec.rb +0 -51
  31. data/spec/semantic/parser_spec.rb +0 -34
  32. data/spec/semantic/search_spec.rb +0 -129
  33. data/spec/semantic/transform/lsa_transform_spec.rb +0 -59
  34. data/spec/semantic/transform/tf_idf_transform_spec.rb +0 -35
  35. data/spec/semantic/vector_space/builder_spec.rb +0 -44
  36. data/spec/semantic/vector_space/model_spec.rb +0 -22
  37. data/spec/spec.opts +0 -2
  38. data/spec/spec_helper.rb +0 -7
data/spec/semantic/transform/lsa_transform_spec.rb DELETED
@@ -1,59 +0,0 @@
1
- require File.dirname(__FILE__) + '/../../spec_helper'
2
-
3
- module Semantic
4
- describe Transform::LSA do
5
-
6
- tiny_matrix = Linalg::DMatrix.columns([[0.0, 1.0, 0.0],
7
- [1.0, 0.0, 1.0]])
8
-
9
- u = Linalg::DMatrix.rows([[1,0],
10
- [0,1]])
11
-
12
- vt = Linalg::DMatrix.rows([[1,0,0],
13
- [1,0,0],
14
- [1,0,0]])
15
-
16
- sigma = Linalg::DMatrix.rows([[1,0,0],
17
- [0,1,0]])
18
-
19
- describe "latent semantic analysis transform" do
20
-
21
- it "should use svd on matrix" do
22
- matrix = Linalg::DMatrix.columns([[0.0, 1.0, 0.0],
23
- [1.0, 0.0, 1.0]])
24
-
25
- matrix.should_receive(:singular_value_decomposition).and_return([u, sigma, vt])
26
-
27
- Linalg::DMatrix.stub!(:columns).and_return(matrix)
28
-
29
- Transform::LSA.transform(matrix)
30
- end
31
-
32
- it "should reduce the noise in the sigma matrix" do
33
- matrix = Linalg::DMatrix.columns([[0.0, 1.0, 0.0],
34
- [1.0, 0.0, 1.0]])
35
-
36
- matrix.stub!(:singular_value_decomposition).and_return([u, sigma, vt])
37
- Linalg::DMatrix.stub!(:columns).and_return(matrix)
38
-
39
- sigma.should_receive(:[]=).with(0,0,0)
40
- sigma.should_receive(:[]=).with(1,1,0)
41
-
42
- Transform::LSA.transform(matrix, 2)
43
- end
44
-
45
- it "should prevent reducing dimensions greater than the matrixes own dimensions" do
46
- lambda { Transform::LSA.transform tiny_matrix, 100 }.should raise_error(Exception)
47
- end
48
-
49
- it "should transform LSA matrix" do
50
- transformed_matrix = Transform::LSA.transform tiny_matrix
51
-
52
- #TODO: better way to compare result matrix
53
- transformed_matrix.to_s.should == Linalg::DMatrix.columns([[0,0,0],[1,0,1]]).to_s
54
- end
55
-
56
- end
57
-
58
- end
59
- end
data/spec/semantic/transform/tf_idf_transform_spec.rb DELETED
@@ -1,35 +0,0 @@
1
- require File.dirname(__FILE__) + '/../../spec_helper'
2
-
3
- module Semantic
4
- describe Transform::TFIDF do
5
-
6
- def matrix(matrix)
7
- Linalg::DMatrix.rows(matrix)
8
- end
9
-
10
- tiny_matrix = Linalg::DMatrix.rows([[0.0, 1.0, 0.0],
11
- [1.0, 0.0, 1.0]])
12
-
13
- describe "term frequency / inverse document frequency transform" do
14
-
15
- it "should find the number of times each term occurs" do
16
- Transform::TFIDF.should_receive(:number_of_documents_with_term).with(0, matrix([[1]])).and_return(2)
17
-
18
- Transform::TFIDF.transform(matrix([[1]]))
19
- end
20
-
21
- it "should ignore counting terms with 0 weighting" do
22
- Transform::TFIDF.should_not_receive(:number_of_documents_with_term)
23
-
24
- Transform::TFIDF.transform(matrix([[0,0],[0,0]]))
25
- end
26
-
27
- it "should calculate term frequency * inverse document freuency" do
28
- transformed_matrix = Transform::TFIDF.transform matrix([[1,1],[0,1]])
29
-
30
- transformed_matrix.to_s.should == Linalg::DMatrix.columns([[0, 0],[0, 0.346574]]).to_s
31
- end
32
-
33
- end
34
- end
35
- end
data/spec/semantic/vector_space/builder_spec.rb DELETED
@@ -1,44 +0,0 @@
1
- require File.dirname(__FILE__) + '/../../spec_helper'
2
-
3
- module Semantic
4
- module VectorSpace
5
- describe Builder do
6
-
7
- def mock_parser
8
- @parser ||= mock("Parser")
9
- end
10
-
11
- def documents
12
- ['nipon','ichiban']
13
- end
14
-
15
-
16
- describe "building query vector" do
17
-
18
- it "should build vector from string" do
19
- builder = Builder.new
20
- builder.should_receive(:build_vector).with("query string")
21
-
22
- builder.build_query_vector(["query","string"])
23
- end
24
-
25
- it "should generate a valid vector" do
26
- builder = Builder.new
27
- builder.build_document_matrix(["query string"])
28
- query = builder.build_query_vector(["query","string"])
29
-
30
- query.should == Linalg::DMatrix.columns([[1,1]])
31
- end
32
-
33
- it "should generate empty vector when terms are not in document matrix" do
34
- builder = Builder.new
35
- builder.build_document_matrix(["string"])
36
- query = builder.build_query_vector(["not-in-document"])
37
-
38
- query.should == Linalg::DMatrix.columns([[0]])
39
- end
40
-
41
- end
42
- end
43
- end
44
- end
data/spec/semantic/vector_space/model_spec.rb DELETED
@@ -1,22 +0,0 @@
1
- require File.dirname(__FILE__) + '/../../spec_helper'
2
-
3
- module Semantic
4
- module VectorSpace
5
-
6
- describe Model do
7
-
8
- it "should output a DMatrix as a pretty string" do
9
- model = Model.new(Linalg::DMatrix.columns([[0.11111,0.66666],[0.33333, 0.001]]), {})
10
-
11
- model.to_s.should include("[ +0.11 +0.33 ]\n[ +0.67 +0.00 ]\n")
12
- end
13
-
14
- it "should output keywords for the matrix rows" do
15
- model = Model.new(Linalg::DMatrix.columns([[0]]), {'shiva' => 0})
16
-
17
- model.to_s.should include("shiva [ +0.00 ]")
18
- end
19
-
20
- end
21
- end
22
- end
data/spec/spec.opts DELETED
@@ -1,2 +0,0 @@
1
- --colour
2
- --diff
data/spec/spec_helper.rb DELETED
@@ -1,7 +0,0 @@
1
- require 'rubygems'
2
- gem 'rspec'
3
- require 'spec'
4
-
5
- $:.unshift(File.join(File.dirname(__FILE__), %w[.. lib]))
6
-
7
- require 'semantic'