rsemantic 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/{README.txt → README.md} +19 -10
 - data/lib/semantic.rb +8 -5
 - data/lib/semantic/compare.rb +4 -1
 - data/lib/semantic/corpus.rb +61 -0
 - data/lib/semantic/document.rb +39 -0
 - data/lib/semantic/matrix_transformer.rb +4 -5
 - data/lib/semantic/parser.rb +22 -10
 - data/lib/semantic/search.rb +22 -16
 - data/lib/semantic/search_result.rb +16 -0
 - data/lib/semantic/transform/lsa_transform.rb +47 -22
 - data/lib/semantic/transform/tf_idf_transform.rb +12 -23
 - data/lib/semantic/vector_space/builder.rb +29 -22
 - data/lib/semantic/vector_space/model.rb +14 -13
 - data/lib/semantic/version.rb +1 -1
 - data/lib/tasks/rspec.rake +13 -0
 - metadata +75 -107
 - data/Manifest.txt +0 -38
 - data/Rakefile +0 -9
 - data/config/hoe.rb +0 -69
 - data/config/requirements.rb +0 -15
 - data/gem_tasks/deployment.rake +0 -34
 - data/gem_tasks/environment.rake +0 -7
 - data/gem_tasks/examples.rake +0 -29
 - data/gem_tasks/fix_cr_lf.rake +0 -10
 - data/gem_tasks/gemspec.rake +0 -6
 - data/gem_tasks/rspec.rake +0 -33
 - data/gem_tasks/website.rake +0 -17
 - data/rsemantic.gemspec +0 -41
 - data/spec/semantic/compare_spec.rb +0 -16
 - data/spec/semantic/matrix_transformer_spec.rb +0 -51
 - data/spec/semantic/parser_spec.rb +0 -34
 - data/spec/semantic/search_spec.rb +0 -129
 - data/spec/semantic/transform/lsa_transform_spec.rb +0 -59
 - data/spec/semantic/transform/tf_idf_transform_spec.rb +0 -35
 - data/spec/semantic/vector_space/builder_spec.rb +0 -44
 - data/spec/semantic/vector_space/model_spec.rb +0 -22
 - data/spec/spec.opts +0 -2
 - data/spec/spec_helper.rb +0 -7
 
| 
         @@ -1,59 +0,0 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            require File.dirname(__FILE__) + '/../../spec_helper'
         
     | 
| 
       2 
     | 
    
         
            -
             
     | 
| 
       3 
     | 
    
         
            -
            module Semantic
         
     | 
| 
       4 
     | 
    
         
            -
              describe Transform::LSA do
         
     | 
| 
       5 
     | 
    
         
            -
             
     | 
| 
       6 
     | 
    
         
            -
                tiny_matrix = Linalg::DMatrix.columns([[0.0, 1.0, 0.0],
         
     | 
| 
       7 
     | 
    
         
            -
                                                       [1.0, 0.0, 1.0]])
         
     | 
| 
       8 
     | 
    
         
            -
             
     | 
| 
       9 
     | 
    
         
            -
                u = Linalg::DMatrix.rows([[1,0],
         
     | 
| 
       10 
     | 
    
         
            -
                                          [0,1]])
         
     | 
| 
       11 
     | 
    
         
            -
             
     | 
| 
       12 
     | 
    
         
            -
                vt = Linalg::DMatrix.rows([[1,0,0],
         
     | 
| 
       13 
     | 
    
         
            -
                                           [1,0,0],
         
     | 
| 
       14 
     | 
    
         
            -
                                           [1,0,0]])
         
     | 
| 
       15 
     | 
    
         
            -
             
     | 
| 
       16 
     | 
    
         
            -
                sigma = Linalg::DMatrix.rows([[1,0,0],
         
     | 
| 
       17 
     | 
    
         
            -
                                              [0,1,0]])
         
     | 
| 
       18 
     | 
    
         
            -
             
     | 
| 
       19 
     | 
    
         
            -
                describe "latent semantic analysis transform" do
         
     | 
| 
       20 
     | 
    
         
            -
             
     | 
| 
       21 
     | 
    
         
            -
                  it "should use svd on matrix" do
         
     | 
| 
       22 
     | 
    
         
            -
                    matrix = Linalg::DMatrix.columns([[0.0, 1.0, 0.0],
         
     | 
| 
       23 
     | 
    
         
            -
                                                      [1.0, 0.0, 1.0]])
         
     | 
| 
       24 
     | 
    
         
            -
             
     | 
| 
       25 
     | 
    
         
            -
                    matrix.should_receive(:singular_value_decomposition).and_return([u, sigma, vt])
         
     | 
| 
       26 
     | 
    
         
            -
             
     | 
| 
       27 
     | 
    
         
            -
                    Linalg::DMatrix.stub!(:columns).and_return(matrix)
         
     | 
| 
       28 
     | 
    
         
            -
             
     | 
| 
       29 
     | 
    
         
            -
                    Transform::LSA.transform(matrix)
         
     | 
| 
       30 
     | 
    
         
            -
                  end
         
     | 
| 
       31 
     | 
    
         
            -
             
     | 
| 
       32 
     | 
    
         
            -
                  it "should reduce the noise in the sigma matrix" do
         
     | 
| 
       33 
     | 
    
         
            -
                    matrix = Linalg::DMatrix.columns([[0.0, 1.0, 0.0],
         
     | 
| 
       34 
     | 
    
         
            -
                                                      [1.0, 0.0, 1.0]])
         
     | 
| 
       35 
     | 
    
         
            -
             
     | 
| 
       36 
     | 
    
         
            -
                    matrix.stub!(:singular_value_decomposition).and_return([u, sigma, vt])
         
     | 
| 
       37 
     | 
    
         
            -
                    Linalg::DMatrix.stub!(:columns).and_return(matrix)
         
     | 
| 
       38 
     | 
    
         
            -
             
     | 
| 
       39 
     | 
    
         
            -
                    sigma.should_receive(:[]=).with(0,0,0)
         
     | 
| 
       40 
     | 
    
         
            -
                    sigma.should_receive(:[]=).with(1,1,0)
         
     | 
| 
       41 
     | 
    
         
            -
             
     | 
| 
       42 
     | 
    
         
            -
                    Transform::LSA.transform(matrix, 2)
         
     | 
| 
       43 
     | 
    
         
            -
                  end
         
     | 
| 
       44 
     | 
    
         
            -
             
     | 
| 
       45 
     | 
    
         
            -
                  it "should prevent reducing dimensions greater than the matrixes own dimensions" do
         
     | 
| 
       46 
     | 
    
         
            -
                    lambda { Transform::LSA.transform tiny_matrix, 100 }.should raise_error(Exception)
         
     | 
| 
       47 
     | 
    
         
            -
                  end
         
     | 
| 
       48 
     | 
    
         
            -
             
     | 
| 
       49 
     | 
    
         
            -
                  it "should transform LSA matrix" do
         
     | 
| 
       50 
     | 
    
         
            -
                    transformed_matrix = Transform::LSA.transform tiny_matrix
         
     | 
| 
       51 
     | 
    
         
            -
             
     | 
| 
       52 
     | 
    
         
            -
                    #TODO: better way to compare result matrix
         
     | 
| 
       53 
     | 
    
         
            -
                    transformed_matrix.to_s.should == Linalg::DMatrix.columns([[0,0,0],[1,0,1]]).to_s
         
     | 
| 
       54 
     | 
    
         
            -
                  end
         
     | 
| 
       55 
     | 
    
         
            -
             
     | 
| 
       56 
     | 
    
         
            -
                end
         
     | 
| 
       57 
     | 
    
         
            -
             
     | 
| 
       58 
     | 
    
         
            -
              end
         
     | 
| 
       59 
     | 
    
         
            -
            end
         
     | 
| 
         @@ -1,35 +0,0 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            require File.dirname(__FILE__) + '/../../spec_helper'
         
     | 
| 
       2 
     | 
    
         
            -
             
     | 
| 
       3 
     | 
    
         
            -
            module Semantic
         
     | 
| 
       4 
     | 
    
         
            -
              describe Transform::TFIDF do
         
     | 
| 
       5 
     | 
    
         
            -
             
     | 
| 
       6 
     | 
    
         
            -
                def matrix(matrix)
         
     | 
| 
       7 
     | 
    
         
            -
                  Linalg::DMatrix.rows(matrix)
         
     | 
| 
       8 
     | 
    
         
            -
                end
         
     | 
| 
       9 
     | 
    
         
            -
             
     | 
| 
       10 
     | 
    
         
            -
                tiny_matrix = Linalg::DMatrix.rows([[0.0, 1.0, 0.0],
         
     | 
| 
       11 
     | 
    
         
            -
                [1.0, 0.0, 1.0]])
         
     | 
| 
       12 
     | 
    
         
            -
             
     | 
| 
       13 
     | 
    
         
            -
                describe "term frequency / inverse document frequency transform" do
         
     | 
| 
       14 
     | 
    
         
            -
             
     | 
| 
       15 
     | 
    
         
            -
                  it "should find the number of times each term occurs" do
         
     | 
| 
       16 
     | 
    
         
            -
                    Transform::TFIDF.should_receive(:number_of_documents_with_term).with(0, matrix([[1]])).and_return(2)
         
     | 
| 
       17 
     | 
    
         
            -
             
     | 
| 
       18 
     | 
    
         
            -
                    Transform::TFIDF.transform(matrix([[1]]))
         
     | 
| 
       19 
     | 
    
         
            -
                  end
         
     | 
| 
       20 
     | 
    
         
            -
             
     | 
| 
       21 
     | 
    
         
            -
                  it "should ignore counting terms with 0 weighting" do
         
     | 
| 
       22 
     | 
    
         
            -
                    Transform::TFIDF.should_not_receive(:number_of_documents_with_term)
         
     | 
| 
       23 
     | 
    
         
            -
             
     | 
| 
       24 
     | 
    
         
            -
                    Transform::TFIDF.transform(matrix([[0,0],[0,0]]))
         
     | 
| 
       25 
     | 
    
         
            -
                  end
         
     | 
| 
       26 
     | 
    
         
            -
             
     | 
| 
       27 
     | 
    
         
            -
                  it "should calculate term frequency * inverse document freuency" do
         
     | 
| 
       28 
     | 
    
         
            -
                    transformed_matrix = Transform::TFIDF.transform matrix([[1,1],[0,1]])
         
     | 
| 
       29 
     | 
    
         
            -
             
     | 
| 
       30 
     | 
    
         
            -
                    transformed_matrix.to_s.should == Linalg::DMatrix.columns([[0, 0],[0, 0.346574]]).to_s
         
     | 
| 
       31 
     | 
    
         
            -
                  end
         
     | 
| 
       32 
     | 
    
         
            -
             
     | 
| 
       33 
     | 
    
         
            -
                end
         
     | 
| 
       34 
     | 
    
         
            -
              end
         
     | 
| 
       35 
     | 
    
         
            -
            end
         
     | 
| 
         @@ -1,44 +0,0 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            require File.dirname(__FILE__) + '/../../spec_helper'
         
     | 
| 
       2 
     | 
    
         
            -
             
     | 
| 
       3 
     | 
    
         
            -
            module Semantic
         
     | 
| 
       4 
     | 
    
         
            -
              module VectorSpace
         
     | 
| 
       5 
     | 
    
         
            -
                describe Builder do
         
     | 
| 
       6 
     | 
    
         
            -
             
     | 
| 
       7 
     | 
    
         
            -
                  def mock_parser
         
     | 
| 
       8 
     | 
    
         
            -
                    @parser ||= mock("Parser")
         
     | 
| 
       9 
     | 
    
         
            -
                  end
         
     | 
| 
       10 
     | 
    
         
            -
             
     | 
| 
       11 
     | 
    
         
            -
                  def documents
         
     | 
| 
       12 
     | 
    
         
            -
                    ['nipon','ichiban']
         
     | 
| 
       13 
     | 
    
         
            -
                  end
         
     | 
| 
       14 
     | 
    
         
            -
             
     | 
| 
       15 
     | 
    
         
            -
             
     | 
| 
       16 
     | 
    
         
            -
                  describe "building query vector" do
         
     | 
| 
       17 
     | 
    
         
            -
             
     | 
| 
       18 
     | 
    
         
            -
                    it "should build vector from string" do
         
     | 
| 
       19 
     | 
    
         
            -
                      builder = Builder.new
         
     | 
| 
       20 
     | 
    
         
            -
                      builder.should_receive(:build_vector).with("query string")
         
     | 
| 
       21 
     | 
    
         
            -
             
     | 
| 
       22 
     | 
    
         
            -
                      builder.build_query_vector(["query","string"])
         
     | 
| 
       23 
     | 
    
         
            -
                    end
         
     | 
| 
       24 
     | 
    
         
            -
             
     | 
| 
       25 
     | 
    
         
            -
                    it "should generate a valid vector" do
         
     | 
| 
       26 
     | 
    
         
            -
                      builder = Builder.new
         
     | 
| 
       27 
     | 
    
         
            -
                      builder.build_document_matrix(["query string"])
         
     | 
| 
       28 
     | 
    
         
            -
                      query = builder.build_query_vector(["query","string"])
         
     | 
| 
       29 
     | 
    
         
            -
             
     | 
| 
       30 
     | 
    
         
            -
                      query.should == Linalg::DMatrix.columns([[1,1]])
         
     | 
| 
       31 
     | 
    
         
            -
                    end
         
     | 
| 
       32 
     | 
    
         
            -
             
     | 
| 
       33 
     | 
    
         
            -
                    it "should generate empty vector when terms are not in document matrix" do
         
     | 
| 
       34 
     | 
    
         
            -
                      builder = Builder.new
         
     | 
| 
       35 
     | 
    
         
            -
                      builder.build_document_matrix(["string"])
         
     | 
| 
       36 
     | 
    
         
            -
                      query = builder.build_query_vector(["not-in-document"])
         
     | 
| 
       37 
     | 
    
         
            -
             
     | 
| 
       38 
     | 
    
         
            -
                      query.should == Linalg::DMatrix.columns([[0]])
         
     | 
| 
       39 
     | 
    
         
            -
                    end
         
     | 
| 
       40 
     | 
    
         
            -
             
     | 
| 
       41 
     | 
    
         
            -
                  end
         
     | 
| 
       42 
     | 
    
         
            -
                end
         
     | 
| 
       43 
     | 
    
         
            -
              end
         
     | 
| 
       44 
     | 
    
         
            -
            end
         
     | 
| 
         @@ -1,22 +0,0 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            require File.dirname(__FILE__) + '/../../spec_helper'
         
     | 
| 
       2 
     | 
    
         
            -
             
     | 
| 
       3 
     | 
    
         
            -
            module Semantic
         
     | 
| 
       4 
     | 
    
         
            -
              module VectorSpace
         
     | 
| 
       5 
     | 
    
         
            -
             
     | 
| 
       6 
     | 
    
         
            -
                describe Model do
         
     | 
| 
       7 
     | 
    
         
            -
             
     | 
| 
       8 
     | 
    
         
            -
                  it "should output a DMatrix as a pretty string" do
         
     | 
| 
       9 
     | 
    
         
            -
                    model = Model.new(Linalg::DMatrix.columns([[0.11111,0.66666],[0.33333, 0.001]]), {})
         
     | 
| 
       10 
     | 
    
         
            -
             
     | 
| 
       11 
     | 
    
         
            -
                    model.to_s.should include("[ +0.11 +0.33 ]\n[ +0.67 +0.00 ]\n")
         
     | 
| 
       12 
     | 
    
         
            -
                  end
         
     | 
| 
       13 
     | 
    
         
            -
                  
         
     | 
| 
       14 
     | 
    
         
            -
                  it "should output keywords for the matrix rows" do
         
     | 
| 
       15 
     | 
    
         
            -
                    model = Model.new(Linalg::DMatrix.columns([[0]]), {'shiva' => 0})
         
     | 
| 
       16 
     | 
    
         
            -
                    
         
     | 
| 
       17 
     | 
    
         
            -
                    model.to_s.should include("shiva [ +0.00 ]")
         
     | 
| 
       18 
     | 
    
         
            -
                  end
         
     | 
| 
       19 
     | 
    
         
            -
             
     | 
| 
       20 
     | 
    
         
            -
                end
         
     | 
| 
       21 
     | 
    
         
            -
              end
         
     | 
| 
       22 
     | 
    
         
            -
            end
         
     | 
    
        data/spec/spec.opts
    DELETED