rsemantic 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/{README.txt → README.md} +19 -10
- data/lib/semantic.rb +8 -5
- data/lib/semantic/compare.rb +4 -1
- data/lib/semantic/corpus.rb +61 -0
- data/lib/semantic/document.rb +39 -0
- data/lib/semantic/matrix_transformer.rb +4 -5
- data/lib/semantic/parser.rb +22 -10
- data/lib/semantic/search.rb +22 -16
- data/lib/semantic/search_result.rb +16 -0
- data/lib/semantic/transform/lsa_transform.rb +47 -22
- data/lib/semantic/transform/tf_idf_transform.rb +12 -23
- data/lib/semantic/vector_space/builder.rb +29 -22
- data/lib/semantic/vector_space/model.rb +14 -13
- data/lib/semantic/version.rb +1 -1
- data/lib/tasks/rspec.rake +13 -0
- metadata +75 -107
- data/Manifest.txt +0 -38
- data/Rakefile +0 -9
- data/config/hoe.rb +0 -69
- data/config/requirements.rb +0 -15
- data/gem_tasks/deployment.rake +0 -34
- data/gem_tasks/environment.rake +0 -7
- data/gem_tasks/examples.rake +0 -29
- data/gem_tasks/fix_cr_lf.rake +0 -10
- data/gem_tasks/gemspec.rake +0 -6
- data/gem_tasks/rspec.rake +0 -33
- data/gem_tasks/website.rake +0 -17
- data/rsemantic.gemspec +0 -41
- data/spec/semantic/compare_spec.rb +0 -16
- data/spec/semantic/matrix_transformer_spec.rb +0 -51
- data/spec/semantic/parser_spec.rb +0 -34
- data/spec/semantic/search_spec.rb +0 -129
- data/spec/semantic/transform/lsa_transform_spec.rb +0 -59
- data/spec/semantic/transform/tf_idf_transform_spec.rb +0 -35
- data/spec/semantic/vector_space/builder_spec.rb +0 -44
- data/spec/semantic/vector_space/model_spec.rb +0 -22
- data/spec/spec.opts +0 -2
- data/spec/spec_helper.rb +0 -7
data/Rakefile
DELETED
@@ -1,9 +0,0 @@
|
|
1
|
-
# Top-level Rakefile: loads the build configuration, pulls in every task file
# under gem_tasks/, and makes `rake` run the specs by default.
ENV['NODOT'] = 'true' # We don't want class diagrams in RDoc

# Resolve project files against this Rakefile's own directory. Ruby 1.9.2
# removed "." from $LOAD_PATH, so a bare `require 'config/requirements'` only
# works on older interpreters.
project_root = File.expand_path(File.dirname(__FILE__))
require File.join(project_root, 'config', 'requirements')
require File.join(project_root, 'config', 'hoe') # setup Hoe + all gem configuration

# Dir[] makes no ordering promise on every Ruby version; sort so the task files
# are always loaded in the same order.
Dir[File.join(project_root, 'gem_tasks', '**', '*.rake')].sort.each { |rake_file| load rake_file }

# Hoe gives us :default => :test, but we don't have Test::Unit tests.
Rake::Task[:default].clear_prerequisites
task :default => [:spec]
|
data/config/hoe.rb
DELETED
@@ -1,69 +0,0 @@
|
|
1
|
-
require 'semantic/version'
|
2
|
-
|
3
|
-
# --- Gem identity -----------------------------------------------------------
AUTHOR      = "Joseph Wilk" # can also be an array of Authors
EMAIL       = 'josephwilk@joesniff.co.uk'
DESCRIPTION = 'A document vector search with flexible matrix transforms. ' \
              'Currently supports Latent semantic analysis and Term frequency - inverse document frequency'
GEM_NAME    = "rsemantic" # what ppl will type to install your gem
HOMEPATH    = 'http://github.com/josephwilk/rsemantic'

# --- RubyForge settings -----------------------------------------------------
RUBYFORGE_PROJECT  = "rsemantic"
# Fallback account name; deliberately left unfrozen because it is overwritten
# in place once the user's RubyForge config has been read.
RUBYFORGE_USERNAME = 'joseph_wilk'

# Location of the RubyForge user config and the (not yet loaded) parsed copy.
@config_file = '~/.rubyforge/user-config.yml'
@config      = nil
|
13
|
-
# Returns the RubyForge account name to use for deployment.
#
# The first call reads the file named by @config_file, parses it as YAML and
# caches the result in @config; later calls reuse the cache. The "username"
# entry then overwrites RUBYFORGE_USERNAME in place, and that string is
# returned.
#
# If the config file cannot be read or parsed the process is aborted: the
# explanation goes to stderr and the exit status is non-zero, so a failed
# lookup is not mistaken for success by the calling shell.
def rubyforge_username
  unless @config
    # Loaded here so the method does not depend on another library having
    # already pulled YAML in.
    require 'yaml'
    begin
      @config = YAML.load(File.read(File.expand_path(@config_file)))
    rescue StandardError
      abort [
        "ERROR: No rubyforge config file found: #{@config_file}",
        "Run 'rubyforge setup' to prepare your env for access to Rubyforge",
        "- See http://newgem.rubyforge.org/rubyforge.html for more details"
      ].join("\n")
    end
  end
  RUBYFORGE_USERNAME.replace @config["username"]
end
|
28
|
-
|
29
|
-
|
30
|
-
# Optional revision suffix for the version string; nil means "no suffix".
REV = nil
# UNCOMMENT IF REQUIRED:
# REV = YAML.load(`svn info`)['Revision']

# Full gem version: the library version, plus ".<REV>" when REV is set.
VERS = Semantic::VERSION::STRING + (REV ? ".#{REV}" : "")

# Command-line options for RDoc. --main must name a file that ships with the
# gem; the readme is README.txt (README.textile does not exist in this gem).
RDOC_OPTS = ['--quiet', '--title', 'Rsemantic documentation',
             "--opname", "index.html",
             "--line-numbers",
             "--main", "README.txt",
             "--inline-source"]
|
39
|
-
|
40
|
-
# Reopens Hoe to replace its extra_deps reader.
class Hoe
  # Removes every entry naming 'hoe' from @extra_deps (in place) and returns
  # the list. An entry may be a bare name or a [name, version] pair; Array()
  # lets both forms be compared through their first element.
  def extra_deps
    @extra_deps.delete_if { |dependency| Array(dependency).first == 'hoe' }
  end
end
|
46
|
-
|
47
|
-
# Build the Hoe specification; creating it generates the gem's Rake tasks.
# `rake -T` (run from the gem root directory) lists the generated tasks.
$hoe = Hoe.new(GEM_NAME, VERS) do |spec|
  spec.developer(AUTHOR, EMAIL)
  spec.description = DESCRIPTION
  spec.summary = DESCRIPTION
  spec.url = HOMEPATH
  spec.rubyforge_name = RUBYFORGE_PROJECT if RUBYFORGE_PROJECT

  # An array of file patterns to delete on clean.
  spec.clean_globs = spec.clean_globs | %w[**/.*.sw? *.gem .config **/.DS_Store **/*.class **/*.jar]

  # == Optional
  # Release notes: the first two paragraphs of History.txt.
  latest_history = spec.paragraphs_of("History.txt", 0..1)
  spec.changes = latest_history.join("\n\n")

  # An array of rubygem dependencies [name, version],
  # e.g. [ ['active_support', '>= 1.3.1'] ]
  spec.extra_deps = [
    ['term-ansicolor', '>= 1.0.3'],
    ['rspec', '>= 1.1.5'],
    ['diff-lcs', '>= 1.1.2']
  ]

  # spec.spec_extras = {} # A hash of extra values to set in the gemspec.
end

# NOTE(review): the separator below is a literal backslash-n pair, not a
# newline (unlike spec.changes above) - confirm that this is intended.
CHANGES = $hoe.paragraphs_of('History.txt', 0..1).join("\\n\\n")

# RubyForge path of the gem: just the project when the gem shares its name,
# otherwise "<project>/<gem>".
PATH = if RUBYFORGE_PROJECT == GEM_NAME
         RUBYFORGE_PROJECT
       else
         "#{RUBYFORGE_PROJECT}/#{GEM_NAME}"
       end

# The rdoc directory is PATH with the leading project segment stripped.
path_within_project = PATH.gsub(%r{^#{RUBYFORGE_PROJECT}/?}, '')
$hoe.remote_rdoc_dir = File.join(path_within_project, 'rdoc')
$hoe.rsync_args = '-av --delete --ignore-errors'
|
data/config/requirements.rb
DELETED
@@ -1,15 +0,0 @@
|
|
1
|
-
require 'fileutils'
|
2
|
-
include FileUtils
|
3
|
-
|
4
|
-
require 'rubygems'
|
5
|
-
# Load the gems the build cannot work without. If one is missing, stop with an
# explanation on stderr and a non-zero exit status (abort), so that scripts
# and CI do not treat the failed run as a success.
%w[rake hoe].each do |req_gem|
  begin
    require req_gem
  rescue LoadError
    # The old "-y" flag is no longer accepted by `gem install`; dependencies
    # are installed by default, so the hint omits it.
    abort "This Rakefile requires the '#{req_gem}' RubyGem.\n" \
          "Installation: gem install #{req_gem}"
  end
end

# Put the project's lib directory (one level above this file) at the front of
# the load path.
$:.unshift(File.join(File.dirname(__FILE__), %w[.. lib]))
|
data/gem_tasks/deployment.rake
DELETED
@@ -1,34 +0,0 @@
|
|
1
|
-
desc 'Release the website and new gem version'
task :deploy => [:check_version, :website, :release] do
  # Builds a RubyForge svn+ssh URL for the given location under PATH.
  svn_url = lambda do |location|
    "svn+ssh://#{rubyforge_username}@rubyforge.org/var/svn/#{PATH}/#{location}"
  end

  # Nothing is tagged here; the task only prints the command to run by hand.
  puts "Remember to create SVN tag:"
  puts "svn copy #{svn_url.call('trunk')} #{svn_url.call("tags/REL-#{VERS}")} "
  puts "Suggested comment:"
  puts "Tagging release #{CHANGES}"
end
|
9
|
-
|
10
|
-
# Aggregate task: has no action of its own, only prerequisites.
desc 'Runs tasks website_generate and install_gem as a local deployment of the gem'
task :local_deploy => %w[website_generate install_gem]
|
12
|
-
|
13
|
-
# Guards a release: VERSION must be given in the environment and must equal
# VERS. On a mismatch the run is aborted (message on stderr, non-zero exit
# status) so that a chained command or CI job does not continue as if the
# check had passed.
task :check_version do
  requested_version = ENV['VERSION']
  abort 'Must pass a VERSION=x.y.z release version' unless requested_version

  unless requested_version == VERS
    abort "Please update your version.rb to match the release version, currently #{VERS}"
  end
end
|
23
|
-
|
24
|
-
desc 'Install the package as a gem, without generating documentation(ri/rdoc)'
task :install_gem_no_doc => [:clean, :package] do
  # sudo is prepended unless Hoe::WINDOZE is set.
  sudo_prefix = Hoe::WINDOZE ? '' : 'sudo '
  sh "#{sudo_prefix}gem install pkg/*.gem --no-rdoc --no-ri"
end
|
28
|
-
|
29
|
-
namespace :manifest do
  desc 'Recreate Manifest.txt to include ALL files'
  task :refresh do
    # Pipes the output of `rake check_manifest` into patch and redirects
    # patch's stdout to Manifest.txt. The command's result is discarded.
    %x(rake check_manifest | patch -p0 > Manifest.txt)
  end
end
|
data/gem_tasks/environment.rake
DELETED
data/gem_tasks/examples.rake
DELETED
@@ -1,29 +0,0 @@
|
|
1
|
-
# Example tasks that run Semantic::Search over a tiny in-memory corpus.

# Locate the library relative to this file. A bare `require 'lib/semantic'`
# only resolves when "." is on the load path, which Ruby 1.9.2+ no longer
# guarantees.
require File.expand_path(File.join('..', 'lib', 'semantic'), File.dirname(__FILE__))

namespace :example do

  documents = [
    "The cat in the hat disabled",
    "A cat is a fine pet ponies.",
    "Dogs and cats make good pets.",
    "I haven't got a hat."
  ]

  desc "run main LSA example"
  task :lsa do
    # Only the construction (with :verbose => true) matters here; the task
    # itself prints nothing, so the instance is not kept.
    Semantic::Search.new(documents, :verbose => true)
  end

  desc "run main Vector space example"
  task :vector_space do
    search = Semantic::Search.new(documents)

    puts "Documents:"
    documents.each_with_index { |document, index| puts "#{index}: #{document}" }
    puts

    puts "Documents related to first document: #{documents[0]}"
    puts search.related(0)
    puts

    puts "Searching for the word cat:"
    puts search.search(["cat"])
    puts
  end

end
|
data/gem_tasks/fix_cr_lf.rake
DELETED
data/gem_tasks/gemspec.rake
DELETED
data/gem_tasks/rspec.rake
DELETED
@@ -1,33 +0,0 @@
|
|
1
|
-
begin
|
2
|
-
require 'spec'
|
3
|
-
rescue LoadError
|
4
|
-
require 'rubygems'
|
5
|
-
require 'spec'
|
6
|
-
end
|
7
|
-
begin
|
8
|
-
require 'spec/rake/spectask'
|
9
|
-
require 'spec/rake/verify_rcov'
|
10
|
-
rescue LoadError
|
11
|
-
puts <<-EOS
|
12
|
-
To use rspec for testing you must install rspec gem:
|
13
|
-
gem install rspec
|
14
|
-
EOS
|
15
|
-
exit(0)
|
16
|
-
end
|
17
|
-
|
18
|
-
# Defines the spec task. The description matches the file pattern below,
# which picks up every *_spec.rb under spec/, not just spec/models.
desc "Run all specs under spec"
Spec::Rake::SpecTask.new do |t|
  t.spec_opts = ['--options', "spec/spec.opts"]
  t.spec_files = FileList['spec/**/*_spec.rb']

  # Coverage is collected unless NO_RCOV is set in the environment.
  unless ENV['NO_RCOV']
    t.rcov = true
    t.rcov_dir = 'coverage'
    t.rcov_opts = ['--exclude', '_helper\.rb,_spec\.rb,spec\/boss,\/var\/lib\/gems,\/Library\/Ruby,\.autotest']
  end
end
|
29
|
-
|
30
|
-
# :verify_rcov depends on :spec and checks the coverage report it produced
# against a fixed threshold.
RCov::VerifyTask.new(:verify_rcov => :spec) do |verification|
  verification.index_html = 'coverage/index.html'
  verification.threshold = 99.7 # Make sure you have rcov 0.9 or higher!
end
|
data/gem_tasks/website.rake
DELETED
@@ -1,17 +0,0 @@
|
|
1
|
-
desc 'Generate website files'
task :website_generate => :ruby_env do
  # Every .txt page under website/, except the top-level version*.txt files.
  page_sources = Dir['website/**/*.txt'] - Dir['website/version*.txt']

  page_sources.each do |source|
    # Same path with the trailing "txt" swapped for "html".
    target = source.gsub(/txt$/, 'html')
    sh " #{RUBY_APP} script/txt2html #{source} > #{target} "
  end
end
|
7
|
-
|
8
|
-
desc 'Upload website files to rubyforge'
task :website_upload do
  # rsync destination: <user>@rubyforge.org:<project web root>/
  destination = "#{rubyforge_username}@rubyforge.org:/var/www/gforge-projects/#{PATH}/"
  sh "rsync -aCv website/ #{destination}"
end
|
15
|
-
|
16
|
-
# Aggregate task: has no action of its own, only prerequisites.
desc 'Generate and upload website files'
task :website => %w[website_generate website_upload publish_docs]
|
data/rsemantic.gemspec
DELETED
@@ -1,41 +0,0 @@
|
|
1
|
-
# Gem specification for rsemantic 0.1.3.
#
# Attribute writers and constants that only some RubyGems releases provide
# are guarded, so the file loads on both old and current RubyGems.
Gem::Specification.new do |s|
  s.name = %q{rsemantic}
  s.version = "0.1.3"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Joseph Wilk"]
  s.date = %q{2009-08-01}
  s.description = %q{A document vector search with flexible matrix transforms. Currently supports Latent semantic analysis and Term frequency - inverse document frequency}
  s.email = ["joe@josephwilk.net"]
  s.extra_rdoc_files = ["History.txt", "Manifest.txt", "README.txt", "TODO.txt"]
  s.files = %w[
    History.txt
    Manifest.txt
    README.txt
    Rakefile
    TODO.txt
    config/hoe.rb
    config/requirements.rb
    gem_tasks/deployment.rake
    gem_tasks/environment.rake
    gem_tasks/examples.rake
    gem_tasks/fix_cr_lf.rake
    gem_tasks/gemspec.rake
    gem_tasks/rspec.rake
    gem_tasks/website.rake
    lib/semantic.rb
    lib/semantic/compare.rb
    lib/semantic/matrix_transformer.rb
    lib/semantic/parser.rb
    lib/semantic/search.rb
    lib/semantic/transform.rb
    lib/semantic/transform/lsa_transform.rb
    lib/semantic/transform/tf_idf_transform.rb
    lib/semantic/vector_space.rb
    lib/semantic/vector_space/builder.rb
    lib/semantic/vector_space/model.rb
    lib/semantic/version.rb
    resources/english.stop
    rsemantic.gemspec
    spec/semantic/compare_spec.rb
    spec/semantic/matrix_transformer_spec.rb
    spec/semantic/parser_spec.rb
    spec/semantic/search_spec.rb
    spec/semantic/transform/lsa_transform_spec.rb
    spec/semantic/transform/tf_idf_transform_spec.rb
    spec/semantic/vector_space/builder_spec.rb
    spec/semantic/vector_space/model_spec.rb
    spec/spec.opts
    spec/spec_helper.rb
  ]
  # has_rdoc= and rubyforge_project= are not available on every RubyGems
  # release, so only call them where they exist.
  s.has_rdoc = true if s.respond_to? :has_rdoc=
  s.homepage = %q{http://github.com/josephwilk/rsemantic}
  s.rdoc_options = ["--main", "README.txt"]
  s.require_paths = ["lib"]
  s.rubyforge_project = %q{rsemantic} if s.respond_to? :rubyforge_project=
  s.rubygems_version = %q{1.3.1}
  s.summary = %q{A document vector search with flexible matrix transforms. Currently supports Latent semantic analysis and Term frequency - inverse document frequency}

  # Dependencies as [name, requirement]; declared once and registered below
  # with whichever API the running RubyGems offers.
  library_deps = [
    ['term-ansicolor', '>= 1.0.3'],
    ['rspec', '>= 1.1.5'],
    ['diff-lcs', '>= 1.1.2']
  ]
  build_dep = ['hoe', '>= 2.3.2']

  typed_dependencies = false
  if s.respond_to? :specification_version
    s.specification_version = 2

    # Gem::RubyGemsVersion is the legacy name of this constant; prefer
    # Gem::VERSION and fall back only where it is not defined.
    running_rubygems = defined?(Gem::VERSION) ? Gem::VERSION : Gem::RubyGemsVersion
    typed_dependencies = Gem::Version.new(running_rubygems) >= Gem::Version.new('1.2.0')
  end

  if typed_dependencies
    # RubyGems >= 1.2.0 distinguishes runtime from development dependencies.
    library_deps.each { |name, requirement| s.add_runtime_dependency(name, [requirement]) }
    s.add_development_dependency(build_dep[0], [build_dep[1]])
  else
    (library_deps + [build_dep]).each { |name, requirement| s.add_dependency(name, [requirement]) }
  end
end
|
@@ -1,16 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__) + '/../spec_helper'
|
2
|
-
|
3
|
-
module Semantic
  describe Compare do

    # Wraps a list of numbers in a single-column Linalg::DMatrix.
    def vector(values)
      Linalg::DMatrix.columns([values])
    end

    it "should calculate cosine" do
      left = vector([0.1, 0.5])
      right = vector([0.9, 0.3])

      Compare.cosine(left, right).should be_close(0.4961, 0.0001)
    end

  end
end
|
@@ -1,51 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__) + '/../spec_helper'
|
2
|
-
|
3
|
-
module Semantic
  describe MatrixTransformer do

    # Memoised mock standing in for a transform class, so an expectation set
    # in an example applies to the object handed back by the stubbed
    # Transform.const_get.
    def mock_transform
      @transform ||= mock(Transform)
    end

    # A new mock vector space on every call: #matrix returns a 2x2 identity
    # matrix and #matrix= is accepted.
    def mock_vector_space
      mock("vector space", :matrix => Linalg::DMatrix.rows([[1,0],[0,1]]), :matrix= => nil )
    end

    describe "transforming matrix" do

      it "should ignore invalid transform class" do
        matrix_transformer = MatrixTransformer.new(:transforms => [:FAKE])
        lambda {
          matrix_transformer.apply_transforms(mock_vector_space)
        }.should_not raise_error
      end

      # Description reworded: it previously read "defaults transforms in none".
      it "should use default transforms if none are specified" do
        matrix_transformer = MatrixTransformer.new
        Transform.should_receive(:const_get).with(:LSA).and_return(mock_transform)
        Transform.should_receive(:const_get).with(:TFIDF).and_return(mock_transform)

        matrix_transformer.apply_transforms(mock_vector_space)
      end

      it "should send transform message to class to transform matrix" do
        matrix_transformer = MatrixTransformer.new(:transforms => [:LSA])
        Transform.stub!(:const_get).and_return(mock_transform)

        mock_transform.should_receive(:transform)

        matrix_transformer.apply_transforms(mock_vector_space)
      end

      it "should check that transform class is capable of transforming" do
        matrix_transformer = MatrixTransformer.new(:transforms => [:LSA])
        Transform.stub!(:const_get).and_return(mock_transform)
        mock_transform.should_receive(:respond_to?).with(:transform)

        matrix_transformer.apply_transforms(mock_vector_space)
      end

    end
  end
end
|
@@ -1,34 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__) + '/../spec_helper'
|
2
|
-
|
3
|
-
module Semantic
  describe Parser do

    # Makes File.open yield a mock file whose #read returns "a to be".
    def stub_stop_word_file
      stop_word_file = mock("file")
      stop_word_file.stub!(:read).and_return("a to be")
      File.stub!(:open).and_yield(stop_word_file)
    end

    it "should remove stop words" do
      stub_stop_word_file

      filtered = Parser.new.remove_stop_words(['a','house'])

      filtered.should == ['house']
    end

    it "should remove any non characters" do
      stub_stop_word_file

      tokens = Parser.new.tokenise_and_stem("dragon.")

      tokens.should == ["dragon"]
    end

    it "should tokenise the string" do
      parser = Parser.new
      expected_tokens = ['mouse','trap']

      parser.stub!(:remove_stop_words).and_return(expected_tokens)
      parser.should_receive(:tokenise_and_stem).and_return(expected_tokens)

      parser.tokenise_and_filter(['the mouse trap'])
    end

  end
end
|
@@ -1,129 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__) + '/../spec_helper'
|
2
|
-
|
3
|
-
module Semantic
  describe Search do

    documents = [
      "The cat in the hat disabled",
      "A cat is a fine pet ponies.",
      "Dogs and cats make good pets.",
      "I haven't got a hat."
    ]

    # --- memoised test doubles and fixtures --------------------------------

    def mock_builder
      @builder ||= mock(VectorSpace::Builder)
    end

    def mock_matrix_transformer
      @matrix_transformer ||= mock(MatrixTransformer)
    end

    # Single-column matrix used as the canned query vector.
    def query_vector
      @query_vector ||= Linalg::DMatrix.columns([[1,0]])
    end

    # Real model wrapping the 2x2 matrix [[0,1],[1,0]] with an empty second
    # argument. The stubs argument is accepted but not used.
    def vector_space_model(stubs = {})
      @vector_space_model ||= VectorSpace::Model.new(Linalg::DMatrix.rows([[0,1],[1,0]]), {})
    end

    def matrix(array)
      Linalg::DMatrix.rows(array)
    end

    def vector(values)
      matrix([values])
    end

    # Makes VectorSpace::Builder.new hand back the mock builder.
    def stub_builder
      VectorSpace::Builder.stub!(:new).and_return(mock_builder)
    end

    # As stub_builder, and additionally lets the mock builder return the
    # canned model from build_document_matrix.
    def stub_builder_with_model
      stub_builder
      mock_builder.stub!(:build_document_matrix).and_return(vector_space_model)
    end

    describe "setting up" do

      it "should build the vector space" do
        stub_builder
        mock_builder.should_receive(:build_document_matrix).with(['test']).and_return(vector_space_model)

        Search.new(['test'])
      end

      it "should transform matrices" do
        MatrixTransformer.stub!(:new).and_return(mock_matrix_transformer)
        stub_builder_with_model

        #FIXME: with will not match vector_space_model, requests class Data. Think this is related to Delegate and Rspec
        mock_matrix_transformer.should_receive(:apply_transforms).with(anything).and_return(vector_space_model)

        Search.new(['test'])
      end

    end

    describe "searching" do

      it "should map search term to vector space" do
        stub_builder_with_model

        mock_builder.should_receive(:build_query_vector).with("cat").and_return(query_vector)

        Search.new(documents).search("cat")
      end

      it "should compare the documents using cosine" do
        pending
      end

    end

    describe "relating" do

      it "should find related documents by comparing cosine" do
        stub_builder_with_model

        MatrixTransformer.stub!(:new).and_return(mock_matrix_transformer)
        mock_matrix_transformer.stub!(:apply_transforms).and_return(vector_space_model)

        # Expect the column [[0],[1]] to be compared with itself and with
        # the column [[1],[0]].
        Compare.should_receive(:cosine).with(matrix([[0],[1]]), matrix([[0],[1]]))
        Compare.should_receive(:cosine).with(matrix([[0],[1]]), matrix([[1],[0]]))

        Search.new(documents).related(0)
      end

    end

    describe "logging" do

      # Send log output to an in-memory buffer for each example.
      before(:each) do
        @out = StringIO.new
        Semantic.logger = Logger.new(@out)
      end

      it "should set info level if in verbose mode" do
        stub_builder_with_model

        Search.new(['test'], :verbose => true)

        Semantic.logger.level.should == Logger::INFO
      end

      it "should set error level if not in verbose mode" do
        stub_builder_with_model

        Search.new(['test'], :verbose => false)

        Semantic.logger.level.should == Logger::ERROR
      end

      it "should default to error level if verbose is not specified" do
        stub_builder_with_model

        Search.new(['test'])

        Semantic.logger.level.should == Logger::ERROR
      end

    end

  end
end
|