rsemantic 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. data/{README.txt → README.md} +19 -10
  2. data/lib/semantic.rb +8 -5
  3. data/lib/semantic/compare.rb +4 -1
  4. data/lib/semantic/corpus.rb +61 -0
  5. data/lib/semantic/document.rb +39 -0
  6. data/lib/semantic/matrix_transformer.rb +4 -5
  7. data/lib/semantic/parser.rb +22 -10
  8. data/lib/semantic/search.rb +22 -16
  9. data/lib/semantic/search_result.rb +16 -0
  10. data/lib/semantic/transform/lsa_transform.rb +47 -22
  11. data/lib/semantic/transform/tf_idf_transform.rb +12 -23
  12. data/lib/semantic/vector_space/builder.rb +29 -22
  13. data/lib/semantic/vector_space/model.rb +14 -13
  14. data/lib/semantic/version.rb +1 -1
  15. data/lib/tasks/rspec.rake +13 -0
  16. metadata +75 -107
  17. data/Manifest.txt +0 -38
  18. data/Rakefile +0 -9
  19. data/config/hoe.rb +0 -69
  20. data/config/requirements.rb +0 -15
  21. data/gem_tasks/deployment.rake +0 -34
  22. data/gem_tasks/environment.rake +0 -7
  23. data/gem_tasks/examples.rake +0 -29
  24. data/gem_tasks/fix_cr_lf.rake +0 -10
  25. data/gem_tasks/gemspec.rake +0 -6
  26. data/gem_tasks/rspec.rake +0 -33
  27. data/gem_tasks/website.rake +0 -17
  28. data/rsemantic.gemspec +0 -41
  29. data/spec/semantic/compare_spec.rb +0 -16
  30. data/spec/semantic/matrix_transformer_spec.rb +0 -51
  31. data/spec/semantic/parser_spec.rb +0 -34
  32. data/spec/semantic/search_spec.rb +0 -129
  33. data/spec/semantic/transform/lsa_transform_spec.rb +0 -59
  34. data/spec/semantic/transform/tf_idf_transform_spec.rb +0 -35
  35. data/spec/semantic/vector_space/builder_spec.rb +0 -44
  36. data/spec/semantic/vector_space/model_spec.rb +0 -22
  37. data/spec/spec.opts +0 -2
  38. data/spec/spec_helper.rb +0 -7
data/Rakefile DELETED
@@ -1,9 +0,0 @@
1
- ENV['NODOT'] = 'true' # We don't want class diagrams in RDoc
2
- require 'config/requirements'
3
- require 'config/hoe' # setup Hoe + all gem configuration
4
-
5
- Dir['gem_tasks/**/*.rake'].each { |rake| load rake }
6
-
7
- # Hoe gives us :default => :test, but we don't have Test::Unit tests.
8
- Rake::Task[:default].clear_prerequisites
9
- task :default => [:spec]
data/config/hoe.rb DELETED
@@ -1,69 +0,0 @@
1
- require 'semantic/version'
2
-
3
- AUTHOR = 'Joseph Wilk' # can also be an array of Authors
4
- EMAIL = "josephwilk@joesniff.co.uk"
5
- DESCRIPTION = "A document vector search with flexible matrix transforms. Currently supports Latent semantic analysis and Term frequency - inverse document frequency"
6
- GEM_NAME = 'rsemantic' # what ppl will type to install your gem
7
- HOMEPATH = "http://github.com/josephwilk/rsemantic"
8
- RUBYFORGE_PROJECT = 'rsemantic'
9
-
10
- @config_file = "~/.rubyforge/user-config.yml"
11
- @config = nil
12
- RUBYFORGE_USERNAME = "joseph_wilk"
13
- def rubyforge_username
14
- unless @config
15
- begin
16
- @config = YAML.load(File.read(File.expand_path(@config_file)))
17
- rescue
18
- puts <<-EOS
19
- ERROR: No rubyforge config file found: #{@config_file}
20
- Run 'rubyforge setup' to prepare your env for access to Rubyforge
21
- - See http://newgem.rubyforge.org/rubyforge.html for more details
22
- EOS
23
- exit
24
- end
25
- end
26
- RUBYFORGE_USERNAME.replace @config["username"]
27
- end
28
-
29
-
30
- REV = nil
31
- # UNCOMMENT IF REQUIRED:
32
- # REV = YAML.load(`svn info`)['Revision']
33
- VERS = Semantic::VERSION::STRING + (REV ? ".#{REV}" : "")
34
- RDOC_OPTS = ['--quiet', '--title', 'Rsemantic documentation',
35
- "--opname", "index.html",
36
- "--line-numbers",
37
- "--main", "README.textile",
38
- "--inline-source"]
39
-
40
- class Hoe
41
- def extra_deps
42
- @extra_deps.reject! { |x| Array(x).first == 'hoe' }
43
- @extra_deps
44
- end
45
- end
46
-
47
- # Generate all the Rake tasks
48
- # Run 'rake -T' to see list of generated tasks (from gem root directory)
49
- $hoe = Hoe.new(GEM_NAME, VERS) do |p|
50
- p.developer(AUTHOR, EMAIL)
51
- p.description = DESCRIPTION
52
- p.summary = DESCRIPTION
53
- p.url = HOMEPATH
54
- p.rubyforge_name = RUBYFORGE_PROJECT if RUBYFORGE_PROJECT
55
- p.clean_globs |= ['**/.*.sw?', '*.gem', '.config', '**/.DS_Store', '**/*.class', '**/*.jar'] #An array of file patterns to delete on clean.
56
-
57
- # == Optional
58
- p.changes = p.paragraphs_of("History.txt", 0..1).join("\n\n")
59
- #p.extra_deps = [] # An array of rubygem dependencies [name, version], e.g. [ ['active_support', '>= 1.3.1'] ]
60
- p.extra_deps = [ ['term-ansicolor', '>= 1.0.3'], ['rspec', '>= 1.1.5'], ['diff-lcs', '>= 1.1.2'] ]
61
-
62
- #p.spec_extras = {} # A hash of extra values to set in the gemspec.
63
-
64
- end
65
-
66
- CHANGES = $hoe.paragraphs_of('History.txt', 0..1).join("\\n\\n")
67
- PATH = (RUBYFORGE_PROJECT == GEM_NAME) ? RUBYFORGE_PROJECT : "#{RUBYFORGE_PROJECT}/#{GEM_NAME}"
68
- $hoe.remote_rdoc_dir = File.join(PATH.gsub(/^#{RUBYFORGE_PROJECT}\/?/,''), 'rdoc')
69
- $hoe.rsync_args = '-av --delete --ignore-errors'
data/config/requirements.rb DELETED
@@ -1,15 +0,0 @@
1
- require 'fileutils'
2
- include FileUtils
3
-
4
- require 'rubygems'
5
- %w[rake hoe].each do |req_gem|
6
- begin
7
- require req_gem
8
- rescue LoadError
9
- puts "This Rakefile requires the '#{req_gem}' RubyGem."
10
- puts "Installation: gem install #{req_gem} -y"
11
- exit
12
- end
13
- end
14
-
15
- $:.unshift(File.join(File.dirname(__FILE__), %w[.. lib]))
data/gem_tasks/deployment.rake DELETED
@@ -1,34 +0,0 @@
1
- desc 'Release the website and new gem version'
2
- task :deploy => [:check_version, :website, :release] do
3
- puts "Remember to create SVN tag:"
4
- puts "svn copy svn+ssh://#{rubyforge_username}@rubyforge.org/var/svn/#{PATH}/trunk " +
5
- "svn+ssh://#{rubyforge_username}@rubyforge.org/var/svn/#{PATH}/tags/REL-#{VERS} "
6
- puts "Suggested comment:"
7
- puts "Tagging release #{CHANGES}"
8
- end
9
-
10
- desc 'Runs tasks website_generate and install_gem as a local deployment of the gem'
11
- task :local_deploy => [:website_generate, :install_gem]
12
-
13
- task :check_version do
14
- unless ENV['VERSION']
15
- puts 'Must pass a VERSION=x.y.z release version'
16
- exit
17
- end
18
- unless ENV['VERSION'] == VERS
19
- puts "Please update your version.rb to match the release version, currently #{VERS}"
20
- exit
21
- end
22
- end
23
-
24
- desc 'Install the package as a gem, without generating documentation(ri/rdoc)'
25
- task :install_gem_no_doc => [:clean, :package] do
26
- sh "#{'sudo ' unless Hoe::WINDOZE }gem install pkg/*.gem --no-rdoc --no-ri"
27
- end
28
-
29
- namespace :manifest do
30
- desc 'Recreate Manifest.txt to include ALL files'
31
- task :refresh do
32
- `rake check_manifest | patch -p0 > Manifest.txt`
33
- end
34
- end
data/gem_tasks/environment.rake DELETED
@@ -1,7 +0,0 @@
1
- task :ruby_env do
2
- RUBY_APP = if RUBY_PLATFORM =~ /java/
3
- "jruby"
4
- else
5
- "ruby"
6
- end unless defined? RUBY_APP
7
- end
data/gem_tasks/examples.rake DELETED
@@ -1,29 +0,0 @@
1
- require 'lib/semantic'
2
-
3
- namespace :example do
4
-
5
- documents = ["The cat in the hat disabled", "A cat is a fine pet ponies.", "Dogs and cats make good pets.","I haven't got a hat."]
6
-
7
- desc "run main LSA example"
8
- task :lsa do
9
- search = Semantic::Search.new(documents, :verbose => true)
10
- end
11
-
12
- desc "run main Vector space example"
13
- task :vector_space do
14
- search = Semantic::Search.new(documents)
15
-
16
- puts "Documents:"
17
- documents.each_with_index { |document, index| puts "#{index}: #{document}" }
18
- puts
19
-
20
- puts "Documents related to first document: #{documents[0]}"
21
- puts search.related(0)
22
- puts
23
-
24
- puts "Searching for the word cat:"
25
- puts search.search(["cat"])
26
- puts
27
- end
28
-
29
- end
data/gem_tasks/fix_cr_lf.rake DELETED
@@ -1,10 +0,0 @@
1
- desc 'Make all files use UNIX (\n) line endings'
2
- task :fix_cr_lf do
3
- files = FileList['**/*']
4
- files.each do |f|
5
- next if File.directory?(f)
6
- s = IO.read(f)
7
- s.gsub!(/\r?\n/, "\n")
8
- File.open(f, "w") { |io| io.write(s) }
9
- end
10
- end
data/gem_tasks/gemspec.rake DELETED
@@ -1,6 +0,0 @@
1
- namespace :gemspec do
2
- desc 'Refresh rsemantic.gemspec to include ALL files'
3
- task :refresh => 'manifest:refresh' do
4
- File.open('rsemantic.gemspec', 'w') {|io| io.write($hoe.spec.to_ruby)}
5
- end
6
- end
data/gem_tasks/rspec.rake DELETED
@@ -1,33 +0,0 @@
1
- begin
2
- require 'spec'
3
- rescue LoadError
4
- require 'rubygems'
5
- require 'spec'
6
- end
7
- begin
8
- require 'spec/rake/spectask'
9
- require 'spec/rake/verify_rcov'
10
- rescue LoadError
11
- puts <<-EOS
12
- To use rspec for testing you must install rspec gem:
13
- gem install rspec
14
- EOS
15
- exit(0)
16
- end
17
-
18
- desc "Run the specs under spec/models"
19
- Spec::Rake::SpecTask.new do |t|
20
- t.spec_opts = ['--options', "spec/spec.opts"]
21
- t.spec_files = FileList['spec/**/*_spec.rb']
22
-
23
- unless ENV['NO_RCOV']
24
- t.rcov = true
25
- t.rcov_dir = 'coverage'
26
- t.rcov_opts = ['--exclude', '_helper\.rb,_spec\.rb,spec\/boss,\/var\/lib\/gems,\/Library\/Ruby,\.autotest']
27
- end
28
- end
29
-
30
- RCov::VerifyTask.new(:verify_rcov => :spec) do |t|
31
- t.threshold = 99.7 # Make sure you have rcov 0.9 or higher!
32
- t.index_html = 'coverage/index.html'
33
- end
data/gem_tasks/website.rake DELETED
@@ -1,17 +0,0 @@
1
- desc 'Generate website files'
2
- task :website_generate => :ruby_env do
3
- (Dir['website/**/*.txt'] - Dir['website/version*.txt']).each do |txt|
4
- sh %{ #{RUBY_APP} script/txt2html #{txt} > #{txt.gsub(/txt$/,'html')} }
5
- end
6
- end
7
-
8
- desc 'Upload website files to rubyforge'
9
- task :website_upload do
10
- host = "#{rubyforge_username}@rubyforge.org"
11
- remote_dir = "/var/www/gforge-projects/#{PATH}/"
12
- local_dir = 'website'
13
- sh %{rsync -aCv #{local_dir}/ #{host}:#{remote_dir}}
14
- end
15
-
16
- desc 'Generate and upload website files'
17
- task :website => [:website_generate, :website_upload, :publish_docs]
data/rsemantic.gemspec DELETED
@@ -1,41 +0,0 @@
1
- Gem::Specification.new do |s|
2
- s.name = %q{rsemantic}
3
- s.version = "0.1.3"
4
-
5
- s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
6
- s.authors = ["Joseph Wilk"]
7
- s.date = %q{2009-08-01}
8
- s.description = %q{A document vector search with flexible matrix transforms. Currently supports Latent semantic analysis and Term frequency - inverse document frequency}
9
- s.email = ["joe@josephwilk.net"]
10
- s.extra_rdoc_files = ["History.txt", "Manifest.txt", "README.txt", "TODO.txt"]
11
- s.files = ["History.txt", "Manifest.txt", "README.txt", "Rakefile", "TODO.txt", "config/hoe.rb", "config/requirements.rb", "gem_tasks/deployment.rake", "gem_tasks/environment.rake", "gem_tasks/examples.rake", "gem_tasks/fix_cr_lf.rake", "gem_tasks/gemspec.rake", "gem_tasks/rspec.rake", "gem_tasks/website.rake", "lib/semantic.rb", "lib/semantic/compare.rb", "lib/semantic/matrix_transformer.rb", "lib/semantic/parser.rb", "lib/semantic/search.rb", "lib/semantic/transform.rb", "lib/semantic/transform/lsa_transform.rb", "lib/semantic/transform/tf_idf_transform.rb", "lib/semantic/vector_space.rb", "lib/semantic/vector_space/builder.rb", "lib/semantic/vector_space/model.rb", "lib/semantic/version.rb", "resources/english.stop", "rsemantic.gemspec", "spec/semantic/compare_spec.rb", "spec/semantic/matrix_transformer_spec.rb", "spec/semantic/parser_spec.rb", "spec/semantic/search_spec.rb", "spec/semantic/transform/lsa_transform_spec.rb", "spec/semantic/transform/tf_idf_transform_spec.rb", "spec/semantic/vector_space/builder_spec.rb", "spec/semantic/vector_space/model_spec.rb", "spec/spec.opts", "spec/spec_helper.rb"]
12
- s.has_rdoc = true
13
- s.homepage = %q{http://github.com/josephwilk/rsemantic}
14
- s.rdoc_options = ["--main", "README.txt"]
15
- s.require_paths = ["lib"]
16
- s.rubyforge_project = %q{rsemantic}
17
- s.rubygems_version = %q{1.3.1}
18
- s.summary = %q{A document vector search with flexible matrix transforms. Currently supports Latent semantic analysis and Term frequency - inverse document frequency}
19
-
20
- if s.respond_to? :specification_version then
21
- current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
22
- s.specification_version = 2
23
-
24
- if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
25
- s.add_runtime_dependency(%q<term-ansicolor>, [">= 1.0.3"])
26
- s.add_runtime_dependency(%q<rspec>, [">= 1.1.5"])
27
- s.add_runtime_dependency(%q<diff-lcs>, [">= 1.1.2"])
28
- s.add_development_dependency(%q<hoe>, [">= 2.3.2"])
29
- else
30
- s.add_dependency(%q<term-ansicolor>, [">= 1.0.3"])
31
- s.add_dependency(%q<rspec>, [">= 1.1.5"])
32
- s.add_dependency(%q<diff-lcs>, [">= 1.1.2"])
33
- s.add_dependency(%q<hoe>, [">= 2.3.2"])
34
- end
35
- else
36
- s.add_dependency(%q<term-ansicolor>, [">= 1.0.3"])
37
- s.add_dependency(%q<rspec>, [">= 1.1.5"])
38
- s.add_dependency(%q<diff-lcs>, [">= 1.1.2"])
39
- s.add_dependency(%q<hoe>, [">= 2.3.2"])
40
- end
41
- end
data/spec/semantic/compare_spec.rb DELETED
@@ -1,16 +0,0 @@
1
- require File.dirname(__FILE__) + '/../spec_helper'
2
-
3
- module Semantic
4
- describe Compare do
5
-
6
- def vector(values)
7
- Linalg::DMatrix.columns([values])
8
- end
9
-
10
- it "should calculate cosine" do
11
- cosine = Compare.cosine( vector([0.1,0.5]), vector([0.9, 0.3]) )
12
- cosine.should be_close(0.4961, 0.0001)
13
- end
14
-
15
- end
16
- end
data/spec/semantic/matrix_transformer_spec.rb DELETED
@@ -1,51 +0,0 @@
1
- require File.dirname(__FILE__) + '/../spec_helper'
2
-
3
- module Semantic
4
- describe MatrixTransformer do
5
-
6
- def mock_transform
7
- @transform ||= mock(Transform)
8
- end
9
-
10
- def mock_vector_space
11
- mock("vector space", :matrix => Linalg::DMatrix.rows([[1,0],[0,1]]), :matrix= => nil )
12
- end
13
-
14
-
15
- describe "transforming matrix" do
16
-
17
- it "should ignore invalid transform class" do
18
- matrix_transformer = MatrixTransformer.new(:transforms => [:FAKE])
19
- lambda {
20
- matrix_transformer.apply_transforms(mock_vector_space)
21
- }.should_not raise_error
22
- end
23
-
24
- it "should use defaults transforms in none are specified" do
25
- matrix_transformer = MatrixTransformer.new
26
- Transform.should_receive(:const_get).with(:LSA).and_return(mock_transform)
27
- Transform.should_receive(:const_get).with(:TFIDF).and_return(mock_transform)
28
-
29
- matrix_transformer.apply_transforms(mock_vector_space)
30
- end
31
-
32
- it "should send transform message to class to transform matrix" do
33
- matrix_transformer = MatrixTransformer.new(:transforms => [:LSA])
34
- Transform.stub!(:const_get).and_return(mock_transform)
35
-
36
- mock_transform.should_receive(:transform)
37
-
38
- matrix_transformer.apply_transforms(mock_vector_space)
39
- end
40
-
41
- it "should check that transform class is capable of transforming" do
42
- matrix_transformer = MatrixTransformer.new(:transforms => [:LSA])
43
- Transform.stub!(:const_get).and_return(mock_transform)
44
- mock_transform.should_receive(:respond_to?).with(:transform)
45
-
46
- matrix_transformer.apply_transforms(mock_vector_space)
47
- end
48
-
49
- end
50
- end
51
- end
data/spec/semantic/parser_spec.rb DELETED
@@ -1,34 +0,0 @@
1
- require File.dirname(__FILE__) + '/../spec_helper'
2
-
3
- module Semantic
4
- describe Parser do
5
-
6
- it "should remove stop words" do
7
- file = mock("file")
8
- file.stub!(:read).and_return("a to be")
9
- File.stub!(:open).and_yield(file)
10
- parser = Parser.new
11
-
12
- parser.remove_stop_words(['a','house']).should == ['house']
13
- end
14
-
15
- it "should remove any non characters" do
16
- file = mock("file")
17
- file.stub!(:read).and_return("a to be")
18
- File.stub!(:open).and_yield(file)
19
-
20
- parser = Parser.new
21
- parser.tokenise_and_stem("dragon.").should == ["dragon"]
22
- end
23
-
24
- it "should tokenise the string" do
25
- parser = Parser.new
26
-
27
- parser.stub!(:remove_stop_words).and_return(['mouse','trap'])
28
- parser.should_receive(:tokenise_and_stem).and_return(['mouse','trap'])
29
-
30
- parser.tokenise_and_filter(['the mouse trap'])
31
- end
32
-
33
- end
34
- end
data/spec/semantic/search_spec.rb DELETED
@@ -1,129 +0,0 @@
1
- require File.dirname(__FILE__) + '/../spec_helper'
2
-
3
- module Semantic
4
- describe Search do
5
-
6
- documents = ["The cat in the hat disabled", "A cat is a fine pet ponies.", "Dogs and cats make good pets.","I haven't got a hat."]
7
-
8
- def mock_builder
9
- @builder ||= mock(VectorSpace::Builder)
10
- end
11
-
12
- def mock_matrix_transformer
13
- @matrix_transformer ||= mock(MatrixTransformer)
14
- end
15
-
16
- def query_vector
17
- @query_vector ||= Linalg::DMatrix.columns([[1,0]])
18
- end
19
-
20
- def vector_space_model(stubs = {})
21
- @vector_space_model ||= VectorSpace::Model.new(Linalg::DMatrix.rows([[0,1],[1,0]]), {})
22
- end
23
-
24
- def matrix(array)
25
- Linalg::DMatrix.rows(array)
26
- end
27
-
28
- def vector(vector)
29
- matrix([vector])
30
- end
31
-
32
- describe "setting up" do
33
-
34
- it "should build the vector space" do
35
- VectorSpace::Builder.stub!(:new).and_return(mock_builder)
36
- mock_builder.should_receive(:build_document_matrix).with(['test']).and_return(vector_space_model)
37
-
38
- Search.new(['test'])
39
- end
40
-
41
- it "should transform matrices" do
42
- MatrixTransformer.stub!(:new).and_return(mock_matrix_transformer)
43
- VectorSpace::Builder.stub!(:new).and_return(mock_builder)
44
- mock_builder.stub!(:build_document_matrix).and_return(vector_space_model)
45
-
46
- #FIXME: with will not match vector_space_model, requests class Data. Think this is related to Delegate and Rspec
47
- mock_matrix_transformer.should_receive(:apply_transforms).with(anything).and_return(vector_space_model)
48
-
49
- Search.new(['test'])
50
- end
51
-
52
- end
53
-
54
- describe "searching" do
55
-
56
- it "should map search term to vector space" do
57
- VectorSpace::Builder.stub!(:new).and_return(mock_builder)
58
- mock_builder.stub!(:build_document_matrix).and_return(vector_space_model)
59
-
60
- mock_builder.should_receive(:build_query_vector).with("cat").and_return(query_vector)
61
-
62
- vector_search = Search.new(documents)
63
- vector_search.search("cat")
64
- end
65
-
66
- it "should compare the documents using cosine" do
67
- pending
68
- end
69
-
70
- end
71
-
72
- describe "relating" do
73
-
74
- it "should find related documents by comparing cosine" do
75
- VectorSpace::Builder.stub!(:new).and_return(mock_builder)
76
-
77
- mock_builder.stub!(:build_document_matrix).and_return(vector_space_model)
78
-
79
- MatrixTransformer.stub!(:new).and_return(mock_matrix_transformer)
80
- mock_matrix_transformer.stub!(:apply_transforms).and_return(vector_space_model)
81
-
82
- Compare.should_receive(:cosine).with(matrix([[0],[1]]), matrix([[0],[1]]))
83
- Compare.should_receive(:cosine).with(matrix([[0],[1]]), matrix([[1],[0]]))
84
-
85
- vector_search = Search.new(documents)
86
-
87
- vector_search.related(0)
88
- end
89
-
90
- end
91
-
92
- describe "logging" do
93
-
94
- before(:each) do
95
- @out = StringIO.new
96
- Semantic.logger = Logger.new(@out)
97
- end
98
-
99
- it "should set info level if in verbose mode" do
100
- VectorSpace::Builder.stub!(:new).and_return(mock_builder)
101
- mock_builder.stub!(:build_document_matrix).and_return(vector_space_model)
102
-
103
- Search.new(['test'], :verbose => true)
104
-
105
- Semantic.logger.level.should == Logger::INFO
106
- end
107
-
108
- it "should set error level if not in verbose mode" do
109
- VectorSpace::Builder.stub!(:new).and_return(mock_builder)
110
- mock_builder.stub!(:build_document_matrix).and_return(vector_space_model)
111
-
112
- Search.new(['test'], :verbose => false)
113
-
114
- Semantic.logger.level.should == Logger::ERROR
115
- end
116
-
117
- it "should default to error level if verbose is not specified" do
118
- VectorSpace::Builder.stub!(:new).and_return(mock_builder)
119
- mock_builder.stub!(:build_document_matrix).and_return(vector_space_model)
120
-
121
- Search.new(['test'])
122
-
123
- Semantic.logger.level.should == Logger::ERROR
124
- end
125
-
126
- end
127
-
128
- end
129
- end