josephwilk-rsemantic 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History.txt CHANGED
@@ -1,4 +1,9 @@
1
- == 0.0.1
1
+ == 0.1.1
2
2
 
3
- * Changed internal representation of vector space. Using columns as documents and rows as terms. This is more consistent which LSA research papers.
4
- * Wrap DMatrix in VectorSpace::Model, allowing us to store keywords with the matrix and get pretty output
3
+ = Bugs
4
+ * fixed a bug where verbose mode was getting stuck at INFO level and would never change (Joseph Wilk)
5
+
6
+ == 0.1.0
7
+
8
+ * Changed internal representation of vector space. Using columns as documents and rows as terms. This is more consistent with LSA research papers. (Joseph Wilk)
9
+ * Wrap DMatrix in VectorSpace::Model, allowing us to store keywords with the matrix and get pretty output (Joseph Wilk)
@@ -2,7 +2,7 @@ module Semantic
2
2
  class Search
3
3
 
4
4
  def initialize(documents, options={})
5
- Semantic.logger.level = Logger::INFO if options[:verbose]
5
+ Semantic.logger.level = options[:verbose] ? Logger::INFO : Logger::ERROR
6
6
 
7
7
  @builder = VectorSpace::Builder.new(options)
8
8
  @matrix_transformer = MatrixTransformer.new(options)
@@ -4,30 +4,34 @@ module Semantic
4
4
 
5
5
  class << self
6
6
 
7
- # Calculate SVD of objects matrix: U . SIGMA . VT = MATRIX
8
- # Reduce the dimension of sigma by specified factor producing sigma'.
9
- # Then dot product the matrices: U . SIGMA' . VT = MATRIX'
10
- def transform(matrix, dimensions=1)
7
+ def transform(matrix, number_of_dimensions_to_reduce = 1)
11
8
  columns = matrix.num_columns
12
9
 
13
10
  if dimensions <= columns: #Its a valid reduction
14
11
 
15
12
  u, sigma, vt = matrix.singular_value_decomposition
16
13
 
17
- #Dimension reduction, build SIGMA'
18
- for index in ((columns-dimensions)...columns)
19
- sigma[index,index]=0
20
- end
14
+ sigma_prime = reduce_dimensions(number_of_dimensions_to_reduce, sigma)
21
15
 
22
16
  #Reconstruct MATRIX' and Save transform
23
- matrix = u * sigma * vt
17
+ matrix_prime = u * sigma_prime * vt
24
18
 
25
19
  else
26
20
  raise Exception, "dimension reduction cannot be greater than %s" % rows
27
21
  end
28
22
 
23
+ matrix_prime
24
+ end
25
+
26
+ private
27
+ def reduce_dimensions(number_of_dimensions_to_reduce, matrix)
28
+ columns = matrix.num_columns
29
+ for index in ((columns-number_of_dimensions_to_reduce)...columns)
30
+ matrix[index,index] = 0
31
+ end
29
32
  matrix
30
33
  end
34
+
31
35
  end
32
36
  end
33
37
  end
@@ -2,7 +2,7 @@ module Semantic #:nodoc:
2
2
  class VERSION #:nodoc:
3
3
  MAJOR = 0
4
4
  MINOR = 1
5
- TINY = 0
5
+ TINY = 1
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
8
8
  end
data/lib/semantic.rb CHANGED
@@ -18,6 +18,10 @@ require 'logger'
18
18
 
19
19
  module Semantic
20
20
 
21
+ class << self
22
+ attr_writer :logger
23
+ end
24
+
21
25
  def self.logger
22
26
  return @logger if @logger
23
27
  @logger = Logger.new(STDOUT)
data/rsemantic.gemspec CHANGED
@@ -1,10 +1,10 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = %q{rsemantic}
3
- s.version = "0.1.0"
3
+ s.version = "0.1.1"
4
4
 
5
5
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
6
6
  s.authors = ["Joseph Wilk"]
7
- s.date = %q{2008-11-13}
7
+ s.date = %q{2008-11-14}
8
8
  s.description = %q{A document vector search with flexible matrix transforms. Currently supports Latent semantic analysis and Term frequency - inverse document frequency}
9
9
  s.email = ["josephwilk@joesniff.co.uk"]
10
10
  s.extra_rdoc_files = ["History.txt", "Manifest.txt", "README.txt", "TODO.txt"]
@@ -18,7 +18,7 @@ module Semantic
18
18
  end
19
19
 
20
20
  def vector_space_model(stubs = {})
21
- @vector_space_model ||= VectorSpace::Model.new(Linalg::DMatrix.rows([[0,1],[1,0]]), [])
21
+ @vector_space_model ||= VectorSpace::Model.new(Linalg::DMatrix.rows([[0,1],[1,0]]), {})
22
22
  end
23
23
 
24
24
  def matrix(array)
@@ -89,5 +89,41 @@ module Semantic
89
89
 
90
90
  end
91
91
 
92
+ describe "logging" do
93
+
94
+ before(:each) do
95
+ @out = StringIO.new
96
+ Semantic.logger = Logger.new(@out)
97
+ end
98
+
99
+ it "should set info level if in verbose mode" do
100
+ VectorSpace::Builder.stub!(:new).and_return(mock_builder)
101
+ mock_builder.stub!(:build_document_matrix).and_return(vector_space_model)
102
+
103
+ Search.new(['test'], :verbose => true)
104
+
105
+ Semantic.logger.level.should == Logger::INFO
106
+ end
107
+
108
+ it "should set error level if not in verbose mode" do
109
+ VectorSpace::Builder.stub!(:new).and_return(mock_builder)
110
+ mock_builder.stub!(:build_document_matrix).and_return(vector_space_model)
111
+
112
+ Search.new(['test'], :verbose => false)
113
+
114
+ Semantic.logger.level.should == Logger::ERROR
115
+ end
116
+
117
+ it "should default to error level if verbose is not specified" do
118
+ VectorSpace::Builder.stub!(:new).and_return(mock_builder)
119
+ mock_builder.stub!(:build_document_matrix).and_return(vector_space_model)
120
+
121
+ Search.new(['test'])
122
+
123
+ Semantic.logger.level.should == Logger::ERROR
124
+ end
125
+
126
+ end
127
+
92
128
  end
93
129
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: josephwilk-rsemantic
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Joseph Wilk
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2008-11-13 00:00:00 -08:00
12
+ date: 2008-11-14 00:00:00 -08:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency