hoatzin 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.markdown +2 -1
- data/VERSION +1 -1
- data/hoatzin.gemspec +4 -4
- data/lib/classifier.rb +3 -3
- data/lib/{vector_space → feature_vector}/builder.rb +2 -2
- data/lib/{vector_space → feature_vector}/model.rb +1 -1
- data/lib/hoatzin.rb +2 -2
- metadata +6 -6
data/README.markdown
CHANGED
@@ -74,7 +74,8 @@ need to recompute the feature vectors.
|
|
74
74
|
## Acknowledgements
|
75
75
|
|
76
76
|
See http://www.igvita.com/2008/01/07/support-vector-machines-svm-in-ruby/ for the original inspiration.
|
77
|
-
|
77
|
+
|
78
|
+
The Feature Vector model implementation is adapted from https://github.com/josephwilk/rsemantic
|
78
79
|
|
79
80
|
## Copyright and License
|
80
81
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.2.0
|
1
|
+
0.2.1
|
data/hoatzin.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{hoatzin}
|
8
|
-
s.version = "0.2.0"
|
8
|
+
s.version = "0.2.1"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["robl"]
|
12
|
-
s.date = %q{2011-01-
|
12
|
+
s.date = %q{2011-01-16}
|
13
13
|
s.description = %q{Hoatzin is a text classifier in Ruby that uses SVM for it's classification.}
|
14
14
|
s.email = %q{robl@rjlee.net}
|
15
15
|
s.extra_rdoc_files = [
|
@@ -25,10 +25,10 @@ Gem::Specification.new do |s|
|
|
25
25
|
"VERSION",
|
26
26
|
"hoatzin.gemspec",
|
27
27
|
"lib/classifier.rb",
|
28
|
+
"lib/feature_vector/builder.rb",
|
29
|
+
"lib/feature_vector/model.rb",
|
28
30
|
"lib/hoatzin.rb",
|
29
31
|
"lib/parser.rb",
|
30
|
-
"lib/vector_space/builder.rb",
|
31
|
-
"lib/vector_space/model.rb",
|
32
32
|
"test/helper.rb",
|
33
33
|
"test/models/readonly-test/metadata",
|
34
34
|
"test/models/readonly-test/model",
|
data/lib/classifier.rb
CHANGED
@@ -21,7 +21,7 @@ module Hoatzin
|
|
21
21
|
@metadata_file = options.delete(:metadata) || nil
|
22
22
|
@model_file = options.delete(:model) || nil
|
23
23
|
|
24
|
-
@builder =
|
24
|
+
@builder = FeatureVector::Builder.new(:parser => Hoatzin::Parser.new)
|
25
25
|
|
26
26
|
# If we have model and metadata files then load them
|
27
27
|
load if @metadata_file && @model_file
|
@@ -106,8 +106,8 @@ module Hoatzin
|
|
106
106
|
end
|
107
107
|
|
108
108
|
def assign_model
|
109
|
-
|
110
|
-
@problem = Problem.new(@labels,
|
109
|
+
feature_vector_model = @builder.build_document_matrix(@documents)
|
110
|
+
@problem = Problem.new(@labels, feature_vector_model.matrix)
|
111
111
|
@model = Model.new(@problem, @parameters)
|
112
112
|
end
|
113
113
|
|
@@ -1,7 +1,7 @@
|
|
1
1
|
# Adapted from : https://github.com/josephwilk/rsemantic
|
2
2
|
|
3
3
|
module Hoatzin
|
4
|
-
module VectorSpace
|
4
|
+
module FeatureVector
|
5
5
|
#A algebraic model for representing text documents as vectors of identifiers.
|
6
6
|
#A document is represented as a vector. Each dimension of the vector corresponds to a
|
7
7
|
#separate term. If a term occurs in the document, then the value in the vector is non-zero.
|
@@ -72,7 +72,7 @@ module Hoatzin
|
|
72
72
|
end
|
73
73
|
|
74
74
|
vector = Array.new(@vector_keyword_index.length, 0)
|
75
|
-
word_list.each { |word| vector[@vector_keyword_index[word]]
|
75
|
+
word_list.each { |word| vector[@vector_keyword_index[word]] = 1 if @vector_keyword_index.has_key?(word) }
|
76
76
|
vector
|
77
77
|
end
|
78
78
|
|
data/lib/hoatzin.rb
CHANGED
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hoatzin
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 21
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 2
|
9
|
-
- 0
|
10
|
-
version: 0.2.0
|
9
|
+
- 1
|
10
|
+
version: 0.2.1
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- robl
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-01-
|
18
|
+
date: 2011-01-16 00:00:00 +00:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -124,10 +124,10 @@ files:
|
|
124
124
|
- VERSION
|
125
125
|
- hoatzin.gemspec
|
126
126
|
- lib/classifier.rb
|
127
|
+
- lib/feature_vector/builder.rb
|
128
|
+
- lib/feature_vector/model.rb
|
127
129
|
- lib/hoatzin.rb
|
128
130
|
- lib/parser.rb
|
129
|
-
- lib/vector_space/builder.rb
|
130
|
-
- lib/vector_space/model.rb
|
131
131
|
- test/helper.rb
|
132
132
|
- test/models/readonly-test/metadata
|
133
133
|
- test/models/readonly-test/model
|