hoatzin 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.markdown +2 -1
- data/VERSION +1 -1
- data/hoatzin.gemspec +4 -4
- data/lib/classifier.rb +3 -3
- data/lib/{vector_space → feature_vector}/builder.rb +2 -2
- data/lib/{vector_space → feature_vector}/model.rb +1 -1
- data/lib/hoatzin.rb +2 -2
- metadata +6 -6
data/README.markdown
CHANGED
@@ -74,7 +74,8 @@ need to recompute the feature vectors.
|
|
74
74
|
## Acknowledgements
|
75
75
|
|
76
76
|
See http://www.igvita.com/2008/01/07/support-vector-machines-svm-in-ruby/ for the original inspiration.
|
77
|
-
|
77
|
+
|
78
|
+
The Feature Vector model implementation is adapted from https://github.com/josephwilk/rsemantic
|
78
79
|
|
79
80
|
## Copyright and License
|
80
81
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.2.
|
1
|
+
0.2.1
|
data/hoatzin.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{hoatzin}
|
8
|
-
s.version = "0.2.
|
8
|
+
s.version = "0.2.1"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["robl"]
|
12
|
-
s.date = %q{2011-01-
|
12
|
+
s.date = %q{2011-01-16}
|
13
13
|
s.description = %q{Hoatzin is a text classifier in Ruby that uses SVM for it's classification.}
|
14
14
|
s.email = %q{robl@rjlee.net}
|
15
15
|
s.extra_rdoc_files = [
|
@@ -25,10 +25,10 @@ Gem::Specification.new do |s|
|
|
25
25
|
"VERSION",
|
26
26
|
"hoatzin.gemspec",
|
27
27
|
"lib/classifier.rb",
|
28
|
+
"lib/feature_vector/builder.rb",
|
29
|
+
"lib/feature_vector/model.rb",
|
28
30
|
"lib/hoatzin.rb",
|
29
31
|
"lib/parser.rb",
|
30
|
-
"lib/vector_space/builder.rb",
|
31
|
-
"lib/vector_space/model.rb",
|
32
32
|
"test/helper.rb",
|
33
33
|
"test/models/readonly-test/metadata",
|
34
34
|
"test/models/readonly-test/model",
|
data/lib/classifier.rb
CHANGED
@@ -21,7 +21,7 @@ module Hoatzin
|
|
21
21
|
@metadata_file = options.delete(:metadata) || nil
|
22
22
|
@model_file = options.delete(:model) || nil
|
23
23
|
|
24
|
-
@builder =
|
24
|
+
@builder = FeatureVector::Builder.new(:parser => Hoatzin::Parser.new)
|
25
25
|
|
26
26
|
# If we have model and metadata files then load them
|
27
27
|
load if @metadata_file && @model_file
|
@@ -106,8 +106,8 @@ module Hoatzin
|
|
106
106
|
end
|
107
107
|
|
108
108
|
def assign_model
|
109
|
-
|
110
|
-
@problem = Problem.new(@labels,
|
109
|
+
feature_vector_model = @builder.build_document_matrix(@documents)
|
110
|
+
@problem = Problem.new(@labels, feature_vector_model.matrix)
|
111
111
|
@model = Model.new(@problem, @parameters)
|
112
112
|
end
|
113
113
|
|
@@ -1,7 +1,7 @@
|
|
1
1
|
# Adapted from : https://github.com/josephwilk/rsemantic
|
2
2
|
|
3
3
|
module Hoatzin
|
4
|
-
module
|
4
|
+
module FeatureVector
|
5
5
|
#A algebraic model for representing text documents as vectors of identifiers.
|
6
6
|
#A document is represented as a vector. Each dimension of the vector corresponds to a
|
7
7
|
#separate term. If a term occurs in the document, then the value in the vector is non-zero.
|
@@ -72,7 +72,7 @@ module Hoatzin
|
|
72
72
|
end
|
73
73
|
|
74
74
|
vector = Array.new(@vector_keyword_index.length, 0)
|
75
|
-
word_list.each { |word| vector[@vector_keyword_index[word]]
|
75
|
+
word_list.each { |word| vector[@vector_keyword_index[word]] = 1 if @vector_keyword_index.has_key?(word) }
|
76
76
|
vector
|
77
77
|
end
|
78
78
|
|
data/lib/hoatzin.rb
CHANGED
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hoatzin
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 21
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 2
|
9
|
-
-
|
10
|
-
version: 0.2.
|
9
|
+
- 1
|
10
|
+
version: 0.2.1
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- robl
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-01-
|
18
|
+
date: 2011-01-16 00:00:00 +00:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -124,10 +124,10 @@ files:
|
|
124
124
|
- VERSION
|
125
125
|
- hoatzin.gemspec
|
126
126
|
- lib/classifier.rb
|
127
|
+
- lib/feature_vector/builder.rb
|
128
|
+
- lib/feature_vector/model.rb
|
127
129
|
- lib/hoatzin.rb
|
128
130
|
- lib/parser.rb
|
129
|
-
- lib/vector_space/builder.rb
|
130
|
-
- lib/vector_space/model.rb
|
131
131
|
- test/helper.rb
|
132
132
|
- test/models/readonly-test/metadata
|
133
133
|
- test/models/readonly-test/model
|