tf_idf 0.0.0 → 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/lib/tf_idf.rb +6 -5
- data/tf_idf.gemspec +2 -2
- metadata +2 -2
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.0
|
1
|
+
0.0.1
|
data/lib/tf_idf.rb
CHANGED
@@ -59,22 +59,23 @@ class TfIdf
|
|
59
59
|
# It is then normalized (as some documents are longer than others)
|
60
60
|
def calculate_term_frequencies
|
61
61
|
original_ngrams = n_gram.ngrams_of_inputs.clone
|
62
|
-
|
62
|
+
|
63
63
|
original_ngrams.each_with_index do |document, index|
|
64
64
|
|
65
65
|
# Calculate the total number of terms
|
66
66
|
total_terms = 0.0
|
67
67
|
document[@n].each_value {|v| total_terms += v}
|
68
|
-
|
68
|
+
|
69
69
|
document[@n].each_pair do |key, value|
|
70
70
|
original_ngrams[index][@n][key] = (value.to_f / total_terms)
|
71
71
|
end
|
72
72
|
end
|
73
|
-
|
74
|
-
original_ngrams.map {|x| x.map {|y| y[
|
73
|
+
|
74
|
+
original_ngrams.map {|x| x.map {|y| y[1] }}.flatten
|
75
75
|
end
|
76
76
|
|
77
77
|
def n_gram
|
78
78
|
@n_gram ||= NGram.new(@data, :n => @n)
|
79
|
-
end
|
79
|
+
end
|
80
|
+
|
80
81
|
end
|
data/tf_idf.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{tf_idf}
|
8
|
-
s.version = "0.0.0"
|
8
|
+
s.version = "0.0.1"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["reddavis"]
|
12
|
-
s.date = %q{2009-12-
|
12
|
+
s.date = %q{2009-12-21}
|
13
13
|
s.description = %q{A TF-IDF in ruby - http://en.wikipedia.org/wiki/Tf–idf}
|
14
14
|
s.email = %q{reddavis@gmail.com}
|
15
15
|
s.extra_rdoc_files = [
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tf_idf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.0
|
4
|
+
version: 0.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- reddavis
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-12-
|
12
|
+
date: 2009-12-21 00:00:00 +00:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|