ruby-tf-idf 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +25 -4
- data/lib/ruby-tf-idf.rb +4 -2
- data/lib/ruby-tf-idf/version.rb +1 -1
- metadata +1 -1
data/README.md
CHANGED
@@ -1,6 +1,9 @@
|
|
1
|
-
# Ruby
|
1
|
+
# Ruby-Tf-Idf
|
2
2
|
|
3
|
-
|
3
|
+
This gem calculates TF-IDF to find the most relevant words of each document in a corpus.
|
4
|
+
|
5
|
+
TF-IDF stands for Term Frequency - Inverse Document Frequency
|
6
|
+
http://en.wikipedia.org/wiki/Tf%E2%80%93idf
|
4
7
|
|
5
8
|
## Installation
|
6
9
|
|
@@ -10,7 +13,7 @@ Add this line to your application's Gemfile:
|
|
10
13
|
|
11
14
|
And then execute:
|
12
15
|
|
13
|
-
$ bundle
|
16
|
+
$ bundle install
|
14
17
|
|
15
18
|
Or install it yourself as:
|
16
19
|
|
@@ -18,7 +21,25 @@ Or install it yourself as:
|
|
18
21
|
|
19
22
|
## Usage
|
20
23
|
|
21
|
-
|
24
|
+
require 'rubygems'
|
25
|
+
require 'ruby-tf-idf'
|
26
|
+
|
27
|
+
corpus =
|
28
|
+
[
|
29
|
+
'A big enough hammer can usually fix anything',
|
30
|
+
'A bird in the hand is a big mistake .',
|
31
|
+
'A bird in the hand is better than one overhead!',
|
32
|
+
'A career is a job that takes about 20 more hours a week.',
|
33
|
+
'A clean desk is a sign of a cluttered desk drawer.',
|
34
|
+
'A cynic smells flowers and looks for the casket.'
|
35
|
+
]
|
36
|
+
|
37
|
+
limit = 2 #restrict to the top 2 relevant words per document
|
38
|
+
exclude_stop_words = false
|
39
|
+
|
40
|
+
@t = Tfidf.new(corpus,limit,exclude_stop_words)
|
41
|
+
puts @t.tf_idf
|
42
|
+
|
22
43
|
|
23
44
|
## Contributing
|
24
45
|
|
data/lib/ruby-tf-idf.rb
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
#!/bin/env ruby
|
2
|
+
# encoding: ISO-8859-1
|
3
|
+
|
1
4
|
require "ruby-tf-idf/version"
|
2
5
|
|
3
6
|
module RubyTfIdf
|
@@ -37,8 +40,7 @@ module RubyTfIdf
|
|
37
40
|
|
38
41
|
STOP_WORDS_FR = [
|
39
42
|
|
40
|
-
'-elle','-il','
|
41
|
-
'9ème','à','a','afin','ai','ainsi','ais','ait','alors','après','as','assez','au','aucun',
|
43
|
+
'-elle','-il','à','a','afin','ai','ainsi','ais','ait','alors','après','as','assez','au','aucun',
|
42
44
|
'aucune','auprès','auquel','auquelles','auquels','auraient','aurais','aurait','aurez',
|
43
45
|
'auriez','aurions','aurons','auront','aussi','aussitôt','autre','autres','aux',
|
44
46
|
'avaient','avais','avait','avant','avec','avez','aviez','avoir','avons','ayant',
|
data/lib/ruby-tf-idf/version.rb
CHANGED