ruby-tf-idf 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +25 -4
- data/lib/ruby-tf-idf.rb +4 -2
- data/lib/ruby-tf-idf/version.rb +1 -1
- metadata +1 -1
data/README.md
CHANGED
@@ -1,6 +1,9 @@
|
|
1
|
-
# Ruby
|
1
|
+
# Ruby-Tf-Idf
|
2
2
|
|
3
|
-
|
3
|
+
This gem calculates TF-IDF to find the most relevant words of each document in a corpus.
|
4
|
+
|
5
|
+
TF-IDF stands for Term Frequency - Inverse Document Frequency:
|
6
|
+
http://en.wikipedia.org/wiki/Tf%E2%80%93idf
|
4
7
|
|
5
8
|
## Installation
|
6
9
|
|
@@ -10,7 +13,7 @@ Add this line to your application's Gemfile:
|
|
10
13
|
|
11
14
|
And then execute:
|
12
15
|
|
13
|
-
$ bundle
|
16
|
+
$ bundle install
|
14
17
|
|
15
18
|
Or install it yourself as:
|
16
19
|
|
@@ -18,7 +21,25 @@ Or install it yourself as:
|
|
18
21
|
|
19
22
|
## Usage
|
20
23
|
|
21
|
-
|
24
|
+
require 'rubygems'
|
25
|
+
require 'ruby-tf-idf'
|
26
|
+
|
27
|
+
corpus =
|
28
|
+
[
|
29
|
+
'A big enough hammer can usually fix anything',
|
30
|
+
'A bird in the hand is a big mistake .',
|
31
|
+
'A bird in the hand is better than one overhead!',
|
32
|
+
'A career is a job that takes about 20 more hours a week.',
|
33
|
+
'A clean desk is a sign of a cluttered desk drawer.',
|
34
|
+
'A cynic smells flowers and looks for the casket.'
|
35
|
+
]
|
36
|
+
|
37
|
+
limit = 2 #restrict to the top 2 relevant words per document
|
38
|
+
exclude_stop_words = false
|
39
|
+
|
40
|
+
@t = Tfidf.new(corpus,limit,exclude_stop_words)
|
41
|
+
puts @t.tf_idf
|
42
|
+
|
22
43
|
|
23
44
|
## Contributing
|
24
45
|
|
data/lib/ruby-tf-idf.rb
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
#!/bin/env ruby
|
2
|
+
# encoding: ISO-8859-1
|
3
|
+
|
1
4
|
require "ruby-tf-idf/version"
|
2
5
|
|
3
6
|
module RubyTfIdf
|
@@ -37,8 +40,7 @@ module RubyTfIdf
|
|
37
40
|
|
38
41
|
STOP_WORDS_FR = [
|
39
42
|
|
40
|
-
'-elle','-il','
|
41
|
-
'9ème','à','a','afin','ai','ainsi','ais','ait','alors','après','as','assez','au','aucun',
|
43
|
+
'-elle','-il','à','a','afin','ai','ainsi','ais','ait','alors','après','as','assez','au','aucun',
|
42
44
|
'aucune','auprès','auquel','auquelles','auquels','auraient','aurais','aurait','aurez',
|
43
45
|
'auriez','aurions','aurons','auront','aussi','aussitôt','autre','autres','aux',
|
44
46
|
'avaient','avais','avait','avant','avec','avez','aviez','avoir','avons','ayant',
|
data/lib/ruby-tf-idf/version.rb
CHANGED