ruby-tf-idf 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -1,6 +1,9 @@
1
- # Ruby::Tf::Idf
1
+ # Ruby-Tf-Idf
2
2
 
3
- TODO: Write a gem description
3
+ This gem calculates TF-IDF to find the most relevant words of each document in a corpus.
4
+
5
+ TF-IDF stands for Term Frequency - Inverse Document Frequency:
6
+ http://en.wikipedia.org/wiki/Tf%E2%80%93idf
4
7
 
5
8
  ## Installation
6
9
 
@@ -10,7 +13,7 @@ Add this line to your application's Gemfile:
10
13
 
11
14
  And then execute:
12
15
 
13
- $ bundle
16
+ $ bundle install
14
17
 
15
18
  Or install it yourself as:
16
19
 
@@ -18,7 +21,25 @@ Or install it yourself as:
18
21
 
19
22
  ## Usage
20
23
 
21
- TODO: Write usage instructions here
24
+ require 'rubygems'
25
+ require 'ruby-tf-idf'
26
+
27
+ corpus =
28
+ [
29
+ 'A big enough hammer can usually fix anything',
30
+ 'A bird in the hand is a big mistake .',
31
+ 'A bird in the hand is better than one overhead!',
32
+ 'A career is a job that takes about 20 more hours a week.',
33
+ 'A clean desk is a sign of a cluttered desk drawer.',
34
+ 'A cynic smells flowers and looks for the casket.'
35
+ ]
36
+
37
+ limit = 2 # restrict to the 2 most relevant words per document
38
+ exclude_stop_words = false
39
+
40
+ @t = Tfidf.new(corpus,limit,exclude_stop_words)
41
+ puts @t.tf_idf
42
+
22
43
 
23
44
  ## Contributing
24
45
 
@@ -1,3 +1,6 @@
1
+ #!/bin/env ruby
2
+ # encoding: ISO-8859-1
3
+
1
4
  require "ruby-tf-idf/version"
2
5
 
3
6
  module RubyTfIdf
@@ -37,8 +40,7 @@ module RubyTfIdf
37
40
 
38
41
  STOP_WORDS_FR = [
39
42
 
40
- '-elle','-il','10ème','1er','1ère','2ème','3ème','4ème','5ème','6ème','7ème','8ème',
41
- '9ème','à','a','afin','ai','ainsi','ais','ait','alors','après','as','assez','au','aucun',
43
+ '-elle','-il','à','a','afin','ai','ainsi','ais','ait','alors','après','as','assez','au','aucun',
42
44
  'aucune','auprès','auquel','auquelles','auquels','auraient','aurais','aurait','aurez',
43
45
  'auriez','aurions','aurons','auront','aussi','aussitôt','autre','autres','aux',
44
46
  'avaient','avais','avait','avant','avec','avez','aviez','avoir','avons','ayant',
@@ -1,3 +1,3 @@
1
1
  module RubyTfIdf
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.3"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ruby-tf-idf
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors: