frekwenza 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: fe93d2259013d43219aac982560c795d7e698a03
4
- data.tar.gz: 833373b3981ad562cb81ea3372d6f447c0a9d2bc
3
+ metadata.gz: 2e95e6c0045abe2259aa61401e009473fcfe39a0
4
+ data.tar.gz: d040b9ecbc8e842338839a576187a1384c060ef1
5
5
  SHA512:
6
- metadata.gz: aaef936370715683e87b369fc7f578cc9160be2a694fdcf7cd61b2d8a0f6eed1232adc4b933de5d1c7d68c91674ac792b8a80fde8c45941500041d971e0f969e
7
- data.tar.gz: e29aa5916853ac534800acd05e64261155b682f420b20e483e64ddc45bc4e6372571adaa19a8850fd768def67746bf4860f5fca44e038c75f62076ebe23c1c61
6
+ metadata.gz: 66a7349fa34dd1815397fbbe949fb81d927a88068d1b1e0557ddca728ba65ecc9660d494e4711decace68c827a47af4f728aaffb21dc51ed4234fd1a6e009f82
7
+ data.tar.gz: 4eb6cb0199039c171863d481f6148bb9cb8b60af39208ceb14a9176c0dc2e8b12997514f4f7a43213cdafa1541f0ac3720f81768cab22fff043a99e1916ba185
data/README.md CHANGED
@@ -2,6 +2,12 @@
2
2
 
3
3
  This gem is based on [Mathieu Ripert](https://github.com/mathieuripert)'s work, [ruby-tf-idf](https://github.com/mathieuripert/ruby-tf-idf). Some changes have been made in the gem, in addition to the project structure.
4
4
 
5
+ ## Installation
6
+
7
+ ```
8
+ $ gem install frekwenza
9
+ ```
10
+
5
11
  ## Usage
6
12
 
7
13
  Usage is mostly similar to ruby-tf-idf; here's an example taken straight from ruby-tf-idf's README, with only the "RubyTfIdf" part replaced by "Frekwenza".
@@ -2,9 +2,13 @@ module Frekwenza
2
2
  class StopWords
3
3
  attr_reader :stop_words
4
4
 
5
- def initialize(file)
6
- string = read(file)
7
- build_list(string)
5
+ def initialize(sw)
6
+ if sw.kind_of?(String)
7
+ string = read(sw)
8
+ build_list(string)
9
+ else
10
+ @stop_words = sw
11
+ end
8
12
  end
9
13
 
10
14
  private
@@ -2,14 +2,14 @@ module Frekwenza
2
2
  class TfIdf
3
3
  attr_reader :tf, :idf, :tf_idf
4
4
 
5
- def initialize(docs, limit, stop_words_file=nil)
5
+ def initialize(docs, limit, stop_words=nil)
6
6
  @docs = split_docs(docs)
7
7
  @tf = []
8
8
  @idf = {}
9
9
  @tf_idf = []
10
10
  @docs_size = @docs.size
11
11
  calculate_tf_and_idf
12
- calculate_tf_idf(limit, stop_words_file)
12
+ calculate_tf_idf(limit, stop_words)
13
13
  end
14
14
 
15
15
  private
@@ -38,14 +38,14 @@ module Frekwenza
38
38
  end
39
39
  end
40
40
 
41
- def calculate_tf_idf(limit, stop_words_file)
41
+ def calculate_tf_idf(limit, stop_words)
42
42
  @tf.each do |tf_freq|
43
43
  tfidf = Hash.new(0)
44
44
  tf_freq.each do |k, v|
45
45
  tfidf[k] = @idf[k] * v
46
46
  end
47
- if stop_words_file
48
- sw = StopWords.new(stop_words_file)
47
+ if stop_words
48
+ sw = StopWords.new(stop_words)
49
49
  tfidf.reject!{|k| sw.stop_words.include?(k)}
50
50
  end
51
51
  tfidf = Hash[tfidf.sort_by{|k, v| -v}[0..limit-1]]
@@ -1,3 +1,3 @@
1
1
  module Frekwenza
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: frekwenza
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mathieu Ripert
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2015-07-25 00:00:00.000000000 Z
12
+ date: 2015-08-18 00:00:00.000000000 Z
13
13
  dependencies: []
14
14
  description: 'Term Frequency - Inverse Document Frequency '
15
15
  email: