frekwenza 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: fe93d2259013d43219aac982560c795d7e698a03
4
- data.tar.gz: 833373b3981ad562cb81ea3372d6f447c0a9d2bc
3
+ metadata.gz: 2e95e6c0045abe2259aa61401e009473fcfe39a0
4
+ data.tar.gz: d040b9ecbc8e842338839a576187a1384c060ef1
5
5
  SHA512:
6
- metadata.gz: aaef936370715683e87b369fc7f578cc9160be2a694fdcf7cd61b2d8a0f6eed1232adc4b933de5d1c7d68c91674ac792b8a80fde8c45941500041d971e0f969e
7
- data.tar.gz: e29aa5916853ac534800acd05e64261155b682f420b20e483e64ddc45bc4e6372571adaa19a8850fd768def67746bf4860f5fca44e038c75f62076ebe23c1c61
6
+ metadata.gz: 66a7349fa34dd1815397fbbe949fb81d927a88068d1b1e0557ddca728ba65ecc9660d494e4711decace68c827a47af4f728aaffb21dc51ed4234fd1a6e009f82
7
+ data.tar.gz: 4eb6cb0199039c171863d481f6148bb9cb8b60af39208ceb14a9176c0dc2e8b12997514f4f7a43213cdafa1541f0ac3720f81768cab22fff043a99e1916ba185
data/README.md CHANGED
@@ -2,6 +2,12 @@
2
2
 
3
3
  This gem is based on [Mathieu Ripert](https://github.com/mathieuripert)'s work, [ruby-tf-idf](https://github.com/mathieuripert/ruby-tf-idf). Some changes have been made to the gem, aside from the project structure.
4
4
 
5
+ ## Installation
6
+
7
+ ```
8
+ $ gem install frekwenza
9
+ ```
10
+
5
11
  ## Usage
6
12
 
7
13
  Usage is mostly similar to ruby-tf-idf. Here's an example taken straight from ruby-tf-idf's README, with only the "RubyTfIdf" part replaced with "Frekwenza".
@@ -2,9 +2,13 @@ module Frekwenza
2
2
  class StopWords
3
3
  attr_reader :stop_words
4
4
 
5
- def initialize(file)
6
- string = read(file)
7
- build_list(string)
5
+ def initialize(sw)
6
+ if sw.kind_of?(String)
7
+ string = read(sw)
8
+ build_list(string)
9
+ else
10
+ @stop_words = sw
11
+ end
8
12
  end
9
13
 
10
14
  private
@@ -2,14 +2,14 @@ module Frekwenza
2
2
  class TfIdf
3
3
  attr_reader :tf, :idf, :tf_idf
4
4
 
5
- def initialize(docs, limit, stop_words_file=nil)
5
+ def initialize(docs, limit, stop_words=nil)
6
6
  @docs = split_docs(docs)
7
7
  @tf = []
8
8
  @idf = {}
9
9
  @tf_idf = []
10
10
  @docs_size = @docs.size
11
11
  calculate_tf_and_idf
12
- calculate_tf_idf(limit, stop_words_file)
12
+ calculate_tf_idf(limit, stop_words)
13
13
  end
14
14
 
15
15
  private
@@ -38,14 +38,14 @@ module Frekwenza
38
38
  end
39
39
  end
40
40
 
41
- def calculate_tf_idf(limit, stop_words_file)
41
+ def calculate_tf_idf(limit, stop_words)
42
42
  @tf.each do |tf_freq|
43
43
  tfidf = Hash.new(0)
44
44
  tf_freq.each do |k, v|
45
45
  tfidf[k] = @idf[k] * v
46
46
  end
47
- if stop_words_file
48
- sw = StopWords.new(stop_words_file)
47
+ if stop_words
48
+ sw = StopWords.new(stop_words)
49
49
  tfidf.reject!{|k| sw.stop_words.include?(k)}
50
50
  end
51
51
  tfidf = Hash[tfidf.sort_by{|k, v| -v}[0..limit-1]]
@@ -1,3 +1,3 @@
1
1
  module Frekwenza
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: frekwenza
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mathieu Ripert
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2015-07-25 00:00:00.000000000 Z
12
+ date: 2015-08-18 00:00:00.000000000 Z
13
13
  dependencies: []
14
14
  description: 'Term Frequency - Inverse Document Frequency '
15
15
  email: