frekwenza 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +6 -0
- data/lib/frekwenza/stop_words.rb +7 -3
- data/lib/frekwenza/tf_idf.rb +5 -5
- data/lib/frekwenza/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2e95e6c0045abe2259aa61401e009473fcfe39a0
|
4
|
+
data.tar.gz: d040b9ecbc8e842338839a576187a1384c060ef1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 66a7349fa34dd1815397fbbe949fb81d927a88068d1b1e0557ddca728ba65ecc9660d494e4711decace68c827a47af4f728aaffb21dc51ed4234fd1a6e009f82
|
7
|
+
data.tar.gz: 4eb6cb0199039c171863d481f6148bb9cb8b60af39208ceb14a9176c0dc2e8b12997514f4f7a43213cdafa1541f0ac3720f81768cab22fff043a99e1916ba185
|
data/README.md
CHANGED
@@ -2,6 +2,12 @@
|
|
2
2
|
|
3
3
|
This gem is based on [Mathieu Ripert](https://github.com/mathieuripert)'s work, [ruby-tf-idf](https://github.com/mathieuripert/ruby-tf-idf). Some changes have been made in this gem, aside from the project structure.
|
4
4
|
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
```
|
8
|
+
$ gem install frekwenza
|
9
|
+
```
|
10
|
+
|
5
11
|
## Usage
|
6
12
|
|
7
13
|
Usage is mostly the same as ruby-tf-idf. Here's an example taken straight from ruby-tf-idf's README, with only the "RubyTfIdf" part replaced by "Frekwenza".
|
data/lib/frekwenza/stop_words.rb
CHANGED
@@ -2,9 +2,13 @@ module Frekwenza
|
|
2
2
|
class StopWords
|
3
3
|
attr_reader :stop_words
|
4
4
|
|
5
|
-
def initialize(
|
6
|
-
|
7
|
-
|
5
|
+
def initialize(sw)
|
6
|
+
if sw.kind_of?(String)
|
7
|
+
string = read(sw)
|
8
|
+
build_list(string)
|
9
|
+
else
|
10
|
+
@stop_words = sw
|
11
|
+
end
|
8
12
|
end
|
9
13
|
|
10
14
|
private
|
data/lib/frekwenza/tf_idf.rb
CHANGED
@@ -2,14 +2,14 @@ module Frekwenza
|
|
2
2
|
class TfIdf
|
3
3
|
attr_reader :tf, :idf, :tf_idf
|
4
4
|
|
5
|
-
def initialize(docs, limit,
|
5
|
+
def initialize(docs, limit, stop_words=nil)
|
6
6
|
@docs = split_docs(docs)
|
7
7
|
@tf = []
|
8
8
|
@idf = {}
|
9
9
|
@tf_idf = []
|
10
10
|
@docs_size = @docs.size
|
11
11
|
calculate_tf_and_idf
|
12
|
-
calculate_tf_idf(limit,
|
12
|
+
calculate_tf_idf(limit, stop_words)
|
13
13
|
end
|
14
14
|
|
15
15
|
private
|
@@ -38,14 +38,14 @@ module Frekwenza
|
|
38
38
|
end
|
39
39
|
end
|
40
40
|
|
41
|
-
def calculate_tf_idf(limit,
|
41
|
+
def calculate_tf_idf(limit, stop_words)
|
42
42
|
@tf.each do |tf_freq|
|
43
43
|
tfidf = Hash.new(0)
|
44
44
|
tf_freq.each do |k, v|
|
45
45
|
tfidf[k] = @idf[k] * v
|
46
46
|
end
|
47
|
-
if
|
48
|
-
sw = StopWords.new(
|
47
|
+
if stop_words
|
48
|
+
sw = StopWords.new(stop_words)
|
49
49
|
tfidf.reject!{|k| sw.stop_words.include?(k)}
|
50
50
|
end
|
51
51
|
tfidf = Hash[tfidf.sort_by{|k, v| -v}[0..limit-1]]
|
data/lib/frekwenza/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: frekwenza
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mathieu Ripert
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2015-
|
12
|
+
date: 2015-08-18 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
14
|
description: 'Term Frequency - Inverse Document Frequency '
|
15
15
|
email:
|