r_nlp 0.1.4 → 0.1.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/r_nlp/idf.rb +27 -0
- data/lib/r_nlp/tf.rb +5 -4
- data/lib/r_nlp/version.rb +1 -1
- data/lib/r_nlp.rb +1 -0
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3d61657fc30af65e20df603b8ab79b6a85671d02
|
4
|
+
data.tar.gz: 97517fb59114c31571d9af656428c483583b113c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7eca6f24c111164ca86573a1edc0ff34863e93f6c321b5139fa649408a70c926389ad817ed2a957fab5735297165cc17f0f8a5b142ef363199dd840330e4d7b1
|
7
|
+
data.tar.gz: 8d0a61c57d946406ee892fac514a1ccfaf74a0a73edc8196a21747fd28c674a495d0c4e2fdd8135c707ea161aea813ae8734756f2d698d6a60c30080c237a52b
|
data/lib/r_nlp/idf.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
module RNlp
  # Computes an inverse document frequency (IDF) score for a word over a
  # collection of documents.
  class Idf
    # Language code given at construction; compatible with 'ja' or 'en'.
    attr_reader :lang

    # @param lang [String] language code, expected to be 'ja' or 'en'
    #
    # NOTE(review): for any other value this prints a message and terminates
    # the whole process via `exit`. That is kept for backward compatibility;
    # raising ArgumentError would be friendlier for a library.
    def initialize(lang)
      @lang = lang
      return if lang == 'ja' || lang == 'en'

      puts "#{@lang} is not compatible language\nlang should be 'ja' or 'en'"
      exit
    end

    # Returns log2(N / df) + 1, where N is the number of documents and df is
    # the number of documents that contain +word+ as a literal substring.
    #
    # @param word [String] term to look up; it is matched literally, so regex
    #   metacharacters such as "+", "." or "(" carry no special meaning
    # @param documents [Array<String>] documents should be array of string
    # @return [Float] the IDF score; the result is not finite when no document
    #   contains the word (df is zero)
    def calc_idf(word, documents)
      # Escape the word so that e.g. "c++" or "a.c" only match themselves, and
      # build the pattern once instead of once per document.
      pattern = Regexp.new(Regexp.escape(word.to_s))
      df = documents.count { |document| pattern.match?(document) }

      # Float division keeps the ratio fractional (and avoids ZeroDivisionError).
      Math.log2(documents.size / df.to_f) + 1
    end
  end
end
|
data/lib/r_nlp/tf.rb
CHANGED
@@ -7,6 +7,10 @@ module RNlp
|
|
7
7
|
attr_reader :lang
|
8
8
|
def initialize(lang)
|
9
9
|
@lang = lang
|
10
|
+
unless lang == 'ja' || lang == 'en'
|
11
|
+
puts "lang #{@lang} is not compatible."
|
12
|
+
exit
|
13
|
+
end
|
10
14
|
end
|
11
15
|
def count(text)
|
12
16
|
tf = Hash.new
|
@@ -23,7 +27,7 @@ module RNlp
|
|
23
27
|
end
|
24
28
|
end
|
25
29
|
elsif @lang == 'en'
|
26
|
-
text.split("
|
30
|
+
text.split("\n").each do |line|
|
27
31
|
line.split(" ").each do |word|
|
28
32
|
if tf[word] == nil
|
29
33
|
tf[word] = 1
|
@@ -32,9 +36,6 @@ module RNlp
|
|
32
36
|
end
|
33
37
|
end
|
34
38
|
end
|
35
|
-
else
|
36
|
-
puts "lang #{@lang} is not compatible."
|
37
|
-
exit
|
38
39
|
end
|
39
40
|
return tf
|
40
41
|
end
|
data/lib/r_nlp/version.rb
CHANGED
data/lib/r_nlp.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: r_nlp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.4
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- himkt
|
@@ -70,6 +70,7 @@ files:
|
|
70
70
|
- bin/console
|
71
71
|
- bin/setup
|
72
72
|
- lib/r_nlp.rb
|
73
|
+
- lib/r_nlp/idf.rb
|
73
74
|
- lib/r_nlp/tf.rb
|
74
75
|
- lib/r_nlp/version.rb
|
75
76
|
- r_nlp.gemspec
|