r_nlp 0.1.4 → 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 10e2b17b4d37d3b4ee59f74a7bead3795d8e0093
4
- data.tar.gz: 0b696821a20ef70111c46f3322aacdc3573b1759
3
+ metadata.gz: 3d61657fc30af65e20df603b8ab79b6a85671d02
4
+ data.tar.gz: 97517fb59114c31571d9af656428c483583b113c
5
5
  SHA512:
6
- metadata.gz: 38812887a46335e36b718b3f494eab97861c3100db68edb14f8b53d3df34d305b449fdadd9fff75668e3415a7c4e0a53ff0f8b76f57c57d25d9cbb853db2b9b2
7
- data.tar.gz: bf4db7dd32390a4f5760282106913b50172c2c10fc0d7657e02bca58fc922b39ce9fcf5940368680bd8d61c33bb88a503e5f77c631b56fea1143f39845e6560c
6
+ metadata.gz: 7eca6f24c111164ca86573a1edc0ff34863e93f6c321b5139fa649408a70c926389ad817ed2a957fab5735297165cc17f0f8a5b142ef363199dd840330e4d7b1
7
+ data.tar.gz: 8d0a61c57d946406ee892fac514a1ccfaf74a0a73edc8196a21747fd28c674a495d0c4e2fdd8135c707ea161aea813ae8734756f2d698d6a60c30080c237a52b
data/lib/r_nlp/idf.rb ADDED
@@ -0,0 +1,27 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
module RNlp
  # Computes the inverse document frequency (IDF) of a word over a
  # collection of documents.
  #
  # The language given to the constructor is validated and exposed through
  # +lang+; the IDF computation itself does not depend on it.
  class Idf
    # Language codes accepted by the constructor.
    SUPPORTED_LANGS = %w(ja en).freeze

    # @return [String] the language code this instance was created with
    attr_reader :lang

    # @param lang [String] language code; must be 'ja' or 'en'
    #
    # For any other value a message is printed to standard output and the
    # process is terminated via +exit+.
    def initialize(lang)
      @lang = lang
      return if SUPPORTED_LANGS.include?(lang)

      puts "#{@lang} is not compatible language\nlang should be 'ja' or 'en'"
      exit
    end

    # Calculates +log2(N / df) + 1+, where N is the number of documents and
    # df is the number of documents that contain +word+.
    #
    # @param word [String, Regexp] the term to look for. A String is matched
    #   literally as a substring (regex metacharacters such as ".", "+" or
    #   "(" carry no special meaning); a Regexp is used as the pattern as-is.
    # @param documents [Array<String>] the documents to search
    # @return [Float] the IDF value. The result is Float::INFINITY when no
    #   document contains +word+, and NaN when +documents+ is empty.
    def calc_idf(word, documents)
      # Escape plain words so that terms like "c++" or "a.b" are neither
      # interpreted as regular expressions nor raise RegexpError.
      pattern = word.is_a?(Regexp) ? word : Regexp.new(Regexp.escape(word.to_s))
      df = documents.count { |document| document =~ pattern }
      # Float division: a zero document frequency yields Infinity rather
      # than raising ZeroDivisionError.
      Math.log2(documents.size / df.to_f) + 1
    end
  end
end
data/lib/r_nlp/tf.rb CHANGED
@@ -7,6 +7,10 @@ module RNlp
7
7
  attr_reader :lang
8
8
  def initialize(lang)
9
9
  @lang = lang
10
+ unless lang == 'ja' || lang == 'en'
11
+ puts "lang #{@lang} is not compatible."
12
+ exit
13
+ end
10
14
  end
11
15
  def count(text)
12
16
  tf = Hash.new
@@ -23,7 +27,7 @@ module RNlp
23
27
  end
24
28
  end
25
29
  elsif @lang == 'en'
26
- text.split(" ").each do |line|
30
+ text.split("\n").each do |line|
27
31
  line.split(" ").each do |word|
28
32
  if tf[word] == nil
29
33
  tf[word] = 1
@@ -32,9 +36,6 @@ module RNlp
32
36
  end
33
37
  end
34
38
  end
35
- else
36
- puts "lang #{@lang} is not compatible."
37
- exit
38
39
  end
39
40
  return tf
40
41
  end
data/lib/r_nlp/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module RNlp
2
- VERSION = "0.1.4"
2
+ VERSION = "0.1.5"
3
3
  end
data/lib/r_nlp.rb CHANGED
@@ -1,5 +1,6 @@
1
1
  require "r_nlp/version"
2
2
  require "r_nlp/tf"
3
+ require 'r_nlp/idf'
3
4
 
4
5
  module RNlp
5
6
  # Your code goes here...
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: r_nlp
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - himkt
@@ -70,6 +70,7 @@ files:
70
70
  - bin/console
71
71
  - bin/setup
72
72
  - lib/r_nlp.rb
73
+ - lib/r_nlp/idf.rb
73
74
  - lib/r_nlp/tf.rb
74
75
  - lib/r_nlp/version.rb
75
76
  - r_nlp.gemspec