r_nlp 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 10e2b17b4d37d3b4ee59f74a7bead3795d8e0093
4
- data.tar.gz: 0b696821a20ef70111c46f3322aacdc3573b1759
3
+ metadata.gz: 3d61657fc30af65e20df603b8ab79b6a85671d02
4
+ data.tar.gz: 97517fb59114c31571d9af656428c483583b113c
5
5
  SHA512:
6
- metadata.gz: 38812887a46335e36b718b3f494eab97861c3100db68edb14f8b53d3df34d305b449fdadd9fff75668e3415a7c4e0a53ff0f8b76f57c57d25d9cbb853db2b9b2
7
- data.tar.gz: bf4db7dd32390a4f5760282106913b50172c2c10fc0d7657e02bca58fc922b39ce9fcf5940368680bd8d61c33bb88a503e5f77c631b56fea1143f39845e6560c
6
+ metadata.gz: 7eca6f24c111164ca86573a1edc0ff34863e93f6c321b5139fa649408a70c926389ad817ed2a957fab5735297165cc17f0f8a5b142ef363199dd840330e4d7b1
7
+ data.tar.gz: 8d0a61c57d946406ee892fac514a1ccfaf74a0a73edc8196a21747fd28c674a495d0c4e2fdd8135c707ea161aea813ae8734756f2d698d6a60c30080c237a52b
data/lib/r_nlp/idf.rb ADDED
@@ -0,0 +1,27 @@
# -*- coding: utf-8 -*-

module RNlp
  # Computes the inverse document frequency (IDF) of a term over a
  # collection of documents.
  class Idf
    # Language codes accepted by the constructor.
    SUPPORTED_LANGS = %w(ja en).freeze

    # @return [String] the language code given at construction ('ja' or 'en')
    attr_reader :lang

    # @param lang [String] language code; must be 'ja' or 'en'
    def initialize(lang)
      @lang = lang
      return if SUPPORTED_LANGS.include?(lang)

      # NOTE(review): printing and calling `exit` from library code terminates
      # the host process with a success status. Kept as-is so existing callers
      # see the same behavior; consider raising ArgumentError in a future
      # major version.
      puts "#{@lang} is not compatible language\nlang should be 'ja' or 'en'"
      exit
    end

    # Computes log2(N / df) + 1, where N is the number of documents and df is
    # the number of documents that contain +word+.
    #
    # Matching is a substring match against each document, not a
    # token-boundary match.
    #
    # @param word [String, Regexp] the term to look for. A String is matched
    #   literally (regex metacharacters are escaped); a Regexp is used as given.
    # @param documents [Array<String>] the document collection
    # @return [Float] the IDF value, or 0.0 when no document contains +word+
    #   (this includes an empty collection)
    def calc_idf(word, documents)
      # Escape plain terms so that words such as "c++", "a.b" or "(" are
      # matched literally instead of being interpreted as regex syntax.
      pattern = word.is_a?(Regexp) ? word : Regexp.new(Regexp.escape(word.to_s))
      df = documents.count { |document| document =~ pattern }

      # With df == 0 the ratio would be Infinity (or NaN for an empty
      # collection), which poisons any later tf * idf product.
      return 0.0 if df.zero?

      Math.log2(documents.size / df.to_f) + 1
    end
  end
end
data/lib/r_nlp/tf.rb CHANGED
@@ -7,6 +7,10 @@ module RNlp
7
7
  attr_reader :lang
8
8
  def initialize(lang)
9
9
  @lang = lang
10
+ unless lang == 'ja' || lang == 'en'
11
+ puts "lang #{@lang} is not compatible."
12
+ exit
13
+ end
10
14
  end
11
15
  def count(text)
12
16
  tf = Hash.new
@@ -23,7 +27,7 @@ module RNlp
23
27
  end
24
28
  end
25
29
  elsif @lang == 'en'
26
- text.split(" ").each do |line|
30
+ text.split("\n").each do |line|
27
31
  line.split(" ").each do |word|
28
32
  if tf[word] == nil
29
33
  tf[word] = 1
@@ -32,9 +36,6 @@ module RNlp
32
36
  end
33
37
  end
34
38
  end
35
- else
36
- puts "lang #{@lang} is not compatible."
37
- exit
38
39
  end
39
40
  return tf
40
41
  end
data/lib/r_nlp/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module RNlp
2
- VERSION = "0.1.4"
2
+ VERSION = "0.1.5"
3
3
  end
data/lib/r_nlp.rb CHANGED
@@ -1,5 +1,6 @@
1
1
  require "r_nlp/version"
2
2
  require "r_nlp/tf"
3
+ require 'r_nlp/idf'
3
4
 
4
5
  module RNlp
5
6
  # Your code goes here...
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: r_nlp
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - himkt
@@ -70,6 +70,7 @@ files:
70
70
  - bin/console
71
71
  - bin/setup
72
72
  - lib/r_nlp.rb
73
+ - lib/r_nlp/idf.rb
73
74
  - lib/r_nlp/tf.rb
74
75
  - lib/r_nlp/version.rb
75
76
  - r_nlp.gemspec