wcc-text-analysis 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 53076bbd0f7310bd02cab7548cd71edb8417ad7bd320c4828c0f80dc72cdc0ad
4
- data.tar.gz: e161b846a746e14d0912898d8028f5c954f2be6eeda844a16e202744962bfeb1
3
+ metadata.gz: 66a18a3f32298d0124f6e045f4e83db791fd2d67502c327be659fed2ab82ffad
4
+ data.tar.gz: fee8ce474ebaa139fc0ea4fb8fa706cdb5646fe2b9b3eda768ed8ef23994d137
5
5
  SHA512:
6
- metadata.gz: 1f07d19805a3723020d8d978c53c5c921958122d76f51a09e39b7501dca28df2239b49eacef6f2be154eacffd9bcc588ffdd7fd2c72dbd16b328e1305f556b4f
7
- data.tar.gz: f685000411857bcb532f3e0bf46acfa1201ceb4bc53e470d3e011554489e676437e3048dcf798245f943bf471ec3d23b9dd057de544ba1728232194f3a5be24d
6
+ metadata.gz: e69ce56fe1ffec9d057e9694ff0b45cda625e894b42abca76c94c8b14992c94dbdcca4045be5452c50f496877cd2842ee9489027fde95a55d7a418057f488e63
7
+ data.tar.gz: 88730022fd77784a9ae04c34ff7ddc32dff8c75414065ed53e078e1471a2af980f495b5587c62753252d156ad1da12cb5f1569d4e073e798300673083ef781a2
data/.gitignore CHANGED
@@ -1 +1,2 @@
1
- Gemfile.lock
1
+ Gemfile.lock
2
+ pkg/*
@@ -8,11 +8,22 @@ module WCC
8
8
 
9
9
  attr_reader :normalized, :stripped
10
10
 
11
- def self.extract_terms(file)
12
- File.read("db/#{file}.txt").split("\n")
11
+ def self.extract_terms(db_file)
12
+ File.read(
13
+ File.join(File.dirname(__FILE__), '../../db', db_file),
14
+ ).split("\n")
13
15
  end
14
16
 
15
- def initialize(string, stop_words: STOPWORDS)
17
+ # Stopwords from http://www.ranks.nl/stopwords
18
+ def self.default_stopwords
19
+ @default_stopwords ||= extract_terms('stop_words.txt')
20
+ end
21
+
22
+ def self.default_exclusions
23
+ @default_exclusions ||= extract_terms('transcript_exclusions.txt')
24
+ end
25
+
26
+ def initialize(string, stop_words: self.class.default_stopwords)
16
27
  @original = string
17
28
  @stop_words = stop_words
18
29
  end
@@ -61,11 +72,7 @@ module WCC
61
72
  end
62
73
 
63
74
  def remove_ignored_tokens(string)
64
- string - (@stop_words + EXCLUSIONS)
75
+ string - (@stop_words + self.class.default_exclusions)
65
76
  end
66
-
67
- # Stopwords from http://www.ranks.nl/stopwords
68
- STOPWORDS = extract_terms("stop_words").freeze
69
- EXCLUSIONS = extract_terms("transcript_exclusions").freeze
70
77
  end
71
78
  end
@@ -1,5 +1,5 @@
1
1
  module WCC
2
2
  class TextAnalysis
3
- VERSION = '0.0.1'.freeze
3
+ VERSION = '0.0.2'.freeze
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wcc-text-analysis
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Watermark Dev