wcc-text-analysis 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 53076bbd0f7310bd02cab7548cd71edb8417ad7bd320c4828c0f80dc72cdc0ad
4
- data.tar.gz: e161b846a746e14d0912898d8028f5c954f2be6eeda844a16e202744962bfeb1
3
+ metadata.gz: 66a18a3f32298d0124f6e045f4e83db791fd2d67502c327be659fed2ab82ffad
4
+ data.tar.gz: fee8ce474ebaa139fc0ea4fb8fa706cdb5646fe2b9b3eda768ed8ef23994d137
5
5
  SHA512:
6
- metadata.gz: 1f07d19805a3723020d8d978c53c5c921958122d76f51a09e39b7501dca28df2239b49eacef6f2be154eacffd9bcc588ffdd7fd2c72dbd16b328e1305f556b4f
7
- data.tar.gz: f685000411857bcb532f3e0bf46acfa1201ceb4bc53e470d3e011554489e676437e3048dcf798245f943bf471ec3d23b9dd057de544ba1728232194f3a5be24d
6
+ metadata.gz: e69ce56fe1ffec9d057e9694ff0b45cda625e894b42abca76c94c8b14992c94dbdcca4045be5452c50f496877cd2842ee9489027fde95a55d7a418057f488e63
7
+ data.tar.gz: 88730022fd77784a9ae04c34ff7ddc32dff8c75414065ed53e078e1471a2af980f495b5587c62753252d156ad1da12cb5f1569d4e073e798300673083ef781a2
data/.gitignore CHANGED
@@ -1 +1,2 @@
1
- Gemfile.lock
1
+ Gemfile.lock
2
+ pkg/*
@@ -8,11 +8,22 @@ module WCC
8
8
 
9
9
  attr_reader :normalized, :stripped
10
10
 
11
- def self.extract_terms(file)
12
- File.read("db/#{file}.txt").split("\n")
11
+ def self.extract_terms(db_file)
12
+ File.read(
13
+ File.join(File.dirname(__FILE__), '../../db', db_file),
14
+ ).split("\n")
13
15
  end
14
16
 
15
- def initialize(string, stop_words: STOPWORDS)
17
+ # Stopwords from http://www.ranks.nl/stopwords
18
+ def self.default_stopwords
19
+ @default_stopwords ||= extract_terms('stop_words.txt')
20
+ end
21
+
22
+ def self.default_exclusions
23
+ @default_exclusions ||= extract_terms('transcript_exclusions.txt')
24
+ end
25
+
26
+ def initialize(string, stop_words: self.class.default_stopwords)
16
27
  @original = string
17
28
  @stop_words = stop_words
18
29
  end
@@ -61,11 +72,7 @@ module WCC
61
72
  end
62
73
 
63
74
  def remove_ignored_tokens(string)
64
- string - (@stop_words + EXCLUSIONS)
75
+ string - (@stop_words + self.class.default_exclusions)
65
76
  end
66
-
67
- # Stopwords from http://www.ranks.nl/stopwords
68
- STOPWORDS = extract_terms("stop_words").freeze
69
- EXCLUSIONS = extract_terms("transcript_exclusions").freeze
70
77
  end
71
78
  end
@@ -1,5 +1,5 @@
1
1
  module WCC
2
2
  class TextAnalysis
3
- VERSION = '0.0.1'.freeze
3
+ VERSION = '0.0.2'.freeze
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wcc-text-analysis
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Watermark Dev