clarifier 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -0
- data/Gemfile.lock +1 -1
- data/lib/clarifier.rb +1 -0
- data/lib/clarifier/n_grams.rb +15 -0
- data/lib/clarifier/version.rb +1 -1
- data/test/n_grams_test.rb +23 -0
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 58565ee983a2150f10fc30a134866966f258035f
|
4
|
+
data.tar.gz: e64ca90da702b11ae24be30968b036b550be4d3a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 51cf55d7abfaf2610c86a9d73f1f3a305770e7bf28aa6d6235836dd5789c36b91e534d9fddcb5506dc1b4b45b80765724a6b751d05ba836243edf2dd7dc58b81
|
7
|
+
data.tar.gz: 1cbd8554f5703f4bc593a718f0e691cff000282a5652c518e7ceb07a491a740a6cb20ddec493fa61a63cff8f69247837625499192f9e10ad1ef81fd4ef89ce4d
|
data/CHANGELOG.md
CHANGED
data/Gemfile.lock
CHANGED
data/lib/clarifier.rb
CHANGED
data/lib/clarifier/n_grams.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
module Clarifier
  # Builds word n-grams (runs of consecutive words) from free text.
  class NGrams
    # Splits +input+ on runs of non-word characters and returns every run of
    # +size+ consecutive words, each joined with a single space.
    #
    # @param input [String] text to tokenize
    # @param size [Integer] number of words per n-gram (defaults to 2)
    # @return [Array<String>] n-grams in order of appearance; empty when the
    #   input contains fewer than +size+ words
    # @raise [ArgumentError] if +size+ is less than 1
    def n_grams(input, size = 2)
      raise ArgumentError, "size must be at least 1, got #{size.inspect}" if size < 1

      # String#split keeps a leading empty field when the text starts with a
      # separator (e.g. '"Hi" there'); drop it so it cannot leak into an n-gram.
      words = input.split(/\W+/).reject(&:empty?)

      # each_cons yields each window of +size+ adjacent words and yields
      # nothing when there are fewer than +size+ words.
      words.each_cons(size).map { |group| group.join(' ') }
    end
  end
end
|
data/lib/clarifier/version.rb
CHANGED
data/test/n_grams_test.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
require_relative 'test_helper'
|
2
|
+
require_relative 'test_docs'
|
3
|
+
|
4
|
+
module Clarifier
  # Exercises Clarifier::NGrams#n_grams against a fixed sample sentence.
  class ClarifierNGramsTest < Minitest::Test
    # Sentence shared by every test case below.
    SENTENCE = 'The quick brown fox jumped over the lazy dog.'

    # Calling n_grams without a size is expected to produce word pairs.
    def test_creates_2_word_n_grams_by_default
      pairs = [
        'The quick', 'quick brown', 'brown fox', 'fox jumped',
        'jumped over', 'over the', 'the lazy', 'lazy dog'
      ]

      assert_equal pairs, Clarifier::NGrams.new.n_grams(SENTENCE)
    end

    # Passing 3 as the size is expected to produce word triples.
    def test_creates_3_word_n_grams
      triples = [
        'The quick brown', 'quick brown fox', 'brown fox jumped', 'fox jumped over',
        'jumped over the', 'over the lazy', 'the lazy dog'
      ]

      assert_equal triples, Clarifier::NGrams.new.n_grams(SENTENCE, 3)
    end
  end
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: clarifier
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rob Styles
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-03-
|
11
|
+
date: 2014-03-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -82,10 +82,12 @@ files:
|
|
82
82
|
- Rakefile
|
83
83
|
- clarifier.gemspec
|
84
84
|
- lib/clarifier.rb
|
85
|
+
- lib/clarifier/n_grams.rb
|
85
86
|
- lib/clarifier/stop_words.rb
|
86
87
|
- lib/clarifier/version.rb
|
87
88
|
- lib/clarifier/word_lists/en_gb_basic.rb
|
88
89
|
- lib/clarifier/word_lists/en_gb_mysql.rb
|
90
|
+
- test/n_grams_test.rb
|
89
91
|
- test/stop_words_test.rb
|
90
92
|
- test/test_docs.rb
|
91
93
|
- test/test_helper.rb
|
@@ -114,6 +116,7 @@ signing_key:
|
|
114
116
|
specification_version: 4
|
115
117
|
summary: Clarifier is a stopwords library for removing common words from text
|
116
118
|
test_files:
|
119
|
+
- test/n_grams_test.rb
|
117
120
|
- test/stop_words_test.rb
|
118
121
|
- test/test_docs.rb
|
119
122
|
- test/test_helper.rb
|