clarifier 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -0
- data/Gemfile.lock +1 -1
- data/lib/clarifier.rb +1 -0
- data/lib/clarifier/n_grams.rb +15 -0
- data/lib/clarifier/version.rb +1 -1
- data/test/n_grams_test.rb +23 -0
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 58565ee983a2150f10fc30a134866966f258035f
|
4
|
+
data.tar.gz: e64ca90da702b11ae24be30968b036b550be4d3a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 51cf55d7abfaf2610c86a9d73f1f3a305770e7bf28aa6d6235836dd5789c36b91e534d9fddcb5506dc1b4b45b80765724a6b751d05ba836243edf2dd7dc58b81
|
7
|
+
data.tar.gz: 1cbd8554f5703f4bc593a718f0e691cff000282a5652c518e7ceb07a491a740a6cb20ddec493fa61a63cff8f69247837625499192f9e10ad1ef81fd4ef89ce4d
|
data/CHANGELOG.md
CHANGED
data/Gemfile.lock
CHANGED
data/lib/clarifier.rb
CHANGED
data/lib/clarifier/n_grams.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
module Clarifier
  # Builds word-level n-grams (runs of consecutive words) from a piece of text.
  class NGrams
    # Splits +input+ into words and returns every run of +size+ consecutive
    # words, each run joined with a single space.
    #
    # A word is a maximal run of +\w+ characters; everything else
    # (whitespace, punctuation) acts as a separator and is discarded.
    #
    # @param input [String] the text to break into n-grams
    # @param size [Integer] number of words per n-gram (defaults to 2)
    # @return [Array<String>] the n-grams in order of appearance; empty when
    #   +input+ contains fewer than +size+ words
    # @raise [ArgumentError] if +size+ is less than 1
    def n_grams(input, size = 2)
      raise ArgumentError, "size must be at least 1 (got #{size})" if size < 1

      # scan (rather than split on /\W+/) so that text starting with
      # punctuation or whitespace does not produce a leading empty "word".
      words = input.scan(/\w+/)

      # each_cons yields nothing when there are fewer than +size+ words.
      words.each_cons(size).map { |gram| gram.join(' ') }
    end
  end
end
|
data/lib/clarifier/version.rb
CHANGED
data/test/n_grams_test.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
require_relative 'test_helper'
|
2
|
+
require_relative 'test_docs'
|
3
|
+
|
4
|
+
module Clarifier
  # Checks Clarifier::NGrams#n_grams against a fixed sample sentence, once
  # with the default n-gram size and once with an explicit size of 3.
  class ClarifierNGramsTest < Minitest::Test
    def test_creates_2_word_n_grams_by_default
      pairs = [
        'The quick', 'quick brown', 'brown fox', 'fox jumped',
        'jumped over', 'over the', 'the lazy', 'lazy dog'
      ]

      assert_equal pairs, Clarifier::NGrams.new.n_grams(sample_sentence)
    end

    def test_creates_3_word_n_grams
      triples = [
        'The quick brown', 'quick brown fox', 'brown fox jumped',
        'fox jumped over', 'jumped over the', 'over the lazy', 'the lazy dog'
      ]

      assert_equal triples, Clarifier::NGrams.new.n_grams(sample_sentence, 3)
    end

    private

    # The sentence both tests feed to the n-gram builder.
    def sample_sentence
      'The quick brown fox jumped over the lazy dog.'
    end
  end
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: clarifier
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rob Styles
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-03-
|
11
|
+
date: 2014-03-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -82,10 +82,12 @@ files:
|
|
82
82
|
- Rakefile
|
83
83
|
- clarifier.gemspec
|
84
84
|
- lib/clarifier.rb
|
85
|
+
- lib/clarifier/n_grams.rb
|
85
86
|
- lib/clarifier/stop_words.rb
|
86
87
|
- lib/clarifier/version.rb
|
87
88
|
- lib/clarifier/word_lists/en_gb_basic.rb
|
88
89
|
- lib/clarifier/word_lists/en_gb_mysql.rb
|
90
|
+
- test/n_grams_test.rb
|
89
91
|
- test/stop_words_test.rb
|
90
92
|
- test/test_docs.rb
|
91
93
|
- test/test_helper.rb
|
@@ -114,6 +116,7 @@ signing_key:
|
|
114
116
|
specification_version: 4
|
115
117
|
summary: Clarifier is a stopwords library for removing common words from text
|
116
118
|
test_files:
|
119
|
+
- test/n_grams_test.rb
|
117
120
|
- test/stop_words_test.rb
|
118
121
|
- test/test_docs.rb
|
119
122
|
- test/test_helper.rb
|