clarifier 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d53807d854a8ce086196afc575ba901abc29bb7e
4
- data.tar.gz: ab905fce3c0436754135fbf76de9f495a62f8ffa
3
+ metadata.gz: 58565ee983a2150f10fc30a134866966f258035f
4
+ data.tar.gz: e64ca90da702b11ae24be30968b036b550be4d3a
5
5
  SHA512:
6
- metadata.gz: b9c761e333acd0cfcf8ecbda6c66448303d98aef4e45937a23f7260e7a538ca808b1b23927d058f37309754ccdbd332c977e7301fa6b79af283d58e68c2098d9
7
- data.tar.gz: fb0362001038e405b7f37e4ebd14c2bbe97fa43349a53b54acbadc53e28086b5736a2a4da791749bbd58e2a3e72ccb6e92b4f01dfbcfa92d14451dc614764ee5
6
+ metadata.gz: 51cf55d7abfaf2610c86a9d73f1f3a305770e7bf28aa6d6235836dd5789c36b91e534d9fddcb5506dc1b4b45b80765724a6b751d05ba836243edf2dd7dc58b81
7
+ data.tar.gz: 1cbd8554f5703f4bc593a718f0e691cff000282a5652c518e7ceb07a491a740a6cb20ddec493fa61a63cff8f69247837625499192f9e10ad1ef81fd4ef89ce4d
@@ -1,2 +1,5 @@
1
+ # 0.0.2 / 2014-03-12
2
+ * [FEATURE] Added n-gram generator
3
+
1
4
  # 0.0.1 / 2014-03-04
2
5
  * [FEATURE] Initial Release
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- clarifier (0.0.1)
4
+ clarifier (0.0.2)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -1,5 +1,6 @@
1
1
  require 'clarifier/version'
2
2
  require 'clarifier/stop_words'
3
+ require 'clarifier/n_grams'
3
4
 
4
5
  require 'clarifier/word_lists/en_gb_basic'
5
6
  require 'clarifier/word_lists/en_gb_mysql'
@@ -0,0 +1,15 @@
1
+ module Clarifier
2
+ class NGrams
3
+
4
+ def n_grams(input, size = 2)
5
+ words = input.split(/\W+/)
6
+ ngrams = []
7
+ words.each_with_index do |word, i|
8
+ upper_limit = i + size - 1
9
+ ngrams << words[i..upper_limit].join(' ') unless upper_limit >= words.length
10
+ end
11
+ ngrams
12
+ end
13
+
14
+ end
15
+ end
@@ -1,3 +1,3 @@
1
1
  module Clarifier
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
@@ -0,0 +1,23 @@
1
+ require_relative 'test_helper'
2
+ require_relative 'test_docs'
3
+
4
+ module Clarifier
5
+
6
+ class ClarifierNGramsTest < Minitest::Test
7
+
8
+ def test_creates_2_word_n_grams_by_default
9
+ ng = Clarifier::NGrams.new
10
+ input = 'The quick brown fox jumped over the lazy dog.'
11
+ expected = ['The quick', 'quick brown', 'brown fox', 'fox jumped', 'jumped over', 'over the', 'the lazy', 'lazy dog']
12
+ assert_equal expected, ng.n_grams(input)
13
+ end
14
+
15
+ def test_creates_3_word_n_grams
16
+ ng = Clarifier::NGrams.new
17
+ input = 'The quick brown fox jumped over the lazy dog.'
18
+ expected = ['The quick brown', 'quick brown fox', 'brown fox jumped', 'fox jumped over', 'jumped over the', 'over the lazy', 'the lazy dog']
19
+ assert_equal expected, ng.n_grams(input, 3)
20
+ end
21
+
22
+ end
23
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: clarifier
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rob Styles
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-03-04 00:00:00.000000000 Z
11
+ date: 2014-03-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -82,10 +82,12 @@ files:
82
82
  - Rakefile
83
83
  - clarifier.gemspec
84
84
  - lib/clarifier.rb
85
+ - lib/clarifier/n_grams.rb
85
86
  - lib/clarifier/stop_words.rb
86
87
  - lib/clarifier/version.rb
87
88
  - lib/clarifier/word_lists/en_gb_basic.rb
88
89
  - lib/clarifier/word_lists/en_gb_mysql.rb
90
+ - test/n_grams_test.rb
89
91
  - test/stop_words_test.rb
90
92
  - test/test_docs.rb
91
93
  - test/test_helper.rb
@@ -114,6 +116,7 @@ signing_key:
114
116
  specification_version: 4
115
117
  summary: Clarifier is a stopwords library for removing common words from text
116
118
  test_files:
119
+ - test/n_grams_test.rb
117
120
  - test/stop_words_test.rb
118
121
  - test/test_docs.rb
119
122
  - test/test_helper.rb