clarifier 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d53807d854a8ce086196afc575ba901abc29bb7e
4
- data.tar.gz: ab905fce3c0436754135fbf76de9f495a62f8ffa
3
+ metadata.gz: 58565ee983a2150f10fc30a134866966f258035f
4
+ data.tar.gz: e64ca90da702b11ae24be30968b036b550be4d3a
5
5
  SHA512:
6
- metadata.gz: b9c761e333acd0cfcf8ecbda6c66448303d98aef4e45937a23f7260e7a538ca808b1b23927d058f37309754ccdbd332c977e7301fa6b79af283d58e68c2098d9
7
- data.tar.gz: fb0362001038e405b7f37e4ebd14c2bbe97fa43349a53b54acbadc53e28086b5736a2a4da791749bbd58e2a3e72ccb6e92b4f01dfbcfa92d14451dc614764ee5
6
+ metadata.gz: 51cf55d7abfaf2610c86a9d73f1f3a305770e7bf28aa6d6235836dd5789c36b91e534d9fddcb5506dc1b4b45b80765724a6b751d05ba836243edf2dd7dc58b81
7
+ data.tar.gz: 1cbd8554f5703f4bc593a718f0e691cff000282a5652c518e7ceb07a491a740a6cb20ddec493fa61a63cff8f69247837625499192f9e10ad1ef81fd4ef89ce4d
@@ -1,2 +1,5 @@
1
+ # 0.0.2 / 2014-03-12
2
+ * [FEATURE] Added n-gram generator
3
+
1
4
  # 0.0.1 / 2014-03-04
2
5
  * [FEATURE] Initial Release
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- clarifier (0.0.1)
4
+ clarifier (0.0.2)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -1,5 +1,6 @@
1
1
  require 'clarifier/version'
2
2
  require 'clarifier/stop_words'
3
+ require 'clarifier/n_grams'
3
4
 
4
5
  require 'clarifier/word_lists/en_gb_basic'
5
6
  require 'clarifier/word_lists/en_gb_mysql'
@@ -0,0 +1,15 @@
1
module Clarifier
  # Builds word-level n-grams (runs of consecutive words) from a piece of text.
  class NGrams
    # Splits +input+ into words and returns every run of +size+ consecutive
    # words, each joined with a single space, in order of appearance.
    #
    # A word is a maximal run of +\w+ characters; punctuation and whitespace
    # only act as separators and never appear in the output.
    #
    # @param input [String] the text to tokenise
    # @param size [Integer] number of words per n-gram (defaults to 2)
    # @return [Array<String>] the n-grams; empty when +input+ contains fewer
    #   than +size+ words
    # @raise [ArgumentError] if +size+ is less than 1
    def n_grams(input, size = 2)
      # A non-positive size has no meaningful n-grams; fail loudly instead of
      # returning the whole sentence plus empty strings.
      raise ArgumentError, 'size must be at least 1' if size < 1

      # scan (rather than split on /\W+/) avoids an empty leading token when
      # the text starts with whitespace or punctuation.
      input.scan(/\w+/).each_cons(size).map { |gram| gram.join(' ') }
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  module Clarifier
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
@@ -0,0 +1,23 @@
1
+ require_relative 'test_helper'
2
+ require_relative 'test_docs'
3
+
4
module Clarifier
  # Checks Clarifier::NGrams#n_grams against a fixed sample sentence, once
  # with the default n-gram size and once with an explicit size of 3.
  class ClarifierNGramsTest < Minitest::Test
    # With no size argument the generator should emit two-word n-grams.
    def test_creates_2_word_n_grams_by_default
      bigrams = [
        'The quick', 'quick brown', 'brown fox', 'fox jumped',
        'jumped over', 'over the', 'the lazy', 'lazy dog'
      ]
      generator = Clarifier::NGrams.new
      actual = generator.n_grams('The quick brown fox jumped over the lazy dog.')
      assert_equal bigrams, actual
    end

    # Passing 3 as the size should emit three-word n-grams.
    def test_creates_3_word_n_grams
      trigrams = [
        'The quick brown', 'quick brown fox', 'brown fox jumped',
        'fox jumped over', 'jumped over the', 'over the lazy', 'the lazy dog'
      ]
      generator = Clarifier::NGrams.new
      actual = generator.n_grams('The quick brown fox jumped over the lazy dog.', 3)
      assert_equal trigrams, actual
    end
  end
end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: clarifier
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rob Styles
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-03-04 00:00:00.000000000 Z
11
+ date: 2014-03-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -82,10 +82,12 @@ files:
82
82
  - Rakefile
83
83
  - clarifier.gemspec
84
84
  - lib/clarifier.rb
85
+ - lib/clarifier/n_grams.rb
85
86
  - lib/clarifier/stop_words.rb
86
87
  - lib/clarifier/version.rb
87
88
  - lib/clarifier/word_lists/en_gb_basic.rb
88
89
  - lib/clarifier/word_lists/en_gb_mysql.rb
90
+ - test/n_grams_test.rb
89
91
  - test/stop_words_test.rb
90
92
  - test/test_docs.rb
91
93
  - test/test_helper.rb
@@ -114,6 +116,7 @@ signing_key:
114
116
  specification_version: 4
115
117
  summary: Clarifier is a stopwords library for removing common words from text
116
118
  test_files:
119
+ - test/n_grams_test.rb
117
120
  - test/stop_words_test.rb
118
121
  - test/test_docs.rb
119
122
  - test/test_helper.rb