RubyGems - ngrams_parser - Versions diffs - 0.0.3 → 0.0.4 - Mend

ngrams_parser 0.0.3 → 0.0.4

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (13)

checksums.yaml +4 -4
data/.travis.yml +0 -1
data/CHANGELOG.md +4 -0
data/README.md +1 -1
data/lib/ngrams_parser/ngram.rb +3 -1
data/lib/ngrams_parser/ngrams.rb +2 -1
data/lib/ngrams_parser/string.rb +3 -1
data/lib/ngrams_parser/version.rb +2 -1
data/lib/ngrams_parser.rb +4 -7
data/spec/ngrams_parser/ngram_spec.rb +10 -10
data/spec/ngrams_parser/ngrams_spec.rb +70 -67
data/spec/ngrams_parser/string_spec.rb +10 -9
metadata +2 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 4050a66b4d50b418ea1c4deffbb033a4b31f84ed
-  data.tar.gz: 54acdd2b29a1a17efc1e933fc3e938355629ae62
+  metadata.gz: 6c3cdef6702ffbc5a36f42d454a3bb491abf72f0
+  data.tar.gz: 69e88fdec44a26144c90f668be1c80ed97c804e3
 SHA512:
-  metadata.gz: cb385713b8d33dc66af608534b74eebb9c60d73f6813b98d6f6d83e3fc1401e88cd530dd5d72e7d0e96745f1036b64df1be0ca21593bdd945a310d0019874564
-  data.tar.gz: d3af55e33005e70ce2efc638262789e0ba3e772107fb60203ea697ea936b8f84f038708596d4aa5a2e8c1728cf009b0cde645b213840ad41c2aba4629c1dd14f
+  metadata.gz: a1c60d03f3802948359c7e3993a315e62b481a15179b7ae2f9c5c0bc44066f1bf3919d59f8b15a7d35288414e074448ca0def7815fc58f034091f95f495d2f83
+  data.tar.gz: 5c936796efb35bddadfe3d34381c5cba344ddff8c2d86fe169a95bed70ed6086c785a1aee19509ba9f6204084e6c079ce4d1d1d4a475d131d9985391b9f9073e

data/.travis.yml CHANGED Viewed

@@ -2,4 +2,3 @@ language: ruby
 rvm:
   - 1.9.3
   - 2.0.0
-  - jruby-head

data/CHANGELOG.md CHANGED Viewed

@@ -9,3 +9,7 @@
 ## v0.0.3
 * ngrams without digits
+## v0.0.4
+* code clean up

data/README.md CHANGED Viewed

@@ -1,4 +1,4 @@
-# NgramsParser [![Gem Version](https://badge.fury.io/rb/ngrams_parser.png)](http://badge.fury.io/rb/ngrams_parser) [![Build Status](https://travis-ci.org/fractalsoft/ngrams_parser.png)](https://travis-ci.org/fractalsoft/ngrams_parser) [![Dependency Status](https://gemnasium.com/fractalsoft/ngrams_parser.png)](https://gemnasium.com/fractalsoft/ngrams_parser) [![Coverage Status](https://coveralls.io/repos/fractalsoft/ngrams_parser/badge.png)](https://coveralls.io/r/fractalsoft/ngrams_parser)
+# NgramsParser [![Gem Version](https://badge.fury.io/rb/ngrams_parser.png)](http://badge.fury.io/rb/ngrams_parser) [![Build Status](https://travis-ci.org/fractalsoft/ngrams_parser.png)](https://travis-ci.org/fractalsoft/ngrams_parser) [![Dependency Status](https://gemnasium.com/fractalsoft/ngrams_parser.png)](https://gemnasium.com/fractalsoft/ngrams_parser) [![Coverage Status](https://coveralls.io/repos/fractalsoft/ngrams_parser/badge.png)](https://coveralls.io/r/fractalsoft/ngrams_parser) [![Stories in Ready](https://badge.waffle.io/fractalsoft/ngrams_parser.png)](http://waffle.io/fractalsoft/ngrams_parser)
 [![endorse](https://api.coderwall.com/torrocus/endorsecount.png)](https://coderwall.com/torrocus)
 N-gram is a contiguous sequence of n items from a given sequence of text or speech. The items are letters, but can be phonemes, syllables, words or base pairs according to the application. The n-grams typically are collected from a text or speech corpus.

data/lib/ngrams_parser/ngram.rb CHANGED Viewed

@@ -1,4 +1,6 @@
 # coding: utf-8
+# Parse word to ngrams
 module NgramsParser
   # Split word into ngrams
   #
@@ -7,7 +9,7 @@ module NgramsParser
   def self.ngram(word, size)
     array = []
     word.split('').each_index do |index|
-      text = word[index..index+size-1]
+      text = word[index..index + size - 1]
       array << text.ljust(size, ' ')
     end
     array

data/lib/ngrams_parser/ngrams.rb CHANGED Viewed

@@ -1,10 +1,11 @@
 # coding: utf-8
 require 'lexical_units'
+# Parse given *text* into ngrams
 module NgramsParser
   def self.ngrams(text, size)
     array = []
-    LexicalUnits::words_without_digits(text).each do |word|
+    LexicalUnits.words_without_digits(text).each do |word|
       array << ngram(word, size)
     end
     array.flatten

data/lib/ngrams_parser/string.rb CHANGED Viewed

@@ -1,8 +1,10 @@
 # coding: utf-8
 module NgramsParser
+  # Use ngrams inside String class
   module String
     def ngrams(size)
-      NgramsParser::ngrams(self, size)
+      NgramsParser.ngrams(self, size)
     end
   end
 end

data/lib/ngrams_parser/version.rb CHANGED Viewed

@@ -1,3 +1,4 @@
+# Gem version
 module NgramsParser
-  VERSION = "0.0.3"
+  VERSION = '0.0.4'
 end

data/lib/ngrams_parser.rb CHANGED Viewed

@@ -1,7 +1,4 @@
-require "ngrams_parser/ngram"
-require "ngrams_parser/ngrams"
-require "ngrams_parser/string"
-require "ngrams_parser/version"
-module NgramsParser
-end
+require 'ngrams_parser/ngram'
+require 'ngrams_parser/ngrams'
+require 'ngrams_parser/string'
+require 'ngrams_parser/version'

data/spec/ngrams_parser/ngram_spec.rb CHANGED Viewed

@@ -2,29 +2,29 @@
 require 'spec_helper'
 describe NgramsParser do
-  context ".ngram" do
-    let(:klass) { NgramsParser }
+  context '.ngram' do
     [
       {
         text: "Will's",
-        digrams: ["Wi", "il", "ll", "l'", "'s", "s "],
-        trigrams: ["Wil", "ill", "ll'", "l's", "'s ", "s  "],
+        digrams: ['Wi', 'il', 'll', "l'", "'s", 's '],
+        trigrams: ['Wil', 'ill', "ll'", "l's", "'s ", 's  '],
       },
       {
-        text: "činčilový",
-        digrams: ["či", "in", "nč", "či", "il", "lo", "ov", "vý", "ý "],
-        trigrams: ["čin", "inč", "nči", "čil", "ilo", "lov", "ový", "vý ", "ý  "]
+        text: 'činčilový',
+        digrams: ['či', 'in', 'nč', 'či', 'il', 'lo', 'ov', 'vý', 'ý '],
+        trigrams: [
+          'čin', 'inč', 'nči', 'čil', 'ilo', 'lov', 'ový', 'vý ', 'ý  '
+        ]
       }
     ].each do |hash|
       text, bigrams, trigrams = hash.values
       it "split word '#{text}' into bigrams: #{bigrams}" do
-        klass::ngram(text, 2).should eq(bigrams)
+        subject.ngram(text, 2).should eq(bigrams)
       end
       it "split word '#{text}' into trigrams: #{trigrams}" do
-        klass::ngram(text, 3).should eq(trigrams)
+        subject.ngram(text, 3).should eq(trigrams)
       end
     end
   end

data/spec/ngrams_parser/ngrams_spec.rb CHANGED Viewed

@@ -2,121 +2,124 @@
 require 'spec_helper'
 describe NgramsParser do
-  context ".ngrams" do
+  context '.ngrams' do
     let(:klass) { NgramsParser }
     [
       {
         text: "Will will Will will Will's will to Will?",
         bigrams: [
-          ["Wi", "il", "ll", "l ", "wi", "il", "ll", "l "],
-          ["Wi", "il", "ll", "l ", "wi", "il", "ll", "l "],
-          ["Wi", "il", "ll", "l'", "'s", "s ", "wi", "il", "ll", "l "],
-          ["to", "o "],
-          ["Wi", "il", "ll", "l "]
+          ['Wi', 'il', 'll', 'l ', 'wi', 'il', 'll', 'l '],
+          ['Wi', 'il', 'll', 'l ', 'wi', 'il', 'll', 'l '],
+          ['Wi', 'il', 'll', "l'", "'s", 's ', 'wi', 'il', 'll', 'l '],
+          ['to', 'o '],
+          ['Wi', 'il', 'll', 'l ']
         ].flatten,
         trigrams: [
-          ["Wil", "ill", "ll ","l  "],
-          ["wil", "ill", "ll ", "l  "],
-          ["Wil", "ill", "ll ","l  "],
-          ["wil", "ill", "ll ", "l  "],
-          ["Wil", "ill", "ll'","l's", "'s ", "s  "],
-          ["wil", "ill", "ll ", "l  "],
-          ["to ", "o  ", "Wil", "ill", "ll ", "l  "]
+          ['Wil', 'ill', 'll ', 'l  '],
+          ['wil', 'ill', 'll ', 'l  '],
+          ['Wil', 'ill', 'll ', 'l  '],
+          ['wil', 'ill', 'll ', 'l  '],
+          ['Wil', 'ill', "ll'", "l's", "'s ", 's  '],
+          ['wil', 'ill', 'll ', 'l  '],
+          ['to ', 'o  ', 'Wil', 'ill', 'll ', 'l  ']
         ].flatten
       },
       {
-        text: "Acht alte Ameisen aßen am Abend Ananas.",
+        text: 'Acht alte Ameisen aßen am Abend Ananas.',
         bigrams: [
-          ["Ac", "ch", "ht", "t "],
-          ["al", "lt", "te", "e "],
-          ["Am", "me", "ei", "is", "se", "en", "n "],
-          ["aß", "ße", "en", "n "],
-          ["am", "m "],
-          ["Ab", "be", "en", "nd", "d "],
-          ["An", "na", "an", "na", "as", "s "]
+          ['Ac', 'ch', 'ht', 't '],
+          ['al', 'lt', 'te', 'e '],
+          ['Am', 'me', 'ei', 'is', 'se', 'en', 'n '],
+          ['aß', 'ße', 'en', 'n '],
+          ['am', 'm '],
+          ['Ab', 'be', 'en', 'nd', 'd '],
+          ['An', 'na', 'an', 'na', 'as', 's ']
         ].flatten,
         trigrams: [
-          ["Ach", "cht", "ht ", "t  "],
-          ["alt", "lte", "te ", "e  "],
-          ["Ame", "mei", "eis", "ise", "sen", "en ", "n  "],
-          ["aße", "ßen", "en ", "n  "],
-          ["am ", "m  "],
-          ["Abe", "ben", "end", "nd ", "d  "],
-          ["Ana", "nan", "ana", "nas", "as ", "s  "]
+          ['Ach', 'cht', 'ht ', 't  '],
+          ['alt', 'lte', 'te ', 'e  '],
+          ['Ame', 'mei', 'eis', 'ise', 'sen', 'en ', 'n  '],
+          ['aße', 'ßen', 'en ', 'n  '],
+          ['am ', 'm  '],
+          ['Abe', 'ben', 'end', 'nd ', 'd  '],
+          ['Ana', 'nan', 'ana', 'nas', 'as ', 's  ']
         ].flatten
       },
       {
-        text: "Ödögidöggi",
-        bigrams: ["Öd", "dö", "ög", "gi", "id", "dö", "ög", "gg", "gi", "i "],
+        text: 'Ödögidöggi',
+        bigrams: ['Öd', 'dö', 'ög', 'gi', 'id', 'dö', 'ög', 'gg', 'gi', 'i '],
         trigrams: [
-          ["Ödö", "dög", "ögi", "gid", "idö", "dög", "ögg", "ggi", "gi ", "i  "]
+          [
+            'Ödö', 'dög', 'ögi', 'gid', 'idö',
+            'dög', 'ögg', 'ggi', 'gi ', 'i  '
+          ]
         ].flatten
       },
       {
-        text: "Ćma ćmę ćmi.",
+        text: 'Ćma ćmę ćmi.',
         bigrams: [
-          ["Ćm", "ma", "a ", "ćm", "mę", "ę ", "ćm", "mi", "i "]
+          ['Ćm', 'ma', 'a ', 'ćm', 'mę', 'ę ', 'ćm', 'mi', 'i ']
         ].flatten,
         trigrams: [
-          ["Ćma", "ma ", "a  ", "ćmę", "mę ", "ę  ", "ćmi", "mi ", "i  "]
+          ['Ćma', 'ma ', 'a  ', 'ćmę', 'mę ', 'ę  ', 'ćmi', 'mi ', 'i  ']
         ].flatten
       },
       {
-        text: "Łzy złej zołzy",
+        text: 'Łzy złej zołzy',
         bigrams: [
-          ["Łz", "zy", "y ", "zł", "łe", "ej", "j "],
-          ["zo", "oł", "łz", "zy", "y "]
+          ['Łz', 'zy', 'y ', 'zł', 'łe', 'ej', 'j '],
+          ['zo', 'oł', 'łz', 'zy', 'y ']
         ].flatten,
         trigrams: [
-          ["Łzy", "zy ", "y  ", "złe", "łej", "ej ", "j  "],
-          ["zoł", "ołz", "łzy", "zy ", "y  "]
+          ['Łzy', 'zy ', 'y  ', 'złe', 'łej', 'ej ', 'j  '],
+          ['zoł', 'ołz', 'łzy', 'zy ', 'y  ']
         ].flatten
       },
       {
-        text: "Żubr żuł żuchwą żurawinę.",
+        text: 'Żubr żuł żuchwą żurawinę.',
         bigrams: [
-          ["Żu", "ub", "br", "r ", "żu", "uł", "ł "],
-          ["żu", "uc", "ch", "hw", "wą", "ą "],
-          ["żu", "ur", "ra", "aw", "wi", "in", "nę", "ę "]
+          ['Żu', 'ub', 'br', 'r ', 'żu', 'uł', 'ł '],
+          ['żu', 'uc', 'ch', 'hw', 'wą', 'ą '],
+          ['żu', 'ur', 'ra', 'aw', 'wi', 'in', 'nę', 'ę ']
         ].flatten,
         trigrams: [
-          ["Żub", "ubr", "br ", "r  ", "żuł", "uł ", "ł  "],
-          ["żuc", "uch", "chw", "hwą", "wą ", "ą  "],
-          ["żur", "ura", "raw", "awi", "win", "inę", "nę ", "ę  "]
+          ['Żub', 'ubr', 'br ', 'r  ', 'żuł', 'uł ', 'ł  '],
+          ['żuc', 'uch', 'chw', 'hwą', 'wą ', 'ą  '],
+          ['żur', 'ura', 'raw', 'awi', 'win', 'inę', 'nę ', 'ę  ']
         ].flatten
       },
       {
-        text: "Čistý s Čistou čistili činčilový čepec.",
+        text: 'Čistý s Čistou čistili činčilový čepec.',
         bigrams: [
-          ["Či", "is", "st", "tý", "ý ", "s ", "Či", "is", "st", "to", "ou"],
-          ["u ", "či", "is", "st", "ti", "il", "li", "i ", "či", "in", "nč"],
-          ["či", "il", "lo", "ov", "vý", "ý ", "če", "ep", "pe", "ec", "c "]
+          ['Či', 'is', 'st', 'tý', 'ý ', 's ', 'Či', 'is', 'st', 'to', 'ou'],
+          ['u ', 'či', 'is', 'st', 'ti', 'il', 'li', 'i ', 'či', 'in', 'nč'],
+          ['či', 'il', 'lo', 'ov', 'vý', 'ý ', 'če', 'ep', 'pe', 'ec', 'c ']
         ].flatten,
         trigrams: [
-          ["Čis", "ist", "stý", "tý ", "ý  ", "s  ", "Čis", "ist", "sto"],
-          ["tou", "ou ", "u  ", "čis", "ist", "sti", "til", "ili", "li "],
-          ["i  ", "čin", "inč", "nči", "čil", "ilo", "lov", "ový", "vý "],
-          ["ý  ", "čep", "epe", "pec", "ec ", "c  "]
+          ['Čis', 'ist', 'stý', 'tý ', 'ý  ', 's  ', 'Čis', 'ist', 'sto'],
+          ['tou', 'ou ', 'u  ', 'čis', 'ist', 'sti', 'til', 'ili', 'li '],
+          ['i  ', 'čin', 'inč', 'nči', 'čil', 'ilo', 'lov', 'ový', 'vý '],
+          ['ý  ', 'čep', 'epe', 'pec', 'ec ', 'c  ']
         ].flatten
       },
       {
-        text: "99 bottles of beer on the wall,",
+        text: '99 bottles of beer on the wall,',
         bigrams: [
-          ["bo", "ot", "tt", "tl", "le", "es", "s "],
-          ["of", "f "],
-          ["be", "ee", "er", "r "],
-          ["on", "n "],
-          ["th", "he", "e "],
-          ["wa", "al", "ll", "l "]
+          ['bo', 'ot', 'tt', 'tl', 'le', 'es', 's '],
+          ['of', 'f '],
+          ['be', 'ee', 'er', 'r '],
+          ['on', 'n '],
+          ['th', 'he', 'e '],
+          ['wa', 'al', 'll', 'l ']
         ].flatten,
         trigrams: [
-          ["bot", "ott", "ttl", "tle", "les", "es ", "s  "],
-          ["of ", "f  "],
-          ["bee", "eer", "er ", "r  "],
-          ["on ", "n  "],
-          ["the", "he ", "e  "],
-          ["wal", "all", "ll ", "l  "]
+          ['bot', 'ott', 'ttl', 'tle', 'les', 'es ', 's  '],
+          ['of ', 'f  '],
+          ['bee', 'eer', 'er ', 'r  '],
+          ['on ', 'n  '],
+          ['the', 'he ', 'e  '],
+          ['wal', 'all', 'll ', 'l  ']
         ].flatten
       }
     ].each do |hash|

data/spec/ngrams_parser/string_spec.rb CHANGED Viewed

@@ -2,24 +2,25 @@
 require 'spec_helper'
 describe NgramsParser::String do
+  # Testing class
   class String
     include NgramsParser::String
   end
-  context "#ngrams" do
-    it "splits String into ngrams" do
-      string = "Lorem ipsum"
+  context '#ngrams' do
+    it 'splits String into ngrams' do
+      string = 'Lorem ipsum'
       bigrams = [
-        "Lo", "or", "re", "em", "m ",
-        "ip", "ps", "su", "um", "m "
+        'Lo', 'or', 're', 'em', 'm ',
+        'ip', 'ps', 'su', 'um', 'm '
       ]
       trigrams = [
-        "Lor", "ore", "rem", "em ", "m  ",
-        "ips", "psu", "sum", "um ", "m  "
+        'Lor', 'ore', 'rem', 'em ', 'm  ',
+        'ips', 'psu', 'sum', 'um ', 'm  '
       ]
       quadgrams = [
-        "Lore", "orem", "rem ", "em  ", "m   ",
-        "ipsu", "psum", "sum ", "um  ", "m   "
+        'Lore', 'orem', 'rem ', 'em  ', 'm   ',
+        'ipsu', 'psum', 'sum ', 'um  ', 'm   '
       ]
       string.ngrams(2).should eq(bigrams)

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: ngrams_parser
 version: !ruby/object:Gem::Version
-  version: 0.0.3
+  version: 0.0.4
 platform: ruby
 authors:
 - Aleksander Malaszkiewicz
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-07-16 00:00:00.000000000 Z
+date: 2013-08-13 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler