ngrams_parser 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4050a66b4d50b418ea1c4deffbb033a4b31f84ed
4
- data.tar.gz: 54acdd2b29a1a17efc1e933fc3e938355629ae62
3
+ metadata.gz: 6c3cdef6702ffbc5a36f42d454a3bb491abf72f0
4
+ data.tar.gz: 69e88fdec44a26144c90f668be1c80ed97c804e3
5
5
  SHA512:
6
- metadata.gz: cb385713b8d33dc66af608534b74eebb9c60d73f6813b98d6f6d83e3fc1401e88cd530dd5d72e7d0e96745f1036b64df1be0ca21593bdd945a310d0019874564
7
- data.tar.gz: d3af55e33005e70ce2efc638262789e0ba3e772107fb60203ea697ea936b8f84f038708596d4aa5a2e8c1728cf009b0cde645b213840ad41c2aba4629c1dd14f
6
+ metadata.gz: a1c60d03f3802948359c7e3993a315e62b481a15179b7ae2f9c5c0bc44066f1bf3919d59f8b15a7d35288414e074448ca0def7815fc58f034091f95f495d2f83
7
+ data.tar.gz: 5c936796efb35bddadfe3d34381c5cba344ddff8c2d86fe169a95bed70ed6086c785a1aee19509ba9f6204084e6c079ce4d1d1d4a475d131d9985391b9f9073e
data/.travis.yml CHANGED
@@ -2,4 +2,3 @@ language: ruby
2
2
  rvm:
3
3
  - 1.9.3
4
4
  - 2.0.0
5
- - jruby-head
data/CHANGELOG.md CHANGED
@@ -9,3 +9,7 @@
9
9
  ## v0.0.3
10
10
 
11
11
  * ngrams without digits
12
+
13
+ ## v0.0.4
14
+
15
+ * code clean up
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # NgramsParser [![Gem Version](https://badge.fury.io/rb/ngrams_parser.png)](http://badge.fury.io/rb/ngrams_parser) [![Build Status](https://travis-ci.org/fractalsoft/ngrams_parser.png)](https://travis-ci.org/fractalsoft/ngrams_parser) [![Dependency Status](https://gemnasium.com/fractalsoft/ngrams_parser.png)](https://gemnasium.com/fractalsoft/ngrams_parser) [![Coverage Status](https://coveralls.io/repos/fractalsoft/ngrams_parser/badge.png)](https://coveralls.io/r/fractalsoft/ngrams_parser)
1
+ # NgramsParser [![Gem Version](https://badge.fury.io/rb/ngrams_parser.png)](http://badge.fury.io/rb/ngrams_parser) [![Build Status](https://travis-ci.org/fractalsoft/ngrams_parser.png)](https://travis-ci.org/fractalsoft/ngrams_parser) [![Dependency Status](https://gemnasium.com/fractalsoft/ngrams_parser.png)](https://gemnasium.com/fractalsoft/ngrams_parser) [![Coverage Status](https://coveralls.io/repos/fractalsoft/ngrams_parser/badge.png)](https://coveralls.io/r/fractalsoft/ngrams_parser) [![Stories in Ready](https://badge.waffle.io/fractalsoft/ngrams_parser.png)](http://waffle.io/fractalsoft/ngrams_parser)
2
2
  [![endorse](https://api.coderwall.com/torrocus/endorsecount.png)](https://coderwall.com/torrocus)
3
3
 
4
4
  N-gram is a contiguous sequence of n items from a given sequence of text or speech. The items are letters, but can be phonemes, syllables, words or base pairs according to the application. The n-grams typically are collected from a text or speech corpus.
@@ -1,4 +1,6 @@
1
1
  # coding: utf-8
2
+
3
+ # Parse word to ngrams
2
4
  module NgramsParser
3
5
  # Split word into ngrams
4
6
  #
@@ -7,7 +9,7 @@ module NgramsParser
7
9
  def self.ngram(word, size)
8
10
  array = []
9
11
  word.split('').each_index do |index|
10
- text = word[index..index+size-1]
12
+ text = word[index..index + size - 1]
11
13
  array << text.ljust(size, ' ')
12
14
  end
13
15
  array
@@ -1,10 +1,11 @@
1
1
  # coding: utf-8
2
2
  require 'lexical_units'
3
3
 
4
+ # Parse given *text* into ngrams
4
5
  module NgramsParser
5
6
  def self.ngrams(text, size)
6
7
  array = []
7
- LexicalUnits::words_without_digits(text).each do |word|
8
+ LexicalUnits.words_without_digits(text).each do |word|
8
9
  array << ngram(word, size)
9
10
  end
10
11
  array.flatten
@@ -1,8 +1,10 @@
1
1
  # coding: utf-8
2
+
2
3
  module NgramsParser
4
+ # Use ngrams inside String class
3
5
  module String
4
6
  def ngrams(size)
5
- NgramsParser::ngrams(self, size)
7
+ NgramsParser.ngrams(self, size)
6
8
  end
7
9
  end
8
10
  end
@@ -1,3 +1,4 @@
1
+ # Gem version
1
2
  module NgramsParser
2
- VERSION = "0.0.3"
3
+ VERSION = '0.0.4'
3
4
  end
data/lib/ngrams_parser.rb CHANGED
@@ -1,7 +1,4 @@
1
- require "ngrams_parser/ngram"
2
- require "ngrams_parser/ngrams"
3
- require "ngrams_parser/string"
4
- require "ngrams_parser/version"
5
-
6
- module NgramsParser
7
- end
1
+ require 'ngrams_parser/ngram'
2
+ require 'ngrams_parser/ngrams'
3
+ require 'ngrams_parser/string'
4
+ require 'ngrams_parser/version'
@@ -2,29 +2,29 @@
2
2
  require 'spec_helper'
3
3
 
4
4
  describe NgramsParser do
5
- context ".ngram" do
6
- let(:klass) { NgramsParser }
7
-
5
+ context '.ngram' do
8
6
  [
9
7
  {
10
8
  text: "Will's",
11
- digrams: ["Wi", "il", "ll", "l'", "'s", "s "],
12
- trigrams: ["Wil", "ill", "ll'", "l's", "'s ", "s "],
9
+ digrams: ['Wi', 'il', 'll', "l'", "'s", 's '],
10
+ trigrams: ['Wil', 'ill', "ll'", "l's", "'s ", 's '],
13
11
  },
14
12
  {
15
- text: "činčilový",
16
- digrams: ["či", "in", "nč", "či", "il", "lo", "ov", "vý", "ý "],
17
- trigrams: ["čin", "inč", "nči", "čil", "ilo", "lov", "ový", "vý ", "ý "]
13
+ text: 'činčilový',
14
+ digrams: ['či', 'in', 'nč', 'či', 'il', 'lo', 'ov', 'vý', 'ý '],
15
+ trigrams: [
16
+ 'čin', 'inč', 'nči', 'čil', 'ilo', 'lov', 'ový', 'vý ', 'ý '
17
+ ]
18
18
  }
19
19
  ].each do |hash|
20
20
  text, bigrams, trigrams = hash.values
21
21
 
22
22
  it "split word '#{text}' into bigrams: #{bigrams}" do
23
- klass::ngram(text, 2).should eq(bigrams)
23
+ subject.ngram(text, 2).should eq(bigrams)
24
24
  end
25
25
 
26
26
  it "split word '#{text}' into trigrams: #{trigrams}" do
27
- klass::ngram(text, 3).should eq(trigrams)
27
+ subject.ngram(text, 3).should eq(trigrams)
28
28
  end
29
29
  end
30
30
  end
@@ -2,121 +2,124 @@
2
2
  require 'spec_helper'
3
3
 
4
4
  describe NgramsParser do
5
- context ".ngrams" do
5
+ context '.ngrams' do
6
6
  let(:klass) { NgramsParser }
7
7
 
8
8
  [
9
9
  {
10
10
  text: "Will will Will will Will's will to Will?",
11
11
  bigrams: [
12
- ["Wi", "il", "ll", "l ", "wi", "il", "ll", "l "],
13
- ["Wi", "il", "ll", "l ", "wi", "il", "ll", "l "],
14
- ["Wi", "il", "ll", "l'", "'s", "s ", "wi", "il", "ll", "l "],
15
- ["to", "o "],
16
- ["Wi", "il", "ll", "l "]
12
+ ['Wi', 'il', 'll', 'l ', 'wi', 'il', 'll', 'l '],
13
+ ['Wi', 'il', 'll', 'l ', 'wi', 'il', 'll', 'l '],
14
+ ['Wi', 'il', 'll', "l'", "'s", 's ', 'wi', 'il', 'll', 'l '],
15
+ ['to', 'o '],
16
+ ['Wi', 'il', 'll', 'l ']
17
17
  ].flatten,
18
18
  trigrams: [
19
- ["Wil", "ill", "ll ","l "],
20
- ["wil", "ill", "ll ", "l "],
21
- ["Wil", "ill", "ll ","l "],
22
- ["wil", "ill", "ll ", "l "],
23
- ["Wil", "ill", "ll'","l's", "'s ", "s "],
24
- ["wil", "ill", "ll ", "l "],
25
- ["to ", "o ", "Wil", "ill", "ll ", "l "]
19
+ ['Wil', 'ill', 'll ', 'l '],
20
+ ['wil', 'ill', 'll ', 'l '],
21
+ ['Wil', 'ill', 'll ', 'l '],
22
+ ['wil', 'ill', 'll ', 'l '],
23
+ ['Wil', 'ill', "ll'", "l's", "'s ", 's '],
24
+ ['wil', 'ill', 'll ', 'l '],
25
+ ['to ', 'o ', 'Wil', 'ill', 'll ', 'l ']
26
26
  ].flatten
27
27
  },
28
28
  {
29
- text: "Acht alte Ameisen aßen am Abend Ananas.",
29
+ text: 'Acht alte Ameisen aßen am Abend Ananas.',
30
30
  bigrams: [
31
- ["Ac", "ch", "ht", "t "],
32
- ["al", "lt", "te", "e "],
33
- ["Am", "me", "ei", "is", "se", "en", "n "],
34
- ["aß", "ße", "en", "n "],
35
- ["am", "m "],
36
- ["Ab", "be", "en", "nd", "d "],
37
- ["An", "na", "an", "na", "as", "s "]
31
+ ['Ac', 'ch', 'ht', 't '],
32
+ ['al', 'lt', 'te', 'e '],
33
+ ['Am', 'me', 'ei', 'is', 'se', 'en', 'n '],
34
+ ['aß', 'ße', 'en', 'n '],
35
+ ['am', 'm '],
36
+ ['Ab', 'be', 'en', 'nd', 'd '],
37
+ ['An', 'na', 'an', 'na', 'as', 's ']
38
38
  ].flatten,
39
39
  trigrams: [
40
- ["Ach", "cht", "ht ", "t "],
41
- ["alt", "lte", "te ", "e "],
42
- ["Ame", "mei", "eis", "ise", "sen", "en ", "n "],
43
- ["aße", "ßen", "en ", "n "],
44
- ["am ", "m "],
45
- ["Abe", "ben", "end", "nd ", "d "],
46
- ["Ana", "nan", "ana", "nas", "as ", "s "]
40
+ ['Ach', 'cht', 'ht ', 't '],
41
+ ['alt', 'lte', 'te ', 'e '],
42
+ ['Ame', 'mei', 'eis', 'ise', 'sen', 'en ', 'n '],
43
+ ['aße', 'ßen', 'en ', 'n '],
44
+ ['am ', 'm '],
45
+ ['Abe', 'ben', 'end', 'nd ', 'd '],
46
+ ['Ana', 'nan', 'ana', 'nas', 'as ', 's ']
47
47
  ].flatten
48
48
  },
49
49
  {
50
- text: "Ödögidöggi",
51
- bigrams: ["Öd", "dö", "ög", "gi", "id", "dö", "ög", "gg", "gi", "i "],
50
+ text: 'Ödögidöggi',
51
+ bigrams: ['Öd', 'dö', 'ög', 'gi', 'id', 'dö', 'ög', 'gg', 'gi', 'i '],
52
52
  trigrams: [
53
- ["Ödö", "dög", "ögi", "gid", "idö", "dög", "ögg", "ggi", "gi ", "i "]
53
+ [
54
+ 'Ödö', 'dög', 'ögi', 'gid', 'idö',
55
+ 'dög', 'ögg', 'ggi', 'gi ', 'i '
56
+ ]
54
57
  ].flatten
55
58
  },
56
59
  {
57
- text: "Ćma ćmę ćmi.",
60
+ text: 'Ćma ćmę ćmi.',
58
61
  bigrams: [
59
- ["Ćm", "ma", "a ", "ćm", "mę", "ę ", "ćm", "mi", "i "]
62
+ ['Ćm', 'ma', 'a ', 'ćm', 'mę', 'ę ', 'ćm', 'mi', 'i ']
60
63
  ].flatten,
61
64
  trigrams: [
62
- ["Ćma", "ma ", "a ", "ćmę", "mę ", "ę ", "ćmi", "mi ", "i "]
65
+ ['Ćma', 'ma ', 'a ', 'ćmę', 'mę ', 'ę ', 'ćmi', 'mi ', 'i ']
63
66
  ].flatten
64
67
  },
65
68
  {
66
- text: "Łzy złej zołzy",
69
+ text: 'Łzy złej zołzy',
67
70
  bigrams: [
68
- ["Łz", "zy", "y ", "zł", "łe", "ej", "j "],
69
- ["zo", "oł", "łz", "zy", "y "]
71
+ ['Łz', 'zy', 'y ', 'zł', 'łe', 'ej', 'j '],
72
+ ['zo', 'oł', 'łz', 'zy', 'y ']
70
73
  ].flatten,
71
74
  trigrams: [
72
- ["Łzy", "zy ", "y ", "złe", "łej", "ej ", "j "],
73
- ["zoł", "ołz", "łzy", "zy ", "y "]
75
+ ['Łzy', 'zy ', 'y ', 'złe', 'łej', 'ej ', 'j '],
76
+ ['zoł', 'ołz', 'łzy', 'zy ', 'y ']
74
77
  ].flatten
75
78
  },
76
79
  {
77
- text: "Żubr żuł żuchwą żurawinę.",
80
+ text: 'Żubr żuł żuchwą żurawinę.',
78
81
  bigrams: [
79
- ["Żu", "ub", "br", "r ", "żu", "uł", "ł "],
80
- ["żu", "uc", "ch", "hw", "wą", "ą "],
81
- ["żu", "ur", "ra", "aw", "wi", "in", "nę", "ę "]
82
+ ['Żu', 'ub', 'br', 'r ', 'żu', 'uł', 'ł '],
83
+ ['żu', 'uc', 'ch', 'hw', 'wą', 'ą '],
84
+ ['żu', 'ur', 'ra', 'aw', 'wi', 'in', 'nę', 'ę ']
82
85
  ].flatten,
83
86
  trigrams: [
84
- ["Żub", "ubr", "br ", "r ", "żuł", "uł ", "ł "],
85
- ["żuc", "uch", "chw", "hwą", "wą ", "ą "],
86
- ["żur", "ura", "raw", "awi", "win", "inę", "nę ", "ę "]
87
+ ['Żub', 'ubr', 'br ', 'r ', 'żuł', 'uł ', 'ł '],
88
+ ['żuc', 'uch', 'chw', 'hwą', 'wą ', 'ą '],
89
+ ['żur', 'ura', 'raw', 'awi', 'win', 'inę', 'nę ', 'ę ']
87
90
  ].flatten
88
91
  },
89
92
  {
90
- text: "Čistý s Čistou čistili činčilový čepec.",
93
+ text: 'Čistý s Čistou čistili činčilový čepec.',
91
94
  bigrams: [
92
- ["Či", "is", "st", "tý", "ý ", "s ", "Či", "is", "st", "to", "ou"],
93
- ["u ", "či", "is", "st", "ti", "il", "li", "i ", "či", "in", "nč"],
94
- ["či", "il", "lo", "ov", "vý", "ý ", "če", "ep", "pe", "ec", "c "]
95
+ ['Či', 'is', 'st', 'tý', 'ý ', 's ', 'Či', 'is', 'st', 'to', 'ou'],
96
+ ['u ', 'či', 'is', 'st', 'ti', 'il', 'li', 'i ', 'či', 'in', 'nč'],
97
+ ['či', 'il', 'lo', 'ov', 'vý', 'ý ', 'če', 'ep', 'pe', 'ec', 'c ']
95
98
  ].flatten,
96
99
  trigrams: [
97
- ["Čis", "ist", "stý", "tý ", "ý ", "s ", "Čis", "ist", "sto"],
98
- ["tou", "ou ", "u ", "čis", "ist", "sti", "til", "ili", "li "],
99
- ["i ", "čin", "inč", "nči", "čil", "ilo", "lov", "ový", "vý "],
100
- ["ý ", "čep", "epe", "pec", "ec ", "c "]
100
+ ['Čis', 'ist', 'stý', 'tý ', 'ý ', 's ', 'Čis', 'ist', 'sto'],
101
+ ['tou', 'ou ', 'u ', 'čis', 'ist', 'sti', 'til', 'ili', 'li '],
102
+ ['i ', 'čin', 'inč', 'nči', 'čil', 'ilo', 'lov', 'ový', 'vý '],
103
+ ['ý ', 'čep', 'epe', 'pec', 'ec ', 'c ']
101
104
  ].flatten
102
105
  },
103
106
  {
104
- text: "99 bottles of beer on the wall,",
107
+ text: '99 bottles of beer on the wall,',
105
108
  bigrams: [
106
- ["bo", "ot", "tt", "tl", "le", "es", "s "],
107
- ["of", "f "],
108
- ["be", "ee", "er", "r "],
109
- ["on", "n "],
110
- ["th", "he", "e "],
111
- ["wa", "al", "ll", "l "]
109
+ ['bo', 'ot', 'tt', 'tl', 'le', 'es', 's '],
110
+ ['of', 'f '],
111
+ ['be', 'ee', 'er', 'r '],
112
+ ['on', 'n '],
113
+ ['th', 'he', 'e '],
114
+ ['wa', 'al', 'll', 'l ']
112
115
  ].flatten,
113
116
  trigrams: [
114
- ["bot", "ott", "ttl", "tle", "les", "es ", "s "],
115
- ["of ", "f "],
116
- ["bee", "eer", "er ", "r "],
117
- ["on ", "n "],
118
- ["the", "he ", "e "],
119
- ["wal", "all", "ll ", "l "]
117
+ ['bot', 'ott', 'ttl', 'tle', 'les', 'es ', 's '],
118
+ ['of ', 'f '],
119
+ ['bee', 'eer', 'er ', 'r '],
120
+ ['on ', 'n '],
121
+ ['the', 'he ', 'e '],
122
+ ['wal', 'all', 'll ', 'l ']
120
123
  ].flatten
121
124
  }
122
125
  ].each do |hash|
@@ -2,24 +2,25 @@
2
2
  require 'spec_helper'
3
3
 
4
4
  describe NgramsParser::String do
5
+ # Testing class
5
6
  class String
6
7
  include NgramsParser::String
7
8
  end
8
9
 
9
- context "#ngrams" do
10
- it "splits String into ngrams" do
11
- string = "Lorem ipsum"
10
+ context '#ngrams' do
11
+ it 'splits String into ngrams' do
12
+ string = 'Lorem ipsum'
12
13
  bigrams = [
13
- "Lo", "or", "re", "em", "m ",
14
- "ip", "ps", "su", "um", "m "
14
+ 'Lo', 'or', 're', 'em', 'm ',
15
+ 'ip', 'ps', 'su', 'um', 'm '
15
16
  ]
16
17
  trigrams = [
17
- "Lor", "ore", "rem", "em ", "m ",
18
- "ips", "psu", "sum", "um ", "m "
18
+ 'Lor', 'ore', 'rem', 'em ', 'm ',
19
+ 'ips', 'psu', 'sum', 'um ', 'm '
19
20
  ]
20
21
  quadgrams = [
21
- "Lore", "orem", "rem ", "em ", "m ",
22
- "ipsu", "psum", "sum ", "um ", "m "
22
+ 'Lore', 'orem', 'rem ', 'em ', 'm ',
23
+ 'ipsu', 'psum', 'sum ', 'um ', 'm '
23
24
  ]
24
25
 
25
26
  string.ngrams(2).should eq(bigrams)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ngrams_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Aleksander Malaszkiewicz
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-07-16 00:00:00.000000000 Z
11
+ date: 2013-08-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler