ngrams_parser 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0cc5ee57d99795a26f1f768a6601867e6c0ccf66
4
- data.tar.gz: 339eaa496a92646798bbd2d98fa921ecfb663e30
3
+ metadata.gz: 4050a66b4d50b418ea1c4deffbb033a4b31f84ed
4
+ data.tar.gz: 54acdd2b29a1a17efc1e933fc3e938355629ae62
5
5
  SHA512:
6
- metadata.gz: 0220b07f281d806b811a5fc65961375ff68f177da9a84881b3b62a43db6113033fb5070a895f9400ce29fa1e7b69b6c78620e2ed6bdd8b3dcaa7cd60ee62ff2c
7
- data.tar.gz: 894d51dc9b5bf36d9d38ceffb60f1248227bf8024ace8b808b3dfe86c3e9a840997835d00d37af3e307f48041bb0e7424d008fab855b649ea7469b2b5d123b1d
6
+ metadata.gz: cb385713b8d33dc66af608534b74eebb9c60d73f6813b98d6f6d83e3fc1401e88cd530dd5d72e7d0e96745f1036b64df1be0ca21593bdd945a310d0019874564
7
+ data.tar.gz: d3af55e33005e70ce2efc638262789e0ba3e772107fb60203ea697ea936b8f84f038708596d4aa5a2e8c1728cf009b0cde645b213840ad41c2aba4629c1dd14f
data/CHANGELOG.md CHANGED
@@ -5,3 +5,7 @@
5
5
  ## v0.0.2
6
6
 
7
7
  * fixed coding problem
8
+
9
+ ## v0.0.3
10
+
11
+ * ngrams without digits
data/README.md CHANGED
@@ -1,4 +1,5 @@
1
1
  # NgramsParser [![Gem Version](https://badge.fury.io/rb/ngrams_parser.png)](http://badge.fury.io/rb/ngrams_parser) [![Build Status](https://travis-ci.org/fractalsoft/ngrams_parser.png)](https://travis-ci.org/fractalsoft/ngrams_parser) [![Dependency Status](https://gemnasium.com/fractalsoft/ngrams_parser.png)](https://gemnasium.com/fractalsoft/ngrams_parser) [![Coverage Status](https://coveralls.io/repos/fractalsoft/ngrams_parser/badge.png)](https://coveralls.io/r/fractalsoft/ngrams_parser)
2
+ [![endorse](https://api.coderwall.com/torrocus/endorsecount.png)](https://coderwall.com/torrocus)
2
3
 
3
4
  An n-gram is a contiguous sequence of n items from a given sequence of text or speech. The items are letters, but can be phonemes, syllables, words or base pairs according to the application. N-grams are typically collected from a text or speech corpus.
4
5
 
@@ -4,7 +4,7 @@ require 'lexical_units'
4
4
  module NgramsParser
5
5
  def self.ngrams(text, size)
6
6
  array = []
7
- LexicalUnits::words(text).each do |word|
7
+ LexicalUnits::words_without_digits(text).each do |word|
8
8
  array << ngram(word, size)
9
9
  end
10
10
  array.flatten
@@ -1,3 +1,3 @@
1
1
  module NgramsParser
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.3"
3
3
  end
@@ -9,7 +9,7 @@ Gem::Specification.new do |spec|
9
9
  spec.authors = ["Aleksander Malaszkiewicz"]
10
10
  spec.email = ["info@fractalsoft.org"]
11
11
  spec.summary = %q{Split text into ngrams}
12
- spec.homepage = ""
12
+ spec.homepage = "https://github.com/fractalsoft/ngrams_parser"
13
13
  spec.license = "MIT"
14
14
 
15
15
  spec.files = `git ls-files`.split($/)
@@ -99,16 +99,35 @@ describe NgramsParser do
99
99
  ["i ", "čin", "inč", "nči", "čil", "ilo", "lov", "ový", "vý "],
100
100
  ["ý ", "čep", "epe", "pec", "ec ", "c "]
101
101
  ].flatten
102
+ },
103
+ {
104
+ text: "99 bottles of beer on the wall,",
105
+ bigrams: [
106
+ ["bo", "ot", "tt", "tl", "le", "es", "s "],
107
+ ["of", "f "],
108
+ ["be", "ee", "er", "r "],
109
+ ["on", "n "],
110
+ ["th", "he", "e "],
111
+ ["wa", "al", "ll", "l "]
112
+ ].flatten,
113
+ trigrams: [
114
+ ["bot", "ott", "ttl", "tle", "les", "es ", "s "],
115
+ ["of ", "f "],
116
+ ["bee", "eer", "er ", "r "],
117
+ ["on ", "n "],
118
+ ["the", "he ", "e "],
119
+ ["wal", "all", "ll ", "l "]
120
+ ].flatten
102
121
  }
103
122
  ].each do |hash|
104
123
  text, bigrams, trigrams = hash.values
105
124
 
106
125
  it "split text '#{text}' into bigrams" do
107
- klass::ngrams(text, 2).should eq(bigrams)
126
+ subject.ngrams(text, 2).should eq(bigrams)
108
127
  end
109
128
 
110
129
  it "split text '#{text}' into trigrams" do
111
- klass::ngrams(text, 3).should eq(trigrams)
130
+ subject.ngrams(text, 3).should eq(trigrams)
112
131
  end
113
132
  end
114
133
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ngrams_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Aleksander Malaszkiewicz
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-07-07 00:00:00.000000000 Z
11
+ date: 2013-07-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -80,7 +80,7 @@ files:
80
80
  - spec/ngrams_parser/ngrams_spec.rb
81
81
  - spec/ngrams_parser/string_spec.rb
82
82
  - spec/spec_helper.rb
83
- homepage: ''
83
+ homepage: https://github.com/fractalsoft/ngrams_parser
84
84
  licenses:
85
85
  - MIT
86
86
  metadata: {}