ngrams_parser 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0cc5ee57d99795a26f1f768a6601867e6c0ccf66
4
- data.tar.gz: 339eaa496a92646798bbd2d98fa921ecfb663e30
3
+ metadata.gz: 4050a66b4d50b418ea1c4deffbb033a4b31f84ed
4
+ data.tar.gz: 54acdd2b29a1a17efc1e933fc3e938355629ae62
5
5
  SHA512:
6
- metadata.gz: 0220b07f281d806b811a5fc65961375ff68f177da9a84881b3b62a43db6113033fb5070a895f9400ce29fa1e7b69b6c78620e2ed6bdd8b3dcaa7cd60ee62ff2c
7
- data.tar.gz: 894d51dc9b5bf36d9d38ceffb60f1248227bf8024ace8b808b3dfe86c3e9a840997835d00d37af3e307f48041bb0e7424d008fab855b649ea7469b2b5d123b1d
6
+ metadata.gz: cb385713b8d33dc66af608534b74eebb9c60d73f6813b98d6f6d83e3fc1401e88cd530dd5d72e7d0e96745f1036b64df1be0ca21593bdd945a310d0019874564
7
+ data.tar.gz: d3af55e33005e70ce2efc638262789e0ba3e772107fb60203ea697ea936b8f84f038708596d4aa5a2e8c1728cf009b0cde645b213840ad41c2aba4629c1dd14f
data/CHANGELOG.md CHANGED
@@ -5,3 +5,7 @@
5
5
  ## v0.0.2
6
6
 
7
7
  * fixed coding problem
8
+
9
+ ## v0.0.3
10
+
11
+ * ngrams without digits
data/README.md CHANGED
@@ -1,4 +1,5 @@
1
1
  # NgramsParser [![Gem Version](https://badge.fury.io/rb/ngrams_parser.png)](http://badge.fury.io/rb/ngrams_parser) [![Build Status](https://travis-ci.org/fractalsoft/ngrams_parser.png)](https://travis-ci.org/fractalsoft/ngrams_parser) [![Dependency Status](https://gemnasium.com/fractalsoft/ngrams_parser.png)](https://gemnasium.com/fractalsoft/ngrams_parser) [![Coverage Status](https://coveralls.io/repos/fractalsoft/ngrams_parser/badge.png)](https://coveralls.io/r/fractalsoft/ngrams_parser)
2
+ [![endorse](https://api.coderwall.com/torrocus/endorsecount.png)](https://coderwall.com/torrocus)
2
3
 
3
4
  N-gram is a contiguous sequence of n items from a given sequence of text or speech. The items are letters, but can be phonemes, syllables, words or base pairs according to the application. The n-grams typically are collected from a text or speech corpus.
4
5
 
@@ -4,7 +4,7 @@ require 'lexical_units'
4
4
  module NgramsParser
5
5
  def self.ngrams(text, size)
6
6
  array = []
7
- LexicalUnits::words(text).each do |word|
7
+ LexicalUnits::words_without_digits(text).each do |word|
8
8
  array << ngram(word, size)
9
9
  end
10
10
  array.flatten
@@ -1,3 +1,3 @@
1
1
  module NgramsParser
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.3"
3
3
  end
@@ -9,7 +9,7 @@ Gem::Specification.new do |spec|
9
9
  spec.authors = ["Aleksander Malaszkiewicz"]
10
10
  spec.email = ["info@fractalsoft.org"]
11
11
  spec.summary = %q{Split text into ngrams}
12
- spec.homepage = ""
12
+ spec.homepage = "https://github.com/fractalsoft/ngrams_parser"
13
13
  spec.license = "MIT"
14
14
 
15
15
  spec.files = `git ls-files`.split($/)
@@ -99,16 +99,35 @@ describe NgramsParser do
99
99
  ["i ", "čin", "inč", "nči", "čil", "ilo", "lov", "ový", "vý "],
100
100
  ["ý ", "čep", "epe", "pec", "ec ", "c "]
101
101
  ].flatten
102
+ },
103
+ {
104
+ text: "99 bottles of beer on the wall,",
105
+ bigrams: [
106
+ ["bo", "ot", "tt", "tl", "le", "es", "s "],
107
+ ["of", "f "],
108
+ ["be", "ee", "er", "r "],
109
+ ["on", "n "],
110
+ ["th", "he", "e "],
111
+ ["wa", "al", "ll", "l "]
112
+ ].flatten,
113
+ trigrams: [
114
+ ["bot", "ott", "ttl", "tle", "les", "es ", "s "],
115
+ ["of ", "f "],
116
+ ["bee", "eer", "er ", "r "],
117
+ ["on ", "n "],
118
+ ["the", "he ", "e "],
119
+ ["wal", "all", "ll ", "l "]
120
+ ].flatten
102
121
  }
103
122
  ].each do |hash|
104
123
  text, bigrams, trigrams = hash.values
105
124
 
106
125
  it "split text '#{text}' into bigrams" do
107
- klass::ngrams(text, 2).should eq(bigrams)
126
+ subject.ngrams(text, 2).should eq(bigrams)
108
127
  end
109
128
 
110
129
  it "split text '#{text}' into trigrams" do
111
- klass::ngrams(text, 3).should eq(trigrams)
130
+ subject.ngrams(text, 3).should eq(trigrams)
112
131
  end
113
132
  end
114
133
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ngrams_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Aleksander Malaszkiewicz
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-07-07 00:00:00.000000000 Z
11
+ date: 2013-07-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -80,7 +80,7 @@ files:
80
80
  - spec/ngrams_parser/ngrams_spec.rb
81
81
  - spec/ngrams_parser/string_spec.rb
82
82
  - spec/spec_helper.rb
83
- homepage: ''
83
+ homepage: https://github.com/fractalsoft/ngrams_parser
84
84
  licenses:
85
85
  - MIT
86
86
  metadata: {}