ngrams_parser 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +1 -0
- data/lib/ngrams_parser/ngrams.rb +1 -1
- data/lib/ngrams_parser/version.rb +1 -1
- data/ngrams_parser.gemspec +1 -1
- data/spec/ngrams_parser/ngrams_spec.rb +21 -2
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4050a66b4d50b418ea1c4deffbb033a4b31f84ed
|
4
|
+
data.tar.gz: 54acdd2b29a1a17efc1e933fc3e938355629ae62
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cb385713b8d33dc66af608534b74eebb9c60d73f6813b98d6f6d83e3fc1401e88cd530dd5d72e7d0e96745f1036b64df1be0ca21593bdd945a310d0019874564
|
7
|
+
data.tar.gz: d3af55e33005e70ce2efc638262789e0ba3e772107fb60203ea697ea936b8f84f038708596d4aa5a2e8c1728cf009b0cde645b213840ad41c2aba4629c1dd14f
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# NgramsParser [![Gem Version](https://badge.fury.io/rb/ngrams_parser.png)](http://badge.fury.io/rb/ngrams_parser) [![Build Status](https://travis-ci.org/fractalsoft/ngrams_parser.png)](https://travis-ci.org/fractalsoft/ngrams_parser) [![Dependency Status](https://gemnasium.com/fractalsoft/ngrams_parser.png)](https://gemnasium.com/fractalsoft/ngrams_parser) [![Coverage Status](https://coveralls.io/repos/fractalsoft/ngrams_parser/badge.png)](https://coveralls.io/r/fractalsoft/ngrams_parser)
|
2
|
+
[![endorse](https://api.coderwall.com/torrocus/endorsecount.png)](https://coderwall.com/torrocus)
|
2
3
|
|
3
4
|
N-gram is a contiguous sequence of n items from a given sequence of text or speech. The items are letters, but can be phonemes, syllables, words or base pairs according to the application. The n-grams typically are collected from a text or speech corpus.
|
4
5
|
|
data/lib/ngrams_parser/ngrams.rb
CHANGED
data/ngrams_parser.gemspec
CHANGED
@@ -9,7 +9,7 @@ Gem::Specification.new do |spec|
|
|
9
9
|
spec.authors = ["Aleksander Malaszkiewicz"]
|
10
10
|
spec.email = ["info@fractalsoft.org"]
|
11
11
|
spec.summary = %q{Split text into ngrams}
|
12
|
-
spec.homepage = ""
|
12
|
+
spec.homepage = "https://github.com/fractalsoft/ngrams_parser"
|
13
13
|
spec.license = "MIT"
|
14
14
|
|
15
15
|
spec.files = `git ls-files`.split($/)
|
@@ -99,16 +99,35 @@ describe NgramsParser do
|
|
99
99
|
["i ", "čin", "inč", "nči", "čil", "ilo", "lov", "ový", "vý "],
|
100
100
|
["ý ", "čep", "epe", "pec", "ec ", "c "]
|
101
101
|
].flatten
|
102
|
+
},
|
103
|
+
{
|
104
|
+
text: "99 bottles of beer on the wall,",
|
105
|
+
bigrams: [
|
106
|
+
["bo", "ot", "tt", "tl", "le", "es", "s "],
|
107
|
+
["of", "f "],
|
108
|
+
["be", "ee", "er", "r "],
|
109
|
+
["on", "n "],
|
110
|
+
["th", "he", "e "],
|
111
|
+
["wa", "al", "ll", "l "]
|
112
|
+
].flatten,
|
113
|
+
trigrams: [
|
114
|
+
["bot", "ott", "ttl", "tle", "les", "es ", "s "],
|
115
|
+
["of ", "f "],
|
116
|
+
["bee", "eer", "er ", "r "],
|
117
|
+
["on ", "n "],
|
118
|
+
["the", "he ", "e "],
|
119
|
+
["wal", "all", "ll ", "l "]
|
120
|
+
].flatten
|
102
121
|
}
|
103
122
|
].each do |hash|
|
104
123
|
text, bigrams, trigrams = hash.values
|
105
124
|
|
106
125
|
it "split text '#{text}' into bigrams" do
|
107
|
-
|
126
|
+
subject.ngrams(text, 2).should eq(bigrams)
|
108
127
|
end
|
109
128
|
|
110
129
|
it "split text '#{text}' into trigrams" do
|
111
|
-
|
130
|
+
subject.ngrams(text, 3).should eq(trigrams)
|
112
131
|
end
|
113
132
|
end
|
114
133
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ngrams_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Aleksander Malaszkiewicz
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-07-
|
11
|
+
date: 2013-07-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -80,7 +80,7 @@ files:
|
|
80
80
|
- spec/ngrams_parser/ngrams_spec.rb
|
81
81
|
- spec/ngrams_parser/string_spec.rb
|
82
82
|
- spec/spec_helper.rb
|
83
|
-
homepage:
|
83
|
+
homepage: https://github.com/fractalsoft/ngrams_parser
|
84
84
|
licenses:
|
85
85
|
- MIT
|
86
86
|
metadata: {}
|