ngrams_parser 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +1 -0
- data/lib/ngrams_parser/ngrams.rb +1 -1
- data/lib/ngrams_parser/version.rb +1 -1
- data/ngrams_parser.gemspec +1 -1
- data/spec/ngrams_parser/ngrams_spec.rb +21 -2
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4050a66b4d50b418ea1c4deffbb033a4b31f84ed
|
4
|
+
data.tar.gz: 54acdd2b29a1a17efc1e933fc3e938355629ae62
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cb385713b8d33dc66af608534b74eebb9c60d73f6813b98d6f6d83e3fc1401e88cd530dd5d72e7d0e96745f1036b64df1be0ca21593bdd945a310d0019874564
|
7
|
+
data.tar.gz: d3af55e33005e70ce2efc638262789e0ba3e772107fb60203ea697ea936b8f84f038708596d4aa5a2e8c1728cf009b0cde645b213840ad41c2aba4629c1dd14f
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# NgramsParser [](http://badge.fury.io/rb/ngrams_parser) [](https://travis-ci.org/fractalsoft/ngrams_parser) [](https://gemnasium.com/fractalsoft/ngrams_parser) [](https://coveralls.io/r/fractalsoft/ngrams_parser)
|
2
|
+
[](https://coderwall.com/torrocus)
|
2
3
|
|
3
4
|
N-gram is a contiguous sequence of n items from a given sequence of text or speech. The items are letters, but can be phonemes, syllables, words or base pairs according to the application. The n-grams typically are collected from a text or speech corpus.
|
4
5
|
|
data/lib/ngrams_parser/ngrams.rb
CHANGED
data/ngrams_parser.gemspec
CHANGED
@@ -9,7 +9,7 @@ Gem::Specification.new do |spec|
|
|
9
9
|
spec.authors = ["Aleksander Malaszkiewicz"]
|
10
10
|
spec.email = ["info@fractalsoft.org"]
|
11
11
|
spec.summary = %q{Split text into ngrams}
|
12
|
-
spec.homepage = ""
|
12
|
+
spec.homepage = "https://github.com/fractalsoft/ngrams_parser"
|
13
13
|
spec.license = "MIT"
|
14
14
|
|
15
15
|
spec.files = `git ls-files`.split($/)
|
@@ -99,16 +99,35 @@ describe NgramsParser do
|
|
99
99
|
["i ", "čin", "inč", "nči", "čil", "ilo", "lov", "ový", "vý "],
|
100
100
|
["ý ", "čep", "epe", "pec", "ec ", "c "]
|
101
101
|
].flatten
|
102
|
+
},
|
103
|
+
{
|
104
|
+
text: "99 bottles of beer on the wall,",
|
105
|
+
bigrams: [
|
106
|
+
["bo", "ot", "tt", "tl", "le", "es", "s "],
|
107
|
+
["of", "f "],
|
108
|
+
["be", "ee", "er", "r "],
|
109
|
+
["on", "n "],
|
110
|
+
["th", "he", "e "],
|
111
|
+
["wa", "al", "ll", "l "]
|
112
|
+
].flatten,
|
113
|
+
trigrams: [
|
114
|
+
["bot", "ott", "ttl", "tle", "les", "es ", "s "],
|
115
|
+
["of ", "f "],
|
116
|
+
["bee", "eer", "er ", "r "],
|
117
|
+
["on ", "n "],
|
118
|
+
["the", "he ", "e "],
|
119
|
+
["wal", "all", "ll ", "l "]
|
120
|
+
].flatten
|
102
121
|
}
|
103
122
|
].each do |hash|
|
104
123
|
text, bigrams, trigrams = hash.values
|
105
124
|
|
106
125
|
it "split text '#{text}' into bigrams" do
|
107
|
-
|
126
|
+
subject.ngrams(text, 2).should eq(bigrams)
|
108
127
|
end
|
109
128
|
|
110
129
|
it "split text '#{text}' into trigrams" do
|
111
|
-
|
130
|
+
subject.ngrams(text, 3).should eq(trigrams)
|
112
131
|
end
|
113
132
|
end
|
114
133
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ngrams_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Aleksander Malaszkiewicz
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-07-
|
11
|
+
date: 2013-07-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -80,7 +80,7 @@ files:
|
|
80
80
|
- spec/ngrams_parser/ngrams_spec.rb
|
81
81
|
- spec/ngrams_parser/string_spec.rb
|
82
82
|
- spec/spec_helper.rb
|
83
|
-
homepage:
|
83
|
+
homepage: https://github.com/fractalsoft/ngrams_parser
|
84
84
|
licenses:
|
85
85
|
- MIT
|
86
86
|
metadata: {}
|