ngram 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/ngram.rb +29 -0
- metadata +59 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
data.tar.gz: 24d70550296074ff0c420c90f3c6dea8bf9c97d1
|
4
|
+
metadata.gz: 5a098b7eba2e5a37f01e09d52e5fda018bb630ba
|
5
|
+
SHA512:
|
6
|
+
data.tar.gz: 235df4e836d8f556c9835c825b263c11863e4a7d339db14f07620099cd47c551c4f149044cba43a7c4ffc32729d75690a969d9ff783735df7869e3e9df820e74
|
7
|
+
metadata.gz: 6bbb302861599e7f8d1ec2bd5ea70783aaa46286cc249bd1c759d725c8849caf89a736e1510cbf2a78a0e3c86f684bf5a237d3273657dbe3d7c47cf2a5294180
|
data/lib/ngram.rb
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# Breaks words and phrases into character n-grams.
#
# Each word is padded on both sides with (size - 1) pad characters before
# being sliced, so the leading and trailing characters of a word appear in
# as many n-grams as the interior ones.
#
#   NGram.new.parse("abc")           # => ["_a", "ab", "bc", "c_"]
#   NGram.new(size: 3).parse("ab")   # => ["__a", "_ab", "ab_", "b__"]
class NGram
  VERSION = "1.0.0"

  attr_accessor :size, :word_separator, :padchar

  # @param opts [Hash] optional settings
  # @option opts [Integer] :size           length of each n-gram (default 2)
  # @option opts [String]  :word_separator string used to split a phrase
  #   into words (default " ")
  # @option opts [String]  :padchar        character used to pad each word
  #   (default "_")
  def initialize(opts = {})
    @size = opts[:size] || 2
    @word_separator = opts[:word_separator] || " "
    @padchar = opts[:padchar] || "_"
  end

  # Splits +phrase+ on the configured word separator and returns n-grams.
  #
  # @param phrase [String] a single word or a separator-delimited phrase
  # @return [Array<String>] when the phrase contains exactly one word
  #   (the phrase itself is processed as given)
  # @return [Array<Array<String>>] one n-gram list per word otherwise
  #   (an empty phrase yields an empty array)
  def parse(phrase)
    # Split on the configured separator. The previous code referenced an
    # unset @separator (nil), which silently ignored :word_separator and
    # always split on whitespace.
    words = phrase.split(@word_separator)
    return process(phrase) if words.length == 1

    words.map { |w| process(w) }
  end

  # Pads +word+ on both sides and returns every window of @size characters.
  #
  # @param word [String]
  # @return [Array<String>]
  def process(word)
    pad = @padchar * (@size - 1)
    padded = "#{pad}#{word}#{pad}"
    (0..padded.length - @size).map { |idx| padded[idx, @size] }
  end
  private :process
end
|
metadata
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: ngram
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Tyler Kellen
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2013-12-12 00:00:00 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rspec
|
16
|
+
prerelease: false
|
17
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
18
|
+
requirements:
|
19
|
+
- &id002
|
20
|
+
- ">="
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: "0"
|
23
|
+
type: :development
|
24
|
+
version_requirements: *id001
|
25
|
+
description: Break words and phrases into ngrams.
|
26
|
+
email: tyler@sleekcode.net
|
27
|
+
executables: []
|
28
|
+
|
29
|
+
extensions: []
|
30
|
+
|
31
|
+
extra_rdoc_files: []
|
32
|
+
|
33
|
+
files:
|
34
|
+
- lib/ngram.rb
|
35
|
+
homepage: https://github.com/tkellen/ruby-ngram
|
36
|
+
licenses:
|
37
|
+
- MIT
|
38
|
+
metadata: {}
|
39
|
+
|
40
|
+
post_install_message:
|
41
|
+
rdoc_options: []
|
42
|
+
|
43
|
+
require_paths:
|
44
|
+
- lib
|
45
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
46
|
+
requirements:
|
47
|
+
- *id002
|
48
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
49
|
+
requirements:
|
50
|
+
- *id002
|
51
|
+
requirements: []
|
52
|
+
|
53
|
+
rubyforge_project:
|
54
|
+
rubygems_version: 2.1.11
|
55
|
+
signing_key:
|
56
|
+
specification_version: 4
|
57
|
+
summary: Break words and phrases into ngrams.
|
58
|
+
test_files: []
|
59
|
+
|