andrey 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +17 -0
- data/Gemfile +4 -0
- data/LICENSE +22 -0
- data/README.md +38 -0
- data/Rakefile +13 -0
- data/andrey.gemspec +19 -0
- data/bin/andrey +10 -0
- data/lib/andrey/analyzer.rb +24 -0
- data/lib/andrey/command.rb +34 -0
- data/lib/andrey/language/english.rb +40 -0
- data/lib/andrey/version.rb +3 -0
- data/lib/andrey/word.rb +57 -0
- data/lib/andrey.rb +5 -0
- data/spec/lib/andrey/analyzer_spec.rb +26 -0
- data/spec/lib/andrey/command_spec.rb +12 -0
- data/spec/lib/andrey/language/english_spec.rb +16 -0
- data/spec/lib/andrey/word_spec.rb +28 -0
- data/spec/lib/andrey_spec.rb +8 -0
- data/spec/spec_helper.rb +3 -0
- metadata +82 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2012 Solomon White
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
# Andrey
|
2
|
+
|
3
|
+
Generate pseudopronounceable random words based on bigram frequency
|
4
|
+
distributions
|
5
|
+
|
6
|
+
## Installation
|
7
|
+
|
8
|
+
It's a gem, so install it with `gem install andrey`.
|
9
|
+
|
10
|
+
## Usage
|
11
|
+
|
12
|
+
From the command line:
|
13
|
+
|
14
|
+
andrey generate
|
15
|
+
# => genespha
|
16
|
+
|
17
|
+
From your code:
|
18
|
+
|
19
|
+
Andrey::Word.generate(5).to_s
|
20
|
+
# => 'gatiz'
|
21
|
+
|
22
|
+
Want to customize the Markov model to make the words sound different? (Y U NO
|
23
|
+
LIKE ENGLISH?) Make a new language file, and generate the probability map via:
|
24
|
+
|
25
|
+
andrey analyze /path/to/klingon-opera.txt
|
26
|
+
|
27
|
+
## Why 'Andrey'
|
28
|
+
|
29
|
+
For [Andrey Markov](http://en.wikipedia.org/wiki/Andrey_Markov). And also
|
30
|
+
because Andrey is "rand()" in pig-latin.
|
31
|
+
|
32
|
+
## Contributing
|
33
|
+
|
34
|
+
1. Fork it
|
35
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
36
|
+
3. Commit your changes (`git commit -am 'Added some feature'`)
|
37
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
38
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
#!/usr/bin/env rake
|
2
|
+
require "bundler/gem_tasks"
|
3
|
+
require 'rake/testtask'
|
4
|
+
|
5
|
+
Rake::TestTask.new do |t|
|
6
|
+
t.libs.push "lib"
|
7
|
+
t.libs.push "spec"
|
8
|
+
t.test_files = FileList['spec/**/*_spec.rb']
|
9
|
+
t.verbose = true
|
10
|
+
end
|
11
|
+
|
12
|
+
desc "Default: run specs."
|
13
|
+
task :default => :test
|
data/andrey.gemspec
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
require File.expand_path('../lib/andrey/version', __FILE__)
|
3
|
+
|
4
|
+
# Gem packaging metadata for andrey.
Gem::Specification.new do |gem|
  gem.authors       = ["Solomon White"]
  gem.email         = ["rubysolo@gmail.com"]
  gem.description   = %q{andrey}
  gem.summary       = %q{generate pseudopronounceable random words based on bigram frequency distribution}
  gem.homepage      = "https://github.com/rubysolo/andrey"
  # The bundled LICENSE file is the MIT license; declare it so it shows up in the gem metadata.
  gem.license       = "MIT"

  gem.add_development_dependency 'mocha'

  # Split on the input record separator ($/, "\n"). The output record
  # separator ($\) is nil by default, which makes String#split break on any
  # whitespace and mangles paths that contain spaces.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.name          = "andrey"
  gem.require_paths = ["lib"]
  gem.version       = Andrey::VERSION
end
|
data/bin/andrey
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
module Andrey
  # Counts letter bigrams (adjacent letter pairs) in a text corpus.
  class Analyzer
    # Letters tracked by the analyzer, in table order.
    ALPHABET = ('a'..'z').to_a.freeze

    # Letter => table index, so each lookup is a hash hit rather than a scan.
    POSITION = Hash[ALPHABET.each_with_index.to_a].freeze

    # Reads the corpus stored at +filename+ and analyzes its contents.
    #
    # File.read is used instead of IO.read so that a filename starting with
    # "|" is treated as a path and never as a command to execute.
    #
    # filename - path of the text file to read.
    #
    # Returns the table described in #analyze.
    def read(filename)
      analyze(File.read(filename))
    end

    # Builds the bigram count table for +text+.
    #
    # The text is downcased and cut into runs of the letters a-z; every other
    # character acts as a word boundary, so pairs never span two words.
    #
    # text - the corpus as a String.
    #
    # Returns a 26x26 Array of Integers where table[from][to] is the number of
    # times the letter with index +to+ directly followed the letter with index
    # +from+ (index 0 is 'a', index 25 is 'z').
    def analyze(text)
      size  = ALPHABET.length
      table = Array.new(size) { Array.new(size, 0) }

      text.downcase.scan(/[a-z]+/) do |word|
        word.each_char.each_cons(2) do |from, to|
          table[POSITION[from]][POSITION[to]] += 1
        end
      end

      table
    end
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'andrey/analyzer'
|
2
|
+
require 'andrey/word'
|
3
|
+
|
4
|
+
module Andrey
  # Maps a command-line verb onto an object that responds to #run.
  class Command
    # Prints one generated word to standard output.
    class Generate
      # args - ignored.
      def run(*args)
        puts Andrey::Word.generate
      end
    end

    # Prints the bigram count table of a corpus file to standard output,
    # one bracketed, comma-separated row per line between "[" and "]".
    class Analyze
      # args - the first element is the path of the corpus file to analyze.
      #
      # Raises ArgumentError when no filename is given.
      def run(*args)
        filename = args.first
        raise ArgumentError, "analyze requires a corpus filename" if filename.nil?

        probability_map = Andrey::Analyzer.new.read(filename)
        puts "["
        probability_map.each do |row|
          puts "[#{ row.join(',') }]"
        end
        puts "]"
      end
    end

    # Looks up the command object for +command+.
    #
    # command - the verb typed by the user; matched case-insensitively on its
    #           first three letters ("gen..." or "ana...").
    #
    # Returns a Generate or Analyze instance.
    # Raises RuntimeError for any other verb.
    def self.[](command)
      case command
      # \A anchors at the start of the string; ^ would also match after any
      # embedded newline.
      when /\Agen/i
        Generate.new
      when /\Aana/i
        Analyze.new
      else
        raise "unknown command #{ command }"
      end
    end
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
module Andrey
  module Language
    # English letter set and bigram counts used to drive word generation.
    class English
      # The 26 lowercase letters, in table order.
      SYMBOLS = ('a'..'z').to_a.freeze

      # 26x26 bigram counts. Row index is the current letter and column index
      # is the following letter (0 = 'a' .. 25 = 'z'). Built once and frozen so
      # the table is not re-allocated for every generated letter.
      PROBABILITY_MAP = [
        [  131,9076,12589, 5968, 4774,1359, 5536,  856, 3928,189,2029,28178, 7536,28275,  271, 7214,199,21440,10038,27276, 3550,2089,1131, 905, 1523, 899],
        [ 5297, 927,  211,  316, 5491,  73,   43,  115, 5586,164,  14, 8765,  147,   78, 4110,  118,  6, 4028,  801,  295, 2823,  82,  48,   0,  414,   4],
        [15207,   9, 1525,   24,10495,   4,    4,13308, 7290,  0,3694, 3272,   35,  164,16088,   13, 96, 5504,  475, 6945, 5110,   0,  10,   0, 2814,  30],
        [ 6287, 183,  100, 1034,13714, 218,  582,  299,12286,146,  25, 2113,  389, 1057, 5596,   91,  6, 3433,  577,   78, 2101, 232, 299,   0, 1337,  19],
        [10087,1945, 8383,14736, 3994,2285, 2575,  940, 2678,226, 392,12811, 8075,25070, 4196, 5890,588,42507,20704,12057, 3103,1995,1461,3280,  905, 253],
        [ 2139,  23,   10,   17, 3216,1744,   11,   27, 4052,  4,   8, 2471,   22,   24, 3704,   11,  0, 1677,   46,  807, 2627,   1,  30,   0,  572,   3],
        [ 5000, 116,   18,   81, 7761,  66, 1139, 1704, 5447,  3,  18, 4089,  580, 1878, 3263,   40,  0, 4769,  289,  118, 2452,   2, 132,   0, 1689,   8],
        [ 9380, 180,   69,   84,12374, 198,   43,   74,10434,  4,  31, 1332,  586,  765,10394,  116,  5, 3104,  170, 1646, 1830,  18, 297,   0, 6462,   6],
        [15276,3113,26517,10728, 4020,4328, 4475,  276,  420, 77,1915,10331, 5176,33718,15378, 5100,287, 4552,24204,16952, 1774,5208,  78, 494,   55,4596],
        [  795,   1,    0,    4,  612,   0,    0,    6,  200,  2,   1,    2,    2,    4,  595,    2,  0,    8,    0,    1,  905,   0,   1,   0,    8,   0],
        [ 1431, 148,   37,   38, 4979, 124,   21,  295, 2544, 18,  72,  773,  160,  463,  597,   66,  0,  223,  410,  139,  317,   8, 202,   0,  445,   2],
        [16932, 484,  838, 1393,23347, 538,  577,  165,20883,  6, 506,11418,  983,  741,13004, 1082,  7,  112,  694, 2330, 4434, 760, 215,   3,14059,  25],
        [13712,2638,   57,   47,12258, 142,   21,   50,11357,  6,  18,  253, 2020,  763, 8702, 4382,  2,   65,  277,   43, 2536,  44,  66,   0, 2355,   4],
        [12665, 956, 8690, 9633,21743,2137,13071,  915,15240,340,1344, 1229,  985, 2422,12183, 1481,331, 1306, 6455,19981, 2159,1031, 692,  74, 1331, 385],
        [ 2423,3224, 7960, 5588, 1761,1484, 7623,  764, 4109, 97,1067,12463,12365,29811, 4684,11281,218,19802,10890, 8961,12999,3748,2600,1373,  603, 494],
        [ 8572, 128,   78,   32,11227,  99,   39,13194, 6864, 12,  42, 4977,  137,  389, 8768, 2022,  2, 9756, 2246, 3256, 2507,   3, 134,   0, 1480,   0],
        [    2,   0,    0,    0,    2,   0,    0,    0,    4,  0,   0,    0,    0,    0,    2,    0,  1,    1,    0,    0, 3715,   0,   0,   0,    0,   0],
        [23804,1924, 3855, 3672,23956, 956, 2009, 1602,23280,109,1188, 1865, 4781, 2802,22402, 2423, 82, 3649, 3668, 5672, 3942,1099, 653,   7, 6055,  67],
        [ 5952, 264, 6509,  143,11489, 276,  153, 7365,12066, 43,1027, 2264, 5604, 1568, 6008, 5761,705,  164,14428,22087, 6558,  77, 830,   0, 2130,  11],
        [14456, 280, 1062,   89,29443, 467,  124,12101,31684, 32,  40, 2236,  581,  475,15285,  222,  9,14163,  776, 3232, 4766,  47, 729,   1, 5564, 144],
        [ 2731,3282, 2899, 2362, 1999, 553, 1402,   61, 2613, 43, 294, 9782, 5983,19217,  671, 3098, 21, 8990,14587, 5472,   27, 259,  17, 244,   82, 140],
        [ 3212,   0,    3,    1,10716,   0,    1,    0, 4224,  0,   2,    7,    0,    5, 1545,    0,  0,   33,    8,    0,  286,  12,   0,   0,   81,   3],
        [ 3080, 132,   32,  143, 2121,  80,   28,  930, 2296,  1,  97,  370,   89,  562, 2172,   49,  2,  426,  216,   87,   68,   1,  57,   0,  111,  11],
        [  764,  29,  391,   10,  537,  30,    7,  116, 1419,  0,   2,   41,   27,    9,  570,  550, 10,    7,   56,  778,  161,   2,  24,   0,  597,   1],
        [ 1446, 289, 1696, 1137,  953, 142,  742,  159,  621,  3,  47, 3010, 1796, 1699, 1255, 2686,  5, 1587, 2358, 1641,  136,  17, 204, 166,    4, 135],
        [ 1677,  12,    9,   15, 3613,   2,    7,    7,  708,  0,   4,  127,    5,    4, 1400,    3,  0,    8,    7,   11,   80,   2,  10,   0,  334, 271]
      ].each(&:freeze).freeze

      # Returns the frozen Array of the letters 'a'..'z'.
      def self.symbols
        SYMBOLS
      end

      # Returns the frozen 26x26 bigram count table (see PROBABILITY_MAP).
      def self.probability_map
        PROBABILITY_MAP
      end
    end
  end
end
|
data/lib/andrey/word.rb
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
require 'andrey/language/english'
|
2
|
+
|
3
|
+
module Andrey
  # A word grown one letter at a time, where each letter is drawn at random
  # with a weight given by how often it follows the previous letter in the
  # language's bigram table.
  class Word
    # language - an object responding to +symbols+ (Array of letters) and
    #            +probability_map+ (one row of weights per letter, indexed in
    #            the same order as +symbols+).
    def initialize(language=Language::English)
      @language = language
      @word     = []
    end

    # Returns the letters accumulated so far as a String.
    def to_s
      @word.join
    end

    # Returns the number of letters accumulated so far.
    def length
      @word.length
    end

    # Appends one letter. The first letter is chosen as the successor of a
    # uniformly random symbol; later letters follow the last letter added.
    def add_letter
      @word << next_letter(@word.last || symbols.sample)
    end

    # Picks a successor for +letter+ by weighted random choice over its row
    # of the probability map.
    #
    # letter - a member of the language's symbols.
    #
    # Returns one of the language's symbols. When the row carries no positive
    # weight at all, a uniformly random symbol is returned instead.
    # Raises ArgumentError if +letter+ is not one of the language's symbols.
    def next_letter(letter)
      letters = symbols
      row     = letters.index(letter)
      raise ArgumentError, "unknown symbol #{ letter.inspect }" if row.nil?

      weights = probability_map[row]
      total   = weights.inject(0, :+)
      # Nothing ever follows this letter in the table: fall back to any symbol.
      return letters.sample unless total > 0

      # Walk the cumulative weights until they pass the random pointer.
      pointer = rand(total)
      running = 0
      weights.each_with_index do |weight, column|
        next unless weight > 0

        running += weight
        return letters[column] if running > pointer
      end

      letters.sample
    end

    # Builds a word of +length+ letters.
    #
    # length   - number of letters to generate (default 8).
    # language - see #initialize.
    #
    # Returns the Word instance; call #to_s for the String form.
    def self.generate(length=8, language=Language::English)
      new(language).tap do |word|
        word.add_letter while word.length < length
      end
    end

    private

    def symbols
      @language.symbols
    end

    def probability_map
      @language.probability_map
    end
  end
end
|
data/lib/andrey.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'andrey/analyzer'
|
3
|
+
|
4
|
+
describe Andrey::Analyzer do
|
5
|
+
let(:subject) { Andrey::Analyzer.new }
|
6
|
+
|
7
|
+
it 'counts occurrences of bigrams in source text' do
|
8
|
+
zeros = (0..25).map { |x| 0 }
|
9
|
+
result = subject.analyze("abc")
|
10
|
+
|
11
|
+
freq_a = result[0]
|
12
|
+
freq_a.must_equal zeros.dup.fill(1,1,1)
|
13
|
+
|
14
|
+
freq_b = result[1]
|
15
|
+
freq_b.must_equal zeros.dup.fill(1,2,1)
|
16
|
+
|
17
|
+
freq_c = result[2]
|
18
|
+
freq_c.must_equal zeros
|
19
|
+
end
|
20
|
+
|
21
|
+
it 'reads corpus text from a file' do
|
22
|
+
IO.stubs(:read).returns("abc")
|
23
|
+
subject.expects(:analyze).with("abc")
|
24
|
+
subject.read("filename.txt")
|
25
|
+
end
|
26
|
+
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'andrey/command'
|
3
|
+
|
4
|
+
describe Andrey::Command do
|
5
|
+
let(:described_class) { Andrey::Command }
|
6
|
+
|
7
|
+
it 'generates a random word' do
|
8
|
+
Andrey::Command::Generate.any_instance.expects(:puts).returns(nil)
|
9
|
+
command = described_class['generate']
|
10
|
+
command.run
|
11
|
+
end
|
12
|
+
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'andrey/language/english'
|
3
|
+
|
4
|
+
describe Andrey::Language::English do
|
5
|
+
let(:described_class) { Andrey::Language::English }
|
6
|
+
|
7
|
+
it 'knows the english alphabet' do
|
8
|
+
described_class.symbols.must_equal ('a'..'z').to_a
|
9
|
+
end
|
10
|
+
|
11
|
+
it 'knows the trained bigram freqency' do
|
12
|
+
map = described_class.probability_map
|
13
|
+
map.length.must_equal 26
|
14
|
+
map.each { |row| row.length.must_equal 26 }
|
15
|
+
end
|
16
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'andrey/word'
|
3
|
+
|
4
|
+
# Minimal language stub for the Word specs: the only non-zero weight is
# 'a' -> 'b', so the successor of 'a' is always 'b'.
class TestLanguage
  def self.symbols
    ('a'..'z').to_a
  end

  def self.probability_map
    @map ||= begin
      # Each row is its own array; reusing one array for all 26 rows would
      # make the assignment below set the weight in every row at once.
      outer = Array.new(26) { Array.new(26, 0) }
      outer[0][1] = 1
      outer
    end
  end
end
|
18
|
+
|
19
|
+
describe Andrey::Word do
|
20
|
+
it 'generates a word based on letter frequencies' do
|
21
|
+
Andrey::Word.generate().length.must_equal 8
|
22
|
+
end
|
23
|
+
|
24
|
+
it 'picks a next letter, based on probability' do
|
25
|
+
word = Andrey::Word.new(TestLanguage)
|
26
|
+
word.next_letter('a').must_equal 'b'
|
27
|
+
end
|
28
|
+
end
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: andrey
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Solomon White
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-08-02 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: mocha
|
16
|
+
requirement: &70199999986280 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *70199999986280
|
25
|
+
description: andrey
|
26
|
+
email:
|
27
|
+
- rubysolo@gmail.com
|
28
|
+
executables:
|
29
|
+
- andrey
|
30
|
+
extensions: []
|
31
|
+
extra_rdoc_files: []
|
32
|
+
files:
|
33
|
+
- .gitignore
|
34
|
+
- Gemfile
|
35
|
+
- LICENSE
|
36
|
+
- README.md
|
37
|
+
- Rakefile
|
38
|
+
- andrey.gemspec
|
39
|
+
- bin/andrey
|
40
|
+
- lib/andrey.rb
|
41
|
+
- lib/andrey/analyzer.rb
|
42
|
+
- lib/andrey/command.rb
|
43
|
+
- lib/andrey/language/english.rb
|
44
|
+
- lib/andrey/version.rb
|
45
|
+
- lib/andrey/word.rb
|
46
|
+
- spec/lib/andrey/analyzer_spec.rb
|
47
|
+
- spec/lib/andrey/command_spec.rb
|
48
|
+
- spec/lib/andrey/language/english_spec.rb
|
49
|
+
- spec/lib/andrey/word_spec.rb
|
50
|
+
- spec/lib/andrey_spec.rb
|
51
|
+
- spec/spec_helper.rb
|
52
|
+
homepage: https://github.com/rubysolo/andrey
|
53
|
+
licenses: []
|
54
|
+
post_install_message:
|
55
|
+
rdoc_options: []
|
56
|
+
require_paths:
|
57
|
+
- lib
|
58
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
59
|
+
none: false
|
60
|
+
requirements:
|
61
|
+
- - ! '>='
|
62
|
+
- !ruby/object:Gem::Version
|
63
|
+
version: '0'
|
64
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ! '>='
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
requirements: []
|
71
|
+
rubyforge_project:
|
72
|
+
rubygems_version: 1.8.11
|
73
|
+
signing_key:
|
74
|
+
specification_version: 3
|
75
|
+
summary: generate pseudopronounceable random words based on bigram frequency distribution
|
76
|
+
test_files:
|
77
|
+
- spec/lib/andrey/analyzer_spec.rb
|
78
|
+
- spec/lib/andrey/command_spec.rb
|
79
|
+
- spec/lib/andrey/language/english_spec.rb
|
80
|
+
- spec/lib/andrey/word_spec.rb
|
81
|
+
- spec/lib/andrey_spec.rb
|
82
|
+
- spec/spec_helper.rb
|