andrey 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in andrey.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Solomon White
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,38 @@
1
+ # Andrey
2
+
3
+ Generate pseudopronounceable random words based on bigram frequency
4
+ distributions
5
+
6
+ ## Installation
7
+
8
+ It's a gem, so install it with `gem install andrey` (or add `gem 'andrey'` to your Gemfile and run `bundle install`).
9
+
10
+ ## Usage
11
+
12
+ From the command line:
13
+
14
+ andrey generate
15
+ # => genespha
16
+
17
+ From your code:
18
+
19
+ Andrey::Word.generate(5).to_s
20
+ # => 'gatiz'
21
+
22
+ Want to customize the Markov model to make the words sound different? (Y U NO
23
+ LIKE ENGLISH?) Make a new language file, and generate the probability map via:
24
+
25
+ andrey analyze /path/to/klingon-opera.txt
26
+
27
+ ## Why 'Andrey'
28
+
29
+ For [Andrey Markov](http://en.wikipedia.org/wiki/Andrey_Markov). And also
30
+ because Andrey is "rand()" in pig-latin.
31
+
32
+ ## Contributing
33
+
34
+ 1. Fork it
35
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
36
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
37
+ 4. Push to the branch (`git push origin my-new-feature`)
38
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,13 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
3
+ require 'rake/testtask'
4
+
5
# Defines the `test` task: lib/ and spec/ go on the load path and every
# file matching spec/**/*_spec.rb is run.
Rake::TestTask.new do |test_task|
  test_task.libs.push("lib", "spec")
  test_task.test_files = FileList['spec/**/*_spec.rb']
  test_task.verbose = true
end

# A bare `rake` runs the test task.
desc "Default: run specs."
task default: :test
data/andrey.gemspec ADDED
@@ -0,0 +1,19 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/andrey/version', __FILE__)
3
+
4
# Gem specification for andrey. Andrey::VERSION comes from
# lib/andrey/version, which is required above this block.
Gem::Specification.new do |gem|
  gem.name          = "andrey"
  gem.version       = Andrey::VERSION
  gem.authors       = ["Solomon White"]
  gem.email         = ["rubysolo@gmail.com"]
  gem.description   = %q{andrey}
  gem.summary       = %q{generate pseudopronounceable random words based on bigram frequency distribution}
  gem.homepage      = "https://github.com/rubysolo/andrey"
  # The bundled LICENSE file is the MIT license.
  gem.license       = "MIT"

  gem.add_development_dependency 'mocha'

  # Split on the input record separator ($/, a newline). The original used
  # $\ (output record separator, nil by default), which makes String#split
  # break on any whitespace and so mangles file names containing spaces.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]
end
data/bin/andrey ADDED
@@ -0,0 +1,10 @@
1
+ #!/usr/bin/env ruby
2
+
3
# Put the checkout's lib/ directory on the load path. The path is expanded
# so the include? guard compares normalized absolute paths.
library_path = File.expand_path('../../lib', __FILE__)
$LOAD_PATH.unshift(library_path) unless $LOAD_PATH.include?(library_path)

require 'andrey/command'

# The first argument selects the command; the rest are passed to it.
command = ARGV.shift
abort "usage: andrey generate | andrey analyze FILE" if command.nil?
Andrey::Command[command].run(*ARGV)
10
+
@@ -0,0 +1,24 @@
1
module Andrey
  # Counts how often each letter is followed by each other letter (bigram
  # frequencies) in a body of text.
  class Analyzer
    # Reads the file at +filename+ and analyzes its contents.
    #
    # File.read is used instead of IO.read so that a name beginning with "|"
    # is always treated as a path and never run as a command.
    #
    # Returns the same table as #analyze.
    def read(filename)
      analyze(File.read(filename))
    end

    # Builds a 26x26 table of bigram counts for +text+.
    #
    # The text is lowercased and split into runs of a-z; anything else only
    # separates words. table[i][j] is the number of times letter i is
    # immediately followed by letter j inside a word (0 = 'a', 25 = 'z').
    #
    # Returns an Array of 26 Arrays of 26 Integers.
    def analyze(text)
      symbols = ('a'..'z').to_a

      # letter => row/column position, so each lookup is a hash hit
      position = {}
      symbols.each_with_index { |symbol, i| position[symbol] = i }

      # every row is its own array, so incrementing one cell touches one row
      table = Array.new(symbols.length) { Array.new(symbols.length, 0) }

      text.downcase.scan(/[a-z]+/) do |word|
        word.each_char.each_cons(2) do |from, to|
          table[position[from]][position[to]] += 1
        end
      end

      table
    end
  end
end
@@ -0,0 +1,34 @@
1
+ require 'andrey/analyzer'
2
+ require 'andrey/word'
3
+
4
module Andrey
  # Command-line commands. Command[name] returns an object responding to
  # run(*args).
  class Command
    # Prints one generated word to standard output.
    class Generate
      def run(*args)
        puts Andrey::Word.generate
      end
    end

    # Prints the bigram frequency table of a text file as a nested array
    # literal, one row per line.
    class Analyze
      # args - the first element is the path of the corpus file.
      #
      # Raises ArgumentError when no path is given.
      def run(*args)
        filename = args.first
        raise ArgumentError, "usage: andrey analyze FILE" if filename.nil?

        probability_map = Andrey::Analyzer.new.read(filename)
        rows = probability_map.map { |row| "[#{ row.join(',') }]" }

        # Rows are comma-separated so the output is a valid array literal.
        puts "["
        puts rows.join(",\n")
        puts "]"
      end
    end

    # Looks up a command by name prefix, case-insensitively
    # ("gen..." -> Generate, "ana..." -> Analyze).
    #
    # \A anchors at the start of the string; ^ would also match after a
    # newline embedded in the argument.
    #
    # Raises RuntimeError for any other value, including nil.
    def self.[](command)
      case command
      when /\Agen/i
        Generate.new
      when /\Aana/i
        Analyze.new
      else
        raise "unknown command #{ command }"
      end
    end
  end
end
@@ -0,0 +1,40 @@
1
module Andrey
  module Language
    # English alphabet plus a bigram frequency table.
    #
    # Both values are built once and frozen. Previously every call to
    # .symbols or .probability_map allocated a fresh copy (a 26x26 array in
    # the latter case).
    class English
      # The 26 lowercase letters, in the order used to index PROBABILITY_MAP.
      SYMBOLS = ('a'..'z').to_a.each(&:freeze).freeze

      # PROBABILITY_MAP[i][j] is the weight of SYMBOLS[j] following
      # SYMBOLS[i]; one row per symbol.
      PROBABILITY_MAP = [
        [  131,9076,12589, 5968, 4774,1359, 5536,  856, 3928,189,2029,28178, 7536,28275,  271, 7214,199,21440,10038,27276, 3550,2089,1131, 905, 1523, 899],
        [ 5297, 927,  211,  316, 5491,  73,   43,  115, 5586,164,  14, 8765,  147,   78, 4110,  118,  6, 4028,  801,  295, 2823,  82,  48,   0,  414,   4],
        [15207,   9, 1525,   24,10495,   4,    4,13308, 7290,  0,3694, 3272,   35,  164,16088,   13, 96, 5504,  475, 6945, 5110,   0,  10,   0, 2814,  30],
        [ 6287, 183,  100, 1034,13714, 218,  582,  299,12286,146,  25, 2113,  389, 1057, 5596,   91,  6, 3433,  577,   78, 2101, 232, 299,   0, 1337,  19],
        [10087,1945, 8383,14736, 3994,2285, 2575,  940, 2678,226, 392,12811, 8075,25070, 4196, 5890,588,42507,20704,12057, 3103,1995,1461,3280,  905, 253],
        [ 2139,  23,   10,   17, 3216,1744,   11,   27, 4052,  4,   8, 2471,   22,   24, 3704,   11,  0, 1677,   46,  807, 2627,   1,  30,   0,  572,   3],
        [ 5000, 116,   18,   81, 7761,  66, 1139, 1704, 5447,  3,  18, 4089,  580, 1878, 3263,   40,  0, 4769,  289,  118, 2452,   2, 132,   0, 1689,   8],
        [ 9380, 180,   69,   84,12374, 198,   43,   74,10434,  4,  31, 1332,  586,  765,10394,  116,  5, 3104,  170, 1646, 1830,  18, 297,   0, 6462,   6],
        [15276,3113,26517,10728, 4020,4328, 4475,  276,  420, 77,1915,10331, 5176,33718,15378, 5100,287, 4552,24204,16952, 1774,5208,  78, 494,   55,4596],
        [  795,   1,    0,    4,  612,   0,    0,    6,  200,  2,   1,    2,    2,    4,  595,    2,  0,    8,    0,    1,  905,   0,   1,   0,    8,   0],
        [ 1431, 148,   37,   38, 4979, 124,   21,  295, 2544, 18,  72,  773,  160,  463,  597,   66,  0,  223,  410,  139,  317,   8, 202,   0,  445,   2],
        [16932, 484,  838, 1393,23347, 538,  577,  165,20883,  6, 506,11418,  983,  741,13004, 1082,  7,  112,  694, 2330, 4434, 760, 215,   3,14059,  25],
        [13712,2638,   57,   47,12258, 142,   21,   50,11357,  6,  18,  253, 2020,  763, 8702, 4382,  2,   65,  277,   43, 2536,  44,  66,   0, 2355,   4],
        [12665, 956, 8690, 9633,21743,2137,13071,  915,15240,340,1344, 1229,  985, 2422,12183, 1481,331, 1306, 6455,19981, 2159,1031, 692,  74, 1331, 385],
        [ 2423,3224, 7960, 5588, 1761,1484, 7623,  764, 4109, 97,1067,12463,12365,29811, 4684,11281,218,19802,10890, 8961,12999,3748,2600,1373,  603, 494],
        [ 8572, 128,   78,   32,11227,  99,   39,13194, 6864, 12,  42, 4977,  137,  389, 8768, 2022,  2, 9756, 2246, 3256, 2507,   3, 134,   0, 1480,   0],
        [    2,   0,    0,    0,    2,   0,    0,    0,    4,  0,   0,    0,    0,    0,    2,    0,  1,    1,    0,    0, 3715,   0,   0,   0,    0,   0],
        [23804,1924, 3855, 3672,23956, 956, 2009, 1602,23280,109,1188, 1865, 4781, 2802,22402, 2423, 82, 3649, 3668, 5672, 3942,1099, 653,   7, 6055,  67],
        [ 5952, 264, 6509,  143,11489, 276,  153, 7365,12066, 43,1027, 2264, 5604, 1568, 6008, 5761,705,  164,14428,22087, 6558,  77, 830,   0, 2130,  11],
        [14456, 280, 1062,   89,29443, 467,  124,12101,31684, 32,  40, 2236,  581,  475,15285,  222,  9,14163,  776, 3232, 4766,  47, 729,   1, 5564, 144],
        [ 2731,3282, 2899, 2362, 1999, 553, 1402,   61, 2613, 43, 294, 9782, 5983,19217,  671, 3098, 21, 8990,14587, 5472,   27, 259,  17, 244,   82, 140],
        [ 3212,   0,    3,    1,10716,   0,    1,    0, 4224,  0,   2,    7,    0,    5, 1545,    0,  0,   33,    8,    0,  286,  12,   0,   0,   81,   3],
        [ 3080, 132,   32,  143, 2121,  80,   28,  930, 2296,  1,  97,  370,   89,  562, 2172,   49,  2,  426,  216,   87,   68,   1,  57,   0,  111,  11],
        [  764,  29,  391,   10,  537,  30,    7,  116, 1419,  0,   2,   41,   27,    9,  570,  550, 10,    7,   56,  778,  161,   2,  24,   0,  597,   1],
        [ 1446, 289, 1696, 1137,  953, 142,  742,  159,  621,  3,  47, 3010, 1796, 1699, 1255, 2686,  5, 1587, 2358, 1641,  136,  17, 204, 166,    4, 135],
        [ 1677,  12,    9,   15, 3613,   2,    7,    7,  708,  0,   4,  127,    5,    4, 1400,    3,  0,    8,    7,   11,   80,   2,  10,   0,  334, 271]
      ].each(&:freeze).freeze

      # Returns the frozen Array of the 26 lowercase letters.
      def self.symbols
        SYMBOLS
      end

      # Returns the frozen 26x26 Array of bigram weights.
      def self.probability_map
        PROBABILITY_MAP
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
+ module Andrey
2
+ VERSION = "0.0.1"
3
+ end
@@ -0,0 +1,57 @@
1
+ require 'andrey/language/english'
2
+
3
module Andrey
  # A word built one letter at a time; each letter is drawn at random,
  # weighted by how often it follows the previous letter in the language's
  # bigram table.
  class Word
    # language - any object responding to +symbols+ (Array of letters) and
    #            +probability_map+ (one Array of successor weights per
    #            symbol, in the same order as +symbols+).
    def initialize(language=Language::English)
      @language = language
      @word = []
    end

    # Returns the letters generated so far as a String.
    def to_s
      @word.join
    end

    # Returns the number of letters generated so far.
    def length
      @word.length
    end

    # Appends one letter. It follows the current last letter, or a randomly
    # sampled symbol when the word is still empty.
    def add_letter
      @word << next_letter(@word.last || symbols.sample)
    end

    # Picks a successor for +letter+ by weighted random choice over the
    # letter's row of the probability map.
    #
    # Returns a uniformly sampled symbol when the row has no positive weight.
    # Raises ArgumentError when +letter+ is not one of the language's symbols.
    def next_letter(letter)
      alphabet = symbols
      row = alphabet.index(letter)
      raise ArgumentError, "unknown symbol #{letter.inspect}" if row.nil?

      weights = probability_map[row]
      total = weights.inject(0, :+)
      # No recorded successor: fall back explicitly rather than relying on
      # rand(0) returning a float that no running sum exceeds.
      return alphabet.sample unless total > 0

      pointer = rand(total)
      running = 0

      weights.each_with_index do |weight, column|
        next unless weight > 0

        running += weight
        return alphabet[column] if running > pointer
      end

      alphabet.sample
    end

    # Builds a word of +length+ letters using +language+.
    #
    # Returns the Word; call #to_s for the String.
    def self.generate(length=8, language=Language::English)
      new(language).tap do |word|
        word.add_letter while word.length < length
      end
    end

    private

    def symbols
      @language.symbols
    end

    def probability_map
      @language.probability_map
    end
  end
end
data/lib/andrey.rb ADDED
@@ -0,0 +1,5 @@
1
require "andrey/version"
require "andrey/analyzer"
require "andrey/word"

# Entry point of the gem. Requiring "andrey" loads the version, the
# Analyzer and Word (which in turn loads the English language table), so
# the README's `Andrey::Word.generate` works after a plain require.
module Andrey
end
@@ -0,0 +1,26 @@
1
+ require 'spec_helper'
2
+ require 'andrey/analyzer'
3
+
4
describe Andrey::Analyzer do
  let(:subject) { Andrey::Analyzer.new }

  it 'counts occurrences of bigrams in source text' do
    blank_row = Array.new(26, 0)
    table = subject.analyze("abc")

    # "abc" holds exactly two bigrams: a->b and b->c
    a_then_b = blank_row.dup
    a_then_b[1] = 1
    table[0].must_equal a_then_b

    b_then_c = blank_row.dup
    b_then_c[2] = 1
    table[1].must_equal b_then_c

    # nothing follows "c"
    table[2].must_equal blank_row
  end

  it 'reads corpus text from a file' do
    # the file read is stubbed, so no file is touched
    IO.stubs(:read).returns("abc")
    subject.expects(:analyze).with("abc")

    subject.read("filename.txt")
  end
end
@@ -0,0 +1,12 @@
1
+ require 'spec_helper'
2
+ require 'andrey/command'
3
+
4
describe Andrey::Command do
  let(:described_class) { Andrey::Command }

  it 'generates a random word' do
    # the command must print once; the output itself is swallowed
    Andrey::Command::Generate.any_instance.expects(:puts).returns(nil)

    described_class['generate'].run
  end
end
@@ -0,0 +1,16 @@
1
+ require 'spec_helper'
2
+ require 'andrey/language/english'
3
+
4
describe Andrey::Language::English do
  let(:described_class) { Andrey::Language::English }

  it 'knows the english alphabet' do
    # explicit call parentheses: `must_equal ('a'..'z').to_a` parses with an
    # ambiguity warning
    described_class.symbols.must_equal(('a'..'z').to_a)
  end

  it 'knows the trained bigram frequency' do
    # the table must be square: one row and one column per letter
    map = described_class.probability_map
    map.length.must_equal 26
    map.each { |row| row.length.must_equal 26 }
  end
end
@@ -0,0 +1,28 @@
1
+ require 'spec_helper'
2
+ require 'andrey/word'
3
+
4
# Minimal language fixture: the only bigram with any weight is a -> b.
class TestLanguage
  # Returns the 26 lowercase letters.
  def self.symbols
    ('a'..'z').to_a
  end

  # Returns a memoized 26x26 table that is zero everywhere except [0][1].
  #
  # Each row is built by its own block call. Reusing a single row object for
  # all 26 rows would make the [0][1] assignment show up in every row.
  def self.probability_map
    @map ||= begin
      rows = Array.new(26) { Array.new(26, 0) }
      rows[0][1] = 1
      rows
    end
  end
end
18
+
19
describe Andrey::Word do
  it 'generates a word based on letter frequencies' do
    # with no arguments the word is eight letters long
    generated = Andrey::Word.generate
    generated.length.must_equal 8
  end

  it 'picks a next letter, based on probability' do
    # in TestLanguage the row for 'a' puts all of its weight on 'b'
    Andrey::Word.new(TestLanguage).next_letter('a').must_equal 'b'
  end
end
@@ -0,0 +1,8 @@
1
+ require 'spec_helper'
2
+ require 'andrey'
3
+
4
# Loaded here so the example works regardless of what `require 'andrey'`
# pulls in.
require 'andrey/word'

describe Andrey do
  it 'generates a word based on letter frequencies' do
    # The example body used to be empty and passed without checking anything.
    # The default word is eight letters from the English alphabet.
    Andrey::Word.generate.to_s.must_match(/\A[a-z]{8}\z/)
  end
end
@@ -0,0 +1,3 @@
1
+ require 'minitest/spec'
2
+ require 'minitest/autorun'
3
+ require 'mocha'
metadata ADDED
@@ -0,0 +1,82 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: andrey
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Solomon White
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-08-02 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: mocha
16
+ requirement: &70199999986280 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: *70199999986280
25
+ description: andrey
26
+ email:
27
+ - rubysolo@gmail.com
28
+ executables:
29
+ - andrey
30
+ extensions: []
31
+ extra_rdoc_files: []
32
+ files:
33
+ - .gitignore
34
+ - Gemfile
35
+ - LICENSE
36
+ - README.md
37
+ - Rakefile
38
+ - andrey.gemspec
39
+ - bin/andrey
40
+ - lib/andrey.rb
41
+ - lib/andrey/analyzer.rb
42
+ - lib/andrey/command.rb
43
+ - lib/andrey/language/english.rb
44
+ - lib/andrey/version.rb
45
+ - lib/andrey/word.rb
46
+ - spec/lib/andrey/analyzer_spec.rb
47
+ - spec/lib/andrey/command_spec.rb
48
+ - spec/lib/andrey/language/english_spec.rb
49
+ - spec/lib/andrey/word_spec.rb
50
+ - spec/lib/andrey_spec.rb
51
+ - spec/spec_helper.rb
52
+ homepage: https://github.com/rubysolo/andrey
53
+ licenses: []
54
+ post_install_message:
55
+ rdoc_options: []
56
+ require_paths:
57
+ - lib
58
+ required_ruby_version: !ruby/object:Gem::Requirement
59
+ none: false
60
+ requirements:
61
+ - - ! '>='
62
+ - !ruby/object:Gem::Version
63
+ version: '0'
64
+ required_rubygems_version: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ requirements: []
71
+ rubyforge_project:
72
+ rubygems_version: 1.8.11
73
+ signing_key:
74
+ specification_version: 3
75
+ summary: generate pseudopronounceable random words based on bigram frequency distribution
76
+ test_files:
77
+ - spec/lib/andrey/analyzer_spec.rb
78
+ - spec/lib/andrey/command_spec.rb
79
+ - spec/lib/andrey/language/english_spec.rb
80
+ - spec/lib/andrey/word_spec.rb
81
+ - spec/lib/andrey_spec.rb
82
+ - spec/spec_helper.rb