andrey 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in andrey.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Solomon White
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,38 @@
1
+ # Andrey
2
+
3
+ Generate pseudopronounceable random words based on bigram frequency
4
+ distributions
5
+
6
+ ## Installation
7
+
8
+ It's a gem, so install it with `gem install andrey`.
9
+
10
+ ## Usage
11
+
12
+ From the command line:
13
+
14
+ andrey generate
15
+ # => genespha
16
+
17
+ From your code:
18
+
19
+ Andrey::Word.generate(5)
20
+ # => 'gatiz'
21
+
22
+ Want to customize the Markov model to make the words sound different? (Y U NO
23
+ LIKE ENGLISH?) Make a new language file, and generate the probability map via:
24
+
25
+ andrey analyze /path/to/klingon-opera.txt
26
+
27
+ ## Why 'Andrey'
28
+
29
+ For [Andrey Markov](http://en.wikipedia.org/wiki/Andrey_Markov). And also
30
+ because Andrey is "rand()" in pig-latin.
31
+
32
+ ## Contributing
33
+
34
+ 1. Fork it
35
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
36
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
37
+ 4. Push to the branch (`git push origin my-new-feature`)
38
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,13 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
3
+ require 'rake/testtask'
4
+
5
+ Rake::TestTask.new do |t|
6
+ t.libs.push "lib"
7
+ t.libs.push "spec"
8
+ t.test_files = FileList['spec/**/*_spec.rb']
9
+ t.verbose = true
10
+ end
11
+
12
+ desc "Default: run specs."
13
+ task :default => :test
data/andrey.gemspec ADDED
@@ -0,0 +1,19 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/andrey/version', __FILE__)
3
+
4
+ Gem::Specification.new do |gem|
5
+ gem.authors = ["Solomon White"]
6
+ gem.email = ["rubysolo@gmail.com"]
7
+ gem.description = %q{andrey}
8
+ gem.summary = %q{generate pseudopronounceable random words based on bigram freqency distribution}
9
+ gem.homepage = "https://github.com/rubysolo/andrey"
10
+
11
+ gem.add_development_dependency 'mocha'
12
+
13
+ gem.files = `git ls-files`.split($\)
14
+ gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
15
+ gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
16
+ gem.name = "andrey"
17
+ gem.require_paths = ["lib"]
18
+ gem.version = Andrey::VERSION
19
+ end
data/bin/andrey ADDED
@@ -0,0 +1,10 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ library_path = File.dirname(__FILE__) + '/../lib'
4
+ $LOAD_PATH.unshift(library_path) unless $LOAD_PATH.include?(library_path)
5
+
6
+ require 'andrey/command'
7
+
8
+ command = ARGV.shift
9
+ Andrey::Command[command].run(*ARGV)
10
+
@@ -0,0 +1,24 @@
1
module Andrey
  # Builds a bigram frequency table from a body of text.
  #
  # The table is a 26x26 array of integers: table[i][j] is the number of
  # times letter j ('a' = 0 .. 'z' = 25) immediately followed letter i
  # inside a word of the analyzed text.
  class Analyzer
    ALPHABET_SIZE = 26
    FIRST_LETTER  = 'a'.ord

    # Reads +filename+ and analyzes its contents.
    #
    # File.read is used instead of IO.read on purpose: IO.read treats a
    # name starting with "|" as a command to spawn, and the name comes
    # straight from the command line.
    #
    # @param filename [String] path of the corpus file
    # @return [Array<Array<Integer>>] see #analyze
    def read(filename)
      analyze(File.read(filename))
    end

    # Counts adjacent letter pairs in +text+.
    #
    # The text is downcased and cut into runs of a-z; anything else acts
    # as a word separator, so pairs never span two words.
    #
    # @param text [String] corpus text
    # @return [Array<Array<Integer>>] 26 independent rows of 26 counters
    def analyze(text)
      # Block form so that every row is its own array.
      table = Array.new(ALPHABET_SIZE) { Array.new(ALPHABET_SIZE, 0) }

      text.downcase.scan(/[a-z]+/) do |word|
        word.each_char.each_cons(2) do |from, to|
          table[from.ord - FIRST_LETTER][to.ord - FIRST_LETTER] += 1
        end
      end

      table
    end
  end
end
@@ -0,0 +1,34 @@
1
+ require 'andrey/analyzer'
2
+ require 'andrey/word'
3
+
4
module Andrey
  # Maps a command-line verb onto the object that carries it out.
  #
  #   Andrey::Command['generate'].run
  #   Andrey::Command['analyze'].run('/path/to/corpus.txt')
  class Command
    # Prints one generated word to standard output.
    class Generate
      # @param args [Array<String>] remaining command-line arguments (unused)
      def run(*args)
        puts Andrey::Word.generate
      end
    end

    # Prints the bigram table of a corpus file to standard output, one
    # bracketed row per line, wrapped in a pair of brackets.
    class Analyze
      # @param args [Array<String>] the first element is the corpus file path
      # @raise [ArgumentError] when no file path was given
      def run(*args)
        path = args.first
        # Fail with a usage hint instead of a TypeError from deep inside
        # the file-reading call.
        raise ArgumentError, 'usage: andrey analyze FILE' if path.nil?

        probability_map = Andrey::Analyzer.new.read(path)
        puts "["
        probability_map.each { |row| puts "[#{ row.join(',') }]" }
        puts "]"
      end
    end

    # Looks up the handler for +command+ by case-insensitive prefix
    # ("gen..." or "ana...").
    #
    # The patterns are anchored with \A rather than ^ so that only the
    # very start of the argument is considered, not the start of any
    # line inside it.
    #
    # @param command [String, nil] verb taken from the command line
    # @return [Generate, Analyze]
    # @raise [RuntimeError] when the verb is not recognised
    def self.[](command)
      case command
      when /\Agen/i then Generate.new
      when /\Aana/i then Analyze.new
      else raise "unknown command #{ command }"
      end
    end
  end
end
@@ -0,0 +1,40 @@
1
module Andrey
  module Language
    # Alphabet and bigram frequency table for English.
    #
    # Both tables are built once and frozen; the accessors hand out the
    # same objects on every call instead of rebuilding 26 arrays each
    # time a letter is generated. Callers that need to modify a table
    # must work on a copy.
    class English
      # The 26 lowercase letters, in table order.
      SYMBOLS = ('a'..'z').map(&:freeze).freeze

      # PROBABILITY_MAP[i][j] is the weight of SYMBOLS[j] following
      # SYMBOLS[i].
      PROBABILITY_MAP = [
        [  131,9076,12589, 5968, 4774,1359, 5536,  856, 3928,189,2029,28178, 7536,28275,  271, 7214,199,21440,10038,27276, 3550,2089,1131, 905, 1523, 899],
        [ 5297, 927,  211,  316, 5491,  73,   43,  115, 5586,164,  14, 8765,  147,   78, 4110,  118,  6, 4028,  801,  295, 2823,  82,  48,   0,  414,   4],
        [15207,   9, 1525,   24,10495,   4,    4,13308, 7290,  0,3694, 3272,   35,  164,16088,   13, 96, 5504,  475, 6945, 5110,   0,  10,   0, 2814,  30],
        [ 6287, 183,  100, 1034,13714, 218,  582,  299,12286,146,  25, 2113,  389, 1057, 5596,   91,  6, 3433,  577,   78, 2101, 232, 299,   0, 1337,  19],
        [10087,1945, 8383,14736, 3994,2285, 2575,  940, 2678,226, 392,12811, 8075,25070, 4196, 5890,588,42507,20704,12057, 3103,1995,1461,3280,  905, 253],
        [ 2139,  23,   10,   17, 3216,1744,   11,   27, 4052,  4,   8, 2471,   22,   24, 3704,   11,  0, 1677,   46,  807, 2627,   1,  30,   0,  572,   3],
        [ 5000, 116,   18,   81, 7761,  66, 1139, 1704, 5447,  3,  18, 4089,  580, 1878, 3263,   40,  0, 4769,  289,  118, 2452,   2, 132,   0, 1689,   8],
        [ 9380, 180,   69,   84,12374, 198,   43,   74,10434,  4,  31, 1332,  586,  765,10394,  116,  5, 3104,  170, 1646, 1830,  18, 297,   0, 6462,   6],
        [15276,3113,26517,10728, 4020,4328, 4475,  276,  420, 77,1915,10331, 5176,33718,15378, 5100,287, 4552,24204,16952, 1774,5208,  78, 494,   55,4596],
        [  795,   1,    0,    4,  612,   0,    0,    6,  200,  2,   1,    2,    2,    4,  595,    2,  0,    8,    0,    1,  905,   0,   1,   0,    8,   0],
        [ 1431, 148,   37,   38, 4979, 124,   21,  295, 2544, 18,  72,  773,  160,  463,  597,   66,  0,  223,  410,  139,  317,   8, 202,   0,  445,   2],
        [16932, 484,  838, 1393,23347, 538,  577,  165,20883,  6, 506,11418,  983,  741,13004, 1082,  7,  112,  694, 2330, 4434, 760, 215,   3,14059,  25],
        [13712,2638,   57,   47,12258, 142,   21,   50,11357,  6,  18,  253, 2020,  763, 8702, 4382,  2,   65,  277,   43, 2536,  44,  66,   0, 2355,   4],
        [12665, 956, 8690, 9633,21743,2137,13071,  915,15240,340,1344, 1229,  985, 2422,12183, 1481,331, 1306, 6455,19981, 2159,1031, 692,  74, 1331, 385],
        [ 2423,3224, 7960, 5588, 1761,1484, 7623,  764, 4109, 97,1067,12463,12365,29811, 4684,11281,218,19802,10890, 8961,12999,3748,2600,1373,  603, 494],
        [ 8572, 128,   78,   32,11227,  99,   39,13194, 6864, 12,  42, 4977,  137,  389, 8768, 2022,  2, 9756, 2246, 3256, 2507,   3, 134,   0, 1480,   0],
        [    2,   0,    0,    0,    2,   0,    0,    0,    4,  0,   0,    0,    0,    0,    2,    0,  1,    1,    0,    0, 3715,   0,   0,   0,    0,   0],
        [23804,1924, 3855, 3672,23956, 956, 2009, 1602,23280,109,1188, 1865, 4781, 2802,22402, 2423, 82, 3649, 3668, 5672, 3942,1099, 653,   7, 6055,  67],
        [ 5952, 264, 6509,  143,11489, 276,  153, 7365,12066, 43,1027, 2264, 5604, 1568, 6008, 5761,705,  164,14428,22087, 6558,  77, 830,   0, 2130,  11],
        [14456, 280, 1062,   89,29443, 467,  124,12101,31684, 32,  40, 2236,  581,  475,15285,  222,  9,14163,  776, 3232, 4766,  47, 729,   1, 5564, 144],
        [ 2731,3282, 2899, 2362, 1999, 553, 1402,   61, 2613, 43, 294, 9782, 5983,19217,  671, 3098, 21, 8990,14587, 5472,   27, 259,  17, 244,   82, 140],
        [ 3212,   0,    3,    1,10716,   0,    1,    0, 4224,  0,   2,    7,    0,    5, 1545,    0,  0,   33,    8,    0,  286,  12,   0,   0,   81,   3],
        [ 3080, 132,   32,  143, 2121,  80,   28,  930, 2296,  1,  97,  370,   89,  562, 2172,   49,  2,  426,  216,   87,   68,   1,  57,   0,  111,  11],
        [  764,  29,  391,   10,  537,  30,    7,  116, 1419,  0,   2,   41,   27,    9,  570,  550, 10,    7,   56,  778,  161,   2,  24,   0,  597,   1],
        [ 1446, 289, 1696, 1137,  953, 142,  742,  159,  621,  3,  47, 3010, 1796, 1699, 1255, 2686,  5, 1587, 2358, 1641,  136,  17, 204, 166,    4, 135],
        [ 1677,  12,    9,   15, 3613,   2,    7,    7,  708,  0,   4,  127,    5,    4, 1400,    3,  0,    8,    7,   11,   80,   2,  10,   0,  334, 271]
      ].map(&:freeze).freeze

      # @return [Array<String>] the letters 'a'..'z' (frozen, shared)
      def self.symbols
        SYMBOLS
      end

      # @return [Array<Array<Integer>>] 26x26 bigram weights (frozen, shared)
      def self.probability_map
        PROBABILITY_MAP
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
+ module Andrey
2
+ VERSION = "0.0.1"
3
+ end
@@ -0,0 +1,57 @@
1
+ require 'andrey/language/english'
2
+
3
module Andrey
  # A word grown one letter at a time, each letter chosen at random with
  # a weight taken from the language's bigram table row for the letter
  # before it.
  #
  # A language is any object responding to +symbols+ (array of letters)
  # and +probability_map+ (one row of weights per symbol, in the same
  # order as +symbols+).
  class Word
    # @param language [#symbols, #probability_map] source of the alphabet
    #   and of the bigram weights
    def initialize(language=Language::English)
      @language = language
      @word     = []
    end

    # @return [String] the letters generated so far, joined together
    def to_s
      @word.join
    end

    # @return [Integer] number of letters generated so far
    def length
      @word.length
    end

    # Appends one letter. The first letter of a word is picked as the
    # successor of a uniformly random seed letter.
    def add_letter
      @word << next_letter(@word.last || symbols.sample)
    end

    # Picks a successor for +letter+ by weighted random choice.
    #
    # @param letter [String] a member of the language's symbols
    # @return [String] the chosen following symbol; a uniformly random
    #   symbol when the row for +letter+ carries no weight at all
    # @raise [ArgumentError] when +letter+ is not one of the symbols
    def next_letter(letter)
      row = symbols.index(letter)
      raise ArgumentError, "unknown symbol #{letter.inspect}" if row.nil?

      weights = probability_map[row]
      total   = weights.inject(0, :+)
      # Nothing ever follows this letter in the model: any symbol will do.
      return symbols.sample unless total > 0

      # Walk the cumulative weights until they pass a random point in
      # 0...total.
      pointer = rand(total)
      running = 0
      weights.each_with_index do |weight, column|
        next unless weight > 0

        running += weight
        return symbols[column] if running > pointer
      end

      symbols.sample
    end

    # Builds a word of +length+ letters.
    #
    # @param length [Integer] number of letters wanted
    # @param language [#symbols, #probability_map]
    # @return [Andrey::Word] the finished word (use #to_s for the text)
    def self.generate(length=8, language=Language::English)
      new(language).tap do |word|
        word.add_letter while word.length < length
      end
    end

    private

    # The language tables are fetched once per word; a language may
    # rebuild them on every call.
    def symbols
      @symbols ||= @language.symbols
    end

    def probability_map
      @probability_map ||= @language.probability_map
    end
  end
end
data/lib/andrey.rb ADDED
@@ -0,0 +1,5 @@
1
require "andrey/version"
require "andrey/analyzer"
require "andrey/word"

# Pseudopronounceable random words generated from bigram frequencies.
#
# Requiring this file loads the library classes, so that
#
#   require 'andrey'
#   Andrey::Word.generate(5)
#
# works without requiring the individual files.
module Andrey
end
@@ -0,0 +1,26 @@
1
+ require 'spec_helper'
2
+ require 'andrey/analyzer'
3
+
4
+ describe Andrey::Analyzer do
5
+ let(:subject) { Andrey::Analyzer.new }
6
+
7
+ it 'counts occurrences of bigrams in source text' do
8
+ zeros = (0..25).map { |x| 0 }
9
+ result = subject.analyze("abc")
10
+
11
+ freq_a = result[0]
12
+ freq_a.must_equal zeros.dup.fill(1,1,1)
13
+
14
+ freq_b = result[1]
15
+ freq_b.must_equal zeros.dup.fill(1,2,1)
16
+
17
+ freq_c = result[2]
18
+ freq_c.must_equal zeros
19
+ end
20
+
21
+ it 'reads corpus text from a file' do
22
+ IO.stubs(:read).returns("abc")
23
+ subject.expects(:analyze).with("abc")
24
+ subject.read("filename.txt")
25
+ end
26
+ end
@@ -0,0 +1,12 @@
1
+ require 'spec_helper'
2
+ require 'andrey/command'
3
+
4
+ describe Andrey::Command do
5
+ let(:described_class) { Andrey::Command }
6
+
7
+ it 'generates a random word' do
8
+ Andrey::Command::Generate.any_instance.expects(:puts).returns(nil)
9
+ command = described_class['generate']
10
+ command.run
11
+ end
12
+ end
@@ -0,0 +1,16 @@
1
+ require 'spec_helper'
2
+ require 'andrey/language/english'
3
+
4
+ describe Andrey::Language::English do
5
+ let(:described_class) { Andrey::Language::English }
6
+
7
+ it 'knows the english alphabet' do
8
+ described_class.symbols.must_equal ('a'..'z').to_a
9
+ end
10
+
11
+ it 'knows the trained bigram freqency' do
12
+ map = described_class.probability_map
13
+ map.length.must_equal 26
14
+ map.each { |row| row.length.must_equal 26 }
15
+ end
16
+ end
@@ -0,0 +1,28 @@
1
+ require 'spec_helper'
2
+ require 'andrey/word'
3
+
4
# Minimal language used by the specs: the only non-zero weight in the
# whole table is 'a' -> 'b', so the successor of 'a' is always 'b'.
class TestLanguage
  def self.symbols
    ('a'..'z').to_a
  end

  def self.probability_map
    # Block form gives every row its own array; sharing a single row
    # object would make the assignment below show up in all 26 rows.
    @map ||= Array.new(26) { Array.new(26, 0) }.tap { |rows| rows[0][1] = 1 }
  end
end
18
+
19
+ describe Andrey::Word do
20
+ it 'generates a word based on letter frequencies' do
21
+ Andrey::Word.generate().length.must_equal 8
22
+ end
23
+
24
+ it 'picks a next letter, based on probability' do
25
+ word = Andrey::Word.new(TestLanguage)
26
+ word.next_letter('a').must_equal 'b'
27
+ end
28
+ end
@@ -0,0 +1,8 @@
1
+ require 'spec_helper'
2
+ require 'andrey'
3
+
4
+ describe Andrey do
5
+ it 'generates a word based on letter frequencies' do
6
+
7
+ end
8
+ end
@@ -0,0 +1,3 @@
1
+ require 'minitest/spec'
2
+ require 'minitest/autorun'
3
+ require 'mocha'
metadata ADDED
@@ -0,0 +1,82 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: andrey
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Solomon White
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-08-02 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: mocha
16
+ requirement: &70199999986280 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: *70199999986280
25
+ description: andrey
26
+ email:
27
+ - rubysolo@gmail.com
28
+ executables:
29
+ - andrey
30
+ extensions: []
31
+ extra_rdoc_files: []
32
+ files:
33
+ - .gitignore
34
+ - Gemfile
35
+ - LICENSE
36
+ - README.md
37
+ - Rakefile
38
+ - andrey.gemspec
39
+ - bin/andrey
40
+ - lib/andrey.rb
41
+ - lib/andrey/analyzer.rb
42
+ - lib/andrey/command.rb
43
+ - lib/andrey/language/english.rb
44
+ - lib/andrey/version.rb
45
+ - lib/andrey/word.rb
46
+ - spec/lib/andrey/analyzer_spec.rb
47
+ - spec/lib/andrey/command_spec.rb
48
+ - spec/lib/andrey/language/english_spec.rb
49
+ - spec/lib/andrey/word_spec.rb
50
+ - spec/lib/andrey_spec.rb
51
+ - spec/spec_helper.rb
52
+ homepage: https://github.com/rubysolo/andrey
53
+ licenses: []
54
+ post_install_message:
55
+ rdoc_options: []
56
+ require_paths:
57
+ - lib
58
+ required_ruby_version: !ruby/object:Gem::Requirement
59
+ none: false
60
+ requirements:
61
+ - - ! '>='
62
+ - !ruby/object:Gem::Version
63
+ version: '0'
64
+ required_rubygems_version: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ requirements: []
71
+ rubyforge_project:
72
+ rubygems_version: 1.8.11
73
+ signing_key:
74
+ specification_version: 3
75
+ summary: generate pseudopronounceable random words based on bigram freqency distribution
76
+ test_files:
77
+ - spec/lib/andrey/analyzer_spec.rb
78
+ - spec/lib/andrey/command_spec.rb
79
+ - spec/lib/andrey/language/english_spec.rb
80
+ - spec/lib/andrey/word_spec.rb
81
+ - spec/lib/andrey_spec.rb
82
+ - spec/spec_helper.rb