andrey 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore CHANGED
@@ -15,3 +15,4 @@ spec/reports
15
15
  test/tmp
16
16
  test/version_tmp
17
17
  tmp
18
+ *[swp|swo]
data/README.md CHANGED
@@ -24,10 +24,10 @@ LIKE ENGLISH?) Make a new language file, and generate the probability map via:
24
24
 
25
25
  andrey analyze /path/to/klingon-opera.txt
26
26
 
27
- ## Why 'Andrey'
27
+ ## Why 'Andrey'?
28
28
 
29
29
  For [Andrey Markov](http://en.wikipedia.org/wiki/Andrey_Markov). And also
30
- because Andrey is "rand()" in pig-latin.
30
+ because And-rey is "rand()" in pig-latin.
31
31
 
32
32
  ## Contributing
33
33
 
data/bin/andrey CHANGED
@@ -6,5 +6,14 @@ $LOAD_PATH.unshift(library_path) unless $LOAD_PATH.include?(library_path)
6
6
  require 'andrey/command'
7
7
 
8
8
  command = ARGV.shift
9
- Andrey::Command[command].run(*ARGV)
10
9
 
10
+ unless ["analyze", "generate"].include?(command)
11
+ puts <<-eof
12
+ usage:
13
+ andrey generate #=> Generate a new word based on current bigram frequency
14
+ andrey analyze filename #=> Read in the specified text file and calculate bigram frequency
15
+ eof
16
+ exit 1
17
+ end
18
+
19
+ Andrey::Command[command].run(*ARGV)
@@ -1,5 +1,17 @@
1
+ require "andrey/analyzer"
1
2
  require "andrey/version"
3
+ require "andrey/word"
2
4
 
3
5
  module Andrey
4
- # Your code goes here...
6
+ def self.analyze_file(*args)
7
+ Analyzer.analyze_file(*args)
8
+ end
9
+
10
+ def self.analyze_text(*args)
11
+ Analyzer.analyze_text(*args)
12
+ end
13
+
14
+ def self.generate(*args)
15
+ Word.generate(*args)
16
+ end
5
17
  end
@@ -1,24 +1,32 @@
1
1
  module Andrey
2
2
  class Analyzer
3
- def read(filename)
4
- analyze(IO.read(filename))
3
+ def self.analyze_file(filename, include_spaces=false)
4
+ analyze_text(IO.read(filename), include_spaces)
5
5
  end
6
6
 
7
- def analyze(text)
8
- symbols = ('a'..'z').to_a
9
- zeros = [].fill(0,0,26)
10
- map = (0..25).to_a.map { |x| zeros.dup }
7
+ def self.analyze_text(text, include_spaces=false)
8
+ pmap = {}
11
9
 
12
- text.downcase.split(/[^a-z]+/).each do |word|
13
- word.split(//).each_cons(2) do |from, to|
14
- col = symbols.index(from)
15
- row = symbols.index(to)
10
+ text.downcase.chars.each_cons(2) do |from, to|
11
+ pmap[from] ||= Hash.new(0)
12
+ pmap[to] ||= Hash.new(0)
13
+ pmap[from][to] += 1
14
+ end
15
+
16
+ regex = symbols_regex(include_spaces)
17
+ symbols = pmap.keys.sort.select {|s| s.match(regex) }
16
18
 
17
- map[col][row] += 1
18
- end
19
+ map = symbols.inject([]) do |m, symbol|
20
+ occurrences = pmap[symbol]
21
+ sum = occurrences.values.inject(:+) || 0
22
+ m << symbols.map { |s| sum == 0 ? 0 : (occurrences[s] / sum.to_f) }
19
23
  end
20
24
 
21
- map
25
+ [symbols, map]
26
+ end
27
+
28
+ def self.symbols_regex(include_spaces)
29
+ include_spaces ? Regexp.new(/[a-z ]/) : Regexp.new(/[a-z]/)
22
30
  end
23
31
  end
24
32
  end
@@ -5,16 +5,40 @@ module Andrey
5
5
  class Command
6
6
  class Generate
7
7
  def run(*args)
8
- puts Andrey::Word.generate
8
+ # turn cli stuffs into ruby stuffs
9
+ options = {}
10
+ args.each_cons(2) do |(flag, value)|
11
+ options[:length] = value.to_i if flag == '-l'
12
+ options[:language] = language_class(value) if flag == '-m'
13
+ options[:corpus] = value if flag == '-c'
14
+ end
15
+
16
+ puts Andrey::Word.generate(options)
17
+ end
18
+
19
+ def language_class(filename)
20
+ absolute = if filename.match(%r{^/})
21
+ filename
22
+ else
23
+ File.expand_path(filename, Dir.pwd)
24
+ end
25
+
26
+ require absolute
27
+ classname = File.basename(filename, '.rb').split('_').map(&:capitalize).join
28
+ Andrey::Language.const_get(classname)
9
29
  end
10
30
  end
11
31
 
12
32
  class Analyze
13
33
  def run(*args)
14
- probability_map = Andrey::Analyzer.new.read(args.first)
34
+ symbols, probability_map = Andrey::Analyzer.analyze_file(args.first)
35
+ puts "[#{ symbols.map{|s| "'#{ s }'" }.join(',') }]"
36
+
15
37
  puts "["
16
- probability_map.each do |row|
17
- puts "[#{ row.join(',') }]"
38
+ probability_map.each_with_index do |row, index|
39
+ print "[#{ row.map { |v| sprintf('%.04f',v) }.join(',') }]"
40
+ print "," if index < 25
41
+ puts
18
42
  end
19
43
  puts "]"
20
44
  end
@@ -0,0 +1,40 @@
1
+ module Andrey
2
+ module Language
3
+ class AmericanNames
4
+ def self.symbols
5
+ ('a'..'z').to_a
6
+ end
7
+
8
+ def self.probability_map
9
+ [
10
+ [125,740,1698,1357,901,163,596,623,910,39,405,4917,4716,12334,40,306,32,13076,2427,3008,1576,1507,612,109,1479,306],
11
+ [1865,442,9,7,4329,0,0,3,732,3,0,510,0,8,1108,0,0,2391,231,8,711,2,0,0,389,0],
12
+ [3012,23,501,119,2046,36,125,5389,1170,2,2232,914,59,73,2585,20,105,651,9,169,383,4,9,0,532,10],
13
+ [3811,29,13,389,3182,51,247,22,1243,1,21,285,112,199,2410,0,4,1289,607,89,426,4,512,0,721,3],
14
+ [1910,719,514,1653,1538,423,665,120,774,79,118,7730,790,6987,688,889,1,13967,5292,2829,195,1243,1028,202,2861,1542],
15
+ [427,0,2,0,818,848,4,0,546,0,3,342,67,8,689,0,0,1257,5,80,152,0,0,0,7,0],
16
+ [1828,10,0,25,2305,14,379,696,798,0,0,671,22,145,1005,2,1,1183,227,160,728,0,47,0,76,0],
17
+ [6231,20,9,2,4705,22,0,1,1797,1,0,355,113,1672,2528,0,0,917,32,311,913,0,39,0,717,0],
18
+ [4029,182,4463,1254,3691,596,892,8,1,14,184,4863,1020,7160,492,376,73,995,4394,2321,41,423,4,118,0,475],
19
+ [2538,0,3,0,1562,0,0,0,200,0,0,0,0,0,3731,0,0,0,0,1,725,0,0,0,0,0],
20
+ [1046,10,0,0,2629,9,0,45,1260,0,19,275,76,146,164,15,1,295,565,9,90,0,42,0,165,0],
21
+ [4520,384,129,1746,7776,199,96,35,5707,0,258,7342,463,24,2509,154,3,24,941,654,562,464,35,0,1553,22],
22
+ [7138,469,1120,0,2833,7,12,2,3533,0,5,39,551,17,2071,533,2,9,755,1,468,0,11,0,482,1],
23
+ [3562,102,1440,3832,6085,55,1871,88,2943,108,573,186,29,3356,1233,17,5,242,2502,1777,182,8,60,0,719,291],
24
+ [376,1773,445,1136,356,151,355,1678,191,37,320,2562,1480,10336,1258,804,8,5056,2175,1164,1127,300,1412,155,751,122],
25
+ [1867,97,6,1,1670,14,0,1742,372,0,35,170,33,1,484,202,0,391,410,67,107,0,0,0,11,0],
26
+ [0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,482,0,0,0,0,0],
27
+ [5553,548,540,2522,6057,53,1248,171,7387,51,932,1863,721,1360,5955,223,46,2801,1663,3551,1109,222,86,1,3124,55],
28
+ [2631,103,876,21,2493,20,13,2220,857,2,189,359,802,131,3983,336,60,14,1302,4219,630,3,185,0,191,8],
29
+ [1873,9,332,0,3955,31,36,5553,2221,7,73,346,100,135,2917,8,0,1454,442,2708,324,0,67,0,557,276],
30
+ [423,270,532,537,1039,136,666,42,536,25,43,1367,351,972,19,107,3,1668,1218,654,3,13,1,48,82,181],
31
+ [1083,0,0,0,1731,0,0,0,2511,0,1,8,0,1,178,0,0,9,1,0,6,0,0,0,23,0],
32
+ [1978,13,6,38,1331,48,2,391,2532,0,51,131,67,578,467,2,1,290,248,36,10,0,0,0,77,1],
33
+ [135,0,0,0,6,3,0,0,51,0,0,10,3,2,62,0,0,0,7,60,0,0,20,0,0,0],
34
+ [575,29,156,299,471,7,4,10,10,8,43,794,172,1107,328,8,1,200,214,81,14,56,17,0,0,1],
35
+ [785,5,9,2,230,2,11,4,148,0,1,14,39,3,93,8,30,2,3,0,29,0,4,0,11,47]
36
+ ]
37
+ end
38
+ end
39
+ end
40
+ end
@@ -0,0 +1,12 @@
1
+ module Andrey
2
+ module Language
3
+ class Dynamic
4
+ attr_reader :symbols, :probability_map
5
+
6
+ def initialize(symbols, map)
7
+ @symbols = symbols
8
+ @probability_map = map
9
+ end
10
+ end
11
+ end
12
+ end
@@ -7,32 +7,32 @@ module Andrey
7
7
 
8
8
  def self.probability_map
9
9
  [
10
- [ 131,9076,12589, 5968, 4774,1359, 5536, 856, 3928,189,2029,28178, 7536,28275, 271, 7214,199,21440,10038,27276, 3550,2089,1131, 905, 1523, 899],
11
- [ 5297, 927, 211, 316, 5491, 73, 43, 115, 5586,164, 14, 8765, 147, 78, 4110, 118, 6, 4028, 801, 295, 2823, 82, 48, 0, 414, 4],
12
- [15207, 9, 1525, 24,10495, 4, 4,13308, 7290, 0,3694, 3272, 35, 164,16088, 13, 96, 5504, 475, 6945, 5110, 0, 10, 0, 2814, 30],
13
- [ 6287, 183, 100, 1034,13714, 218, 582, 299,12286,146, 25, 2113, 389, 1057, 5596, 91, 6, 3433, 577, 78, 2101, 232, 299, 0, 1337, 19],
14
- [10087,1945, 8383,14736, 3994,2285, 2575, 940, 2678,226, 392,12811, 8075,25070, 4196, 5890,588,42507,20704,12057, 3103,1995,1461,3280, 905, 253],
15
- [ 2139, 23, 10, 17, 3216,1744, 11, 27, 4052, 4, 8, 2471, 22, 24, 3704, 11, 0, 1677, 46, 807, 2627, 1, 30, 0, 572, 3],
16
- [ 5000, 116, 18, 81, 7761, 66, 1139, 1704, 5447, 3, 18, 4089, 580, 1878, 3263, 40, 0, 4769, 289, 118, 2452, 2, 132, 0, 1689, 8],
17
- [ 9380, 180, 69, 84,12374, 198, 43, 74,10434, 4, 31, 1332, 586, 765,10394, 116, 5, 3104, 170, 1646, 1830, 18, 297, 0, 6462, 6],
18
- [15276,3113,26517,10728, 4020,4328, 4475, 276, 420, 77,1915,10331, 5176,33718,15378, 5100,287, 4552,24204,16952, 1774,5208, 78, 494, 55,4596],
19
- [ 795, 1, 0, 4, 612, 0, 0, 6, 200, 2, 1, 2, 2, 4, 595, 2, 0, 8, 0, 1, 905, 0, 1, 0, 8, 0],
20
- [ 1431, 148, 37, 38, 4979, 124, 21, 295, 2544, 18, 72, 773, 160, 463, 597, 66, 0, 223, 410, 139, 317, 8, 202, 0, 445, 2],
21
- [16932, 484, 838, 1393,23347, 538, 577, 165,20883, 6, 506,11418, 983, 741,13004, 1082, 7, 112, 694, 2330, 4434, 760, 215, 3,14059, 25],
22
- [13712,2638, 57, 47,12258, 142, 21, 50,11357, 6, 18, 253, 2020, 763, 8702, 4382, 2, 65, 277, 43, 2536, 44, 66, 0, 2355, 4],
23
- [12665, 956, 8690, 9633,21743,2137,13071, 915,15240,340,1344, 1229, 985, 2422,12183, 1481,331, 1306, 6455,19981, 2159,1031, 692, 74, 1331, 385],
24
- [ 2423,3224, 7960, 5588, 1761,1484, 7623, 764, 4109, 97,1067,12463,12365,29811, 4684,11281,218,19802,10890, 8961,12999,3748,2600,1373, 603, 494],
25
- [ 8572, 128, 78, 32,11227, 99, 39,13194, 6864, 12, 42, 4977, 137, 389, 8768, 2022, 2, 9756, 2246, 3256, 2507, 3, 134, 0, 1480, 0],
26
- [ 2, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 0, 0, 2, 0, 1, 1, 0, 0, 3715, 0, 0, 0, 0, 0],
27
- [23804,1924, 3855, 3672,23956, 956, 2009, 1602,23280,109,1188, 1865, 4781, 2802,22402, 2423, 82, 3649, 3668, 5672, 3942,1099, 653, 7, 6055, 67],
28
- [ 5952, 264, 6509, 143,11489, 276, 153, 7365,12066, 43,1027, 2264, 5604, 1568, 6008, 5761,705, 164,14428,22087, 6558, 77, 830, 0, 2130, 11],
29
- [14456, 280, 1062, 89,29443, 467, 124,12101,31684, 32, 40, 2236, 581, 475,15285, 222, 9,14163, 776, 3232, 4766, 47, 729, 1, 5564, 144],
30
- [ 2731,3282, 2899, 2362, 1999, 553, 1402, 61, 2613, 43, 294, 9782, 5983,19217, 671, 3098, 21, 8990,14587, 5472, 27, 259, 17, 244, 82, 140],
31
- [ 3212, 0, 3, 1,10716, 0, 1, 0, 4224, 0, 2, 7, 0, 5, 1545, 0, 0, 33, 8, 0, 286, 12, 0, 0, 81, 3],
32
- [ 3080, 132, 32, 143, 2121, 80, 28, 930, 2296, 1, 97, 370, 89, 562, 2172, 49, 2, 426, 216, 87, 68, 1, 57, 0, 111, 11],
33
- [ 764, 29, 391, 10, 537, 30, 7, 116, 1419, 0, 2, 41, 27, 9, 570, 550, 10, 7, 56, 778, 161, 2, 24, 0, 597, 1],
34
- [ 1446, 289, 1696, 1137, 953, 142, 742, 159, 621, 3, 47, 3010, 1796, 1699, 1255, 2686, 5, 1587, 2358, 1641, 136, 17, 204, 166, 4, 135],
35
- [ 1677, 12, 9, 15, 3613, 2, 7, 7, 708, 0, 4, 127, 5, 4, 1400, 3, 0, 8, 7, 11, 80, 2, 10, 0, 334, 271]
10
+ [0.0007,0.0455,0.0631,0.0299,0.0239,0.0068,0.0277,0.0043,0.0197,0.0009,0.0102,0.1412,0.0378,0.1417,0.0014,0.0362,0.0010,0.1074,0.0503,0.1367,0.0178,0.0105,0.0057,0.0045,0.0076,0.0045],
11
+ [0.1310,0.0229,0.0052,0.0078,0.1358,0.0018,0.0011,0.0028,0.1382,0.0041,0.0003,0.2168,0.0036,0.0019,0.1016,0.0029,0.0001,0.0996,0.0198,0.0073,0.0698,0.0020,0.0012,0.0000,0.0102,0.0001],
12
+ [0.1470,0.0001,0.0147,0.0002,0.1015,0.0000,0.0000,0.1287,0.0705,0.0000,0.0357,0.0316,0.0003,0.0016,0.1555,0.0001,0.0009,0.0532,0.0046,0.0671,0.0494,0.0000,0.0001,0.0000,0.0272,0.0003],
13
+ [0.0922,0.0027,0.0015,0.0152,0.2011,0.0032,0.0085,0.0044,0.1802,0.0021,0.0004,0.0310,0.0057,0.0155,0.0821,0.0013,0.0001,0.0503,0.0085,0.0011,0.0308,0.0034,0.0044,0.0000,0.0196,0.0003],
14
+ [0.0429,0.0083,0.0356,0.0626,0.0170,0.0097,0.0109,0.0040,0.0114,0.0010,0.0017,0.0544,0.0343,0.1065,0.0178,0.0250,0.0025,0.1806,0.0880,0.0512,0.0132,0.0085,0.0062,0.0139,0.0038,0.0011],
15
+ [0.0885,0.0010,0.0004,0.0007,0.1331,0.0722,0.0005,0.0011,0.1677,0.0002,0.0003,0.1023,0.0009,0.0010,0.1533,0.0005,0.0000,0.0694,0.0019,0.0334,0.1087,0.0000,0.0012,0.0000,0.0237,0.0001],
16
+ [0.1062,0.0025,0.0004,0.0017,0.1648,0.0014,0.0242,0.0362,0.1157,0.0001,0.0004,0.0868,0.0123,0.0399,0.0693,0.0008,0.0000,0.1013,0.0061,0.0025,0.0521,0.0000,0.0028,0.0000,0.0359,0.0002],
17
+ [0.1458,0.0028,0.0011,0.0013,0.1923,0.0031,0.0007,0.0011,0.1621,0.0001,0.0005,0.0207,0.0091,0.0119,0.1615,0.0018,0.0001,0.0482,0.0026,0.0256,0.0284,0.0003,0.0046,0.0000,0.1004,0.0001],
18
+ [0.0760,0.0155,0.1319,0.0534,0.0200,0.0215,0.0223,0.0014,0.0021,0.0004,0.0095,0.0514,0.0257,0.1677,0.0765,0.0254,0.0014,0.0226,0.1204,0.0843,0.0088,0.0259,0.0004,0.0025,0.0003,0.0229],
19
+ [0.2510,0.0003,0.0000,0.0013,0.1932,0.0000,0.0000,0.0019,0.0632,0.0006,0.0003,0.0006,0.0006,0.0013,0.1879,0.0006,0.0000,0.0025,0.0000,0.0003,0.2858,0.0000,0.0003,0.0000,0.0025,0.0000],
20
+ [0.0886,0.0092,0.0023,0.0024,0.3081,0.0077,0.0013,0.0183,0.1574,0.0011,0.0045,0.0478,0.0099,0.0287,0.0369,0.0041,0.0000,0.0138,0.0254,0.0086,0.0196,0.0005,0.0125,0.0000,0.0275,0.0001],
21
+ [0.1298,0.0037,0.0064,0.0107,0.1790,0.0041,0.0044,0.0013,0.1601,0.0000,0.0039,0.0875,0.0075,0.0057,0.0997,0.0083,0.0001,0.0009,0.0053,0.0179,0.0340,0.0058,0.0016,0.0000,0.1078,0.0002],
22
+ [0.1940,0.0373,0.0008,0.0007,0.1734,0.0020,0.0003,0.0007,0.1607,0.0001,0.0003,0.0036,0.0286,0.0108,0.1231,0.0620,0.0000,0.0009,0.0039,0.0006,0.0359,0.0006,0.0009,0.0000,0.0333,0.0001],
23
+ [0.0798,0.0060,0.0547,0.0607,0.1370,0.0135,0.0823,0.0058,0.0960,0.0021,0.0085,0.0077,0.0062,0.0153,0.0767,0.0093,0.0021,0.0082,0.0407,0.1259,0.0136,0.0065,0.0044,0.0005,0.0084,0.0024],
24
+ [0.0142,0.0189,0.0466,0.0327,0.0103,0.0087,0.0447,0.0045,0.0241,0.0006,0.0063,0.0730,0.0724,0.1746,0.0274,0.0661,0.0013,0.1160,0.0638,0.0525,0.0762,0.0220,0.0152,0.0080,0.0035,0.0029],
25
+ [0.1097,0.0016,0.0010,0.0004,0.1436,0.0013,0.0005,0.1688,0.0878,0.0002,0.0005,0.0637,0.0018,0.0050,0.1122,0.0259,0.0000,0.1248,0.0287,0.0417,0.0321,0.0000,0.0017,0.0000,0.0189,0.0000],
26
+ [0.0005,0.0000,0.0000,0.0000,0.0005,0.0000,0.0000,0.0000,0.0011,0.0000,0.0000,0.0000,0.0000,0.0000,0.0005,0.0000,0.0003,0.0003,0.0000,0.0000,0.9949,0.0000,0.0000,0.0000,0.0000,0.0000],
27
+ [0.1479,0.0120,0.0239,0.0228,0.1488,0.0059,0.0125,0.0100,0.1446,0.0007,0.0074,0.0116,0.0297,0.0174,0.1392,0.0151,0.0005,0.0227,0.0228,0.0352,0.0245,0.0068,0.0041,0.0000,0.0376,0.0004],
28
+ [0.0427,0.0019,0.0466,0.0010,0.0823,0.0020,0.0011,0.0528,0.0865,0.0003,0.0074,0.0162,0.0402,0.0112,0.0431,0.0413,0.0051,0.0012,0.1034,0.1583,0.0470,0.0006,0.0059,0.0000,0.0153,0.0001],
29
+ [0.0946,0.0018,0.0069,0.0006,0.1927,0.0031,0.0008,0.0792,0.2073,0.0002,0.0003,0.0146,0.0038,0.0031,0.1000,0.0015,0.0001,0.0927,0.0051,0.0211,0.0312,0.0003,0.0048,0.0000,0.0364,0.0009],
30
+ [0.0313,0.0376,0.0332,0.0270,0.0229,0.0063,0.0160,0.0007,0.0299,0.0005,0.0034,0.1120,0.0685,0.2200,0.0077,0.0355,0.0002,0.1029,0.1670,0.0626,0.0003,0.0030,0.0002,0.0028,0.0009,0.0016],
31
+ [0.1592,0.0000,0.0001,0.0000,0.5311,0.0000,0.0000,0.0000,0.2093,0.0000,0.0001,0.0003,0.0000,0.0002,0.0766,0.0000,0.0000,0.0016,0.0004,0.0000,0.0142,0.0006,0.0000,0.0000,0.0040,0.0001],
32
+ [0.2222,0.0095,0.0023,0.0103,0.1530,0.0058,0.0020,0.0671,0.1656,0.0001,0.0070,0.0267,0.0064,0.0405,0.1567,0.0035,0.0001,0.0307,0.0156,0.0063,0.0049,0.0001,0.0041,0.0000,0.0080,0.0008],
33
+ [0.1102,0.0042,0.0564,0.0014,0.0775,0.0043,0.0010,0.0167,0.2047,0.0000,0.0003,0.0059,0.0039,0.0013,0.0822,0.0793,0.0014,0.0010,0.0081,0.1122,0.0232,0.0003,0.0035,0.0000,0.0861,0.0001],
34
+ [0.0280,0.0056,0.0328,0.0220,0.0184,0.0027,0.0144,0.0031,0.0120,0.0001,0.0009,0.0582,0.0348,0.0329,0.0243,0.0520,0.0001,0.0307,0.0456,0.0318,0.0026,0.0003,0.0039,0.0032,0.0001,0.0026],
35
+ [0.1982,0.0014,0.0011,0.0018,0.4271,0.0002,0.0008,0.0008,0.0837,0.0000,0.0005,0.0150,0.0006,0.0005,0.1655,0.0004,0.0000,0.0009,0.0008,0.0013,0.0095,0.0002,0.0012,0.0000,0.0395,0.0320]
36
36
  ]
37
37
  end
38
38
  end
@@ -1,3 +1,3 @@
1
1
  module Andrey
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
@@ -1,3 +1,5 @@
1
+ require 'andrey/language/american_names'
2
+ require 'andrey/language/dynamic'
1
3
  require 'andrey/language/english'
2
4
 
3
5
  module Andrey
@@ -23,7 +25,7 @@ module Andrey
23
25
  index = symbols.index(letter)
24
26
 
25
27
  probabilities = probability_map[index]
26
- pointer = rand(probabilities.inject(:+))
28
+ pointer = rand
27
29
  sum = 0
28
30
 
29
31
  probabilities.each_with_index do |p, index|
@@ -36,9 +38,16 @@ module Andrey
36
38
  symbols.sample
37
39
  end
38
40
 
39
- def self.generate(length=8, language=Language::English)
40
- new(language).tap do |word|
41
- while word.length < length
41
+ def self.generate(options={})
42
+ options[:length] ||= 8
43
+
44
+ if options[:corpus]
45
+ symbols, map = Analyzer.analyze_file(options[:corpus])
46
+ options[:language] = Language::Dynamic.new(symbols, map)
47
+ end
48
+
49
+ new(options[:language] || Language::English).tap do |word|
50
+ while word.length < options[:length]
42
51
  word.add_letter
43
52
  end
44
53
  end
@@ -2,25 +2,23 @@ require 'spec_helper'
2
2
  require 'andrey/analyzer'
3
3
 
4
4
  describe Andrey::Analyzer do
5
- let(:subject) { Andrey::Analyzer.new }
5
+ let(:subject) { Andrey::Analyzer }
6
6
 
7
7
  it 'counts occurrences of bigrams in source text' do
8
- zeros = (0..25).map { |x| 0 }
9
- result = subject.analyze("abc")
8
+ symbols, map = subject.analyze_text("abc")
10
9
 
11
- freq_a = result[0]
12
- freq_a.must_equal zeros.dup.fill(1,1,1)
10
+ freq_a = map[0]
11
+ freq_a.must_equal [0, 1, 0]
13
12
 
14
- freq_b = result[1]
15
- freq_b.must_equal zeros.dup.fill(1,2,1)
13
+ freq_b = map[1]
14
+ freq_b.must_equal [0, 0, 1]
16
15
 
17
- freq_c = result[2]
18
- freq_c.must_equal zeros
16
+ freq_c = map[2]
17
+ freq_c.must_equal [0, 0, 0]
19
18
  end
20
19
 
21
20
  it 'reads corpus text from a file' do
22
21
  IO.stubs(:read).returns("abc")
23
- subject.expects(:analyze).with("abc")
24
- subject.read("filename.txt")
22
+ subject.analyze_file("filename.txt")
25
23
  end
26
24
  end
@@ -9,4 +9,26 @@ describe Andrey::Command do
9
9
  command = described_class['generate']
10
10
  command.run
11
11
  end
12
+
13
+ it 'passes length args to the subcommand' do
14
+ Andrey::Command::Generate.any_instance.stubs(:puts)
15
+ Andrey::Word.expects(:generate).with(length: 10)
16
+ command = described_class['generate']
17
+ command.run('-l', '10')
18
+ end
19
+
20
+ it 'passes probability map args to the subcommand' do
21
+ Andrey::Command::Generate.any_instance.stubs(:puts)
22
+ Andrey::Word.expects(:generate).with(language: Andrey::Language::AmericanNames)
23
+ command = described_class['generate']
24
+ command.run('-m', 'lib/andrey/language/american_names.rb')
25
+ end
26
+
27
+ it 'can generate probability maps on the fly' do
28
+ IO.stubs(:read).returns('a quick brown fox jumped over the lazy dog')
29
+ Andrey::Command::Generate.any_instance.stubs(:puts)
30
+ Andrey::Word.expects(:generate).with(corpus: 'textfile.txt')
31
+ command = described_class['generate']
32
+ command.run('-c', 'textfile.txt')
33
+ end
12
34
  end
@@ -0,0 +1,13 @@
1
+ require 'spec_helper'
2
+ require 'andrey/language/dynamic'
3
+
4
+ describe Andrey::Language::Dynamic do
5
+ let(:described_class) { Andrey::Language::Dynamic }
6
+
7
+ it 'takes symbols array and probability map' do
8
+ sym, map = ['a', 'b', 'c'], [[1, 2, 3]]
9
+ lang = described_class.new(sym, map)
10
+ lang.symbols.must_equal sym
11
+ lang.probability_map.must_equal map
12
+ end
13
+ end
@@ -2,7 +2,8 @@ require 'spec_helper'
2
2
  require 'andrey'
3
3
 
4
4
  describe Andrey do
5
- it 'generates a word based on letter frequencies' do
6
-
5
+ it 'delegates generate to the appropriate command' do
6
+ Andrey::Word.expects(:generate)
7
+ Andrey.generate
7
8
  end
8
9
  end
@@ -1,3 +1,4 @@
1
1
  require 'minitest/spec'
2
2
  require 'minitest/autorun'
3
3
  require 'mocha'
4
+ require 'pry-nav'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: andrey
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-08-02 00:00:00.000000000 Z
12
+ date: 2012-09-07 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: mocha
16
- requirement: &70199999986280 !ruby/object:Gem::Requirement
16
+ requirement: &70171809710500 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,7 +21,7 @@ dependencies:
21
21
  version: '0'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *70199999986280
24
+ version_requirements: *70171809710500
25
25
  description: andrey
26
26
  email:
27
27
  - rubysolo@gmail.com
@@ -40,11 +40,14 @@ files:
40
40
  - lib/andrey.rb
41
41
  - lib/andrey/analyzer.rb
42
42
  - lib/andrey/command.rb
43
+ - lib/andrey/language/american_names.rb
44
+ - lib/andrey/language/dynamic.rb
43
45
  - lib/andrey/language/english.rb
44
46
  - lib/andrey/version.rb
45
47
  - lib/andrey/word.rb
46
48
  - spec/lib/andrey/analyzer_spec.rb
47
49
  - spec/lib/andrey/command_spec.rb
50
+ - spec/lib/andrey/language/dynamic_spec.rb
48
51
  - spec/lib/andrey/language/english_spec.rb
49
52
  - spec/lib/andrey/word_spec.rb
50
53
  - spec/lib/andrey_spec.rb
@@ -76,6 +79,7 @@ summary: generate pseudopronounceable random words based on bigram frequency dist
76
79
  test_files:
77
80
  - spec/lib/andrey/analyzer_spec.rb
78
81
  - spec/lib/andrey/command_spec.rb
82
+ - spec/lib/andrey/language/dynamic_spec.rb
79
83
  - spec/lib/andrey/language/english_spec.rb
80
84
  - spec/lib/andrey/word_spec.rb
81
85
  - spec/lib/andrey_spec.rb