andrey 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore CHANGED
@@ -15,3 +15,4 @@ spec/reports
15
15
  test/tmp
16
16
  test/version_tmp
17
17
  tmp
18
+ *[swp|swo]
data/README.md CHANGED
@@ -24,10 +24,10 @@ LIKE ENGLISH?) Make a new language file, and generate the probability map via:
24
24
 
25
25
  andrey analyze /path/to/klingon-opera.txt
26
26
 
27
- ## Why 'Andrey'
27
+ ## Why 'Andrey'?
28
28
 
29
29
  For [Andrey Markov](http://en.wikipedia.org/wiki/Andrey_Markov). And also
30
- because Andrey is "rand()" in pig-latin.
30
+ because And-rey is "rand()" in pig-latin.
31
31
 
32
32
  ## Contributing
33
33
 
data/bin/andrey CHANGED
@@ -6,5 +6,14 @@ $LOAD_PATH.unshift(library_path) unless $LOAD_PATH.include?(library_path)
6
6
  require 'andrey/command'
7
7
 
8
8
  command = ARGV.shift
9
- Andrey::Command[command].run(*ARGV)
10
9
 
10
+ unless ["analyze", "generate"].include?(command)
11
+ puts <<-eof
12
+ usage:
13
+ andrey generate #=> Generate a new word based on current bigram frequency
14
+ andrey analyze filename #=> Read in the specified text file and calculate bigram frequency
15
+ eof
16
+ exit 1
17
+ end
18
+
19
+ Andrey::Command[command].run(*ARGV)
@@ -1,5 +1,17 @@
1
+ require "andrey/analyzer"
1
2
  require "andrey/version"
3
+ require "andrey/word"
2
4
 
3
5
  module Andrey
4
- # Your code goes here...
6
+ def self.analyze_file(*args)
7
+ Analyzer.analyze_file(*args)
8
+ end
9
+
10
+ def self.analyze_text(*args)
11
+ Analyzer.analyze_text(*args)
12
+ end
13
+
14
+ def self.generate(*args)
15
+ Word.generate(*args)
16
+ end
5
17
  end
@@ -1,24 +1,32 @@
1
1
  module Andrey
2
2
  class Analyzer
3
- def read(filename)
4
- analyze(IO.read(filename))
3
+ def self.analyze_file(filename, include_spaces=false)
4
+ analyze_text(IO.read(filename), include_spaces)
5
5
  end
6
6
 
7
- def analyze(text)
8
- symbols = ('a'..'z').to_a
9
- zeros = [].fill(0,0,26)
10
- map = (0..25).to_a.map { |x| zeros.dup }
7
+ def self.analyze_text(text, include_spaces=false)
8
+ pmap = {}
11
9
 
12
- text.downcase.split(/[^a-z]+/).each do |word|
13
- word.split(//).each_cons(2) do |from, to|
14
- col = symbols.index(from)
15
- row = symbols.index(to)
10
+ text.downcase.chars.each_cons(2) do |from, to|
11
+ pmap[from] ||= Hash.new(0)
12
+ pmap[to] ||= Hash.new(0)
13
+ pmap[from][to] += 1
14
+ end
15
+
16
+ regex = symbols_regex(include_spaces)
17
+ symbols = pmap.keys.sort.select {|s| s.match(regex) }
16
18
 
17
- map[col][row] += 1
18
- end
19
+ map = symbols.inject([]) do |m, symbol|
20
+ occurrences = pmap[symbol]
21
+ sum = occurrences.values.inject(:+) || 0
22
+ m << symbols.map { |s| sum == 0 ? 0 : (occurrences[s] / sum.to_f) }
19
23
  end
20
24
 
21
- map
25
+ [symbols, map]
26
+ end
27
+
28
+ def self.symbols_regex(include_spaces)
29
+ include_spaces ? Regexp.new(/[a-z ]/) : Regexp.new(/[a-z]/)
22
30
  end
23
31
  end
24
32
  end
@@ -5,16 +5,40 @@ module Andrey
5
5
  class Command
6
6
  class Generate
7
7
  def run(*args)
8
- puts Andrey::Word.generate
8
+ # turn cli stuffs into ruby stuffs
9
+ options = {}
10
+ args.each_cons(2) do |(flag, value)|
11
+ options[:length] = value.to_i if flag == '-l'
12
+ options[:language] = language_class(value) if flag == '-m'
13
+ options[:corpus] = value if flag == '-c'
14
+ end
15
+
16
+ puts Andrey::Word.generate(options)
17
+ end
18
+
19
+ def language_class(filename)
20
+ absolute = if filename.match(%r{^/})
21
+ filename
22
+ else
23
+ File.expand_path(filename, Dir.pwd)
24
+ end
25
+
26
+ require absolute
27
+ classname = File.basename(filename, '.rb').split('_').map(&:capitalize).join
28
+ Andrey::Language.const_get(classname)
9
29
  end
10
30
  end
11
31
 
12
32
  class Analyze
13
33
  def run(*args)
14
- probability_map = Andrey::Analyzer.new.read(args.first)
34
+ symbols, probability_map = Andrey::Analyzer.analyze_file(args.first)
35
+ puts "[#{ symbols.map{|s| "'#{ s }'" }.join(',') }]"
36
+
15
37
  puts "["
16
- probability_map.each do |row|
17
- puts "[#{ row.join(',') }]"
38
+ probability_map.each_with_index do |row, index|
39
+ print "[#{ row.map { |v| sprintf('%.04f',v) }.join(',') }]"
40
+ print "," if index < 25
41
+ puts
18
42
  end
19
43
  puts "]"
20
44
  end
@@ -0,0 +1,40 @@
1
+ module Andrey
2
+ module Language
3
+ class AmericanNames
4
+ def self.symbols
5
+ ('a'..'z').to_a
6
+ end
7
+
8
+ def self.probability_map
9
+ [
10
+ [125,740,1698,1357,901,163,596,623,910,39,405,4917,4716,12334,40,306,32,13076,2427,3008,1576,1507,612,109,1479,306],
11
+ [1865,442,9,7,4329,0,0,3,732,3,0,510,0,8,1108,0,0,2391,231,8,711,2,0,0,389,0],
12
+ [3012,23,501,119,2046,36,125,5389,1170,2,2232,914,59,73,2585,20,105,651,9,169,383,4,9,0,532,10],
13
+ [3811,29,13,389,3182,51,247,22,1243,1,21,285,112,199,2410,0,4,1289,607,89,426,4,512,0,721,3],
14
+ [1910,719,514,1653,1538,423,665,120,774,79,118,7730,790,6987,688,889,1,13967,5292,2829,195,1243,1028,202,2861,1542],
15
+ [427,0,2,0,818,848,4,0,546,0,3,342,67,8,689,0,0,1257,5,80,152,0,0,0,7,0],
16
+ [1828,10,0,25,2305,14,379,696,798,0,0,671,22,145,1005,2,1,1183,227,160,728,0,47,0,76,0],
17
+ [6231,20,9,2,4705,22,0,1,1797,1,0,355,113,1672,2528,0,0,917,32,311,913,0,39,0,717,0],
18
+ [4029,182,4463,1254,3691,596,892,8,1,14,184,4863,1020,7160,492,376,73,995,4394,2321,41,423,4,118,0,475],
19
+ [2538,0,3,0,1562,0,0,0,200,0,0,0,0,0,3731,0,0,0,0,1,725,0,0,0,0,0],
20
+ [1046,10,0,0,2629,9,0,45,1260,0,19,275,76,146,164,15,1,295,565,9,90,0,42,0,165,0],
21
+ [4520,384,129,1746,7776,199,96,35,5707,0,258,7342,463,24,2509,154,3,24,941,654,562,464,35,0,1553,22],
22
+ [7138,469,1120,0,2833,7,12,2,3533,0,5,39,551,17,2071,533,2,9,755,1,468,0,11,0,482,1],
23
+ [3562,102,1440,3832,6085,55,1871,88,2943,108,573,186,29,3356,1233,17,5,242,2502,1777,182,8,60,0,719,291],
24
+ [376,1773,445,1136,356,151,355,1678,191,37,320,2562,1480,10336,1258,804,8,5056,2175,1164,1127,300,1412,155,751,122],
25
+ [1867,97,6,1,1670,14,0,1742,372,0,35,170,33,1,484,202,0,391,410,67,107,0,0,0,11,0],
26
+ [0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,482,0,0,0,0,0],
27
+ [5553,548,540,2522,6057,53,1248,171,7387,51,932,1863,721,1360,5955,223,46,2801,1663,3551,1109,222,86,1,3124,55],
28
+ [2631,103,876,21,2493,20,13,2220,857,2,189,359,802,131,3983,336,60,14,1302,4219,630,3,185,0,191,8],
29
+ [1873,9,332,0,3955,31,36,5553,2221,7,73,346,100,135,2917,8,0,1454,442,2708,324,0,67,0,557,276],
30
+ [423,270,532,537,1039,136,666,42,536,25,43,1367,351,972,19,107,3,1668,1218,654,3,13,1,48,82,181],
31
+ [1083,0,0,0,1731,0,0,0,2511,0,1,8,0,1,178,0,0,9,1,0,6,0,0,0,23,0],
32
+ [1978,13,6,38,1331,48,2,391,2532,0,51,131,67,578,467,2,1,290,248,36,10,0,0,0,77,1],
33
+ [135,0,0,0,6,3,0,0,51,0,0,10,3,2,62,0,0,0,7,60,0,0,20,0,0,0],
34
+ [575,29,156,299,471,7,4,10,10,8,43,794,172,1107,328,8,1,200,214,81,14,56,17,0,0,1],
35
+ [785,5,9,2,230,2,11,4,148,0,1,14,39,3,93,8,30,2,3,0,29,0,4,0,11,47]
36
+ ]
37
+ end
38
+ end
39
+ end
40
+ end
@@ -0,0 +1,12 @@
1
+ module Andrey
2
+ module Language
3
+ class Dynamic
4
+ attr_reader :symbols, :probability_map
5
+
6
+ def initialize(symbols, map)
7
+ @symbols = symbols
8
+ @probability_map = map
9
+ end
10
+ end
11
+ end
12
+ end
@@ -7,32 +7,32 @@ module Andrey
7
7
 
8
8
  def self.probability_map
9
9
  [
10
- [ 131,9076,12589, 5968, 4774,1359, 5536, 856, 3928,189,2029,28178, 7536,28275, 271, 7214,199,21440,10038,27276, 3550,2089,1131, 905, 1523, 899],
11
- [ 5297, 927, 211, 316, 5491, 73, 43, 115, 5586,164, 14, 8765, 147, 78, 4110, 118, 6, 4028, 801, 295, 2823, 82, 48, 0, 414, 4],
12
- [15207, 9, 1525, 24,10495, 4, 4,13308, 7290, 0,3694, 3272, 35, 164,16088, 13, 96, 5504, 475, 6945, 5110, 0, 10, 0, 2814, 30],
13
- [ 6287, 183, 100, 1034,13714, 218, 582, 299,12286,146, 25, 2113, 389, 1057, 5596, 91, 6, 3433, 577, 78, 2101, 232, 299, 0, 1337, 19],
14
- [10087,1945, 8383,14736, 3994,2285, 2575, 940, 2678,226, 392,12811, 8075,25070, 4196, 5890,588,42507,20704,12057, 3103,1995,1461,3280, 905, 253],
15
- [ 2139, 23, 10, 17, 3216,1744, 11, 27, 4052, 4, 8, 2471, 22, 24, 3704, 11, 0, 1677, 46, 807, 2627, 1, 30, 0, 572, 3],
16
- [ 5000, 116, 18, 81, 7761, 66, 1139, 1704, 5447, 3, 18, 4089, 580, 1878, 3263, 40, 0, 4769, 289, 118, 2452, 2, 132, 0, 1689, 8],
17
- [ 9380, 180, 69, 84,12374, 198, 43, 74,10434, 4, 31, 1332, 586, 765,10394, 116, 5, 3104, 170, 1646, 1830, 18, 297, 0, 6462, 6],
18
- [15276,3113,26517,10728, 4020,4328, 4475, 276, 420, 77,1915,10331, 5176,33718,15378, 5100,287, 4552,24204,16952, 1774,5208, 78, 494, 55,4596],
19
- [ 795, 1, 0, 4, 612, 0, 0, 6, 200, 2, 1, 2, 2, 4, 595, 2, 0, 8, 0, 1, 905, 0, 1, 0, 8, 0],
20
- [ 1431, 148, 37, 38, 4979, 124, 21, 295, 2544, 18, 72, 773, 160, 463, 597, 66, 0, 223, 410, 139, 317, 8, 202, 0, 445, 2],
21
- [16932, 484, 838, 1393,23347, 538, 577, 165,20883, 6, 506,11418, 983, 741,13004, 1082, 7, 112, 694, 2330, 4434, 760, 215, 3,14059, 25],
22
- [13712,2638, 57, 47,12258, 142, 21, 50,11357, 6, 18, 253, 2020, 763, 8702, 4382, 2, 65, 277, 43, 2536, 44, 66, 0, 2355, 4],
23
- [12665, 956, 8690, 9633,21743,2137,13071, 915,15240,340,1344, 1229, 985, 2422,12183, 1481,331, 1306, 6455,19981, 2159,1031, 692, 74, 1331, 385],
24
- [ 2423,3224, 7960, 5588, 1761,1484, 7623, 764, 4109, 97,1067,12463,12365,29811, 4684,11281,218,19802,10890, 8961,12999,3748,2600,1373, 603, 494],
25
- [ 8572, 128, 78, 32,11227, 99, 39,13194, 6864, 12, 42, 4977, 137, 389, 8768, 2022, 2, 9756, 2246, 3256, 2507, 3, 134, 0, 1480, 0],
26
- [ 2, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 0, 0, 2, 0, 1, 1, 0, 0, 3715, 0, 0, 0, 0, 0],
27
- [23804,1924, 3855, 3672,23956, 956, 2009, 1602,23280,109,1188, 1865, 4781, 2802,22402, 2423, 82, 3649, 3668, 5672, 3942,1099, 653, 7, 6055, 67],
28
- [ 5952, 264, 6509, 143,11489, 276, 153, 7365,12066, 43,1027, 2264, 5604, 1568, 6008, 5761,705, 164,14428,22087, 6558, 77, 830, 0, 2130, 11],
29
- [14456, 280, 1062, 89,29443, 467, 124,12101,31684, 32, 40, 2236, 581, 475,15285, 222, 9,14163, 776, 3232, 4766, 47, 729, 1, 5564, 144],
30
- [ 2731,3282, 2899, 2362, 1999, 553, 1402, 61, 2613, 43, 294, 9782, 5983,19217, 671, 3098, 21, 8990,14587, 5472, 27, 259, 17, 244, 82, 140],
31
- [ 3212, 0, 3, 1,10716, 0, 1, 0, 4224, 0, 2, 7, 0, 5, 1545, 0, 0, 33, 8, 0, 286, 12, 0, 0, 81, 3],
32
- [ 3080, 132, 32, 143, 2121, 80, 28, 930, 2296, 1, 97, 370, 89, 562, 2172, 49, 2, 426, 216, 87, 68, 1, 57, 0, 111, 11],
33
- [ 764, 29, 391, 10, 537, 30, 7, 116, 1419, 0, 2, 41, 27, 9, 570, 550, 10, 7, 56, 778, 161, 2, 24, 0, 597, 1],
34
- [ 1446, 289, 1696, 1137, 953, 142, 742, 159, 621, 3, 47, 3010, 1796, 1699, 1255, 2686, 5, 1587, 2358, 1641, 136, 17, 204, 166, 4, 135],
35
- [ 1677, 12, 9, 15, 3613, 2, 7, 7, 708, 0, 4, 127, 5, 4, 1400, 3, 0, 8, 7, 11, 80, 2, 10, 0, 334, 271]
10
+ [0.0007,0.0455,0.0631,0.0299,0.0239,0.0068,0.0277,0.0043,0.0197,0.0009,0.0102,0.1412,0.0378,0.1417,0.0014,0.0362,0.0010,0.1074,0.0503,0.1367,0.0178,0.0105,0.0057,0.0045,0.0076,0.0045],
11
+ [0.1310,0.0229,0.0052,0.0078,0.1358,0.0018,0.0011,0.0028,0.1382,0.0041,0.0003,0.2168,0.0036,0.0019,0.1016,0.0029,0.0001,0.0996,0.0198,0.0073,0.0698,0.0020,0.0012,0.0000,0.0102,0.0001],
12
+ [0.1470,0.0001,0.0147,0.0002,0.1015,0.0000,0.0000,0.1287,0.0705,0.0000,0.0357,0.0316,0.0003,0.0016,0.1555,0.0001,0.0009,0.0532,0.0046,0.0671,0.0494,0.0000,0.0001,0.0000,0.0272,0.0003],
13
+ [0.0922,0.0027,0.0015,0.0152,0.2011,0.0032,0.0085,0.0044,0.1802,0.0021,0.0004,0.0310,0.0057,0.0155,0.0821,0.0013,0.0001,0.0503,0.0085,0.0011,0.0308,0.0034,0.0044,0.0000,0.0196,0.0003],
14
+ [0.0429,0.0083,0.0356,0.0626,0.0170,0.0097,0.0109,0.0040,0.0114,0.0010,0.0017,0.0544,0.0343,0.1065,0.0178,0.0250,0.0025,0.1806,0.0880,0.0512,0.0132,0.0085,0.0062,0.0139,0.0038,0.0011],
15
+ [0.0885,0.0010,0.0004,0.0007,0.1331,0.0722,0.0005,0.0011,0.1677,0.0002,0.0003,0.1023,0.0009,0.0010,0.1533,0.0005,0.0000,0.0694,0.0019,0.0334,0.1087,0.0000,0.0012,0.0000,0.0237,0.0001],
16
+ [0.1062,0.0025,0.0004,0.0017,0.1648,0.0014,0.0242,0.0362,0.1157,0.0001,0.0004,0.0868,0.0123,0.0399,0.0693,0.0008,0.0000,0.1013,0.0061,0.0025,0.0521,0.0000,0.0028,0.0000,0.0359,0.0002],
17
+ [0.1458,0.0028,0.0011,0.0013,0.1923,0.0031,0.0007,0.0011,0.1621,0.0001,0.0005,0.0207,0.0091,0.0119,0.1615,0.0018,0.0001,0.0482,0.0026,0.0256,0.0284,0.0003,0.0046,0.0000,0.1004,0.0001],
18
+ [0.0760,0.0155,0.1319,0.0534,0.0200,0.0215,0.0223,0.0014,0.0021,0.0004,0.0095,0.0514,0.0257,0.1677,0.0765,0.0254,0.0014,0.0226,0.1204,0.0843,0.0088,0.0259,0.0004,0.0025,0.0003,0.0229],
19
+ [0.2510,0.0003,0.0000,0.0013,0.1932,0.0000,0.0000,0.0019,0.0632,0.0006,0.0003,0.0006,0.0006,0.0013,0.1879,0.0006,0.0000,0.0025,0.0000,0.0003,0.2858,0.0000,0.0003,0.0000,0.0025,0.0000],
20
+ [0.0886,0.0092,0.0023,0.0024,0.3081,0.0077,0.0013,0.0183,0.1574,0.0011,0.0045,0.0478,0.0099,0.0287,0.0369,0.0041,0.0000,0.0138,0.0254,0.0086,0.0196,0.0005,0.0125,0.0000,0.0275,0.0001],
21
+ [0.1298,0.0037,0.0064,0.0107,0.1790,0.0041,0.0044,0.0013,0.1601,0.0000,0.0039,0.0875,0.0075,0.0057,0.0997,0.0083,0.0001,0.0009,0.0053,0.0179,0.0340,0.0058,0.0016,0.0000,0.1078,0.0002],
22
+ [0.1940,0.0373,0.0008,0.0007,0.1734,0.0020,0.0003,0.0007,0.1607,0.0001,0.0003,0.0036,0.0286,0.0108,0.1231,0.0620,0.0000,0.0009,0.0039,0.0006,0.0359,0.0006,0.0009,0.0000,0.0333,0.0001],
23
+ [0.0798,0.0060,0.0547,0.0607,0.1370,0.0135,0.0823,0.0058,0.0960,0.0021,0.0085,0.0077,0.0062,0.0153,0.0767,0.0093,0.0021,0.0082,0.0407,0.1259,0.0136,0.0065,0.0044,0.0005,0.0084,0.0024],
24
+ [0.0142,0.0189,0.0466,0.0327,0.0103,0.0087,0.0447,0.0045,0.0241,0.0006,0.0063,0.0730,0.0724,0.1746,0.0274,0.0661,0.0013,0.1160,0.0638,0.0525,0.0762,0.0220,0.0152,0.0080,0.0035,0.0029],
25
+ [0.1097,0.0016,0.0010,0.0004,0.1436,0.0013,0.0005,0.1688,0.0878,0.0002,0.0005,0.0637,0.0018,0.0050,0.1122,0.0259,0.0000,0.1248,0.0287,0.0417,0.0321,0.0000,0.0017,0.0000,0.0189,0.0000],
26
+ [0.0005,0.0000,0.0000,0.0000,0.0005,0.0000,0.0000,0.0000,0.0011,0.0000,0.0000,0.0000,0.0000,0.0000,0.0005,0.0000,0.0003,0.0003,0.0000,0.0000,0.9949,0.0000,0.0000,0.0000,0.0000,0.0000],
27
+ [0.1479,0.0120,0.0239,0.0228,0.1488,0.0059,0.0125,0.0100,0.1446,0.0007,0.0074,0.0116,0.0297,0.0174,0.1392,0.0151,0.0005,0.0227,0.0228,0.0352,0.0245,0.0068,0.0041,0.0000,0.0376,0.0004],
28
+ [0.0427,0.0019,0.0466,0.0010,0.0823,0.0020,0.0011,0.0528,0.0865,0.0003,0.0074,0.0162,0.0402,0.0112,0.0431,0.0413,0.0051,0.0012,0.1034,0.1583,0.0470,0.0006,0.0059,0.0000,0.0153,0.0001],
29
+ [0.0946,0.0018,0.0069,0.0006,0.1927,0.0031,0.0008,0.0792,0.2073,0.0002,0.0003,0.0146,0.0038,0.0031,0.1000,0.0015,0.0001,0.0927,0.0051,0.0211,0.0312,0.0003,0.0048,0.0000,0.0364,0.0009],
30
+ [0.0313,0.0376,0.0332,0.0270,0.0229,0.0063,0.0160,0.0007,0.0299,0.0005,0.0034,0.1120,0.0685,0.2200,0.0077,0.0355,0.0002,0.1029,0.1670,0.0626,0.0003,0.0030,0.0002,0.0028,0.0009,0.0016],
31
+ [0.1592,0.0000,0.0001,0.0000,0.5311,0.0000,0.0000,0.0000,0.2093,0.0000,0.0001,0.0003,0.0000,0.0002,0.0766,0.0000,0.0000,0.0016,0.0004,0.0000,0.0142,0.0006,0.0000,0.0000,0.0040,0.0001],
32
+ [0.2222,0.0095,0.0023,0.0103,0.1530,0.0058,0.0020,0.0671,0.1656,0.0001,0.0070,0.0267,0.0064,0.0405,0.1567,0.0035,0.0001,0.0307,0.0156,0.0063,0.0049,0.0001,0.0041,0.0000,0.0080,0.0008],
33
+ [0.1102,0.0042,0.0564,0.0014,0.0775,0.0043,0.0010,0.0167,0.2047,0.0000,0.0003,0.0059,0.0039,0.0013,0.0822,0.0793,0.0014,0.0010,0.0081,0.1122,0.0232,0.0003,0.0035,0.0000,0.0861,0.0001],
34
+ [0.0280,0.0056,0.0328,0.0220,0.0184,0.0027,0.0144,0.0031,0.0120,0.0001,0.0009,0.0582,0.0348,0.0329,0.0243,0.0520,0.0001,0.0307,0.0456,0.0318,0.0026,0.0003,0.0039,0.0032,0.0001,0.0026],
35
+ [0.1982,0.0014,0.0011,0.0018,0.4271,0.0002,0.0008,0.0008,0.0837,0.0000,0.0005,0.0150,0.0006,0.0005,0.1655,0.0004,0.0000,0.0009,0.0008,0.0013,0.0095,0.0002,0.0012,0.0000,0.0395,0.0320]
36
36
  ]
37
37
  end
38
38
  end
@@ -1,3 +1,3 @@
1
1
  module Andrey
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
@@ -1,3 +1,5 @@
1
+ require 'andrey/language/american_names'
2
+ require 'andrey/language/dynamic'
1
3
  require 'andrey/language/english'
2
4
 
3
5
  module Andrey
@@ -23,7 +25,7 @@ module Andrey
23
25
  index = symbols.index(letter)
24
26
 
25
27
  probabilities = probability_map[index]
26
- pointer = rand(probabilities.inject(:+))
28
+ pointer = rand
27
29
  sum = 0
28
30
 
29
31
  probabilities.each_with_index do |p, index|
@@ -36,9 +38,16 @@ module Andrey
36
38
  symbols.sample
37
39
  end
38
40
 
39
- def self.generate(length=8, language=Language::English)
40
- new(language).tap do |word|
41
- while word.length < length
41
+ def self.generate(options={})
42
+ options[:length] ||= 8
43
+
44
+ if options[:corpus]
45
+ symbols, map = Analyzer.analyze_file(options[:corpus])
46
+ options[:language] = Language::Dynamic.new(symbols, map)
47
+ end
48
+
49
+ new(options[:language] || Language::English).tap do |word|
50
+ while word.length < options[:length]
42
51
  word.add_letter
43
52
  end
44
53
  end
@@ -2,25 +2,23 @@ require 'spec_helper'
2
2
  require 'andrey/analyzer'
3
3
 
4
4
  describe Andrey::Analyzer do
5
- let(:subject) { Andrey::Analyzer.new }
5
+ let(:subject) { Andrey::Analyzer }
6
6
 
7
7
  it 'counts occurrences of bigrams in source text' do
8
- zeros = (0..25).map { |x| 0 }
9
- result = subject.analyze("abc")
8
+ symbols, map = subject.analyze_text("abc")
10
9
 
11
- freq_a = result[0]
12
- freq_a.must_equal zeros.dup.fill(1,1,1)
10
+ freq_a = map[0]
11
+ freq_a.must_equal [0, 1, 0]
13
12
 
14
- freq_b = result[1]
15
- freq_b.must_equal zeros.dup.fill(1,2,1)
13
+ freq_b = map[1]
14
+ freq_b.must_equal [0, 0, 1]
16
15
 
17
- freq_c = result[2]
18
- freq_c.must_equal zeros
16
+ freq_c = map[2]
17
+ freq_c.must_equal [0, 0, 0]
19
18
  end
20
19
 
21
20
  it 'reads corpus text from a file' do
22
21
  IO.stubs(:read).returns("abc")
23
- subject.expects(:analyze).with("abc")
24
- subject.read("filename.txt")
22
+ subject.analyze_file("filename.txt")
25
23
  end
26
24
  end
@@ -9,4 +9,26 @@ describe Andrey::Command do
9
9
  command = described_class['generate']
10
10
  command.run
11
11
  end
12
+
13
+ it 'passes length args to the subcommand' do
14
+ Andrey::Command::Generate.any_instance.stubs(:puts)
15
+ Andrey::Word.expects(:generate).with(length: 10)
16
+ command = described_class['generate']
17
+ command.run('-l', '10')
18
+ end
19
+
20
+ it 'passes probability map args to the subcommand' do
21
+ Andrey::Command::Generate.any_instance.stubs(:puts)
22
+ Andrey::Word.expects(:generate).with(language: Andrey::Language::AmericanNames)
23
+ command = described_class['generate']
24
+ command.run('-m', 'lib/andrey/language/american_names.rb')
25
+ end
26
+
27
+ it 'can generate probability maps on the fly' do
28
+ IO.stubs(:read).returns('a quick brown fox jumped over the lazy dog')
29
+ Andrey::Command::Generate.any_instance.stubs(:puts)
30
+ Andrey::Word.expects(:generate).with(corpus: 'textfile.txt')
31
+ command = described_class['generate']
32
+ command.run('-c', 'textfile.txt')
33
+ end
12
34
  end
@@ -0,0 +1,13 @@
1
+ require 'spec_helper'
2
+ require 'andrey/language/dynamic'
3
+
4
+ describe Andrey::Language::Dynamic do
5
+ let(:described_class) { Andrey::Language::Dynamic }
6
+
7
+ it 'takes symbols array and probability map' do
8
+ sym, map = ['a', 'b', 'c'], [[1, 2, 3]]
9
+ lang = described_class.new(sym, map)
10
+ lang.symbols.must_equal sym
11
+ lang.probability_map.must_equal map
12
+ end
13
+ end
@@ -2,7 +2,8 @@ require 'spec_helper'
2
2
  require 'andrey'
3
3
 
4
4
  describe Andrey do
5
- it 'generates a word based on letter frequencies' do
6
-
5
+ it 'delegates generate to the appropriate command' do
6
+ Andrey::Word.expects(:generate)
7
+ Andrey.generate
7
8
  end
8
9
  end
@@ -1,3 +1,4 @@
1
1
  require 'minitest/spec'
2
2
  require 'minitest/autorun'
3
3
  require 'mocha'
4
+ require 'pry-nav'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: andrey
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-08-02 00:00:00.000000000 Z
12
+ date: 2012-09-07 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: mocha
16
- requirement: &70199999986280 !ruby/object:Gem::Requirement
16
+ requirement: &70171809710500 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,7 +21,7 @@ dependencies:
21
21
  version: '0'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *70199999986280
24
+ version_requirements: *70171809710500
25
25
  description: andrey
26
26
  email:
27
27
  - rubysolo@gmail.com
@@ -40,11 +40,14 @@ files:
40
40
  - lib/andrey.rb
41
41
  - lib/andrey/analyzer.rb
42
42
  - lib/andrey/command.rb
43
+ - lib/andrey/language/american_names.rb
44
+ - lib/andrey/language/dynamic.rb
43
45
  - lib/andrey/language/english.rb
44
46
  - lib/andrey/version.rb
45
47
  - lib/andrey/word.rb
46
48
  - spec/lib/andrey/analyzer_spec.rb
47
49
  - spec/lib/andrey/command_spec.rb
50
+ - spec/lib/andrey/language/dynamic_spec.rb
48
51
  - spec/lib/andrey/language/english_spec.rb
49
52
  - spec/lib/andrey/word_spec.rb
50
53
  - spec/lib/andrey_spec.rb
@@ -76,6 +79,7 @@ summary: generate pseudopronounceable random words based on bigram freqency dist
76
79
  test_files:
77
80
  - spec/lib/andrey/analyzer_spec.rb
78
81
  - spec/lib/andrey/command_spec.rb
82
+ - spec/lib/andrey/language/dynamic_spec.rb
79
83
  - spec/lib/andrey/language/english_spec.rb
80
84
  - spec/lib/andrey/word_spec.rb
81
85
  - spec/lib/andrey_spec.rb