tomharris-random_data 1.5.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,61 @@
1
+
2
module RandomData
  # Defines methods for the generation of random data based on a supplied grammar.
  module Grammar
    # Builds a simple sentence by expanding a grammar.
    #
    # A grammar is a Hash whose keys are symbols. Each value (right-hand side)
    # may be:
    # * a String  - emitted as-is;
    # * a Symbol  - a reference to another key of the same grammar;
    # * an Array  - expanded item by item and concatenated; items may be
    #   Strings, Symbols (keys of the same grammar) or Hashes (nested grammars);
    # * a Hash    - a nested grammar from which one key is picked at random.
    #
    # grammar:: the grammar (Hash); when +what+ is nil an Array or String is
    #           also accepted and expanded directly.
    # what::    the key to start from. When nil and +grammar+ is a Hash, a key
    #           is chosen at random.
    #
    # Returns a new String. Raises RuntimeError when a right-hand side (or an
    # Array item) is not one of the supported types, which includes looking up
    # a key that is not present in the grammar.
    #
    # Example:
    #   Random.grammatical_construct({:story => [:man, " bites ", :dog], :man => { :bob => "Bob"}, :dog => {:a =>"Rex", :b =>"Rover"}}, :story)
    #   => "Bob bites Rover"
    def grammatical_construct(grammar, what = nil)
      if what.nil?
        case grammar
        when Hash
          # An empty grammar has nothing to expand; without this guard the
          # randomly chosen key would be nil and the call would recurse forever.
          return "" if grammar.empty?

          keys = grammar.keys
          grammatical_construct(grammar, keys[rand(keys.length)])
        when Array
          grammar.map { |item| grammatical_construct(item) }.join
        when String
          grammar.dup
        else
          # Any other value contributes nothing when there is no start key.
          ""
        end
      else
        rhs = grammar[what]
        case rhs
        when Array
          rhs.map do |item|
            case item
            when Symbol then grammatical_construct(grammar, item)
            when String then item
            when Hash   then grammatical_construct(item)
            else raise "#{item.inspect} must be a symbol or string or Hash"
            end
          end.join
        when Hash
          grammatical_construct(rhs)
        when Symbol
          # A symbol refers to another rule of this grammar, so it has to be
          # looked up in +grammar+ rather than expanded on its own.
          grammatical_construct(grammar, rhs)
        when String
          rhs.dup
        else
          raise "#{rhs.inspect} must be a symbol, string, Array or Hash"
        end
      end
    end
  end
end
@@ -0,0 +1,134 @@
1
+ # coding:utf-8
2
+
3
module RandomData
  # Defines methods to return random location data.
  #
  # NOTE(review): the pickers below call +rand+ with no arguments on Arrays;
  # that method is not defined in this module and is presumably provided by
  # the library's Array extensions - confirm it is loaded first.
  module Locations
    trees     = %w( Acacia Beech Birch Cedar Cherry Chestnut Elm Larch Laurel
                    Linden Maple Oak Pine Rose Walnut Willow)
    people    = %w( Adams Franklin Jackson Jefferson Lincoln
                    Madison Washington Wilson)
    # Not currently included in @@streetnames.
    people_uk = %w( Churchill Tyndale Latimer Cranmer )
    places    = %w( Highland Hill Park Woodland Sunset Virginia)
    numbers   = %w( 1st 2nd 4th 5th 34th 42nd )
    @@streetnames = trees + people + places + numbers

    @@street_types = %w(St Ave Rd Blvd Trl Ter Rdg Pl Pkwy Ct Circle)

    # Returns the first line of a US mailing address (street number, street name, street type)
    #
    # Example:
    #
    #   Random.address_line_1 = "24317 Jefferson Blvd"
    def address_line_1
      "#{rand(40000)} #{@@streetnames.rand} #{@@street_types.rand}"
    end

    alias :us_address_line_1 :address_line_1

    @@line2types = ["Apt", "Bsmt", "Bldg", "Dept", "Fl", "Frnt", "Hngr", "Lbby", "Lot", "Lowr", "Ofc", "Ph", "Pier", "Rear", "Rm", "Side", "Slip", "Spc", "Stop", "Ste", "Trlr", "Unit", "Uppr"]

    # Returns the second line of a US mailing address (unit designator followed by a number)
    #
    # Example:
    #
    #   Random.address_line_2 = "Apt 412"
    def address_line_2
      "#{@@line2types.rand} #{rand(999)}"
    end

    # Returns a random 5-digit string, not guaranteed to be a legitimate zip code.
    # Legal zip codes can have leading zeroes and thus they need to be strings.
    def zipcode
      # rand(100_000) covers 0..99999, so every 5-digit string is reachable.
      "%05d" % rand(100_000)
    end

    # Returns a string providing something in the general form of a UK post code. Like the zip codes, this might
    # not actually be valid. Doesn't cover London whose codes are like "SE1".
    def uk_post_code
      # Only a handful of post towns are listed.
      post_towns = %w(BM CB CV LE LI LS KT MK NE OX PL YO)
      number_1 = rand(100).to_s
      number_2 = rand(100).to_s
      # Every two-letter combination from "AA" to "ZZ".
      letters = ("AA".."ZZ").to_a.rand

      "#{post_towns.rand}#{number_1} #{number_2}#{letters}"
    end

    # from technoweenie: http://svn.techno-weenie.net/projects/plugins/us_states/lib/us_states.rb
    # Each entry is a [full name, 2-character abbreviation] pair.
    @@us_states = [["Alaska", "AK"], ["Alabama", "AL"], ["Arkansas", "AR"], ["Arizona", "AZ"],
                   ["California", "CA"], ["Colorado", "CO"], ["Connecticut", "CT"], ["District of Columbia", "DC"],
                   ["Delaware", "DE"], ["Florida", "FL"], ["Georgia", "GA"], ["Hawaii", "HI"], ["Iowa", "IA"],
                   ["Idaho", "ID"], ["Illinois", "IL"], ["Indiana", "IN"], ["Kansas", "KS"], ["Kentucky", "KY"],
                   ["Louisiana", "LA"], ["Massachusetts", "MA"], ["Maryland", "MD"], ["Maine", "ME"], ["Michigan", "MI"],
                   ["Minnesota", "MN"], ["Missouri", "MO"], ["Mississippi", "MS"], ["Montana", "MT"], ["North Carolina", "NC"],
                   ["North Dakota", "ND"], ["Nebraska", "NE"], ["New Hampshire", "NH"], ["New Jersey", "NJ"],
                   ["New Mexico", "NM"], ["Nevada", "NV"], ["New York", "NY"], ["Ohio", "OH"], ["Oklahoma", "OK"],
                   ["Oregon", "OR"], ["Pennsylvania", "PA"], ["Rhode Island", "RI"], ["South Carolina", "SC"], ["South Dakota", "SD"],
                   ["Tennessee", "TN"], ["Texas", "TX"], ["Utah", "UT"], ["Virginia", "VA"], ["Vermont", "VT"],
                   ["Washington", "WA"], ["Wisconsin", "WI"], ["West Virginia", "WV"], ["Wyoming", "WY"]]

    # Returns a state 2-character abbreviation
    #   Random.state = "IL"
    def state
      @@us_states.rand[1]
    end

    # Returns a full state name
    #   Random.state_full = "Texas"
    def state_full
      @@us_states.rand[0]
    end

    # from http://siteresources.worldbank.org/DATASTATISTICS/Resources/CLASS.XLS
    @@countries = ["Afghanistan", "Albania", "Algeria", "American Samoa", "Andorra", "Angola", "Antigua and Barbuda", "Argentina", "Armenia", "Aruba", "Australia",
                   "Austria", "Azerbaijan", "Bahamas, The", "Bahrain", "Bangladesh", "Barbados", "Belarus", "Belgium", "Belize", "Benin", "Bermuda", "Bhutan",
                   "Bolivia", "Bosnia and Herzegovina", "Botswana", "Brazil", "Brunei Darussalam", "Bulgaria", "Burkina Faso", "Burundi", "Cambodia", "Cameroon",
                   "Canada", "Cape Verde", "Cayman Islands", "Central African Republic", "Chad", "Channel Islands", "Chile", "China", "Colombia", "Comoros", "Congo, Dem. Rep.",
                   "Congo, Rep.", "Costa Rica", "Côte d'Ivoire", "Croatia", "Cuba", "Cyprus", "Czech Republic", "Denmark", "Djibouti", "Dominica", "Dominican Republic", "Ecuador",
                   "Egypt, Arab Rep.", "El Salvador", "Equatorial Guinea", "Eritrea", "Estonia", "Ethiopia", "Faeroe Islands", "Fiji", "Finland", "France", "French Polynesia",
                   "Gabon", "Gambia, The", "Georgia", "Germany", "Ghana", "Greece", "Greenland", "Grenada", "Guam", "Guatemala", "Guinea", "Guinea-Bissau", "Guyana", "Haiti",
                   "Honduras", "Hong Kong, China", "Hungary", "Iceland", "India", "Indonesia", "Iran, Islamic Rep.", "Iraq", "Ireland", "Isle of Man", "Israel", "Italy", "Jamaica",
                   "Japan", "Jordan", "Kazakhstan", "Kenya", "Kiribati", "Korea, Dem. Rep.", "Korea, Rep.", "Kuwait", "Kyrgyz Republic", "Lao PDR", "Latvia", "Lebanon", "Lesotho",
                   "Liberia", "Libya", "Liechtenstein", "Lithuania", "Luxembourg", "Macao, China", "Macedonia, FYR", "Madagascar", "Malawi", "Malaysia", "Maldives", "Mali", "Malta",
                   "Marshall Islands", "Mauritania", "Mauritius", "Mayotte", "Mexico", "Micronesia, Fed. Sts.", "Moldova", "Monaco", "Mongolia", "Montenegro", "Morocco",
                   "Mozambique", "Myanmar", "Namibia", "Nepal", "Netherlands", "Netherlands Antilles", "New Caledonia", "New Zealand", "Nicaragua", "Niger", "Nigeria",
                   "Northern Mariana Islands", "Norway", "Oman", "Pakistan", "Palau", "Panama", "Papua New Guinea", "Paraguay", "Peru", "Philippines", "Poland", "Portugal",
                   "Puerto Rico", "Qatar", "Romania", "Russian Federation", "Rwanda", "Samoa", "San Marino", "São Tomé and Principe", "Saudi Arabia", "Senegal", "Serbia",
                   "Seychelles", "Sierra Leone", "Singapore", "Slovak Republic", "Slovenia", "Solomon Islands", "Somalia", "South Africa", "Spain", "Sri Lanka", "St. Kitts and Nevis",
                   "St. Lucia", "St. Vincent and the Grenadines", "Sudan", "Suriname", "Swaziland", "Sweden", "Switzerland", "Syrian Arab Republic", "Tajikistan", "Tanzania", "Thailand",
                   "Timor-Leste", "Togo", "Tonga", "Trinidad and Tobago", "Tunisia", "Turkey", "Turkmenistan", "Uganda", "Ukraine", "United Arab Emirates", "United Kingdom",
                   "United States", "Uruguay", "Uzbekistan", "Vanuatu", "Venezuela, RB", "Vietnam", "Virgin Islands (U.S.)", "West Bank and Gaza", "Yemen, Rep.", "Zambia", "Zimbabwe"]

    # Returns a country name, as listed by the World Bank
    #
    #   Random.country = "Kenya"
    def country
      @@countries.rand
    end

    # Multi-word names need a backslash-escaped space inside %w(), otherwise
    # each word becomes its own entry.
    @@cities = %w(Midway Mount\ Pleasant Greenwood Franklin Oak\ Grove Centerville Salem Georgetown Fairview Riverside Rotorua Tauranga Whakatane Taupo Wanganui
                  Nababeep Aggeneys Pofadder Polokwane Bela\ Bela Goukamma Karatara Tswane Prieska Upington Hoopstad Bultfontein Wesselsbron Bothaville Trompsburg
                  Henneman Musina Ogies Kgatlahong Tembisa Tekoza Sebokeng Muntaung Umnkomaaz)

    # Returns a generic city name, with an attempt to have some international appeal
    #
    #   Random.city = "Tekoza"
    def city
      @@cities.rand
    end
  end
end
@@ -0,0 +1,77 @@
1
+ # Methods to create a markov chain from some input text.
2
+
3
+
4
module RandomData
  # Builds a table of observed token transitions and replays it to produce
  # weighted-random sequences.
  #
  # NOTE(review): #generate calls +roulette+ on an Array of counts; that method
  # is not defined here - presumably an Array extension returning a weighted
  # random index. Confirm it is loaded before use.
  class MarkovGenerator
    # memory:: how many of the most recent tokens make up the current state.
    def initialize(memory = 1)
      @memory_size = memory
      # Maps a marshalled state to a { token => occurrence count } Hash.
      @table = Hash.new { |hash, key| hash[key] = {} }
      @state = []
    end

    # Records +result+ as a token seen after the current state, then makes it
    # part of the state.
    def insert(result)
      counts = @table[Marshal.dump(@state)]
      counts[result] = counts.has_key?(result) ? counts[result] + 1 : 1
      next_state(result)
    end

    # Appends +result+ to the state, dropping the oldest token first once the
    # state already holds +memory+ tokens.
    def next_state(result)
      @state.shift unless @state.size < @memory_size
      @state.push(result)
    end

    # Produces +n+ tokens, yielding each one when a block is given, and returns
    # them in an Array.
    #
    # n::           number of tokens to generate.
    # clear_state:: when true, start from the empty state.
    #
    # If picking a token fails for the current state, the state is reset to
    # empty and the pick is attempted again; the fifth consecutive failure for
    # the same token is re-raised to the caller.
    def generate(n = 1, clear_state = false)
      @state = [] if clear_state
      produced = []

      n.times do
        attempts = 0
        begin
          transitions = @table[Marshal.dump(@state)]
          tokens  = transitions.keys
          weights = transitions.values
          # Weighted random pick, by index into +tokens+.
          index = weights.roulette
        rescue
          @state = []
          attempts += 1
          retry if attempts < 5
          raise
        end

        token = tokens[index]
        next_state(token)
        yield token if block_given?
        produced << token
      end

      produced
    end
  end
end
@@ -0,0 +1,73 @@
1
module RandomData
  # Methods to create realistic-looking names
  #
  # NOTE(review): the pickers call +rand+ with no arguments on Arrays; that
  # method is not defined in this module and is presumably an Array extension
  # supplied elsewhere in the library.
  module Names
    # Returns a random uppercase letter, "A" through "Z".
    def initial
      alphabet = ('A'..'Z').to_a
      alphabet.rand
    end

    # Surnames are stored in upper case and capitalized when returned.
    @@lastnames = %w[
      ABEL ANDERSON ANDREWS ANTHONY BAKER BROWN BURROWS CLARK CLARKE CLARKSON
      DAVIDSON DAVIES DAVIS DENT EDWARDS GARCIA GRANT HALL HARRIS HARRISON
      JACKSON JEFFRIES JEFFERSON JOHNSON JONES KIRBY KIRK LAKE LEE LEWIS
      MARTIN MARTINEZ MAJOR MILLER MOORE OATES PETERS PETERSON ROBERTSON
      ROBINSON RODRIGUEZ SMITH SMYTHE STEVENS TAYLOR THATCHER THOMAS THOMPSON
      WALKER WASHINGTON WHITE WILLIAMS WILSON YORKE
    ]

    # Returns a random last name with only its first letter in upper case.
    #
    #   >> Random.lastname
    #
    #   "Harris"
    def lastname
      surname = @@lastnames.rand
      surname.capitalize
    end

    @@male_first_names = %w[
      ADAM ANTHONY ARTHUR BRIAN CHARLES CHRISTOPHER DANIEL DAVID DONALD EDGAR
      EDWARD EDWIN GEORGE HAROLD HERBERT HUGH JAMES JASON JOHN JOSEPH KENNETH
      KEVIN MARCUS MARK MATTHEW MICHAEL PAUL PHILIP RICHARD ROBERT ROGER
      RONALD SIMON STEVEN TERRY THOMAS WILLIAM
    ]

    @@female_first_names = %w[
      ALISON ANN ANNA ANNE BARBARA BETTY BERYL CAROL CHARLOTTE CHERYL DEBORAH
      DIANA DONNA DOROTHY ELIZABETH EVE FELICITY FIONA HELEN HELENA JENNIFER
      JESSICA JUDITH KAREN KIMBERLY LAURA LINDA LISA LUCY MARGARET MARIA MARY
      MICHELLE NANCY PATRICIA POLLY ROBYN RUTH SANDRA SARAH SHARON SUSAN
      TABITHA URSULA VICTORIA WENDY
    ]

    # Male names first, then female names.
    @@first_names = @@male_first_names + @@female_first_names

    # Returns a random first name drawn from the male and female lists combined.
    #
    #   >> Random.firstname
    #
    #   "Sandra"
    def firstname
      given = @@first_names.rand
      given.capitalize
    end

    # Returns a random first name from the male list.
    #
    #   >> Random.firstname_male
    #
    #   "James"
    def firstname_male
      given = @@male_first_names.rand
      given.capitalize
    end

    # Returns a random first name from the female list.
    #
    #   >> Random.firstname_female
    #
    #   "Mary"
    def firstname_female
      given = @@female_first_names.rand
      given.capitalize
    end
  end
end
@@ -0,0 +1,31 @@
1
module RandomData
  # Methods to create random numbers and bits.
  module Numbers
    # n can be an Integer or a Range. If it is an Integer, it just returns a random
    # number greater than or equal to 0 and less than n. If it is a Range, it
    # returns a random number within the range (nil when the range is empty).
    #
    # Examples
    #
    #   >> Random.number(5)
    #   => 4
    #   >> Random.number(5)
    #   => 2
    #   >> Random.number(3..6)
    #   => 5
    def number(n)
      return rand(n) unless n.is_a?(Range)

      low, high = n.begin, n.end
      if low.is_a?(Integer) && high.is_a?(Integer)
        # Work out the pick arithmetically so that a large range does not have
        # to be expanded into an Array first.
        span = high - low + (n.exclude_end? ? 0 : 1)
        span > 0 ? low + rand(span) : nil
      else
        # Other ranges are expanded and sampled via Array#rand.
        # NOTE(review): Array#rand is not defined in this module; presumably an
        # Array extension provided elsewhere in the library.
        n.to_a.rand
      end
    end

    # return a random bit, 0 or 1.
    def bit
      rand(2)
    end

    # return an array of n random bits.
    def bits(n)
      Array.new(n) { bit }
    end
  end
end
@@ -0,0 +1,70 @@
1
module RandomData
  # Methods to create random strings and paragraphs.
  module Text
    # Returns a string of random upper- and lowercase alphanumeric characters. Accepts a size parameter, defaults to 16 characters.
    #
    #   >> Random.alphanumeric
    #
    #   "Ke2jdknPYAI8uCXj"
    #
    #   >> Random.alphanumeric(5)
    #
    #   "7sj7i"
    def alphanumeric(size = 16)
      s = ""
      size.times do
        i = Kernel.rand(62)
        # Map 0-9 to '0'-'9' (code 48+), 10-35 to 'A'-'Z' (code 65+) and
        # 36-61 to 'a'-'z' (code 97+).
        offset = if i < 10
                   48
                 elsif i < 36
                   55
                 else
                   61
                 end
        s << (i + offset).chr
      end
      s
    end

    # TODO make these more coherent #:nodoc:

    @@sentences = [
      "Lorem ipsum dolor sit amet consectetur adipisicing elit sed do eiusmod tempor incididunt ut labore et dolore magna aliqua",
      "Ut enim ad minim veniam quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat",
      "Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur",
      "Excepteur sint occaecat cupidatat non proident sunt in culpa qui officia deserunt mollit anim id est laborum",
      "There's a voice that keeps on calling me",
      "Down the road that's where I'll always be",
      "Every stop I make I make a new friend; Can't stay for long just turn around and I'm gone again",
      "Maybe tomorrow I'll want to settle down - until tomorrow I'll just keep moving on",
      "Hong Kong Phooey number one super guy, Hong Kong Phooey quicker than the human eye",
      "He's got style, a groovy style, and a car that just won't stop",
      "Hey there where ya goin, not exactly knowin'",
      "Who says you have to call just one place home?",
      "He's going everywhere, B.J. McKay and his best friend Bear",
      "He just keeps on movin' and ladies keep improvin'",
      "Every day is better than the last, with new dreams and better scenes and best of all I don't pay property tax",
      "Rolling down to Dallas - who is providin' my palace?",
      "Off to New Orleans or who knows where",
      "Soaring through all the galaxies in search of Earth flying in to the night",
      "Ulysses, fighting evil and tyranny with all his power and with all of his might",
      "No-one else can do the things you do, like a bolt of thunder from the blue",
      "Always fighting all the evil forces bringing peace and justice to all",
      "I've gotten burned over Cheryl Tiegs and blown up for Raquel Welch, but when I end up in the hay it's only hay, hey hey",
      "I might jump an open drawbridge or Tarzan from a vine, because I'm the unknown stuntman that makes Eastwood look so fine"]

    # Returns a given number of paragraphs (defaults to two), using a small pool of generic sentences.
    # Each paragraph holds one to five sentences, each followed by a space, and ends with two newlines.
    # A full stop is added to a sentence unless it already ends in ".", "?" or "!".
    #
    #   >> Random.paragraphs
    #
    #   "I might jump an open drawbridge or Tarzan from a vine, because I'm the unknown stuntman that makes Eastwood look so fine. \n\nAlways fighting all the evil forces bringing peace and justice to all. \n\n"
    #
    # NOTE(review): sentences are picked with Array#rand, which is not defined
    # in this module - presumably an Array extension provided elsewhere.
    def paragraphs(num = 2)
      text = ''

      num.times do
        (rand(5) + 1).times do
          sentence = @@sentences.rand
          text << sentence
          # Avoid doubled punctuation such as "?." on sentences that already
          # carry their own terminator.
          text << '.' unless sentence =~ /[.?!]\z/
          text << ' '
        end
        text << "\n\n"
      end

      text
    end
  end
end
+ end