jjulian-random_data 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,61 @@
1
+
2
module RandomData

  # Defines methods for the generation of random data based on a supplied grammar.

  module Grammar

    # Returns simple sentences based on a supplied grammar, which must be a hash, the
    # keys of which are symbols. The values are either an array of successive values or a
    # grammar (i.e. a hash with symbols as keys, and hashes or arrays as values). The arrays
    # contain symbols referencing the keys in the present grammar, strings to be output, or
    # nested grammars (hashes). A value may also be a single string (output as is) or a single
    # symbol (expanded as a reference to another key of the same grammar).
    #
    # grammar:: the Hash describing the productions. When +what+ is nil, an Array or a
    #           String is also accepted: an Array is expanded item by item and a String is
    #           returned as is.
    # what::    the key to expand. When nil, a random key of the Hash is chosen.
    #
    # Returns the generated String. An empty Hash produces an empty String.
    # Raises a RuntimeError when a value is not a Symbol, String, Array or Hash.
    #
    # Example:
    # Random.grammatical_construct({:story => [:man, " bites ", :dog], :man => { :bob => "Bob"}, :dog => {:a =>"Rex", :b =>"Rover"}}, :story)
    # => "Bob bites Rover"

    def grammatical_construct(grammar, what = nil)
      output = ""
      if what.nil?
        case grammar
        when Hash
          # An empty grammar has no key to pick; the nil key would re-enter this
          # branch and recurse without end, so it simply produces no output.
          unless grammar.empty?
            a_key = grammar.keys.sort_by { rand }[0]
            output += grammatical_construct(grammar, a_key)
          end
        when Array
          grammar.each do |item|
            output += grammatical_construct(item)
          end
        when String
          output += grammar
        end
      else
        rhs = grammar[what]
        case rhs
        when Array
          rhs.each do |item|
            case item
            when Symbol
              output += grammatical_construct(grammar, item)
            when String
              output += item
            when Hash
              output += grammatical_construct(item)
            else
              raise "#{item.inspect} must be a symbol or string or Hash"
            end
          end
        when Hash
          output += grammatical_construct(rhs)
        when Symbol
          # A bare symbol refers to another key of the same grammar, so the
          # grammar has to be passed along for the reference to be resolved.
          output += grammatical_construct(grammar, rhs)
        when String
          output += rhs
        else
          raise "#{rhs.inspect} must be a symbol, string, Array or Hash"
        end
      end
      output
    end

  end
end
@@ -0,0 +1,133 @@
1
+ # encoding: UTF-8
2
module RandomData

  # Defines methods to return random location data.
  #
  # The methods pick values with Array#rand, which is not defined in this
  # module and is expected to be supplied elsewhere in the library.

  module Locations


    trees = %w( Acacia Beech Birch Cedar Cherry Chestnut Elm Larch Laurel
                Linden Maple Oak Pine Rose Walnut Willow)
    people = %w( Adams Franklin Jackson Jefferson Lincoln
                 Madison Washington Wilson)
    # Not added to the street name pool below.
    people_uk = %w( Churchill Tyndale Latimer Cranmer )
    places = %w( Highland Hill Park Woodland Sunset Virginia)
    numbers = %w( 1st 2nd 4th 5th 34th 42nd )
    @@streetnames = trees + people + places + numbers

    @@street_types = %w(St Ave Rd Blvd Trl Ter Rdg Pl Pkwy Ct Circle)

    # Returns the first line of a US mailing address (street number, street name, street type)
    #
    # Example:
    #
    #Random.address_line_1 = "24317 Jefferson Blvd"

    def address_line_1
      "#{rand(40000)} #{@@streetnames.rand} #{@@street_types.rand}"
    end

    alias :us_address_line_1 :address_line_1

    @@line2types = ["Apt", "Bsmt", "Bldg", "Dept", "Fl", "Frnt", "Hngr", "Lbby", "Lot", "Lowr", "Ofc", "Ph", "Pier", "Rear", "Rm", "Side", "Slip", "Spc", "Stop", "Ste", "Trlr", "Unit", "Uppr"]

    # Returns the second line of a US mailing address (unit designator followed by a number below 999)
    #
    # Example:
    #
    #Random.address_line_2 = "Apt 42"

    def address_line_2
      "#{@@line2types.rand} #{rand(999)}"
    end

    # Returns a random 5-digit string, not guaranteed to be a legitimate zip code.
    # Legal zip codes can have leading zeroes and thus they need to be strings.

    def zipcode
      # rand(n) excludes n itself, so 100000 is needed for "99999" to be reachable.
      "%05d" % rand(100000)
    end


    # Returns a string providing something in the general form of a UK post code. Like the zip codes, this might
    # not actually be valid. Doesn't cover London whose codes are like "SE1".

    def uk_post_code
      post_towns = %w(BM CB CV LE LI LS KT MK NE OX PL YO)
      # Can't remember any others at the moment
      number_1 = rand(100).to_s
      number_2 = rand(100).to_s
      # Every two-letter combination from "AA" to "ZZ" is built, then one is picked.
      letters = ("AA".."ZZ").to_a.rand

      "#{post_towns.rand}#{number_1} #{number_2}#{letters}"
    end

    # from technoweenie: http://svn.techno-weenie.net/projects/plugins/us_states/lib/us_states.rb
    # Each entry is a [full name, 2-character abbreviation] pair.
    @@us_states = [["Alaska", "AK"], ["Alabama", "AL"], ["Arkansas", "AR"], ["Arizona", "AZ"],
                   ["California", "CA"], ["Colorado", "CO"], ["Connecticut", "CT"], ["District of Columbia", "DC"],
                   ["Delaware", "DE"], ["Florida", "FL"], ["Georgia", "GA"], ["Hawaii", "HI"], ["Iowa", "IA"],
                   ["Idaho", "ID"], ["Illinois", "IL"], ["Indiana", "IN"], ["Kansas", "KS"], ["Kentucky", "KY"],
                   ["Louisiana", "LA"], ["Massachusetts", "MA"], ["Maryland", "MD"], ["Maine", "ME"], ["Michigan", "MI"],
                   ["Minnesota", "MN"], ["Missouri", "MO"], ["Mississippi", "MS"], ["Montana", "MT"], ["North Carolina", "NC"],
                   ["North Dakota", "ND"], ["Nebraska", "NE"], ["New Hampshire", "NH"], ["New Jersey", "NJ"],
                   ["New Mexico", "NM"], ["Nevada", "NV"], ["New York", "NY"], ["Ohio", "OH"], ["Oklahoma", "OK"],
                   ["Oregon", "OR"], ["Pennsylvania", "PA"], ["Rhode Island", "RI"], ["South Carolina", "SC"], ["South Dakota", "SD"],
                   ["Tennessee", "TN"], ["Texas", "TX"], ["Utah", "UT"], ["Virginia", "VA"], ["Vermont", "VT"],
                   ["Washington", "WA"], ["Wisconsin", "WI"], ["West Virginia", "WV"], ["Wyoming", "WY"]]

    # Returns a state 2-character abbreviation
    # Random.state = "IL"

    def state
      @@us_states.rand[1]
    end

    # Returns a full state name
    #Random.state_full = "Texas"

    def state_full
      @@us_states.rand[0]
    end

    # from http://siteresources.worldbank.org/DATASTATISTICS/Resources/CLASS.XLS
    @@countries = ["Afghanistan", "Albania", "Algeria", "American Samoa", "Andorra", "Angola", "Antigua and Barbuda", "Argentina", "Armenia", "Aruba", "Australia",
                   "Austria", "Azerbaijan", "Bahamas, The", "Bahrain", "Bangladesh", "Barbados", "Belarus", "Belgium", "Belize", "Benin", "Bermuda", "Bhutan",
                   "Bolivia", "Bosnia and Herzegovina", "Botswana", "Brazil", "Brunei Darussalam", "Bulgaria", "Burkina Faso", "Burundi", "Cambodia", "Cameroon",
                   "Canada", "Cape Verde", "Cayman Islands", "Central African Republic", "Chad", "Channel Islands", "Chile", "China", "Colombia", "Comoros", "Congo, Dem. Rep.",
                   "Congo, Rep.", "Costa Rica", "Côte d'Ivoire", "Croatia", "Cuba", "Cyprus", "Czech Republic", "Denmark", "Djibouti", "Dominica", "Dominican Republic", "Ecuador",
                   "Egypt, Arab Rep.", "El Salvador", "Equatorial Guinea", "Eritrea", "Estonia", "Ethiopia", "Faeroe Islands", "Fiji", "Finland", "France", "French Polynesia",
                   "Gabon", "Gambia, The", "Georgia", "Germany", "Ghana", "Greece", "Greenland", "Grenada", "Guam", "Guatemala", "Guinea", "Guinea-Bissau", "Guyana", "Haiti",
                   "Honduras", "Hong Kong, China", "Hungary", "Iceland", "India", "Indonesia", "Iran, Islamic Rep.", "Iraq", "Ireland", "Isle of Man", "Israel", "Italy", "Jamaica",
                   "Japan", "Jordan", "Kazakhstan", "Kenya", "Kiribati", "Korea, Dem. Rep.", "Korea, Rep.", "Kuwait", "Kyrgyz Republic", "Lao PDR", "Latvia", "Lebanon", "Lesotho",
                   "Liberia", "Libya", "Liechtenstein", "Lithuania", "Luxembourg", "Macao, China", "Macedonia, FYR", "Madagascar", "Malawi", "Malaysia", "Maldives", "Mali", "Malta",
                   "Marshall Islands", "Mauritania", "Mauritius", "Mayotte", "Mexico", "Micronesia, Fed. Sts.", "Moldova", "Monaco", "Mongolia", "Montenegro", "Morocco",
                   "Mozambique", "Myanmar", "Namibia", "Nepal", "Netherlands", "Netherlands Antilles", "New Caledonia", "New Zealand", "Nicaragua", "Niger", "Nigeria",
                   "Northern Mariana Islands", "Norway", "Oman", "Pakistan", "Palau", "Panama", "Papua New Guinea", "Paraguay", "Peru", "Philippines", "Poland", "Portugal",
                   "Puerto Rico", "Qatar", "Romania", "Russian Federation", "Rwanda", "Samoa", "San Marino", "São Tomé and Principe", "Saudi Arabia", "Senegal", "Serbia",
                   "Seychelles", "Sierra Leone", "Singapore", "Slovak Republic", "Slovenia", "Solomon Islands", "Somalia", "South Africa", "Spain", "Sri Lanka", "St. Kitts and Nevis",
                   "St. Lucia", "St. Vincent and the Grenadines", "Sudan", "Suriname", "Swaziland", "Sweden", "Switzerland", "Syrian Arab Republic", "Tajikistan", "Tanzania", "Thailand",
                   "Timor-Leste", "Togo", "Tonga", "Trinidad and Tobago", "Tunisia", "Turkey", "Turkmenistan", "Uganda", "Ukraine", "United Arab Emirates", "United Kingdom",
                   "United States", "Uruguay", "Uzbekistan", "Vanuatu", "Venezuela, RB", "Vietnam", "Virgin Islands (U.S.)", "West Bank and Gaza", "Yemen, Rep.", "Zambia", "Zimbabwe"]

    # Returns a country name, as listed by the World Bank
    #
    #Random.country = "Kenya"

    def country
      @@countries.rand
    end

    # Two-word names need an escaped space, otherwise %w splits them into separate entries.
    @@cities = %w(Midway Mount\ Pleasant Greenwood Franklin Oak\ Grove Centerville Salem Georgetown Fairview Riverside Rotorua Tauranga Whakatane Taupo Wanganui
                  Nababeep Aggeneys Pofadder Polokwane Bela\ Bela Goukamma Karatara Tswane Prieska Upington Hoopstad Bultfontein Wesselsbron Bothaville Trompsburg
                  Henneman Musina Ogies Kgatlahong Tembisa Tekoza Sebokeng Muntaung Umnkomaaz)

    # Returns a generic city name, with an attempt to have some international appeal
    #
    # Random.city = "Tekoza"

    def city
      @@cities.rand
    end
  end
end
@@ -0,0 +1,77 @@
1
+ # Methods to create a markov chain from some input text.
2
+
3
+
4
module RandomData
  # Builds a table of observed token transitions and generates new token
  # sequences from it. The last +memory+ tokens form the current state;
  # for every state the table counts how often each token followed it.
  class MarkovGenerator

    # memory:: how many of the most recent tokens make up a state.
    def initialize(memory = 1)
      @memory_size = memory
      # Maps a serialized state to a Hash of {next token => occurrence count}.
      @table = Hash.new { |h, k| h[k] = {} }
      @state = []
    end


    # given the next token of input add it to the
    # table
    def insert(result)
      # The state Array is serialized so that its contents at this moment,
      # not the mutable Array object, serve as the table key.
      counts = @table[Marshal.dump(@state)]
      counts[result] = counts.fetch(result, 0) + 1
      next_state(result)
    end

    # Advances the current state by +result+, dropping the oldest token
    # once the state already holds +@memory_size+ tokens.
    def next_state(result)
      @state.shift if @state.size >= @memory_size
      @state.push(result)
    end

    # Generates +n+ tokens, following the recorded transitions from the
    # current state.
    #
    # n::           number of tokens to produce.
    # clear_state:: when true, start from the empty state instead of the
    #               state left behind by earlier inserts/generations.
    #
    # Yields each token to the block, if one is given, and returns the
    # tokens as an Array. When picking a token fails, the state is reset to
    # empty and the pick is retried; the error is re-raised once five
    # attempts have failed.
    def generate(n = 1, clear_state = false)
      @state = [] if clear_state
      results = []
      n.times do
        retry_count = 0
        begin
          # fetch (rather than []) keeps the table's default block from
          # storing an empty entry for every state that was never observed.
          result_hash = @table.fetch(Marshal.dump(@state)) { {} }
          the_keys = result_hash.keys
          the_vals = result_hash.values
          # get the weighted random value, by index.
          # Array#roulette is not defined in this class and is expected to
          # be supplied elsewhere in the library.
          i = the_vals.roulette
        rescue
          @state = []
          retry_count += 1
          retry if retry_count < 5
          raise
        end
        result = the_keys[i]

        next_state(result)
        yield result if block_given?
        results << result
      end
      results
    end

  end

end
@@ -0,0 +1,73 @@
1
module RandomData

  # Generators for realistic-looking personal names.
  #
  # Values are picked with Array#rand, which is not defined in this module
  # and is expected to be supplied elsewhere in the library.
  module Names

    # Name pools, stored in upper case and capitalized when returned.

    @@lastnames = %w[
      ABEL ANDERSON ANDREWS ANTHONY BAKER BROWN BURROWS CLARK CLARKE CLARKSON DAVIDSON DAVIES DAVIS
      DENT EDWARDS GARCIA GRANT HALL HARRIS HARRISON JACKSON JEFFRIES JEFFERSON JOHNSON JONES
      KIRBY KIRK LAKE LEE LEWIS MARTIN MARTINEZ MAJOR MILLER MOORE OATES PETERS PETERSON ROBERTSON
      ROBINSON RODRIGUEZ SMITH SMYTHE STEVENS TAYLOR THATCHER THOMAS THOMPSON WALKER WASHINGTON WHITE
      WILLIAMS WILSON YORKE
    ]

    @@male_first_names = %w[
      ADAM ANTHONY ARTHUR BRIAN CHARLES CHRISTOPHER DANIEL DAVID DONALD EDGAR EDWARD EDWIN
      GEORGE HAROLD HERBERT HUGH JAMES JASON JOHN JOSEPH KENNETH KEVIN MARCUS MARK MATTHEW
      MICHAEL PAUL PHILIP RICHARD ROBERT ROGER RONALD SIMON STEVEN TERRY THOMAS WILLIAM
    ]

    @@female_first_names = %w[
      ALISON ANN ANNA ANNE BARBARA BETTY BERYL CAROL CHARLOTTE CHERYL DEBORAH DIANA DONNA
      DOROTHY ELIZABETH EVE FELICITY FIONA HELEN HELENA JENNIFER JESSICA JUDITH KAREN KIMBERLY
      LAURA LINDA LISA LUCY MARGARET MARIA MARY MICHELLE NANCY PATRICIA POLLY ROBYN RUTH SANDRA
      SARAH SHARON SUSAN TABITHA URSULA VICTORIA WENDY
    ]

    # Male names first, then female names.
    @@first_names = @@male_first_names + @@female_first_names

    # Returns one random letter from 'A' to 'Z'.

    def initial
      alphabet = ('A'..'Z').to_a
      alphabet.rand
    end

    # Returns a random last name with only its first letter in upper case.
    #
    #   >> Random.lastname
    #
    #   "Harris"

    def lastname
      picked = @@lastnames.rand
      picked.capitalize
    end

    # Returns a random first name, male or female, with only its first
    # letter in upper case.
    #
    #   >> Random.firstname
    #
    #   "Sandra"

    def firstname
      picked = @@first_names.rand
      picked.capitalize
    end

    # Returns a random male first name with only its first letter in upper case.
    #
    #   >> Random.firstname_male
    #
    #   "James"

    def firstname_male
      picked = @@male_first_names.rand
      picked.capitalize
    end

    # Returns a random female first name with only its first letter in upper case.
    #
    #   >> Random.firstname_female
    #
    #   "Mary"

    def firstname_female
      picked = @@female_first_names.rand
      picked.capitalize
    end

  end
end
@@ -0,0 +1,31 @@
1
module RandomData
  # Generators for random numbers and bits.
  module Numbers
    # Returns a random number chosen according to +n+.
    #
    # When +n+ is a Range, the range is expanded to an Array and one of its
    # elements is picked with Array#rand (not defined in this module;
    # expected to be supplied elsewhere in the library). Any other +n+ is
    # handed to rand, which for an Integer gives a number greater than or
    # equal to 0 and less than +n+.
    #
    # Examples
    #
    #   >> Random.number(5)
    #   => 4
    #   >> Random.number(5)
    #   => 2
    #   >> Random.number(5)
    #   => 1
    def number(n)
      return rand(n) unless n.is_a?(Range)

      n.to_a.rand
    end

    # Returns a single random bit: 0 or 1.
    def bit
      rand(2)
    end

    # Returns an Array holding +n+ random bits.
    def bits(n)
      n.times.map { bit }
    end

  end
end
@@ -0,0 +1,70 @@
1
module RandomData

  # Methods to create random strings and paragraphs.
  module Text

    # Returns a string of random upper- and lowercase letters and digits.
    # +size+ is the number of characters and defaults to 16.
    #
    #   >> Random.alphanumeric
    #
    #   "Ke2jdknPYAI8uCXj"
    #
    #   >> Random.alphanumeric(5)
    #
    #   "7sj7i"

    def alphanumeric(size = 16)
      s = ""
      size.times do
        i = Kernel.rand(62)
        # Map 0-9 onto '0'-'9', 10-35 onto 'A'-'Z' and 36-61 onto 'a'-'z'
        # by shifting the number to the matching character code.
        shift = if i < 10
                  48
                elsif i < 36
                  55
                else
                  61
                end
        s << (i + shift).chr
      end
      s
    end

    # TODO make these more coherent #:nodoc:

    @@sentences = [
      "Lorem ipsum dolor sit amet consectetur adipisicing elit sed do eiusmod tempor incididunt ut labore et dolore magna aliqua",
      "Ut enim ad minim veniam quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat",
      "Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur",
      "Excepteur sint occaecat cupidatat non proident sunt in culpa qui officia deserunt mollit anim id est laborum",
      "There's a voice that keeps on calling me",
      "Down the road that's where I'll always be",
      "Every stop I make I make a new friend; Can't stay for long just turn around and I'm gone again",
      "Maybe tomorrow I'll want to settle down - until tomorrow I'll just keep moving on",
      "Hong Kong Phooey number one super guy, Hong Kong Phooey quicker than the human eye",
      "He's got style, a groovy style, and a car that just won't stop",
      "Hey there where ya goin, not exactly knowin'",
      "Who says you have to call just one place home?",
      "He's going everywhere, B.J. McKay and his best friend Bear",
      "He just keeps on movin' and ladies keep improvin'",
      "Every day is better than the last, with new dreams and better scenes and best of all I don't pay property tax",
      "Rolling down to Dallas - who is providin' my palace?",
      "Off to New Orleans or who knows where",
      "Soaring through all the galaxies in search of Earth flying in to the night",
      "Ulysses, fighting evil and tyranny with all his power and with all of his might",
      "No-one else can do the things you do, like a bolt of thunder from the blue",
      "Always fighting all the evil forces bringing peace and justice to all",
      "I've gotten burned over Cheryl Tiegs and blown up for Raquel Welch, but when I end up in the hay it's only hay, hey hey",
      "I might jump an open drawbridge or Tarzan from a vine, beause I'm the unknown stuntman that makes Eastwood look so fine"]

    # Returns +num+ paragraphs (two by default) built from a small pool of
    # generic sentences. Each paragraph holds one to five sentences, every
    # sentence is followed by a period and a space, and every paragraph,
    # including the last, is followed by two newlines.
    #
    # Sentences are picked with Array#rand, which is not defined in this
    # module and is expected to be supplied elsewhere in the library.
    #
    #   >> Random.paragraphs
    #
    #   "I might jump an open drawbridge or Tarzan from a vine, beause I'm the unknown stuntman that makes Eastwood look so fine. \n\nAlways fighting all the evil forces bringing peace and justice to all. \n\n"

    def paragraphs(num = 2)
      text = ''

      num.times do
        sentence_count = rand(5) + 1
        paragraph = ''
        sentence_count.times do
          paragraph += @@sentences.rand + '. '
        end
        text += paragraph + "\n\n"
      end

      text
    end

  end
end