CSV-datagen 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.txt +17 -0
- data/bin/gen-csv.rb +40 -0
- data/lib/gen-data.rb +122 -0
- data/lib/read-params.rb +129 -0
- data/lib/version.rb +3 -0
- data/test/test-gen.rb +57 -0
- data/test/test-read-params.rb +31 -0
- metadata +51 -0
data/README.txt
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
This file contains all information about the "CSV data generator" project.
|
2
|
+
|
3
|
+
1. Overview
|
4
|
+
2. Format of configuration file.
|
5
|
+
|
6
|
+
The configuration file contains lines that define the parameters of the generated CSV file.
|
7
|
+
[out,<output filename>]
|
8
|
+
[lines,<number of lines in the output file>]
|
9
|
+
[headers,<true if output file should contain headers row>]
|
10
|
+
[field specification]
|
11
|
+
|
12
|
+
2.1 Field specification
|
13
|
+
|
14
|
+
integer - int,<name>,<max value> - the generated number will be between 0..max and has the given name.
|
15
|
+
string - string,<name>,<length> - the generated string has the given name and length.
|
16
|
+
float - float,<name>,<max>,<decimal> - the generated number has the given name, will be between 0..max and has <decimal> digits after the decimal point.
|
17
|
+
datetime - dtime,<name>,<date format>,<time format>. The date format is a string in which the user can use the following tokens: DD for days, MM for months, YYYY or YY for years. The delimiter should be the '/' sign. The time format is "12" for the twelve-hour format or "24" for the twenty-four-hour format.
|
data/bin/gen-csv.rb
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
# This file contains the main script.
|
2
|
+
require 'lib\gen-data'
|
3
|
+
require 'lib\read-params'
|
4
|
+
require 'lib\version'
|
5
|
+
require 'faster_csv'
|
6
|
+
|
7
|
+
# Script entry point: reads the field description from 'param.ini' in the
# current directory, then writes the requested number of random rows (and an
# optional header row) to the configured output CSV file.
if $0 == __FILE__
  puts 'Csv data generator version:'+GenCSV::Version
  reader = GenCSV::ParamsReader.new('param.ini')
  datagen = DataGenerator.new

  # The block form of FasterCSV.open closes the output file even when row
  # generation raises, so no handle is leaked on failure.
  FasterCSV.open(reader.out, "w") do |csv|
    # Header row: one column per configured field, in configuration order.
    csv << reader.fields.map { |field| field[:name] } if reader.headers

    reader.lines.times do
      row = reader.fields.map do |field|
        case field[:type]
        when 'int'
          datagen.gen_int(field[:max]).to_s
        when 'string'
          datagen.gen_string(field[:length]).to_s
        when 'float'
          datagen.gen_float(field[:max], field[:decimal]).to_s
        when 'dtime'
          # dtime fields are parsed and listed in the header, so they must
          # also produce a value; a part whose format is rejected by the
          # generator (it returns '') is simply left out.
          date_part = datagen.gen_date(field[:dformat].to_s)
          time_part = datagen.gen_time(field[:tformat].to_s)
          [date_part, time_part].reject { |part| part.empty? }.join(' ')
        else
          # Unknown field types keep their column but stay empty.
          ''
        end
      end
      csv << row
    end
  end

  puts "Data generation complete"
end
|
data/lib/gen-data.rb
ADDED
@@ -0,0 +1,122 @@
|
|
1
|
+
# Class that generates all types of data.
|
2
|
+
|
3
|
+
# Produces random values (integers, strings, floats, booleans, dates and
# times) that are used as cell contents of a generated CSV file.
#
# Every random draw goes through +rand+ with an implicit receiver, so the
# source of randomness can be replaced per instance when needed.
class DataGenerator
  # Alphabet used by #gen_string: a-z, A-Z and 0-9.
  STRING_CHARS = (("a".."z").to_a + ("A".."Z").to_a + ("0".."9").to_a).freeze

  # Returns a random number drawn with Kernel#rand(interval); for a positive
  # integer +interval+ that is an integer n with 0 <= n < interval.
  def gen_int(interval)
    rand(interval)
  end

  # Returns a random alphanumeric string of +len+ characters ('' when +len+
  # is zero or negative).
  def gen_string(len)
    # rand(size), not rand(size - 1): the latter could never pick the last
    # character of the alphabet ('9').
    (1..len).map { STRING_CHARS[rand(STRING_CHARS.size)] }.join
  end

  # Returns +a+ multiplied by itself +i+ times (1 when +i+ is not positive).
  def raise_to_power(a, i)
    res = 1
    i.times { res *= a }
    res
  end

  # Returns a random float whose integer part is drawn with rand(interval)
  # and whose fractional part has up to +drob+ decimal digits.
  def gen_float(interval, drob = 0)
    whole = rand(interval)
    fraction = drob != 0 ? rand(raise_to_power(10, drob)) : 0
    # Left-pad the fraction with zeros: without the padding a draw of 5 with
    # four decimals would become ".5" instead of ".0005", which skews the
    # distribution towards large fractions.
    "#{whole}.#{fraction.to_s.rjust(drob, '0')}".to_f
  end

  # Returns true or false with equal probability.
  def gen_bool
    # ">=" splits 0..999 into two halves of 500 values each (">" gave 499).
    gen_int(1000) >= 500
  end

  # Builds a random date string following +format+.
  #
  # +format+ is a list of tokens separated by '/' or, failing that, ':'.
  # Tokens are case-insensitive: d/dd (day 1..28), m/mm (month 1..12),
  # yy (0..99) and yyyy (1900..2049). The parts are joined with the same
  # delimiter that was found in +format+.
  #
  # Prints a message and returns '' when the format is not recognized.
  def gen_date(format)
    del = '/'
    tokens = format.split(del)
    if tokens.size == 1
      del = ':'
      tokens = format.split(del)
    end
    if tokens.size == 1
      puts 'date has unrecognized format'
      return ''
    end

    parts = tokens.map do |token|
      case token.downcase
      when 'd', 'dd'
        # + 1: days and months are 1-based, rand alone would yield 0.
        (gen_int(28) + 1).to_s
      when 'm', 'mm'
        (gen_int(12) + 1).to_s
      when 'yy'
        gen_int(100).to_s
      when 'yyyy'
        (gen_int(150) + 1900).to_s
      else
        puts 'date has unrecognized format'
        return ''
      end
    end
    parts.join(del)
  end

  # Builds a random time string following +format+.
  #
  # +format+ is a ':'-separated list of tokens, optionally starting with
  # '12' or '24' to select the clock style (24-hour when omitted). Tokens
  # are case-insensitive: h (hour), m (0..59) and ms (0..999). In 12-hour
  # mode the hour is shown as 1..12 and ' AM' or ' PM' is appended.
  #
  # Prints a message and returns '' when the format is not recognized.
  def gen_time(format)
    tokens = format.split(':')
    if tokens.size == 1
      puts 'time has unrecognized format'
      return ''
    end

    is24 = tokens[0] != '12'
    tokens.shift if tokens[0] == '12' || tokens[0] == '24'

    pm = false
    parts = tokens.map do |token|
      case token.downcase
      when 'h'
        # Always draw from the full day so that both halves are reachable;
        # the 12-hour display is derived from the 24-hour value.
        hour = gen_int(24)
        if is24
          hour.to_s
        else
          pm = hour >= 12
          hour12 = hour % 12
          (hour12 == 0 ? 12 : hour12).to_s
        end
      when 'm'
        gen_int(60).to_s
      when 'ms'
        gen_int(1000).to_s
      else
        puts 'time has unrecognized format'
        return ''
      end
    end

    res = parts.join(':')
    res += (pm ? ' PM' : ' AM') unless is24
    res
  end

  # Placeholder: weekday generation is not implemented yet; returns nil.
  def gen_weekday
  end
end
|
data/lib/read-params.rb
ADDED
@@ -0,0 +1,129 @@
|
|
1
|
+
# Class that reads params from the settings file.
|
2
|
+
|
3
|
+
# Namespace of the CSV data generator: default settings and the reader of
# the configuration file.
module GenCSV

  # Values used when the configuration file does not provide the setting.
  DEFAULT_OUT_FILE = "out-data.csv"
  DEFAULT_LINES_COUNT = 100
  DEFAULT_HEADERS = false

  # Reads the description of the CSV file to generate.
  #
  # The configuration file may start with up to three optional setting
  # lines, in this order: "out,<file>", "lines,<count>" and
  # "headers,<flag>". All remaining lines are field specifications whose
  # first comma-separated item is the field type (int, string, float, dtime
  # or tdate); lines with any other type are ignored.
  class ParamsReader
    attr_reader :out     # output file name
    attr_reader :lines   # number of rows to generate
    attr_reader :fields  # array of hashes, one per field specification
    attr_reader :headers # true if the output file should contain a header row

    # Spellings of the headers flag that switch the header row on.
    TRUE_VALUES = %w[true yes on 1].freeze

    # inifile - path to the file that contains the description of the CSV file.
    # Raises the usual File errors (e.g. Errno::ENOENT) if it cannot be read.
    def initialize(inifile)
      # File.readlines closes the file itself (File.open(...).readlines
      # left the handle open until garbage collection).
      content = File.readlines(inifile).map { |line| line.strip }

      @out = take_setting(content, 'out') || DEFAULT_OUT_FILE

      lines_value = take_setting(content, 'lines')
      @lines = lines_value ? lines_value.to_i : DEFAULT_LINES_COUNT

      # The flag value is honoured, so "headers,false" disables the header.
      headers_value = take_setting(content, 'headers')
      @headers = headers_value ? TRUE_VALUES.include?(headers_value.strip.downcase) : DEFAULT_HEADERS

      @fields = []
      content.each do |line|
        type, *args = line.split(',')
        # A single case guarantees that a line is parsed at most once, even
        # when the field NAME happens to equal a type keyword.
        case type
        when 'int'    then @fields.push(parse_int(args))
        when 'string' then @fields.push(parse_string(args))
        when 'float'  then @fields.push(parse_float(args))
        when 'dtime'  then @fields.push(parse_dtime(args))
        when 'tdate'  then @fields.push(parse_tdate(args))
        end
      end
    end

    # pp - [name, max]. Returns the hash describing an integer field.
    def parse_int(pp)
      { :type => 'int', :name => pp[0], :max => pp[1].to_i }
    end

    # pp - [name, length]. Returns the hash describing a string field.
    def parse_string(pp)
      { :type => 'string', :name => pp[0], :length => pp[1].to_i }
    end

    # pp - [name, max, decimal]. Returns the hash describing a float field.
    def parse_float(pp)
      { :type => 'float', :name => pp[0], :max => pp[1].to_i, :decimal => pp[2].to_i }
    end

    # pp - [name, date format, time format]. Returns the hash describing a
    # date/time field.
    def parse_dtime(pp)
      { :type => 'dtime', :name => pp[0], :dformat => pp[1], :tformat => pp[2] }
    end

    # pp - [name, time format, date format]. Same result as #parse_dtime,
    # for specifications that list the time format first.
    def parse_tdate(pp)
      { :type => 'dtime', :name => pp[0], :tformat => pp[1], :dformat => pp[2] }
    end

    private

    # Consumes the first line of +content+ when it has the form
    # "<key>,<value>" and returns the value; otherwise leaves +content+
    # untouched and returns nil. Safe to call on an empty array, which
    # happens when the file ends right after a setting line.
    def take_setting(content, key)
      parts = content.first.to_s.split(',')
      return nil unless parts[0] == key && parts.length == 2
      content.shift
      parts[1]
    end

  end

end
|
data/lib/version.rb
ADDED
data/test/test-gen.rb
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
# This file contains test cases for all working classes.
|
2
|
+
require 'test\unit'
|
3
|
+
require 'lib\gen-data'
|
4
|
+
|
5
|
+
# Test cases for DataGenerator. The generator is random, so the checks are
# written so that a spurious failure is practically impossible instead of
# merely unlikely.
class DataGeneratorTestCase < Test::Unit::TestCase
  def setup
    @gen = DataGenerator.new
  end

  def test_int_generation
    values = (1..100).map { @gen.gen_int(1000) }
    # Range check on every draw.
    values.each { |value| assert(value >= 0 && value < 1000, "#{value} is out of range") }
    # Comparing only two draws fails once in a thousand runs; a hundred
    # identical draws do not happen with a working generator.
    assert(values.uniq.size > 1, 'all generated integers are equal')
  end

  def test_string_generation
    first = @gen.gen_string( 20 )
    second = @gen.gen_string( 20 )
    assert_equal(20, first.size)
    assert_match(/\A[a-zA-Z0-9]*\z/, first)
    assert_not_equal(first, second)
  end

  def test_float_generation
    first = @gen.gen_float( 1000, 4)
    second = @gen.gen_float( 1000, 4)
    assert(first >= 0 && first < 1000, "#{first} is out of range")
    assert_not_equal(first, second)
  end

  def test_bool_generation
    # 1000 trials with a wide tolerance around the expected 500: ten trials
    # with a lower bound of 3 failed in roughly one run out of twenty.
    trues = (1..1000).select { @gen.gen_bool == true }.size
    assert(trues >= 400 && trues <= 600, "#{trues} of 1000 draws were true")
  end

  def test_data_generation
    assert_match(/\A[0-9]+\/[0-9]+\/[0-9]+\z/,@gen.gen_date("DD/MM/YY"))
    assert_match(/\A[0-9]+\/[0-9]+\/[0-9][0-9][0-9][0-9]\z/,@gen.gen_date("DD/MM/YYYY"))
    assert_match(/\A[0-9]+\:[0-9]+\:[0-9]+\z/,@gen.gen_date("DD:MM:YY"))
    assert_match(/\A[0-9]+\:[0-9]+\:[0-9][0-9][0-9][0-9]\z/,@gen.gen_date("DD:MM:YYYY"))
    assert_equal("",@gen.gen_date("h:m:ms"))
    assert_equal("",@gen.gen_date("1;2;3"))
  end

  def test_time_generation
    # (AM|PM) is an alternation; [AM,PM] was a character class that accepted
    # any single one of the characters A, M, P and ','.
    assert_match(/\A[0-9]+\:[0-9]+\:[0-9]+\s(AM|PM)\z/,@gen.gen_time("12:h:m:ms"))
    assert_match(/\A[0-9]+\:[0-9]+\:[0-9]+\z/,@gen.gen_time("h:m:ms"))
    assert_equal("",@gen.gen_time("hh:mm:ms"))
    assert_equal("",@gen.gen_time("12/h/m/ms"))
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
#test app
|
2
|
+
require 'test\unit'
|
3
|
+
require 'lib\read-params'
|
4
|
+
|
5
|
+
#sample
|
6
|
+
# Test cases for GenCSV::ParamsReader. They read the fixture files
# 'param.ini', 'default-1.ini' and 'default-2.ini' from the current
# directory; the expected values below describe those fixtures.
class ParamsReadTestCase < Test::Unit::TestCase

  # Fixture paths use '/' because Ruby accepts it on every platform, while
  # '.\name' only resolves on Windows.
  def test_first # uses the sample file 'param.ini'
    reader = GenCSV::ParamsReader.new('./param.ini')
    assert_equal("out-data.csv", reader.out)
    assert_equal(1000, reader.lines)
    assert_equal(true, reader.headers)
    assert_equal(5, reader.fields.size)
  end

  # 'default-1.ini' is expected to yield every default and no fields.
  def test_default_1_params
    reader = GenCSV::ParamsReader.new('./default-1.ini')
    assert_equal(GenCSV::DEFAULT_OUT_FILE, reader.out)
    assert_equal(GenCSV::DEFAULT_LINES_COUNT, reader.lines)
    assert_equal(GenCSV::DEFAULT_HEADERS, reader.headers)
    assert_equal(0, reader.fields.size)
  end

  # 'default-2.ini' is expected to yield every default and exactly one field.
  def test_default_2_params
    reader = GenCSV::ParamsReader.new('./default-2.ini')
    assert_equal(GenCSV::DEFAULT_OUT_FILE, reader.out)
    assert_equal(GenCSV::DEFAULT_LINES_COUNT, reader.lines)
    assert_equal(GenCSV::DEFAULT_HEADERS, reader.headers)
    assert_equal(1, reader.fields.size)
  end
end
|
metadata
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
rubygems_version: 0.9.2
|
3
|
+
specification_version: 1
|
4
|
+
name: CSV-datagen
|
5
|
+
version: !ruby/object:Gem::Version
|
6
|
+
version: 0.0.1
|
7
|
+
date: 2007-03-29 00:00:00 +04:00
|
8
|
+
summary: A free tool for generating random data for testing purposes.
|
9
|
+
require_paths:
|
10
|
+
- lib
|
11
|
+
email: dmtmax@gmail.com
|
12
|
+
homepage: http://blogbydmt/blogspot.com
|
13
|
+
rubyforge_project:
|
14
|
+
description:
|
15
|
+
autorequire: csvdatagen
|
16
|
+
default_executable:
|
17
|
+
bindir: bin
|
18
|
+
has_rdoc: false
|
19
|
+
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">"
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.0.0
|
24
|
+
version:
|
25
|
+
platform: ruby
|
26
|
+
signing_key:
|
27
|
+
cert_chain:
|
28
|
+
post_install_message:
|
29
|
+
authors:
|
30
|
+
- Dmitry Maksimov
|
31
|
+
files:
|
32
|
+
- bin/gen-csv.rb
|
33
|
+
- lib/gen-data.rb
|
34
|
+
- lib/read-params.rb
|
35
|
+
- lib/version.rb
|
36
|
+
- README.txt
|
37
|
+
test_files:
|
38
|
+
- test/test-gen.rb
|
39
|
+
- test/test-read-params.rb
|
40
|
+
rdoc_options: []
|
41
|
+
|
42
|
+
extra_rdoc_files:
|
43
|
+
- README.txt
|
44
|
+
executables: []
|
45
|
+
|
46
|
+
extensions: []
|
47
|
+
|
48
|
+
requirements: []
|
49
|
+
|
50
|
+
dependencies: []
|
51
|
+
|