CSV-datagen 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.txt +17 -0
- data/bin/gen-csv.rb +40 -0
- data/lib/gen-data.rb +122 -0
- data/lib/read-params.rb +129 -0
- data/lib/version.rb +3 -0
- data/test/test-gen.rb +57 -0
- data/test/test-read-params.rb +31 -0
- metadata +51 -0
data/README.txt
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
This file contains all information about the "CSV data generator" project.
|
2
|
+
|
3
|
+
1. Overview
|
4
|
+
2. Format of configuration file.
|
5
|
+
|
6
|
+
The configuration file contains lines that describe the parameters of the generated CSV file.
|
7
|
+
[out,<output filename>]
|
8
|
+
[lines,<number of lines in the output file>]
|
9
|
+
[headers,<true if output file should contain headers row>]
|
10
|
+
[field specification]
|
11
|
+
|
12
|
+
2.1 Field specification
|
13
|
+
|
14
|
+
integer - int,<name>,<max value> - the generated number lies in the range 0..max (max itself excluded) and the column has the given name.
|
15
|
+
string - string,<name>,<length> - the generated string has the given length and the column has the given name.
|
16
|
+
float - float,<name>,<max>,<decimal> - the column has the given name; the generated number lies in the range 0..max and has at most <decimal> digits after the decimal point.
|
17
|
+
datetime - dtime,<name>,<date format>,<time format>. The date format is a string in which the user can use the following acronyms: DD for days, MM for months, YYYY or YY for years. The delimiter should be the '/' sign. The time format is "12" for the 12-hour clock or "24" for the 24-hour clock.
|
data/bin/gen-csv.rb
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
# Main script of the CSV data generator.
#
# Reads the generation parameters from 'param.ini' in the current directory
# and writes a CSV file filled with random data.
#
# The requires use forward slashes: the original backslash form only resolved
# on Windows, while '/' is accepted by Ruby on every platform.
require 'lib/gen-data'
require 'lib/read-params'
require 'lib/version'
require 'faster_csv'

if $0 == __FILE__
  puts 'Csv data generator version:'+GenCSV::Version
  reader = GenCSV::ParamsReader.new('param.ini')
  datagen = DataGenerator.new

  # Block form guarantees the output file is closed even if generation raises.
  FasterCSV.open(reader.out, "w") do |csv|
    # Optional header row built from the configured field names.
    csv << reader.fields.map { |field| field[:name] } if reader.headers

    reader.lines.times do
      # One cell per configured field; unknown field types yield an empty cell.
      row = reader.fields.map do |field|
        case field[:type]
        when 'int'
          datagen.gen_int(field[:max]).to_s
        when 'string'
          datagen.gen_string(field[:length]).to_s
        when 'float'
          datagen.gen_float(field[:max], field[:decimal]).to_s
        when 'dtime'
          # Previously 'dtime' fields were parsed but never generated, so the
          # column was always empty. A missing format is treated as ''.
          date_part = datagen.gen_date(field[:dformat].to_s)
          time_part = datagen.gen_time(field[:tformat].to_s)
          [date_part, time_part].reject { |part| part.empty? }.join(' ')
        else
          ''
        end
      end
      csv << row
    end
  end

  puts "Data generation complete"
end
|
data/lib/gen-data.rb
ADDED
@@ -0,0 +1,122 @@
|
|
1
|
+
# Generates the random values (integers, strings, floats, booleans, dates and
# times) used to fill the cells of a generated CSV file.
class DataGenerator
  # Characters gen_string draws from: a-z, A-Z and 0-9.
  STRING_CHARS = (("a".."z").to_a + ("A".."Z").to_a + ("0".."9").to_a).freeze

  # Returns a random integer in 0...interval (delegates to Kernel#rand).
  def gen_int( interval )
    rand(interval)
  end

  # Returns a random alphanumeric string of +len+ characters.
  #
  # The index is drawn with rand(size), not rand(size - 1): rand's upper bound
  # is already exclusive, so the old form could never pick the last character.
  def gen_string( len )
    (1..len).map { STRING_CHARS[rand(STRING_CHARS.size)] }.join
  end

  # Returns +a+ raised to the power +i+; a non-positive +i+ yields 1.
  def raise_to_power( a, i )
    i > 0 ? a**i : 1
  end

  # Returns a random float whose integer part is in 0...interval and which has
  # at most +drob+ digits after the decimal point.
  #
  # The fractional digits are left-padded with zeros before parsing; without
  # the padding a drawn fraction of 5 with drob = 4 became ".5" instead of
  # ".0005", which skewed the distribution towards large fractions.
  def gen_float( interval, drob = 0 )
    whole = rand( interval )
    fraction = drob > 0 ? rand( raise_to_power( 10, drob ) ) : 0
    "#{whole}.#{fraction.to_s.rjust(drob, '0')}".to_f
  end

  # Returns true or false with equal probability.
  def gen_bool
    rand(2) == 1
  end

  # Returns a random date string built from +format+.
  #
  # +format+ is a list of tokens separated by '/' or ':' (case-insensitive):
  # D/DD - day 1..28, M/MM - month 1..12, YY - 0..99, YYYY - 1900..2049.
  # The same delimiter is used in the result. On an unrecognized format a
  # message is printed and '' is returned.
  def gen_date( format)
    del = '/'
    fa = format.split(del)
    if fa.size == 1
      del = ':'
      fa = format.split(del)
    end
    if fa.size == 1
      puts 'date has unrecognized format'
      return ''
    end

    parts = []
    fa.each do | var |
      case var.downcase
      when 'd' , 'dd'
        parts << ( gen_int( 28 ) + 1 ).to_s # +1: day 0 does not exist
      when 'm' , 'mm'
        parts << ( gen_int( 12 ) + 1 ).to_s # +1: month 0 does not exist
      when 'yy'
        parts << gen_int( 100 ).to_s
      when 'yyyy'
        parts << ( gen_int( 150 ) + 1900 ).to_s
      else
        puts 'date has unrecognized format'
        return ''
      end
    end
    parts.join(del)
  end

  # Returns a random time string built from +format+.
  #
  # +format+ is a ':'-separated list of tokens (case-insensitive): h - hours,
  # m - minutes (0..59), ms - milliseconds (0..999). An optional leading "12"
  # or "24" selects the clock; the default is the 24-hour clock. In 12-hour
  # mode the hour is shown as 1..12 and ' AM' / ' PM' is appended. On an
  # unrecognized format a message is printed and '' is returned.
  def gen_time( format)
    fa = format.split(':')
    if fa.size == 1
      puts 'time has unrecognized format'
      return ''
    end

    is24 = fa[0] != '12'
    fa.shift if fa[0] == '12' || fa[0] == '24'

    meridiem = 'AM'
    parts = []
    fa.each do | var |
      case var.downcase
      when 'h'
        # Always draw from the full day so that both AM and PM can occur;
        # the old code drew 0..11 and then tested h > 12, so PM never appeared.
        hour = gen_int(24)
        unless is24
          meridiem = hour < 12 ? 'AM' : 'PM'
          hour = hour % 12
          hour = 12 if hour == 0 # a 12-hour clock shows 12, not 0
        end
        parts << hour.to_s
      when 'm'
        parts << gen_int(60).to_s
      when 'ms'
        parts << gen_int(1000).to_s
      else
        puts 'time has unrecognized format'
        return ''
      end
    end

    res = parts.join(':')
    res += ' ' + meridiem unless is24
    res
  end

  # Placeholder: weekday generation is not implemented yet; returns nil.
  def gen_weekday
  end
end
|
data/lib/read-params.rb
ADDED
@@ -0,0 +1,129 @@
|
|
1
|
+
# Reads the description of the CSV file to generate from a settings file.
module GenCSV

  DEFAULT_OUT_FILE = "out-data.csv"
  DEFAULT_LINES_COUNT = 100
  DEFAULT_HEADERS = false

  # Parses a settings file made of comma-separated lines.
  #
  # The file may start with up to three optional setting lines, in this order:
  #   out,<file name>   lines,<count>   headers,<true|false>
  # Every following line describes one field (int, string, float, dtime or
  # tdate); lines with any other leading keyword are ignored.
  class ParamsReader
    # Values of the "headers" setting that switch the header row off; any
    # other value keeps it on, as before.
    HEADERS_OFF_VALUES = ['false', '0', 'no'].freeze

    attr_reader :out     # out file name
    attr_reader :lines   # lines count for generated csv
    attr_reader :fields  # array of hashes describing the fields and their params
    attr_reader :headers # true if output file should contain headers row

    # inifile - path to the file that contains the description of the csv file.
    def initialize( inifile )
      # File.readlines closes the file itself (File.open(...).readlines leaked it).
      content = File.readlines(inifile).map { |line| line.strip }

      out_value = leading_setting(content, 'out')
      @out = out_value || DEFAULT_OUT_FILE

      lines_value = leading_setting(content, 'lines')
      @lines = lines_value ? lines_value.to_i : DEFAULT_LINES_COUNT

      # "headers,false" used to switch headers ON because only the presence of
      # the line was checked, never its value.
      headers_value = leading_setting(content, 'headers')
      @headers = if headers_value
                   !HEADERS_OFF_VALUES.include?(headers_value.strip.downcase)
                 else
                   DEFAULT_HEADERS
                 end

      @fields = []
      content.each do | line |
        field = parse_field(line.split(','))
        @fields.push(field) if field
      end
    end

    # Each parse_* method receives the line's values WITHOUT the leading type
    # keyword and returns a hash with all information about the field.

    def parse_int( pp )
      { :type => 'int', :name => pp[0], :max => pp[1].to_i }
    end

    def parse_string( pp )
      { :type => 'string', :name => pp[0], :length => pp[1].to_i }
    end

    def parse_float( pp )
      { :type => 'float', :name => pp[0], :max => pp[1].to_i, :decimal => pp[2].to_i }
    end

    # dtime line: <name>,<date format>,<time format>
    def parse_dtime( pp )
      { :type => 'dtime', :name => pp[0], :dformat => pp[1], :tformat => pp[2] }
    end

    # tdate line: <name>,<time format>,<date format>. The field is stored with
    # the same 'dtime' type as parse_dtime produces; only the argument order
    # of the line differs.
    def parse_tdate( pp )
      { :type => 'dtime', :name => pp[0], :tformat => pp[1], :dformat => pp[2] }
    end

    private

    # If the first remaining line is exactly "<key>,<value>", removes it from
    # +content+ and returns the value; otherwise returns nil and leaves
    # +content+ untouched. Safe on an empty +content+ (the old code called
    # split on nil when the file held nothing but setting lines).
    def leading_setting( content, key )
      parts = content.first.to_s.split(',')
      return nil unless parts[0] == key && parts.length == 2
      content.shift
      parts[1]
    end

    # Dispatches on the leading type keyword exactly once. The old cascade of
    # independent ifs re-tested pp[0] after each shift, so a field NAMED like a
    # type (e.g. "int,string,10") produced a second, bogus field.
    def parse_field( parts )
      kind = parts.shift
      case kind
      when 'int'    then parse_int(parts)
      when 'string' then parse_string(parts)
      when 'float'  then parse_float(parts)
      when 'dtime'  then parse_dtime(parts)
      when 'tdate'  then parse_tdate(parts)
      end
    end

  end

end
|
data/lib/version.rb
ADDED
data/test/test-gen.rb
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
# Test cases for the DataGenerator class.
#
# Requires use forward slashes so they resolve on every platform; the
# original backslash form only worked on Windows.
require 'test/unit'
require 'lib/gen-data'

class DataGeneratorTestCase < Test::Unit::TestCase
  def setup
    @gen = DataGenerator.new
  end

  # Comparing two single draws for inequality fails by chance once in a
  # thousand runs, so check the range and the variety of many draws instead.
  def test_int_generation
    values = Array.new(100) { @gen.gen_int(1000) }
    values.each { |value| assert(value >= 0 && value < 1000) }
    assert(values.uniq.size > 1)
  end

  def test_string_generation
    first = @gen.gen_string( 20 )
    second = @gen.gen_string( 20 )
    assert_equal(20, first.length)
    assert_equal(20, second.length)
    assert_not_equal(first, second)
  end

  def test_float_generation
    values = Array.new(20) { @gen.gen_float( 1000, 4) }
    values.each { |value| assert(value >= 0 && value < 1000) }
    assert(values.uniq.size > 1)
  end

  # With only 10 draws the old lower bound of 3 hits failed about 5% of the
  # time; 200 draws make "both values occur" practically certain.
  def test_bool_generation
    values = Array.new(200) { @gen.gen_bool }
    assert(values.include?(true))
    assert(values.include?(false))
  end

  def test_data_generation
    assert_match(/\A[0-9]+\/[0-9]+\/[0-9]+\z/, @gen.gen_date("DD/MM/YY"))
    assert_match(/\A[0-9]+\/[0-9]+\/[0-9]{4}\z/, @gen.gen_date("DD/MM/YYYY"))
    assert_match(/\A[0-9]+:[0-9]+:[0-9]+\z/, @gen.gen_date("DD:MM:YY"))
    assert_match(/\A[0-9]+:[0-9]+:[0-9]{4}\z/, @gen.gen_date("DD:MM:YYYY"))
    assert_equal("", @gen.gen_date("h:m:ms"))
    assert_equal("", @gen.gen_date("1;2;3"))
  end

  def test_time_generation
    # (AM|PM) is an alternation; the old [AM,PM] was a character class that
    # accepted any single one of the characters A, M, P or ','.
    assert_match(/\A[0-9]+:[0-9]+:[0-9]+\s(AM|PM)\z/, @gen.gen_time("12:h:m:ms"))
    assert_match(/\A[0-9]+:[0-9]+:[0-9]+\z/, @gen.gen_time("h:m:ms"))
    assert_equal("", @gen.gen_time("hh:mm:ms"))
    assert_equal("", @gen.gen_time("12/h/m/ms"))
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# Test cases for GenCSV::ParamsReader.
#
# The fixture files (param.ini, default-1.ini, default-2.ini) are looked up in
# the current working directory. All paths use forward slashes so they resolve
# on every platform; the original backslash form only worked on Windows.
require 'test/unit'
require 'lib/read-params'

class ParamsReadTestCase < Test::Unit::TestCase

  # Full settings file: all three settings plus five field descriptions.
  def test_first
    reader = GenCSV::ParamsReader.new('./param.ini')
    assert_equal("out-data.csv", reader.out)
    assert_equal(1000, reader.lines)
    assert_equal(true, reader.headers)
    assert_equal(5, reader.fields.size)
  end

  # Settings file that yields the defaults and no fields.
  def test_default_1_params
    reader = GenCSV::ParamsReader.new('./default-1.ini')
    assert_equal(GenCSV::DEFAULT_OUT_FILE, reader.out)
    assert_equal(GenCSV::DEFAULT_LINES_COUNT, reader.lines)
    assert_equal(GenCSV::DEFAULT_HEADERS, reader.headers)
    assert_equal(0, reader.fields.size)
  end

  # Settings file that yields the defaults and a single field.
  def test_default_2_params
    reader = GenCSV::ParamsReader.new('./default-2.ini')
    assert_equal(GenCSV::DEFAULT_OUT_FILE, reader.out)
    assert_equal(GenCSV::DEFAULT_LINES_COUNT, reader.lines)
    assert_equal(GenCSV::DEFAULT_HEADERS, reader.headers)
    assert_equal(1, reader.fields.size)
  end
end
|
metadata
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
rubygems_version: 0.9.2
|
3
|
+
specification_version: 1
|
4
|
+
name: CSV-datagen
|
5
|
+
version: !ruby/object:Gem::Version
|
6
|
+
version: 0.0.1
|
7
|
+
date: 2007-03-29 00:00:00 +04:00
|
8
|
+
summary: A free tool for generating random data for testing purposes.
|
9
|
+
require_paths:
|
10
|
+
- lib
|
11
|
+
email: dmtmax@gmail.com
|
12
|
+
homepage: http://blogbydmt.blogspot.com
|
13
|
+
rubyforge_project:
|
14
|
+
description:
|
15
|
+
autorequire: csvdatagen
|
16
|
+
default_executable:
|
17
|
+
bindir: bin
|
18
|
+
has_rdoc: false
|
19
|
+
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">"
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.0.0
|
24
|
+
version:
|
25
|
+
platform: ruby
|
26
|
+
signing_key:
|
27
|
+
cert_chain:
|
28
|
+
post_install_message:
|
29
|
+
authors:
|
30
|
+
- Dmitry Maksimov
|
31
|
+
files:
|
32
|
+
- bin/gen-csv.rb
|
33
|
+
- lib/gen-data.rb
|
34
|
+
- lib/read-params.rb
|
35
|
+
- lib/version.rb
|
36
|
+
- README.txt
|
37
|
+
test_files:
|
38
|
+
- test/test-gen.rb
|
39
|
+
- test/test-read-params.rb
|
40
|
+
rdoc_options: []
|
41
|
+
|
42
|
+
extra_rdoc_files:
|
43
|
+
- README.txt
|
44
|
+
executables: []
|
45
|
+
|
46
|
+
extensions: []
|
47
|
+
|
48
|
+
requirements: []
|
49
|
+
|
50
|
+
dependencies: []
|
51
|
+
|