CSV-datagen 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,17 @@
1
+ This file contains all information about the "CSV data generator" project.
2
+
3
+ 1. Overview
4
+ 2. Format of configuration file.
5
+
6
+ The configuration file contains lines that describe the parameters of the generated csv file.
7
+ [out,<output filename>]
8
+ [lines,<number of lines in the output file>]
9
+ [headers,<true if output file should contain headers row>]
10
+ [field specification]
11
+
12
+ 2.1 Field specification
13
+
14
+ integer - int,<name>,<max value> - the generated number will be between 0..max and has the given name.
15
+ string - string,<name>,<length> - the generated string has the given name and length.
16
+ float - float,<name>,<max>,<decimal> - the generated number has the given name, will be between 0..max and has <decimal> digits after the decimal point.
17
+ datetime - dtime,<name>,<date format>,<time format>. The date format is a string in which the user can use the following tokens: DD for days, MM for months, YYYY or YY for years. The delimiter should be the '/' sign. The time format is "12" for the 12-hour clock or "24" for the 24-hour clock.
@@ -0,0 +1,40 @@
1
+ #This file contains the main script
2
# Forward slashes are understood by Ruby on every platform (including
# Windows), whereas backslash-separated paths only resolve on Windows.
require 'lib/gen-data'
require 'lib/read-params'
require 'lib/version'
require 'faster_csv'

# Script entry point: reads the generation parameters from 'param.ini' in the
# current directory and writes the requested number of random rows to the
# configured output file.
if $0 == __FILE__
  puts 'Csv data generator version:' + GenCSV::Version
  reader = GenCSV::ParamsReader.new('param.ini')
  datagen = DataGenerator.new

  # The block form closes the output file even when generation raises.
  FasterCSV.open(reader.out, "w") do |csv|
    # Optional header row made of the configured field names.
    csv << reader.fields.map { |field| field[:name] } if reader.headers

    reader.lines.times do
      # One cell per configured field, always written as a string.
      row = reader.fields.map do |field|
        case field[:type]
        when 'int'
          datagen.gen_int(field[:max]).to_s
        when 'string'
          datagen.gen_string(field[:length]).to_s
        when 'float'
          datagen.gen_float(field[:max], field[:decimal]).to_s
        else
          # Any other field type (e.g. 'dtime') is not generated by this
          # script; its cell is left empty.
          ''
        end
      end
      csv << row
    end
  end

  puts "Data generation complete"
end
@@ -0,0 +1,122 @@
1
+ #Class that generates all types of data
2
+
3
# Produces random values (integers, strings, floats, booleans, date and time
# strings) used to fill the cells of a generated CSV file.
class DataGenerator
  # Alphabet used by gen_string: a-z, A-Z and 0-9.
  STRING_CHARS = ("a".."z").to_a + ("A".."Z").to_a + ("0".."9").to_a

  # Returns a random integer in 0...interval.
  # A nil or zero interval yields 0: Kernel#rand would otherwise return a
  # Float in [0, 1) for those arguments.
  def gen_int(interval)
    return 0 if interval.nil? || interval == 0
    rand(interval)
  end

  # Returns a random alphanumeric string of +len+ characters
  # (an empty string when len is zero or negative).
  def gen_string(len)
    # rand(size) covers every index, so the last character ('9') can be
    # chosen as well.
    (1..len).map { STRING_CHARS[rand(STRING_CHARS.size)] }.join
  end

  # Returns +a+ raised to the power +i+ by repeated multiplication
  # (1 when i is zero or negative).
  def raise_to_power(a, i)
    (1..i).inject(1) { |product, _| product * a }
  end

  # Returns a random float in 0...interval with at most +drob+ digits after
  # the decimal point.
  def gen_float(interval, drob = 0)
    whole = gen_int(interval)
    fraction = drob > 0 ? gen_int(raise_to_power(10, drob)) : 0
    # Left-pad the fraction with zeros so that e.g. 5 with three decimals
    # becomes ".005" instead of ".5"; without the padding small fractions
    # could never be produced.
    "#{whole}.#{fraction.to_s.rjust(drob, '0')}".to_f
  end

  # Returns true or false with equal probability.
  def gen_bool
    gen_int(2) == 1
  end

  # Builds a random date string following +format+.
  #
  # The format is a list of tokens separated by '/' (or, failing that, ':').
  # Tokens are case-insensitive: d/dd (day 1..28), m/mm (month 1..12),
  # yy (00..99) and yyyy (1900..2049). The two-letter forms dd, mm and yy are
  # zero-padded to two digits. The output uses the same delimiter as the
  # format.
  #
  # Prints a message and returns '' when the format is not recognized.
  def gen_date(format)
    delimiter = '/'
    tokens = format.split(delimiter)
    if tokens.size == 1
      delimiter = ':'
      tokens = format.split(delimiter)
    end
    if tokens.size == 1
      puts 'date has unrecognized format'
      return ''
    end

    parts = []
    tokens.each do |token|
      case token.downcase
      when 'd'
        parts << (gen_int(28) + 1).to_s # days start at 1, not 0
      when 'dd'
        parts << two_digits(gen_int(28) + 1)
      when 'm'
        parts << (gen_int(12) + 1).to_s # months start at 1, not 0
      when 'mm'
        parts << two_digits(gen_int(12) + 1)
      when 'yy'
        parts << two_digits(gen_int(100))
      when 'yyyy'
        parts << (gen_int(150) + 1900).to_s
      else
        puts 'date has unrecognized format'
        return ''
      end
    end
    parts.join(delimiter)
  end

  # Builds a random time string following +format+.
  #
  # The format is a ':'-separated list that may start with '12' or '24' to
  # select the clock (24-hour when omitted), followed by case-insensitive
  # tokens: h (hours), m (minutes 0..59) and ms (milliseconds 0..999).
  # In 12-hour mode the hour is 1..12 and ' AM' or ' PM' is appended.
  #
  # Prints a message and returns '' when the format is not recognized.
  def gen_time(format)
    tokens = format.split(':')
    if tokens.size == 1
      puts 'time has unrecognized format'
      return ''
    end

    twelve_hour = tokens[0] == '12'
    tokens.shift if twelve_hour || tokens[0] == '24'

    meridiem = 'AM'
    parts = []
    tokens.each do |token|
      case token.downcase
      when 'h'
        hour = gen_int(24)
        if twelve_hour
          # Derive AM/PM from a full 24-hour draw so both halves of the day
          # occur, then map the hour onto the 1..12 clock face.
          meridiem = hour >= 12 ? 'PM' : 'AM'
          hour %= 12
          hour = 12 if hour == 0
        end
        parts << hour.to_s
      when 'm'
        parts << gen_int(60).to_s
      when 'ms'
        parts << gen_int(1000).to_s
      else
        puts 'time has unrecognized format'
        return ''
      end
    end

    result = parts.join(':')
    result += ' ' + meridiem if twelve_hour
    result
  end

  # Placeholder: weekday generation is not implemented and returns nil.
  def gen_weekday
  end

  private

  # Formats a non-negative integer with at least two digits (5 -> "05").
  def two_digits(number)
    number.to_s.rjust(2, '0')
  end
end
@@ -0,0 +1,129 @@
1
+ #Class that reads params from the settings file
2
+
3
module GenCSV

  # Values used when the settings file does not provide the setting.
  DEFAULT_OUT_FILE = "out-data.csv"
  DEFAULT_LINES_COUNT = 100
  DEFAULT_HEADERS = false

  # Reads the description of the CSV file to generate from a settings file.
  #
  # The file may start with up to three optional settings lines, in this
  # order: "out,<file>", "lines,<count>" and "headers,<flag>". Every remaining
  # line that starts with a known field type (int, string, float, dtime,
  # tdate) describes one field; other lines are ignored.
  class ParamsReader
    # Header flag values (case-insensitive) that switch the header row off.
    # Any other value switches it on.
    HEADERS_OFF_VALUES = %w[false no off 0]

    attr_reader :out     # output file name
    attr_reader :lines   # number of lines to generate
    attr_reader :fields  # array of hashes, one per field, with the field's params
    attr_reader :headers # true if the output file should contain a headers row

    # inifile - path to the file that contains the description of the csv file
    def initialize( inifile )
      # File.readlines closes the file once it has been read.
      content = File.readlines(inifile).map { |line| line.strip }

      out_value = take_setting(content, 'out')
      @out = out_value ? out_value : DEFAULT_OUT_FILE

      lines_value = take_setting(content, 'lines')
      @lines = lines_value ? lines_value.to_i : DEFAULT_LINES_COUNT

      headers_value = take_setting(content, 'headers')
      @headers =
        if headers_value
          # Honour an explicit negative flag instead of enabling the header
          # row for every value.
          !HEADERS_OFF_VALUES.include?(headers_value.strip.downcase)
        else
          DEFAULT_HEADERS
        end

      # Each remaining line is dispatched exactly once on its type keyword,
      # so a field whose name equals a type keyword is not parsed a second
      # time.
      @fields = []
      content.each do | line |
        parts = line.split(',')
        case parts.shift
        when 'int'
          @fields.push(parse_int( parts ))
        when 'string'
          @fields.push(parse_string( parts ))
        when 'float'
          @fields.push(parse_float( parts ))
        when 'dtime'
          @fields.push(parse_dtime( parts ))
        when 'tdate'
          @fields.push(parse_tdate( parts ))
        end
      end
    end

    # pp - [name, max]; returns a hash with all information about an int field
    def parse_int( pp )
      { :type => 'int', :name => pp[0], :max => pp[1].to_i }
    end

    # pp - [name, length]; returns the description of a string field
    def parse_string( pp )
      { :type => 'string', :name => pp[0], :length => pp[1].to_i }
    end

    # pp - [name, max, decimal]; returns the description of a float field
    def parse_float( pp )
      { :type => 'float', :name => pp[0], :max => pp[1].to_i, :decimal => pp[2].to_i }
    end

    # pp - [name, date format, time format]; returns the description of a
    # date/time field
    def parse_dtime( pp )
      { :type => 'dtime', :name => pp[0], :dformat => pp[1], :tformat => pp[2] }
    end

    # pp - [name, time format, date format]; same as parse_dtime but with the
    # two formats given in the opposite order. The type is stored as 'dtime'.
    def parse_tdate( pp )
      { :type => 'dtime', :name => pp[0], :tformat => pp[1], :dformat => pp[2] }
    end

    private

    # If the first remaining line is "<key>,<value>", removes it from content
    # and returns the value; otherwise leaves content untouched and returns
    # nil. Safe to call when no lines are left.
    def take_setting( content, key )
      parts = content.first.to_s.split(',')
      return nil unless parts[0] == key && parts.length == 2
      content.shift
      parts[1]
    end

  end

end
@@ -0,0 +1,3 @@
1
# Version string of the generator; the main script prints it at start-up.
# NOTE(review): the gem metadata shipped with this package lists 0.0.1 -
# confirm which number is intended.
module GenCSV
  Version = '0.0.5'
end
@@ -0,0 +1,57 @@
1
+ #This file contain test cases for all working classes
2
# Forward slashes resolve on every platform; backslashes only on Windows.
require 'test/unit'
require 'lib/gen-data'

# Unit tests for DataGenerator. The generators are random, so the tests check
# ranges, shapes and coarse statistics rather than exact values.
class DataGeneratorTestCase < Test::Unit::TestCase
  def setup
    @gen = DataGenerator.new
  end

  def test_int_generation
    samples = (1..50).map { @gen.gen_int(1000) }
    samples.each { |value| assert(value >= 0 && value < 1000) }
    # Comparing only two draws fails about once in a thousand runs; fifty
    # identical draws are practically impossible.
    assert(samples.uniq.size > 1)
  end

  def test_string_generation
    first = @gen.gen_string( 20 )
    second = @gen.gen_string( 20 )
    assert_equal(20, first.size)
    assert_equal(20, second.size)
    assert_not_equal(first, second)
  end

  def test_float_generation
    first = @gen.gen_float( 1000, 4)
    second = @gen.gen_float( 1000, 4)
    assert(first >= 0 && first < 1000)
    assert(second >= 0 && second < 1000)
    assert_not_equal(first, second)
  end

  def test_bool_generation
    # With 1000 draws the number of true results stays well inside 400..600
    # for a fair generator, which keeps the test stable.
    trues = 0
    1000.times do
      trues += 1 if @gen.gen_bool
    end
    assert(trues >= 400 && trues <= 600)
  end

  def test_data_generation
    assert_match(/[0-9]+\/[0-9]+\/[0-9]+/,@gen.gen_date("DD/MM/YY"))
    assert_match(/[0-9]+\/[0-9]+\/[0-9][0-9][0-9][0-9]/,@gen.gen_date("DD/MM/YYYY"))
    assert_match(/[0-9]+\:[0-9]+\:[0-9]+/,@gen.gen_date("DD:MM:YY"))
    assert_match(/[0-9]+\:[0-9]+\:[0-9][0-9][0-9][0-9]/,@gen.gen_date("DD:MM:YYYY"))
    assert_equal("",@gen.gen_date("h:m:ms"))
    assert_equal("",@gen.gen_date("1;2;3"))
  end

  def test_time_generation
    # (AM|PM) is an alternation; [AM,PM] was a character class that accepted
    # any single one of the characters A, M, P or ','.
    assert_match(/[0-9]+\:[0-9]+\:[0-9]+\s(AM|PM)/,@gen.gen_time("12:h:m:ms"))
    assert_match(/[0-9]+\:[0-9]+\:[0-9]+/,@gen.gen_time("h:m:ms"))
    assert_equal("",@gen.gen_time("hh:mm:ms"))
    assert_equal("",@gen.gen_time("12/h/m/ms"))
  end
end
@@ -0,0 +1,31 @@
1
+ #test app
2
# Forward slashes resolve on every platform; backslashes only on Windows.
require 'test/unit'
require 'lib/read-params'

# Tests for GenCSV::ParamsReader. They read the fixture files param.ini,
# default-1.ini and default-2.ini from the current directory.
class ParamsReadTestCase < Test::Unit::TestCase

  # Fully specified settings file 'param.ini'.
  def test_first
    reader = GenCSV::ParamsReader.new('./param.ini')
    assert_equal("out-data.csv", reader.out)
    assert_equal(1000, reader.lines)
    assert_equal(true, reader.headers)
    assert_equal(5, reader.fields.size)
  end

  # Fixture expected to produce all defaults and no fields.
  def test_default_1_params
    reader = GenCSV::ParamsReader.new('./default-1.ini')
    assert_equal(GenCSV::DEFAULT_OUT_FILE, reader.out)
    assert_equal(GenCSV::DEFAULT_LINES_COUNT, reader.lines)
    assert_equal(GenCSV::DEFAULT_HEADERS, reader.headers)
    assert_equal(0, reader.fields.size)
  end

  # Fixture expected to produce all defaults and exactly one field.
  def test_default_2_params
    reader = GenCSV::ParamsReader.new('./default-2.ini')
    assert_equal(GenCSV::DEFAULT_OUT_FILE, reader.out)
    assert_equal(GenCSV::DEFAULT_LINES_COUNT, reader.lines)
    assert_equal(GenCSV::DEFAULT_HEADERS, reader.headers)
    assert_equal(1, reader.fields.size)
  end
end
metadata ADDED
@@ -0,0 +1,51 @@
1
+ --- !ruby/object:Gem::Specification
2
+ rubygems_version: 0.9.2
3
+ specification_version: 1
4
+ name: CSV-datagen
5
+ version: !ruby/object:Gem::Version
6
+ version: 0.0.1
7
+ date: 2007-03-29 00:00:00 +04:00
8
+ summary: A free tool for generating random data for testing purposes.
9
+ require_paths:
10
+ - lib
11
+ email: dmtmax@gmail.com
12
+ homepage: http://blogbydmt/blogspot.com
13
+ rubyforge_project:
14
+ description:
15
+ autorequire: csvdatagen
16
+ default_executable:
17
+ bindir: bin
18
+ has_rdoc: false
19
+ required_ruby_version: !ruby/object:Gem::Version::Requirement
20
+ requirements:
21
+ - - ">"
22
+ - !ruby/object:Gem::Version
23
+ version: 0.0.0
24
+ version:
25
+ platform: ruby
26
+ signing_key:
27
+ cert_chain:
28
+ post_install_message:
29
+ authors:
30
+ - Dmitry Maksimov
31
+ files:
32
+ - bin/gen-csv.rb
33
+ - lib/gen-data.rb
34
+ - lib/read-params.rb
35
+ - lib/version.rb
36
+ - README.txt
37
+ test_files:
38
+ - test/test-gen.rb
39
+ - test/test-read-params.rb
40
+ rdoc_options: []
41
+
42
+ extra_rdoc_files:
43
+ - README.txt
44
+ executables: []
45
+
46
+ extensions: []
47
+
48
+ requirements: []
49
+
50
+ dependencies: []
51
+