csv2psql 0.0.11 → 0.0.12
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +82 -3
- data/data/census_SFOH_2010.csv +981 -0
- data/lib/csv2psql/analyzer/types/base_analyzer.rb +45 -1
- data/lib/csv2psql/analyzer/types/boolean.rb +1 -1
- data/lib/csv2psql/analyzer/types/null.rb +1 -1
- data/lib/csv2psql/analyzer/types/string.rb +1 -1
- data/lib/csv2psql/analyzer/types/uuid.rb +1 -1
- data/lib/csv2psql/cli/app.rb +0 -46
- data/lib/csv2psql/cli/cmd/analyze_cmd.rb +2 -1
- data/lib/csv2psql/cli/cmd/schema_cmd.rb +85 -0
- data/lib/csv2psql/convert/convert.rb +7 -2
- data/lib/csv2psql/generator/generator.rb +13 -11
- data/lib/csv2psql/processor/processor.rb +19 -6
- data/lib/csv2psql/schema/schema_generator.rb +48 -0
- data/lib/csv2psql/version.rb +2 -2
- data/templates/schema.sql.erb +11 -0
- metadata +5 -1
@@ -16,7 +16,31 @@ module Csv2Psql
|
|
16
16
|
end
|
17
17
|
|
18
18
|
def numeric?
|
19
|
-
|
19
|
+
sql_class?(:numeric)
|
20
|
+
end
|
21
|
+
|
22
|
+
def sql_class?(class_name)
|
23
|
+
const_get('CLASS') == class_name
|
24
|
+
end
|
25
|
+
|
26
|
+
def sql_class
|
27
|
+
const_get('CLASS')
|
28
|
+
end
|
29
|
+
|
30
|
+
def sql_class?(class_name)
|
31
|
+
sql_class == class_name
|
32
|
+
end
|
33
|
+
|
34
|
+
def sql_type
|
35
|
+
const_get('TYPE')
|
36
|
+
end
|
37
|
+
|
38
|
+
def sql_type?(type_name)
|
39
|
+
sql_type == type_name
|
40
|
+
end
|
41
|
+
|
42
|
+
def weight
|
43
|
+
const_get('WEIGHT')
|
20
44
|
end
|
21
45
|
end
|
22
46
|
|
@@ -31,6 +55,26 @@ module Csv2Psql
|
|
31
55
|
def numeric?
|
32
56
|
self.class.numeric?
|
33
57
|
end
|
58
|
+
|
59
|
+
def sql_class
|
60
|
+
self.class.sql_class
|
61
|
+
end
|
62
|
+
|
63
|
+
def sql_class?(class_name)
|
64
|
+
self.class.sql_class?(class_name)
|
65
|
+
end
|
66
|
+
|
67
|
+
def sql_type
|
68
|
+
self.class.sql_type
|
69
|
+
end
|
70
|
+
|
71
|
+
def sql_type?(type_name)
|
72
|
+
self.class.sql_type?(type_name)
|
73
|
+
end
|
74
|
+
|
75
|
+
def weight
|
76
|
+
self.class.weight
|
77
|
+
end
|
34
78
|
end
|
35
79
|
end
|
36
80
|
end
|
data/lib/csv2psql/cli/app.rb
CHANGED
@@ -15,52 +15,6 @@ end
|
|
15
15
|
|
16
16
|
include GLI::App
|
17
17
|
|
18
|
-
program_desc "csv2psql #{Csv2Psql::VERSION} (Codename: #{Csv2Psql::CODENAME})"
|
19
|
-
|
20
|
-
cmds = {
|
21
|
-
h: {
|
22
|
-
desc: 'Header row included',
|
23
|
-
default_value: Csv2Psql::Processor::DEFAULT_OPTIONS['header']
|
24
|
-
},
|
25
|
-
|
26
|
-
d: {
|
27
|
-
desc: 'Column delimiter',
|
28
|
-
type: String,
|
29
|
-
default_value: Csv2Psql::Processor::DEFAULT_OPTIONS['delimiter']
|
30
|
-
},
|
31
|
-
|
32
|
-
l: {
|
33
|
-
desc: 'How many rows process',
|
34
|
-
type: Integer,
|
35
|
-
default_value: -1
|
36
|
-
},
|
37
|
-
|
38
|
-
q: {
|
39
|
-
desc: 'Quoting character',
|
40
|
-
type: String,
|
41
|
-
default_value: Csv2Psql::Processor::DEFAULT_OPTIONS['quote']
|
42
|
-
},
|
43
|
-
|
44
|
-
s: {
|
45
|
-
desc: 'Line separator',
|
46
|
-
type: String,
|
47
|
-
default_value: Csv2Psql::Processor::DEFAULT_OPTIONS['separator']
|
48
|
-
},
|
49
|
-
|
50
|
-
'skip' => {
|
51
|
-
desc: 'How many rows skip',
|
52
|
-
type: Integer,
|
53
|
-
default_value: -1
|
54
|
-
}
|
55
|
-
}
|
56
|
-
|
57
|
-
switch [:h, :header], cmds[:h]
|
58
|
-
flag [:d, :delimiter], cmds[:d]
|
59
|
-
flag [:l, :limit], cmds[:l]
|
60
|
-
flag [:q, :quote], cmds[:q]
|
61
|
-
flag [:s, :separator], cmds[:s]
|
62
|
-
flag [:skip], cmds['skip']
|
63
|
-
|
64
18
|
module Csv2Psql
|
65
19
|
# Apollon CLI
|
66
20
|
module Cli
|
@@ -54,7 +54,6 @@ Csv2Psql::Cli.module_eval do
|
|
54
54
|
fail ArgumentError, 'No file to analyze specified' if args.empty?
|
55
55
|
|
56
56
|
opts = {}.merge(global_options).merge(options)
|
57
|
-
res = Csv2Psql::Convert.analyze(args, opts)
|
58
57
|
|
59
58
|
formater = formats[opts[:format]]
|
60
59
|
if formater.nil?
|
@@ -62,6 +61,8 @@ Csv2Psql::Cli.module_eval do
|
|
62
61
|
fail ArgumentError, "Wrong formatter specified, can be: #{fmters}"
|
63
62
|
end
|
64
63
|
|
64
|
+
res = Csv2Psql::Convert.analyze(args, opts)
|
65
|
+
|
65
66
|
output = formater.call(res)
|
66
67
|
if output.is_a?(Array)
|
67
68
|
output.each do |o|
|
@@ -0,0 +1,85 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'gli'
|
4
|
+
require 'json'
|
5
|
+
require 'pp'
|
6
|
+
require 'terminal-table'
|
7
|
+
|
8
|
+
include GLI::App
|
9
|
+
|
10
|
+
require_relative '../shared'
|
11
|
+
require_relative '../../convert/convert'
|
12
|
+
require_relative '../../helpers/erb_helper'
|
13
|
+
require_relative '../../processor/processor'
|
14
|
+
|
15
|
+
Csv2Psql::Cli.module_eval do
|
16
|
+
BASE_DIR = File.join(File.dirname(__FILE__), '..', '..', '..', '..')
|
17
|
+
TEMPLATE_DIR = File.join(BASE_DIR, 'templates')
|
18
|
+
SCHEMA_TEMPLATE = File.join(TEMPLATE_DIR, 'schema.sql.erb')
|
19
|
+
|
20
|
+
formats = {
|
21
|
+
'json' => lambda do |res|
|
22
|
+
JSON.pretty_generate(res)
|
23
|
+
end,
|
24
|
+
|
25
|
+
'sql' => lambda do |data|
|
26
|
+
res = ''
|
27
|
+
data.each do |_k, v|
|
28
|
+
v[:table] = 'my_table'
|
29
|
+
ctx = v
|
30
|
+
erb = Csv2Psql::ErbHelper.new
|
31
|
+
res += "\n" unless res.empty?
|
32
|
+
res += erb.process(SCHEMA_TEMPLATE, ctx)
|
33
|
+
end
|
34
|
+
res
|
35
|
+
end,
|
36
|
+
|
37
|
+
'table' => lambda do |res|
|
38
|
+
res.map do |file, data|
|
39
|
+
header = %w(column type null)
|
40
|
+
|
41
|
+
rows = data[:columns].map do |k, v|
|
42
|
+
[k, v[:type], v[:null]]
|
43
|
+
end
|
44
|
+
|
45
|
+
Terminal::Table.new title: file, headings: header, rows: rows
|
46
|
+
end
|
47
|
+
end
|
48
|
+
}
|
49
|
+
|
50
|
+
cmds = {
|
51
|
+
f: {
|
52
|
+
desc: 'Output format',
|
53
|
+
type: String,
|
54
|
+
default_value: formats.keys.first
|
55
|
+
}
|
56
|
+
}
|
57
|
+
|
58
|
+
desc 'Generate schema for file'
|
59
|
+
command :schema do |c|
|
60
|
+
c.flag [:f, :format], cmds[:f]
|
61
|
+
|
62
|
+
c.action do |global_options, options, args|
|
63
|
+
fail ArgumentError, 'No file to analyze specified' if args.empty?
|
64
|
+
|
65
|
+
opts = {}.merge(global_options).merge(options)
|
66
|
+
|
67
|
+
formater = formats[opts[:format]]
|
68
|
+
if formater.nil?
|
69
|
+
fmters = formats.keys.join(', ')
|
70
|
+
fail ArgumentError, "Wrong formatter specified, can be: #{fmters}"
|
71
|
+
end
|
72
|
+
|
73
|
+
res = Csv2Psql::Convert.generate_schema(args, opts)
|
74
|
+
|
75
|
+
output = formater.call(res)
|
76
|
+
if output.is_a?(Array)
|
77
|
+
output.each do |o|
|
78
|
+
puts o
|
79
|
+
end
|
80
|
+
else
|
81
|
+
puts output
|
82
|
+
end
|
83
|
+
end
|
84
|
+
end
|
85
|
+
end
|
@@ -7,14 +7,19 @@ module Csv2Psql
|
|
7
7
|
# Csv2Psql convert module
|
8
8
|
module Convert
|
9
9
|
class << self
|
10
|
+
def analyze(paths, opts = {})
|
11
|
+
p = Processor.new
|
12
|
+
p.analyze(paths, opts)
|
13
|
+
end
|
14
|
+
|
10
15
|
def convert(paths, opts = {})
|
11
16
|
p = Processor.new
|
12
17
|
p.convert(paths, opts)
|
13
18
|
end
|
14
19
|
|
15
|
-
def analyze(paths, opts = {})
|
20
|
+
def generate_schema(paths, opts = {})
|
16
21
|
p = Processor.new
|
17
|
-
p.analyze(paths, opts)
|
22
|
+
p.generate_schema(paths, opts)
|
18
23
|
end
|
19
24
|
end
|
20
25
|
end
|
@@ -32,6 +32,17 @@ module Csv2Psql
|
|
32
32
|
|
33
33
|
attr_reader :output
|
34
34
|
|
35
|
+
class << self
|
36
|
+
def sanitize_header(header_column)
|
37
|
+
header_column.downcase.gsub(/[^0-9a-z]/i, '_')
|
38
|
+
end
|
39
|
+
|
40
|
+
def sanitize_value(value)
|
41
|
+
value ||= ''
|
42
|
+
value.gsub("'", "''")
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
35
46
|
def initialize(output)
|
36
47
|
@output = output
|
37
48
|
end
|
@@ -90,7 +101,7 @@ module Csv2Psql
|
|
90
101
|
|
91
102
|
def get_columns(row, opts = {}, header = get_header(row, opts))
|
92
103
|
if opts[:header]
|
93
|
-
header.map { |h| sanitize_header(h) }
|
104
|
+
header.map { |h| Generator.sanitize_header(h) }
|
94
105
|
else
|
95
106
|
row.map.with_index do |_item, i|
|
96
107
|
"col_#{i}"
|
@@ -101,20 +112,11 @@ module Csv2Psql
|
|
101
112
|
def get_values(row, opts = {}, header = get_header(row, opts))
|
102
113
|
header.map do |h|
|
103
114
|
value = row[h]
|
104
|
-
sanitized_value = sanitize_value(value)
|
115
|
+
sanitized_value = Generator.sanitize_value(value)
|
105
116
|
"'#{sanitized_value}'"
|
106
117
|
end
|
107
118
|
end
|
108
119
|
|
109
|
-
def sanitize_header(header_column)
|
110
|
-
header_column.downcase.gsub(/[^0-9a-z]/i, '_')
|
111
|
-
end
|
112
|
-
|
113
|
-
def sanitize_value(value)
|
114
|
-
value ||= ''
|
115
|
-
value.gsub("'", "''")
|
116
|
-
end
|
117
|
-
|
118
120
|
def truncate_table(path, row, opts = {})
|
119
121
|
ctx = create_erb_context(path, row, opts)
|
120
122
|
erb = ErbHelper.new
|
@@ -12,6 +12,7 @@ require_relative '../helpers/config_helper'
|
|
12
12
|
require_relative '../helpers/csv_helper'
|
13
13
|
require_relative '../helpers/erb_helper'
|
14
14
|
require_relative '../output/output'
|
15
|
+
require_relative '../schema/schema_generator'
|
15
16
|
require_relative '../version'
|
16
17
|
|
17
18
|
module Csv2Psql
|
@@ -30,7 +31,7 @@ module Csv2Psql
|
|
30
31
|
end
|
31
32
|
|
32
33
|
def analyze(paths, opts = {})
|
33
|
-
|
34
|
+
with_files(paths, opts) do |data|
|
34
35
|
analyzer.analyze(data[:path], data[:row], opts)
|
35
36
|
end
|
36
37
|
analyzer
|
@@ -38,9 +39,8 @@ module Csv2Psql
|
|
38
39
|
|
39
40
|
def convert(paths, opts = {})
|
40
41
|
details = {}
|
41
|
-
|
42
|
+
with_files(paths, opts) do |data|
|
42
43
|
create_converted_header(details, data, opts)
|
43
|
-
|
44
44
|
output.write generator.format_row(data[:row], opts)
|
45
45
|
end
|
46
46
|
end
|
@@ -62,6 +62,19 @@ module Csv2Psql
|
|
62
62
|
files[path]
|
63
63
|
end
|
64
64
|
|
65
|
+
def generate_schema(paths, opts = {})
|
66
|
+
res = {}
|
67
|
+
paths.each do |path|
|
68
|
+
with_file(path, opts) do |data|
|
69
|
+
path = data[:path]
|
70
|
+
analyzer.analyze(path, data[:row], opts)
|
71
|
+
end
|
72
|
+
|
73
|
+
res[path] = SchemaGenerator.generate(analyzer.files[path])
|
74
|
+
end
|
75
|
+
res
|
76
|
+
end
|
77
|
+
|
65
78
|
def get_file_details(files, path)
|
66
79
|
if files.key?(path)
|
67
80
|
files[path]
|
@@ -95,7 +108,7 @@ module Csv2Psql
|
|
95
108
|
end
|
96
109
|
end
|
97
110
|
|
98
|
-
def
|
111
|
+
def with_file(path, opts = {}, &block)
|
99
112
|
output.write 'BEGIN;' if opts[:transaction]
|
100
113
|
csv_opts = merge_csv_options(opts)
|
101
114
|
@first_row = true
|
@@ -105,10 +118,10 @@ module Csv2Psql
|
|
105
118
|
output.write 'COMMIT;' if opts[:transaction]
|
106
119
|
end
|
107
120
|
|
108
|
-
def
|
121
|
+
def with_files(paths, opts = {}, &block)
|
109
122
|
paths = [paths] unless paths.is_a?(Array)
|
110
123
|
paths.each do |path|
|
111
|
-
|
124
|
+
with_file(path, opts, &block)
|
112
125
|
end
|
113
126
|
end
|
114
127
|
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module Csv2Psql
|
4
|
+
# Csv2Psql schema generator class
|
5
|
+
class SchemaGenerator
|
6
|
+
class << self
|
7
|
+
def select_analyzers_by_match(analyzers, match)
|
8
|
+
null_count = analyzers['Null'][:results][:count]
|
9
|
+
analyzers.select do |_k, v|
|
10
|
+
v[:results][:count] + null_count == match
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
def select_analyzers_class(analyzers, class_name)
|
15
|
+
analyzers.select { |_k, v| v[:class].sql_class?(class_name) }
|
16
|
+
end
|
17
|
+
|
18
|
+
def select_best(analyzers, lines)
|
19
|
+
analyzers = select_analyzers_by_match(analyzers, lines)
|
20
|
+
sorted = analyzers.sort do |a, b|
|
21
|
+
a[1][:class].weight <=> b[1][:class].weight
|
22
|
+
end
|
23
|
+
|
24
|
+
analyzers[sorted.last[0]]
|
25
|
+
end
|
26
|
+
|
27
|
+
def format_result(analysis, lines)
|
28
|
+
res = { columns: {} }
|
29
|
+
analysis.each do |k, v|
|
30
|
+
res[:columns][k] = {
|
31
|
+
type: v[:class].sql_type,
|
32
|
+
null: v[:results][:count] != lines
|
33
|
+
}
|
34
|
+
end
|
35
|
+
res
|
36
|
+
end
|
37
|
+
|
38
|
+
def generate(analysis, _opts = {})
|
39
|
+
res = {}
|
40
|
+
analysis[:columns].each do |name, analyzers|
|
41
|
+
analyzer = select_best(analyzers, analysis[:lines])
|
42
|
+
res[name] = analyzer
|
43
|
+
end
|
44
|
+
format_result(res, analysis[:lines])
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
data/lib/csv2psql/version.rb
CHANGED
@@ -0,0 +1,11 @@
|
|
1
|
+
CREATE TABLE <%= ctx[:table] %>
|
2
|
+
(
|
3
|
+
<% ctx[:columns].each_with_index do |item, index| %>
|
4
|
+
"<%= Generator.sanitize_header(item[0]) %>" <%= item[1][:type] %> <% if !item[1][:null] %>NOT NULL<% end %><%= ", " if index < ctx[:columns].length - 1%>
|
5
|
+
|
6
|
+
<% end %>
|
7
|
+
)
|
8
|
+
WITH (
|
9
|
+
OIDS=FALSE
|
10
|
+
);
|
11
|
+
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csv2psql
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.11
|
4
|
+
version: 0.0.12
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tomas Korcak
|
@@ -265,6 +265,7 @@ files:
|
|
265
265
|
- TODO.md
|
266
266
|
- bin/csv2psql
|
267
267
|
- config/config.json
|
268
|
+
- data/census_SFOH_2010.csv
|
268
269
|
- data/cia-data-all.csv
|
269
270
|
- data/sample.csv
|
270
271
|
- data/sample_bool.csv
|
@@ -284,6 +285,7 @@ files:
|
|
284
285
|
- lib/csv2psql/cli/cli.rb
|
285
286
|
- lib/csv2psql/cli/cmd/analyze_cmd.rb
|
286
287
|
- lib/csv2psql/cli/cmd/convert_cmd.rb
|
288
|
+
- lib/csv2psql/cli/cmd/schema_cmd.rb
|
287
289
|
- lib/csv2psql/cli/cmd/version_cmd.rb
|
288
290
|
- lib/csv2psql/cli/shared.rb
|
289
291
|
- lib/csv2psql/config/config.rb
|
@@ -301,6 +303,7 @@ files:
|
|
301
303
|
- lib/csv2psql/lib.rb
|
302
304
|
- lib/csv2psql/output/output.rb
|
303
305
|
- lib/csv2psql/processor/processor.rb
|
306
|
+
- lib/csv2psql/schema/schema_generator.rb
|
304
307
|
- lib/csv2psql/version.rb
|
305
308
|
- spec/cli/app_spec.rb
|
306
309
|
- spec/cli/cmd/analyze_cmd_spec.rb
|
@@ -311,6 +314,7 @@ files:
|
|
311
314
|
- templates/create_table.sql.erb
|
312
315
|
- templates/drop_table.sql.erb
|
313
316
|
- templates/header.sql.erb
|
317
|
+
- templates/schema.sql.erb
|
314
318
|
- templates/truncate_table.sql.erb
|
315
319
|
homepage: https://github.com/korczis/csv2psql
|
316
320
|
licenses:
|