csv2psql 0.0.11 → 0.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +82 -3
- data/data/census_SFOH_2010.csv +981 -0
- data/lib/csv2psql/analyzer/types/base_analyzer.rb +45 -1
- data/lib/csv2psql/analyzer/types/boolean.rb +1 -1
- data/lib/csv2psql/analyzer/types/null.rb +1 -1
- data/lib/csv2psql/analyzer/types/string.rb +1 -1
- data/lib/csv2psql/analyzer/types/uuid.rb +1 -1
- data/lib/csv2psql/cli/app.rb +0 -46
- data/lib/csv2psql/cli/cmd/analyze_cmd.rb +2 -1
- data/lib/csv2psql/cli/cmd/schema_cmd.rb +85 -0
- data/lib/csv2psql/convert/convert.rb +7 -2
- data/lib/csv2psql/generator/generator.rb +13 -11
- data/lib/csv2psql/processor/processor.rb +19 -6
- data/lib/csv2psql/schema/schema_generator.rb +48 -0
- data/lib/csv2psql/version.rb +2 -2
- data/templates/schema.sql.erb +11 -0
- metadata +5 -1
@@ -16,7 +16,31 @@ module Csv2Psql
|
|
16
16
|
end
|
17
17
|
|
18
18
|
def numeric?
|
19
|
-
|
19
|
+
sql_class?(:numeric)
|
20
|
+
end
|
21
|
+
|
22
|
+
def sql_class?(class_name)
|
23
|
+
const_get('CLASS') == class_name
|
24
|
+
end
|
25
|
+
|
26
|
+
def sql_class
|
27
|
+
const_get('CLASS')
|
28
|
+
end
|
29
|
+
|
30
|
+
def sql_class?(class_name)
|
31
|
+
sql_class == class_name
|
32
|
+
end
|
33
|
+
|
34
|
+
def sql_type
|
35
|
+
const_get('TYPE')
|
36
|
+
end
|
37
|
+
|
38
|
+
def sql_type?(type_name)
|
39
|
+
sql_type == type_name
|
40
|
+
end
|
41
|
+
|
42
|
+
def weight
|
43
|
+
const_get('WEIGHT')
|
20
44
|
end
|
21
45
|
end
|
22
46
|
|
@@ -31,6 +55,26 @@ module Csv2Psql
|
|
31
55
|
def numeric?
|
32
56
|
self.class.numeric?
|
33
57
|
end
|
58
|
+
|
59
|
+
def sql_class
|
60
|
+
self.class.sql_class
|
61
|
+
end
|
62
|
+
|
63
|
+
def sql_class?(class_name)
|
64
|
+
self.class.sql_class?(class_name)
|
65
|
+
end
|
66
|
+
|
67
|
+
def sql_type
|
68
|
+
self.class.sql_type
|
69
|
+
end
|
70
|
+
|
71
|
+
def sql_type?(type_name)
|
72
|
+
self.class.sql_type?(type_name)
|
73
|
+
end
|
74
|
+
|
75
|
+
def weight
|
76
|
+
self.class.weight
|
77
|
+
end
|
34
78
|
end
|
35
79
|
end
|
36
80
|
end
|
data/lib/csv2psql/cli/app.rb
CHANGED
@@ -15,52 +15,6 @@ end
|
|
15
15
|
|
16
16
|
include GLI::App
|
17
17
|
|
18
|
-
program_desc "csv2psql #{Csv2Psql::VERSION} (Codename: #{Csv2Psql::CODENAME})"
|
19
|
-
|
20
|
-
cmds = {
|
21
|
-
h: {
|
22
|
-
desc: 'Header row included',
|
23
|
-
default_value: Csv2Psql::Processor::DEFAULT_OPTIONS['header']
|
24
|
-
},
|
25
|
-
|
26
|
-
d: {
|
27
|
-
desc: 'Column delimiter',
|
28
|
-
type: String,
|
29
|
-
default_value: Csv2Psql::Processor::DEFAULT_OPTIONS['delimiter']
|
30
|
-
},
|
31
|
-
|
32
|
-
l: {
|
33
|
-
desc: 'How many rows process',
|
34
|
-
type: Integer,
|
35
|
-
default_value: -1
|
36
|
-
},
|
37
|
-
|
38
|
-
q: {
|
39
|
-
desc: 'Quoting character',
|
40
|
-
type: String,
|
41
|
-
default_value: Csv2Psql::Processor::DEFAULT_OPTIONS['quote']
|
42
|
-
},
|
43
|
-
|
44
|
-
s: {
|
45
|
-
desc: 'Line separator',
|
46
|
-
type: String,
|
47
|
-
default_value: Csv2Psql::Processor::DEFAULT_OPTIONS['separator']
|
48
|
-
},
|
49
|
-
|
50
|
-
'skip' => {
|
51
|
-
desc: 'How many rows skip',
|
52
|
-
type: Integer,
|
53
|
-
default_value: -1
|
54
|
-
}
|
55
|
-
}
|
56
|
-
|
57
|
-
switch [:h, :header], cmds[:h]
|
58
|
-
flag [:d, :delimiter], cmds[:d]
|
59
|
-
flag [:l, :limit], cmds[:l]
|
60
|
-
flag [:q, :quote], cmds[:q]
|
61
|
-
flag [:s, :separator], cmds[:s]
|
62
|
-
flag [:skip], cmds['skip']
|
63
|
-
|
64
18
|
module Csv2Psql
|
65
19
|
# Apollon CLI
|
66
20
|
module Cli
|
@@ -54,7 +54,6 @@ Csv2Psql::Cli.module_eval do
|
|
54
54
|
fail ArgumentError, 'No file to analyze specified' if args.empty?
|
55
55
|
|
56
56
|
opts = {}.merge(global_options).merge(options)
|
57
|
-
res = Csv2Psql::Convert.analyze(args, opts)
|
58
57
|
|
59
58
|
formater = formats[opts[:format]]
|
60
59
|
if formater.nil?
|
@@ -62,6 +61,8 @@ Csv2Psql::Cli.module_eval do
|
|
62
61
|
fail ArgumentError, "Wrong formatter specified, can be: #{fmters}"
|
63
62
|
end
|
64
63
|
|
64
|
+
res = Csv2Psql::Convert.analyze(args, opts)
|
65
|
+
|
65
66
|
output = formater.call(res)
|
66
67
|
if output.is_a?(Array)
|
67
68
|
output.each do |o|
|
@@ -0,0 +1,85 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'gli'
|
4
|
+
require 'json'
|
5
|
+
require 'pp'
|
6
|
+
require 'terminal-table'
|
7
|
+
|
8
|
+
include GLI::App
|
9
|
+
|
10
|
+
require_relative '../shared'
|
11
|
+
require_relative '../../convert/convert'
|
12
|
+
require_relative '../../helpers/erb_helper'
|
13
|
+
require_relative '../../processor/processor'
|
14
|
+
|
15
|
+
Csv2Psql::Cli.module_eval do
|
16
|
+
BASE_DIR = File.join(File.dirname(__FILE__), '..', '..', '..', '..')
|
17
|
+
TEMPLATE_DIR = File.join(BASE_DIR, 'templates')
|
18
|
+
SCHEMA_TEMPLATE = File.join(TEMPLATE_DIR, 'schema.sql.erb')
|
19
|
+
|
20
|
+
formats = {
|
21
|
+
'json' => lambda do |res|
|
22
|
+
JSON.pretty_generate(res)
|
23
|
+
end,
|
24
|
+
|
25
|
+
'sql' => lambda do |data|
|
26
|
+
res = ''
|
27
|
+
data.each do |_k, v|
|
28
|
+
v[:table] = 'my_table'
|
29
|
+
ctx = v
|
30
|
+
erb = Csv2Psql::ErbHelper.new
|
31
|
+
res += "\n" unless res.empty?
|
32
|
+
res += erb.process(SCHEMA_TEMPLATE, ctx)
|
33
|
+
end
|
34
|
+
res
|
35
|
+
end,
|
36
|
+
|
37
|
+
'table' => lambda do |res|
|
38
|
+
res.map do |file, data|
|
39
|
+
header = %w(column type null)
|
40
|
+
|
41
|
+
rows = data[:columns].map do |k, v|
|
42
|
+
[k, v[:type], v[:null]]
|
43
|
+
end
|
44
|
+
|
45
|
+
Terminal::Table.new title: file, headings: header, rows: rows
|
46
|
+
end
|
47
|
+
end
|
48
|
+
}
|
49
|
+
|
50
|
+
cmds = {
|
51
|
+
f: {
|
52
|
+
desc: 'Output format',
|
53
|
+
type: String,
|
54
|
+
default_value: formats.keys.first
|
55
|
+
}
|
56
|
+
}
|
57
|
+
|
58
|
+
desc 'Generate schema for file'
|
59
|
+
command :schema do |c|
|
60
|
+
c.flag [:f, :format], cmds[:f]
|
61
|
+
|
62
|
+
c.action do |global_options, options, args|
|
63
|
+
fail ArgumentError, 'No file to analyze specified' if args.empty?
|
64
|
+
|
65
|
+
opts = {}.merge(global_options).merge(options)
|
66
|
+
|
67
|
+
formater = formats[opts[:format]]
|
68
|
+
if formater.nil?
|
69
|
+
fmters = formats.keys.join(', ')
|
70
|
+
fail ArgumentError, "Wrong formatter specified, can be: #{fmters}"
|
71
|
+
end
|
72
|
+
|
73
|
+
res = Csv2Psql::Convert.generate_schema(args, opts)
|
74
|
+
|
75
|
+
output = formater.call(res)
|
76
|
+
if output.is_a?(Array)
|
77
|
+
output.each do |o|
|
78
|
+
puts o
|
79
|
+
end
|
80
|
+
else
|
81
|
+
puts output
|
82
|
+
end
|
83
|
+
end
|
84
|
+
end
|
85
|
+
end
|
@@ -7,14 +7,19 @@ module Csv2Psql
|
|
7
7
|
# Csv2Psql convert module
|
8
8
|
module Convert
|
9
9
|
class << self
|
10
|
+
def analyze(paths, opts = {})
|
11
|
+
p = Processor.new
|
12
|
+
p.analyze(paths, opts)
|
13
|
+
end
|
14
|
+
|
10
15
|
def convert(paths, opts = {})
|
11
16
|
p = Processor.new
|
12
17
|
p.convert(paths, opts)
|
13
18
|
end
|
14
19
|
|
15
|
-
def analyze(paths, opts = {})
|
20
|
+
def generate_schema(paths, opts = {})
|
16
21
|
p = Processor.new
|
17
|
-
p.analyze(paths, opts)
|
22
|
+
p.generate_schema(paths, opts)
|
18
23
|
end
|
19
24
|
end
|
20
25
|
end
|
@@ -32,6 +32,17 @@ module Csv2Psql
|
|
32
32
|
|
33
33
|
attr_reader :output
|
34
34
|
|
35
|
+
class << self
|
36
|
+
def sanitize_header(header_column)
|
37
|
+
header_column.downcase.gsub(/[^0-9a-z]/i, '_')
|
38
|
+
end
|
39
|
+
|
40
|
+
def sanitize_value(value)
|
41
|
+
value ||= ''
|
42
|
+
value.gsub("'", "''")
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
35
46
|
def initialize(output)
|
36
47
|
@output = output
|
37
48
|
end
|
@@ -90,7 +101,7 @@ module Csv2Psql
|
|
90
101
|
|
91
102
|
def get_columns(row, opts = {}, header = get_header(row, opts))
|
92
103
|
if opts[:header]
|
93
|
-
header.map { |h| sanitize_header(h) }
|
104
|
+
header.map { |h| Generator.sanitize_header(h) }
|
94
105
|
else
|
95
106
|
row.map.with_index do |_item, i|
|
96
107
|
"col_#{i}"
|
@@ -101,20 +112,11 @@ module Csv2Psql
|
|
101
112
|
def get_values(row, opts = {}, header = get_header(row, opts))
|
102
113
|
header.map do |h|
|
103
114
|
value = row[h]
|
104
|
-
sanitized_value = sanitize_value(value)
|
115
|
+
sanitized_value = Generator.sanitize_value(value)
|
105
116
|
"'#{sanitized_value}'"
|
106
117
|
end
|
107
118
|
end
|
108
119
|
|
109
|
-
def sanitize_header(header_column)
|
110
|
-
header_column.downcase.gsub(/[^0-9a-z]/i, '_')
|
111
|
-
end
|
112
|
-
|
113
|
-
def sanitize_value(value)
|
114
|
-
value ||= ''
|
115
|
-
value.gsub("'", "''")
|
116
|
-
end
|
117
|
-
|
118
120
|
def truncate_table(path, row, opts = {})
|
119
121
|
ctx = create_erb_context(path, row, opts)
|
120
122
|
erb = ErbHelper.new
|
@@ -12,6 +12,7 @@ require_relative '../helpers/config_helper'
|
|
12
12
|
require_relative '../helpers/csv_helper'
|
13
13
|
require_relative '../helpers/erb_helper'
|
14
14
|
require_relative '../output/output'
|
15
|
+
require_relative '../schema/schema_generator'
|
15
16
|
require_relative '../version'
|
16
17
|
|
17
18
|
module Csv2Psql
|
@@ -30,7 +31,7 @@ module Csv2Psql
|
|
30
31
|
end
|
31
32
|
|
32
33
|
def analyze(paths, opts = {})
|
33
|
-
|
34
|
+
with_files(paths, opts) do |data|
|
34
35
|
analyzer.analyze(data[:path], data[:row], opts)
|
35
36
|
end
|
36
37
|
analyzer
|
@@ -38,9 +39,8 @@ module Csv2Psql
|
|
38
39
|
|
39
40
|
def convert(paths, opts = {})
|
40
41
|
details = {}
|
41
|
-
|
42
|
+
with_files(paths, opts) do |data|
|
42
43
|
create_converted_header(details, data, opts)
|
43
|
-
|
44
44
|
output.write generator.format_row(data[:row], opts)
|
45
45
|
end
|
46
46
|
end
|
@@ -62,6 +62,19 @@ module Csv2Psql
|
|
62
62
|
files[path]
|
63
63
|
end
|
64
64
|
|
65
|
+
def generate_schema(paths, opts = {})
|
66
|
+
res = {}
|
67
|
+
paths.each do |path|
|
68
|
+
with_file(path, opts) do |data|
|
69
|
+
path = data[:path]
|
70
|
+
analyzer.analyze(path, data[:row], opts)
|
71
|
+
end
|
72
|
+
|
73
|
+
res[path] = SchemaGenerator.generate(analyzer.files[path])
|
74
|
+
end
|
75
|
+
res
|
76
|
+
end
|
77
|
+
|
65
78
|
def get_file_details(files, path)
|
66
79
|
if files.key?(path)
|
67
80
|
files[path]
|
@@ -95,7 +108,7 @@ module Csv2Psql
|
|
95
108
|
end
|
96
109
|
end
|
97
110
|
|
98
|
-
def
|
111
|
+
def with_file(path, opts = {}, &block)
|
99
112
|
output.write 'BEGIN;' if opts[:transaction]
|
100
113
|
csv_opts = merge_csv_options(opts)
|
101
114
|
@first_row = true
|
@@ -105,10 +118,10 @@ module Csv2Psql
|
|
105
118
|
output.write 'COMMIT;' if opts[:transaction]
|
106
119
|
end
|
107
120
|
|
108
|
-
def
|
121
|
+
def with_files(paths, opts = {}, &block)
|
109
122
|
paths = [paths] unless paths.is_a?(Array)
|
110
123
|
paths.each do |path|
|
111
|
-
|
124
|
+
with_file(path, opts, &block)
|
112
125
|
end
|
113
126
|
end
|
114
127
|
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module Csv2Psql
|
4
|
+
# Csv2Psql schema generator class
|
5
|
+
class SchemaGenerator
|
6
|
+
class << self
|
7
|
+
def select_analyzers_by_match(analyzers, match)
|
8
|
+
null_count = analyzers['Null'][:results][:count]
|
9
|
+
analyzers.select do |_k, v|
|
10
|
+
v[:results][:count] + null_count == match
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
def select_analyzers_class(analyzers, class_name)
|
15
|
+
analyzers.select { |_k, v| v[:class].sql_class?(class_name) }
|
16
|
+
end
|
17
|
+
|
18
|
+
def select_best(analyzers, lines)
|
19
|
+
analyzers = select_analyzers_by_match(analyzers, lines)
|
20
|
+
sorted = analyzers.sort do |a, b|
|
21
|
+
a[1][:class].weight <=> b[1][:class].weight
|
22
|
+
end
|
23
|
+
|
24
|
+
analyzers[sorted.last[0]]
|
25
|
+
end
|
26
|
+
|
27
|
+
def format_result(analysis, lines)
|
28
|
+
res = { columns: {} }
|
29
|
+
analysis.each do |k, v|
|
30
|
+
res[:columns][k] = {
|
31
|
+
type: v[:class].sql_type,
|
32
|
+
null: v[:results][:count] != lines
|
33
|
+
}
|
34
|
+
end
|
35
|
+
res
|
36
|
+
end
|
37
|
+
|
38
|
+
def generate(analysis, _opts = {})
|
39
|
+
res = {}
|
40
|
+
analysis[:columns].each do |name, analyzers|
|
41
|
+
analyzer = select_best(analyzers, analysis[:lines])
|
42
|
+
res[name] = analyzer
|
43
|
+
end
|
44
|
+
format_result(res, analysis[:lines])
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
data/lib/csv2psql/version.rb
CHANGED
@@ -0,0 +1,11 @@
|
|
1
|
+
CREATE TABLE <%= ctx[:table] %>
|
2
|
+
(
|
3
|
+
<% ctx[:columns].each_with_index do |item, index| %>
|
4
|
+
"<%= Generator.sanitize_header(item[0]) %>" <%= item[1][:type] %> <% if !item[1][:null] %>NOT NULL<% end %><%= ", " if index < ctx[:columns].length - 1%>
|
5
|
+
|
6
|
+
<% end %>
|
7
|
+
)
|
8
|
+
WITH (
|
9
|
+
OIDS=FALSE
|
10
|
+
);
|
11
|
+
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csv2psql
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.11
|
4
|
+
version: 0.0.12
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tomas Korcak
|
@@ -265,6 +265,7 @@ files:
|
|
265
265
|
- TODO.md
|
266
266
|
- bin/csv2psql
|
267
267
|
- config/config.json
|
268
|
+
- data/census_SFOH_2010.csv
|
268
269
|
- data/cia-data-all.csv
|
269
270
|
- data/sample.csv
|
270
271
|
- data/sample_bool.csv
|
@@ -284,6 +285,7 @@ files:
|
|
284
285
|
- lib/csv2psql/cli/cli.rb
|
285
286
|
- lib/csv2psql/cli/cmd/analyze_cmd.rb
|
286
287
|
- lib/csv2psql/cli/cmd/convert_cmd.rb
|
288
|
+
- lib/csv2psql/cli/cmd/schema_cmd.rb
|
287
289
|
- lib/csv2psql/cli/cmd/version_cmd.rb
|
288
290
|
- lib/csv2psql/cli/shared.rb
|
289
291
|
- lib/csv2psql/config/config.rb
|
@@ -301,6 +303,7 @@ files:
|
|
301
303
|
- lib/csv2psql/lib.rb
|
302
304
|
- lib/csv2psql/output/output.rb
|
303
305
|
- lib/csv2psql/processor/processor.rb
|
306
|
+
- lib/csv2psql/schema/schema_generator.rb
|
304
307
|
- lib/csv2psql/version.rb
|
305
308
|
- spec/cli/app_spec.rb
|
306
309
|
- spec/cli/cmd/analyze_cmd_spec.rb
|
@@ -311,6 +314,7 @@ files:
|
|
311
314
|
- templates/create_table.sql.erb
|
312
315
|
- templates/drop_table.sql.erb
|
313
316
|
- templates/header.sql.erb
|
317
|
+
- templates/schema.sql.erb
|
314
318
|
- templates/truncate_table.sql.erb
|
315
319
|
homepage: https://github.com/korczis/csv2psql
|
316
320
|
licenses:
|