csv2psql 0.0.6 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/README.md +38 -13
- data/TODO.md +12 -0
- data/data/cia-data-all.csv +262 -0
- data/lib/csv2psql/analyzer/analyzer.rb +89 -0
- data/lib/csv2psql/analyzer/types/bigint.rb +27 -0
- data/lib/csv2psql/analyzer/types/decimal.rb +27 -0
- data/lib/csv2psql/cli/app.rb +36 -6
- data/lib/csv2psql/cli/cmd/analyze_cmd.rb +24 -0
- data/lib/csv2psql/cli/cmd/convert_cmd.rb +0 -27
- data/lib/csv2psql/convert/convert.rb +5 -0
- data/lib/csv2psql/dialects/psql.rb +66 -0
- data/lib/csv2psql/extensions/string.rb +10 -0
- data/lib/csv2psql/generator/generator.rb +126 -0
- data/lib/csv2psql/helpers/csv_helper.rb +21 -0
- data/lib/csv2psql/output/output.rb +18 -0
- data/lib/csv2psql/processor/processor.rb +30 -103
- data/lib/csv2psql/version.rb +2 -1
- data/templates/header.sql.erb +1 -1
- metadata +13 -2
@@ -0,0 +1,89 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
require 'csv'
|
4
|
+
require 'multi_json'
|
5
|
+
require 'pathname'
|
6
|
+
require 'pp'
|
7
|
+
|
8
|
+
require_relative '../helpers/erb_helper'
|
9
|
+
require_relative '../extensions/string'
|
10
|
+
|
11
|
+
module Csv2Psql
  # Gathers per-file, per-column statistics for CSV input.
  #
  # Every analyzer class found under ANALYZERS_DIR is instantiated once per
  # column; each value of that column is then passed to every analyzer.
  # Results are kept in +files+, keyed by file path:
  #
  #   { path => { columns: { column => { 'Bigint' => analyzer, ... } },
  #               lines: Integer } }
  class Analyzer
    DEFAULT_OPTIONS = {}
    ANALYZERS_DIR = File.join(File.dirname(__FILE__), 'types')

    attr_reader :analyzers, :files

    def initialize
      @files = {}
      @analyzers = load_analyzers
    end

    # Feeds one row of the file at +path+ through the column analyzers and
    # bumps the line counter of that file.
    #
    # @param path [String] key under which the statistics are stored
    # @param row  [#[]] row supporting lookup by the keys CsvHelper.get_header returns
    # @param opts [Hash] passed through to CsvHelper.get_header
    # @return [Integer] number of rows analyzed so far for +path+
    def analyze(path, row, opts = {})
      data = get_data(path)

      CsvHelper.get_header(row, opts).each do |h|
        val = row[h]
        get_column(data, h).each_value { |analyzer| analyzer.analyze(val) }
      end

      data[:lines] += 1
    end

    # Registers +column+ in +data+ with a fresh instance of every analyzer.
    #
    # @return [Hash] analyzer name => analyzer instance
    def create_column(data, column)
      res = data[:columns][column] = {}

      analyzers.each do |analyzer|
        res[analyzer[:name]] = analyzer[:class].new
      end

      res
    end

    # Creates (or resets) the statistics record of +path+.
    #
    # @return [Hash] the new record with no columns and zero lines
    def create_data(path)
      files[path] = { columns: {}, lines: 0 }
    end

    # Returns the statistics record of +path+, creating it on first use.
    def get_data(path)
      files.key?(path) ? files[path] : create_data(path)
    end

    # Returns the analyzers of +column+, creating them on first use.
    def get_column(data, column)
      data[:columns][column] || create_column(data, column)
    end

    # Requires every Ruby file below ANALYZERS_DIR and resolves the class
    # named after the file (snake_case file name => CamelCase constant in
    # Csv2Psql::Analyzers).
    #
    # @return [Array<Hash>] one { name:, class: } entry per analyzer file
    def load_analyzers
      # File.join supplies the separator the previous string concatenation
      # lacked ("types**/*.rb"); sorting makes the load order deterministic
      # because Dir[] gives no ordering guarantee on older Rubies.
      Dir[File.join(ANALYZERS_DIR, '**', '*.rb')].sort.map do |path|
        analyzer_class = File.basename(path, '.rb').camel_case
        require(path)

        {
          name: analyzer_class,
          class: Csv2Psql::Analyzers.const_get(analyzer_class)
        }
      end
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module Csv2Psql
  module Analyzers
    # Counts the values of a column that are whole numbers and tracks the
    # smallest and largest one seen.
    class Bigint
      TYPE = :bigint

      # The whole value must be an optionally signed run of digits.
      # \A and \z anchor the entire string; ^ and $ would only anchor a
      # single line and accept input such as "5\nabc".
      INTEGER_PATTERN = /\A[-+]?\d+\z/

      attr_reader :count, :min, :max

      def initialize
        @count = 0
        @min = nil
        @max = nil
      end

      # Records +val+ if it is an Integer or a string holding one.
      # Anything else (nil, blanks, text, floats) is ignored.
      #
      # @param val [Object] raw cell value
      def analyze(val)
        # to_s lets nil and other non-String values fall through quietly
        # instead of raising NoMethodError on #match.
        return unless val.is_a?(Integer) || val.to_s =~ INTEGER_PATTERN

        number = val.to_i
        @count += 1
        @min = number if @min.nil? || number < @min
        @max = number if @max.nil? || number > @max
      end
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module Csv2Psql
  module Analyzers
    # Counts the values of a column that are decimal numbers and tracks the
    # smallest and largest one seen.
    class Decimal
      TYPE = :decimal

      # The whole value must be digits, one decimal mark (comma or dot) and
      # digits again, optionally signed. Without the \A/\z anchors any text
      # merely containing such a fragment ("abc1.5xyz") was counted.
      DECIMAL_PATTERN = /\A[-+]?\d+[,.]\d+\z/

      attr_reader :count, :min, :max

      def initialize
        @count = 0
        @min = nil
        @max = nil
      end

      # Records +val+ if it is a Float or a string holding a decimal number.
      # Anything else (nil, blanks, text, plain integers) is ignored.
      #
      # @param val [Object] raw cell value
      def analyze(val)
        # to_s lets nil and other non-String values fall through quietly
        # instead of raising NoMethodError on #match.
        return unless val.is_a?(Float) || val.to_s =~ DECIMAL_PATTERN

        # String#to_f stops at a comma ("2,25".to_f == 2.0), so a comma
        # decimal mark is normalized to a dot first.
        number = val.is_a?(Float) ? val : val.to_s.tr(',', '.').to_f
        @count += 1
        @min = number if @min.nil? || number < @min
        @max = number if @max.nil? || number > @max
      end
    end
  end
end
|
data/lib/csv2psql/cli/app.rb
CHANGED
@@ -7,19 +7,51 @@ require 'pp'
|
|
7
7
|
require_relative 'shared'
|
8
8
|
require_relative '../version'
|
9
9
|
|
10
|
+
require_relative '../processor/processor'
|
11
|
+
|
10
12
|
def launch(argv = ARGV)
|
11
13
|
run(argv)
|
12
14
|
end
|
13
15
|
|
14
16
|
include GLI::App
|
15
17
|
|
16
|
-
program_desc "csv2psql #{Csv2Psql::VERSION}"
|
18
|
+
program_desc "csv2psql #{Csv2Psql::VERSION} (Codename: #{Csv2Psql::CODENAME})"
|
19
|
+
|
20
|
+
cmds = {
|
21
|
+
h: {
|
22
|
+
desc: 'Header row included',
|
23
|
+
default_value: Csv2Psql::Processor::DEFAULT_OPTIONS[:header]
|
24
|
+
},
|
25
|
+
|
26
|
+
d: {
|
27
|
+
desc: 'Column delimiter',
|
28
|
+
type: String,
|
29
|
+
default_value: Csv2Psql::Processor::DEFAULT_OPTIONS[:delimiter]
|
30
|
+
},
|
31
|
+
|
32
|
+
q: {
|
33
|
+
desc: 'Quoting character',
|
34
|
+
type: String,
|
35
|
+
default_value: Csv2Psql::Processor::DEFAULT_OPTIONS[:quote]
|
36
|
+
},
|
37
|
+
|
38
|
+
s: {
|
39
|
+
desc: 'Line separator',
|
40
|
+
type: String,
|
41
|
+
default_value: Csv2Psql::Processor::DEFAULT_OPTIONS[:separator]
|
42
|
+
}
|
43
|
+
}
|
44
|
+
|
45
|
+
switch [:h, :header], cmds[:h]
|
46
|
+
flag [:d, :delimiter], cmds[:d]
|
47
|
+
flag [:q, :quote], cmds[:q]
|
48
|
+
flag [:s, :separator], cmds[:s]
|
17
49
|
|
18
50
|
module Csv2Psql
|
19
51
|
# Apollon CLI
|
20
|
-
|
21
|
-
|
22
|
-
|
52
|
+
module Cli
|
53
|
+
# CLI Application
|
54
|
+
class App
|
23
55
|
extend Csv2Psql::Cli::Shared
|
24
56
|
|
25
57
|
cmds = File.absolute_path(File.join(File.dirname(__FILE__), 'cmd'))
|
@@ -27,8 +59,6 @@ module Csv2Psql
|
|
27
59
|
require file
|
28
60
|
end
|
29
61
|
|
30
|
-
program_desc 'Csv2Psql CLI'
|
31
|
-
|
32
62
|
def main(argv = ARGV)
|
33
63
|
launch(argv)
|
34
64
|
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'gli'
|
4
|
+
require 'pp'
|
5
|
+
|
6
|
+
include GLI::App
|
7
|
+
|
8
|
+
require_relative '../shared'
|
9
|
+
require_relative '../../convert/convert'
|
10
|
+
require_relative '../../processor/processor'
|
11
|
+
|
12
|
+
# Registers the `analyze` sub-command. It takes no flags of its own; the
# options it passes on are the global options merged with the command
# options (command options win on key clashes).
desc 'Analyze csv file'
command :analyze do |c|
  c.action do |global_options, options, args|
    fail ArgumentError, 'No file to analyze specified' if args.empty?

    opts = {}.merge(global_options).merge(options)
    res = Csv2Psql::Convert.analyze(args, opts)
    # Pretty-prints the `files` collection of the analysis result.
    pp res.files
  end
end
|
@@ -9,35 +9,12 @@ require_relative '../../convert/convert'
|
|
9
9
|
require_relative '../../processor/processor'
|
10
10
|
|
11
11
|
cmds = {
|
12
|
-
h: {
|
13
|
-
desc: 'Header row included',
|
14
|
-
default_value: Csv2Psql::Processor::DEFAULT_OPTIONS[:header]
|
15
|
-
},
|
16
|
-
|
17
|
-
d: {
|
18
|
-
desc: 'Column delimiter',
|
19
|
-
type: String,
|
20
|
-
default_value: Csv2Psql::Processor::DEFAULT_OPTIONS[:delimiter]
|
21
|
-
},
|
22
|
-
|
23
12
|
t: {
|
24
13
|
desc: 'Table to insert to',
|
25
14
|
type: String,
|
26
15
|
default_value: Csv2Psql::Processor::DEFAULT_OPTIONS[:table]
|
27
16
|
},
|
28
17
|
|
29
|
-
q: {
|
30
|
-
desc: 'Quoting character',
|
31
|
-
type: String,
|
32
|
-
default_value: Csv2Psql::Processor::DEFAULT_OPTIONS[:quote]
|
33
|
-
},
|
34
|
-
|
35
|
-
s: {
|
36
|
-
desc: 'Line separator',
|
37
|
-
type: String,
|
38
|
-
default_value: Csv2Psql::Processor::DEFAULT_OPTIONS[:separator]
|
39
|
-
},
|
40
|
-
|
41
18
|
transaction: {
|
42
19
|
desc: 'Import in transaction block',
|
43
20
|
default_value: Csv2Psql::Processor::DEFAULT_OPTIONS[:transaction]
|
@@ -61,11 +38,7 @@ cmds = {
|
|
61
38
|
|
62
39
|
desc 'Convert csv file'
|
63
40
|
command :convert do |c|
|
64
|
-
c.switch [:h, :header], cmds[:h]
|
65
|
-
c.flag [:d, :delimiter], cmds[:d]
|
66
41
|
c.flag [:t, :table], cmds[:t]
|
67
|
-
c.flag [:q, :quote], cmds[:q]
|
68
|
-
c.flag [:s, :separator], cmds[:s]
|
69
42
|
c.switch [:transaction], cmds[:transaction]
|
70
43
|
c.switch ['create-table'], cmds['create-table']
|
71
44
|
c.switch ['drop-table'], cmds['drop-table']
|
@@ -0,0 +1,66 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module Csv2Psql
  module Dialect
    # PostgreSQL specific stuff
    class Psql
      # Lookup table of numeric column types. Each entry carries:
      #   type - type family (always :numeric here)
      #   name - type name
      #   size - storage size as listed for the type, nil where none is given
      #   min/max - inclusive value range, present only for the integer types
      #
      # The table and its entries are frozen so shared lookup data cannot be
      # modified by accident.
      #
      # NOTE(review): PostgreSQL spells the 8-byte float type
      # "double precision"; the name 'double' is kept as-is because callers
      # may match on it - confirm before changing.
      NUMERIC_TYPES = [
        {
          type: :numeric,
          name: 'smallint',
          size: 2,
          min: -32_768,
          max: 32_767
        },
        {
          type: :numeric,
          name: 'integer',
          size: 4,
          min: -2_147_483_648,
          max: 2_147_483_647
        },
        {
          type: :numeric,
          name: 'bigint',
          size: 8,
          min: -9_223_372_036_854_775_808,
          max: 9_223_372_036_854_775_807
        },
        {
          type: :numeric,
          name: 'decimal',
          size: nil
        },
        {
          type: :numeric,
          name: 'numeric',
          size: nil
        },
        {
          type: :numeric,
          name: 'real',
          size: 4
        },
        {
          type: :numeric,
          name: 'double',
          size: 8
        },
        {
          type: :numeric,
          name: 'serial',
          size: 4,
          min: 1,
          max: 2_147_483_647
        },
        {
          type: :numeric,
          name: 'bigserial',
          size: 8,
          min: 1,
          max: 9_223_372_036_854_775_807
        }
      ].map(&:freeze).freeze
    end
  end
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
# Case-conversion helpers added to the core String class.
class String
  # Converts a snake_case string to CamelCase ('big_int' => 'BigInt').
  #
  # A string without underscores that already contains an uppercase letter
  # is returned unchanged (the receiver itself, not a copy). Each
  # underscore-separated part is capitalized, which also lowercases the
  # rest of that part ('fooBar_baz' => 'FoobarBaz').
  #
  # @return [String]
  def camel_case
    return self if !include?('_') && self =~ /[A-Z]/

    split('_').map(&:capitalize).join
  end

  # Converts a snake_case string to lowerCamelCase ('foo_bar' => 'fooBar').
  # The first part is kept exactly as written; later parts are capitalized.
  #
  # @return [String]
  def camel_case_lower
    split('_').each_with_index.map do |part, index|
      index.zero? ? part : part.capitalize
    end.join
  end
end
|
@@ -0,0 +1,126 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
require 'csv'
|
4
|
+
require 'multi_json'
|
5
|
+
require 'pathname'
|
6
|
+
require 'pp'
|
7
|
+
|
8
|
+
require_relative '../version'
|
9
|
+
require_relative '../helpers/csv_helper'
|
10
|
+
require_relative '../helpers/erb_helper'
|
11
|
+
|
12
|
+
module Csv2Psql
  # Builds the SQL text for a CSV file: the script header, the optional
  # DROP / CREATE / TRUNCATE TABLE statements (rendered from ERB templates)
  # and one INSERT statement per row.
  class Generator
    BASE_DIR = File.join(File.dirname(__FILE__), '..', '..', '..')
    TEMPLATE_DIR = File.join(BASE_DIR, 'templates')
    CREATE_TABLE_TEMPLATE = File.join(TEMPLATE_DIR, 'create_table.sql.erb')
    DROP_TABLE_TEMPLATE = File.join(TEMPLATE_DIR, 'drop_table.sql.erb')
    HEADER_TEMPLATE = File.join(TEMPLATE_DIR, 'header.sql.erb')
    TRUNCATE_TABLE_TEMPLATE = File.join(TEMPLATE_DIR, 'truncate_table.sql.erb')

    DEFAULT_OPTIONS = {
      'create-table' => false,
      'drop-table' => false,
      'truncate-table' => false,
      table: 'my_table'
    }

    # Option name => method producing the matching statement. The hash order
    # is the order in which the statements are written.
    TABLE_FUNCTIONS = {
      'drop-table' => :drop_table,
      'create-table' => :create_table,
      'truncate-table' => :truncate_table
    }

    attr_reader :output

    # @param output [#write] receiver of the generated script parts
    def initialize(output)
      @output = output
    end

    # Builds the variables handed to the ERB templates.
    #
    # @return [Hash] with keys :path, :header, :columns and :table
    def create_erb_context(path, row, opts = {})
      header = get_header(row, opts)
      {
        path: path,
        header: header,
        columns: get_columns(row, opts, header),
        table: opts[:table] || DEFAULT_OPTIONS[:table]
      }
    end

    # Renders the script header template.
    def create_header(path, row, opts = {})
      render_template(HEADER_TEMPLATE, path, row, opts)
    end

    # Renders the CREATE TABLE template.
    def create_table(path, row, opts = {})
      render_template(CREATE_TABLE_TEMPLATE, path, row, opts)
    end

    # Writes the header, then every table statement whose option is enabled.
    # An option missing (nil) in +opts+ falls back to DEFAULT_OPTIONS.
    def create_sql_script(path, row, opts = {})
      output.write create_header(path, row, opts)

      TABLE_FUNCTIONS.each do |option, function|
        enabled = opts[option].nil? ? DEFAULT_OPTIONS[option] : opts[option]
        output.write send(function, path, row, opts) if enabled
      end
    end

    # Renders the DROP TABLE template.
    def drop_table(path, row, opts = {})
      render_template(DROP_TABLE_TEMPLATE, path, row, opts)
    end

    # Formats one row as a single INSERT statement.
    #
    # @return [String]
    def format_row(row, opts = {})
      table = opts[:table] || DEFAULT_OPTIONS[:table]

      header = get_header(row, opts)
      columns = get_columns(row, opts, header).join(', ')
      values = get_values(row, opts, header).join(', ')
      "INSERT INTO #{table}(#{columns}) VALUES(#{values});"
    end

    # Delegates to CsvHelper.get_header.
    def get_header(row, opts = {})
      CsvHelper.get_header(row, opts)
    end

    # Column names for +row+: the sanitized header names when opts[:header]
    # is set, otherwise positional names col_0, col_1, ...
    #
    # @return [Array<String>]
    def get_columns(row, opts = {}, header = get_header(row, opts))
      if opts[:header]
        header.map { |h| sanitize_header(h) }
      else
        row.map.with_index { |_item, i| "col_#{i}" }
      end
    end

    # The row's values as single-quoted SQL literals, in header order.
    # A missing (nil) value becomes the empty literal ''.
    #
    # @return [Array<String>]
    def get_values(row, opts = {}, header = get_header(row, opts))
      header.map { |h| "'#{sanitize_value(row[h])}'" }
    end

    # Lowercases the name and replaces every character that is not an ASCII
    # letter or digit with an underscore.
    def sanitize_header(header_column)
      header_column.downcase.gsub(/[^0-9a-z]/i, '_')
    end

    # Escapes +value+ for use inside a single-quoted SQL literal by doubling
    # single quotes. nil becomes ''; to_s keeps non-String values (for
    # example numbers) from raising NoMethodError on #gsub.
    def sanitize_value(value)
      value.to_s.gsub("'", "''")
    end

    # Renders the TRUNCATE TABLE template.
    def truncate_table(path, row, opts = {})
      render_template(TRUNCATE_TABLE_TEMPLATE, path, row, opts)
    end

    private

    # Shared implementation of the template-backed methods: builds the ERB
    # context for the row and processes +template+ with it.
    def render_template(template, path, row, opts)
      ctx = create_erb_context(path, row, opts)
      ErbHelper.new.process(template, ctx)
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
require 'erb'
|
4
|
+
require 'pathname'
|
5
|
+
|
6
|
+
module Csv2Psql
  # Helper routines for working with CSV rows
  class CsvHelper
    BASE_DIR = File.join(File.dirname(__FILE__), '..')

    # Returns the keys used to address the fields of +row+: the row's own
    # headers when opts[:header] is set, otherwise the positions 0, 1, 2, ...
    def self.get_header(row, opts = {})
      return row.headers if opts[:header]

      row.each_with_index.map { |_field, position| position }
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
require 'csv'
|
4
|
+
require 'multi_json'
|
5
|
+
require 'pathname'
|
6
|
+
require 'pp'
|
7
|
+
|
8
|
+
require_relative '../version'
|
9
|
+
require_relative '../helpers/erb_helper'
|
10
|
+
|
11
|
+
module Csv2Psql
  # Output sink that prints generated text to standard output
  class Output
    # Prints +str+ to standard output, followed by a newline unless it
    # already ends with one.
    def write(str)
      $stdout.puts(str)
    end
  end
end
|