csv2psql 0.0.6 → 0.0.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/README.md +38 -13
- data/TODO.md +12 -0
- data/data/cia-data-all.csv +262 -0
- data/lib/csv2psql/analyzer/analyzer.rb +89 -0
- data/lib/csv2psql/analyzer/types/bigint.rb +27 -0
- data/lib/csv2psql/analyzer/types/decimal.rb +27 -0
- data/lib/csv2psql/cli/app.rb +36 -6
- data/lib/csv2psql/cli/cmd/analyze_cmd.rb +24 -0
- data/lib/csv2psql/cli/cmd/convert_cmd.rb +0 -27
- data/lib/csv2psql/convert/convert.rb +5 -0
- data/lib/csv2psql/dialects/psql.rb +66 -0
- data/lib/csv2psql/extensions/string.rb +10 -0
- data/lib/csv2psql/generator/generator.rb +126 -0
- data/lib/csv2psql/helpers/csv_helper.rb +21 -0
- data/lib/csv2psql/output/output.rb +18 -0
- data/lib/csv2psql/processor/processor.rb +30 -103
- data/lib/csv2psql/version.rb +2 -1
- data/templates/header.sql.erb +1 -1
- metadata +13 -2
@@ -0,0 +1,89 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
require 'csv'
|
4
|
+
require 'multi_json'
|
5
|
+
require 'pathname'
|
6
|
+
require 'pp'
|
7
|
+
|
8
|
+
require_relative '../helpers/erb_helper'
|
9
|
+
require_relative '../extensions/string'
|
10
|
+
|
11
|
+
module Csv2Psql
  # Collects per-file, per-column statistics by handing every value of every
  # analyzed row to one instance of each type analyzer found in ANALYZERS_DIR.
  #
  # Results are kept in +files+, keyed by path:
  #   { path => { columns: { column => { analyzer_name => analyzer } },
  #               lines: Integer } }
  class Analyzer
    DEFAULT_OPTIONS = {}
    ANALYZERS_DIR = File.join(File.dirname(__FILE__), 'types')

    attr_reader :analyzers, :files

    def initialize
      @files = {}
      @analyzers = load_analyzers
    end

    # Feeds one row of +path+ to the analyzers of every column.
    #
    # @param path [Object] key under which the file's statistics are stored
    # @param row [#[]] row indexed by the entries CsvHelper.get_header returns
    # @param opts [Hash] passed through to CsvHelper.get_header
    # @return [Integer] number of rows analyzed so far for +path+
    def analyze(path, row, opts = {})
      data = get_data(path)

      CsvHelper.get_header(row, opts).each do |h|
        val = row[h]
        get_column(data, h).each_value { |analyzer| analyzer.analyze(val) }
      end

      data[:lines] += 1
    end

    # Registers +column+ in +data+ with a fresh instance of every analyzer.
    #
    # @return [Hash] analyzer name => analyzer instance
    def create_column(data, column)
      res = data[:columns][column] = {}

      analyzers.each do |analyzer|
        res[analyzer[:name]] = analyzer[:class].new
      end

      res
    end

    # Creates (or resets) the statistics record for +path+.
    #
    # @return [Hash] the new record, with no columns and a line count of 0
    def create_data(path)
      files[path] = { columns: {}, lines: 0 }
    end

    # Returns the statistics record for +path+, creating it on first use.
    def get_data(path)
      return files[path] if files.key?(path)

      create_data(path)
    end

    # Returns the analyzers of +column+, creating them on first use.
    def get_column(data, column)
      data[:columns][column] || create_column(data, column)
    end

    # Requires every Ruby file below +dir+ and resolves the analyzer class it
    # is expected to define (file name camel-cased, under Csv2Psql::Analyzers).
    #
    # @param dir [String] directory to scan; defaults to ANALYZERS_DIR
    # @return [Array<Hash>] one { name:, class: } entry per analyzer file
    # @raise [NameError] if a file does not define the expected class
    def load_analyzers(dir = ANALYZERS_DIR)
      # Join with a real path separator: plain string concatenation produced
      # "types**/*.rb", which also matched sibling directories like "types_old".
      pattern = File.join(dir, '**', '*.rb')

      # Sort so the analyzer order does not depend on the file system.
      Dir[pattern].sort.map do |path|
        analyzer_class = File.basename(path, '.rb').camel_case
        # Absolute path so `require` never falls back to a $LOAD_PATH lookup.
        require(File.expand_path(path))

        klass = Object.const_get('Csv2Psql')
                      .const_get('Analyzers')
                      .const_get(analyzer_class)

        { name: analyzer_class, class: klass }
      end
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module Csv2Psql
  module Analyzers
    # Counts the values that are whole numbers and tracks their range.
    class Bigint
      TYPE = :bigint

      # Whole string must be an optionally signed run of digits. \A and \z
      # anchor the entire string; ^ and $ would accept any matching line of a
      # multi-line value.
      INTEGER_PATTERN = /\A[+-]?\d+\z/

      attr_reader :count, :min, :max

      def initialize
        @count = 0
        @min = nil
        @max = nil
      end

      # Records +val+ if it is an Integer or a String holding one; any other
      # value (nil, Float, free text, ...) is ignored.
      #
      # @param val [Object] raw cell value
      def analyze(val)
        return unless integer?(val)

        number = val.to_i
        @count += 1
        @min = number if @min.nil? || number < @min
        @max = number if @max.nil? || number > @max
      end

      private

      # True when +val+ can be recorded as an integer.
      def integer?(val)
        return true if val.is_a?(Integer)

        val.is_a?(String) && !(val =~ INTEGER_PATTERN).nil?
      end
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module Csv2Psql
  module Analyzers
    # Counts the values that are decimal numbers (with a fractional part) and
    # tracks their range.
    class Decimal
      TYPE = :decimal

      # Whole string must be a signed number with a fractional part, using
      # either '.' or ',' as decimal separator, optionally with an exponent.
      # Anchored so that text merely containing digits ("v1.2.3", "$1.50")
      # is not counted.
      DECIMAL_PATTERN = /\A[+-]?\d+[,.]\d+(?:[eE][+-]?\d+)?\z/

      attr_reader :count, :min, :max

      def initialize
        @count = 0
        @min = nil
        @max = nil
      end

      # Records +val+ if it is a Float or a String holding a decimal number;
      # any other value (nil, Integer, free text, ...) is ignored.
      #
      # @param val [Object] raw cell value
      def analyze(val)
        number = to_decimal(val)
        return if number.nil?

        @count += 1
        @min = number if @min.nil? || number < @min
        @max = number if @max.nil? || number > @max
      end

      private

      # Converts +val+ to a Float, or returns nil when it is not a decimal.
      def to_decimal(val)
        return val if val.is_a?(Float)
        return nil unless val.is_a?(String) && val =~ DECIMAL_PATTERN

        # String#to_f stops at ',', so normalize the separator first.
        val.tr(',', '.').to_f
      end
    end
  end
end
|
data/lib/csv2psql/cli/app.rb
CHANGED
@@ -7,19 +7,51 @@ require 'pp'
|
|
7
7
|
require_relative 'shared'
|
8
8
|
require_relative '../version'
|
9
9
|
|
10
|
+
require_relative '../processor/processor'
|
11
|
+
|
10
12
|
def launch(argv = ARGV)
|
11
13
|
run(argv)
|
12
14
|
end
|
13
15
|
|
14
16
|
include GLI::App
|
15
17
|
|
16
|
-
program_desc "csv2psql #{Csv2Psql::VERSION}"
|
18
|
+
program_desc "csv2psql #{Csv2Psql::VERSION} (Codename: #{Csv2Psql::CODENAME})"
|
19
|
+
|
20
|
+
# Global command line options. Their default values are taken from
# Csv2Psql::Processor::DEFAULT_OPTIONS.
processor_defaults = Csv2Psql::Processor::DEFAULT_OPTIONS

cmds = {
  h: {
    desc: 'Header row included',
    default_value: processor_defaults[:header]
  },

  d: {
    desc: 'Column delimiter',
    type: String,
    default_value: processor_defaults[:delimiter]
  },

  q: {
    desc: 'Quoting character',
    type: String,
    default_value: processor_defaults[:quote]
  },

  s: {
    desc: 'Line separator',
    type: String,
    default_value: processor_defaults[:separator]
  }
}

# -h/--header is a boolean switch; the remaining options take a value.
switch [:h, :header], cmds[:h]
flag [:d, :delimiter], cmds[:d]
flag [:q, :quote], cmds[:q]
flag [:s, :separator], cmds[:s]
|
17
49
|
|
18
50
|
module Csv2Psql
|
19
51
|
# Csv2Psql CLI
|
20
|
-
|
21
|
-
|
22
|
-
|
52
|
+
module Cli
|
53
|
+
# CLI Application
|
54
|
+
class App
|
23
55
|
extend Csv2Psql::Cli::Shared
|
24
56
|
|
25
57
|
cmds = File.absolute_path(File.join(File.dirname(__FILE__), 'cmd'))
|
@@ -27,8 +59,6 @@ module Csv2Psql
|
|
27
59
|
require file
|
28
60
|
end
|
29
61
|
|
30
|
-
program_desc 'Csv2Psql CLI'
|
31
|
-
|
32
62
|
def main(argv = ARGV)
|
33
63
|
launch(argv)
|
34
64
|
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'gli'
|
4
|
+
require 'pp'
|
5
|
+
|
6
|
+
include GLI::App
|
7
|
+
|
8
|
+
require_relative '../shared'
|
9
|
+
require_relative '../../convert/convert'
|
10
|
+
require_relative '../../processor/processor'
|
11
|
+
|
12
|
+
# `analyze` sub-command: passes the given files and the merged global and
# command options to Csv2Psql::Convert.analyze, then pretty-prints the
# `files` attribute of the result.
desc 'Analyze csv file'
command :analyze do |c|
  c.action do |global_options, options, args|
    fail ArgumentError, 'No file to analyze specified' if args.empty?

    # Hash#merge returns a new hash; command options win over global ones.
    opts = global_options.merge(options)
    res = Csv2Psql::Convert.analyze(args, opts)
    pp res.files
  end
end
|
@@ -9,35 +9,12 @@ require_relative '../../convert/convert'
|
|
9
9
|
require_relative '../../processor/processor'
|
10
10
|
|
11
11
|
cmds = {
|
12
|
-
h: {
|
13
|
-
desc: 'Header row included',
|
14
|
-
default_value: Csv2Psql::Processor::DEFAULT_OPTIONS[:header]
|
15
|
-
},
|
16
|
-
|
17
|
-
d: {
|
18
|
-
desc: 'Column delimiter',
|
19
|
-
type: String,
|
20
|
-
default_value: Csv2Psql::Processor::DEFAULT_OPTIONS[:delimiter]
|
21
|
-
},
|
22
|
-
|
23
12
|
t: {
|
24
13
|
desc: 'Table to insert to',
|
25
14
|
type: String,
|
26
15
|
default_value: Csv2Psql::Processor::DEFAULT_OPTIONS[:table]
|
27
16
|
},
|
28
17
|
|
29
|
-
q: {
|
30
|
-
desc: 'Quoting character',
|
31
|
-
type: String,
|
32
|
-
default_value: Csv2Psql::Processor::DEFAULT_OPTIONS[:quote]
|
33
|
-
},
|
34
|
-
|
35
|
-
s: {
|
36
|
-
desc: 'Line separator',
|
37
|
-
type: String,
|
38
|
-
default_value: Csv2Psql::Processor::DEFAULT_OPTIONS[:separator]
|
39
|
-
},
|
40
|
-
|
41
18
|
transaction: {
|
42
19
|
desc: 'Import in transaction block',
|
43
20
|
default_value: Csv2Psql::Processor::DEFAULT_OPTIONS[:transaction]
|
@@ -61,11 +38,7 @@ cmds = {
|
|
61
38
|
|
62
39
|
desc 'Convert csv file'
|
63
40
|
command :convert do |c|
|
64
|
-
c.switch [:h, :header], cmds[:h]
|
65
|
-
c.flag [:d, :delimiter], cmds[:d]
|
66
41
|
c.flag [:t, :table], cmds[:t]
|
67
|
-
c.flag [:q, :quote], cmds[:q]
|
68
|
-
c.flag [:s, :separator], cmds[:s]
|
69
42
|
c.switch [:transaction], cmds[:transaction]
|
70
43
|
c.switch ['create-table'], cmds['create-table']
|
71
44
|
c.switch ['drop-table'], cmds['drop-table']
|
@@ -0,0 +1,66 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module Csv2Psql
  module Dialect
    # PostgreSQL specific stuff
    class Psql
      # Numeric column types. Each entry has:
      #   type: always :numeric
      #   name: type name
      #   size: numeric size of the type, nil where none is given
      #         (presumably storage bytes - TODO confirm)
      #   min/max: inclusive value range, present for integer types only
      #
      # The table and its entries are frozen so the shared constant cannot be
      # modified by accident.
      NUMERIC_TYPES = [
        {
          type: :numeric,
          name: 'smallint',
          size: 2,
          min: -32_768,
          max: 32_767
        },
        {
          type: :numeric,
          name: 'integer',
          size: 4,
          min: -2_147_483_648,
          max: 2_147_483_647
        },
        {
          type: :numeric,
          name: 'bigint',
          size: 8,
          min: -9_223_372_036_854_775_808,
          max: 9_223_372_036_854_775_807
        },
        {
          type: :numeric,
          name: 'decimal',
          size: nil
        },
        {
          type: :numeric,
          name: 'numeric',
          size: nil
        },
        {
          type: :numeric,
          name: 'real',
          size: 4
        },
        {
          type: :numeric,
          # PostgreSQL has no type called "double"; the 8-byte float type is
          # "double precision".
          name: 'double precision',
          size: 8
        },
        {
          type: :numeric,
          name: 'serial',
          size: 4,
          min: 1,
          max: 2_147_483_647
        },
        {
          type: :numeric,
          name: 'bigserial',
          size: 8,
          min: 1,
          max: 9_223_372_036_854_775_807
        }
      ].each(&:freeze).freeze
    end
  end
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
# Case conversion helpers for snake_case identifiers.
class String
  # Converts "snake_case" to "SnakeCase". A string without underscores that
  # contains an upper-case letter is returned as is.
  def camel_case
    already_camel = !include?('_') && !(self =~ /[A-Z]+.*/).nil?
    return self if already_camel

    split('_').map(&:capitalize).join
  end

  # Converts "snake_case" to "snakeCase": the first segment is kept
  # untouched, every following segment is capitalized.
  def camel_case_lower
    head, *tail = split('_')
    [head, *tail.map(&:capitalize)].join
  end
end
|
@@ -0,0 +1,126 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
require 'csv'
|
4
|
+
require 'multi_json'
|
5
|
+
require 'pathname'
|
6
|
+
require 'pp'
|
7
|
+
|
8
|
+
require_relative '../version'
|
9
|
+
require_relative '../helpers/csv_helper'
|
10
|
+
require_relative '../helpers/erb_helper'
|
11
|
+
|
12
|
+
module Csv2Psql
  # Generates SQL text for a CSV file: a header, optional DROP / CREATE /
  # TRUNCATE TABLE statements rendered from ERB templates, and one INSERT
  # statement per row.
  class Generator
    BASE_DIR = File.join(File.dirname(__FILE__), '..', '..', '..')
    TEMPLATE_DIR = File.join(BASE_DIR, 'templates')
    CREATE_TABLE_TEMPLATE = File.join(TEMPLATE_DIR, 'create_table.sql.erb')
    DROP_TABLE_TEMPLATE = File.join(TEMPLATE_DIR, 'drop_table.sql.erb')
    HEADER_TEMPLATE = File.join(TEMPLATE_DIR, 'header.sql.erb')
    TRUNCATE_TABLE_TEMPLATE = File.join(TEMPLATE_DIR, 'truncate_table.sql.erb')

    DEFAULT_OPTIONS = {
      'create-table' => false,
      'drop-table' => false,
      'truncate-table' => false,
      table: 'my_table'
    }

    # Option name => generator method, in the order the statements are written.
    TABLE_FUNCTIONS = {
      'drop-table' => :drop_table,
      'create-table' => :create_table,
      'truncate-table' => :truncate_table
    }

    attr_reader :output

    # @param output [#write] sink that receives the generated SQL
    def initialize(output)
      @output = output
    end

    # Builds the variables handed to the ERB templates.
    #
    # @return [Hash] with the keys :path, :header, :columns and :table
    def create_erb_context(path, row, opts = {})
      header = get_header(row, opts)
      {
        path: path,
        header: header,
        columns: get_columns(row, opts, header),
        table: opts[:table] || DEFAULT_OPTIONS[:table]
      }
    end

    # Renders the script header template.
    def create_header(path, row, opts = {})
      render_template(HEADER_TEMPLATE, path, row, opts)
    end

    # Renders the CREATE TABLE template.
    def create_table(path, row, opts = {})
      render_template(CREATE_TABLE_TEMPLATE, path, row, opts)
    end

    # Writes the header followed by every table statement whose option is
    # enabled. An option explicitly given in +opts+ (even false) overrides
    # the default.
    def create_sql_script(path, row, opts = {})
      output.write create_header(path, row, opts)

      TABLE_FUNCTIONS.each do |option, generator|
        enabled = opts[option].nil? ? DEFAULT_OPTIONS[option] : opts[option]
        output.write send(generator, path, row, opts) if enabled
      end
    end

    # Renders the DROP TABLE template.
    def drop_table(path, row, opts = {})
      render_template(DROP_TABLE_TEMPLATE, path, row, opts)
    end

    # Builds the INSERT statement for one row.
    #
    # NOTE(review): the table name is interpolated as given; only values are
    # escaped (see sanitize_value).
    #
    # @return [String]
    def format_row(row, opts = {})
      table = opts[:table] || DEFAULT_OPTIONS[:table]

      header = get_header(row, opts)
      columns = get_columns(row, opts, header).join(', ')
      values = get_values(row, opts, header).join(', ')
      "INSERT INTO #{table}(#{columns}) VALUES(#{values});"
    end

    # Delegates to CsvHelper.get_header.
    def get_header(row, opts = {})
      CsvHelper.get_header(row, opts)
    end

    # Column names: sanitized header entries when opts[:header] is set,
    # otherwise "col_0", "col_1", ... - one per item of +row+.
    #
    # @return [Array<String>]
    def get_columns(row, opts = {}, header = get_header(row, opts))
      return header.map { |h| sanitize_header(h) } if opts[:header]

      row.map.with_index { |_item, i| "col_#{i}" }
    end

    # Escaped, single-quoted values of +row+, one per +header+ entry.
    #
    # @return [Array<String>]
    def get_values(row, opts = {}, header = get_header(row, opts))
      header.map { |h| "'#{sanitize_value(row[h])}'" }
    end

    # Lower-cases the header and replaces everything but ASCII letters and
    # digits with '_'. Non-String headers (e.g. Symbols) are converted with
    # #to_s first instead of raising NoMethodError.
    def sanitize_header(header_column)
      header_column.to_s.downcase.gsub(/[^0-9a-z]/i, '_')
    end

    # Escapes single quotes by doubling them. nil becomes the empty string;
    # non-String values (e.g. Integers) are converted with #to_s first
    # instead of raising NoMethodError.
    def sanitize_value(value)
      value.to_s.gsub("'", "''")
    end

    # Renders the TRUNCATE TABLE template.
    def truncate_table(path, row, opts = {})
      render_template(TRUNCATE_TABLE_TEMPLATE, path, row, opts)
    end

    private

    # Renders +template+ with the ERB context built for +path+ and +row+.
    def render_template(template, path, row, opts)
      ctx = create_erb_context(path, row, opts)
      ErbHelper.new.process(template, ctx)
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
require 'erb'
|
4
|
+
require 'pathname'
|
5
|
+
|
6
|
+
module Csv2Psql
  # Helpers for working with parsed CSV rows
  class CsvHelper
    BASE_DIR = File.join(File.dirname(__FILE__), '..')

    # Returns the keys used to address the fields of +row+: the row's own
    # headers when opts[:header] is set, otherwise the zero-based position of
    # each item.
    def self.get_header(row, opts = {})
      return row.headers if opts[:header]

      row.each_with_index.map { |_item, position| position }
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
require 'csv'
|
4
|
+
require 'multi_json'
|
5
|
+
require 'pathname'
|
6
|
+
require 'pp'
|
7
|
+
|
8
|
+
require_relative '../version'
|
9
|
+
require_relative '../helpers/erb_helper'
|
10
|
+
|
11
|
+
module Csv2Psql
  # Output sink that prints to standard output
  class Output
    # Prints +str+ to standard output, followed by a newline.
    def write(str)
      $stdout.puts(str)
    end
  end
end
|