csv2psql 0.0.6 → 0.0.8

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,89 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'csv'
4
+ require 'multi_json'
5
+ require 'pathname'
6
+ require 'pp'
7
+
8
+ require_relative '../helpers/erb_helper'
9
+ require_relative '../extensions/string'
10
+
11
module Csv2Psql
  # Gathers per-file, per-column statistics by passing every cell of every
  # analyzed row to one instance of each loaded analyzer class.
  #
  # State is kept in #files, keyed by path:
  #   { columns: { <header> => { <analyzer name> => <analyzer instance> } },
  #     lines: <number of rows analyzed so far> }
  #
  # NOTE(review): #analyze uses Csv2Psql::CsvHelper and #load_analyzers uses
  # String#camel_case; the requires visible at the top of this file load the
  # string extension but not the csv helper — presumably the caller has
  # already required it; confirm.
  class Analyzer
    DEFAULT_OPTIONS = {}.freeze
    ANALYZERS_DIR = File.join(File.dirname(__FILE__), 'types')

    attr_reader :analyzers, :files

    def initialize
      @files = {}
      @analyzers = load_analyzers
    end

    # Feeds each cell of +row+ to the analyzers of its column and bumps the
    # line counter of +path+.
    #
    # @param path [Object] key identifying the file the row belongs to
    # @param row [#[]] row, indexable by the values CsvHelper.get_header returns
    # @param opts [Hash] options forwarded to CsvHelper.get_header
    # @return [Integer] number of rows analyzed for +path+ so far
    def analyze(path, row, opts = {})
      data = get_data(path)

      CsvHelper.get_header(row, opts).each do |h|
        val = row[h]
        get_column(data, h).each_value { |analyzer| analyzer.analyze(val) }
      end

      data[:lines] += 1
    end

    # Registers +column+ in +data+ with a fresh instance of every analyzer.
    #
    # @return [Hash] analyzer name => analyzer instance
    def create_column(data, column)
      data[:columns][column] = analyzers.each_with_object({}) do |analyzer, res|
        res[analyzer[:name]] = analyzer[:class].new
      end
    end

    # Creates (or resets) the statistics record for +path+.
    #
    # @return [Hash] the new record
    def create_data(path)
      files[path] = { columns: {}, lines: 0 }
    end

    # @return [Hash] the statistics record for +path+, created on first use
    def get_data(path)
      files.fetch(path) { create_data(path) }
    end

    # @return [Hash] the analyzers of +column+, created on first use
    def get_column(data, column)
      data[:columns][column] || create_column(data, column)
    end

    # Requires every analyzer source below ANALYZERS_DIR and resolves the
    # class it is expected to define (Csv2Psql::Analyzers::<CamelCasedFile>).
    #
    # The pattern is built with File.join: plain concatenation produced
    # "types**/*.rb", which is not recursive and also matches sibling
    # directories whose name merely starts with "types". Paths are sorted so
    # the analyzer order does not depend on the file system.
    #
    # @return [Array<Hash>] one { name:, class: } entry per analyzer file
    def load_analyzers
      Dir[File.join(ANALYZERS_DIR, '**', '*.rb')].sort.map do |path|
        analyzer_class = File.basename(path, '.rb').camel_case
        require(path)

        {
          name: analyzer_class,
          class: Csv2Psql::Analyzers.const_get(analyzer_class)
        }
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
+ # encoding: UTF-8
2
+
3
module Csv2Psql
  module Analyzers
    # Counts the analyzed values that are integers and tracks the smallest
    # and largest one seen.
    class Bigint
      TYPE = :bigint

      # Whole-string match (\A..\z) of an optionally signed run of digits.
      # Line anchors (^/$) would accept any multi-line value that merely
      # contains one all-digit line.
      INTEGER_PATTERN = /\A[-+]?\d+\z/

      attr_reader :count, :min, :max

      def initialize
        @count = 0
        @min = nil
        @max = nil
      end

      # Records +val+ if it is an Integer or a String holding an optionally
      # signed integer; any other value (nil, floats, free text) is ignored.
      #
      # @param val [Object] cell value
      def analyze(val)
        return unless integer?(val)

        num = val.to_i
        @count += 1
        @min = num if @min.nil? || num < @min
        @max = num if @max.nil? || num > @max
      end

      private

      # @return [Boolean] whether +val+ should be counted as an integer
      def integer?(val)
        return true if val.is_a?(Integer)

        val.is_a?(String) && !(val =~ INTEGER_PATTERN).nil?
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
+ # encoding: UTF-8
2
+
3
module Csv2Psql
  module Analyzers
    # Counts the analyzed values that are decimal numbers and tracks the
    # smallest and largest one seen.
    class Decimal
      TYPE = :decimal

      # Whole-string match of an optionally signed number with a fractional
      # part, using either "." or "," as the decimal separator. Without the
      # \A..\z anchors any text merely containing such a number would count.
      DECIMAL_PATTERN = /\A[-+]?\d+[,.]\d+\z/

      attr_reader :count, :min, :max

      def initialize
        @count = 0
        @min = nil
        @max = nil
      end

      # Records +val+ if it is a Float or a String holding a decimal number;
      # any other value (nil, integers, free text) is ignored.
      #
      # @param val [Object] cell value
      def analyze(val)
        num = to_decimal(val)
        return if num.nil?

        @count += 1
        @min = num if @min.nil? || num < @min
        @max = num if @max.nil? || num > @max
      end

      private

      # @return [Float, nil] numeric value of +val+, or nil when +val+ is not
      #   a decimal
      def to_decimal(val)
        return val if val.is_a?(Float)
        return unless val.is_a?(String) && val =~ DECIMAL_PATTERN

        # String#to_f stops at a comma ("1,5".to_f == 1.0), so normalise the
        # separator before converting.
        val.tr(',', '.').to_f
      end
    end
  end
end
@@ -7,19 +7,51 @@ require 'pp'
7
7
  require_relative 'shared'
8
8
  require_relative '../version'
9
9
 
10
+ require_relative '../processor/processor'
11
+
10
12
  def launch(argv = ARGV)
11
13
  run(argv)
12
14
  end
13
15
 
14
16
  include GLI::App
15
17
 
16
- program_desc "csv2psql #{Csv2Psql::VERSION}"
18
+ program_desc "csv2psql #{Csv2Psql::VERSION} (Codename: #{Csv2Psql::CODENAME})"
19
+
20
+ cmds = {
21
+ h: {
22
+ desc: 'Header row included',
23
+ default_value: Csv2Psql::Processor::DEFAULT_OPTIONS[:header]
24
+ },
25
+
26
+ d: {
27
+ desc: 'Column delimiter',
28
+ type: String,
29
+ default_value: Csv2Psql::Processor::DEFAULT_OPTIONS[:delimiter]
30
+ },
31
+
32
+ q: {
33
+ desc: 'Quoting character',
34
+ type: String,
35
+ default_value: Csv2Psql::Processor::DEFAULT_OPTIONS[:quote]
36
+ },
37
+
38
+ s: {
39
+ desc: 'Line separator',
40
+ type: String,
41
+ default_value: Csv2Psql::Processor::DEFAULT_OPTIONS[:separator]
42
+ }
43
+ }
44
+
45
+ switch [:h, :header], cmds[:h]
46
+ flag [:d, :delimiter], cmds[:d]
47
+ flag [:q, :quote], cmds[:q]
48
+ flag [:s, :separator], cmds[:s]
17
49
 
18
50
  module Csv2Psql
19
51
  # Apollon CLI
20
- module Cli
21
- # CLI Application
22
- class App
52
+ module Cli
53
+ # CLI Application
54
+ class App
23
55
  extend Csv2Psql::Cli::Shared
24
56
 
25
57
  cmds = File.absolute_path(File.join(File.dirname(__FILE__), 'cmd'))
@@ -27,8 +59,6 @@ module Csv2Psql
27
59
  require file
28
60
  end
29
61
 
30
- program_desc 'Csv2Psql CLI'
31
-
32
62
  def main(argv = ARGV)
33
63
  launch(argv)
34
64
  end
@@ -0,0 +1,24 @@
1
+ # encoding: utf-8
2
+
3
+ require 'gli'
4
+ require 'pp'
5
+
6
+ include GLI::App
7
+
8
+ require_relative '../shared'
9
+ require_relative '../../convert/convert'
10
+ require_relative '../../processor/processor'
11
+
12
+ cmds = {
13
+ }
14
+
15
# `analyze` sub-command: runs the analysis over the files given as arguments
# and pretty-prints the per-file result.
desc 'Analyze csv file'
command :analyze do |c|
  c.action do |global_options, options, args|
    raise ArgumentError, 'No file to analyze specified' if args.empty?

    # Command options take precedence over global ones.
    merged = {}.merge(global_options).merge(options)
    pp Csv2Psql::Convert.analyze(args, merged).files
  end
end
@@ -9,35 +9,12 @@ require_relative '../../convert/convert'
9
9
  require_relative '../../processor/processor'
10
10
 
11
11
  cmds = {
12
- h: {
13
- desc: 'Header row included',
14
- default_value: Csv2Psql::Processor::DEFAULT_OPTIONS[:header]
15
- },
16
-
17
- d: {
18
- desc: 'Column delimiter',
19
- type: String,
20
- default_value: Csv2Psql::Processor::DEFAULT_OPTIONS[:delimiter]
21
- },
22
-
23
12
  t: {
24
13
  desc: 'Table to insert to',
25
14
  type: String,
26
15
  default_value: Csv2Psql::Processor::DEFAULT_OPTIONS[:table]
27
16
  },
28
17
 
29
- q: {
30
- desc: 'Quoting character',
31
- type: String,
32
- default_value: Csv2Psql::Processor::DEFAULT_OPTIONS[:quote]
33
- },
34
-
35
- s: {
36
- desc: 'Line separator',
37
- type: String,
38
- default_value: Csv2Psql::Processor::DEFAULT_OPTIONS[:separator]
39
- },
40
-
41
18
  transaction: {
42
19
  desc: 'Import in transaction block',
43
20
  default_value: Csv2Psql::Processor::DEFAULT_OPTIONS[:transaction]
@@ -61,11 +38,7 @@ cmds = {
61
38
 
62
39
  desc 'Convert csv file'
63
40
  command :convert do |c|
64
- c.switch [:h, :header], cmds[:h]
65
- c.flag [:d, :delimiter], cmds[:d]
66
41
  c.flag [:t, :table], cmds[:t]
67
- c.flag [:q, :quote], cmds[:q]
68
- c.flag [:s, :separator], cmds[:s]
69
42
  c.switch [:transaction], cmds[:transaction]
70
43
  c.switch ['create-table'], cmds['create-table']
71
44
  c.switch ['drop-table'], cmds['drop-table']
@@ -16,6 +16,11 @@ module Csv2Psql
16
16
  p = Processor.new
17
17
  p.convert(paths, opts)
18
18
  end
19
+
20
# Runs Processor#analyze on +paths+ with a fresh Processor and returns its
# result.
def analyze(paths, opts = {})
  Processor.new.analyze(paths, opts)
end
19
24
  end
20
25
  end
21
26
  end
@@ -0,0 +1,66 @@
1
+ # encoding: UTF-8
2
+
3
module Csv2Psql
  module Dialect
    # PostgreSQL specific stuff
    class Psql
      # Numeric column types. Every entry carries type: :numeric, a name and
      # a size; :min / :max are present only for the types that list them
      # below.
      #
      # NOTE(review): PostgreSQL spells the 8-byte float type
      # "double precision"; 'double' is kept as-is because callers may look
      # entries up by this name — confirm.
      NUMERIC_TYPES = [
        { name: 'smallint', size: 2, min: -32_768, max: 32_767 },
        { name: 'integer', size: 4, min: -2_147_483_648, max: 2_147_483_647 },
        { name: 'bigint', size: 8, min: -9_223_372_036_854_775_808, max: 9_223_372_036_854_775_807 },
        { name: 'decimal', size: nil },
        { name: 'numeric', size: nil },
        { name: 'real', size: 4 },
        { name: 'double', size: 8 },
        { name: 'serial', size: 4, min: 1, max: 2_147_483_647 },
        { name: 'bigserial', size: 8, min: 1, max: 9_223_372_036_854_775_807 }
      ].map { |spec| { type: :numeric }.merge(spec) }
    end
  end
end
@@ -0,0 +1,10 @@
1
# Case-conversion helpers added to String.
class String
  # Converts snake_case to CamelCase ("big_int" => "BigInt").
  # A string without underscores that already contains an uppercase ASCII
  # letter is returned unchanged (the receiver itself).
  def camel_case
    already_cased = !include?('_') && self =~ /[A-Z]/
    return self if already_cased

    split('_').map(&:capitalize).join
  end

  # Converts snake_case to lowerCamelCase ("big_int" => "bigInt"): the first
  # segment is kept as written, every following segment is capitalized.
  def camel_case_lower
    head, *tail = split('_')
    [head, *tail.map(&:capitalize)].join
  end
end
@@ -0,0 +1,126 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'csv'
4
+ require 'multi_json'
5
+ require 'pathname'
6
+ require 'pp'
7
+
8
+ require_relative '../version'
9
+ require_relative '../helpers/csv_helper'
10
+ require_relative '../helpers/erb_helper'
11
+
12
module Csv2Psql
  # SQL generator: renders the script preamble (header plus optional
  # drop/create/truncate table statements) from ERB templates, writes it to
  # the given output and formats CSV rows as INSERT statements.
  class Generator
    BASE_DIR = File.join(File.dirname(__FILE__), '..', '..', '..')
    TEMPLATE_DIR = File.join(BASE_DIR, 'templates')
    CREATE_TABLE_TEMPLATE = File.join(TEMPLATE_DIR, 'create_table.sql.erb')
    DROP_TABLE_TEMPLATE = File.join(TEMPLATE_DIR, 'drop_table.sql.erb')
    HEADER_TEMPLATE = File.join(TEMPLATE_DIR, 'header.sql.erb')
    TRUNCATE_TABLE_TEMPLATE = File.join(TEMPLATE_DIR, 'truncate_table.sql.erb')

    DEFAULT_OPTIONS = {
      'create-table' => false,
      'drop-table' => false,
      'truncate-table' => false,
      table: 'my_table'
    }.freeze

    # Option name => generator method. Hash order is the order in which the
    # statements are emitted by #create_sql_script.
    TABLE_FUNCTIONS = {
      'drop-table' => :drop_table,
      'create-table' => :create_table,
      'truncate-table' => :truncate_table
    }.freeze

    attr_reader :output

    # @param output [#write] sink the generated script preamble is written to
    def initialize(output)
      @output = output
    end

    # Builds the variables handed to the ERB templates.
    #
    # @return [Hash] :path, :header, :columns and :table
    def create_erb_context(path, row, opts = {})
      header = get_header(row, opts)
      {
        path: path,
        header: header,
        columns: get_columns(row, opts, header),
        table: opts[:table] || DEFAULT_OPTIONS[:table]
      }
    end

    # @return the rendered header template
    def create_header(path, row, opts = {})
      render(HEADER_TEMPLATE, path, row, opts)
    end

    # @return the rendered create-table template
    def create_table(path, row, opts = {})
      render(CREATE_TABLE_TEMPLATE, path, row, opts)
    end

    # Writes the script header, followed by the table statements whose
    # option is enabled (an explicit non-nil value in +opts+ wins over
    # DEFAULT_OPTIONS).
    def create_sql_script(path, row, opts = {})
      output.write create_header(path, row, opts)

      TABLE_FUNCTIONS.each do |option, generator|
        enabled = opts[option].nil? ? DEFAULT_OPTIONS[option] : opts[option]
        output.write send(generator, path, row, opts) if enabled
      end
    end

    # @return the rendered drop-table template
    def drop_table(path, row, opts = {})
      render(DROP_TABLE_TEMPLATE, path, row, opts)
    end

    # Formats +row+ as a single INSERT statement.
    #
    # Values are single-quoted with embedded quotes doubled; the table name
    # and column names are interpolated as-is.
    # NOTE(review): opts[:table] is not quoted or validated — confirm it
    # never comes from untrusted input.
    #
    # @return [String]
    def format_row(row, opts = {})
      table = opts[:table] || DEFAULT_OPTIONS[:table]
      header = get_header(row, opts)
      columns = get_columns(row, opts, header).join(', ')
      values = get_values(row, opts, header).join(', ')
      "INSERT INTO #{table}(#{columns}) VALUES(#{values});"
    end

    # @return the keys used to address the cells of +row+ (see CsvHelper)
    def get_header(row, opts = {})
      CsvHelper.get_header(row, opts)
    end

    # @return [Array<String>] SQL column names: sanitized header names when
    #   opts[:header] is set, positional "col_<index>" names otherwise
    def get_columns(row, opts = {}, header = get_header(row, opts))
      return header.map { |h| sanitize_header(h) } if opts[:header]

      row.map.with_index { |_item, i| "col_#{i}" }
    end

    # @return [Array<String>] one quoted SQL string literal per header entry
    def get_values(row, opts = {}, header = get_header(row, opts))
      header.map { |h| "'#{sanitize_value(row[h])}'" }
    end

    # Lower-cases a header and replaces every character outside 0-9/a-z
    # with "_". Non-String headers (e.g. symbols) are converted with #to_s
    # first; a nil header therefore becomes an empty name.
    #
    # @return [String]
    def sanitize_header(header_column)
      header_column.to_s.downcase.gsub(/[^0-9a-z]/i, '_')
    end

    # Escapes +value+ for use inside a single-quoted SQL literal. nil/false
    # become the empty string; other non-String values are converted with
    # #to_s instead of raising.
    #
    # @return [String]
    def sanitize_value(value)
      (value || '').to_s.gsub("'", "''")
    end

    # @return the rendered truncate-table template
    def truncate_table(path, row, opts = {})
      render(TRUNCATE_TABLE_TEMPLATE, path, row, opts)
    end

    private

    # Renders +template+ with the ERB context built for +path+ / +row+.
    def render(template, path, row, opts)
      ErbHelper.new.process(template, create_erb_context(path, row, opts))
    end
  end
end
@@ -0,0 +1,21 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'erb'
4
+ require 'pathname'
5
+
6
module Csv2Psql
  # Helpers for working with CSV rows
  class CsvHelper
    BASE_DIR = File.join(File.dirname(__FILE__), '..')

    # Returns the keys used to address the cells of +row+: the row's own
    # headers when opts[:header] is set, otherwise the zero-based positions
    # of its items.
    def self.get_header(row, opts = {})
      return row.headers if opts[:header]

      row.each_with_index.map { |_cell, position| position }
    end
  end
end
@@ -0,0 +1,18 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'csv'
4
+ require 'multi_json'
5
+ require 'pathname'
6
+ require 'pp'
7
+
8
+ require_relative '../version'
9
+ require_relative '../helpers/erb_helper'
10
+
11
module Csv2Psql
  # Output sink that prints everything written to it on standard output
  class Output
    # Prints +str+ to standard output, followed by a newline unless it
    # already ends with one.
    def write(str)
      $stdout.puts(str)
    end
  end
end