csv2psql 0.0.6 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,89 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'csv'
4
+ require 'multi_json'
5
+ require 'pathname'
6
+ require 'pp'
7
+
8
+ require_relative '../helpers/erb_helper'
9
+ require_relative '../extensions/string'
10
+
11
module Csv2Psql
  # Collects per-column statistics for CSV files.
  #
  # Every cell of every analyzed row is handed to one instance of each
  # analyzer class per column. Results are kept in #files, keyed by path:
  #
  #   files[path] = {
  #     columns: { column_key => { 'AnalyzerName' => analyzer_instance } },
  #     lines: <number of rows analyzed so far>
  #   }
  #
  # NOTE(review): #analyze calls CsvHelper, but this file's visible requires
  # do not load '../helpers/csv_helper' -- confirm that a caller loads it.
  class Analyzer
    DEFAULT_OPTIONS = {}.freeze
    ANALYZERS_DIR = File.join(File.dirname(__FILE__), 'types')

    attr_reader :analyzers, :files

    def initialize
      @files = {}
      @analyzers = load_analyzers
    end

    # Feeds one row of the file at +path+ through all column analyzers.
    #
    # path - key under which the statistics are stored in #files
    # row  - object indexable by the keys CsvHelper.get_header returns
    # opts - options forwarded to CsvHelper.get_header
    #
    # Returns the number of rows analyzed so far for +path+.
    def analyze(path, row, opts = {})
      data = get_data(path)

      CsvHelper.get_header(row, opts).each do |h|
        val = row[h]
        get_column(data, h).each_value { |analyzer| analyzer.analyze(val) }
      end

      data[:lines] += 1
    end

    # Registers +column+ in +data+ with a fresh instance of every analyzer.
    #
    # Returns the Hash mapping analyzer name to analyzer instance.
    def create_column(data, column)
      res = data[:columns][column] = {}

      analyzers.each do |analyzer|
        res[analyzer[:name]] = analyzer[:class].new
      end

      res
    end

    # Creates (or resets) the statistics record for +path+ and returns it.
    def create_data(path)
      files[path] = { columns: {}, lines: 0 }
    end

    # Returns the statistics record for +path+, creating it on first use.
    def get_data(path)
      return files[path] if files.key?(path)

      create_data(path)
    end

    # Returns the analyzers of +column+, creating them on first use.
    def get_column(data, column)
      data[:columns][column] || create_column(data, column)
    end

    # Requires every *.rb file below +dir+ and resolves the analyzer class
    # named after the file (snake_case file name -> CamelCase constant in
    # Csv2Psql::Analyzers).
    #
    # dir - directory to scan (defaults to ANALYZERS_DIR)
    #
    # Returns an Array of { name: String, class: Class } hashes, ordered by
    # file path so the result does not depend on filesystem ordering.
    # Raises NameError when a file does not define the expected constant.
    def load_analyzers(dir = ANALYZERS_DIR)
      # File.join yields '<dir>/**/*.rb'. Plain string concatenation would
      # yield '<dir>**/*.rb', which does not recurse and also matches
      # sibling directories whose name merely starts with the same prefix.
      pattern = File.join(dir, '**', '*.rb')

      Dir[pattern].sort.map do |path|
        analyzer_class = File.basename(path, '.rb').camel_case
        # An absolute path keeps require from searching $LOAD_PATH.
        require(File.expand_path(path))

        klass = Object.const_get('Csv2Psql')
                .const_get('Analyzers')
                .const_get(analyzer_class)

        { name: analyzer_class, class: klass }
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
+ # encoding: UTF-8
2
+
3
module Csv2Psql
  module Analyzers
    # Counts the values of a column that are whole numbers and tracks the
    # smallest and largest such value.
    class Bigint
      TYPE = :bigint

      # Whole string must be an optionally signed run of digits. \A and \z
      # are used because ^ and $ anchor per line, which would let a value
      # such as "5\nabc" pass.
      INTEGER_PATTERN = /\A[-+]?\d+\z/

      attr_reader :count, :min, :max

      def initialize
        @count = 0
        @min = nil
        @max = nil
      end

      # Records +val+ when it is an Integer or a String holding an integer.
      # Anything else (nil, Float, free text, ...) is ignored.
      #
      # val - raw cell value
      def analyze(val)
        return unless integer_like?(val)

        num = val.to_i
        @count += 1
        @min = num if @min.nil? || num < @min
        @max = num if @max.nil? || num > @max
      end

      private

      # True when +val+ can be read as a whole number without loss.
      def integer_like?(val)
        return true if val.is_a?(Integer)

        val.is_a?(String) && !(val =~ INTEGER_PATTERN).nil?
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
+ # encoding: UTF-8
2
+
3
module Csv2Psql
  module Analyzers
    # Counts the values of a column that are numbers with a fractional part
    # and tracks the smallest and largest such value.
    class Decimal
      TYPE = :decimal

      # Whole string must be: optional sign, digits, one '.' or ',' as the
      # decimal mark, digits. Anchoring keeps values that merely contain
      # such a fragment ("192.168.1.1", "1.5kg") from being counted.
      DECIMAL_PATTERN = /\A[-+]?\d+[,.]\d+\z/

      attr_reader :count, :min, :max

      def initialize
        @count = 0
        @min = nil
        @max = nil
      end

      # Records +val+ when it is a Float or a String holding a decimal
      # number. Anything else (nil, integers, free text, ...) is ignored.
      #
      # val - raw cell value
      def analyze(val)
        num = to_number(val)
        return if num.nil?

        @count += 1
        @min = num if @min.nil? || num < @min
        @max = num if @max.nil? || num > @max
      end

      private

      # Returns +val+ as a Float, or nil when it is not a decimal number.
      def to_number(val)
        return val if val.is_a?(Float)
        return nil unless val.is_a?(String) && val =~ DECIMAL_PATTERN

        # String#to_f stops at a comma ("1,5".to_f == 1.0), so the decimal
        # mark is normalised first.
        val.tr(',', '.').to_f
      end
    end
  end
end
@@ -7,19 +7,51 @@ require 'pp'
7
7
  require_relative 'shared'
8
8
  require_relative '../version'
9
9
 
10
+ require_relative '../processor/processor'
11
+
10
12
  def launch(argv = ARGV)
11
13
  run(argv)
12
14
  end
13
15
 
14
16
  include GLI::App
15
17
 
16
- program_desc "csv2psql #{Csv2Psql::VERSION}"
18
+ program_desc "csv2psql #{Csv2Psql::VERSION} (Codename: #{Csv2Psql::CODENAME})"
19
+
20
# Global options, registered at top level rather than on a single command.
# Their defaults are read from the processor's DEFAULT_OPTIONS.
defaults = Csv2Psql::Processor::DEFAULT_OPTIONS

cmds = {
  h: { desc: 'Header row included', default_value: defaults[:header] },
  d: { desc: 'Column delimiter', type: String, default_value: defaults[:delimiter] },
  q: { desc: 'Quoting character', type: String, default_value: defaults[:quote] },
  s: { desc: 'Line separator', type: String, default_value: defaults[:separator] }
}

switch [:h, :header], cmds[:h]
flag [:d, :delimiter], cmds[:d]
flag [:q, :quote], cmds[:q]
flag [:s, :separator], cmds[:s]
17
49
 
18
50
  module Csv2Psql
19
51
  # Apollon CLI
20
- module Cli
21
- # CLI Application
22
- class App
52
+ module Cli
53
+ # CLI Application
54
+ class App
23
55
  extend Csv2Psql::Cli::Shared
24
56
 
25
57
  cmds = File.absolute_path(File.join(File.dirname(__FILE__), 'cmd'))
@@ -27,8 +59,6 @@ module Csv2Psql
27
59
  require file
28
60
  end
29
61
 
30
- program_desc 'Csv2Psql CLI'
31
-
32
62
  def main(argv = ARGV)
33
63
  launch(argv)
34
64
  end
@@ -0,0 +1,24 @@
1
+ # encoding: utf-8
2
+
3
+ require 'gli'
4
+ require 'pp'
5
+
6
+ include GLI::App
7
+
8
+ require_relative '../shared'
9
+ require_relative '../../convert/convert'
10
+ require_relative '../../processor/processor'
11
+
12
# No command-specific options yet.
cmds = {}

# `analyze FILE...` -- analyzes the given CSV files and pretty-prints the
# collected per-file results.
desc 'Analyze csv file'
command :analyze do |cmd|
  cmd.action do |global_options, options, args|
    fail ArgumentError, 'No file to analyze specified' if args.empty?

    # Command options take precedence over global options.
    merged = {}.merge(global_options).merge(options)
    analysis = Csv2Psql::Convert.analyze(args, merged)
    pp analysis.files
  end
end
@@ -9,35 +9,12 @@ require_relative '../../convert/convert'
9
9
  require_relative '../../processor/processor'
10
10
 
11
11
  cmds = {
12
- h: {
13
- desc: 'Header row included',
14
- default_value: Csv2Psql::Processor::DEFAULT_OPTIONS[:header]
15
- },
16
-
17
- d: {
18
- desc: 'Column delimiter',
19
- type: String,
20
- default_value: Csv2Psql::Processor::DEFAULT_OPTIONS[:delimiter]
21
- },
22
-
23
12
  t: {
24
13
  desc: 'Table to insert to',
25
14
  type: String,
26
15
  default_value: Csv2Psql::Processor::DEFAULT_OPTIONS[:table]
27
16
  },
28
17
 
29
- q: {
30
- desc: 'Quoting character',
31
- type: String,
32
- default_value: Csv2Psql::Processor::DEFAULT_OPTIONS[:quote]
33
- },
34
-
35
- s: {
36
- desc: 'Line separator',
37
- type: String,
38
- default_value: Csv2Psql::Processor::DEFAULT_OPTIONS[:separator]
39
- },
40
-
41
18
  transaction: {
42
19
  desc: 'Import in transaction block',
43
20
  default_value: Csv2Psql::Processor::DEFAULT_OPTIONS[:transaction]
@@ -61,11 +38,7 @@ cmds = {
61
38
 
62
39
  desc 'Convert csv file'
63
40
  command :convert do |c|
64
- c.switch [:h, :header], cmds[:h]
65
- c.flag [:d, :delimiter], cmds[:d]
66
41
  c.flag [:t, :table], cmds[:t]
67
- c.flag [:q, :quote], cmds[:q]
68
- c.flag [:s, :separator], cmds[:s]
69
42
  c.switch [:transaction], cmds[:transaction]
70
43
  c.switch ['create-table'], cmds['create-table']
71
44
  c.switch ['drop-table'], cmds['drop-table']
@@ -16,6 +16,11 @@ module Csv2Psql
16
16
  p = Processor.new
17
17
  p.convert(paths, opts)
18
18
  end
19
+
20
# Analyzes the CSV files at +paths+ with a fresh Processor.
#
# paths - files to analyze
# opts  - options forwarded unchanged to Processor#analyze
#
# Returns whatever Processor#analyze returns.
def analyze(paths, opts = {})
  Processor.new.analyze(paths, opts)
end
19
24
  end
20
25
  end
21
26
  end
@@ -0,0 +1,66 @@
1
+ # encoding: UTF-8
2
+
3
module Csv2Psql
  module Dialect
    # PostgreSQL specific stuff
    class Psql
      # Numeric column types known to the dialect.
      #
      # Each entry has:
      #   type - category, always :numeric here
      #   name - type name as written in SQL
      #   size - storage size in bytes, nil when variable
      #   min/max - inclusive value range (integer-like types only)
      #
      # The table and its entries are frozen so that shared constant data
      # cannot be modified by accident.
      NUMERIC_TYPES = [
        {
          type: :numeric,
          name: 'smallint',
          size: 2,
          min: -32_768,
          max: 32_767
        },
        {
          type: :numeric,
          name: 'integer',
          size: 4,
          min: -2_147_483_648,
          max: 2_147_483_647
        },
        {
          type: :numeric,
          name: 'bigint',
          size: 8,
          min: -9_223_372_036_854_775_808,
          max: 9_223_372_036_854_775_807
        },
        {
          type: :numeric,
          name: 'decimal',
          size: nil
        },
        {
          type: :numeric,
          name: 'numeric',
          size: nil
        },
        {
          type: :numeric,
          name: 'real',
          size: 4
        },
        {
          type: :numeric,
          # PostgreSQL spells this type "double precision"; a bare
          # "double" is not a valid type name.
          name: 'double precision',
          size: 8
        },
        {
          type: :numeric,
          name: 'serial',
          size: 4,
          min: 1,
          max: 2_147_483_647
        },
        {
          type: :numeric,
          name: 'bigserial',
          size: 8,
          min: 1,
          max: 9_223_372_036_854_775_807
        }
      ].each(&:freeze).freeze
    end
  end
end
@@ -0,0 +1,10 @@
1
# Case-conversion helpers for underscore separated identifiers.
class String
  # Upper camel case: 'foo_bar' => 'FooBar'.
  #
  # A string without any underscore that contains at least one uppercase
  # ASCII letter is returned as is (the receiver itself, not a copy).
  def camel_case
    untouched = !include?('_') && self =~ /[A-Z]+.*/
    return self if untouched

    split('_').map(&:capitalize).join
  end

  # Lower camel case: 'foo_bar_baz' => 'fooBarBaz'.
  #
  # The first underscore separated segment is kept verbatim; every later
  # segment is capitalized.
  def camel_case_lower
    segments = split('_')
    segments.each_with_index.map do |segment, position|
      position.zero? ? segment : segment.capitalize
    end.join
  end
end
@@ -0,0 +1,126 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'csv'
4
+ require 'multi_json'
5
+ require 'pathname'
6
+ require 'pp'
7
+
8
+ require_relative '../version'
9
+ require_relative '../helpers/csv_helper'
10
+ require_relative '../helpers/erb_helper'
11
+
12
module Csv2Psql
  # SQL generator: renders the script header and the DROP / CREATE /
  # TRUNCATE TABLE statements from ERB templates, and formats CSV rows as
  # INSERT statements. Rendered text is written to #output.
  class Generator
    BASE_DIR = File.join(File.dirname(__FILE__), '..', '..', '..')
    TEMPLATE_DIR = File.join(BASE_DIR, 'templates')
    CREATE_TABLE_TEMPLATE = File.join(TEMPLATE_DIR, 'create_table.sql.erb')
    DROP_TABLE_TEMPLATE = File.join(TEMPLATE_DIR, 'drop_table.sql.erb')
    HEADER_TEMPLATE = File.join(TEMPLATE_DIR, 'header.sql.erb')
    TRUNCATE_TABLE_TEMPLATE = File.join(TEMPLATE_DIR, 'truncate_table.sql.erb')

    DEFAULT_OPTIONS = {
      'create-table' => false,
      'drop-table' => false,
      'truncate-table' => false,
      table: 'my_table'
    }.freeze

    # Option name => generator method. Hash order is the order in which
    # the statements are emitted: drop, create, truncate.
    TABLE_FUNCTIONS = {
      'drop-table' => :drop_table,
      'create-table' => :create_table,
      'truncate-table' => :truncate_table
    }.freeze

    attr_reader :output

    # output - object responding to #write(str)
    def initialize(output)
      @output = output
    end

    # Builds the Hash handed to the ERB templates.
    #
    # Returns { path:, header:, columns:, table: }.
    def create_erb_context(path, row, opts = {})
      header = get_header(row, opts)
      {
        path: path,
        header: header,
        columns: get_columns(row, opts, header),
        table: opts[:table] || DEFAULT_OPTIONS[:table]
      }
    end

    # Renders the script header template.
    def create_header(path, row, opts = {})
      render_template(HEADER_TEMPLATE, path, row, opts)
    end

    # Renders the CREATE TABLE template.
    def create_table(path, row, opts = {})
      render_template(CREATE_TABLE_TEMPLATE, path, row, opts)
    end

    # Writes the header, followed by every table statement whose option is
    # enabled. An option missing (nil) in +opts+ falls back to
    # DEFAULT_OPTIONS; an explicit false disables the statement.
    def create_sql_script(path, row, opts = {})
      output.write create_header(path, row, opts)

      TABLE_FUNCTIONS.each do |option, generator|
        enabled = opts[option].nil? ? DEFAULT_OPTIONS[option] : opts[option]
        output.write send(generator, path, row, opts) if enabled
      end
    end

    # Renders the DROP TABLE template.
    def drop_table(path, row, opts = {})
      render_template(DROP_TABLE_TEMPLATE, path, row, opts)
    end

    # Formats +row+ as a single INSERT statement.
    #
    # NOTE(review): the table name is interpolated verbatim (not quoted or
    # escaped); only the values go through #sanitize_value.
    def format_row(row, opts = {})
      table = opts[:table] || DEFAULT_OPTIONS[:table]

      header = get_header(row, opts)
      columns = get_columns(row, opts, header).join(', ')
      values = get_values(row, opts, header).join(', ')
      "INSERT INTO #{table}(#{columns}) VALUES(#{values});"
    end

    # Keys addressing the cells of +row+ (delegates to CsvHelper).
    def get_header(row, opts = {})
      CsvHelper.get_header(row, opts)
    end

    # Column names: sanitized header names when opts[:header] is set,
    # otherwise positional names col_0, col_1, ...
    def get_columns(row, opts = {}, header = get_header(row, opts))
      if opts[:header]
        header.map { |h| sanitize_header(h) }
      else
        row.map.with_index { |_item, i| "col_#{i}" }
      end
    end

    # Single-quoted, escaped SQL literals for every cell named in +header+.
    def get_values(row, opts = {}, header = get_header(row, opts))
      header.map { |h| "'#{sanitize_value(row[h])}'" }
    end

    # Lower-cases +header_column+ and replaces every character other than
    # an ASCII letter or digit with an underscore.
    def sanitize_header(header_column)
      header_column.downcase.gsub(/[^0-9a-z]/i, '_')
    end

    # Escapes +value+ for use inside a single-quoted SQL literal by
    # doubling single quotes. nil becomes the empty string; non-String
    # values (e.g. numbers produced by CSV converters) are stringified
    # first instead of raising NoMethodError.
    def sanitize_value(value)
      value.to_s.gsub("'", "''")
    end

    # Renders the TRUNCATE TABLE template.
    def truncate_table(path, row, opts = {})
      render_template(TRUNCATE_TABLE_TEMPLATE, path, row, opts)
    end

    private

    # Renders +template+ with the ERB context built for path/row/opts.
    def render_template(template, path, row, opts)
      ctx = create_erb_context(path, row, opts)
      ErbHelper.new.process(template, ctx)
    end
  end
end
@@ -0,0 +1,21 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'erb'
4
+ require 'pathname'
5
+
6
module Csv2Psql
  # Helpers for working with parsed CSV rows
  class CsvHelper
    BASE_DIR = File.join(File.dirname(__FILE__), '..')

    # Returns the keys that address the cells of +row+.
    #
    # row  - enumerable row; must respond to #headers when opts[:header]
    #        is truthy
    # opts - options Hash; only :header is read
    #
    # Returns row.headers when opts[:header] is truthy, otherwise the
    # positional indexes 0, 1, ... for each element of the row.
    def self.get_header(row, opts = {})
      return row.headers if opts[:header]

      row.each_with_index.map { |_cell, position| position }
    end
  end
end
@@ -0,0 +1,18 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'csv'
4
+ require 'multi_json'
5
+ require 'pathname'
6
+ require 'pp'
7
+
8
+ require_relative '../version'
9
+ require_relative '../helpers/erb_helper'
10
+
11
module Csv2Psql
  # Output sink that prints everything it is given to standard output
  class Output
    # Prints +str+ to $stdout followed by a newline (puts semantics).
    def write(str)
      $stdout.puts(str)
    end
  end
end