csv2psql 0.0.11 → 0.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -16,7 +16,31 @@ module Csv2Psql
16
16
  end
17
17
 
18
18
  def numeric?
19
- const_get('CLASS') == :numeric
19
+ sql_class?(:numeric)
20
+ end
21
+
22
+ def sql_class?(class_name)
23
+ const_get('CLASS') == class_name
24
+ end
25
+
26
+ def sql_class
27
+ const_get('CLASS')
28
+ end
29
+
30
+ def sql_class?(class_name)
31
+ sql_class == class_name
32
+ end
33
+
34
+ def sql_type
35
+ const_get('TYPE')
36
+ end
37
+
38
+ def sql_type?(type_name)
39
+ sql_type == type_name
40
+ end
41
+
42
+ def weight
43
+ const_get('WEIGHT')
20
44
  end
21
45
  end
22
46
 
@@ -31,6 +55,26 @@ module Csv2Psql
31
55
  def numeric?
32
56
  self.class.numeric?
33
57
  end
58
+
59
+ def sql_class
60
+ self.class.sql_class
61
+ end
62
+
63
+ def sql_class?(class_name)
64
+ self.class.sql_class?(class_name)
65
+ end
66
+
67
+ def sql_type
68
+ self.class.sql_type
69
+ end
70
+
71
+ def sql_type?(type_name)
72
+ self.class.sql_type?(type_name)
73
+ end
74
+
75
+ def weight
76
+ self.class.weight
77
+ end
34
78
  end
35
79
  end
36
80
  end
@@ -7,7 +7,7 @@ module Csv2Psql
7
7
  # Boolean value matcher
8
8
  class Boolean < BaseAnalyzer
9
9
  TYPE = :boolean
10
- CLASS = :boolean
10
+ CLASS = :special
11
11
  WEIGHT = 5
12
12
 
13
13
  BOOLEAN_VALUES = %w(true false 0 1)
@@ -7,7 +7,7 @@ module Csv2Psql
7
7
  # Null value matcher
8
8
  class Null < BaseAnalyzer
9
9
  TYPE = :null
10
- CLASS = nil # TODO: Maybe use better class for Null type?
10
+ CLASS = :null
11
11
  WEIGHT = 0
12
12
 
13
13
  class << self
@@ -6,7 +6,7 @@ module Csv2Psql
6
6
  module Analyzers
7
7
  # String value matcher
8
8
  class String < BaseAnalyzer
9
- TYPE = :string
9
+ TYPE = :text
10
10
  CLASS = :character
11
11
  WEIGHT = 1
12
12
 
@@ -7,7 +7,7 @@ module Csv2Psql
7
7
  # UUID value matcher
8
8
  class Uuid < BaseAnalyzer
9
9
  TYPE = :uuid
10
- CLASS = :uuid
10
+ CLASS = :special
11
11
  WEIGHT = 5
12
12
 
13
13
  RE = /^[a-f0-9]{8}-[a-f0-9]{4}-4[a-f0-9]{3}-[89aAbB][a-f0-9]{3}-[a-f0-9]{12}$/ # rubocop:disable Metrics/LineLength
@@ -15,52 +15,6 @@ end
15
15
 
16
16
  include GLI::App
17
17
 
18
- program_desc "csv2psql #{Csv2Psql::VERSION} (Codename: #{Csv2Psql::CODENAME})"
19
-
20
- cmds = {
21
- h: {
22
- desc: 'Header row included',
23
- default_value: Csv2Psql::Processor::DEFAULT_OPTIONS['header']
24
- },
25
-
26
- d: {
27
- desc: 'Column delimiter',
28
- type: String,
29
- default_value: Csv2Psql::Processor::DEFAULT_OPTIONS['delimiter']
30
- },
31
-
32
- l: {
33
- desc: 'How many rows process',
34
- type: Integer,
35
- default_value: -1
36
- },
37
-
38
- q: {
39
- desc: 'Quoting character',
40
- type: String,
41
- default_value: Csv2Psql::Processor::DEFAULT_OPTIONS['quote']
42
- },
43
-
44
- s: {
45
- desc: 'Line separator',
46
- type: String,
47
- default_value: Csv2Psql::Processor::DEFAULT_OPTIONS['separator']
48
- },
49
-
50
- 'skip' => {
51
- desc: 'How many rows skip',
52
- type: Integer,
53
- default_value: -1
54
- }
55
- }
56
-
57
- switch [:h, :header], cmds[:h]
58
- flag [:d, :delimiter], cmds[:d]
59
- flag [:l, :limit], cmds[:l]
60
- flag [:q, :quote], cmds[:q]
61
- flag [:s, :separator], cmds[:s]
62
- flag [:skip], cmds['skip']
63
-
64
18
  module Csv2Psql
65
19
  # Apollon CLI
66
20
  module Cli
@@ -54,7 +54,6 @@ Csv2Psql::Cli.module_eval do
54
54
  fail ArgumentError, 'No file to analyze specified' if args.empty?
55
55
 
56
56
  opts = {}.merge(global_options).merge(options)
57
- res = Csv2Psql::Convert.analyze(args, opts)
58
57
 
59
58
  formater = formats[opts[:format]]
60
59
  if formater.nil?
@@ -62,6 +61,8 @@ Csv2Psql::Cli.module_eval do
62
61
  fail ArgumentError, "Wrong formatter specified, can be: #{fmters}"
63
62
  end
64
63
 
64
+ res = Csv2Psql::Convert.analyze(args, opts)
65
+
65
66
  output = formater.call(res)
66
67
  if output.is_a?(Array)
67
68
  output.each do |o|
@@ -0,0 +1,85 @@
1
+ # encoding: utf-8
2
+
3
+ require 'gli'
4
+ require 'json'
5
+ require 'pp'
6
+ require 'terminal-table'
7
+
8
+ include GLI::App
9
+
10
+ require_relative '../shared'
11
+ require_relative '../../convert/convert'
12
+ require_relative '../../helpers/erb_helper'
13
+ require_relative '../../processor/processor'
14
+
15
+ Csv2Psql::Cli.module_eval do
16
+ BASE_DIR = File.join(File.dirname(__FILE__), '..', '..', '..', '..')
17
+ TEMPLATE_DIR = File.join(BASE_DIR, 'templates')
18
+ SCHEMA_TEMPLATE = File.join(TEMPLATE_DIR, 'schema.sql.erb')
19
+
20
+ formats = {
21
+ 'json' => lambda do |res|
22
+ JSON.pretty_generate(res)
23
+ end,
24
+
25
+ 'sql' => lambda do |data|
26
+ res = ''
27
+ data.each do |_k, v|
28
+ v[:table] = 'my_table'
29
+ ctx = v
30
+ erb = Csv2Psql::ErbHelper.new
31
+ res += "\n" unless res.empty?
32
+ res += erb.process(SCHEMA_TEMPLATE, ctx)
33
+ end
34
+ res
35
+ end,
36
+
37
+ 'table' => lambda do |res|
38
+ res.map do |file, data|
39
+ header = %w(column type null)
40
+
41
+ rows = data[:columns].map do |k, v|
42
+ [k, v[:type], v[:null]]
43
+ end
44
+
45
+ Terminal::Table.new title: file, headings: header, rows: rows
46
+ end
47
+ end
48
+ }
49
+
50
+ cmds = {
51
+ f: {
52
+ desc: 'Output format',
53
+ type: String,
54
+ default_value: formats.keys.first
55
+ }
56
+ }
57
+
58
+ desc 'Generate schema for file'
59
+ command :schema do |c|
60
+ c.flag [:f, :format], cmds[:f]
61
+
62
+ c.action do |global_options, options, args|
63
+ fail ArgumentError, 'No file to analyze specified' if args.empty?
64
+
65
+ opts = {}.merge(global_options).merge(options)
66
+
67
+ formater = formats[opts[:format]]
68
+ if formater.nil?
69
+ fmters = formats.keys.join(', ')
70
+ fail ArgumentError, "Wrong formatter specified, can be: #{fmters}"
71
+ end
72
+
73
+ res = Csv2Psql::Convert.generate_schema(args, opts)
74
+
75
+ output = formater.call(res)
76
+ if output.is_a?(Array)
77
+ output.each do |o|
78
+ puts o
79
+ end
80
+ else
81
+ puts output
82
+ end
83
+ end
84
+ end
85
+ end
@@ -7,14 +7,19 @@ module Csv2Psql
7
7
  # Csv2Psql convert module
8
8
  module Convert
9
9
  class << self
10
+ def analyze(paths, opts = {})
11
+ p = Processor.new
12
+ p.analyze(paths, opts)
13
+ end
14
+
10
15
  def convert(paths, opts = {})
11
16
  p = Processor.new
12
17
  p.convert(paths, opts)
13
18
  end
14
19
 
15
- def analyze(paths, opts = {})
20
+ def generate_schema(paths, opts = {})
16
21
  p = Processor.new
17
- p.analyze(paths, opts)
22
+ p.generate_schema(paths, opts)
18
23
  end
19
24
  end
20
25
  end
@@ -32,6 +32,17 @@ module Csv2Psql
32
32
 
33
33
  attr_reader :output
34
34
 
35
+ class << self
36
+ def sanitize_header(header_column)
37
+ header_column.downcase.gsub(/[^0-9a-z]/i, '_')
38
+ end
39
+
40
+ def sanitize_value(value)
41
+ value ||= ''
42
+ value.gsub("'", "''")
43
+ end
44
+ end
45
+
35
46
  def initialize(output)
36
47
  @output = output
37
48
  end
@@ -90,7 +101,7 @@ module Csv2Psql
90
101
 
91
102
  def get_columns(row, opts = {}, header = get_header(row, opts))
92
103
  if opts[:header]
93
- header.map { |h| sanitize_header(h) }
104
+ header.map { |h| Generator.sanitize_header(h) }
94
105
  else
95
106
  row.map.with_index do |_item, i|
96
107
  "col_#{i}"
@@ -101,20 +112,11 @@ module Csv2Psql
101
112
  def get_values(row, opts = {}, header = get_header(row, opts))
102
113
  header.map do |h|
103
114
  value = row[h]
104
- sanitized_value = sanitize_value(value)
115
+ sanitized_value = Generator.sanitize_value(value)
105
116
  "'#{sanitized_value}'"
106
117
  end
107
118
  end
108
119
 
109
- def sanitize_header(header_column)
110
- header_column.downcase.gsub(/[^0-9a-z]/i, '_')
111
- end
112
-
113
- def sanitize_value(value)
114
- value ||= ''
115
- value.gsub("'", "''")
116
- end
117
-
118
120
  def truncate_table(path, row, opts = {})
119
121
  ctx = create_erb_context(path, row, opts)
120
122
  erb = ErbHelper.new
@@ -12,6 +12,7 @@ require_relative '../helpers/config_helper'
12
12
  require_relative '../helpers/csv_helper'
13
13
  require_relative '../helpers/erb_helper'
14
14
  require_relative '../output/output'
15
+ require_relative '../schema/schema_generator'
15
16
  require_relative '../version'
16
17
 
17
18
  module Csv2Psql
@@ -30,7 +31,7 @@ module Csv2Psql
30
31
  end
31
32
 
32
33
  def analyze(paths, opts = {})
33
- with_paths(paths, opts) do |data|
34
+ with_files(paths, opts) do |data|
34
35
  analyzer.analyze(data[:path], data[:row], opts)
35
36
  end
36
37
  analyzer
@@ -38,9 +39,8 @@ module Csv2Psql
38
39
 
39
40
  def convert(paths, opts = {})
40
41
  details = {}
41
- with_paths(paths, opts) do |data|
42
+ with_files(paths, opts) do |data|
42
43
  create_converted_header(details, data, opts)
43
-
44
44
  output.write generator.format_row(data[:row], opts)
45
45
  end
46
46
  end
@@ -62,6 +62,19 @@ module Csv2Psql
62
62
  files[path]
63
63
  end
64
64
 
65
+ def generate_schema(paths, opts = {})
66
+ res = {}
67
+ paths.each do |path|
68
+ with_file(path, opts) do |data|
69
+ path = data[:path]
70
+ analyzer.analyze(path, data[:row], opts)
71
+ end
72
+
73
+ res[path] = SchemaGenerator.generate(analyzer.files[path])
74
+ end
75
+ res
76
+ end
77
+
65
78
  def get_file_details(files, path)
66
79
  if files.key?(path)
67
80
  files[path]
@@ -95,7 +108,7 @@ module Csv2Psql
95
108
  end
96
109
  end
97
110
 
98
- def with_path(path, opts = {}, &block)
111
+ def with_file(path, opts = {}, &block)
99
112
  output.write 'BEGIN;' if opts[:transaction]
100
113
  csv_opts = merge_csv_options(opts)
101
114
  @first_row = true
@@ -105,10 +118,10 @@ module Csv2Psql
105
118
  output.write 'COMMIT;' if opts[:transaction]
106
119
  end
107
120
 
108
- def with_paths(paths, opts = {}, &block)
121
+ def with_files(paths, opts = {}, &block)
109
122
  paths = [paths] unless paths.is_a?(Array)
110
123
  paths.each do |path|
111
- with_path(path, opts, &block)
124
+ with_file(path, opts, &block)
112
125
  end
113
126
  end
114
127
 
@@ -0,0 +1,48 @@
1
+ # encoding: UTF-8
2
+
3
+ module Csv2Psql
4
+ # Csv2Psql schema generator class
5
+ class SchemaGenerator
6
+ class << self
7
+ def select_analyzers_by_match(analyzers, match)
8
+ null_count = analyzers['Null'][:results][:count]
9
+ analyzers.select do |_k, v|
10
+ v[:results][:count] + null_count == match
11
+ end
12
+ end
13
+
14
+ def select_analyzers_class(analyzers, class_name)
15
+ analyzers.select { |_k, v| v[:class].sql_class?(class_name) }
16
+ end
17
+
18
+ def select_best(analyzers, lines)
19
+ analyzers = select_analyzers_by_match(analyzers, lines)
20
+ sorted = analyzers.sort do |a, b|
21
+ a[1][:class].weight <=> b[1][:class].weight
22
+ end
23
+
24
+ analyzers[sorted.last[0]]
25
+ end
26
+
27
+ def format_result(analysis, lines)
28
+ res = { columns: {} }
29
+ analysis.each do |k, v|
30
+ res[:columns][k] = {
31
+ type: v[:class].sql_type,
32
+ null: v[:results][:count] != lines
33
+ }
34
+ end
35
+ res
36
+ end
37
+
38
+ def generate(analysis, _opts = {})
39
+ res = {}
40
+ analysis[:columns].each do |name, analyzers|
41
+ analyzer = select_best(analyzers, analysis[:lines])
42
+ res[name] = analyzer
43
+ end
44
+ format_result(res, analysis[:lines])
45
+ end
46
+ end
47
+ end
48
+ end
@@ -2,6 +2,6 @@
2
2
 
3
3
  # Csv2Psql module
4
4
  module Csv2Psql
5
- CODENAME = 'Famous rat'
6
- VERSION = '0.0.11'
5
+ CODENAME = 'Lazy dolphin'
6
+ VERSION = '0.0.12'
7
7
  end
@@ -0,0 +1,11 @@
1
+ CREATE TABLE <%= ctx[:table] %>
2
+ (
3
+ <% ctx[:columns].each_with_index do |item, index| %>
4
+ "<%= Generator.sanitize_header(item[0]) %>" <%= item[1][:type] %> <% if !item[1][:null] %>NOT NULL<% end %><%= ", " if index < ctx[:columns].length - 1%>
5
+
6
+ <% end %>
7
+ )
8
+ WITH (
9
+ OIDS=FALSE
10
+ );
11
+
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csv2psql
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.11
4
+ version: 0.0.12
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tomas Korcak
@@ -265,6 +265,7 @@ files:
265
265
  - TODO.md
266
266
  - bin/csv2psql
267
267
  - config/config.json
268
+ - data/census_SFOH_2010.csv
268
269
  - data/cia-data-all.csv
269
270
  - data/sample.csv
270
271
  - data/sample_bool.csv
@@ -284,6 +285,7 @@ files:
284
285
  - lib/csv2psql/cli/cli.rb
285
286
  - lib/csv2psql/cli/cmd/analyze_cmd.rb
286
287
  - lib/csv2psql/cli/cmd/convert_cmd.rb
288
+ - lib/csv2psql/cli/cmd/schema_cmd.rb
287
289
  - lib/csv2psql/cli/cmd/version_cmd.rb
288
290
  - lib/csv2psql/cli/shared.rb
289
291
  - lib/csv2psql/config/config.rb
@@ -301,6 +303,7 @@ files:
301
303
  - lib/csv2psql/lib.rb
302
304
  - lib/csv2psql/output/output.rb
303
305
  - lib/csv2psql/processor/processor.rb
306
+ - lib/csv2psql/schema/schema_generator.rb
304
307
  - lib/csv2psql/version.rb
305
308
  - spec/cli/app_spec.rb
306
309
  - spec/cli/cmd/analyze_cmd_spec.rb
@@ -311,6 +314,7 @@ files:
311
314
  - templates/create_table.sql.erb
312
315
  - templates/drop_table.sql.erb
313
316
  - templates/header.sql.erb
317
+ - templates/schema.sql.erb
314
318
  - templates/truncate_table.sql.erb
315
319
  homepage: https://github.com/korczis/csv2psql
316
320
  licenses: