csv2psql 0.0.11 → 0.0.12

Sign up to get free protection for your applications and to get access to all the features.
@@ -16,7 +16,31 @@ module Csv2Psql
16
16
  end
17
17
 
18
18
  def numeric?
19
- const_get('CLASS') == :numeric
19
+ sql_class?(:numeric)
20
+ end
21
+
22
+ def sql_class?(class_name)
23
+ const_get('CLASS') == class_name
24
+ end
25
+
26
+ def sql_class
27
+ const_get('CLASS')
28
+ end
29
+
30
+ def sql_class?(class_name)
31
+ sql_class == class_name
32
+ end
33
+
34
+ def sql_type
35
+ const_get('TYPE')
36
+ end
37
+
38
+ def sql_type?(type_name)
39
+ sql_type == type_name
40
+ end
41
+
42
+ def weight
43
+ const_get('WEIGHT')
20
44
  end
21
45
  end
22
46
 
@@ -31,6 +55,26 @@ module Csv2Psql
31
55
  def numeric?
32
56
  self.class.numeric?
33
57
  end
58
+
59
+ def sql_class
60
+ self.class.sql_class
61
+ end
62
+
63
+ def sql_class?(class_name)
64
+ self.class.sql_class?(class_name)
65
+ end
66
+
67
+ def sql_type
68
+ self.class.sql_type
69
+ end
70
+
71
+ def sql_type?(type_name)
72
+ self.class.sql_type?(type_name)
73
+ end
74
+
75
+ def weight
76
+ self.class.weight
77
+ end
34
78
  end
35
79
  end
36
80
  end
@@ -7,7 +7,7 @@ module Csv2Psql
7
7
  # Boolean value matcher
8
8
  class Boolean < BaseAnalyzer
9
9
  TYPE = :boolean
10
- CLASS = :boolean
10
+ CLASS = :special
11
11
  WEIGHT = 5
12
12
 
13
13
  BOOLEAN_VALUES = %w(true false 0 1)
@@ -7,7 +7,7 @@ module Csv2Psql
7
7
  # Null value matcher
8
8
  class Null < BaseAnalyzer
9
9
  TYPE = :null
10
- CLASS = nil # TODO: Maybe use better class for Null type?
10
+ CLASS = :null
11
11
  WEIGHT = 0
12
12
 
13
13
  class << self
@@ -6,7 +6,7 @@ module Csv2Psql
6
6
  module Analyzers
7
7
  # String value matcher
8
8
  class String < BaseAnalyzer
9
- TYPE = :string
9
+ TYPE = :text
10
10
  CLASS = :character
11
11
  WEIGHT = 1
12
12
 
@@ -7,7 +7,7 @@ module Csv2Psql
7
7
  # UUID value matcher
8
8
  class Uuid < BaseAnalyzer
9
9
  TYPE = :uuid
10
- CLASS = :uuid
10
+ CLASS = :special
11
11
  WEIGHT = 5
12
12
 
13
13
  RE = /^[a-f0-9]{8}-[a-f0-9]{4}-4[a-f0-9]{3}-[89aAbB][a-f0-9]{3}-[a-f0-9]{12}$/ # rubocop:disable Metrics/LineLength
@@ -15,52 +15,6 @@ end
15
15
 
16
16
  include GLI::App
17
17
 
18
- program_desc "csv2psql #{Csv2Psql::VERSION} (Codename: #{Csv2Psql::CODENAME})"
19
-
20
- cmds = {
21
- h: {
22
- desc: 'Header row included',
23
- default_value: Csv2Psql::Processor::DEFAULT_OPTIONS['header']
24
- },
25
-
26
- d: {
27
- desc: 'Column delimiter',
28
- type: String,
29
- default_value: Csv2Psql::Processor::DEFAULT_OPTIONS['delimiter']
30
- },
31
-
32
- l: {
33
- desc: 'How many rows process',
34
- type: Integer,
35
- default_value: -1
36
- },
37
-
38
- q: {
39
- desc: 'Quoting character',
40
- type: String,
41
- default_value: Csv2Psql::Processor::DEFAULT_OPTIONS['quote']
42
- },
43
-
44
- s: {
45
- desc: 'Line separator',
46
- type: String,
47
- default_value: Csv2Psql::Processor::DEFAULT_OPTIONS['separator']
48
- },
49
-
50
- 'skip' => {
51
- desc: 'How many rows skip',
52
- type: Integer,
53
- default_value: -1
54
- }
55
- }
56
-
57
- switch [:h, :header], cmds[:h]
58
- flag [:d, :delimiter], cmds[:d]
59
- flag [:l, :limit], cmds[:l]
60
- flag [:q, :quote], cmds[:q]
61
- flag [:s, :separator], cmds[:s]
62
- flag [:skip], cmds['skip']
63
-
64
18
  module Csv2Psql
65
19
  # Csv2Psql CLI
66
20
  module Cli
@@ -54,7 +54,6 @@ Csv2Psql::Cli.module_eval do
54
54
  fail ArgumentError, 'No file to analyze specified' if args.empty?
55
55
 
56
56
  opts = {}.merge(global_options).merge(options)
57
- res = Csv2Psql::Convert.analyze(args, opts)
58
57
 
59
58
  formater = formats[opts[:format]]
60
59
  if formater.nil?
@@ -62,6 +61,8 @@ Csv2Psql::Cli.module_eval do
62
61
  fail ArgumentError, "Wrong formatter specified, can be: #{fmters}"
63
62
  end
64
63
 
64
+ res = Csv2Psql::Convert.analyze(args, opts)
65
+
65
66
  output = formater.call(res)
66
67
  if output.is_a?(Array)
67
68
  output.each do |o|
@@ -0,0 +1,85 @@
1
+ # encoding: utf-8
2
+
3
+ require 'gli'
4
+ require 'json'
5
+ require 'pp'
6
+ require 'terminal-table'
7
+
8
+ include GLI::App
9
+
10
+ require_relative '../shared'
11
+ require_relative '../../convert/convert'
12
+ require_relative '../../helpers/erb_helper'
13
+ require_relative '../../processor/processor'
14
+
15
+ Csv2Psql::Cli.module_eval do
16
+ BASE_DIR = File.join(File.dirname(__FILE__), '..', '..', '..', '..')
17
+ TEMPLATE_DIR = File.join(BASE_DIR, 'templates')
18
+ SCHEMA_TEMPLATE = File.join(TEMPLATE_DIR, 'schema.sql.erb')
19
+
20
+ formats = {
21
+ 'json' => lambda do |res|
22
+ JSON.pretty_generate(res)
23
+ end,
24
+
25
+ 'sql' => lambda do |data|
26
+ res = ''
27
+ data.each do |_k, v|
28
+ v[:table] = 'my_table'
29
+ ctx = v
30
+ erb = Csv2Psql::ErbHelper.new
31
+ res += "\n" unless res.empty?
32
+ res += erb.process(SCHEMA_TEMPLATE, ctx)
33
+ end
34
+ res
35
+ end,
36
+
37
+ 'table' => lambda do |res|
38
+ res.map do |file, data|
39
+ header = %w(column type null)
40
+
41
+ rows = data[:columns].map do |k, v|
42
+ [k, v[:type], v[:null]]
43
+ end
44
+
45
+ Terminal::Table.new title: file, headings: header, rows: rows
46
+ end
47
+ end
48
+ }
49
+
50
+ cmds = {
51
+ f: {
52
+ desc: 'Output format',
53
+ type: String,
54
+ default_value: formats.keys.first
55
+ }
56
+ }
57
+
58
+ desc 'Generate schema for file'
59
+ command :schema do |c|
60
+ c.flag [:f, :format], cmds[:f]
61
+
62
+ c.action do |global_options, options, args|
63
+ fail ArgumentError, 'No file to analyze specified' if args.empty?
64
+
65
+ opts = {}.merge(global_options).merge(options)
66
+
67
+ formater = formats[opts[:format]]
68
+ if formater.nil?
69
+ fmters = formats.keys.join(', ')
70
+ fail ArgumentError, "Wrong formatter specified, can be: #{fmters}"
71
+ end
72
+
73
+ res = Csv2Psql::Convert.generate_schema(args, opts)
74
+
75
+ output = formater.call(res)
76
+ if output.is_a?(Array)
77
+ output.each do |o|
78
+ puts o
79
+ end
80
+ else
81
+ puts output
82
+ end
83
+ end
84
+ end
85
+ end
@@ -7,14 +7,19 @@ module Csv2Psql
7
7
  # Csv2Psql convert module
8
8
  module Convert
9
9
  class << self
10
+ def analyze(paths, opts = {})
11
+ p = Processor.new
12
+ p.analyze(paths, opts)
13
+ end
14
+
10
15
  def convert(paths, opts = {})
11
16
  p = Processor.new
12
17
  p.convert(paths, opts)
13
18
  end
14
19
 
15
- def analyze(paths, opts = {})
20
+ def generate_schema(paths, opts = {})
16
21
  p = Processor.new
17
- p.analyze(paths, opts)
22
+ p.generate_schema(paths, opts)
18
23
  end
19
24
  end
20
25
  end
@@ -32,6 +32,17 @@ module Csv2Psql
32
32
 
33
33
  attr_reader :output
34
34
 
35
+ class << self
36
+ def sanitize_header(header_column)
37
+ header_column.downcase.gsub(/[^0-9a-z]/i, '_')
38
+ end
39
+
40
+ def sanitize_value(value)
41
+ value ||= ''
42
+ value.gsub("'", "''")
43
+ end
44
+ end
45
+
35
46
  def initialize(output)
36
47
  @output = output
37
48
  end
@@ -90,7 +101,7 @@ module Csv2Psql
90
101
 
91
102
  def get_columns(row, opts = {}, header = get_header(row, opts))
92
103
  if opts[:header]
93
- header.map { |h| sanitize_header(h) }
104
+ header.map { |h| Generator.sanitize_header(h) }
94
105
  else
95
106
  row.map.with_index do |_item, i|
96
107
  "col_#{i}"
@@ -101,20 +112,11 @@ module Csv2Psql
101
112
  def get_values(row, opts = {}, header = get_header(row, opts))
102
113
  header.map do |h|
103
114
  value = row[h]
104
- sanitized_value = sanitize_value(value)
115
+ sanitized_value = Generator.sanitize_value(value)
105
116
  "'#{sanitized_value}'"
106
117
  end
107
118
  end
108
119
 
109
- def sanitize_header(header_column)
110
- header_column.downcase.gsub(/[^0-9a-z]/i, '_')
111
- end
112
-
113
- def sanitize_value(value)
114
- value ||= ''
115
- value.gsub("'", "''")
116
- end
117
-
118
120
  def truncate_table(path, row, opts = {})
119
121
  ctx = create_erb_context(path, row, opts)
120
122
  erb = ErbHelper.new
@@ -12,6 +12,7 @@ require_relative '../helpers/config_helper'
12
12
  require_relative '../helpers/csv_helper'
13
13
  require_relative '../helpers/erb_helper'
14
14
  require_relative '../output/output'
15
+ require_relative '../schema/schema_generator'
15
16
  require_relative '../version'
16
17
 
17
18
  module Csv2Psql
@@ -30,7 +31,7 @@ module Csv2Psql
30
31
  end
31
32
 
32
33
  def analyze(paths, opts = {})
33
- with_paths(paths, opts) do |data|
34
+ with_files(paths, opts) do |data|
34
35
  analyzer.analyze(data[:path], data[:row], opts)
35
36
  end
36
37
  analyzer
@@ -38,9 +39,8 @@ module Csv2Psql
38
39
 
39
40
  def convert(paths, opts = {})
40
41
  details = {}
41
- with_paths(paths, opts) do |data|
42
+ with_files(paths, opts) do |data|
42
43
  create_converted_header(details, data, opts)
43
-
44
44
  output.write generator.format_row(data[:row], opts)
45
45
  end
46
46
  end
@@ -62,6 +62,19 @@ module Csv2Psql
62
62
  files[path]
63
63
  end
64
64
 
65
+ def generate_schema(paths, opts = {})
66
+ res = {}
67
+ paths.each do |path|
68
+ with_file(path, opts) do |data|
69
+ path = data[:path]
70
+ analyzer.analyze(path, data[:row], opts)
71
+ end
72
+
73
+ res[path] = SchemaGenerator.generate(analyzer.files[path])
74
+ end
75
+ res
76
+ end
77
+
65
78
  def get_file_details(files, path)
66
79
  if files.key?(path)
67
80
  files[path]
@@ -95,7 +108,7 @@ module Csv2Psql
95
108
  end
96
109
  end
97
110
 
98
- def with_path(path, opts = {}, &block)
111
+ def with_file(path, opts = {}, &block)
99
112
  output.write 'BEGIN;' if opts[:transaction]
100
113
  csv_opts = merge_csv_options(opts)
101
114
  @first_row = true
@@ -105,10 +118,10 @@ module Csv2Psql
105
118
  output.write 'COMMIT;' if opts[:transaction]
106
119
  end
107
120
 
108
- def with_paths(paths, opts = {}, &block)
121
+ def with_files(paths, opts = {}, &block)
109
122
  paths = [paths] unless paths.is_a?(Array)
110
123
  paths.each do |path|
111
- with_path(path, opts, &block)
124
+ with_file(path, opts, &block)
112
125
  end
113
126
  end
114
127
 
@@ -0,0 +1,48 @@
1
+ # encoding: UTF-8
2
+
3
+ module Csv2Psql
4
+ # Csv2Psql schema generator class
5
+ class SchemaGenerator
6
+ class << self
7
+ def select_analyzers_by_match(analyzers, match)
8
+ null_count = analyzers['Null'][:results][:count]
9
+ analyzers.select do |_k, v|
10
+ v[:results][:count] + null_count == match
11
+ end
12
+ end
13
+
14
+ def select_analyzers_class(analyzers, class_name)
15
+ analyzers.select { |_k, v| v[:class].sql_class?(class_name) }
16
+ end
17
+
18
+ def select_best(analyzers, lines)
19
+ analyzers = select_analyzers_by_match(analyzers, lines)
20
+ sorted = analyzers.sort do |a, b|
21
+ a[1][:class].weight <=> b[1][:class].weight
22
+ end
23
+
24
+ analyzers[sorted.last[0]]
25
+ end
26
+
27
+ def format_result(analysis, lines)
28
+ res = { columns: {} }
29
+ analysis.each do |k, v|
30
+ res[:columns][k] = {
31
+ type: v[:class].sql_type,
32
+ null: v[:results][:count] != lines
33
+ }
34
+ end
35
+ res
36
+ end
37
+
38
+ def generate(analysis, _opts = {})
39
+ res = {}
40
+ analysis[:columns].each do |name, analyzers|
41
+ analyzer = select_best(analyzers, analysis[:lines])
42
+ res[name] = analyzer
43
+ end
44
+ format_result(res, analysis[:lines])
45
+ end
46
+ end
47
+ end
48
+ end
@@ -2,6 +2,6 @@
2
2
 
3
3
  # Csv2Psql module
4
4
  module Csv2Psql
5
- CODENAME = 'Famous rat'
6
- VERSION = '0.0.11'
5
+ CODENAME = 'Lazy dolphin'
6
+ VERSION = '0.0.12'
7
7
  end
@@ -0,0 +1,11 @@
1
+ CREATE TABLE <%= ctx[:table] %>
2
+ (
3
+ <% ctx[:columns].each_with_index do |item, index| %>
4
+ "<%= Generator.sanitize_header(item[0]) %>" <%= item[1][:type] %> <% if !item[1][:null] %>NOT NULL<% end %><%= ", " if index < ctx[:columns].length - 1%>
5
+
6
+ <% end %>
7
+ )
8
+ WITH (
9
+ OIDS=FALSE
10
+ );
11
+
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csv2psql
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.11
4
+ version: 0.0.12
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tomas Korcak
@@ -265,6 +265,7 @@ files:
265
265
  - TODO.md
266
266
  - bin/csv2psql
267
267
  - config/config.json
268
+ - data/census_SFOH_2010.csv
268
269
  - data/cia-data-all.csv
269
270
  - data/sample.csv
270
271
  - data/sample_bool.csv
@@ -284,6 +285,7 @@ files:
284
285
  - lib/csv2psql/cli/cli.rb
285
286
  - lib/csv2psql/cli/cmd/analyze_cmd.rb
286
287
  - lib/csv2psql/cli/cmd/convert_cmd.rb
288
+ - lib/csv2psql/cli/cmd/schema_cmd.rb
287
289
  - lib/csv2psql/cli/cmd/version_cmd.rb
288
290
  - lib/csv2psql/cli/shared.rb
289
291
  - lib/csv2psql/config/config.rb
@@ -301,6 +303,7 @@ files:
301
303
  - lib/csv2psql/lib.rb
302
304
  - lib/csv2psql/output/output.rb
303
305
  - lib/csv2psql/processor/processor.rb
306
+ - lib/csv2psql/schema/schema_generator.rb
304
307
  - lib/csv2psql/version.rb
305
308
  - spec/cli/app_spec.rb
306
309
  - spec/cli/cmd/analyze_cmd_spec.rb
@@ -311,6 +314,7 @@ files:
311
314
  - templates/create_table.sql.erb
312
315
  - templates/drop_table.sql.erb
313
316
  - templates/header.sql.erb
317
+ - templates/schema.sql.erb
314
318
  - templates/truncate_table.sql.erb
315
319
  homepage: https://github.com/korczis/csv2psql
316
320
  licenses: