RubyGems - csv2psql - Versions diffs - 0.0.11 → 0.0.12 - Mend

csv2psql 0.0.11 → 0.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18)

checksums.yaml +4 -4
data/README.md +82 -3
data/data/census_SFOH_2010.csv +981 -0
data/lib/csv2psql/analyzer/types/base_analyzer.rb +45 -1
data/lib/csv2psql/analyzer/types/boolean.rb +1 -1
data/lib/csv2psql/analyzer/types/null.rb +1 -1
data/lib/csv2psql/analyzer/types/string.rb +1 -1
data/lib/csv2psql/analyzer/types/uuid.rb +1 -1
data/lib/csv2psql/cli/app.rb +0 -46
data/lib/csv2psql/cli/cmd/analyze_cmd.rb +2 -1
data/lib/csv2psql/cli/cmd/schema_cmd.rb +85 -0
data/lib/csv2psql/convert/convert.rb +7 -2
data/lib/csv2psql/generator/generator.rb +13 -11
data/lib/csv2psql/processor/processor.rb +19 -6
data/lib/csv2psql/schema/schema_generator.rb +48 -0
data/lib/csv2psql/version.rb +2 -2
data/templates/schema.sql.erb +11 -0
metadata +5 -1

data/lib/csv2psql/analyzer/types/base_analyzer.rb CHANGED Viewed

@@ -16,7 +16,31 @@ module Csv2Psql
         end
         def numeric?
-          const_get('CLASS') == :numeric
+          sql_class?(:numeric)
+        end
+        def sql_class?(class_name)
+          const_get('CLASS') == class_name
+        end
+        def sql_class
+          const_get('CLASS')
+        end
+        def sql_class?(class_name)
+          sql_class == class_name
+        end
+        def sql_type
+          const_get('TYPE')
+        end
+        def sql_type?(type_name)
+          sql_type == type_name
+        end
+        def weight
+          const_get('WEIGHT')
         end
       end
@@ -31,6 +55,26 @@ module Csv2Psql
       def numeric?
         self.class.numeric?
       end
+      def sql_class
+        self.class.sql_class
+      end
+      def sql_class?(class_name)
+        self.class.sql_class?(class_name)
+      end
+      def sql_type
+        self.class.sql_type
+      end
+      def sql_type?(type_name)
+        self.class.sql_type?(type_name)
+      end
+      def weight
+        self.class.weight
+      end
     end
   end
 end

data/lib/csv2psql/analyzer/types/boolean.rb CHANGED Viewed

@@ -7,7 +7,7 @@ module Csv2Psql
     # Boolean value matcher
     class Boolean < BaseAnalyzer
       TYPE = :boolean
-      CLASS = :boolean
+      CLASS = :special
       WEIGHT = 5
       BOOLEAN_VALUES = %w(true false 0 1)

data/lib/csv2psql/analyzer/types/null.rb CHANGED Viewed

@@ -7,7 +7,7 @@ module Csv2Psql
     # Null value matcher
     class Null < BaseAnalyzer
       TYPE = :null
-      CLASS = nil # TODO: Maybe use better class for Null type?
+      CLASS = :null
       WEIGHT = 0
       class << self

data/lib/csv2psql/analyzer/types/string.rb CHANGED Viewed

@@ -6,7 +6,7 @@ module Csv2Psql
   module Analyzers
     # String value matcher
     class String < BaseAnalyzer
-      TYPE = :string
+      TYPE = :text
       CLASS = :character
       WEIGHT = 1

data/lib/csv2psql/analyzer/types/uuid.rb CHANGED Viewed

@@ -7,7 +7,7 @@ module Csv2Psql
     # UUID value matcher
     class Uuid < BaseAnalyzer
       TYPE = :uuid
-      CLASS = :uuid
+      CLASS = :special
       WEIGHT = 5
       RE = /^[a-f0-9]{8}-[a-f0-9]{4}-4[a-f0-9]{3}-[89aAbB][a-f0-9]{3}-[a-f0-9]{12}$/ # rubocop:disable Metrics/LineLength

data/lib/csv2psql/cli/app.rb CHANGED Viewed

@@ -15,52 +15,6 @@ end
 include GLI::App
-program_desc "csv2psql #{Csv2Psql::VERSION} (Codename: #{Csv2Psql::CODENAME})"
-cmds = {
-  h: {
-    desc: 'Header row included',
-    default_value: Csv2Psql::Processor::DEFAULT_OPTIONS['header']
-  },
-  d: {
-    desc: 'Column delimiter',
-    type: String,
-    default_value: Csv2Psql::Processor::DEFAULT_OPTIONS['delimiter']
-  },
-  l: {
-    desc: 'How many rows process',
-    type: Integer,
-    default_value: -1
-  },
-  q: {
-    desc: 'Quoting character',
-    type: String,
-    default_value: Csv2Psql::Processor::DEFAULT_OPTIONS['quote']
-  },
-  s: {
-    desc: 'Line separator',
-    type: String,
-    default_value: Csv2Psql::Processor::DEFAULT_OPTIONS['separator']
-  },
-  'skip' => {
-    desc: 'How many rows skip',
-    type: Integer,
-    default_value: -1
-  }
-}
-switch [:h, :header], cmds[:h]
-flag [:d, :delimiter], cmds[:d]
-flag [:l, :limit], cmds[:l]
-flag [:q, :quote], cmds[:q]
-flag [:s, :separator], cmds[:s]
-flag [:skip], cmds['skip']
 module Csv2Psql
   # Apollon CLI
   module Cli

data/lib/csv2psql/cli/cmd/analyze_cmd.rb CHANGED Viewed

@@ -54,7 +54,6 @@ Csv2Psql::Cli.module_eval do
       fail ArgumentError, 'No file to analyze specified' if args.empty?
       opts = {}.merge(global_options).merge(options)
-      res = Csv2Psql::Convert.analyze(args, opts)
       formater = formats[opts[:format]]
       if formater.nil?
@@ -62,6 +61,8 @@ Csv2Psql::Cli.module_eval do
         fail ArgumentError, "Wrong formatter specified, can be: #{fmters}"
       end
+      res = Csv2Psql::Convert.analyze(args, opts)
       output = formater.call(res)
       if output.is_a?(Array)
         output.each do |o|

data/lib/csv2psql/cli/cmd/schema_cmd.rb ADDED Viewed

@@ -0,0 +1,85 @@
+# encoding: utf-8
+require 'gli'
+require 'json'
+require 'pp'
+require 'terminal-table'
+include GLI::App
+require_relative '../shared'
+require_relative '../../convert/convert'
+require_relative '../../helpers/erb_helper'
+require_relative '../../processor/processor'
+Csv2Psql::Cli.module_eval do
+  BASE_DIR = File.join(File.dirname(__FILE__), '..', '..', '..', '..')
+  TEMPLATE_DIR = File.join(BASE_DIR, 'templates')
+  SCHEMA_TEMPLATE = File.join(TEMPLATE_DIR, 'schema.sql.erb')
+  formats = {
+    'json' => lambda do |res|
+      JSON.pretty_generate(res)
+    end,
+    'sql' => lambda do |data|
+      res = ''
+      data.each do |_k, v|
+        v[:table] = 'my_table'
+        ctx = v
+        erb = Csv2Psql::ErbHelper.new
+        res += "\n" unless res.empty?
+        res += erb.process(SCHEMA_TEMPLATE, ctx)
+      end
+      res
+    end,
+    'table' => lambda do |res|
+      res.map do |file, data|
+        header = %w(column type null)
+        rows = data[:columns].map do |k, v|
+          [k, v[:type], v[:null]]
+        end
+        Terminal::Table.new title: file, headings: header, rows: rows
+      end
+    end
+  }
+  cmds = {
+    f: {
+      desc: 'Output format',
+      type: String,
+      default_value: formats.keys.first
+    }
+  }
+  desc 'Generate schema for file'
+  command :schema do |c|
+    c.flag [:f, :format], cmds[:f]
+    c.action do |global_options, options, args|
+      fail ArgumentError, 'No file to analyze specified' if args.empty?
+      opts = {}.merge(global_options).merge(options)
+      formater = formats[opts[:format]]
+      if formater.nil?
+        fmters = formats.keys.join(', ')
+        fail ArgumentError, "Wrong formatter specified, can be: #{fmters}"
+      end
+      res = Csv2Psql::Convert.generate_schema(args, opts)
+      output = formater.call(res)
+      if output.is_a?(Array)
+        output.each do |o|
+          puts o
+        end
+      else
+        puts output
+      end
+    end
+  end
+end

data/lib/csv2psql/convert/convert.rb CHANGED Viewed

@@ -7,14 +7,19 @@ module Csv2Psql
   # Csv2Psql convert module
   module Convert
     class << self
+      def analyze(paths, opts = {})
+        p = Processor.new
+        p.analyze(paths, opts)
+      end
       def convert(paths, opts = {})
         p = Processor.new
         p.convert(paths, opts)
       end
-      def analyze(paths, opts = {})
+      def generate_schema(paths, opts = {})
         p = Processor.new
-        p.analyze(paths, opts)
+        p.generate_schema(paths, opts)
       end
     end
   end

data/lib/csv2psql/generator/generator.rb CHANGED Viewed

@@ -32,6 +32,17 @@ module Csv2Psql
     attr_reader :output
+    class << self
+      def sanitize_header(header_column)
+        header_column.downcase.gsub(/[^0-9a-z]/i, '_')
+      end
+      def sanitize_value(value)
+        value ||= ''
+        value.gsub("'", "''")
+      end
+    end
     def initialize(output)
       @output = output
     end
@@ -90,7 +101,7 @@ module Csv2Psql
     def get_columns(row, opts = {}, header = get_header(row, opts))
       if opts[:header]
-        header.map { |h| sanitize_header(h) }
+        header.map { |h| Generator.sanitize_header(h) }
       else
         row.map.with_index do |_item, i|
           "col_#{i}"
@@ -101,20 +112,11 @@ module Csv2Psql
     def get_values(row, opts = {}, header = get_header(row, opts))
       header.map do |h|
         value = row[h]
-        sanitized_value = sanitize_value(value)
+        sanitized_value = Generator.sanitize_value(value)
         "'#{sanitized_value}'"
       end
     end
-    def sanitize_header(header_column)
-      header_column.downcase.gsub(/[^0-9a-z]/i, '_')
-    end
-    def sanitize_value(value)
-      value ||= ''
-      value.gsub("'", "''")
-    end
     def truncate_table(path, row, opts = {})
       ctx = create_erb_context(path, row, opts)
       erb = ErbHelper.new

data/lib/csv2psql/processor/processor.rb CHANGED Viewed

@@ -12,6 +12,7 @@ require_relative '../helpers/config_helper'
 require_relative '../helpers/csv_helper'
 require_relative '../helpers/erb_helper'
 require_relative '../output/output'
+require_relative '../schema/schema_generator'
 require_relative '../version'
 module Csv2Psql
@@ -30,7 +31,7 @@ module Csv2Psql
     end
     def analyze(paths, opts = {})
-      with_paths(paths, opts) do |data|
+      with_files(paths, opts) do |data|
         analyzer.analyze(data[:path], data[:row], opts)
       end
       analyzer
@@ -38,9 +39,8 @@ module Csv2Psql
     def convert(paths, opts = {})
       details = {}
-      with_paths(paths, opts) do |data|
+      with_files(paths, opts) do |data|
         create_converted_header(details, data, opts)
         output.write generator.format_row(data[:row], opts)
       end
     end
@@ -62,6 +62,19 @@ module Csv2Psql
       files[path]
     end
+    def generate_schema(paths, opts = {})
+      res = {}
+      paths.each do |path|
+        with_file(path, opts) do |data|
+          path = data[:path]
+          analyzer.analyze(path, data[:row], opts)
+        end
+        res[path] = SchemaGenerator.generate(analyzer.files[path])
+      end
+      res
+    end
     def get_file_details(files, path)
       if files.key?(path)
         files[path]
@@ -95,7 +108,7 @@ module Csv2Psql
       end
     end
-    def with_path(path, opts = {}, &block)
+    def with_file(path, opts = {}, &block)
       output.write 'BEGIN;' if opts[:transaction]
       csv_opts = merge_csv_options(opts)
       @first_row = true
@@ -105,10 +118,10 @@ module Csv2Psql
       output.write 'COMMIT;' if opts[:transaction]
     end
-    def with_paths(paths, opts = {}, &block)
+    def with_files(paths, opts = {}, &block)
       paths = [paths] unless paths.is_a?(Array)
       paths.each do |path|
-        with_path(path, opts, &block)
+        with_file(path, opts, &block)
       end
     end

data/lib/csv2psql/schema/schema_generator.rb ADDED Viewed

@@ -0,0 +1,48 @@
+# encoding: UTF-8
+module Csv2Psql
+  # Csv2Psql schema generator class
+  class SchemaGenerator
+    class << self
+      def select_analyzers_by_match(analyzers, match)
+        null_count = analyzers['Null'][:results][:count]
+        analyzers.select do |_k, v|
+          v[:results][:count] + null_count == match
+        end
+      end
+      def select_analyzers_class(analyzers, class_name)
+        analyzers.select { |_k, v| v[:class].sql_class?(class_name) }
+      end
+      def select_best(analyzers, lines)
+        analyzers = select_analyzers_by_match(analyzers, lines)
+        sorted = analyzers.sort do |a, b|
+          a[1][:class].weight <=> b[1][:class].weight
+        end
+        analyzers[sorted.last[0]]
+      end
+      def format_result(analysis, lines)
+        res = { columns: {} }
+        analysis.each do |k, v|
+          res[:columns][k] = {
+            type: v[:class].sql_type,
+            null: v[:results][:count] != lines
+          }
+        end
+        res
+      end
+      def generate(analysis, _opts = {})
+        res = {}
+        analysis[:columns].each do |name, analyzers|
+          analyzer = select_best(analyzers, analysis[:lines])
+          res[name] = analyzer
+        end
+        format_result(res, analysis[:lines])
+      end
+    end
+  end
+end

data/lib/csv2psql/version.rb CHANGED Viewed

@@ -2,6 +2,6 @@
 # Csv2Psql module
 module Csv2Psql
-  CODENAME = 'Famous rat'
-  VERSION = '0.0.11'
+  CODENAME = 'Lazy dolphin'
+  VERSION = '0.0.12'
 end

data/templates/schema.sql.erb ADDED Viewed

@@ -0,0 +1,11 @@
+CREATE TABLE <%= ctx[:table] %>
+(
+<% ctx[:columns].each_with_index do |item, index| %>
+	"<%= Generator.sanitize_header(item[0]) %>" <%= item[1][:type] %> <% if !item[1][:null] %>NOT NULL<% end %><%= ", " if index < ctx[:columns].length - 1%>
+<% end %>
+)
+WITH (
+  OIDS=FALSE
+);

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: csv2psql
 version: !ruby/object:Gem::Version
-  version: 0.0.11
+  version: 0.0.12
 platform: ruby
 authors:
 - Tomas Korcak
@@ -265,6 +265,7 @@ files:
 - TODO.md
 - bin/csv2psql
 - config/config.json
+- data/census_SFOH_2010.csv
 - data/cia-data-all.csv
 - data/sample.csv
 - data/sample_bool.csv
@@ -284,6 +285,7 @@ files:
 - lib/csv2psql/cli/cli.rb
 - lib/csv2psql/cli/cmd/analyze_cmd.rb
 - lib/csv2psql/cli/cmd/convert_cmd.rb
+- lib/csv2psql/cli/cmd/schema_cmd.rb
 - lib/csv2psql/cli/cmd/version_cmd.rb
 - lib/csv2psql/cli/shared.rb
 - lib/csv2psql/config/config.rb
@@ -301,6 +303,7 @@ files:
 - lib/csv2psql/lib.rb
 - lib/csv2psql/output/output.rb
 - lib/csv2psql/processor/processor.rb
+- lib/csv2psql/schema/schema_generator.rb
 - lib/csv2psql/version.rb
 - spec/cli/app_spec.rb
 - spec/cli/cmd/analyze_cmd_spec.rb
@@ -311,6 +314,7 @@ files:
 - templates/create_table.sql.erb
 - templates/drop_table.sql.erb
 - templates/header.sql.erb
+- templates/schema.sql.erb
 - templates/truncate_table.sql.erb
 homepage: https://github.com/korczis/csv2psql
 licenses: