RubyGems - csv2psql - Versions diffs - 0.0.6 → 0.0.8 - Mend

csv2psql 0.0.6 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

checksums.yaml +4 -4
data/.gitignore +2 -0
data/README.md +38 -13
data/TODO.md +12 -0
data/data/cia-data-all.csv +262 -0
data/lib/csv2psql/analyzer/analyzer.rb +89 -0
data/lib/csv2psql/analyzer/types/bigint.rb +27 -0
data/lib/csv2psql/analyzer/types/decimal.rb +27 -0
data/lib/csv2psql/cli/app.rb +36 -6
data/lib/csv2psql/cli/cmd/analyze_cmd.rb +24 -0
data/lib/csv2psql/cli/cmd/convert_cmd.rb +0 -27
data/lib/csv2psql/convert/convert.rb +5 -0
data/lib/csv2psql/dialects/psql.rb +66 -0
data/lib/csv2psql/extensions/string.rb +10 -0
data/lib/csv2psql/generator/generator.rb +126 -0
data/lib/csv2psql/helpers/csv_helper.rb +21 -0
data/lib/csv2psql/output/output.rb +18 -0
data/lib/csv2psql/processor/processor.rb +30 -103
data/lib/csv2psql/version.rb +2 -1
data/templates/header.sql.erb +1 -1
metadata +13 -2

data/lib/csv2psql/analyzer/analyzer.rb ADDED Viewed

@@ -0,0 +1,89 @@
+# encoding: UTF-8
+require 'csv'
+require 'multi_json'
+require 'pathname'
+require 'pp'
+require_relative '../helpers/erb_helper'
+require_relative '../extensions/string'
+module Csv2Psql
+  # Gathers per-file, per-column statistics from CSV rows. Every column gets
+  # one instance of each analyzer class found under ANALYZERS_DIR, and every
+  # cell value is handed to all of that column's analyzers.
+  #
+  # NOTE(review): CsvHelper is used in #analyze but is not required by this
+  # file; presumably another file loads it first - confirm.
+  class Analyzer
+    DEFAULT_OPTIONS = {}
+    ANALYZERS_DIR = File.join(File.dirname(__FILE__), 'types')
+    # analyzers - array of { name:, class: } hashes built by #load_analyzers
+    # files     - path => { columns: { column => { name => analyzer } },
+    #             lines: count }
+    attr_reader :analyzers, :files
+    def initialize
+      @files = {}
+      @analyzers = load_analyzers
+    end
+    # Feeds one row of the file stored under +path+ to that file's column
+    # analyzers and bumps the file's line counter.
+    #
+    # path - key under which the file's statistics are kept
+    # row  - row object; it is indexed with each entry of its header
+    # opts - options passed on to CsvHelper.get_header
+    #
+    # Returns the updated line count for +path+.
+    def analyze(path, row, opts = {})
+      data = get_data(path)
+      CsvHelper.get_header(row, opts).each do |column|
+        column_analyzers = get_column(data, column)
+        value = row[column]
+        column_analyzers.each_value { |analyzer| analyzer.analyze(value) }
+      end
+      data[:lines] += 1
+    end
+    # Registers +column+ in +data+ with a fresh instance of every known
+    # analyzer. Returns the new name => analyzer hash.
+    def create_column(data, column)
+      data[:columns][column] = analyzers.each_with_object({}) do |analyzer, res|
+        res[analyzer[:name]] = analyzer[:class].new
+      end
+    end
+    # Creates and returns the empty statistics entry for +path+.
+    def create_data(path)
+      files[path] = { columns: {}, lines: 0 }
+    end
+    # Returns the statistics entry for +path+, creating it on first use.
+    def get_data(path)
+      files.key?(path) ? files[path] : create_data(path)
+    end
+    # Returns the analyzers of +column+ in +data+, creating them on first use.
+    def get_column(data, column)
+      data[:columns][column] || create_column(data, column)
+    end
+    # Requires every Ruby file below ANALYZERS_DIR and looks up the class
+    # named after the file (bigint.rb -> Csv2Psql::Analyzers::Bigint).
+    #
+    # Returns an array of { name: String, class: Class } hashes.
+    def load_analyzers
+      # File.join supplies the separator between the directory and the
+      # pattern; plain concatenation yields ".../types**/*.rb", which only
+      # matches by accident and also picks up sibling "types*" directories.
+      # Sorting keeps the analyzer order independent of the file system.
+      Dir[File.join(ANALYZERS_DIR, '**', '*.rb')].sort.map do |path|
+        analyzer_class = File.basename(path, '.rb').camel_case
+        require(path)
+        klass = Object.const_get('Csv2Psql')
+          .const_get('Analyzers')
+          .const_get(analyzer_class)
+        { name: analyzer_class, class: klass }
+      end
+    end
+  end
+end

data/lib/csv2psql/analyzer/types/bigint.rb ADDED Viewed

@@ -0,0 +1,27 @@
+# encoding: UTF-8
+module Csv2Psql
+  module Analyzers
+    # Counts the analyzed values that are whole numbers and remembers the
+    # smallest and largest of them.
+    class Bigint
+      TYPE = :bigint
+      # The whole value must be an optionally signed run of digits. \A and \z
+      # anchor the entire string; ^ and $ only anchor a line, so they would
+      # accept a value such as "12\nabc".
+      PATTERN = /\A[-+]?\d+\z/
+      # count - number of values accepted so far
+      # min   - smallest accepted value, nil until one was accepted
+      # max   - largest accepted value, nil until one was accepted
+      attr_reader :count, :min, :max
+      def initialize
+        @count = 0
+        @min = nil
+        @max = nil
+      end
+      # Records +val+ when it is an Integer or a String holding one; any
+      # other value (nil included) is ignored.
+      def analyze(val)
+        return unless integer?(val)
+        number = val.to_i
+        @count += 1
+        @min = number if @min.nil? || number < @min
+        @max = number if @max.nil? || number > @max
+      end
+      private
+      # True when +val+ is an Integer or a String matching PATTERN.
+      def integer?(val)
+        val.is_a?(Integer) || (val.is_a?(String) && !(val =~ PATTERN).nil?)
+      end
+    end
+  end
+end

data/lib/csv2psql/analyzer/types/decimal.rb ADDED Viewed

@@ -0,0 +1,27 @@
+# encoding: UTF-8
+module Csv2Psql
+  module Analyzers
+    # Counts the analyzed values that are decimal numbers and remembers the
+    # smallest and largest of them.
+    class Decimal
+      TYPE = :decimal
+      # The whole value must be digits, one comma or dot, digits, with an
+      # optional sign. Without the \A and \z anchors any text that merely
+      # contains such a run ("192.168.0.1", "v1.2") would be counted.
+      PATTERN = /\A[-+]?\d+[,.]\d+\z/
+      # count - number of values accepted so far
+      # min   - smallest accepted value, nil until one was accepted
+      # max   - largest accepted value, nil until one was accepted
+      attr_reader :count, :min, :max
+      def initialize
+        @count = 0
+        @min = nil
+        @max = nil
+      end
+      # Records +val+ when it is a Float or a String holding a decimal
+      # number; any other value (nil included) is ignored.
+      def analyze(val)
+        number = to_number(val)
+        return if number.nil?
+        @count += 1
+        @min = number if @min.nil? || number < @min
+        @max = number if @max.nil? || number > @max
+      end
+      private
+      # Returns +val+ as a Float, or nil when it is not a decimal number.
+      def to_number(val)
+        return val if val.is_a?(Float)
+        return unless val.is_a?(String) && val =~ PATTERN
+        # String#to_f stops at a comma ("2,25".to_f == 2.0), so a decimal
+        # comma is turned into a dot before converting.
+        val.tr(',', '.').to_f
+      end
+    end
+  end
+end

data/lib/csv2psql/cli/app.rb CHANGED Viewed

@@ -7,19 +7,51 @@ require 'pp'
 require_relative 'shared'
 require_relative '../version'
+require_relative '../processor/processor'
 def launch(argv = ARGV)
   run(argv)
 end
 include GLI::App
-program_desc "csv2psql #{Csv2Psql::VERSION}"
+program_desc "csv2psql #{Csv2Psql::VERSION} (Codename: #{Csv2Psql::CODENAME})"
+# CSV parsing options, declared at the top level instead of on a single
+# command. Their default values are read from Processor::DEFAULT_OPTIONS.
+defaults = Csv2Psql::Processor::DEFAULT_OPTIONS
+cmds = {
+  h: { desc: 'Header row included', default_value: defaults[:header] },
+  d: {
+    desc: 'Column delimiter',
+    type: String,
+    default_value: defaults[:delimiter]
+  },
+  q: {
+    desc: 'Quoting character',
+    type: String,
+    default_value: defaults[:quote]
+  },
+  s: {
+    desc: 'Line separator',
+    type: String,
+    default_value: defaults[:separator]
+  }
+}
+# :h is an on/off switch; the remaining options are flags that take a value.
+switch [:h, :header], cmds[:h]
+flag [:d, :delimiter], cmds[:d]
+flag [:q, :quote], cmds[:q]
+flag [:s, :separator], cmds[:s]
 module Csv2Psql
   # Apollon CLI
-  module Cli
-    # CLI Application
-    class App
+      module Cli
+        # CLI Application
+        class App
       extend Csv2Psql::Cli::Shared
       cmds = File.absolute_path(File.join(File.dirname(__FILE__), 'cmd'))
@@ -27,8 +59,6 @@ module Csv2Psql
         require file
       end
-      program_desc 'Csv2Psql CLI'
       def main(argv = ARGV)
         launch(argv)
       end

data/lib/csv2psql/cli/cmd/analyze_cmd.rb ADDED Viewed

@@ -0,0 +1,24 @@
+# encoding: utf-8
+require 'gli'
+require 'pp'
+include GLI::App
+require_relative '../shared'
+require_relative '../../convert/convert'
+require_relative '../../processor/processor'
+# `analyze` command: runs Csv2Psql::Convert.analyze on the files given as
+# arguments and pretty-prints the `files` of the result.
+desc 'Analyze csv file'
+command :analyze do |c|
+  c.action do |global_options, options, args|
+    raise ArgumentError, 'No file to analyze specified' if args.empty?
+    # Options given to the command override global options of the same name.
+    opts = {}.merge(global_options).merge(options)
+    res = Csv2Psql::Convert.analyze(args, opts)
+    pp res.files
+  end
+end

data/lib/csv2psql/cli/cmd/convert_cmd.rb CHANGED Viewed

@@ -9,35 +9,12 @@ require_relative '../../convert/convert'
 require_relative '../../processor/processor'
 cmds = {
-  h: {
-    desc: 'Header row included',
-    default_value: Csv2Psql::Processor::DEFAULT_OPTIONS[:header]
-  },
-  d: {
-    desc: 'Column delimiter',
-    type: String,
-    default_value: Csv2Psql::Processor::DEFAULT_OPTIONS[:delimiter]
-  },
   t: {
     desc: 'Table to insert to',
     type: String,
     default_value: Csv2Psql::Processor::DEFAULT_OPTIONS[:table]
   },
-  q: {
-    desc: 'Quoting character',
-    type: String,
-    default_value: Csv2Psql::Processor::DEFAULT_OPTIONS[:quote]
-  },
-  s: {
-    desc: 'Line separator',
-    type: String,
-    default_value: Csv2Psql::Processor::DEFAULT_OPTIONS[:separator]
-  },
   transaction: {
     desc: 'Import in transaction block',
     default_value: Csv2Psql::Processor::DEFAULT_OPTIONS[:transaction]
@@ -61,11 +38,7 @@ cmds = {
 desc 'Convert csv file'
 command :convert do |c|
-  c.switch [:h, :header], cmds[:h]
-  c.flag [:d, :delimiter], cmds[:d]
   c.flag [:t, :table], cmds[:t]
-  c.flag [:q, :quote], cmds[:q]
-  c.flag [:s, :separator], cmds[:s]
   c.switch [:transaction], cmds[:transaction]
   c.switch ['create-table'], cmds['create-table']
   c.switch ['drop-table'], cmds['drop-table']

data/lib/csv2psql/convert/convert.rb CHANGED Viewed

@@ -16,6 +16,11 @@ module Csv2Psql
         p = Processor.new
         p.convert(paths, opts)
       end
+      # Runs Processor#analyze on a fresh Processor for the given paths and
+      # options and returns its result.
+      def analyze(paths, opts = {})
+        Processor.new.analyze(paths, opts)
+      end
     end
   end
 end

data/lib/csv2psql/dialects/psql.rb ADDED Viewed

@@ -0,0 +1,66 @@
+# encoding: UTF-8
+module Csv2Psql
+  module Dialect
+    # PostgreSQL specific stuff
+    class Psql
+      # Numeric column types. Every entry carries the type category, the
+      # type name and a size (presumably the storage size in bytes; nil where
+      # none is given); :min and :max are present only for bounded types.
+      # The array and its entries are frozen so the shared table cannot be
+      # modified by accident.
+      NUMERIC_TYPES = [
+        {
+          type: :numeric,
+          name: 'smallint',
+          size: 2,
+          min: -32_768,
+          max: 32_767
+        },
+        {
+          type: :numeric,
+          name: 'integer',
+          size: 4,
+          min: -2_147_483_648,
+          max: 2_147_483_647
+        },
+        {
+          type: :numeric,
+          name: 'bigint',
+          size: 8,
+          min: -9_223_372_036_854_775_808,
+          max: 9_223_372_036_854_775_807
+        },
+        {
+          type: :numeric,
+          name: 'decimal',
+          size: nil
+        },
+        {
+          type: :numeric,
+          name: 'numeric',
+          size: nil
+        },
+        {
+          type: :numeric,
+          name: 'real',
+          size: 4
+        },
+        {
+          type: :numeric,
+          # PostgreSQL names this type "double precision"; a bare "double"
+          # is not a type name it accepts.
+          name: 'double precision',
+          size: 8
+        },
+        {
+          type: :numeric,
+          name: 'serial',
+          size: 4,
+          min: 1,
+          max: 2_147_483_647
+        },
+        {
+          type: :numeric,
+          name: 'bigserial',
+          size: 8,
+          min: 1,
+          max: 9_223_372_036_854_775_807
+        }
+      ].each(&:freeze).freeze
+    end
+  end
+end

data/lib/csv2psql/extensions/string.rb ADDED Viewed

@@ -0,0 +1,10 @@
+# Case conversion helpers added to String.
+class String
+  # Returns the receiver itself when it has no underscore and contains an
+  # upper-case ASCII letter; otherwise splits it on underscores, capitalizes
+  # every part and joins the parts ('big_int' -> 'BigInt').
+  def camel_case
+    has_upper = !(self =~ /[A-Z]+.*/).nil?
+    return self if has_upper && !include?('_')
+    split('_').map(&:capitalize).join
+  end
+  # Like #camel_case, but the first underscore-separated part is kept as it
+  # is ('foo_bar' -> 'fooBar') and there is no early return.
+  def camel_case_lower
+    head, *tail = split('_')
+    [head, *tail.map(&:capitalize)].join
+  end
+end

data/lib/csv2psql/generator/generator.rb ADDED Viewed

@@ -0,0 +1,126 @@
+# encoding: UTF-8
+require 'csv'
+require 'multi_json'
+require 'pathname'
+require 'pp'
+require_relative '../version'
+require_relative '../helpers/csv_helper'
+require_relative '../helpers/erb_helper'
+module Csv2Psql
+  # Produces the SQL text of an import: the script header and the optional
+  # table statements are rendered from ERB templates and written to
+  # +output+, and rows are formatted as INSERT statements.
+  class Generator
+    BASE_DIR = File.join(File.dirname(__FILE__), '..', '..', '..')
+    TEMPLATE_DIR = File.join(BASE_DIR, 'templates')
+    CREATE_TABLE_TEMPLATE = File.join(TEMPLATE_DIR, 'create_table.sql.erb')
+    DROP_TABLE_TEMPLATE = File.join(TEMPLATE_DIR, 'drop_table.sql.erb')
+    HEADER_TEMPLATE = File.join(TEMPLATE_DIR, 'header.sql.erb')
+    TRUNCATE_TABLE_TEMPLATE = File.join(TEMPLATE_DIR, 'truncate_table.sql.erb')
+    DEFAULT_OPTIONS = {
+      'create-table' => false,
+      'drop-table' => false,
+      'truncate-table' => false,
+      table: 'my_table'
+    }
+    # Option name => method rendering the statement. #create_sql_script
+    # walks this hash in insertion order: drop, create, truncate.
+    TABLE_FUNCTIONS = {
+      'drop-table' => :drop_table,
+      'create-table' => :create_table,
+      'truncate-table' => :truncate_table
+    }
+    attr_reader :output
+    # output - object whose #write receives the rendered SQL
+    def initialize(output)
+      @output = output
+    end
+    # Builds the hash handed to the ERB templates: the file path, the raw
+    # header, the column names and the target table.
+    def create_erb_context(path, row, opts = {})
+      header = get_header(row, opts)
+      columns = get_columns(row, opts, header)
+      {
+        path: path,
+        header: header,
+        columns: columns,
+        table: opts[:table] || DEFAULT_OPTIONS[:table]
+      }
+    end
+    # Renders the script header template.
+    def create_header(path, row, opts = {})
+      render(HEADER_TEMPLATE, path, row, opts)
+    end
+    # Renders the create-table template.
+    def create_table(path, row, opts = {})
+      render(CREATE_TABLE_TEMPLATE, path, row, opts)
+    end
+    # Writes the header to +output+, followed by every table statement whose
+    # option is switched on. An option missing from +opts+ falls back to
+    # DEFAULT_OPTIONS.
+    def create_sql_script(path, row, opts = {})
+      output.write create_header(path, row, opts)
+      TABLE_FUNCTIONS.each do |option, renderer|
+        enabled = opts[option].nil? ? DEFAULT_OPTIONS[option] : opts[option]
+        output.write send(renderer, path, row, opts) if enabled
+      end
+    end
+    # Renders the drop-table template.
+    def drop_table(path, row, opts = {})
+      render(DROP_TABLE_TEMPLATE, path, row, opts)
+    end
+    # Returns the INSERT statement for +row+.
+    def format_row(row, opts = {})
+      table = opts[:table] || DEFAULT_OPTIONS[:table]
+      header = get_header(row, opts)
+      columns = get_columns(row, opts, header).join(', ')
+      values = get_values(row, opts, header).join(', ')
+      "INSERT INTO #{table}(#{columns}) VALUES(#{values});"
+    end
+    # Returns the header of +row+ as determined by CsvHelper.get_header.
+    def get_header(row, opts = {})
+      CsvHelper.get_header(row, opts)
+    end
+    # Returns the column names: the sanitized header entries when
+    # opts[:header] is set, otherwise col_0, col_1, ... one per row item.
+    def get_columns(row, opts = {}, header = get_header(row, opts))
+      return header.map { |h| sanitize_header(h) } if opts[:header]
+      row.map.with_index { |_item, i| "col_#{i}" }
+    end
+    # Returns every value of +row+ (looked up by header entry) as a
+    # single-quoted SQL literal.
+    def get_values(row, opts = {}, header = get_header(row, opts))
+      header.map { |h| "'#{sanitize_value(row[h])}'" }
+    end
+    # Lower-cases +header_column+ and replaces every character other than
+    # a-z and 0-9 with an underscore.
+    def sanitize_header(header_column)
+      header_column.downcase.gsub(/[^0-9a-z]/i, '_')
+    end
+    # Returns +value+ with single quotes doubled; nil becomes ''. Non-String
+    # values are converted with #to_s first instead of raising
+    # NoMethodError on #gsub.
+    def sanitize_value(value)
+      value.to_s.gsub("'", "''")
+    end
+    # Renders the truncate-table template.
+    def truncate_table(path, row, opts = {})
+      render(TRUNCATE_TABLE_TEMPLATE, path, row, opts)
+    end
+    private
+    # Processes +template+ through a new ErbHelper with the context built
+    # from +path+, +row+ and +opts+.
+    def render(template, path, row, opts)
+      ErbHelper.new.process(template, create_erb_context(path, row, opts))
+    end
+  end
+end

data/lib/csv2psql/helpers/csv_helper.rb ADDED Viewed

@@ -0,0 +1,21 @@
+# encoding: UTF-8
+require 'erb'
+require 'pathname'
+module Csv2Psql
+  # Helpers for working with parsed CSV rows.
+  class CsvHelper
+    BASE_DIR = File.join(File.dirname(__FILE__), '..')
+    # Returns the header of +row+: row.headers when opts[:header] is set,
+    # otherwise the positions 0, 1, 2, ... of the row's items.
+    def self.get_header(row, opts = {})
+      return row.headers if opts[:header]
+      row.each_with_index.map { |_cell, position| position }
+    end
+  end
+end

data/lib/csv2psql/output/output.rb ADDED Viewed

@@ -0,0 +1,18 @@
+# encoding: UTF-8
+require 'csv'
+require 'multi_json'
+require 'pathname'
+require 'pp'
+require_relative '../version'
+require_relative '../helpers/erb_helper'
+module Csv2Psql
+  # Output sink that prints everything it is given to standard output.
+  class Output
+    # Prints +str+ followed by a newline (as Kernel#puts does).
+    def write(str)
+      $stdout.puts(str)
+    end
+  end
+end
+end