csvsql 0.1.5 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.ruby-version +1 -0
- data/.travis.yml +1 -1
- data/Gemfile.lock +1 -1
- data/README.md +19 -1
- data/exe/csvsql +3 -52
- data/lib/csvsql.rb +74 -4
- data/lib/csvsql/command_runner.rb +96 -0
- data/lib/csvsql/db.rb +31 -52
- data/lib/csvsql/version.rb +1 -1
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ed472535cf8cc98e8b16f5298ea37fae1baa5c2e
|
4
|
+
data.tar.gz: f2e481c4c09a8cfbf1bb118496830be291afab58
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1b01deab426caf1f3281cc5a0ea19be48161ee5508677c8978711fad36c5c6c48fa1b6fec4d693943cc394061f09c7db3320df65504c5ee5cf2fb734d1cbe1f9
|
7
|
+
data.tar.gz: e61c908e568c5392f810571e45f4061d95b0069f53a574a4459b47a8f6a928b6728e03048b1823bab949c3c3c7fdcad33ebbb4f0646ed6d1287a9881aa69a2ac
|
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
2.3.4
|
data/.travis.yml
CHANGED
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -7,6 +7,10 @@ Csvsql
|
|
7
7
|
Use SQL to query your CSV file, and return a new CSV.
|
8
8
|
|
9
9
|
|
10
|
+
## Requirements
|
11
|
+
|
12
|
+
* SQLite 3.6.16 or newer.
|
13
|
+
|
10
14
|
## Installation
|
11
15
|
|
12
16
|
Add this line to your application's Gemfile:
|
@@ -75,7 +79,7 @@ csvsql -i mydata.csv "select name, total from csv where total < 30" | csvsql "se
|
|
75
79
|
|
76
80
|
### Cache CSV data
|
77
81
|
|
78
|
-
It will save the
|
82
|
+
It will save the parsed data to the `~/.csvsql_cache` folder. It will not parse the data again next time if the data file hasn't changed.
|
79
83
|
|
80
84
|
```
|
81
85
|
csvsql -i large.csv -c "select count(*) from csv"
|
@@ -84,6 +88,20 @@ csvsql -i large.csv -c "select count(*) from csv"
|
|
84
88
|
csvsql -i large.csv -c "select count(*) from csv"
|
85
89
|
```
|
86
90
|
|
91
|
+
### Query multiple CSV files
|
92
|
+
|
93
|
+
For multiple files, we should give each file a name. This name will be used as a table name.
|
94
|
+
|
95
|
+
```
|
96
|
+
csvsql -i users.csv:users -i posts.csv:posts "select * from posts join users on posts.user_id = users.id where users.role = 'guest'"
|
97
|
+
```
|
98
|
+
|
99
|
+
With cache, each name will be used as a database name, and the table name is `csv`. If a CSV file was updated, only that database will be updated; the other databases still use the cache.
|
100
|
+
|
101
|
+
```
|
102
|
+
csvsql -i users.csv:users -i posts.csv:posts -c "select * from posts.csv join users.csv on posts.csv.user_id = users.csv.id where users.csv.role = 'guest'"
|
103
|
+
```
|
104
|
+
|
87
105
|
### Clear Cache
|
88
106
|
|
89
107
|
This command will remove all data in the `~/.csvsql_cache`
|
data/exe/csvsql
CHANGED
@@ -1,60 +1,11 @@
|
|
1
1
|
#! /usr/bin/env ruby
|
2
2
|
|
3
3
|
require 'optparse'
|
4
|
+
require 'pry'
|
4
5
|
|
5
6
|
lib = File.expand_path("../../lib", __FILE__)
|
6
7
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
7
8
|
require 'csvsql'
|
8
9
|
|
9
|
-
|
10
|
-
|
11
|
-
opts.banner = "Csvsql #{Csvsql::VERSION}\nUsage: csvsql [options] SQL"
|
12
|
-
opts.version = Csvsql::VERSION
|
13
|
-
|
14
|
-
opts.on('-i', '--input path', "CSV file path, optional. read from stdin if no give") do |path|
|
15
|
-
options[:csv_path] = path
|
16
|
-
end
|
17
|
-
|
18
|
-
opts.on('-c', '--use-cache', "Cache data in ~/.csvsql_cache. it will still reload if file was changed") do
|
19
|
-
options[:use_cache] = true
|
20
|
-
end
|
21
|
-
|
22
|
-
opts.on(
|
23
|
-
'-b', '--batch-rows n',
|
24
|
-
"How many rows to import per batch. Default value is #{Csvsql::Db::BATCH_ROWS}"
|
25
|
-
) do |n|
|
26
|
-
options[:batch_rows] = n.to_i
|
27
|
-
end
|
28
|
-
|
29
|
-
opts.on('-e', '--encoding encoding', "Set the file encoding, default is UTF-8") do |encoding|
|
30
|
-
options[:encoding] = encoding
|
31
|
-
end
|
32
|
-
|
33
|
-
opts.on('--clear-cache', "Clear all cache data") do
|
34
|
-
options[:clear_cache] = true
|
35
|
-
end
|
36
|
-
|
37
|
-
opts.on('--debug', "Print debug info") do
|
38
|
-
options[:debug] = true
|
39
|
-
end
|
40
|
-
end.parse!
|
41
|
-
|
42
|
-
if options[:clear_cache]
|
43
|
-
Csvsql::Db.clear_cache!
|
44
|
-
puts "Completed clear cache."
|
45
|
-
exit
|
46
|
-
end
|
47
|
-
|
48
|
-
if options[:debug]
|
49
|
-
Csvsql::Tracker.tracker = Csvsql::Tracker.new(Logger.new($stdout))
|
50
|
-
end
|
51
|
-
|
52
|
-
csv_data = options[:csv_path] || StringIO.new($stdin.read)
|
53
|
-
|
54
|
-
puts Csvsql.execute(
|
55
|
-
ARGV[0], csv_data,
|
56
|
-
use_cache: options[:use_cache],
|
57
|
-
batch_rows: options[:batch_rows],
|
58
|
-
sql_error_action: 'exit',
|
59
|
-
encoding: options[:encoding]
|
60
|
-
)
|
10
|
+
result = Csvsql::CommandRunner.run!(ARGV)
|
11
|
+
puts result if result
|
data/lib/csvsql.rb
CHANGED
@@ -4,15 +4,21 @@ require "csvsql/version"
|
|
4
4
|
|
5
5
|
require 'csv'
|
6
6
|
require 'sqlite3'
|
7
|
+
require 'digest'
|
8
|
+
require 'fileutils'
|
7
9
|
|
8
10
|
require 'csvsql/db'
|
9
11
|
require 'csvsql/tracker'
|
12
|
+
require 'csvsql/command_runner'
|
10
13
|
|
11
14
|
module Csvsql
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
15
|
+
extend self
|
16
|
+
|
17
|
+
CACHE_DIR = File.join(Dir.home, '.csvsql_cache')
|
18
|
+
FileUtils.mkdir_p(CACHE_DIR) unless Dir.exists?(CACHE_DIR)
|
19
|
+
|
20
|
+
def execute(sql, csv_data, opts = {})
|
21
|
+
csvdb = init_data(csv_data, opts)
|
16
22
|
pst = Csvsql::Tracker.commit(:execute_query_sql) do
|
17
23
|
csvdb.prepare(sql)
|
18
24
|
end
|
@@ -22,4 +28,68 @@ module Csvsql
|
|
22
28
|
pst.each { |line| csv << line }
|
23
29
|
end.tap { Csvsql::Tracker.commit(:output_format) }
|
24
30
|
end
|
31
|
+
|
32
|
+
def self.clear_cache!
|
33
|
+
FileUtils.rm_f(Dir.glob(File.join(CACHE_DIR, '*')))
|
34
|
+
end
|
35
|
+
|
36
|
+
private
|
37
|
+
|
38
|
+
def init_data(csv_data, opts)
|
39
|
+
encoding = opts.delete(:encoding)
|
40
|
+
use_cache = opts.delete(:use_cache)
|
41
|
+
csvdb = Csvsql::Db.new(opts)
|
42
|
+
|
43
|
+
unless use_cache
|
44
|
+
csvdb.import(csv_data, encoding: encoding)
|
45
|
+
return csvdb
|
46
|
+
end
|
47
|
+
|
48
|
+
case csv_data
|
49
|
+
when StringIO, IO
|
50
|
+
# nothing
|
51
|
+
when Hash
|
52
|
+
dbs = []
|
53
|
+
csv_data.each do |dbname, csv_path|
|
54
|
+
dbs << [dbname, csvdb.init_db(get_db_cache_path(csv_path) || '')]
|
55
|
+
csvdb.import(csv_path, encoding: encoding)
|
56
|
+
end
|
57
|
+
dbs.each do |dbname, db|
|
58
|
+
csvdb.execute("ATTACH DATABASE '#{db.filename}' AS #{dbname};")
|
59
|
+
end
|
60
|
+
else
|
61
|
+
csvdb.init_db(get_db_cache_path(csv_data) || '')
|
62
|
+
csvdb.import(csv_data, encoding: encoding)
|
63
|
+
end
|
64
|
+
|
65
|
+
csvdb
|
66
|
+
end
|
67
|
+
|
68
|
+
def get_db_cache_path(csv_path)
|
69
|
+
csv_path = csv_path || ''
|
70
|
+
return unless File.exist?(csv_path)
|
71
|
+
|
72
|
+
stat = File.stat(csv_path)
|
73
|
+
filename = Digest::SHA2.hexdigest(File.absolute_path(csv_path)) + '.cache'
|
74
|
+
file_stat = [File.absolute_path(csv_path), stat.size, stat.ctime].join("\n")
|
75
|
+
stat_path = File.join(CACHE_DIR, filename.gsub(/\.cache$/, '.stat'))
|
76
|
+
cache_path = File.join(CACHE_DIR, filename)
|
77
|
+
|
78
|
+
if File.exist?(stat_path)
|
79
|
+
if File.read(stat_path) == file_stat
|
80
|
+
cache_path
|
81
|
+
else
|
82
|
+
if update_cb
|
83
|
+
update_cb.call
|
84
|
+
else
|
85
|
+
FileUtils.rm(cache_path)
|
86
|
+
end
|
87
|
+
File.write(stat_path, file_stat)
|
88
|
+
cache_path
|
89
|
+
end
|
90
|
+
else
|
91
|
+
File.write(stat_path, file_stat)
|
92
|
+
cache_path
|
93
|
+
end
|
94
|
+
end
|
25
95
|
end
|
@@ -0,0 +1,96 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'optparse'
|
4
|
+
|
5
|
+
class Csvsql::CommandRunner
|
6
|
+
def self.run!(argv)
|
7
|
+
options = self.new.parse!(argv)
|
8
|
+
return unless options
|
9
|
+
|
10
|
+
if options[:clear_cache]
|
11
|
+
Csvsql.clear_cache!
|
12
|
+
puts "Completed clear cache."
|
13
|
+
return
|
14
|
+
end
|
15
|
+
|
16
|
+
if options[:debug]
|
17
|
+
Csvsql::Tracker.tracker = Csvsql::Tracker.new(Logger.new($stdout))
|
18
|
+
end
|
19
|
+
|
20
|
+
Csvsql.execute(
|
21
|
+
options[:sql], options[:csv_data],
|
22
|
+
use_cache: options[:use_cache],
|
23
|
+
batch_rows: options[:batch_rows],
|
24
|
+
sql_error_action: 'exit',
|
25
|
+
encoding: options[:encoding]
|
26
|
+
)
|
27
|
+
end
|
28
|
+
|
29
|
+
def options
|
30
|
+
@options ||= { csv_paths: [] }
|
31
|
+
end
|
32
|
+
|
33
|
+
def parse!(argv)
|
34
|
+
parser.parse!(argv)
|
35
|
+
options[:sql] = argv.last
|
36
|
+
|
37
|
+
paths = options.delete(:csv_paths)
|
38
|
+
options[:csv_data] = case paths.size
|
39
|
+
when 0
|
40
|
+
$stdin
|
41
|
+
when 1
|
42
|
+
paths.first
|
43
|
+
else
|
44
|
+
paths.each_with_object({}) do |path, r|
|
45
|
+
p, n = path.split(':')
|
46
|
+
if n.nil? || n.empty?
|
47
|
+
puts "You should give #{p} a name, example: #{p}:a_name"
|
48
|
+
return false
|
49
|
+
end
|
50
|
+
r[n] = p
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
return options
|
55
|
+
end
|
56
|
+
|
57
|
+
private
|
58
|
+
|
59
|
+
def parser
|
60
|
+
OptionParser.new do |opts|
|
61
|
+
opts.banner = "Csvsql #{Csvsql::VERSION}\nUsage: csvsql [options] SQL"
|
62
|
+
opts.version = Csvsql::VERSION
|
63
|
+
|
64
|
+
opts.on(
|
65
|
+
'-i', '--input path[:name]', "CSV file path, optional. read from stdin if no give." +
|
66
|
+
" Name is required if have multiple files. This name will be a table name." +
|
67
|
+
" It will be a database name if cache is enabled"
|
68
|
+
) do |path|
|
69
|
+
options[:csv_paths] << path
|
70
|
+
end
|
71
|
+
|
72
|
+
opts.on('-c', '--use-cache', "Cache data in ~/.csvsql_cache. it will still reload if file was changed") do
|
73
|
+
options[:use_cache] = true
|
74
|
+
end
|
75
|
+
|
76
|
+
opts.on(
|
77
|
+
'-b', '--batch-rows n',
|
78
|
+
"How many rows to import per batch. Default value is #{Csvsql::Db::BATCH_ROWS}"
|
79
|
+
) do |n|
|
80
|
+
options[:batch_rows] = n.to_i
|
81
|
+
end
|
82
|
+
|
83
|
+
opts.on('-e', '--encoding encoding', "Set the file encoding, default is UTF-8") do |encoding|
|
84
|
+
options[:encoding] = encoding
|
85
|
+
end
|
86
|
+
|
87
|
+
opts.on('--clear-cache', "Clear all cache data") do
|
88
|
+
options[:clear_cache] = true
|
89
|
+
end
|
90
|
+
|
91
|
+
opts.on('--debug', "Print debug information") do
|
92
|
+
options[:debug] = true
|
93
|
+
end
|
94
|
+
end
|
95
|
+
end
|
96
|
+
end
|
data/lib/csvsql/db.rb
CHANGED
@@ -1,24 +1,13 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require 'digest'
|
4
|
-
require 'fileutils'
|
5
|
-
|
6
3
|
class Csvsql::Db
|
7
4
|
BATCH_ROWS = 10000
|
8
|
-
CACHE_DIR = File.join(Dir.home, '.csvsql_cache')
|
9
|
-
FileUtils.mkdir_p(CACHE_DIR) unless Dir.exists?(CACHE_DIR)
|
10
|
-
|
11
|
-
attr_reader :use_cache, :csv_path, :csv_io, :db, :batch_rows
|
12
5
|
|
13
|
-
|
14
|
-
FileUtils.rm_f(Dir.glob(File.join(CACHE_DIR, '*')))
|
15
|
-
end
|
6
|
+
attr_reader :data_source, :batch_rows
|
16
7
|
|
17
|
-
def initialize(
|
8
|
+
def initialize(batch_rows: nil, sql_error_action: nil)
|
18
9
|
@db = nil
|
19
|
-
@
|
20
|
-
@csv_path = nil
|
21
|
-
@use_cache = use_cache
|
10
|
+
@data_source = {}
|
22
11
|
@batch_rows = batch_rows || BATCH_ROWS
|
23
12
|
@sql_error_action = (sql_error_action || :raise).to_sym
|
24
13
|
end
|
@@ -42,18 +31,35 @@ class Csvsql::Db
|
|
42
31
|
process_sql_error(sql, e)
|
43
32
|
end
|
44
33
|
|
34
|
+
def db
|
35
|
+
@db ||= init_db
|
36
|
+
end
|
37
|
+
|
38
|
+
def init_db(cache_path = '')
|
39
|
+
@db = SQLite3::Database.new(cache_path)
|
40
|
+
end
|
41
|
+
|
42
|
+
# Params:
|
43
|
+
# csv_data_or_path:
|
44
|
+
# [String] csv path
|
45
|
+
# [StringIO, IO] csv buffer io
|
46
|
+
# [Hash] { table_name => csv_path }
|
45
47
|
def import(csv_data_or_path, encoding: 'utf-8')
|
46
48
|
case csv_data_or_path
|
47
49
|
when StringIO, IO
|
48
|
-
|
50
|
+
data_source['csv'] = CSV.new(csv_data_or_path)
|
51
|
+
when Hash
|
52
|
+
csv_data_or_path.each do |table_name, path|
|
53
|
+
data_source[table_name.to_s] = CSV.open(path, "r:#{encoding}")
|
54
|
+
end
|
49
55
|
else
|
50
|
-
|
56
|
+
data_source['csv'] = CSV.open(csv_data_or_path, "r:#{encoding}")
|
51
57
|
end
|
52
|
-
@db = SQLite3::Database.new(get_db_path(@csv_path))
|
53
58
|
|
54
59
|
tables = db.execute("SELECT name FROM sqlite_master WHERE type='table';").flatten
|
55
|
-
|
56
|
-
|
60
|
+
data_source.each do |table_name, csv|
|
61
|
+
next if tables.include?('csv')
|
62
|
+
init_table_by_csv(table_name, csv)
|
57
63
|
end
|
58
64
|
true
|
59
65
|
end
|
@@ -67,11 +73,11 @@ class Csvsql::Db
|
|
67
73
|
end
|
68
74
|
end
|
69
75
|
|
70
|
-
def
|
76
|
+
def init_table_by_csv(table_name, csv)
|
71
77
|
header = parser_header(csv.readline)
|
72
78
|
|
73
79
|
cols = header.map { |name, type| "#{name} #{type}" }.join(', ')
|
74
|
-
sql = "CREATE TABLE
|
80
|
+
sql = "CREATE TABLE #{table_name} (#{cols});"
|
75
81
|
execute sql
|
76
82
|
|
77
83
|
cache = []
|
@@ -81,18 +87,18 @@ class Csvsql::Db
|
|
81
87
|
cache << header.each_with_index.map { |h, i| format_sql_val(line[i], h[1]) }
|
82
88
|
|
83
89
|
if cache.length >= batch_rows then
|
84
|
-
import_lines(cache, col_names)
|
90
|
+
import_lines(table_name, cache, col_names)
|
85
91
|
cache.clear
|
86
92
|
end
|
87
93
|
end
|
88
|
-
import_lines(cache, col_names) unless cache.empty?
|
94
|
+
import_lines(table_name, cache, col_names) unless cache.empty?
|
89
95
|
Csvsql::Tracker.commit(:import_csv)
|
90
96
|
db
|
91
97
|
end
|
92
98
|
|
93
|
-
def import_lines(lines, col_names)
|
99
|
+
def import_lines(table_name, lines, col_names)
|
94
100
|
sql = Csvsql::Tracker.commit(:generate_import_sql) do
|
95
|
-
s = "INSERT INTO
|
101
|
+
s = "INSERT INTO #{table_name} (#{col_names.join(', ')}) VALUES "
|
96
102
|
s += lines.map { |line| "(#{line.join(',')})" }.join(', ')
|
97
103
|
end
|
98
104
|
Csvsql::Tracker.commit(:execute_import_sql) { execute sql }
|
@@ -123,31 +129,4 @@ class Csvsql::Db
|
|
123
129
|
raise err
|
124
130
|
end
|
125
131
|
end
|
126
|
-
|
127
|
-
def get_db_path(csv_path)
|
128
|
-
csv_path = csv_path || ''
|
129
|
-
return '' unless File.exist?(csv_path)
|
130
|
-
|
131
|
-
if use_cache
|
132
|
-
stat = File.stat(csv_path)
|
133
|
-
filename = Digest::SHA2.hexdigest(File.absolute_path(csv_path)) + '.cache'
|
134
|
-
file_stat = [File.absolute_path(csv_path), stat.size, stat.ctime].join("\n")
|
135
|
-
stat_path = File.join(CACHE_DIR, filename.gsub(/\.cache$/, '.stat'))
|
136
|
-
cache_path = File.join(CACHE_DIR, filename)
|
137
|
-
|
138
|
-
if File.exist?(stat_path)
|
139
|
-
if File.read(stat_path) == file_stat
|
140
|
-
cache_path
|
141
|
-
else
|
142
|
-
FileUtils.rm(cache_path)
|
143
|
-
cache_path
|
144
|
-
end
|
145
|
-
else
|
146
|
-
File.write(stat_path, file_stat)
|
147
|
-
cache_path
|
148
|
-
end
|
149
|
-
else
|
150
|
-
''
|
151
|
-
end
|
152
|
-
end
|
153
132
|
end
|
data/lib/csvsql/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csvsql
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- jiangzhi.xie
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-10-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: sqlite3
|
@@ -91,6 +91,7 @@ files:
|
|
91
91
|
- ".gitignore"
|
92
92
|
- ".rspec"
|
93
93
|
- ".rubocop.yml"
|
94
|
+
- ".ruby-version"
|
94
95
|
- ".travis.yml"
|
95
96
|
- Gemfile
|
96
97
|
- Gemfile.lock
|
@@ -101,6 +102,7 @@ files:
|
|
101
102
|
- csvsql.gemspec
|
102
103
|
- exe/csvsql
|
103
104
|
- lib/csvsql.rb
|
105
|
+
- lib/csvsql/command_runner.rb
|
104
106
|
- lib/csvsql/db.rb
|
105
107
|
- lib/csvsql/tracker.rb
|
106
108
|
- lib/csvsql/version.rb
|
@@ -124,7 +126,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
124
126
|
version: '0'
|
125
127
|
requirements: []
|
126
128
|
rubyforge_project:
|
127
|
-
rubygems_version: 2.
|
129
|
+
rubygems_version: 2.5.2
|
128
130
|
signing_key:
|
129
131
|
specification_version: 4
|
130
132
|
summary: Process csv with SQL.
|