csvsql 0.1.5 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f52f3d8ec0aa806dd0f4f5bd601e4c90edd03aab
4
- data.tar.gz: d903969288bb84f78699c1fff30f95ff855930f8
3
+ metadata.gz: ed472535cf8cc98e8b16f5298ea37fae1baa5c2e
4
+ data.tar.gz: f2e481c4c09a8cfbf1bb118496830be291afab58
5
5
  SHA512:
6
- metadata.gz: 4560ca06f7b76be154a3976b757d5b71f78af14eeec0e8b4bb10b16483d8c2f54f462fecb03a500b5ed40589931e4aa1333440923ae6e64a925a906c5a31a5df
7
- data.tar.gz: 13ebccc88ea17f240ddb9d1c72986f836cc770d81ec8e78dcc2eee757b77b45565373526729a0dfc6d184bbf89b6ce52cd5e5a4c744fb1b6a7159bd4a62f4670
6
+ metadata.gz: 1b01deab426caf1f3281cc5a0ea19be48161ee5508677c8978711fad36c5c6c48fa1b6fec4d693943cc394061f09c7db3320df65504c5ee5cf2fb734d1cbe1f9
7
+ data.tar.gz: e61c908e568c5392f810571e45f4061d95b0069f53a574a4459b47a8f6a928b6728e03048b1823bab949c3c3c7fdcad33ebbb4f0646ed6d1287a9881aa69a2ac
@@ -0,0 +1 @@
1
+ 2.3.4
@@ -17,4 +17,4 @@ before_install:
17
17
 
18
18
  script:
19
19
  - bundle exec rspec
20
- - rubocop
20
+ - bundle exec rubocop
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- csvsql (0.1.4)
4
+ csvsql (0.2.0)
5
5
  sqlite3 (~> 1.3.13)
6
6
 
7
7
  GEM
data/README.md CHANGED
@@ -7,6 +7,10 @@ Csvsql
7
7
  Use SQL to query your CSV file, and return a new CSV.
8
8
 
9
9
 
10
+ ## Requirements
11
+
12
+ * SQLite 3.6.16 or newer.
13
+
10
14
  ## Installation
11
15
 
12
16
  Add this line to your application's Gemfile:
@@ -75,7 +79,7 @@ csvsql -i mydata.csv "select name, total from csv where total < 30" | csvsql "se
75
79
 
76
80
  ### Cache CSV data
77
81
 
78
- It will save the CSV data to a tempfile. we use `~/.csvsql_cache` folder to save the cache
82
+ It saves the parsed data in the `~/.csvsql_cache` folder, so the data is not parsed again next time unless the data file has changed.
79
83
 
80
84
  ```
81
85
  csvsql -i large.csv -c "select count(*) from csv"
@@ -84,6 +88,20 @@ csvsql -i large.csv -c "select count(*) from csv"
84
88
  csvsql -i large.csv -c "select count(*) from csv"
85
89
  ```
86
90
 
91
+ ### Query multiple CSV files
92
+
93
+ When querying multiple files, each file must be given a name. That name is used as the `table name`.
94
+
95
+ ```
96
+ csvsql -i users.csv:users -i posts.csv:posts "select * from posts join users on posts.user_id = users.id where users.role = 'guest'"
97
+ ```
98
+
99
+ With the cache enabled, each name becomes a `database name` and the table name is `csv`. If a CSV file has been updated, only its database is rebuilt; the other databases still use the cache.
100
+
101
+ ```
102
+ csvsql -i users.csv:users -i posts.csv:posts -c "select * from posts.csv join users.csv on posts.csv.user_id = users.csv.id where users.csv.role = 'guest'"
103
+ ```
104
+
87
105
  ### Clear Cache
88
106
 
89
107
  This command will remove all data in the `~/.csvsql_cache`
data/exe/csvsql CHANGED
@@ -1,60 +1,11 @@
1
1
  #! /usr/bin/env ruby
2
2
 
3
3
  require 'optparse'
4
+ require 'pry'
4
5
 
5
6
  lib = File.expand_path("../../lib", __FILE__)
6
7
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
7
8
  require 'csvsql'
8
9
 
9
- options = {}
10
- OptionParser.new do |opts|
11
- opts.banner = "Csvsql #{Csvsql::VERSION}\nUsage: csvsql [options] SQL"
12
- opts.version = Csvsql::VERSION
13
-
14
- opts.on('-i', '--input path', "CSV file path, optional. read from stdin if no give") do |path|
15
- options[:csv_path] = path
16
- end
17
-
18
- opts.on('-c', '--use-cache', "Cache data in ~/.csvsql_cache. it will still reload if file was changed") do
19
- options[:use_cache] = true
20
- end
21
-
22
- opts.on(
23
- '-b', '--batch-rows n',
24
- "How many rows to import per batch. Default value is #{Csvsql::Db::BATCH_ROWS}"
25
- ) do |n|
26
- options[:batch_rows] = n.to_i
27
- end
28
-
29
- opts.on('-e', '--encoding encoding', "Set the file encoding, default is UTF-8") do |encoding|
30
- options[:encoding] = encoding
31
- end
32
-
33
- opts.on('--clear-cache', "Clear all cache data") do
34
- options[:clear_cache] = true
35
- end
36
-
37
- opts.on('--debug', "Print debug info") do
38
- options[:debug] = true
39
- end
40
- end.parse!
41
-
42
- if options[:clear_cache]
43
- Csvsql::Db.clear_cache!
44
- puts "Completed clear cache."
45
- exit
46
- end
47
-
48
- if options[:debug]
49
- Csvsql::Tracker.tracker = Csvsql::Tracker.new(Logger.new($stdout))
50
- end
51
-
52
- csv_data = options[:csv_path] || StringIO.new($stdin.read)
53
-
54
- puts Csvsql.execute(
55
- ARGV[0], csv_data,
56
- use_cache: options[:use_cache],
57
- batch_rows: options[:batch_rows],
58
- sql_error_action: 'exit',
59
- encoding: options[:encoding]
60
- )
10
+ result = Csvsql::CommandRunner.run!(ARGV)
11
+ puts result if result
@@ -4,15 +4,21 @@ require "csvsql/version"
4
4
 
5
5
  require 'csv'
6
6
  require 'sqlite3'
7
+ require 'digest'
8
+ require 'fileutils'
7
9
 
8
10
  require 'csvsql/db'
9
11
  require 'csvsql/tracker'
12
+ require 'csvsql/command_runner'
10
13
 
11
14
  module Csvsql
12
- def self.execute(sql, csv_data, opts = {})
13
- encoding = opts.delete(:encoding)
14
- csvdb = Csvsql::Db.new(opts)
15
- csvdb.import(csv_data, encoding: encoding)
15
+ extend self
16
+
17
+ CACHE_DIR = File.join(Dir.home, '.csvsql_cache')
18
+ FileUtils.mkdir_p(CACHE_DIR) unless Dir.exists?(CACHE_DIR)
19
+
20
+ def execute(sql, csv_data, opts = {})
21
+ csvdb = init_data(csv_data, opts)
16
22
  pst = Csvsql::Tracker.commit(:execute_query_sql) do
17
23
  csvdb.prepare(sql)
18
24
  end
@@ -22,4 +28,68 @@ module Csvsql
22
28
  pst.each { |line| csv << line }
23
29
  end.tap { Csvsql::Tracker.commit(:output_format) }
24
30
  end
31
+
32
+ def self.clear_cache!
33
+ FileUtils.rm_f(Dir.glob(File.join(CACHE_DIR, '*')))
34
+ end
35
+
36
+ private
37
+
38
+ def init_data(csv_data, opts)
39
+ encoding = opts.delete(:encoding)
40
+ use_cache = opts.delete(:use_cache)
41
+ csvdb = Csvsql::Db.new(opts)
42
+
43
+ unless use_cache
44
+ csvdb.import(csv_data, encoding: encoding)
45
+ return csvdb
46
+ end
47
+
48
+ case csv_data
49
+ when StringIO, IO
50
+ # nothing
51
+ when Hash
52
+ dbs = []
53
+ csv_data.each do |dbname, csv_path|
54
+ dbs << [dbname, csvdb.init_db(get_db_cache_path(csv_path) || '')]
55
+ csvdb.import(csv_path, encoding: encoding)
56
+ end
57
+ dbs.each do |dbname, db|
58
+ csvdb.execute("ATTACH DATABASE '#{db.filename}' AS #{dbname};")
59
+ end
60
+ else
61
+ csvdb.init_db(get_db_cache_path(csv_data) || '')
62
+ csvdb.import(csv_data, encoding: encoding)
63
+ end
64
+
65
+ csvdb
66
+ end
67
+
68
+ def get_db_cache_path(csv_path)
69
+ csv_path = csv_path || ''
70
+ return unless File.exist?(csv_path)
71
+
72
+ stat = File.stat(csv_path)
73
+ filename = Digest::SHA2.hexdigest(File.absolute_path(csv_path)) + '.cache'
74
+ file_stat = [File.absolute_path(csv_path), stat.size, stat.ctime].join("\n")
75
+ stat_path = File.join(CACHE_DIR, filename.gsub(/\.cache$/, '.stat'))
76
+ cache_path = File.join(CACHE_DIR, filename)
77
+
78
+ if File.exist?(stat_path)
79
+ if File.read(stat_path) == file_stat
80
+ cache_path
81
+ else
82
+ if update_cb
83
+ update_cb.call
84
+ else
85
+ FileUtils.rm(cache_path)
86
+ end
87
+ File.write(stat_path, file_stat)
88
+ cache_path
89
+ end
90
+ else
91
+ File.write(stat_path, file_stat)
92
+ cache_path
93
+ end
94
+ end
25
95
  end
@@ -0,0 +1,96 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'optparse'
4
+
5
+ class Csvsql::CommandRunner
6
+ def self.run!(argv)
7
+ options = self.new.parse!(argv)
8
+ return unless options
9
+
10
+ if options[:clear_cache]
11
+ Csvsql.clear_cache!
12
+ puts "Completed clear cache."
13
+ return
14
+ end
15
+
16
+ if options[:debug]
17
+ Csvsql::Tracker.tracker = Csvsql::Tracker.new(Logger.new($stdout))
18
+ end
19
+
20
+ Csvsql.execute(
21
+ options[:sql], options[:csv_data],
22
+ use_cache: options[:use_cache],
23
+ batch_rows: options[:batch_rows],
24
+ sql_error_action: 'exit',
25
+ encoding: options[:encoding]
26
+ )
27
+ end
28
+
29
+ def options
30
+ @options ||= { csv_paths: [] }
31
+ end
32
+
33
+ def parse!(argv)
34
+ parser.parse!(argv)
35
+ options[:sql] = argv.last
36
+
37
+ paths = options.delete(:csv_paths)
38
+ options[:csv_data] = case paths.size
39
+ when 0
40
+ $stdin
41
+ when 1
42
+ paths.first
43
+ else
44
+ paths.each_with_object({}) do |path, r|
45
+ p, n = path.split(':')
46
+ if n.nil? || n.empty?
47
+ puts "You should give #{p} a name, example: #{p}:a_name"
48
+ return false
49
+ end
50
+ r[n] = p
51
+ end
52
+ end
53
+
54
+ return options
55
+ end
56
+
57
+ private
58
+
59
+ def parser
60
+ OptionParser.new do |opts|
61
+ opts.banner = "Csvsql #{Csvsql::VERSION}\nUsage: csvsql [options] SQL"
62
+ opts.version = Csvsql::VERSION
63
+
64
+ opts.on(
65
+ '-i', '--input path[:name]', "CSV file path, optional. read from stdin if no give." +
66
+ " Name is required if have multiple files. This name will be a table name." +
67
+ " It will be a database name if cache is enabled"
68
+ ) do |path|
69
+ options[:csv_paths] << path
70
+ end
71
+
72
+ opts.on('-c', '--use-cache', "Cache data in ~/.csvsql_cache. it will still reload if file was changed") do
73
+ options[:use_cache] = true
74
+ end
75
+
76
+ opts.on(
77
+ '-b', '--batch-rows n',
78
+ "How many rows to import per batch. Default value is #{Csvsql::Db::BATCH_ROWS}"
79
+ ) do |n|
80
+ options[:batch_rows] = n.to_i
81
+ end
82
+
83
+ opts.on('-e', '--encoding encoding', "Set the file encoding, default is UTF-8") do |encoding|
84
+ options[:encoding] = encoding
85
+ end
86
+
87
+ opts.on('--clear-cache', "Clear all cache data") do
88
+ options[:clear_cache] = true
89
+ end
90
+
91
+ opts.on('--debug', "Print debug information") do
92
+ options[:debug] = true
93
+ end
94
+ end
95
+ end
96
+ end
@@ -1,24 +1,13 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'digest'
4
- require 'fileutils'
5
-
6
3
  class Csvsql::Db
7
4
  BATCH_ROWS = 10000
8
- CACHE_DIR = File.join(Dir.home, '.csvsql_cache')
9
- FileUtils.mkdir_p(CACHE_DIR) unless Dir.exists?(CACHE_DIR)
10
-
11
- attr_reader :use_cache, :csv_path, :csv_io, :db, :batch_rows
12
5
 
13
- def self.clear_cache!
14
- FileUtils.rm_f(Dir.glob(File.join(CACHE_DIR, '*')))
15
- end
6
+ attr_reader :data_source, :batch_rows
16
7
 
17
- def initialize(use_cache: false, batch_rows: nil, sql_error_action: nil)
8
+ def initialize(batch_rows: nil, sql_error_action: nil)
18
9
  @db = nil
19
- @csv_io = nil
20
- @csv_path = nil
21
- @use_cache = use_cache
10
+ @data_source = {}
22
11
  @batch_rows = batch_rows || BATCH_ROWS
23
12
  @sql_error_action = (sql_error_action || :raise).to_sym
24
13
  end
@@ -42,18 +31,35 @@ class Csvsql::Db
42
31
  process_sql_error(sql, e)
43
32
  end
44
33
 
34
+ def db
35
+ @db ||= init_db
36
+ end
37
+
38
+ def init_db(cache_path = '')
39
+ @db = SQLite3::Database.new(cache_path)
40
+ end
41
+
42
+ # Params:
43
+ # csv_data_or_path:
44
+ # [String] csv path
45
+ # [StringIO, IO] csv buffer io
46
+ # [Hash] { table_name => csv_path }
45
47
  def import(csv_data_or_path, encoding: 'utf-8')
46
48
  case csv_data_or_path
47
49
  when StringIO, IO
48
- @csv_io = csv_data_or_path
50
+ data_source['csv'] = CSV.new(csv_data_or_path)
51
+ when Hash
52
+ csv_data_or_path.each do |table_name, path|
53
+ data_source[table_name.to_s] = CSV.open(path, "r:#{encoding}")
54
+ end
49
55
  else
50
- @csv_path = csv_data_or_path
56
+ data_source['csv'] = CSV.open(csv_data_or_path, "r:#{encoding}")
51
57
  end
52
- @db = SQLite3::Database.new(get_db_path(@csv_path))
53
58
 
54
59
  tables = db.execute("SELECT name FROM sqlite_master WHERE type='table';").flatten
55
- unless tables.include?('csv')
56
- init_db_by_csv(@csv_io ? CSV.new(@csv_io) : CSV.open(@csv_path, "r:#{encoding}"))
60
+ data_source.each do |table_name, csv|
61
+ next if tables.include?('csv')
62
+ init_table_by_csv(table_name, csv)
57
63
  end
58
64
  true
59
65
  end
@@ -67,11 +73,11 @@ class Csvsql::Db
67
73
  end
68
74
  end
69
75
 
70
- def init_db_by_csv(csv)
76
+ def init_table_by_csv(table_name, csv)
71
77
  header = parser_header(csv.readline)
72
78
 
73
79
  cols = header.map { |name, type| "#{name} #{type}" }.join(', ')
74
- sql = "CREATE TABLE csv (#{cols});"
80
+ sql = "CREATE TABLE #{table_name} (#{cols});"
75
81
  execute sql
76
82
 
77
83
  cache = []
@@ -81,18 +87,18 @@ class Csvsql::Db
81
87
  cache << header.each_with_index.map { |h, i| format_sql_val(line[i], h[1]) }
82
88
 
83
89
  if cache.length >= batch_rows then
84
- import_lines(cache, col_names)
90
+ import_lines(table_name, cache, col_names)
85
91
  cache.clear
86
92
  end
87
93
  end
88
- import_lines(cache, col_names) unless cache.empty?
94
+ import_lines(table_name, cache, col_names) unless cache.empty?
89
95
  Csvsql::Tracker.commit(:import_csv)
90
96
  db
91
97
  end
92
98
 
93
- def import_lines(lines, col_names)
99
+ def import_lines(table_name, lines, col_names)
94
100
  sql = Csvsql::Tracker.commit(:generate_import_sql) do
95
- s = "INSERT INTO csv (#{col_names.join(', ')}) VALUES "
101
+ s = "INSERT INTO #{table_name} (#{col_names.join(', ')}) VALUES "
96
102
  s += lines.map { |line| "(#{line.join(',')})" }.join(', ')
97
103
  end
98
104
  Csvsql::Tracker.commit(:execute_import_sql) { execute sql }
@@ -123,31 +129,4 @@ class Csvsql::Db
123
129
  raise err
124
130
  end
125
131
  end
126
-
127
- def get_db_path(csv_path)
128
- csv_path = csv_path || ''
129
- return '' unless File.exist?(csv_path)
130
-
131
- if use_cache
132
- stat = File.stat(csv_path)
133
- filename = Digest::SHA2.hexdigest(File.absolute_path(csv_path)) + '.cache'
134
- file_stat = [File.absolute_path(csv_path), stat.size, stat.ctime].join("\n")
135
- stat_path = File.join(CACHE_DIR, filename.gsub(/\.cache$/, '.stat'))
136
- cache_path = File.join(CACHE_DIR, filename)
137
-
138
- if File.exist?(stat_path)
139
- if File.read(stat_path) == file_stat
140
- cache_path
141
- else
142
- FileUtils.rm(cache_path)
143
- cache_path
144
- end
145
- else
146
- File.write(stat_path, file_stat)
147
- cache_path
148
- end
149
- else
150
- ''
151
- end
152
- end
153
132
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Csvsql
4
- VERSION = "0.1.5"
4
+ VERSION = "0.2.0"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csvsql
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - jiangzhi.xie
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-09-18 00:00:00.000000000 Z
11
+ date: 2018-10-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: sqlite3
@@ -91,6 +91,7 @@ files:
91
91
  - ".gitignore"
92
92
  - ".rspec"
93
93
  - ".rubocop.yml"
94
+ - ".ruby-version"
94
95
  - ".travis.yml"
95
96
  - Gemfile
96
97
  - Gemfile.lock
@@ -101,6 +102,7 @@ files:
101
102
  - csvsql.gemspec
102
103
  - exe/csvsql
103
104
  - lib/csvsql.rb
105
+ - lib/csvsql/command_runner.rb
104
106
  - lib/csvsql/db.rb
105
107
  - lib/csvsql/tracker.rb
106
108
  - lib/csvsql/version.rb
@@ -124,7 +126,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
124
126
  version: '0'
125
127
  requirements: []
126
128
  rubyforge_project:
127
- rubygems_version: 2.4.5.4
129
+ rubygems_version: 2.5.2
128
130
  signing_key:
129
131
  specification_version: 4
130
132
  summary: Process csv with SQL.