csvsql 0.1.5 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.ruby-version +1 -0
- data/.travis.yml +1 -1
- data/Gemfile.lock +1 -1
- data/README.md +19 -1
- data/exe/csvsql +3 -52
- data/lib/csvsql.rb +74 -4
- data/lib/csvsql/command_runner.rb +96 -0
- data/lib/csvsql/db.rb +31 -52
- data/lib/csvsql/version.rb +1 -1
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ed472535cf8cc98e8b16f5298ea37fae1baa5c2e
|
4
|
+
data.tar.gz: f2e481c4c09a8cfbf1bb118496830be291afab58
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1b01deab426caf1f3281cc5a0ea19be48161ee5508677c8978711fad36c5c6c48fa1b6fec4d693943cc394061f09c7db3320df65504c5ee5cf2fb734d1cbe1f9
|
7
|
+
data.tar.gz: e61c908e568c5392f810571e45f4061d95b0069f53a574a4459b47a8f6a928b6728e03048b1823bab949c3c3c7fdcad33ebbb4f0646ed6d1287a9881aa69a2ac
|
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
2.3.4
|
data/.travis.yml
CHANGED
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -7,6 +7,10 @@ Csvsql
|
|
7
7
|
Use SQL to query your CSV file, and return a new CSV.
|
8
8
|
|
9
9
|
|
10
|
+
## Requirements
|
11
|
+
|
12
|
+
* SQLite 3.6.16 or newer.
|
13
|
+
|
10
14
|
## Installation
|
11
15
|
|
12
16
|
Add this line to your application's Gemfile:
|
@@ -75,7 +79,7 @@ csvsql -i mydata.csv "select name, total from csv where total < 30" | csvsql "se
|
|
75
79
|
|
76
80
|
### Cache CSV data
|
77
81
|
|
78
|
-
It will save the
|
82
|
+
It will save the parsed data to the `~/.csvsql_cache` folder. It will not parse the data again next time if the data file hasn't changed.
|
79
83
|
|
80
84
|
```
|
81
85
|
csvsql -i large.csv -c "select count(*) from csv"
|
@@ -84,6 +88,20 @@ csvsql -i large.csv -c "select count(*) from csv"
|
|
84
88
|
csvsql -i large.csv -c "select count(*) from csv"
|
85
89
|
```
|
86
90
|
|
91
|
+
### Query multiple CSV files
|
92
|
+
|
93
|
+
For multiple files, we should give each file a name. This name will be used as a table name.
|
94
|
+
|
95
|
+
```
|
96
|
+
csvsql -i users.csv:users -i posts.csv:posts "select * from posts join users on posts.user_id = users.id where users.role = 'guest'"
|
97
|
+
```
|
98
|
+
|
99
|
+
With cache, each name will be used as a database name, and the table name is `csv`. If a CSV file was updated, only that database will be updated; the other databases still use the cache.
|
100
|
+
|
101
|
+
```
|
102
|
+
csvsql -i users.csv:users -i posts.csv:posts -c "select * from posts.csv join users.csv on posts.csv.user_id = users.csv.id where users.csv.role = 'guest'"
|
103
|
+
```
|
104
|
+
|
87
105
|
### Clear Cache
|
88
106
|
|
89
107
|
This command will remove all data in the `~/.csvsql_cache`
|
data/exe/csvsql
CHANGED
@@ -1,60 +1,11 @@
|
|
1
1
|
#! /usr/bin/env ruby
|
2
2
|
|
3
3
|
require 'optparse'
|
4
|
+
require 'pry'
|
4
5
|
|
5
6
|
lib = File.expand_path("../../lib", __FILE__)
|
6
7
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
7
8
|
require 'csvsql'
|
8
9
|
|
9
|
-
|
10
|
-
|
11
|
-
opts.banner = "Csvsql #{Csvsql::VERSION}\nUsage: csvsql [options] SQL"
|
12
|
-
opts.version = Csvsql::VERSION
|
13
|
-
|
14
|
-
opts.on('-i', '--input path', "CSV file path, optional. read from stdin if no give") do |path|
|
15
|
-
options[:csv_path] = path
|
16
|
-
end
|
17
|
-
|
18
|
-
opts.on('-c', '--use-cache', "Cache data in ~/.csvsql_cache. it will still reload if file was changed") do
|
19
|
-
options[:use_cache] = true
|
20
|
-
end
|
21
|
-
|
22
|
-
opts.on(
|
23
|
-
'-b', '--batch-rows n',
|
24
|
-
"How many rows to import per batch. Default value is #{Csvsql::Db::BATCH_ROWS}"
|
25
|
-
) do |n|
|
26
|
-
options[:batch_rows] = n.to_i
|
27
|
-
end
|
28
|
-
|
29
|
-
opts.on('-e', '--encoding encoding', "Set the file encoding, default is UTF-8") do |encoding|
|
30
|
-
options[:encoding] = encoding
|
31
|
-
end
|
32
|
-
|
33
|
-
opts.on('--clear-cache', "Clear all cache data") do
|
34
|
-
options[:clear_cache] = true
|
35
|
-
end
|
36
|
-
|
37
|
-
opts.on('--debug', "Print debug info") do
|
38
|
-
options[:debug] = true
|
39
|
-
end
|
40
|
-
end.parse!
|
41
|
-
|
42
|
-
if options[:clear_cache]
|
43
|
-
Csvsql::Db.clear_cache!
|
44
|
-
puts "Completed clear cache."
|
45
|
-
exit
|
46
|
-
end
|
47
|
-
|
48
|
-
if options[:debug]
|
49
|
-
Csvsql::Tracker.tracker = Csvsql::Tracker.new(Logger.new($stdout))
|
50
|
-
end
|
51
|
-
|
52
|
-
csv_data = options[:csv_path] || StringIO.new($stdin.read)
|
53
|
-
|
54
|
-
puts Csvsql.execute(
|
55
|
-
ARGV[0], csv_data,
|
56
|
-
use_cache: options[:use_cache],
|
57
|
-
batch_rows: options[:batch_rows],
|
58
|
-
sql_error_action: 'exit',
|
59
|
-
encoding: options[:encoding]
|
60
|
-
)
|
10
|
+
result = Csvsql::CommandRunner.run!(ARGV)
|
11
|
+
puts result if result
|
data/lib/csvsql.rb
CHANGED
@@ -4,15 +4,21 @@ require "csvsql/version"
|
|
4
4
|
|
5
5
|
require 'csv'
|
6
6
|
require 'sqlite3'
|
7
|
+
require 'digest'
|
8
|
+
require 'fileutils'
|
7
9
|
|
8
10
|
require 'csvsql/db'
|
9
11
|
require 'csvsql/tracker'
|
12
|
+
require 'csvsql/command_runner'
|
10
13
|
|
11
14
|
module Csvsql
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
15
|
+
extend self
|
16
|
+
|
17
|
+
CACHE_DIR = File.join(Dir.home, '.csvsql_cache')
|
18
|
+
FileUtils.mkdir_p(CACHE_DIR) unless Dir.exists?(CACHE_DIR)
|
19
|
+
|
20
|
+
def execute(sql, csv_data, opts = {})
|
21
|
+
csvdb = init_data(csv_data, opts)
|
16
22
|
pst = Csvsql::Tracker.commit(:execute_query_sql) do
|
17
23
|
csvdb.prepare(sql)
|
18
24
|
end
|
@@ -22,4 +28,68 @@ module Csvsql
|
|
22
28
|
pst.each { |line| csv << line }
|
23
29
|
end.tap { Csvsql::Tracker.commit(:output_format) }
|
24
30
|
end
|
31
|
+
|
32
|
+
def self.clear_cache!
|
33
|
+
FileUtils.rm_f(Dir.glob(File.join(CACHE_DIR, '*')))
|
34
|
+
end
|
35
|
+
|
36
|
+
private
|
37
|
+
|
38
|
+
def init_data(csv_data, opts)
|
39
|
+
encoding = opts.delete(:encoding)
|
40
|
+
use_cache = opts.delete(:use_cache)
|
41
|
+
csvdb = Csvsql::Db.new(opts)
|
42
|
+
|
43
|
+
unless use_cache
|
44
|
+
csvdb.import(csv_data, encoding: encoding)
|
45
|
+
return csvdb
|
46
|
+
end
|
47
|
+
|
48
|
+
case csv_data
|
49
|
+
when StringIO, IO
|
50
|
+
# nothing
|
51
|
+
when Hash
|
52
|
+
dbs = []
|
53
|
+
csv_data.each do |dbname, csv_path|
|
54
|
+
dbs << [dbname, csvdb.init_db(get_db_cache_path(csv_path) || '')]
|
55
|
+
csvdb.import(csv_path, encoding: encoding)
|
56
|
+
end
|
57
|
+
dbs.each do |dbname, db|
|
58
|
+
csvdb.execute("ATTACH DATABASE '#{db.filename}' AS #{dbname};")
|
59
|
+
end
|
60
|
+
else
|
61
|
+
csvdb.init_db(get_db_cache_path(csv_data) || '')
|
62
|
+
csvdb.import(csv_data, encoding: encoding)
|
63
|
+
end
|
64
|
+
|
65
|
+
csvdb
|
66
|
+
end
|
67
|
+
|
68
|
+
def get_db_cache_path(csv_path)
|
69
|
+
csv_path = csv_path || ''
|
70
|
+
return unless File.exist?(csv_path)
|
71
|
+
|
72
|
+
stat = File.stat(csv_path)
|
73
|
+
filename = Digest::SHA2.hexdigest(File.absolute_path(csv_path)) + '.cache'
|
74
|
+
file_stat = [File.absolute_path(csv_path), stat.size, stat.ctime].join("\n")
|
75
|
+
stat_path = File.join(CACHE_DIR, filename.gsub(/\.cache$/, '.stat'))
|
76
|
+
cache_path = File.join(CACHE_DIR, filename)
|
77
|
+
|
78
|
+
if File.exist?(stat_path)
|
79
|
+
if File.read(stat_path) == file_stat
|
80
|
+
cache_path
|
81
|
+
else
|
82
|
+
if update_cb
|
83
|
+
update_cb.call
|
84
|
+
else
|
85
|
+
FileUtils.rm(cache_path)
|
86
|
+
end
|
87
|
+
File.write(stat_path, file_stat)
|
88
|
+
cache_path
|
89
|
+
end
|
90
|
+
else
|
91
|
+
File.write(stat_path, file_stat)
|
92
|
+
cache_path
|
93
|
+
end
|
94
|
+
end
|
25
95
|
end
|
@@ -0,0 +1,96 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'optparse'
|
4
|
+
|
5
|
+
class Csvsql::CommandRunner
|
6
|
+
def self.run!(argv)
|
7
|
+
options = self.new.parse!(argv)
|
8
|
+
return unless options
|
9
|
+
|
10
|
+
if options[:clear_cache]
|
11
|
+
Csvsql.clear_cache!
|
12
|
+
puts "Completed clear cache."
|
13
|
+
return
|
14
|
+
end
|
15
|
+
|
16
|
+
if options[:debug]
|
17
|
+
Csvsql::Tracker.tracker = Csvsql::Tracker.new(Logger.new($stdout))
|
18
|
+
end
|
19
|
+
|
20
|
+
Csvsql.execute(
|
21
|
+
options[:sql], options[:csv_data],
|
22
|
+
use_cache: options[:use_cache],
|
23
|
+
batch_rows: options[:batch_rows],
|
24
|
+
sql_error_action: 'exit',
|
25
|
+
encoding: options[:encoding]
|
26
|
+
)
|
27
|
+
end
|
28
|
+
|
29
|
+
def options
|
30
|
+
@options ||= { csv_paths: [] }
|
31
|
+
end
|
32
|
+
|
33
|
+
def parse!(argv)
|
34
|
+
parser.parse!(argv)
|
35
|
+
options[:sql] = argv.last
|
36
|
+
|
37
|
+
paths = options.delete(:csv_paths)
|
38
|
+
options[:csv_data] = case paths.size
|
39
|
+
when 0
|
40
|
+
$stdin
|
41
|
+
when 1
|
42
|
+
paths.first
|
43
|
+
else
|
44
|
+
paths.each_with_object({}) do |path, r|
|
45
|
+
p, n = path.split(':')
|
46
|
+
if n.nil? || n.empty?
|
47
|
+
puts "You should give #{p} a name, example: #{p}:a_name"
|
48
|
+
return false
|
49
|
+
end
|
50
|
+
r[n] = p
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
return options
|
55
|
+
end
|
56
|
+
|
57
|
+
private
|
58
|
+
|
59
|
+
def parser
|
60
|
+
OptionParser.new do |opts|
|
61
|
+
opts.banner = "Csvsql #{Csvsql::VERSION}\nUsage: csvsql [options] SQL"
|
62
|
+
opts.version = Csvsql::VERSION
|
63
|
+
|
64
|
+
opts.on(
|
65
|
+
'-i', '--input path[:name]', "CSV file path, optional. read from stdin if no give." +
|
66
|
+
" Name is required if have multiple files. This name will be a table name." +
|
67
|
+
" It will be a database name if cache is enabled"
|
68
|
+
) do |path|
|
69
|
+
options[:csv_paths] << path
|
70
|
+
end
|
71
|
+
|
72
|
+
opts.on('-c', '--use-cache', "Cache data in ~/.csvsql_cache. it will still reload if file was changed") do
|
73
|
+
options[:use_cache] = true
|
74
|
+
end
|
75
|
+
|
76
|
+
opts.on(
|
77
|
+
'-b', '--batch-rows n',
|
78
|
+
"How many rows to import per batch. Default value is #{Csvsql::Db::BATCH_ROWS}"
|
79
|
+
) do |n|
|
80
|
+
options[:batch_rows] = n.to_i
|
81
|
+
end
|
82
|
+
|
83
|
+
opts.on('-e', '--encoding encoding', "Set the file encoding, default is UTF-8") do |encoding|
|
84
|
+
options[:encoding] = encoding
|
85
|
+
end
|
86
|
+
|
87
|
+
opts.on('--clear-cache', "Clear all cache data") do
|
88
|
+
options[:clear_cache] = true
|
89
|
+
end
|
90
|
+
|
91
|
+
opts.on('--debug', "Print debug information") do
|
92
|
+
options[:debug] = true
|
93
|
+
end
|
94
|
+
end
|
95
|
+
end
|
96
|
+
end
|
data/lib/csvsql/db.rb
CHANGED
@@ -1,24 +1,13 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require 'digest'
|
4
|
-
require 'fileutils'
|
5
|
-
|
6
3
|
class Csvsql::Db
|
7
4
|
BATCH_ROWS = 10000
|
8
|
-
CACHE_DIR = File.join(Dir.home, '.csvsql_cache')
|
9
|
-
FileUtils.mkdir_p(CACHE_DIR) unless Dir.exists?(CACHE_DIR)
|
10
|
-
|
11
|
-
attr_reader :use_cache, :csv_path, :csv_io, :db, :batch_rows
|
12
5
|
|
13
|
-
|
14
|
-
FileUtils.rm_f(Dir.glob(File.join(CACHE_DIR, '*')))
|
15
|
-
end
|
6
|
+
attr_reader :data_source, :batch_rows
|
16
7
|
|
17
|
-
def initialize(
|
8
|
+
def initialize(batch_rows: nil, sql_error_action: nil)
|
18
9
|
@db = nil
|
19
|
-
@
|
20
|
-
@csv_path = nil
|
21
|
-
@use_cache = use_cache
|
10
|
+
@data_source = {}
|
22
11
|
@batch_rows = batch_rows || BATCH_ROWS
|
23
12
|
@sql_error_action = (sql_error_action || :raise).to_sym
|
24
13
|
end
|
@@ -42,18 +31,35 @@ class Csvsql::Db
|
|
42
31
|
process_sql_error(sql, e)
|
43
32
|
end
|
44
33
|
|
34
|
+
def db
|
35
|
+
@db ||= init_db
|
36
|
+
end
|
37
|
+
|
38
|
+
def init_db(cache_path = '')
|
39
|
+
@db = SQLite3::Database.new(cache_path)
|
40
|
+
end
|
41
|
+
|
42
|
+
# Params:
|
43
|
+
# csv_data_or_path:
|
44
|
+
# [String] csv path
|
45
|
+
# [StringIO, IO] csv buffer io
|
46
|
+
# [Hash] { table_name => csv_path }
|
45
47
|
def import(csv_data_or_path, encoding: 'utf-8')
|
46
48
|
case csv_data_or_path
|
47
49
|
when StringIO, IO
|
48
|
-
|
50
|
+
data_source['csv'] = CSV.new(csv_data_or_path)
|
51
|
+
when Hash
|
52
|
+
csv_data_or_path.each do |table_name, path|
|
53
|
+
data_source[table_name.to_s] = CSV.open(path, "r:#{encoding}")
|
54
|
+
end
|
49
55
|
else
|
50
|
-
|
56
|
+
data_source['csv'] = CSV.open(csv_data_or_path, "r:#{encoding}")
|
51
57
|
end
|
52
|
-
@db = SQLite3::Database.new(get_db_path(@csv_path))
|
53
58
|
|
54
59
|
tables = db.execute("SELECT name FROM sqlite_master WHERE type='table';").flatten
|
55
|
-
|
56
|
-
|
60
|
+
data_source.each do |table_name, csv|
|
61
|
+
next if tables.include?('csv')
|
62
|
+
init_table_by_csv(table_name, csv)
|
57
63
|
end
|
58
64
|
true
|
59
65
|
end
|
@@ -67,11 +73,11 @@ class Csvsql::Db
|
|
67
73
|
end
|
68
74
|
end
|
69
75
|
|
70
|
-
def
|
76
|
+
def init_table_by_csv(table_name, csv)
|
71
77
|
header = parser_header(csv.readline)
|
72
78
|
|
73
79
|
cols = header.map { |name, type| "#{name} #{type}" }.join(', ')
|
74
|
-
sql = "CREATE TABLE
|
80
|
+
sql = "CREATE TABLE #{table_name} (#{cols});"
|
75
81
|
execute sql
|
76
82
|
|
77
83
|
cache = []
|
@@ -81,18 +87,18 @@ class Csvsql::Db
|
|
81
87
|
cache << header.each_with_index.map { |h, i| format_sql_val(line[i], h[1]) }
|
82
88
|
|
83
89
|
if cache.length >= batch_rows then
|
84
|
-
import_lines(cache, col_names)
|
90
|
+
import_lines(table_name, cache, col_names)
|
85
91
|
cache.clear
|
86
92
|
end
|
87
93
|
end
|
88
|
-
import_lines(cache, col_names) unless cache.empty?
|
94
|
+
import_lines(table_name, cache, col_names) unless cache.empty?
|
89
95
|
Csvsql::Tracker.commit(:import_csv)
|
90
96
|
db
|
91
97
|
end
|
92
98
|
|
93
|
-
def import_lines(lines, col_names)
|
99
|
+
def import_lines(table_name, lines, col_names)
|
94
100
|
sql = Csvsql::Tracker.commit(:generate_import_sql) do
|
95
|
-
s = "INSERT INTO
|
101
|
+
s = "INSERT INTO #{table_name} (#{col_names.join(', ')}) VALUES "
|
96
102
|
s += lines.map { |line| "(#{line.join(',')})" }.join(', ')
|
97
103
|
end
|
98
104
|
Csvsql::Tracker.commit(:execute_import_sql) { execute sql }
|
@@ -123,31 +129,4 @@ class Csvsql::Db
|
|
123
129
|
raise err
|
124
130
|
end
|
125
131
|
end
|
126
|
-
|
127
|
-
def get_db_path(csv_path)
|
128
|
-
csv_path = csv_path || ''
|
129
|
-
return '' unless File.exist?(csv_path)
|
130
|
-
|
131
|
-
if use_cache
|
132
|
-
stat = File.stat(csv_path)
|
133
|
-
filename = Digest::SHA2.hexdigest(File.absolute_path(csv_path)) + '.cache'
|
134
|
-
file_stat = [File.absolute_path(csv_path), stat.size, stat.ctime].join("\n")
|
135
|
-
stat_path = File.join(CACHE_DIR, filename.gsub(/\.cache$/, '.stat'))
|
136
|
-
cache_path = File.join(CACHE_DIR, filename)
|
137
|
-
|
138
|
-
if File.exist?(stat_path)
|
139
|
-
if File.read(stat_path) == file_stat
|
140
|
-
cache_path
|
141
|
-
else
|
142
|
-
FileUtils.rm(cache_path)
|
143
|
-
cache_path
|
144
|
-
end
|
145
|
-
else
|
146
|
-
File.write(stat_path, file_stat)
|
147
|
-
cache_path
|
148
|
-
end
|
149
|
-
else
|
150
|
-
''
|
151
|
-
end
|
152
|
-
end
|
153
132
|
end
|
data/lib/csvsql/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csvsql
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- jiangzhi.xie
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-10-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: sqlite3
|
@@ -91,6 +91,7 @@ files:
|
|
91
91
|
- ".gitignore"
|
92
92
|
- ".rspec"
|
93
93
|
- ".rubocop.yml"
|
94
|
+
- ".ruby-version"
|
94
95
|
- ".travis.yml"
|
95
96
|
- Gemfile
|
96
97
|
- Gemfile.lock
|
@@ -101,6 +102,7 @@ files:
|
|
101
102
|
- csvsql.gemspec
|
102
103
|
- exe/csvsql
|
103
104
|
- lib/csvsql.rb
|
105
|
+
- lib/csvsql/command_runner.rb
|
104
106
|
- lib/csvsql/db.rb
|
105
107
|
- lib/csvsql/tracker.rb
|
106
108
|
- lib/csvsql/version.rb
|
@@ -124,7 +126,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
124
126
|
version: '0'
|
125
127
|
requirements: []
|
126
128
|
rubyforge_project:
|
127
|
-
rubygems_version: 2.
|
129
|
+
rubygems_version: 2.5.2
|
128
130
|
signing_key:
|
129
131
|
specification_version: 4
|
130
132
|
summary: Process csv with SQL.
|