csvsql 0.1.5 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.ruby-version +1 -0
- data/.travis.yml +1 -1
- data/Gemfile.lock +1 -1
- data/README.md +19 -1
- data/exe/csvsql +3 -52
- data/lib/csvsql.rb +74 -4
- data/lib/csvsql/command_runner.rb +96 -0
- data/lib/csvsql/db.rb +31 -52
- data/lib/csvsql/version.rb +1 -1
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: ed472535cf8cc98e8b16f5298ea37fae1baa5c2e
+  data.tar.gz: f2e481c4c09a8cfbf1bb118496830be291afab58
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 1b01deab426caf1f3281cc5a0ea19be48161ee5508677c8978711fad36c5c6c48fa1b6fec4d693943cc394061f09c7db3320df65504c5ee5cf2fb734d1cbe1f9
+  data.tar.gz: e61c908e568c5392f810571e45f4061d95b0069f53a574a4459b47a8f6a928b6728e03048b1823bab949c3c3c7fdcad33ebbb4f0646ed6d1287a9881aa69a2ac
data/.ruby-version
ADDED
@@ -0,0 +1 @@
+2.3.4
data/.travis.yml
CHANGED
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -7,6 +7,10 @@ Csvsql
 Use SQL to query your CSV file, and return a new CSV.
 
 
+## Requirements
+
+* SQlite 3.6.16 or newer.
+
 ## Installation
 
 Add this line to your application's Gemfile:
@@ -75,7 +79,7 @@ csvsql -i mydata.csv "select name, total from csv where total < 30" | csvsql "se
 
 ### Cache CSV data
 
-It will save the
+It will save the parsed data to `~/.csvsql_cache` folder. it will not parse data again next time if the data file didn't change
 
 ```
 csvsql -i large.csv -c "select count(*) from csv"
@@ -84,6 +88,20 @@ csvsql -i large.csv -c "select count(*) from csv"
 csvsql -i large.csv -c "select count(*) from csv"
 ```
 
+### Query multiple CSV files
+
+For multiple files, we should name each files. This name will be a `table name`.
+
+```
+csvsql -i users.csv:users -i posts.csv:posts "select * from posts join users on posts.user_id = users.id where users.role = 'guest'"
+```
+
+With cache. those name will be a `database name`, the table name is `csv`. If a csv file was updated, that db will be update only. Other db still use cache.
+
+```
+csvsql -i users.csv:users -i posts.csv:posts -c "select * from posts.csv join users.csv on posts.csv.user_id = users.csv.id where users.csv.role = 'guest'"
+```
+
 ### Clear Cache
 
 This command will remove all data in the `~/.csvsql_cache`
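
For orientation, a minimal Ruby sketch of what the multi-file README example above does through the library API added in this release (the `users.csv`/`posts.csv` files and the query are placeholders; `Csvsql.execute` and the `Hash` form of its second argument come from the `lib/csvsql.rb` and `lib/csvsql/db.rb` diffs below):

```ruby
require 'csvsql'

# Hypothetical files: the Hash maps table names to CSV paths, mirroring
# the `-i path:name` CLI flags documented above (no cache, so each path
# is imported as a table of that name).
sql = "select * from posts join users on posts.user_id = users.id"
csv = Csvsql.execute(sql, { 'users' => 'users.csv', 'posts' => 'posts.csv' }, encoding: 'utf-8')
puts csv  # the result is itself a CSV string
```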
data/exe/csvsql
CHANGED
@@ -1,60 +1,11 @@
 #! /usr/bin/env ruby
 
 require 'optparse'
+require 'pry'
 
 lib = File.expand_path("../../lib", __FILE__)
 $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
 require 'csvsql'
 
-
-
-  opts.banner = "Csvsql #{Csvsql::VERSION}\nUsage: csvsql [options] SQL"
-  opts.version = Csvsql::VERSION
-
-  opts.on('-i', '--input path', "CSV file path, optional. read from stdin if no give") do |path|
-    options[:csv_path] = path
-  end
-
-  opts.on('-c', '--use-cache', "Cache data in ~/.csvsql_cache. it will still reload if file was changed") do
-    options[:use_cache] = true
-  end
-
-  opts.on(
-    '-b', '--batch-rows n',
-    "How many rows to import per batch. Default value is #{Csvsql::Db::BATCH_ROWS}"
-  ) do |n|
-    options[:batch_rows] = n.to_i
-  end
-
-  opts.on('-e', '--encoding encoding', "Set the file encoding, default is UTF-8") do |encoding|
-    options[:encoding] = encoding
-  end
-
-  opts.on('--clear-cache', "Clear all cache data") do
-    options[:clear_cache] = true
-  end
-
-  opts.on('--debug', "Print debug info") do
-    options[:debug] = true
-  end
-end.parse!
-
-if options[:clear_cache]
-  Csvsql::Db.clear_cache!
-  puts "Completed clear cache."
-  exit
-end
-
-if options[:debug]
-  Csvsql::Tracker.tracker = Csvsql::Tracker.new(Logger.new($stdout))
-end
-
-csv_data = options[:csv_path] || StringIO.new($stdin.read)
-
-puts Csvsql.execute(
-  ARGV[0], csv_data,
-  use_cache: options[:use_cache],
-  batch_rows: options[:batch_rows],
-  sql_error_action: 'exit',
-  encoding: options[:encoding]
-)
+result = Csvsql::CommandRunner.run!(ARGV)
+puts result if result
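
The executable is now a thin wrapper; all option handling moved into `Csvsql::CommandRunner` (diffed below). A small sketch of driving the same entry point from Ruby, with a made-up argument vector:

```ruby
require 'csvsql'

# Equivalent of: csvsql -i mydata.csv "select * from csv limit 10"
argv = ['-i', 'mydata.csv', 'select * from csv limit 10']
result = Csvsql::CommandRunner.run!(argv)  # CSV string, or nil for --clear-cache
puts result if result
```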
data/lib/csvsql.rb
CHANGED
@@ -4,15 +4,21 @@ require "csvsql/version"
 
 require 'csv'
 require 'sqlite3'
+require 'digest'
+require 'fileutils'
 
 require 'csvsql/db'
 require 'csvsql/tracker'
+require 'csvsql/command_runner'
 
 module Csvsql
-
-
-
-
+  extend self
+
+  CACHE_DIR = File.join(Dir.home, '.csvsql_cache')
+  FileUtils.mkdir_p(CACHE_DIR) unless Dir.exists?(CACHE_DIR)
+
+  def execute(sql, csv_data, opts = {})
+    csvdb = init_data(csv_data, opts)
     pst = Csvsql::Tracker.commit(:execute_query_sql) do
       csvdb.prepare(sql)
     end
@@ -22,4 +28,68 @@ module Csvsql
       pst.each { |line| csv << line }
     end.tap { Csvsql::Tracker.commit(:output_format) }
   end
+
+  def self.clear_cache!
+    FileUtils.rm_f(Dir.glob(File.join(CACHE_DIR, '*')))
+  end
+
+  private
+
+  def init_data(csv_data, opts)
+    encoding = opts.delete(:encoding)
+    use_cache = opts.delete(:use_cache)
+    csvdb = Csvsql::Db.new(opts)
+
+    unless use_cache
+      csvdb.import(csv_data, encoding: encoding)
+      return csvdb
+    end
+
+    case csv_data
+    when StringIO, IO
+      # nothing
+    when Hash
+      dbs = []
+      csv_data.each do |dbname, csv_path|
+        dbs << [dbname, csvdb.init_db(get_db_cache_path(csv_path) || '')]
+        csvdb.import(csv_path, encoding: encoding)
+      end
+      dbs.each do |dbname, db|
+        csvdb.execute("ATTACH DATABASE '#{db.filename}' AS #{dbname};")
+      end
+    else
+      csvdb.init_db(get_db_cache_path(csv_data) || '')
+      csvdb.import(csv_data, encoding: encoding)
+    end
+
+    csvdb
+  end
+
+  def get_db_cache_path(csv_path)
+    csv_path = csv_path || ''
+    return unless File.exist?(csv_path)
+
+    stat = File.stat(csv_path)
+    filename = Digest::SHA2.hexdigest(File.absolute_path(csv_path)) + '.cache'
+    file_stat = [File.absolute_path(csv_path), stat.size, stat.ctime].join("\n")
+    stat_path = File.join(CACHE_DIR, filename.gsub(/\.cache$/, '.stat'))
+    cache_path = File.join(CACHE_DIR, filename)
+
+    if File.exist?(stat_path)
+      if File.read(stat_path) == file_stat
+        cache_path
+      else
+        if update_cb
+          update_cb.call
+        else
+          FileUtils.rm(cache_path)
+        end
+        File.write(stat_path, file_stat)
+        cache_path
+      end
+    else
+      File.write(stat_path, file_stat)
+      cache_path
+    end
+  end
 end
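
To make the caching scheme above easier to follow, here is a standalone sketch of the fingerprint check performed by `get_db_cache_path` (the `~/.csvsql_cache` layout and the path/size/ctime fingerprint are taken from the diff; the helper name `cache_fresh?` is illustrative only):

```ruby
require 'digest'

CACHE_DIR = File.join(Dir.home, '.csvsql_cache')

# A cached SQLite file is reused only while the CSV's absolute path, size
# and ctime all match the fingerprint recorded in the matching .stat file.
def cache_fresh?(csv_path)
  stat = File.stat(csv_path)
  fingerprint = [File.absolute_path(csv_path), stat.size, stat.ctime].join("\n")
  key = Digest::SHA2.hexdigest(File.absolute_path(csv_path))
  stat_path = File.join(CACHE_DIR, "#{key}.stat")
  File.exist?(stat_path) && File.read(stat_path) == fingerprint
end
```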
data/lib/csvsql/command_runner.rb
ADDED
@@ -0,0 +1,96 @@
+# frozen_string_literal: true
+
+require 'optparse'
+
+class Csvsql::CommandRunner
+  def self.run!(argv)
+    options = self.new.parse!(argv)
+    return unless options
+
+    if options[:clear_cache]
+      Csvsql.clear_cache!
+      puts "Completed clear cache."
+      return
+    end
+
+    if options[:debug]
+      Csvsql::Tracker.tracker = Csvsql::Tracker.new(Logger.new($stdout))
+    end
+
+    Csvsql.execute(
+      options[:sql], options[:csv_data],
+      use_cache: options[:use_cache],
+      batch_rows: options[:batch_rows],
+      sql_error_action: 'exit',
+      encoding: options[:encoding]
+    )
+  end
+
+  def options
+    @options ||= { csv_paths: [] }
+  end
+
+  def parse!(argv)
+    parser.parse!(argv)
+    options[:sql] = argv.last
+
+    paths = options.delete(:csv_paths)
+    options[:csv_data] = case paths.size
+    when 0
+      $stdin
+    when 1
+      paths.first
+    else
+      paths.each_with_object({}) do |path, r|
+        p, n = path.split(':')
+        if n.nil? || n.empty?
+          puts "You should give #{p} a name, example: #{p}:a_name"
+          return false
+        end
+        r[n] = p
+      end
+    end
+
+    return options
+  end
+
+  private
+
+  def parser
+    OptionParser.new do |opts|
+      opts.banner = "Csvsql #{Csvsql::VERSION}\nUsage: csvsql [options] SQL"
+      opts.version = Csvsql::VERSION
+
+      opts.on(
+        '-i', '--input path[:name]', "CSV file path, optional. read from stdin if no give." +
+          " Name is required if have multiple files. This name will be a table name." +
+          " It will be a database name if cache is enabled"
+      ) do |path|
+        options[:csv_paths] << path
+      end
+
+      opts.on('-c', '--use-cache', "Cache data in ~/.csvsql_cache. it will still reload if file was changed") do
+        options[:use_cache] = true
+      end
+
+      opts.on(
+        '-b', '--batch-rows n',
+        "How many rows to import per batch. Default value is #{Csvsql::Db::BATCH_ROWS}"
+      ) do |n|
+        options[:batch_rows] = n.to_i
+      end
+
+      opts.on('-e', '--encoding encoding', "Set the file encoding, default is UTF-8") do |encoding|
+        options[:encoding] = encoding
+      end
+
+      opts.on('--clear-cache', "Clear all cache data") do
+        options[:clear_cache] = true
+      end
+
+      opts.on('--debug', "Print debug information") do
+        options[:debug] = true
+      end
+    end
+  end
+end
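
A short sketch of how multiple `-i path:name` flags end up as the `csv_data` hash consumed by `Csvsql.execute` (the file names are hypothetical; the `split(':')` convention comes from `parse!` above):

```ruby
paths = ['users.csv:users', 'posts.csv:posts']

# Mirrors CommandRunner#parse!: with several inputs each `path:name` pair
# becomes a { name => path } entry; a single input stays a plain path and
# no input at all falls back to $stdin.
csv_data = paths.each_with_object({}) do |entry, acc|
  path, name = entry.split(':')
  acc[name] = path
end
# => { "users" => "users.csv", "posts" => "posts.csv" }
```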
data/lib/csvsql/db.rb
CHANGED
@@ -1,24 +1,13 @@
 # frozen_string_literal: true
 
-require 'digest'
-require 'fileutils'
-
 class Csvsql::Db
   BATCH_ROWS = 10000
-  CACHE_DIR = File.join(Dir.home, '.csvsql_cache')
-  FileUtils.mkdir_p(CACHE_DIR) unless Dir.exists?(CACHE_DIR)
-
-  attr_reader :use_cache, :csv_path, :csv_io, :db, :batch_rows
 
-
-    FileUtils.rm_f(Dir.glob(File.join(CACHE_DIR, '*')))
-  end
+  attr_reader :data_source, :batch_rows
 
-  def initialize(
+  def initialize(batch_rows: nil, sql_error_action: nil)
     @db = nil
-    @
-    @csv_path = nil
-    @use_cache = use_cache
+    @data_source = {}
     @batch_rows = batch_rows || BATCH_ROWS
     @sql_error_action = (sql_error_action || :raise).to_sym
   end
@@ -42,18 +31,35 @@ class Csvsql::Db
     process_sql_error(sql, e)
   end
 
+  def db
+    @db ||= init_db
+  end
+
+  def init_db(cache_path = '')
+    @db = SQLite3::Database.new(cache_path)
+  end
+
+  # Params:
+  #   csv_data_or_path:
+  #     [String] csv path
+  #     [StringIO, IO] csv buffer io
+  #     [Hash] { table_name => csv_path }
   def import(csv_data_or_path, encoding: 'utf-8')
     case csv_data_or_path
     when StringIO, IO
-
+      data_source['csv'] = CSV.new(csv_data_or_path)
+    when Hash
+      csv_data_or_path.each do |table_name, path|
+        data_source[table_name.to_s] = CSV.open(path, "r:#{encoding}")
+      end
     else
-
+      data_source['csv'] = CSV.open(csv_data_or_path, "r:#{encoding}")
     end
-    @db = SQLite3::Database.new(get_db_path(@csv_path))
 
     tables = db.execute("SELECT name FROM sqlite_master WHERE type='table';").flatten
-
-
+    data_source.each do |table_name, csv|
+      next if tables.include?('csv')
+      init_table_by_csv(table_name, csv)
     end
     true
   end
@@ -67,11 +73,11 @@ class Csvsql::Db
     end
   end
 
-  def
+  def init_table_by_csv(table_name, csv)
     header = parser_header(csv.readline)
 
     cols = header.map { |name, type| "#{name} #{type}" }.join(', ')
-    sql = "CREATE TABLE
+    sql = "CREATE TABLE #{table_name} (#{cols});"
     execute sql
 
     cache = []
@@ -81,18 +87,18 @@ class Csvsql::Db
       cache << header.each_with_index.map { |h, i| format_sql_val(line[i], h[1]) }
 
       if cache.length >= batch_rows then
-        import_lines(cache, col_names)
+        import_lines(table_name, cache, col_names)
         cache.clear
       end
     end
-    import_lines(cache, col_names) unless cache.empty?
+    import_lines(table_name, cache, col_names) unless cache.empty?
     Csvsql::Tracker.commit(:import_csv)
     db
   end
 
-  def import_lines(lines, col_names)
+  def import_lines(table_name, lines, col_names)
     sql = Csvsql::Tracker.commit(:generate_import_sql) do
-      s = "INSERT INTO
+      s = "INSERT INTO #{table_name} (#{col_names.join(', ')}) VALUES "
       s += lines.map { |line| "(#{line.join(',')})" }.join(', ')
     end
     Csvsql::Tracker.commit(:execute_import_sql) { execute sql }
@@ -123,31 +129,4 @@ class Csvsql::Db
     raise err
   end
 
-
-  def get_db_path(csv_path)
-    csv_path = csv_path || ''
-    return '' unless File.exist?(csv_path)
-
-    if use_cache
-      stat = File.stat(csv_path)
-      filename = Digest::SHA2.hexdigest(File.absolute_path(csv_path)) + '.cache'
-      file_stat = [File.absolute_path(csv_path), stat.size, stat.ctime].join("\n")
-      stat_path = File.join(CACHE_DIR, filename.gsub(/\.cache$/, '.stat'))
-      cache_path = File.join(CACHE_DIR, filename)
-
-      if File.exist?(stat_path)
-        if File.read(stat_path) == file_stat
-          cache_path
-        else
-          FileUtils.rm(cache_path)
-          cache_path
-        end
-      else
-        File.write(stat_path, file_stat)
-        cache_path
-      end
-    else
-      ''
-    end
-  end
 end
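
As a rough illustration of what `init_table_by_csv` and `import_lines` now do per named table (the table, columns and file are made up; the CREATE/INSERT shape and the batching follow the diff, minus the `format_sql_val` escaping):

```ruby
require 'sqlite3'
require 'csv'

db = SQLite3::Database.new('')  # '' gives a private temporary database, as in init_db
db.execute('CREATE TABLE users (id int, name varchar(255));')

# Rows are buffered and flushed as one multi-row INSERT per batch
# (BATCH_ROWS = 10000 by default).
rows = CSV.read('users.csv', headers: true)
values = rows.map { |r| "(#{r['id'].to_i}, '#{r['name']}')" }.join(', ')
db.execute("INSERT INTO users (id, name) VALUES #{values};")
```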
data/lib/csvsql/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: csvsql
 version: !ruby/object:Gem::Version
-  version: 0.1.5
+  version: 0.2.0
 platform: ruby
 authors:
 - jiangzhi.xie
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2018-
+date: 2018-10-27 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: sqlite3
@@ -91,6 +91,7 @@ files:
 - ".gitignore"
 - ".rspec"
 - ".rubocop.yml"
+- ".ruby-version"
 - ".travis.yml"
 - Gemfile
 - Gemfile.lock
@@ -101,6 +102,7 @@ files:
 - csvsql.gemspec
 - exe/csvsql
 - lib/csvsql.rb
+- lib/csvsql/command_runner.rb
 - lib/csvsql/db.rb
 - lib/csvsql/tracker.rb
 - lib/csvsql/version.rb
@@ -124,7 +126,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.
+rubygems_version: 2.5.2
 signing_key:
 specification_version: 4
 summary: Process csv with SQL.