csvsql 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/Gemfile.lock +1 -1
- data/README.md +3 -3
- data/exe/csvsql +18 -2
- data/lib/csvsql/db.rb +112 -109
- data/lib/csvsql/tracker.rb +44 -0
- data/lib/csvsql/version.rb +1 -1
- data/lib/csvsql.rb +7 -2
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 70fa4c1c358f92a85d5f1cd0502081297d1d27cc
+  data.tar.gz: 0bf5efcd968a8965c8308b75e39ae82e43c8d29e
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 88ddda2f38d83fd4defc23bbfd0d596517f600aacf30963a63b1a15c7f5dca73d7dfe977b0d8b80bff597238ee8e515f559480afb6d54c6a3c2b30e7d6626f24
+  data.tar.gz: d51eb995c1acfe9420153e3acb25f91c06137276f88dd3d67d60d7807395acefd9a7d9403030dfde5a22ab3c0e3492e5de44d479478eca26288f0b475e30b4e4
data/.gitignore
CHANGED
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -75,10 +75,10 @@ csvsql -i mydata.csv "select name, total from csv where total < 30" | csvsql "se
 It will save the CSV data to a tempfile. we use `~/.csvsql_cache` folder to save the cache
 
 ```
-csvsql -i large.csv -t "select count(*) from csv"
+csvsql -i large.csv -c "select count(*) from csv"
 
 # the second, it will be fast.
-csvsql -i large.csv -t "select count(*) from csv"
+csvsql -i large.csv -c "select count(*) from csv"
 ```
 
 ### Clear Cache
@@ -86,7 +86,7 @@ csvsql -i large.csv -t "select count(*) from csv"
 This command will remove all data in the `~/.csvsql_cache`
 
 ```
-csvsql --clear
+csvsql --clear-cache
 ```
 
 
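A note on the cache behavior referenced above: per the db.rb diff below, the cache key is the SHA2 digest of the CSV's absolute path, validated against a stat fingerprint (path, size, ctime) kept in a companion `.stat` file. A minimal sketch of that scheme; `large.csv` is a hypothetical input file:

```ruby
require 'digest'

# Cache-key scheme used by Csvsql::Db#get_db_path (see the db.rb diff below).
csv_path = File.absolute_path('large.csv')                 # hypothetical input
stat     = File.stat(csv_path)

cache_name  = Digest::SHA2.hexdigest(csv_path) + '.cache'  # SQLite db file name
fingerprint = [csv_path, stat.size, stat.ctime].join("\n") # stored in the .stat file

puts File.join(Dir.home, '.csvsql_cache', cache_name)
```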
data/exe/csvsql
CHANGED
@@ -8,7 +8,8 @@ require 'csvsql'
|
|
8
8
|
|
9
9
|
options = {}
|
10
10
|
OptionParser.new do |opts|
|
11
|
-
opts.banner = "
|
11
|
+
opts.banner = "Csvsql #{Csvsql::VERSION}\nUsage: csvsql [options] SQL"
|
12
|
+
opts.version = Csvsql::VERSION
|
12
13
|
|
13
14
|
opts.on('-i', '--input path', "CSV file path, optional. read from stdin if no give") do |path|
|
14
15
|
options[:csv_path] = path
|
@@ -18,9 +19,20 @@ OptionParser.new do |opts|
|
|
18
19
|
options[:use_cache] = true
|
19
20
|
end
|
20
21
|
|
22
|
+
opts.on(
|
23
|
+
'-b', '--batch-rows n',
|
24
|
+
"How many rows to import per batch. Default value is #{Csvsql::Db::BATCH_ROWS}"
|
25
|
+
) do |n|
|
26
|
+
options[:batch_rows] = n.to_i
|
27
|
+
end
|
28
|
+
|
21
29
|
opts.on('--clear-cache', "Clear all cache data") do
|
22
30
|
options[:clear_cache] = true
|
23
31
|
end
|
32
|
+
|
33
|
+
opts.on('--debug', "Print debug info") do
|
34
|
+
options[:debug] = true
|
35
|
+
end
|
24
36
|
end.parse!
|
25
37
|
|
26
38
|
if options[:clear_cache]
|
@@ -29,6 +41,10 @@ if options[:clear_cache]
|
|
29
41
|
exit
|
30
42
|
end
|
31
43
|
|
44
|
+
if options[:debug]
|
45
|
+
Csvsql::Tracker.tracker = Csvsql::Tracker.new(Logger.new($stdout))
|
46
|
+
end
|
47
|
+
|
32
48
|
csv_data = options[:csv_path] || StringIO.new($stdin.read)
|
33
49
|
|
34
|
-
puts Csvsql.execute(ARGV[0], csv_data, use_cache: options[:use_cache])
|
50
|
+
puts Csvsql.execute(ARGV[0], csv_data, use_cache: options[:use_cache], batch_rows: options[:batch_rows])
|
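The new `--debug` flag above only swaps the process-wide tracker for one that logs to stdout, and `-b` is forwarded as `batch_rows`. The same wiring is available from Ruby; a sketch assuming the gem is installed, with `data.csv` as a hypothetical input file:

```ruby
require 'csvsql'
require 'logger'

# What `csvsql --debug` does internally: route Tracker timings to stdout.
Csvsql::Tracker.tracker = Csvsql::Tracker.new(Logger.new($stdout))

# Equivalent of: csvsql -i data.csv -b 5000 "select count(*) from csv"
puts Csvsql.execute('select count(*) from csv', 'data.csv', batch_rows: 5000)
```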
data/lib/csvsql/db.rb
CHANGED
@@ -2,146 +2,149 @@
 
 require 'digest'
 
+class Csvsql::Db
+  BATCH_ROWS = 10000
+  CACHE_DIR = File.join(Dir.home, '.csvsql_cache')
+  FileUtils.mkdir_p(CACHE_DIR) unless Dir.exists?(CACHE_DIR)
 
+  attr_reader :use_cache, :csv_path, :csv_io, :db, :batch_rows
 
+  def self.clear_cache!
+    require 'fileutils'
+    FileUtils.rm_f(Dir.glob(File.join(CACHE_DIR, '*')))
+  end
 
+  def initialize(use_cache: false, batch_rows: nil)
+    @db = nil
+    @csv_io = nil
+    @csv_path = nil
+    @use_cache = use_cache
+    @batch_rows = batch_rows || BATCH_ROWS
+  end
 
+  # action:
+  #   raise: default
+  #   exit
+  def sql_error_action=(action)
+    @sql_error_action = action.to_sym
+  end
 
-  end
+  def execute(sql)
+    db.execute(sql)
+  rescue SQLite3::SQLException => e
+    process_sql_error(sql, e)
+  end
 
+  def prepare(sql)
+    db.prepare(sql)
+  rescue SQLite3::SQLException => e
+    process_sql_error(sql, e)
+  end
 
+  def import(csv_data_or_path)
+    case csv_data_or_path
+    when StringIO, IO
+      @csv_io = csv_data_or_path
+    else
+      @csv_path = csv_data_or_path
     end
+    @db = SQLite3::Database.new(get_db_path(@csv_path))
 
-      @csv_io = csv_data_or_path
-    else
-      @csv_path = csv_data_or_path
-    end
-    @db = SQLite3::Database.new(get_db_path(@csv_path))
-
-    tables = db.execute("SELECT name FROM sqlite_master WHERE type='table';").first
-    unless tables && tables.include?('csv')
-      init_db_by_csv(@csv_io ? CSV.new(@csv_io) : CSV.open(@csv_path))
-    end
-    true
+    tables = db.execute("SELECT name FROM sqlite_master WHERE type='table';").flatten
+    unless tables.include?('csv')
+      init_db_by_csv(@csv_io ? CSV.new(@csv_io) : CSV.open(@csv_path))
     end
+    true
+  end
 
+  private
 
-  end
+  def parser_header(csv_header)
+    csv_header.map do |col, r|
+      name, type = col.strip.split(':')
+      [name, (type || 'varchar(255)').downcase.to_sym]
     end
+  end
 
+  def init_db_by_csv(csv)
+    header = parser_header(csv.readline)
 
+    cols = header.map { |name, type| "#{name} #{type}" }.join(', ')
+    sql = "CREATE TABLE csv (#{cols});"
+    execute sql
 
+    cache = []
+    col_names = header.map(&:first)
+    Csvsql::Tracker.commit(:import_csv)
+    csv.each do |line|
+      cache << line.each_with_index.map { |v, i| format_sql_val(v, header[i][1]) }
+
+      if cache.length >= batch_rows then
+        import_lines(cache, col_names)
+        cache.clear
       end
-    import_lines(cache, col_names) unless cache.empty?
-    db
     end
+    import_lines(cache, col_names) unless cache.empty?
+    Csvsql::Tracker.commit(:import_csv)
+    db
+  end
 
+  def import_lines(lines, col_names)
+    sql = Csvsql::Tracker.commit(:generate_import_sql) do
+      s = "INSERT INTO csv (#{col_names.join(', ')}) VALUES "
+      s += lines.map { |line| "(#{line.join(',')})" }.join(', ')
     end
+    Csvsql::Tracker.commit(:execute_import_sql) { execute sql }
+  end
 
-    end
-  rescue => e
-    process_sql_error("Parse val: #{val}", e)
+  def format_sql_val(val, type)
+    case type
+    when :int, :integer then val.to_i
+    when :float, :double then val.to_f
+    when :date then "'#{Date.parse(val).to_s}'"
+    when :datetime then "'#{Time.parse(val).strftime('%F %T')}'"
+    else
+      "'#{val.gsub("'", "''")}'"
     end
+  rescue => e
+    process_sql_error("Parse val: #{val}", e)
+  end
 
+  def process_sql_error(sql, err)
+    $stderr.puts(sql)
 
-  end
+    if @error_action == :exit
+      $stderr.puts(e.message)
+      exit
+    else
+      raise err
     end
+  end
 
-        else
-          FileUtils.rm(cache_path)
-          cache_path
-        end
+  def get_db_path(csv_path)
+    csv_path = csv_path || ''
+    return '' unless File.exist?(csv_path)
+
+    if use_cache
+      stat = File.stat(csv_path)
+      filename = Digest::SHA2.hexdigest(File.absolute_path(csv_path)) + '.cache'
+      file_stat = [File.absolute_path(csv_path), stat.size, stat.ctime].join("\n")
+      stat_path = File.join(CACHE_DIR, filename.gsub(/\.cache$/, '.stat'))
+      cache_path = File.join(CACHE_DIR, filename)
+
+      if File.exist?(stat_path)
+        if File.read(stat_path) == file_stat
+          cache_path
         else
+          FileUtils.rm(cache_path)
           cache_path
         end
       else
+        File.write(stat_path, file_stat)
+        cache_path
       end
+    else
+      ''
    end
  end
 end
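The import path above batches rows and issues one multi-row INSERT per `batch_rows` rows, with each value already quoted by `format_sql_val`. A standalone sketch of the SQL that `import_lines` assembles; the columns and rows are made up:

```ruby
# Mirrors import_lines: values arrive pre-quoted ('O''Brien' shows the
# single-quote doubling that format_sql_val applies to text columns).
col_names = %w[name age]                          # e.g. from a "name,age:int" header
lines     = [["'Alice'", 20], ["'O''Brien'", 30]]

sql = "INSERT INTO csv (#{col_names.join(', ')}) VALUES "
sql += lines.map { |line| "(#{line.join(',')})" }.join(', ')
puts sql
# INSERT INTO csv (name, age) VALUES ('Alice',20), ('O''Brien',30)
```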
data/lib/csvsql/tracker.rb
ADDED
@@ -0,0 +1,44 @@
+require 'logger'
+
+class Csvsql::Tracker
+  attr_reader :stats, :logger
+
+  def self.tracker
+    @tracker ||= new
+  end
+
+  def self.tracker=(t)
+    @tracker = t
+  end
+
+  def self.commit(*args, &block)
+    tracker.commit(*args, &block)
+  end
+
+  def initialize(logger = Logger.new('/dev/null'))
+    @stats = {}
+    @logger = logger
+  end
+
+  def commit(id, output: true, &block)
+    id = id.to_s
+    old = stats[id]
+    stats[id] = get_stat
+
+    if block
+      block.call.tap { commit(id) }
+    elsif output && old
+      logger.info("[#{id}] #{compare_stat(old, stats[id])}")
+    end
+  end
+
+  private
+
+  def get_stat
+    { time: Time.now }
+  end
+
+  def compare_stat(old, new)
+    "Time cost: #{((new[:time] - old[:time]) * 1000000).to_i / 1000}ms"
+  end
+end
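`Tracker#commit` is a paired-checkpoint timer: the first commit of an id records a timestamp, the next commit of the same id logs the delta, and the block form does both around the block. A small usage sketch:

```ruby
require 'csvsql'
require 'logger'

tracker = Csvsql::Tracker.new(Logger.new($stdout))

# Block form: returns the block's value and logs "[sort] Time cost: ...ms".
result = tracker.commit(:sort) { (1..100_000).to_a.shuffle.sort }

# Checkpoint form: the second commit with the same id logs the gap since the first.
tracker.commit(:step)
sleep 0.05
tracker.commit(:step)
```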
data/lib/csvsql/version.rb
CHANGED
data/lib/csvsql.rb
CHANGED
@@ -6,15 +6,20 @@ require 'csv'
 require 'sqlite3'
 
 require 'csvsql/db'
+require 'csvsql/tracker'
 
 module Csvsql
   def self.execute(sql, csv_data, opts = {})
     csvdb = Csvsql::Db.new(opts)
     csvdb.import(csv_data)
-    pst =
+    pst = Csvsql::Tracker.commit(:execute_query_sql) do
+      csvdb.prepare(sql)
+    end
+    Csvsql::Tracker.commit(:output_format)
     CSV.generate do |csv|
       csv << pst.columns.zip(pst.types).map { |c| c.compact.join(':') }
       pst.each { |line| csv << line }
-    end
+    end.tap { Csvsql::Tracker.commit(:output_format) }
   end
 end
+
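`Csvsql.execute` is the whole public surface: import, prepare, and CSV re-serialization, now bracketed by tracker checkpoints. A minimal library-level sketch, assuming the gem is installed; the sample data is made up:

```ruby
require 'csvsql'
require 'stringio'

# Header cells may carry a type ("age:int"); parser_header turns them into
# the CREATE TABLE column definitions.
csv = StringIO.new(<<~DATA)
  name,age:int
  Alice,20
  Bob,30
DATA

# Prints CSV whose header row carries the declared types, e.g. "name:varchar(255),age:int".
puts Csvsql.execute('select name, age from csv where age > 25', csv)
```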
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: csvsql
 version: !ruby/object:Gem::Version
-  version: 0.1.0
+  version: 0.1.1
 platform: ruby
 authors:
 - jiangzhi.xie
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2018-07-
+date: 2018-07-19 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: sqlite3
@@ -102,6 +102,7 @@ files:
 - exe/csvsql
 - lib/csvsql.rb
 - lib/csvsql/db.rb
+- lib/csvsql/tracker.rb
 - lib/csvsql/version.rb
 homepage: https://github.com/xiejiangzhi/csvsql
 licenses: