csvsql 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 77f9bf18a9ec8773b4015b456cd4dc789bebd36d
4
- data.tar.gz: 6c18bed0e815307ffa18cd8b9ef80a774abecaa9
3
+ metadata.gz: 70fa4c1c358f92a85d5f1cd0502081297d1d27cc
4
+ data.tar.gz: 0bf5efcd968a8965c8308b75e39ae82e43c8d29e
5
5
  SHA512:
6
- metadata.gz: 567347889397e591db8a04dc8dd9b8154e4915eb85b88575f62767a7e78bc578e2282d8250cd27f5947cc7462e3bd37f6869c455ec76ada203be4c8e03b1da7b
7
- data.tar.gz: 4c345a17fb5b380d6039fbb1bfad0609596a1703c8dca07b65cd917f5bfa0b77d49e7d3b41181d6f6cf7195b8ebc8f5d869099aebfc0d2b8dc190de64e303656
6
+ metadata.gz: 88ddda2f38d83fd4defc23bbfd0d596517f600aacf30963a63b1a15c7f5dca73d7dfe977b0d8b80bff597238ee8e515f559480afb6d54c6a3c2b30e7d6626f24
7
+ data.tar.gz: d51eb995c1acfe9420153e3acb25f91c06137276f88dd3d67d60d7807395acefd9a7d9403030dfde5a22ab3c0e3492e5de44d479478eca26288f0b475e30b4e4
data/.gitignore CHANGED
@@ -9,5 +9,7 @@
9
9
 
10
10
  # rspec failure tracking
11
11
  .rspec_status
12
+ /spec/examples.txt
12
13
 
13
14
  *.swp
15
+ *.gem
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- csvsql (0.1.0)
4
+ csvsql (0.1.1)
5
5
  sqlite3 (~> 1.3.13)
6
6
 
7
7
  GEM
data/README.md CHANGED
@@ -75,10 +75,10 @@ csvsql -i mydata.csv "select name, total from csv where total < 30" | csvsql "se
75
75
  It will save the CSV data to a tempfile. We use the `~/.csvsql_cache` folder to store the cache.
76
76
 
77
77
  ```
78
- csvsql -i large.csv -t "select count(*) from csv"
78
+ csvsql -i large.csv -c "select count(*) from csv"
79
79
 
80
80
  # The second run will be fast.
81
- csvsql -i large.csv -t "select count(*) from csv"
81
+ csvsql -i large.csv -c "select count(*) from csv"
82
82
  ```
83
83
 
84
84
  ### Clear Cache
@@ -86,7 +86,7 @@ csvsql -i large.csv -t "select count(*) from csv"
86
86
  This command will remove all data in the `~/.csvsql_cache` folder.
87
87
 
88
88
  ```
89
- csvsql --clear
89
+ csvsql --clear-cache
90
90
  ```
91
91
 
92
92
 
data/exe/csvsql CHANGED
@@ -8,7 +8,8 @@ require 'csvsql'
8
8
 
9
9
  options = {}
10
10
  OptionParser.new do |opts|
11
- opts.banner = "Usage: csvsql [options] SQL"
11
+ opts.banner = "Csvsql #{Csvsql::VERSION}\nUsage: csvsql [options] SQL"
12
+ opts.version = Csvsql::VERSION
12
13
 
13
14
  opts.on('-i', '--input path', "CSV file path, optional. read from stdin if no give") do |path|
14
15
  options[:csv_path] = path
@@ -18,9 +19,20 @@ OptionParser.new do |opts|
18
19
  options[:use_cache] = true
19
20
  end
20
21
 
22
+ opts.on(
23
+ '-b', '--batch-rows n',
24
+ "How many rows to import per batch. Default value is #{Csvsql::Db::BATCH_ROWS}"
25
+ ) do |n|
26
+ options[:batch_rows] = n.to_i
27
+ end
28
+
21
29
  opts.on('--clear-cache', "Clear all cache data") do
22
30
  options[:clear_cache] = true
23
31
  end
32
+
33
+ opts.on('--debug', "Print debug info") do
34
+ options[:debug] = true
35
+ end
24
36
  end.parse!
25
37
 
26
38
  if options[:clear_cache]
@@ -29,6 +41,10 @@ if options[:clear_cache]
29
41
  exit
30
42
  end
31
43
 
44
+ if options[:debug]
45
+ Csvsql::Tracker.tracker = Csvsql::Tracker.new(Logger.new($stdout))
46
+ end
47
+
32
48
  csv_data = options[:csv_path] || StringIO.new($stdin.read)
33
49
 
34
- puts Csvsql.execute(ARGV[0], csv_data, use_cache: options[:use_cache])
50
+ puts Csvsql.execute(ARGV[0], csv_data, use_cache: options[:use_cache], batch_rows: options[:batch_rows])
data/lib/csvsql/db.rb CHANGED
@@ -2,146 +2,149 @@
2
2
 
3
3
  require 'digest'
4
4
 
5
- module Csvsql
6
- class Db
7
- BATCH_LINES = 10000
8
- CACHE_DIR = File.join(Dir.home, '.csvsql_cache')
5
+ class Csvsql::Db
6
+ BATCH_ROWS = 10000
7
+ CACHE_DIR = File.join(Dir.home, '.csvsql_cache')
8
+ FileUtils.mkdir_p(CACHE_DIR) unless Dir.exists?(CACHE_DIR)
9
9
 
10
- FileUtils.mkdir_p(CACHE_DIR) unless Dir.exists?(CACHE_DIR)
10
+ attr_reader :use_cache, :csv_path, :csv_io, :db, :batch_rows
11
11
 
12
- attr_reader :use_cache, :csv_path, :csv_io, :db
12
+ def self.clear_cache!
13
+ require 'fileutils'
14
+ FileUtils.rm_f(Dir.glob(File.join(CACHE_DIR, '*')))
15
+ end
13
16
 
14
- def self.clear_cache!
15
- require 'fileutils'
16
- FileUtils.rm_f(Dir.glob(File.join(CACHE_DIR, '*')))
17
- end
17
+ def initialize(use_cache: false, batch_rows: nil)
18
+ @db = nil
19
+ @csv_io = nil
20
+ @csv_path = nil
21
+ @use_cache = use_cache
22
+ @batch_rows = batch_rows || BATCH_ROWS
23
+ end
18
24
 
19
- def initialize(use_cache: false)
20
- @db = nil
21
- @csv_path = nil
22
- @use_cache = use_cache
23
- end
25
+ # action:
26
+ # raise: default
27
+ # exit
28
+ def sql_error_action=(action)
29
+ @sql_error_action = action.to_sym
30
+ end
24
31
 
25
- # action:
26
- # raise: default
27
- # exit
28
- def sql_error_action=(action)
29
- @sql_error_action = action.to_sym
30
- end
32
+ def execute(sql)
33
+ db.execute(sql)
34
+ rescue SQLite3::SQLException => e
35
+ process_sql_error(sql, e)
36
+ end
31
37
 
32
- def execute(sql)
33
- db.execute(sql)
34
- rescue SQLite3::SQLException => e
35
- process_sql_error(sql, e)
36
- end
38
+ def prepare(sql)
39
+ db.prepare(sql)
40
+ rescue SQLite3::SQLException => e
41
+ process_sql_error(sql, e)
42
+ end
37
43
 
38
- def prepare(sql)
39
- db.prepare(sql)
40
- rescue SQLite3::SQLException => e
41
- process_sql_error(sql, e)
44
+ def import(csv_data_or_path)
45
+ case csv_data_or_path
46
+ when StringIO, IO
47
+ @csv_io = csv_data_or_path
48
+ else
49
+ @csv_path = csv_data_or_path
42
50
  end
51
+ @db = SQLite3::Database.new(get_db_path(@csv_path))
43
52
 
44
- def import(csv_data_or_path)
45
- case csv_data_or_path
46
- when StringIO, IO
47
- @csv_io = csv_data_or_path
48
- else
49
- @csv_path = csv_data_or_path
50
- end
51
- @db = SQLite3::Database.new(get_db_path(@csv_path))
52
-
53
- tables = db.execute("SELECT name FROM sqlite_master WHERE type='table';").first
54
- unless tables && tables.include?('csv')
55
- init_db_by_csv(@csv_io ? CSV.new(@csv_io) : CSV.open(@csv_path))
56
- end
57
- true
53
+ tables = db.execute("SELECT name FROM sqlite_master WHERE type='table';").flatten
54
+ unless tables.include?('csv')
55
+ init_db_by_csv(@csv_io ? CSV.new(@csv_io) : CSV.open(@csv_path))
58
56
  end
57
+ true
58
+ end
59
59
 
60
- private
60
+ private
61
61
 
62
- def parser_header(csv_header)
63
- csv_header.map do |col, r|
64
- name, type = col.strip.split(':')
65
- [name, (type || 'varchar(255)').downcase.to_sym]
66
- end
62
+ def parser_header(csv_header)
63
+ csv_header.map do |col, r|
64
+ name, type = col.strip.split(':')
65
+ [name, (type || 'varchar(255)').downcase.to_sym]
67
66
  end
67
+ end
68
68
 
69
- def init_db_by_csv(csv)
70
- header = parser_header(csv.readline)
69
+ def init_db_by_csv(csv)
70
+ header = parser_header(csv.readline)
71
71
 
72
- cols = header.map { |name, type| "#{name} #{type}" }.join(', ')
73
- sql = "CREATE TABLE csv (#{cols});"
74
- execute sql
72
+ cols = header.map { |name, type| "#{name} #{type}" }.join(', ')
73
+ sql = "CREATE TABLE csv (#{cols});"
74
+ execute sql
75
75
 
76
- cache = []
77
- col_names = header.map(&:first)
78
- csv.each do |line|
79
- if cache.length > BATCH_LINES then
80
- import_lines(cache, col_names)
81
- cache.clear
82
- else
83
- cache << line.each_with_index.map { |v, i| format_sql_val(v, header[i][1]) }
84
- end
76
+ cache = []
77
+ col_names = header.map(&:first)
78
+ Csvsql::Tracker.commit(:import_csv)
79
+ csv.each do |line|
80
+ cache << line.each_with_index.map { |v, i| format_sql_val(v, header[i][1]) }
81
+
82
+ if cache.length >= batch_rows then
83
+ import_lines(cache, col_names)
84
+ cache.clear
85
85
  end
86
- import_lines(cache, col_names) unless cache.empty?
87
- db
88
86
  end
87
+ import_lines(cache, col_names) unless cache.empty?
88
+ Csvsql::Tracker.commit(:import_csv)
89
+ db
90
+ end
89
91
 
90
- def import_lines(lines, col_names)
91
- sql = "INSERT INTO csv (#{col_names.join(', ')}) VALUES "
92
- values = lines.map { |line| "(#{line.join(',')})" }.join(', ')
93
- execute sql + values
92
+ def import_lines(lines, col_names)
93
+ sql = Csvsql::Tracker.commit(:generate_import_sql) do
94
+ s = "INSERT INTO csv (#{col_names.join(', ')}) VALUES "
95
+ s += lines.map { |line| "(#{line.join(',')})" }.join(', ')
94
96
  end
97
+ Csvsql::Tracker.commit(:execute_import_sql) { execute sql }
98
+ end
95
99
 
96
- def format_sql_val(val, type)
97
- case type
98
- when :int, :integer then val.to_i
99
- when :float, :double then val.to_f
100
- when :date then "'#{Date.parse(val).to_s}'"
101
- when :datetime then "'#{Time.parse(val).strftime('%F %T')}'"
102
- else
103
- "'#{val.gsub("'", "''")}'"
104
- end
105
- rescue => e
106
- process_sql_error("Parse val: #{val}", e)
100
+ def format_sql_val(val, type)
101
+ case type
102
+ when :int, :integer then val.to_i
103
+ when :float, :double then val.to_f
104
+ when :date then "'#{Date.parse(val).to_s}'"
105
+ when :datetime then "'#{Time.parse(val).strftime('%F %T')}'"
106
+ else
107
+ "'#{val.gsub("'", "''")}'"
107
108
  end
109
+ rescue => e
110
+ process_sql_error("Parse val: #{val}", e)
111
+ end
108
112
 
109
- def process_sql_error(sql, err)
110
- $stderr.puts(sql)
113
+ def process_sql_error(sql, err)
114
+ $stderr.puts(sql)
111
115
 
112
- if @error_action == :exit
113
- $stderr.puts(e.message)
114
- exit
115
- else
116
- raise err
117
- end
116
+ if @error_action == :exit
117
+ $stderr.puts(e.message)
118
+ exit
119
+ else
120
+ raise err
118
121
  end
122
+ end
119
123
 
120
- def get_db_path(csv_path)
121
- csv_path = csv_path || ''
122
- return '' unless File.exist?(csv_path)
123
-
124
- if use_cache
125
- stat = File.stat(csv_path)
126
- filename = Digest::SHA2.hexdigest(File.absolute_path(csv_path)) + '.cache'
127
- file_stat = [File.absolute_path(csv_path), stat.size, stat.ctime].join("\n")
128
- stat_path = File.join(CACHE_DIR, filename.gsub(/\.cache$/, '.stat'))
129
- cache_path = File.join(CACHE_DIR, filename)
130
-
131
- if File.exist?(stat_path)
132
- if File.read(stat_path) == file_stat
133
- cache_path
134
- else
135
- FileUtils.rm(cache_path)
136
- cache_path
137
- end
124
+ def get_db_path(csv_path)
125
+ csv_path = csv_path || ''
126
+ return '' unless File.exist?(csv_path)
127
+
128
+ if use_cache
129
+ stat = File.stat(csv_path)
130
+ filename = Digest::SHA2.hexdigest(File.absolute_path(csv_path)) + '.cache'
131
+ file_stat = [File.absolute_path(csv_path), stat.size, stat.ctime].join("\n")
132
+ stat_path = File.join(CACHE_DIR, filename.gsub(/\.cache$/, '.stat'))
133
+ cache_path = File.join(CACHE_DIR, filename)
134
+
135
+ if File.exist?(stat_path)
136
+ if File.read(stat_path) == file_stat
137
+ cache_path
138
138
  else
139
- File.write(stat_path, file_stat)
139
+ FileUtils.rm(cache_path)
140
140
  cache_path
141
141
  end
142
142
  else
143
- ''
143
+ File.write(stat_path, file_stat)
144
+ cache_path
144
145
  end
146
+ else
147
+ ''
145
148
  end
146
149
  end
147
150
  end
@@ -0,0 +1,44 @@
1
+ require 'logger'
2
+
3
+ class Csvsql::Tracker
4
+ attr_reader :stats, :logger
5
+
6
+ def self.tracker
7
+ @tracker ||= new
8
+ end
9
+
10
+ def self.tracker=(t)
11
+ @tracker = t
12
+ end
13
+
14
+ def self.commit(*args, &block)
15
+ tracker.commit(*args, &block)
16
+ end
17
+
18
+ def initialize(logger = Logger.new('/dev/null'))
19
+ @stats = {}
20
+ @logger = logger
21
+ end
22
+
23
+ def commit(id, output: true, &block)
24
+ id = id.to_s
25
+ old = stats[id]
26
+ stats[id] = get_stat
27
+
28
+ if block
29
+ block.call.tap { commit(id) }
30
+ elsif output && old
31
+ logger.info("[#{id}] #{compare_stat(old, stats[id])}")
32
+ end
33
+ end
34
+
35
+ private
36
+
37
+ def get_stat
38
+ { time: Time.now }
39
+ end
40
+
41
+ def compare_stat(old, new)
42
+ "Time cost: #{((new[:time] - old[:time]) * 1000000).to_i / 1000}ms"
43
+ end
44
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Csvsql
4
- VERSION = "0.1.0"
4
+ VERSION = "0.1.1"
5
5
  end
data/lib/csvsql.rb CHANGED
@@ -6,15 +6,20 @@ require 'csv'
6
6
  require 'sqlite3'
7
7
 
8
8
  require 'csvsql/db'
9
+ require 'csvsql/tracker'
9
10
 
10
11
  module Csvsql
11
12
  def self.execute(sql, csv_data, opts = {})
12
13
  csvdb = Csvsql::Db.new(opts)
13
14
  csvdb.import(csv_data)
14
- pst = csvdb.prepare(sql)
15
+ pst = Csvsql::Tracker.commit(:execute_query_sql) do
16
+ csvdb.prepare(sql)
17
+ end
18
+ Csvsql::Tracker.commit(:output_format)
15
19
  CSV.generate do |csv|
16
20
  csv << pst.columns.zip(pst.types).map { |c| c.compact.join(':') }
17
21
  pst.each { |line| csv << line }
18
- end
22
+ end.tap { Csvsql::Tracker.commit(:output_format) }
19
23
  end
20
24
  end
25
+
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csvsql
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - jiangzhi.xie
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-07-18 00:00:00.000000000 Z
11
+ date: 2018-07-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: sqlite3
@@ -102,6 +102,7 @@ files:
102
102
  - exe/csvsql
103
103
  - lib/csvsql.rb
104
104
  - lib/csvsql/db.rb
105
+ - lib/csvsql/tracker.rb
105
106
  - lib/csvsql/version.rb
106
107
  homepage: https://github.com/xiejiangzhi/csvsql
107
108
  licenses: