csvsql 0.1.0 → 0.1.1

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: 77f9bf18a9ec8773b4015b456cd4dc789bebd36d
- data.tar.gz: 6c18bed0e815307ffa18cd8b9ef80a774abecaa9
+ metadata.gz: 70fa4c1c358f92a85d5f1cd0502081297d1d27cc
+ data.tar.gz: 0bf5efcd968a8965c8308b75e39ae82e43c8d29e
  SHA512:
- metadata.gz: 567347889397e591db8a04dc8dd9b8154e4915eb85b88575f62767a7e78bc578e2282d8250cd27f5947cc7462e3bd37f6869c455ec76ada203be4c8e03b1da7b
- data.tar.gz: 4c345a17fb5b380d6039fbb1bfad0609596a1703c8dca07b65cd917f5bfa0b77d49e7d3b41181d6f6cf7195b8ebc8f5d869099aebfc0d2b8dc190de64e303656
+ metadata.gz: 88ddda2f38d83fd4defc23bbfd0d596517f600aacf30963a63b1a15c7f5dca73d7dfe977b0d8b80bff597238ee8e515f559480afb6d54c6a3c2b30e7d6626f24
+ data.tar.gz: d51eb995c1acfe9420153e3acb25f91c06137276f88dd3d67d60d7807395acefd9a7d9403030dfde5a22ab3c0e3492e5de44d479478eca26288f0b475e30b4e4
data/.gitignore CHANGED
@@ -9,5 +9,7 @@

  # rspec failure tracking
  .rspec_status
+ /spec/examples.txt

  *.swp
+ *.gem
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
  PATH
  remote: .
  specs:
- csvsql (0.1.0)
+ csvsql (0.1.1)
  sqlite3 (~> 1.3.13)

  GEM
data/README.md CHANGED
@@ -75,10 +75,10 @@ csvsql -i mydata.csv "select name, total from csv where total < 30" | csvsql "se
  It will save the CSV data to a temporary file; the cache is kept in the `~/.csvsql_cache` folder.

  ```
- csvsql -i large.csv -t "select count(*) from csv"
+ csvsql -i large.csv -c "select count(*) from csv"

  # the second run will be fast.
- csvsql -i large.csv -t "select count(*) from csv"
+ csvsql -i large.csv -c "select count(*) from csv"
  ```

  ### Clear Cache
@@ -86,7 +86,7 @@ csvsql -i large.csv -t "select count(*) from csv"
  This command removes all data in the `~/.csvsql_cache` folder:

  ```
- csvsql --clear
+ csvsql --clear-cache
  ```

data/exe/csvsql CHANGED
@@ -8,7 +8,8 @@ require 'csvsql'

  options = {}
  OptionParser.new do |opts|
- opts.banner = "Usage: csvsql [options] SQL"
+ opts.banner = "Csvsql #{Csvsql::VERSION}\nUsage: csvsql [options] SQL"
+ opts.version = Csvsql::VERSION

  opts.on('-i', '--input path', "CSV file path, optional. read from stdin if no give") do |path|
  options[:csv_path] = path
@@ -18,9 +19,20 @@ OptionParser.new do |opts|
  options[:use_cache] = true
  end

+ opts.on(
+ '-b', '--batch-rows n',
+ "How many rows to import per batch. Default value is #{Csvsql::Db::BATCH_ROWS}"
+ ) do |n|
+ options[:batch_rows] = n.to_i
+ end
+
  opts.on('--clear-cache', "Clear all cache data") do
  options[:clear_cache] = true
  end
+
+ opts.on('--debug', "Print debug info") do
+ options[:debug] = true
+ end
  end.parse!

  if options[:clear_cache]
@@ -29,6 +41,10 @@ if options[:clear_cache]
  exit
  end

+ if options[:debug]
+ Csvsql::Tracker.tracker = Csvsql::Tracker.new(Logger.new($stdout))
+ end
+
  csv_data = options[:csv_path] || StringIO.new($stdin.read)

- puts Csvsql.execute(ARGV[0], csv_data, use_cache: options[:use_cache])
+ puts Csvsql.execute(ARGV[0], csv_data, use_cache: options[:use_cache], batch_rows: options[:batch_rows])
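For context, the two new flags map onto the library API as follows; a minimal sketch, assuming a `people.csv` in the working directory (the file name and the 50_000 batch size are only illustrative):

```ruby
require 'csvsql'

# --debug: route Tracker timings to stdout instead of the default /dev/null logger.
Csvsql::Tracker.tracker = Csvsql::Tracker.new(Logger.new($stdout))

# --batch-rows 50000: rows are inserted in batches of this size during import.
puts Csvsql.execute('select count(*) from csv', 'people.csv', batch_rows: 50_000)
```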
data/lib/csvsql/db.rb CHANGED
@@ -2,146 +2,149 @@

  require 'digest'

- module Csvsql
- class Db
- BATCH_LINES = 10000
- CACHE_DIR = File.join(Dir.home, '.csvsql_cache')
+ class Csvsql::Db
+ BATCH_ROWS = 10000
+ CACHE_DIR = File.join(Dir.home, '.csvsql_cache')
+ FileUtils.mkdir_p(CACHE_DIR) unless Dir.exists?(CACHE_DIR)

- FileUtils.mkdir_p(CACHE_DIR) unless Dir.exists?(CACHE_DIR)
+ attr_reader :use_cache, :csv_path, :csv_io, :db, :batch_rows

- attr_reader :use_cache, :csv_path, :csv_io, :db
+ def self.clear_cache!
+ require 'fileutils'
+ FileUtils.rm_f(Dir.glob(File.join(CACHE_DIR, '*')))
+ end

- def self.clear_cache!
- require 'fileutils'
- FileUtils.rm_f(Dir.glob(File.join(CACHE_DIR, '*')))
- end
+ def initialize(use_cache: false, batch_rows: nil)
+ @db = nil
+ @csv_io = nil
+ @csv_path = nil
+ @use_cache = use_cache
+ @batch_rows = batch_rows || BATCH_ROWS
+ end

- def initialize(use_cache: false)
- @db = nil
- @csv_path = nil
- @use_cache = use_cache
- end
+ # action:
+ # raise: default
+ # exit
+ def sql_error_action=(action)
+ @sql_error_action = action.to_sym
+ end

- # action:
- # raise: default
- # exit
- def sql_error_action=(action)
- @sql_error_action = action.to_sym
- end
+ def execute(sql)
+ db.execute(sql)
+ rescue SQLite3::SQLException => e
+ process_sql_error(sql, e)
+ end

- def execute(sql)
- db.execute(sql)
- rescue SQLite3::SQLException => e
- process_sql_error(sql, e)
- end
+ def prepare(sql)
+ db.prepare(sql)
+ rescue SQLite3::SQLException => e
+ process_sql_error(sql, e)
+ end

- def prepare(sql)
- db.prepare(sql)
- rescue SQLite3::SQLException => e
- process_sql_error(sql, e)
+ def import(csv_data_or_path)
+ case csv_data_or_path
+ when StringIO, IO
+ @csv_io = csv_data_or_path
+ else
+ @csv_path = csv_data_or_path
  end
+ @db = SQLite3::Database.new(get_db_path(@csv_path))

- def import(csv_data_or_path)
- case csv_data_or_path
- when StringIO, IO
- @csv_io = csv_data_or_path
- else
- @csv_path = csv_data_or_path
- end
- @db = SQLite3::Database.new(get_db_path(@csv_path))
-
- tables = db.execute("SELECT name FROM sqlite_master WHERE type='table';").first
- unless tables && tables.include?('csv')
- init_db_by_csv(@csv_io ? CSV.new(@csv_io) : CSV.open(@csv_path))
- end
- true
+ tables = db.execute("SELECT name FROM sqlite_master WHERE type='table';").flatten
+ unless tables.include?('csv')
+ init_db_by_csv(@csv_io ? CSV.new(@csv_io) : CSV.open(@csv_path))
  end
+ true
+ end

- private
+ private

- def parser_header(csv_header)
- csv_header.map do |col, r|
- name, type = col.strip.split(':')
- [name, (type || 'varchar(255)').downcase.to_sym]
- end
+ def parser_header(csv_header)
+ csv_header.map do |col, r|
+ name, type = col.strip.split(':')
+ [name, (type || 'varchar(255)').downcase.to_sym]
  end
+ end

- def init_db_by_csv(csv)
- header = parser_header(csv.readline)
+ def init_db_by_csv(csv)
+ header = parser_header(csv.readline)

- cols = header.map { |name, type| "#{name} #{type}" }.join(', ')
- sql = "CREATE TABLE csv (#{cols});"
- execute sql
+ cols = header.map { |name, type| "#{name} #{type}" }.join(', ')
+ sql = "CREATE TABLE csv (#{cols});"
+ execute sql

- cache = []
- col_names = header.map(&:first)
- csv.each do |line|
- if cache.length > BATCH_LINES then
- import_lines(cache, col_names)
- cache.clear
- else
- cache << line.each_with_index.map { |v, i| format_sql_val(v, header[i][1]) }
- end
+ cache = []
+ col_names = header.map(&:first)
+ Csvsql::Tracker.commit(:import_csv)
+ csv.each do |line|
+ cache << line.each_with_index.map { |v, i| format_sql_val(v, header[i][1]) }
+
+ if cache.length >= batch_rows then
+ import_lines(cache, col_names)
+ cache.clear
  end
- import_lines(cache, col_names) unless cache.empty?
- db
  end
+ import_lines(cache, col_names) unless cache.empty?
+ Csvsql::Tracker.commit(:import_csv)
+ db
+ end

- def import_lines(lines, col_names)
- sql = "INSERT INTO csv (#{col_names.join(', ')}) VALUES "
- values = lines.map { |line| "(#{line.join(',')})" }.join(', ')
- execute sql + values
+ def import_lines(lines, col_names)
+ sql = Csvsql::Tracker.commit(:generate_import_sql) do
+ s = "INSERT INTO csv (#{col_names.join(', ')}) VALUES "
+ s += lines.map { |line| "(#{line.join(',')})" }.join(', ')
  end
+ Csvsql::Tracker.commit(:execute_import_sql) { execute sql }
+ end

- def format_sql_val(val, type)
- case type
- when :int, :integer then val.to_i
- when :float, :double then val.to_f
- when :date then "'#{Date.parse(val).to_s}'"
- when :datetime then "'#{Time.parse(val).strftime('%F %T')}'"
- else
- "'#{val.gsub("'", "''")}'"
- end
- rescue => e
- process_sql_error("Parse val: #{val}", e)
+ def format_sql_val(val, type)
+ case type
+ when :int, :integer then val.to_i
+ when :float, :double then val.to_f
+ when :date then "'#{Date.parse(val).to_s}'"
+ when :datetime then "'#{Time.parse(val).strftime('%F %T')}'"
+ else
+ "'#{val.gsub("'", "''")}'"
  end
+ rescue => e
+ process_sql_error("Parse val: #{val}", e)
+ end

- def process_sql_error(sql, err)
- $stderr.puts(sql)
+ def process_sql_error(sql, err)
+ $stderr.puts(sql)

- if @error_action == :exit
- $stderr.puts(e.message)
- exit
- else
- raise err
- end
+ if @error_action == :exit
+ $stderr.puts(e.message)
+ exit
+ else
+ raise err
  end
+ end

- def get_db_path(csv_path)
- csv_path = csv_path || ''
- return '' unless File.exist?(csv_path)
-
- if use_cache
- stat = File.stat(csv_path)
- filename = Digest::SHA2.hexdigest(File.absolute_path(csv_path)) + '.cache'
- file_stat = [File.absolute_path(csv_path), stat.size, stat.ctime].join("\n")
- stat_path = File.join(CACHE_DIR, filename.gsub(/\.cache$/, '.stat'))
- cache_path = File.join(CACHE_DIR, filename)
-
- if File.exist?(stat_path)
- if File.read(stat_path) == file_stat
- cache_path
- else
- FileUtils.rm(cache_path)
- cache_path
- end
+ def get_db_path(csv_path)
+ csv_path = csv_path || ''
+ return '' unless File.exist?(csv_path)
+
+ if use_cache
+ stat = File.stat(csv_path)
+ filename = Digest::SHA2.hexdigest(File.absolute_path(csv_path)) + '.cache'
+ file_stat = [File.absolute_path(csv_path), stat.size, stat.ctime].join("\n")
+ stat_path = File.join(CACHE_DIR, filename.gsub(/\.cache$/, '.stat'))
+ cache_path = File.join(CACHE_DIR, filename)
+
+ if File.exist?(stat_path)
+ if File.read(stat_path) == file_stat
+ cache_path
  else
- File.write(stat_path, file_stat)
+ FileUtils.rm(cache_path)
  cache_path
  end
  else
- ''
+ File.write(stat_path, file_stat)
+ cache_path
  end
+ else
+ ''
  end
  end
  end
data/lib/csvsql/tracker.rb ADDED
@@ -0,0 +1,44 @@
+ require 'logger'
+
+ class Csvsql::Tracker
+ attr_reader :stats, :logger
+
+ def self.tracker
+ @tracker ||= new
+ end
+
+ def self.tracker=(t)
+ @tracker = t
+ end
+
+ def self.commit(*args, &block)
+ tracker.commit(*args, &block)
+ end
+
+ def initialize(logger = Logger.new('/dev/null'))
+ @stats = {}
+ @logger = logger
+ end
+
+ def commit(id, output: true, &block)
+ id = id.to_s
+ old = stats[id]
+ stats[id] = get_stat
+
+ if block
+ block.call.tap { commit(id) }
+ elsif output && old
+ logger.info("[#{id}] #{compare_stat(old, stats[id])}")
+ end
+ end
+
+ private
+
+ def get_stat
+ { time: Time.now }
+ end
+
+ def compare_stat(old, new)
+ "Time cost: #{((new[:time] - old[:time]) * 1000000).to_i / 1000}ms"
+ end
+ end
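The new `Csvsql::Tracker` is what backs the `--debug` flag; a minimal usage sketch (the step ids, sleeps, and stdout logger are illustrative, not part of the gem):

```ruby
require 'csvsql'
require 'logger'

Csvsql::Tracker.tracker = Csvsql::Tracker.new(Logger.new($stdout))

# Block form: the elapsed time is logged when the block returns,
# roughly "[my_step] Time cost: 200ms".
Csvsql::Tracker.commit(:my_step) { sleep 0.2 }

# Paired form: the second commit with the same id logs the time since the first.
Csvsql::Tracker.commit(:another_step)
sleep 0.1
Csvsql::Tracker.commit(:another_step)  # roughly "[another_step] Time cost: 100ms"
```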
data/lib/csvsql/version.rb CHANGED
@@ -1,5 +1,5 @@
  # frozen_string_literal: true

  module Csvsql
- VERSION = "0.1.0"
+ VERSION = "0.1.1"
  end
data/lib/csvsql.rb CHANGED
@@ -6,15 +6,20 @@ require 'csv'
  require 'sqlite3'

  require 'csvsql/db'
+ require 'csvsql/tracker'

  module Csvsql
  def self.execute(sql, csv_data, opts = {})
  csvdb = Csvsql::Db.new(opts)
  csvdb.import(csv_data)
- pst = csvdb.prepare(sql)
+ pst = Csvsql::Tracker.commit(:execute_query_sql) do
+ csvdb.prepare(sql)
+ end
+ Csvsql::Tracker.commit(:output_format)
  CSV.generate do |csv|
  csv << pst.columns.zip(pst.types).map { |c| c.compact.join(':') }
  pst.each { |line| csv << line }
- end
+ end.tap { Csvsql::Tracker.commit(:output_format) }
  end
  end
+
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: csvsql
  version: !ruby/object:Gem::Version
- version: 0.1.0
+ version: 0.1.1
  platform: ruby
  authors:
  - jiangzhi.xie
  autorequire:
  bindir: exe
  cert_chain: []
- date: 2018-07-18 00:00:00.000000000 Z
+ date: 2018-07-19 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: sqlite3
@@ -102,6 +102,7 @@ files:
  - exe/csvsql
  - lib/csvsql.rb
  - lib/csvsql/db.rb
+ - lib/csvsql/tracker.rb
  - lib/csvsql/version.rb
  homepage: https://github.com/xiejiangzhi/csvsql
  licenses: