csvsql 0.1.5 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f52f3d8ec0aa806dd0f4f5bd601e4c90edd03aab
4
- data.tar.gz: d903969288bb84f78699c1fff30f95ff855930f8
3
+ metadata.gz: ed472535cf8cc98e8b16f5298ea37fae1baa5c2e
4
+ data.tar.gz: f2e481c4c09a8cfbf1bb118496830be291afab58
5
5
  SHA512:
6
- metadata.gz: 4560ca06f7b76be154a3976b757d5b71f78af14eeec0e8b4bb10b16483d8c2f54f462fecb03a500b5ed40589931e4aa1333440923ae6e64a925a906c5a31a5df
7
- data.tar.gz: 13ebccc88ea17f240ddb9d1c72986f836cc770d81ec8e78dcc2eee757b77b45565373526729a0dfc6d184bbf89b6ce52cd5e5a4c744fb1b6a7159bd4a62f4670
6
+ metadata.gz: 1b01deab426caf1f3281cc5a0ea19be48161ee5508677c8978711fad36c5c6c48fa1b6fec4d693943cc394061f09c7db3320df65504c5ee5cf2fb734d1cbe1f9
7
+ data.tar.gz: e61c908e568c5392f810571e45f4061d95b0069f53a574a4459b47a8f6a928b6728e03048b1823bab949c3c3c7fdcad33ebbb4f0646ed6d1287a9881aa69a2ac
@@ -0,0 +1 @@
1
+ 2.3.4
@@ -17,4 +17,4 @@ before_install:
17
17
 
18
18
  script:
19
19
  - bundle exec rspec
20
- - rubocop
20
+ - bundle exec rubocop
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- csvsql (0.1.4)
4
+ csvsql (0.2.0)
5
5
  sqlite3 (~> 1.3.13)
6
6
 
7
7
  GEM
data/README.md CHANGED
@@ -7,6 +7,10 @@ Csvsql
7
7
  Use SQL to query your CSV file, and return a new CSV.
8
8
 
9
9
 
10
+ ## Requirements
11
+
12
+ * SQLite 3.6.16 or newer.
13
+
10
14
  ## Installation
11
15
 
12
16
  Add this line to your application's Gemfile:
@@ -75,7 +79,7 @@ csvsql -i mydata.csv "select name, total from csv where total < 30" | csvsql "se
75
79
 
76
80
  ### Cache CSV data
77
81
 
78
- It will save the CSV data to a tempfile. we use `~/.csvsql_cache` folder to save the cache
82
+ It saves the parsed data to the `~/.csvsql_cache` folder, so the data is not parsed again next time unless the data file has changed.
79
83
 
80
84
  ```
81
85
  csvsql -i large.csv -c "select count(*) from csv"
@@ -84,6 +88,20 @@ csvsql -i large.csv -c "select count(*) from csv"
84
88
  csvsql -i large.csv -c "select count(*) from csv"
85
89
  ```
86
90
 
91
+ ### Query multiple CSV files
92
+
93
+ When querying multiple files, each file must be given a name. This name will be used as the `table name`.
94
+
95
+ ```
96
+ csvsql -i users.csv:users -i posts.csv:posts "select * from posts join users on posts.user_id = users.id where users.role = 'guest'"
97
+ ```
98
+
99
+ With the cache enabled, each name is used as a `database name` and the table name is always `csv`. If a CSV file has been updated, only that database is updated; the other databases still use the cache.
100
+
101
+ ```
102
+ csvsql -i users.csv:users -i posts.csv:posts -c "select * from posts.csv join users.csv on posts.csv.user_id = users.csv.id where users.csv.role = 'guest'"
103
+ ```
104
+
87
105
  ### Clear Cache
88
106
 
89
107
  This command will remove all data in the `~/.csvsql_cache`
data/exe/csvsql CHANGED
@@ -1,60 +1,11 @@
1
1
  #! /usr/bin/env ruby
2
2
 
3
3
  require 'optparse'
4
+ require 'pry'
4
5
 
5
6
  lib = File.expand_path("../../lib", __FILE__)
6
7
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
7
8
  require 'csvsql'
8
9
 
9
- options = {}
10
- OptionParser.new do |opts|
11
- opts.banner = "Csvsql #{Csvsql::VERSION}\nUsage: csvsql [options] SQL"
12
- opts.version = Csvsql::VERSION
13
-
14
- opts.on('-i', '--input path', "CSV file path, optional. read from stdin if no give") do |path|
15
- options[:csv_path] = path
16
- end
17
-
18
- opts.on('-c', '--use-cache', "Cache data in ~/.csvsql_cache. it will still reload if file was changed") do
19
- options[:use_cache] = true
20
- end
21
-
22
- opts.on(
23
- '-b', '--batch-rows n',
24
- "How many rows to import per batch. Default value is #{Csvsql::Db::BATCH_ROWS}"
25
- ) do |n|
26
- options[:batch_rows] = n.to_i
27
- end
28
-
29
- opts.on('-e', '--encoding encoding', "Set the file encoding, default is UTF-8") do |encoding|
30
- options[:encoding] = encoding
31
- end
32
-
33
- opts.on('--clear-cache', "Clear all cache data") do
34
- options[:clear_cache] = true
35
- end
36
-
37
- opts.on('--debug', "Print debug info") do
38
- options[:debug] = true
39
- end
40
- end.parse!
41
-
42
- if options[:clear_cache]
43
- Csvsql::Db.clear_cache!
44
- puts "Completed clear cache."
45
- exit
46
- end
47
-
48
- if options[:debug]
49
- Csvsql::Tracker.tracker = Csvsql::Tracker.new(Logger.new($stdout))
50
- end
51
-
52
- csv_data = options[:csv_path] || StringIO.new($stdin.read)
53
-
54
- puts Csvsql.execute(
55
- ARGV[0], csv_data,
56
- use_cache: options[:use_cache],
57
- batch_rows: options[:batch_rows],
58
- sql_error_action: 'exit',
59
- encoding: options[:encoding]
60
- )
10
+ result = Csvsql::CommandRunner.run!(ARGV)
11
+ puts result if result
@@ -4,15 +4,21 @@ require "csvsql/version"
4
4
 
5
5
  require 'csv'
6
6
  require 'sqlite3'
7
+ require 'digest'
8
+ require 'fileutils'
7
9
 
8
10
  require 'csvsql/db'
9
11
  require 'csvsql/tracker'
12
+ require 'csvsql/command_runner'
10
13
 
11
14
  module Csvsql
12
- def self.execute(sql, csv_data, opts = {})
13
- encoding = opts.delete(:encoding)
14
- csvdb = Csvsql::Db.new(opts)
15
- csvdb.import(csv_data, encoding: encoding)
15
+ extend self
16
+
17
+ CACHE_DIR = File.join(Dir.home, '.csvsql_cache')
18
+ FileUtils.mkdir_p(CACHE_DIR) unless Dir.exists?(CACHE_DIR)
19
+
20
+ def execute(sql, csv_data, opts = {})
21
+ csvdb = init_data(csv_data, opts)
16
22
  pst = Csvsql::Tracker.commit(:execute_query_sql) do
17
23
  csvdb.prepare(sql)
18
24
  end
@@ -22,4 +28,68 @@ module Csvsql
22
28
  pst.each { |line| csv << line }
23
29
  end.tap { Csvsql::Tracker.commit(:output_format) }
24
30
  end
31
+
32
+ def self.clear_cache!
33
+ FileUtils.rm_f(Dir.glob(File.join(CACHE_DIR, '*')))
34
+ end
35
+
36
+ private
37
+
38
+ def init_data(csv_data, opts)
39
+ encoding = opts.delete(:encoding)
40
+ use_cache = opts.delete(:use_cache)
41
+ csvdb = Csvsql::Db.new(opts)
42
+
43
+ unless use_cache
44
+ csvdb.import(csv_data, encoding: encoding)
45
+ return csvdb
46
+ end
47
+
48
+ case csv_data
49
+ when StringIO, IO
50
+ # nothing
51
+ when Hash
52
+ dbs = []
53
+ csv_data.each do |dbname, csv_path|
54
+ dbs << [dbname, csvdb.init_db(get_db_cache_path(csv_path) || '')]
55
+ csvdb.import(csv_path, encoding: encoding)
56
+ end
57
+ dbs.each do |dbname, db|
58
+ csvdb.execute("ATTACH DATABASE '#{db.filename}' AS #{dbname};")
59
+ end
60
+ else
61
+ csvdb.init_db(get_db_cache_path(csv_data) || '')
62
+ csvdb.import(csv_data, encoding: encoding)
63
+ end
64
+
65
+ csvdb
66
+ end
67
+
68
+ def get_db_cache_path(csv_path)
69
+ csv_path = csv_path || ''
70
+ return unless File.exist?(csv_path)
71
+
72
+ stat = File.stat(csv_path)
73
+ filename = Digest::SHA2.hexdigest(File.absolute_path(csv_path)) + '.cache'
74
+ file_stat = [File.absolute_path(csv_path), stat.size, stat.ctime].join("\n")
75
+ stat_path = File.join(CACHE_DIR, filename.gsub(/\.cache$/, '.stat'))
76
+ cache_path = File.join(CACHE_DIR, filename)
77
+
78
+ if File.exist?(stat_path)
79
+ if File.read(stat_path) == file_stat
80
+ cache_path
81
+ else
82
+ if update_cb
83
+ update_cb.call
84
+ else
85
+ FileUtils.rm(cache_path)
86
+ end
87
+ File.write(stat_path, file_stat)
88
+ cache_path
89
+ end
90
+ else
91
+ File.write(stat_path, file_stat)
92
+ cache_path
93
+ end
94
+ end
25
95
  end
@@ -0,0 +1,96 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'optparse'
4
+
5
+ class Csvsql::CommandRunner
6
+ def self.run!(argv)
7
+ options = self.new.parse!(argv)
8
+ return unless options
9
+
10
+ if options[:clear_cache]
11
+ Csvsql.clear_cache!
12
+ puts "Completed clear cache."
13
+ return
14
+ end
15
+
16
+ if options[:debug]
17
+ Csvsql::Tracker.tracker = Csvsql::Tracker.new(Logger.new($stdout))
18
+ end
19
+
20
+ Csvsql.execute(
21
+ options[:sql], options[:csv_data],
22
+ use_cache: options[:use_cache],
23
+ batch_rows: options[:batch_rows],
24
+ sql_error_action: 'exit',
25
+ encoding: options[:encoding]
26
+ )
27
+ end
28
+
29
+ def options
30
+ @options ||= { csv_paths: [] }
31
+ end
32
+
33
+ def parse!(argv)
34
+ parser.parse!(argv)
35
+ options[:sql] = argv.last
36
+
37
+ paths = options.delete(:csv_paths)
38
+ options[:csv_data] = case paths.size
39
+ when 0
40
+ $stdin
41
+ when 1
42
+ paths.first
43
+ else
44
+ paths.each_with_object({}) do |path, r|
45
+ p, n = path.split(':')
46
+ if n.nil? || n.empty?
47
+ puts "You should give #{p} a name, example: #{p}:a_name"
48
+ return false
49
+ end
50
+ r[n] = p
51
+ end
52
+ end
53
+
54
+ return options
55
+ end
56
+
57
+ private
58
+
59
+ def parser
60
+ OptionParser.new do |opts|
61
+ opts.banner = "Csvsql #{Csvsql::VERSION}\nUsage: csvsql [options] SQL"
62
+ opts.version = Csvsql::VERSION
63
+
64
+ opts.on(
65
+ '-i', '--input path[:name]', "CSV file path, optional. read from stdin if no give." +
66
+ " Name is required if have multiple files. This name will be a table name." +
67
+ " It will be a database name if cache is enabled"
68
+ ) do |path|
69
+ options[:csv_paths] << path
70
+ end
71
+
72
+ opts.on('-c', '--use-cache', "Cache data in ~/.csvsql_cache. it will still reload if file was changed") do
73
+ options[:use_cache] = true
74
+ end
75
+
76
+ opts.on(
77
+ '-b', '--batch-rows n',
78
+ "How many rows to import per batch. Default value is #{Csvsql::Db::BATCH_ROWS}"
79
+ ) do |n|
80
+ options[:batch_rows] = n.to_i
81
+ end
82
+
83
+ opts.on('-e', '--encoding encoding', "Set the file encoding, default is UTF-8") do |encoding|
84
+ options[:encoding] = encoding
85
+ end
86
+
87
+ opts.on('--clear-cache', "Clear all cache data") do
88
+ options[:clear_cache] = true
89
+ end
90
+
91
+ opts.on('--debug', "Print debug information") do
92
+ options[:debug] = true
93
+ end
94
+ end
95
+ end
96
+ end
@@ -1,24 +1,13 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'digest'
4
- require 'fileutils'
5
-
6
3
  class Csvsql::Db
7
4
  BATCH_ROWS = 10000
8
- CACHE_DIR = File.join(Dir.home, '.csvsql_cache')
9
- FileUtils.mkdir_p(CACHE_DIR) unless Dir.exists?(CACHE_DIR)
10
-
11
- attr_reader :use_cache, :csv_path, :csv_io, :db, :batch_rows
12
5
 
13
- def self.clear_cache!
14
- FileUtils.rm_f(Dir.glob(File.join(CACHE_DIR, '*')))
15
- end
6
+ attr_reader :data_source, :batch_rows
16
7
 
17
- def initialize(use_cache: false, batch_rows: nil, sql_error_action: nil)
8
+ def initialize(batch_rows: nil, sql_error_action: nil)
18
9
  @db = nil
19
- @csv_io = nil
20
- @csv_path = nil
21
- @use_cache = use_cache
10
+ @data_source = {}
22
11
  @batch_rows = batch_rows || BATCH_ROWS
23
12
  @sql_error_action = (sql_error_action || :raise).to_sym
24
13
  end
@@ -42,18 +31,35 @@ class Csvsql::Db
42
31
  process_sql_error(sql, e)
43
32
  end
44
33
 
34
+ def db
35
+ @db ||= init_db
36
+ end
37
+
38
+ def init_db(cache_path = '')
39
+ @db = SQLite3::Database.new(cache_path)
40
+ end
41
+
42
+ # Params:
43
+ # csv_data_or_path:
44
+ # [String] csv path
45
+ # [StringIO, IO] csv buffer io
46
+ # [Hash] { table_name => csv_path }
45
47
  def import(csv_data_or_path, encoding: 'utf-8')
46
48
  case csv_data_or_path
47
49
  when StringIO, IO
48
- @csv_io = csv_data_or_path
50
+ data_source['csv'] = CSV.new(csv_data_or_path)
51
+ when Hash
52
+ csv_data_or_path.each do |table_name, path|
53
+ data_source[table_name.to_s] = CSV.open(path, "r:#{encoding}")
54
+ end
49
55
  else
50
- @csv_path = csv_data_or_path
56
+ data_source['csv'] = CSV.open(csv_data_or_path, "r:#{encoding}")
51
57
  end
52
- @db = SQLite3::Database.new(get_db_path(@csv_path))
53
58
 
54
59
  tables = db.execute("SELECT name FROM sqlite_master WHERE type='table';").flatten
55
- unless tables.include?('csv')
56
- init_db_by_csv(@csv_io ? CSV.new(@csv_io) : CSV.open(@csv_path, "r:#{encoding}"))
60
+ data_source.each do |table_name, csv|
61
+ next if tables.include?('csv')
62
+ init_table_by_csv(table_name, csv)
57
63
  end
58
64
  true
59
65
  end
@@ -67,11 +73,11 @@ class Csvsql::Db
67
73
  end
68
74
  end
69
75
 
70
- def init_db_by_csv(csv)
76
+ def init_table_by_csv(table_name, csv)
71
77
  header = parser_header(csv.readline)
72
78
 
73
79
  cols = header.map { |name, type| "#{name} #{type}" }.join(', ')
74
- sql = "CREATE TABLE csv (#{cols});"
80
+ sql = "CREATE TABLE #{table_name} (#{cols});"
75
81
  execute sql
76
82
 
77
83
  cache = []
@@ -81,18 +87,18 @@ class Csvsql::Db
81
87
  cache << header.each_with_index.map { |h, i| format_sql_val(line[i], h[1]) }
82
88
 
83
89
  if cache.length >= batch_rows then
84
- import_lines(cache, col_names)
90
+ import_lines(table_name, cache, col_names)
85
91
  cache.clear
86
92
  end
87
93
  end
88
- import_lines(cache, col_names) unless cache.empty?
94
+ import_lines(table_name, cache, col_names) unless cache.empty?
89
95
  Csvsql::Tracker.commit(:import_csv)
90
96
  db
91
97
  end
92
98
 
93
- def import_lines(lines, col_names)
99
+ def import_lines(table_name, lines, col_names)
94
100
  sql = Csvsql::Tracker.commit(:generate_import_sql) do
95
- s = "INSERT INTO csv (#{col_names.join(', ')}) VALUES "
101
+ s = "INSERT INTO #{table_name} (#{col_names.join(', ')}) VALUES "
96
102
  s += lines.map { |line| "(#{line.join(',')})" }.join(', ')
97
103
  end
98
104
  Csvsql::Tracker.commit(:execute_import_sql) { execute sql }
@@ -123,31 +129,4 @@ class Csvsql::Db
123
129
  raise err
124
130
  end
125
131
  end
126
-
127
- def get_db_path(csv_path)
128
- csv_path = csv_path || ''
129
- return '' unless File.exist?(csv_path)
130
-
131
- if use_cache
132
- stat = File.stat(csv_path)
133
- filename = Digest::SHA2.hexdigest(File.absolute_path(csv_path)) + '.cache'
134
- file_stat = [File.absolute_path(csv_path), stat.size, stat.ctime].join("\n")
135
- stat_path = File.join(CACHE_DIR, filename.gsub(/\.cache$/, '.stat'))
136
- cache_path = File.join(CACHE_DIR, filename)
137
-
138
- if File.exist?(stat_path)
139
- if File.read(stat_path) == file_stat
140
- cache_path
141
- else
142
- FileUtils.rm(cache_path)
143
- cache_path
144
- end
145
- else
146
- File.write(stat_path, file_stat)
147
- cache_path
148
- end
149
- else
150
- ''
151
- end
152
- end
153
132
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Csvsql
4
- VERSION = "0.1.5"
4
+ VERSION = "0.2.0"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csvsql
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - jiangzhi.xie
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-09-18 00:00:00.000000000 Z
11
+ date: 2018-10-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: sqlite3
@@ -91,6 +91,7 @@ files:
91
91
  - ".gitignore"
92
92
  - ".rspec"
93
93
  - ".rubocop.yml"
94
+ - ".ruby-version"
94
95
  - ".travis.yml"
95
96
  - Gemfile
96
97
  - Gemfile.lock
@@ -101,6 +102,7 @@ files:
101
102
  - csvsql.gemspec
102
103
  - exe/csvsql
103
104
  - lib/csvsql.rb
105
+ - lib/csvsql/command_runner.rb
104
106
  - lib/csvsql/db.rb
105
107
  - lib/csvsql/tracker.rb
106
108
  - lib/csvsql/version.rb
@@ -124,7 +126,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
124
126
  version: '0'
125
127
  requirements: []
126
128
  rubyforge_project:
127
- rubygems_version: 2.4.5.4
129
+ rubygems_version: 2.5.2
128
130
  signing_key:
129
131
  specification_version: 4
130
132
  summary: Process csv with SQL.