csv_query 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +3 -0
- data/Gemfile +4 -0
- data/LICENSE +19 -0
- data/README.md +36 -0
- data/Rakefile +2 -0
- data/bin/csvq +6 -0
- data/csv_query.gemspec +23 -0
- data/lib/csv_query.rb +2 -0
- data/lib/csv_query/command_line.rb +67 -0
- data/lib/csv_query/query.rb +118 -0
- data/lib/csv_query/version.rb +3 -0
- metadata +73 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
Copyright (c) 2012 Jakob Skjerning
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
4
|
+
of this software and associated documentation files (the "Software"), to deal
|
5
|
+
in the Software without restriction, including without limitation the rights
|
6
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
7
|
+
copies of the Software, and to permit persons to whom the Software is
|
8
|
+
furnished to do so, subject to the following conditions:
|
9
|
+
|
10
|
+
The above copyright notice and this permission notice shall be included in all
|
11
|
+
copies or substantial portions of the Software.
|
12
|
+
|
13
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
15
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
16
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
17
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
18
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
19
|
+
SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
CSV Query - Use SQL to query CSV data
|
2
|
+
=====================================
|
3
|
+
|
4
|
+
CSV Query is a command line tool that allows you to run SQL queries on data
|
5
|
+
stored in CSV files.
|
6
|
+
|
7
|
+
For example:
|
8
|
+
|
9
|
+
$ csvq --select "count(*)" sample.csv
|
10
|
+
count(*)
|
11
|
+
--------
|
12
|
+
1
|
13
|
+
|
14
|
+
Assumptions
|
15
|
+
-----------
|
16
|
+
|
17
|
+
The first row of data is assumed to be headers, unless headers are supplied with the `--headers` option.
|
18
|
+
|
19
|
+
All fields are created as `VARCHAR(255)`, which hopefully works for most cases.
|
20
|
+
|
21
|
+
Behind the scenes
|
22
|
+
-----------------
|
23
|
+
|
24
|
+
CSV Query loads the CSV data into an in-memory SQLite database. Thus, the SQL
|
25
|
+
queries need to be SQLite-flavored where applicable.
|
26
|
+
|
27
|
+
Alternative/related projects
|
28
|
+
----------------------------
|
29
|
+
|
30
|
+
* [csvq.py - Python variant that does almost the same thing](http://www.gl1tch.com/~lukewarm/software/csvq/)
|
31
|
+
* [CSVql - A query language for CSV files](https://github.com/ondrasej/CSVql)
|
32
|
+
|
33
|
+
License
|
34
|
+
-------
|
35
|
+
|
36
|
+
Licensed under the MIT License. See LICENSE for details.
|
data/Rakefile
ADDED
data/bin/csvq
ADDED
data/csv_query.gemspec
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "csv_query/version"
|
4
|
+
|
5
|
+
# Gem specification for csv_query: package identity, runtime requirements and
# the list of files that ship in the gem.
Gem::Specification.new do |s|
  s.name        = "csv_query"
  s.version     = CsvQuery::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Jakob Skjerning"]
  s.email       = ["jakob@mentalized.net"]
  s.homepage    = "http://mentalized.net"
  s.summary     = %q{Use SQL to query CSV data}
  s.description = %q{CSV Query allows you to run SQL queries against data stored in CSV files.}
  # The package ships an MIT LICENSE file; declare it so the license shows up
  # in the gem metadata instead of an empty list.
  s.licenses    = ["MIT"]

  s.required_ruby_version = '>= 1.9'
  s.add_dependency('sqlite3')

  # The file lists are taken from git, so building the gem requires running
  # inside a git checkout with the `git` executable available.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  s.require_paths = ["lib"]
end
|
data/lib/csv_query.rb
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
require 'csv_query/query'
|
2
|
+
|
3
|
+
module CsvQuery
  # Command line front end: turns command line switches into an options hash
  # and hands the CSV input plus those options to CsvQuery::Query.
  class CommandLine
    # Parses csvq's command line switches.
    #
    # argv - Array of command line arguments. Defaults to ARGV. Recognised
    #        switches are removed from the array in place, leaving only the
    #        non-option arguments (the CSV file names) behind.
    #
    # Returns a Hash that always contains :delimiter and :select, and
    # contains :headers, :sql_query and :where only when the matching switch
    # was given. With -h/--help the usage text is printed and the process
    # exits.
    def self.parse_options_from_commandline(argv = ARGV)
      options = {}

      # Set defaults
      options[:delimiter] = ','
      options[:select] = '*'

      parser = OptionParser.new do |opts|
        opts.banner = "Usage: csvq [options] [CSV file]"
        opts.separator ""
        opts.separator "Specific options:"

        opts.on(
          "-d",
          "--delimiter DELIMITER",
          "Sets the DELIMITER used between fields in the CSV data. Default: #{options[:delimiter].inspect}"
        ) do |d|
          options[:delimiter] = d
        end

        opts.on(
          "-H",
          "--headers HEADERS",
          "Comma separated list of headers. Default: First row of CSV data."
        ) do |h|
          options[:headers] = h
        end

        opts.on(
          "-q",
          "--query SQL",
          "The SQL query to run on the dataset. If specified --select and --where will be ignored."
        ) do |q|
          options[:sql_query] = q
        end

        opts.on(
          "-s",
          "--select SQL",
          "The SQL statement to select what fields to return. Unused if --query is given. Default: #{options[:select].inspect}."
        ) do |s|
          options[:select] = s
        end

        opts.on(
          "-w",
          "--where SQL",
          "The SQL conditions to use for querying the dataset. Unused if --query is given."
        ) do |w|
          options[:where] = w
        end

        opts.on_tail("-h", "--help", "Show this message.") do
          puts opts
          exit
        end
      end

      # Destructive parse: switches are stripped from argv so that, when argv
      # is ARGV, ARGF in .run only sees the remaining file names.
      parser.parse!(argv)

      options
    end

    # Entry point for the executable: parses the switches from ARGV, reads
    # all CSV input through ARGF and runs the query on it.
    def self.run
      options = parse_options_from_commandline

      CsvQuery::Query.new(ARGF.read, options).run
    end
  end
end
|
@@ -0,0 +1,118 @@
|
|
1
|
+
require 'csv'
|
2
|
+
require 'optparse'
|
3
|
+
require 'sqlite3'
|
4
|
+
|
5
|
+
module CsvQuery
  # Loads CSV text into a table named "csv" in an in-memory SQLite database,
  # runs one SQL query against it and prints the result as a text table.
  class Query
    attr_reader :csv_data, :options

    # csv_data - String holding the raw CSV text.
    # options  - Hash of settings. Keys read by this class: :delimiter,
    #            :headers, :select, :where and :sql_query.
    def initialize(csv_data, options)
      @csv_data = csv_data
      @options = options
    end

    # Runs the query and prints the result table to standard output.
    def run
      results = run_query
      output_results_table(results)
    end

    private

    # Returns the SQL to execute: options[:sql_query] verbatim when given,
    # otherwise a SELECT over the csv table assembled from options[:select]
    # and, when present, options[:where].
    def build_sql_query
      sql_query = options[:sql_query]
      return sql_query unless sql_query.nil?

      clauses = ["SELECT #{options[:select]}", "FROM csv"]
      # Only add the WHERE clause when a condition was given, so the
      # statement does not end in a dangling space.
      clauses << "WHERE #{options[:where]}" if options[:where]
      clauses.join(" ")
    end

    # Creates the in-memory database and a csv table with one VARCHAR(255)
    # column per CSV header. Returns the database handle.
    def create_database_and_table(csv)
      database = SQLite3::Database.new(':memory:')

      # NOTE(review): header names are interpolated into the DDL verbatim, so
      # headers containing spaces, punctuation or SQL keywords produce broken
      # or unexpected column definitions. Quoting them would change which
      # column names existing queries can use, so it is flagged, not changed.
      column_definitions = csv.headers.map { |name| "#{name} VARCHAR(255)" }
      database.execute "CREATE TABLE csv (#{column_definitions.join(", ")})"

      database
    end

    # Builds the database, loads every CSV row into it and returns it.
    def create_database_with_data_from_csv
      database = create_database_and_table(csv)
      import_csv_into_database(csv, database)
      database
    end

    # Memoized parsed CSV data.
    def csv
      @parsed_csv ||= parse_csv_data
    end

    # Memoized database populated from the CSV data.
    def database
      @database ||= create_database_with_data_from_csv
    end

    # Returns the value for CSV's :headers option.
    #
    # Without options[:headers] the first row is used. A String is split on
    # commas here, as the --headers help text promises; handing the String to
    # CSV directly would split it with the data delimiter instead, so
    # "a,b" combined with a ";" delimiter would become a single header. An
    # empty String falls back to the first row. Any other value is passed
    # through unchanged.
    def headers
      header_list = options[:headers]
      return :first_row if header_list.nil?
      return header_list unless header_list.is_a?(String)

      CSV.parse_line(header_list) || :first_row
    end

    # Inserts every CSV row into the csv table through one prepared
    # statement with a placeholder per header.
    def import_csv_into_database(csv, database)
      placeholders = (['?'] * csv.headers.size).join(',')
      statement = database.prepare("INSERT INTO csv VALUES (#{placeholders})")

      begin
        csv.each do |row|
          statement.execute(row.fields)
        end
      ensure
        # Release the prepared statement even when an insert fails.
        statement.close
      end
    end

    # Prints the rows as right-aligned columns separated by " | ", with a
    # dashed rule after the first row (the header row).
    #
    # results - Array of rows, each row an Array of values; the first row is
    #           treated as the header row.
    def output_results_table(results)
      num_columns = results.first ? results.first.size : 0
      column_widths = [0] * num_columns

      results.each do |row|
        row.each_with_index do |column, index|
          # Measure the printed text: nil (an empty CSV field) has no #size
          # and Integer#size is a byte count, not a display width.
          width = column.to_s.size
          column_widths[index] = width if width > column_widths[index]
        end
      end

      format_string = column_widths.map { |width| "%#{width}s" }.join(" | ")

      results.each_with_index do |result, index|
        puts format_string % result
        if index == 0
          # Separate headers and results
          puts column_widths.map { |width| '-' * width }.join('-+-')
        end
      end
    end

    # Parses csv_data using the configured headers and delimiter. The
    # delimiter defaults to "," when options[:delimiter] is not set.
    def parse_csv_data
      # Options are passed as trailing key/value pairs rather than a
      # positional Hash: the same call then works whether CSV.parse takes an
      # options Hash (older Rubies) or keyword arguments (Ruby 3).
      CSV.parse(csv_data, :headers => headers, :col_sep => options[:delimiter] || ',')
    end

    # Executes the query; the first row of the result is the column names.
    def run_query
      database.execute2(sql_query)
    end

    # Memoized SQL text.
    def sql_query
      @sql_query ||= build_sql_query
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: csv_query
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Jakob Skjerning
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-11-06 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: sqlite3
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
description: CSV Query allows you to run SQL queries against data stored in CSV files.
|
31
|
+
email:
|
32
|
+
- jakob@mentalized.net
|
33
|
+
executables:
|
34
|
+
- csvq
|
35
|
+
extensions: []
|
36
|
+
extra_rdoc_files: []
|
37
|
+
files:
|
38
|
+
- .gitignore
|
39
|
+
- Gemfile
|
40
|
+
- LICENSE
|
41
|
+
- README.md
|
42
|
+
- Rakefile
|
43
|
+
- bin/csvq
|
44
|
+
- csv_query.gemspec
|
45
|
+
- lib/csv_query.rb
|
46
|
+
- lib/csv_query/command_line.rb
|
47
|
+
- lib/csv_query/query.rb
|
48
|
+
- lib/csv_query/version.rb
|
49
|
+
homepage: http://mentalized.net
|
50
|
+
licenses: []
|
51
|
+
post_install_message:
|
52
|
+
rdoc_options: []
|
53
|
+
require_paths:
|
54
|
+
- lib
|
55
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
56
|
+
none: false
|
57
|
+
requirements:
|
58
|
+
- - ! '>='
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: '1.9'
|
61
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
62
|
+
none: false
|
63
|
+
requirements:
|
64
|
+
- - ! '>='
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: '0'
|
67
|
+
requirements: []
|
68
|
+
rubyforge_project:
|
69
|
+
rubygems_version: 1.8.24
|
70
|
+
signing_key:
|
71
|
+
specification_version: 3
|
72
|
+
summary: Use SQL to query CSV data
|
73
|
+
test_files: []
|