csv_query 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +3 -0
- data/Gemfile +4 -0
- data/LICENSE +19 -0
- data/README.md +36 -0
- data/Rakefile +2 -0
- data/bin/csvq +6 -0
- data/csv_query.gemspec +23 -0
- data/lib/csv_query.rb +2 -0
- data/lib/csv_query/command_line.rb +67 -0
- data/lib/csv_query/query.rb +118 -0
- data/lib/csv_query/version.rb +3 -0
- metadata +73 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
Copyright (c) 2012 Jakob Skjerning
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
4
|
+
of this software and associated documentation files (the "Software"), to deal
|
5
|
+
in the Software without restriction, including without limitation the rights
|
6
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
7
|
+
copies of the Software, and to permit persons to whom the Software is
|
8
|
+
furnished to do so, subject to the following conditions:
|
9
|
+
|
10
|
+
The above copyright notice and this permission notice shall be included in all
|
11
|
+
copies or substantial portions of the Software.
|
12
|
+
|
13
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
15
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
16
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
17
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
18
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
19
|
+
SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
CSV Query - Use SQL to query CSV data
|
2
|
+
=====================================
|
3
|
+
|
4
|
+
CSV Query is a command line tool that allows you to run SQL queries on data
|
5
|
+
stored in CSV files.
|
6
|
+
|
7
|
+
For example:
|
8
|
+
|
9
|
+
$ csvq --select "count(*)" sample.csv
|
10
|
+
count(*)
|
11
|
+
--------
|
12
|
+
1
|
13
|
+
|
14
|
+
Assumptions
|
15
|
+
-----------
|
16
|
+
|
17
|
+
The first row of data is assumed to be headers.
|
18
|
+
|
19
|
+
All fields are created as `VARCHAR(255)`, which hopefully works for most cases.
|
20
|
+
|
21
|
+
Behind the scenes
|
22
|
+
-----------------
|
23
|
+
|
24
|
+
CSV Query loads the CSV data into an in-memory SQLite database. Thus, the SQL
|
25
|
+
queries need to be SQLite-flavored where applicable.
|
26
|
+
|
27
|
+
Alternative/related projects
|
28
|
+
----------------------------
|
29
|
+
|
30
|
+
* [csvq.py - Python variant that does almost the same thing](http://www.gl1tch.com/~lukewarm/software/csvq/)
|
31
|
+
* [CSVql - A query language for CSV files](https://github.com/ondrasej/CSVql)
|
32
|
+
|
33
|
+
License
|
34
|
+
-------
|
35
|
+
|
36
|
+
Licensed under the MIT License. See LICENSE for details.
|
data/Rakefile
ADDED
data/bin/csvq
ADDED
data/csv_query.gemspec
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
#
# Gem specification for csv_query.
#
# The file lists below are produced by shelling out to `git ls-files`, so the
# gem has to be built from inside a git checkout of the project.

# Make lib/ loadable so the version constant can be required below.
$:.push File.expand_path("../lib", __FILE__)
require "csv_query/version"

Gem::Specification.new do |s|
  s.name        = "csv_query"
  s.version     = CsvQuery::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Jakob Skjerning"]
  s.email       = ["jakob@mentalized.net"]
  s.homepage    = "http://mentalized.net"
  s.summary     = %q{Use SQL to query CSV data}
  s.description = %q{CSV Query allows you to run SQL queries against data stored in CSV files.}
  # The README and the bundled LICENSE file declare the MIT License; state it
  # in the spec as well so the packaged metadata no longer has an empty
  # license list.
  s.licenses    = ["MIT"]

  s.required_ruby_version = '>= 1.9'
  s.add_dependency('sqlite3')

  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  # Executables are listed by base name; RubyGems looks them up in bin/.
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]

end
|
data/lib/csv_query.rb
ADDED
data/lib/csv_query/command_line.rb
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
require 'csv_query/query'
|
2
|
+
|
3
|
+
module CsvQuery
  # Command line front end for csvq: turns command line switches into an
  # options Hash and runs a Query over the CSV data read through ARGF.
  class CommandLine
    # Parses the csvq command line switches.
    #
    # argv - Array of argument Strings to parse. Defaults to ARGV, which is
    #        what the previous parameterless version always used. Recognized
    #        switches are removed from the Array in place (OptionParser#parse!),
    #        leaving only non-option arguments such as file names.
    #
    # Returns a Hash that always contains :delimiter and :select, plus
    # :headers, :sql_query and :where when the matching switch was given.
    # Passing -h/--help prints the usage text and exits the process.
    def self.parse_options_from_commandline(argv = ARGV)
      # Defaults; they are also interpolated into the help text below.
      options = {
        :delimiter => ',',
        :select => '*'
      }

      parser = OptionParser.new do |opts|
        opts.banner = "Usage: csvq [options] [CSV file]"
        opts.separator ""
        opts.separator "Specific options:"

        opts.on(
          "-d",
          "--delimiter DELIMITER",
          "Sets the DELIMITER used between fields in the CSV data. Default: #{options[:delimiter].inspect}"
        ) do |d|
          options[:delimiter] = d
        end

        opts.on(
          "-H",
          "--headers HEADERS",
          "Comma separated list of headers. Default: First row of CSV data."
        ) do |h|
          options[:headers] = h
        end

        opts.on(
          "-q",
          "--query SQL",
          "The SQL query to run on the dataset. If specified --select and --where will be ignored."
        ) do |q|
          options[:sql_query] = q
        end

        opts.on(
          "-s",
          "--select SQL",
          "The SQL statement to select what fields to return. Unused if --query is given. Default: #{options[:select].inspect}."
        ) do |s|
          options[:select] = s
        end

        opts.on(
          "-w",
          "--where SQL",
          "The SQL conditions to use for querying the dataset. Unused if --query is given."
        ) do |w|
          options[:where] = w
        end

        opts.on_tail("-h", "--help", "Show this message.") do
          puts opts
          exit
        end
      end
      parser.parse!(argv)

      options
    end

    # Entry point of the csvq executable: parses ARGV, reads all CSV input
    # through ARGF and prints the result of the query.
    def self.run
      options = parse_options_from_commandline

      CsvQuery::Query.new(ARGF.read, options).run
    end
  end
end
|
data/lib/csv_query/query.rb
ADDED
@@ -0,0 +1,118 @@
|
|
1
|
+
require 'csv'
|
2
|
+
require 'optparse'
|
3
|
+
require 'sqlite3'
|
4
|
+
|
5
|
+
module CsvQuery
  # Loads CSV text into an in-memory SQLite table named "csv", runs one SQL
  # query against it and prints the result as an aligned text table.
  class Query
    attr_reader :csv_data, :options

    # csv_data - String holding the raw CSV text.
    # options  - Hash of settings. The keys read by this class are
    #            :delimiter, :headers, :select, :where and :sql_query.
    def initialize(csv_data, options)
      @csv_data = csv_data
      @options = options
    end

    # Runs the query and writes the result table to standard output.
    def run
      output_results_table(run_query)
    end

    private

    # Returns the SQL String to execute: options[:sql_query] verbatim when it
    # is set, otherwise a SELECT built from options[:select] and the optional
    # options[:where].
    def build_sql_query
      custom_query = options[:sql_query]
      return custom_query unless custom_query.nil?

      clauses = ["SELECT #{options[:select]}", "FROM csv"]
      clauses << "WHERE #{options[:where]}" if options[:where]
      clauses.join(" ")
    end

    # Creates the in-memory database with an empty "csv" table that has one
    # VARCHAR(255) column per CSV header.
    #
    # Column names are emitted as double-quoted identifiers (embedded quotes
    # doubled) so headers containing spaces, punctuation or SQL keywords
    # become the intended column instead of being parsed as SQL.
    #
    # Returns the SQLite3::Database.
    def create_database_and_table(csv)
      database = SQLite3::Database.new(':memory:')

      column_definitions = csv.headers.map do |name|
        %("#{name.to_s.gsub('"', '""')}" VARCHAR(255))
      end
      database.execute "CREATE TABLE csv (#{column_definitions.join(", ")})"

      database
    end

    # Builds the database and fills it with every row of the parsed CSV.
    def create_database_with_data_from_csv
      database = create_database_and_table(csv)
      import_csv_into_database(csv, database)
      database
    end

    # Memoized parsed CSV data.
    def csv
      @parsed_csv ||= parse_csv_data
    end

    # Memoized database populated from the CSV data.
    def database
      @database ||= create_database_with_data_from_csv
    end

    # Returns the value for CSV's :headers option: :first_row unless
    # options[:headers] was given.
    #
    # options[:headers] is documented as a comma separated list, so it is
    # split on commas here. Handing the raw String to CSV would split it with
    # the data's :col_sep and break as soon as a custom delimiter is used.
    def headers
      custom_headers = options[:headers]
      return :first_row if custom_headers.nil?

      CSV.parse_line(custom_headers) || :first_row
    end

    # Inserts every CSV row into the "csv" table through a single prepared
    # statement, which is closed afterwards even if an insert fails.
    def import_csv_into_database(csv, database)
      placeholders = (['?'] * csv.headers.size).join(',')
      statement = database.prepare("INSERT INTO csv VALUES (#{placeholders})")

      begin
        csv.each { |row| statement.execute(row.fields) }
      ensure
        statement.close
      end
    end

    # Prints the rows as a right-aligned table, with a separator line after
    # the first row (the column names).
    #
    # results - Array of rows, each an Array of values; the first row holds
    #           the column names.
    #
    # Widths are measured on each value's String form: result values may be
    # nil (NULL) or Integers, for which #size is undefined or means the byte
    # size rather than the printed width.
    def output_results_table(results)
      first_row = results.first
      column_widths = [0] * (first_row ? first_row.size : 0)

      results.each do |row|
        row.each_with_index do |value, index|
          width = value.to_s.size
          column_widths[index] = width if width > column_widths[index]
        end
      end

      format_string = column_widths.map { |width| "%#{width}s" }.join(" | ")
      separator = column_widths.map { |width| '-' * width }.join('-+-')

      results.each_with_index do |result, index|
        puts format_string % result
        # Separate headers and results
        puts separator if index == 0
      end
    end

    # Parses csv_data using the configured headers and delimiter.
    #
    # The options are written as trailing hash arguments so the call works
    # both where CSV.parse takes an options Hash and where it takes keyword
    # arguments.
    def parse_csv_data
      CSV.parse(csv_data, :headers => headers, :col_sep => options[:delimiter])
    end

    # Executes the query; execute2 returns the column names as the first row.
    def run_query
      database.execute2(sql_query)
    end

    # Memoized SQL query String.
    def sql_query
      @sql_query ||= build_sql_query
    end
  end
end
|
data/lib/csv_query/version.rb
ADDED
metadata
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: csv_query
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Jakob Skjerning
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-11-06 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: sqlite3
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
description: CSV Query allows you to run SQL queries against data stored in CSV files.
|
31
|
+
email:
|
32
|
+
- jakob@mentalized.net
|
33
|
+
executables:
|
34
|
+
- csvq
|
35
|
+
extensions: []
|
36
|
+
extra_rdoc_files: []
|
37
|
+
files:
|
38
|
+
- .gitignore
|
39
|
+
- Gemfile
|
40
|
+
- LICENSE
|
41
|
+
- README.md
|
42
|
+
- Rakefile
|
43
|
+
- bin/csvq
|
44
|
+
- csv_query.gemspec
|
45
|
+
- lib/csv_query.rb
|
46
|
+
- lib/csv_query/command_line.rb
|
47
|
+
- lib/csv_query/query.rb
|
48
|
+
- lib/csv_query/version.rb
|
49
|
+
homepage: http://mentalized.net
|
50
|
+
licenses: []
|
51
|
+
post_install_message:
|
52
|
+
rdoc_options: []
|
53
|
+
require_paths:
|
54
|
+
- lib
|
55
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
56
|
+
none: false
|
57
|
+
requirements:
|
58
|
+
- - ! '>='
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: '1.9'
|
61
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
62
|
+
none: false
|
63
|
+
requirements:
|
64
|
+
- - ! '>='
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: '0'
|
67
|
+
requirements: []
|
68
|
+
rubyforge_project:
|
69
|
+
rubygems_version: 1.8.24
|
70
|
+
signing_key:
|
71
|
+
specification_version: 3
|
72
|
+
summary: Use SQL to query CSV data
|
73
|
+
test_files: []
|