csv_query 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,3 @@
1
+ pkg/*
2
+ *.gem
3
+ .bundle
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "http://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in csv_query.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,19 @@
1
+ Copyright (c) 2012 Jakob Skjerning
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ of this software and associated documentation files (the "Software"), to deal
5
+ in the Software without restriction, including without limitation the rights
6
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ copies of the Software, and to permit persons to whom the Software is
8
+ furnished to do so, subject to the following conditions:
9
+
10
+ The above copyright notice and this permission notice shall be included in all
11
+ copies or substantial portions of the Software.
12
+
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,36 @@
1
+ CSV Query - Use SQL to query CSV data
2
+ =====================================
3
+
4
+ CSV Query is a command line tool that allows you to run SQL queries on data
5
+ stored in CSV files.
6
+
7
+ For example:
8
+
9
+ $ csvq --select "count(*)" sample.csv
10
+ count(*)
11
+ --------
12
+ 1
13
+
14
+ Assumptions
15
+ -----------
16
+
17
+ The first row of data is assumed to be headers.
18
+
19
+ All fields are created as `VARCHAR(255)`, which hopefully works for most cases.
20
+
21
+ Behind the scenes
22
+ -----------------
23
+
24
+ CSV Query loads the CSV data into an in-memory SQLite database. Thus, the SQL
25
+ queries need to be SQLite-flavored where applicable.
26
+
27
+ Alternative/related projects
28
+ ----------------------------
29
+
30
+ * [csvq.py - Python variant that does almost the same thing](http://www.gl1tch.com/~lukewarm/software/csvq/)
31
+ * [CSVql - A query language for CSV files](https://github.com/ondrasej/CSVql)
32
+
33
+ License
34
+ -------
35
+
36
+ Licensed under the MIT License. See LICENSE for details.
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ require 'bundler'
2
+ Bundler::GemHelper.install_tasks
data/bin/csvq ADDED
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "csv_query"
4
+ require 'csv_query/command_line'
5
+
6
+ CsvQuery::CommandLine.run
data/csv_query.gemspec ADDED
@@ -0,0 +1,23 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "csv_query/version"
4
+
5
+ Gem::Specification.new do |s|
6
+ s.name = "csv_query"
7
+ s.version = CsvQuery::VERSION
8
+ s.platform = Gem::Platform::RUBY
9
+ s.authors = ["Jakob Skjerning"]
10
+ s.email = ["jakob@mentalized.net"]
11
+ s.homepage = "http://mentalized.net"
12
+ s.summary = %q{Use SQL to query CSV data}
13
+ s.description = %q{CSV Query allows you to run SQL queries against data stored in CSV files.}
14
+
15
+ s.required_ruby_version = '>= 1.9'
16
+ s.add_dependency('sqlite3')
17
+
18
+ s.files = `git ls-files`.split("\n")
19
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
20
+ s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
21
+ s.require_paths = ["lib"]
22
+
23
+ end
data/lib/csv_query.rb ADDED
@@ -0,0 +1,2 @@
1
+ module CsvQuery
2
+ end
@@ -0,0 +1,67 @@
1
+ require 'csv_query/query'
2
+
module CsvQuery
  # Command line front end for csvq: turns command line switches into an
  # options Hash and hands the CSV input plus those options to CsvQuery::Query.
  class CommandLine
    # Parses csvq's command line switches.
    #
    # argv - Array of command line arguments. Defaults to ARGV. Recognised
    #        switches are removed from the array (OptionParser#parse! is
    #        destructive), leaving only the non-option arguments behind.
    #
    # Returns a Hash that always contains :delimiter and :select and, when the
    # matching switch was given, :headers, :sql_query and :where.
    # Raises OptionParser::ParseError for unknown switches or missing values.
    # Prints the usage text and exits when -h/--help is given.
    def self.parse_options_from_commandline(argv = ARGV)
      # Defaults; also interpolated into the help text below.
      options = {
        :delimiter => ',',
        :select => '*'
      }

      parser = OptionParser.new do |opts|
        opts.banner = "Usage: csvq [options] [CSV file]"
        opts.separator ""
        opts.separator "Specific options:"

        opts.on(
          "-d",
          "--delimiter DELIMITER",
          "Sets the DELIMITER used between fields in the CSV data. Default: #{options[:delimiter].inspect}"
        ) { |delimiter| options[:delimiter] = delimiter }

        opts.on(
          "-H",
          "--headers HEADERS",
          "Comma separated list of headers. Default: First row of CSV data."
        ) { |headers| options[:headers] = headers }

        opts.on(
          "-q",
          "--query SQL",
          "The SQL query to run on the dataset. If specified --select and --where will be ignored."
        ) { |query| options[:sql_query] = query }

        opts.on(
          "-s",
          "--select SQL",
          "The SQL statement to select what fields to return. Unused if --query is given. Default: #{options[:select].inspect}."
        ) { |select| options[:select] = select }

        opts.on(
          "-w",
          "--where SQL",
          "The SQL conditions to use for querying the dataset. Unused if --query is given."
        ) { |where| options[:where] = where }

        opts.on_tail("-h", "--help", "Show this message.") do
          puts opts
          exit
        end
      end

      parser.parse!(argv)
      options
    end

    # Entry point used by the csvq executable: parses ARGV, reads the CSV data
    # through ARGF (the files left in ARGV, or standard input) and runs the
    # query.
    #
    # A malformed command line ends the process with a one-line message on
    # standard error and exit status 1 instead of an uncaught exception.
    def self.run
      options = begin
        parse_options_from_commandline
      rescue OptionParser::ParseError => error
        abort "csvq: #{error.message} (see csvq --help)"
      end

      CsvQuery::Query.new(ARGF.read, options).run
    end
  end
end
@@ -0,0 +1,118 @@
1
+ require 'csv'
2
+ require 'optparse'
3
+ require 'sqlite3'
4
+
module CsvQuery
  # Loads CSV text into an in-memory SQLite table named "csv", runs a single
  # SQL query against it and prints the result as a plain-text table.
  class Query
    attr_reader :csv_data, :options

    # csv_data - String holding the raw CSV text.
    # options  - Hash of settings read by this class:
    #            :delimiter - field separator of the CSV data ("," when absent)
    #            :headers   - comma separated header names; when absent the
    #                         first row of the CSV data is used
    #            :sql_query - complete SQL statement; when present :select and
    #                         :where are ignored
    #            :select    - column list of the generated query ("*" when absent)
    #            :where     - optional condition of the generated query
    def initialize(csv_data, options)
      @csv_data = csv_data
      @options = options
    end

    # Runs the query and prints the header row, a separator line and the
    # result rows to standard output.
    def run
      results = run_query
      output_results_table(results)
    end

    private

    # Returns the SQL to execute: options[:sql_query] verbatim when given,
    # otherwise a SELECT over the "csv" table built from :select and :where.
    def build_sql_query
      sql_query = options[:sql_query]
      return sql_query unless sql_query.nil?

      where_statement = "WHERE #{options[:where]}" if options[:where]

      # compact drops the missing WHERE clause so no trailing space is emitted.
      [
        "SELECT #{options[:select] || '*'}",
        "FROM csv",
        where_statement
      ].compact.join(" ")
    end

    # Creates the in-memory database with an empty "csv" table that has one
    # VARCHAR(255) column per CSV header. Returns the database.
    def create_database_and_table(csv)
      database = SQLite3::Database.new(':memory:')

      column_definitions = csv.headers.map do |name|
        "#{quote_identifier(name)} VARCHAR(255)"
      end
      database.execute "CREATE TABLE csv (#{column_definitions.join(", ")})"

      database
    end

    def create_database_with_data_from_csv
      database = create_database_and_table(csv)
      import_csv_into_database(csv, database)
      database
    end

    # Parsed CSV data, memoized.
    def csv
      @parsed_csv ||= parse_csv_data
    end

    # Populated database, memoized.
    def database
      @database ||= create_database_with_data_from_csv
    end

    # Value for CSV's :headers option.
    #
    # A String from --headers is split on commas here. Handing the String to
    # CSV directly would split it on the data delimiter instead, which is wrong
    # whenever --delimiter is not a comma.
    def headers
      given = options[:headers]
      return :first_row unless given
      return given unless given.is_a?(String)

      CSV.parse_line(given) || :first_row
    end

    # Inserts every CSV row into the "csv" table using one prepared statement.
    # The block form of prepare closes the statement when done, and the
    # transaction groups the inserts into a single unit.
    def import_csv_into_database(csv, database)
      placeholders = (['?'] * csv.headers.size).join(',')
      sql = "INSERT INTO csv VALUES (#{placeholders})"

      database.transaction do
        database.prepare(sql) do |statement|
          csv.each { |row| statement.execute(row.fields) }
        end
      end
    end

    # Prints results (an Array of rows; the first row holds the column names)
    # as right-aligned columns joined by " | ", with a dashed separator line
    # after the first row.
    def output_results_table(results)
      first_row = results.first
      column_widths = [0] * (first_row ? first_row.size : 0)

      results.each do |row|
        row.each_with_index do |column, index|
          # to_s: values may be nil (SQL NULL) or numbers, and the width that
          # matters is that of the printed text.
          width = column.to_s.size
          column_widths[index] = width if width > column_widths[index]
        end
      end

      format_string = column_widths.map { |width| "%#{width}s" }.join(" | ")

      results.each_with_index do |result, index|
        puts format_string % result
        if index == 0
          # Separate headers and results
          puts column_widths.map { |width| '-' * width }.join('-+-')
        end
      end
    end

    # Parses csv_data using the configured headers and delimiter.
    #
    # The options are passed as trailing key/value arguments rather than as a
    # Hash object, which works both where CSV.parse takes an options Hash and
    # where it takes keyword arguments.
    def parse_csv_data
      CSV.parse(
        csv_data,
        :headers => headers,
        :col_sep => options[:delimiter] || ','
      )
    end

    # Wraps a column name in double quotes, doubling embedded double quotes, so
    # headers containing spaces, punctuation or SQL keywords are valid column
    # names.
    def quote_identifier(name)
      %("#{name.to_s.gsub('"', '""')}")
    end

    # Executes the query; execute2 returns the column names as the first row.
    def run_query
      database.execute2(sql_query)
    end

    def sql_query
      @sql_query ||= build_sql_query
    end
  end
end
@@ -0,0 +1,3 @@
1
+ module CsvQuery
2
+ VERSION = "0.0.1"
3
+ end
metadata ADDED
@@ -0,0 +1,73 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: csv_query
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Jakob Skjerning
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-11-06 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: sqlite3
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ description: CSV Query allows you to run SQL queries against data stored in CSV files.
31
+ email:
32
+ - jakob@mentalized.net
33
+ executables:
34
+ - csvq
35
+ extensions: []
36
+ extra_rdoc_files: []
37
+ files:
38
+ - .gitignore
39
+ - Gemfile
40
+ - LICENSE
41
+ - README.md
42
+ - Rakefile
43
+ - bin/csvq
44
+ - csv_query.gemspec
45
+ - lib/csv_query.rb
46
+ - lib/csv_query/command_line.rb
47
+ - lib/csv_query/query.rb
48
+ - lib/csv_query/version.rb
49
+ homepage: http://mentalized.net
50
+ licenses: []
51
+ post_install_message:
52
+ rdoc_options: []
53
+ require_paths:
54
+ - lib
55
+ required_ruby_version: !ruby/object:Gem::Requirement
56
+ none: false
57
+ requirements:
58
+ - - ! '>='
59
+ - !ruby/object:Gem::Version
60
+ version: '1.9'
61
+ required_rubygems_version: !ruby/object:Gem::Requirement
62
+ none: false
63
+ requirements:
64
+ - - ! '>='
65
+ - !ruby/object:Gem::Version
66
+ version: '0'
67
+ requirements: []
68
+ rubyforge_project:
69
+ rubygems_version: 1.8.24
70
+ signing_key:
71
+ specification_version: 3
72
+ summary: Use SQL to query CSV data
73
+ test_files: []