csv_query 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,3 @@
1
+ pkg/*
2
+ *.gem
3
+ .bundle
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Fetch gems over HTTPS so downloads cannot be tampered with in transit.
source "https://rubygems.org"

# Specify your gem's dependencies in csv_query.gemspec
gemspec
data/LICENSE ADDED
@@ -0,0 +1,19 @@
1
+ Copyright (c) 2012 Jakob Skjerning
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ of this software and associated documentation files (the "Software"), to deal
5
+ in the Software without restriction, including without limitation the rights
6
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ copies of the Software, and to permit persons to whom the Software is
8
+ furnished to do so, subject to the following conditions:
9
+
10
+ The above copyright notice and this permission notice shall be included in all
11
+ copies or substantial portions of the Software.
12
+
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,36 @@
1
+ CSV Query - Use SQL to query CSV data
2
+ =====================================
3
+
4
+ CSV Query is a command line tool that allows you to run SQL queries on data
5
+ stored in CSV files.
6
+
7
+ For example:
8
+
9
+ $ csvq --select "count(*)" sample.csv
10
+ count(*)
11
+ --------
12
+ 1
13
+
14
+ Assumptions
15
+ -----------
16
+
17
+ The first row of data is assumed to be headers, unless headers are supplied with `--headers`.
18
+
19
+ All fields are created as `VARCHAR(255)`, which hopefully works for most cases.
20
+
21
+ Behind the scenes
22
+ -----------------
23
+
24
+ CSV Query loads the CSV data into an in-memory SQLite database. Thus, the SQL
25
+ queries need to be SQLite-flavored where applicable.
26
+
27
+ Alternative/related projects
28
+ ----------------------------
29
+
30
+ * [csvq.py - Python variant that does almost the same thing](http://www.gl1tch.com/~lukewarm/software/csvq/)
31
+ * [CSVql - A query language for CSV files](https://github.com/ondrasej/CSVql)
32
+
33
+ License
34
+ -------
35
+
36
+ Licensed under the MIT License. See LICENSE for details.
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ require 'bundler'
2
+ Bundler::GemHelper.install_tasks
data/bin/csvq ADDED
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "csv_query"
4
+ require 'csv_query/command_line'
5
+
6
+ CsvQuery::CommandLine.run
data/csv_query.gemspec ADDED
@@ -0,0 +1,23 @@
1
# -*- encoding: utf-8 -*-
# Gem specification for csv_query. The version is read from
# lib/csv_query/version.rb, so lib/ is put on the load path first.
$:.push File.expand_path("../lib", __FILE__)
require "csv_query/version"

Gem::Specification.new do |s|
  s.name        = "csv_query"
  s.version     = CsvQuery::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Jakob Skjerning"]
  s.email       = ["jakob@mentalized.net"]
  s.homepage    = "http://mentalized.net"
  s.summary     = %q{Use SQL to query CSV data}
  s.description = %q{CSV Query allows you to run SQL queries against data stored in CSV files.}
  # The README and LICENSE file state MIT; declare it so the packaged gem
  # metadata carries the license too.
  s.license     = "MIT"

  s.required_ruby_version = '>= 1.9'
  s.add_dependency('sqlite3')

  # The file lists come from git, so the gem must be built from a git checkout.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  s.require_paths = ["lib"]
end
data/lib/csv_query.rb ADDED
@@ -0,0 +1,2 @@
1
+ module CsvQuery
2
+ end
@@ -0,0 +1,67 @@
1
+ require 'csv_query/query'
2
+
3
module CsvQuery
  # Command line front end used by the `csvq` executable: parses the command
  # line switches and runs the resulting query on the CSV input.
  class CommandLine
    # Parses csvq's command line switches into an options hash.
    #
    # Recognized switches are removed from +argv+ (OptionParser#parse! is
    # destructive), leaving only the remaining arguments such as file names.
    # `-h`/`--help` prints the usage text and exits the process.
    #
    # @param argv [Array<String>] arguments to parse; defaults to ARGV.
    # @return [Hash] always contains :delimiter and :select; contains
    #   :headers, :sql_query and :where only when the switch was given.
    # @raise [OptionParser::ParseError] on unknown switches or missing values.
    def self.parse_options_from_commandline(argv = ARGV)
      # Defaults; also interpolated into the help text below.
      options = {
        :delimiter => ',',
        :select => '*'
      }

      parser = OptionParser.new do |opts|
        opts.banner = "Usage: csvq [options] [CSV file]"
        opts.separator ""
        opts.separator "Specific options:"

        opts.on(
          "-d",
          "--delimiter DELIMITER",
          "Sets the DELIMITER used between fields in the CSV data. Default: #{options[:delimiter].inspect}"
        ) do |delimiter|
          options[:delimiter] = delimiter
        end

        opts.on(
          "-H",
          "--headers HEADERS",
          "Comma separated list of headers. Default: First row of CSV data."
        ) do |headers|
          options[:headers] = headers
        end

        opts.on(
          "-q",
          "--query SQL",
          "The SQL query to run on the dataset. If specified --select and --where will be ignored."
        ) do |query|
          options[:sql_query] = query
        end

        opts.on(
          "-s",
          "--select SQL",
          "The SQL statement to select what fields to return. Unused if --query is given. Default: #{options[:select].inspect}."
        ) do |select|
          options[:select] = select
        end

        opts.on(
          "-w",
          "--where SQL",
          "The SQL conditions to use for querying the dataset. Unused if --query is given."
        ) do |where|
          options[:where] = where
        end

        opts.on_tail("-h", "--help", "Show this message.") do
          puts opts
          exit
        end
      end
      parser.parse!(argv)

      options
    end

    # Entry point for the executable: parses ARGV, reads the CSV data through
    # ARGF (the files left in ARGV, or standard input when none remain) and
    # runs the query.
    def self.run
      options = parse_options_from_commandline

      CsvQuery::Query.new(ARGF.read, options).run
    end
  end
end
@@ -0,0 +1,118 @@
1
+ require 'csv'
2
+ require 'optparse'
3
+ require 'sqlite3'
4
+
5
module CsvQuery
  # Loads CSV text into an in-memory SQLite table named +csv+, runs a single
  # SQL query against it and prints the result as a plain text table.
  class Query
    attr_reader :csv_data, :options

    # @param csv_data [String] the raw CSV text to query.
    # @param options [Hash] settings; the keys read by this class are
    #   :delimiter, :headers, :select, :where and :sql_query.
    def initialize(csv_data, options)
      @csv_data = csv_data
      @options = options
    end

    # Runs the query and prints the result table to standard output.
    def run
      output_results_table(run_query)
    end

    private

    # Returns options[:sql_query] verbatim when given; otherwise assembles
    # "SELECT <select> FROM csv [WHERE <where>]" from the individual options.
    def build_sql_query
      explicit_query = options[:sql_query]
      return explicit_query unless explicit_query.nil?

      clauses = ["SELECT #{options[:select]}", "FROM csv"]
      clauses << "WHERE #{options[:where]}" if options[:where]
      clauses.join(" ")
    end

    # Creates the in-memory database with a +csv+ table holding one
    # VARCHAR(255) column per CSV header.
    def create_database_and_table(csv)
      database = SQLite3::Database.new(':memory:')

      column_definitions = csv.headers.map do |name|
        "#{quote_identifier(name)} VARCHAR(255)"
      end
      database.execute "CREATE TABLE csv (#{column_definitions.join(", ")})"

      database
    end

    def create_database_with_data_from_csv
      database = create_database_and_table(csv)
      import_csv_into_database(csv, database)
      database
    end

    # The parsed CSV data, memoized.
    def csv
      @parsed_csv ||= parse_csv_data
    end

    # The populated database, memoized.
    def database
      @database ||= create_database_with_data_from_csv
    end

    # Value for CSV's :headers option. A header string is split on commas
    # (as the --headers help text promises) rather than on the data
    # delimiter; without explicit headers the first CSV row is used.
    def headers
      given = options[:headers]
      return :first_row if given.nil?
      return given unless given.respond_to?(:to_str)

      # parse_line returns nil for an empty string; fall back to the first row.
      CSV.parse_line(given.to_str) || :first_row
    end

    # Inserts every CSV row into the +csv+ table. All inserts share one
    # transaction, and the block form of #prepare closes the statement
    # even when an insert raises.
    def import_csv_into_database(csv, database)
      placeholders = Array.new(csv.headers.size, '?').join(',')
      sql = "INSERT INTO csv VALUES (#{placeholders})"

      database.transaction do
        database.prepare(sql) do |statement|
          csv.each { |row| statement.execute(row.fields) }
        end
      end
    end

    # Prints +results+ (an array of rows whose first row holds the column
    # names) as right-aligned columns separated by " | ", with a dashed rule
    # under the first row. Prints nothing for an empty result.
    def output_results_table(results)
      first_row = results.first
      column_widths = Array.new(first_row ? first_row.size : 0, 0)

      # Measure the printed text: values may be nil (NULL) or numbers, whose
      # own #size is either undefined or a byte count.
      results.each do |row|
        row.each_with_index do |value, index|
          width = value.to_s.size
          column_widths[index] = width if width > column_widths[index]
        end
      end

      results.each_with_index do |row, row_index|
        cells = row.each_with_index.map do |value, index|
          value.to_s.rjust(column_widths[index])
        end
        puts cells.join(" | ")

        next unless row_index == 0

        # Separate headers and results
        puts column_widths.map { |width| '-' * width }.join('-+-')
      end
    end

    # Parses the raw CSV text. Options are passed as a trailing brace-less
    # hash so the call works both where CSV takes an options hash and where
    # it takes keyword arguments.
    def parse_csv_data
      CSV.parse(csv_data, :headers => headers, :col_sep => options[:delimiter] || ',')
    end

    # Executes the query; execute2 returns the column names as the first row.
    def run_query
      database.execute2(sql_query)
    end

    def sql_query
      @sql_query ||= build_sql_query
    end

    # Wraps a column name in double quotes (doubling embedded quotes) so
    # headers containing spaces, punctuation or SQL keywords stay valid.
    def quote_identifier(name)
      %("#{name.to_s.gsub('"', '""')}")
    end
  end
end
@@ -0,0 +1,3 @@
1
module CsvQuery
  # Gem version string; frozen so it cannot be mutated at runtime.
  VERSION = "0.0.1".freeze
end
metadata ADDED
@@ -0,0 +1,73 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: csv_query
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Jakob Skjerning
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-11-06 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: sqlite3
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ description: CSV Query allows you to run SQL queries against data stored in CSV files.
31
+ email:
32
+ - jakob@mentalized.net
33
+ executables:
34
+ - csvq
35
+ extensions: []
36
+ extra_rdoc_files: []
37
+ files:
38
+ - .gitignore
39
+ - Gemfile
40
+ - LICENSE
41
+ - README.md
42
+ - Rakefile
43
+ - bin/csvq
44
+ - csv_query.gemspec
45
+ - lib/csv_query.rb
46
+ - lib/csv_query/command_line.rb
47
+ - lib/csv_query/query.rb
48
+ - lib/csv_query/version.rb
49
+ homepage: http://mentalized.net
50
+ licenses: []
51
+ post_install_message:
52
+ rdoc_options: []
53
+ require_paths:
54
+ - lib
55
+ required_ruby_version: !ruby/object:Gem::Requirement
56
+ none: false
57
+ requirements:
58
+ - - ! '>='
59
+ - !ruby/object:Gem::Version
60
+ version: '1.9'
61
+ required_rubygems_version: !ruby/object:Gem::Requirement
62
+ none: false
63
+ requirements:
64
+ - - ! '>='
65
+ - !ruby/object:Gem::Version
66
+ version: '0'
67
+ requirements: []
68
+ rubyforge_project:
69
+ rubygems_version: 1.8.24
70
+ signing_key:
71
+ specification_version: 3
72
+ summary: Use SQL to query CSV data
73
+ test_files: []