abstract-tables 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.textile +76 -0
- data/abstract-tables.gemspec +45 -0
- data/bin/atcat +25 -0
- data/bin/atview +136 -0
- data/introducing-abtab/README.textile +50 -0
- data/lib/abtab.rb +57 -0
- data/lib/abtab/driver.rb +24 -0
- data/lib/abtab/drivers/csv_driver.rb +68 -0
- data/lib/abtab/drivers/dbi_driver.rb +149 -0
- data/lib/abtab/drivers/tab_driver.rb +76 -0
- data/test/fixtures/files/file1.csv +3 -0
- data/test/fixtures/files/file1.tab +3 -0
- metadata +106 -0
data/README.textile
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
h1. Abstract Table Library
|
2
|
+
|
3
|
+
In the spirit of the standard Unix utilities (like cat, grep, cut, etc.), this library creates an abstraction over tabular data. The library implements a series of drivers for sources with different encodings that act like sequences.
|
4
|
+
|
5
|
+
|
6
|
+
h1. Command Line Utilities
|
7
|
+
|
8
|
+
h2. atcat
|
9
|
+
|
10
|
+
<pre>
|
11
|
+
# dump a postgres table to the console (default is tab delimited)
|
12
|
+
atcat dbi://pg/localhost/database_name/table_name
|
13
|
+
|
14
|
+
# export a table to a tab file:
|
15
|
+
atcat dbi://pg/localhost/database_name/table_name tab://table_name.tab
|
16
|
+
|
17
|
+
# you can omit the schema for files; it will be guessed from the file extension
|
18
|
+
atcat dbi://pg/localhost/database_name/table_name table_name.tab
|
19
|
+
|
20
|
+
# dump to csv
|
21
|
+
atcat dbi://pg/localhost/database_name/table_name csv://table_name.csv
|
22
|
+
# you can omit the schema
|
23
|
+
atcat dbi://pg/localhost/database_name/table_name table_name.csv
|
24
|
+
|
25
|
+
# convert from csv to tab
|
26
|
+
atcat csv://some-file.csv tab://some-file.tab
|
27
|
+
|
28
|
+
atcat some-file.csv some-file.tab
|
29
|
+
|
30
|
+
# convert from csv to pipe
|
31
|
+
atcat csv://some-file.csv 'tab://some-file.tab?col_sep=|'
|
32
|
+
</pre>
|
33
|
+
|
34
|
+
h2. rec-view
|
35
|
+
|
36
|
+
<pre>
|
37
|
+
rec-view tab://some-file.tab | less
|
38
|
+
rec-view csv://some-file.csv | less
|
39
|
+
rec-view dbi://Pg/some-file.csv | less
|
40
|
+
</pre>
|
41
|
+
|
42
|
+
h2. atcat
|
43
|
+
|
44
|
+
Limitations: only implemented 'driver' is dbi. Only supported output port is stdout as tab delimited.
|
45
|
+
|
46
|
+
<pre>
|
47
|
+
atcat dbi://user:pass@Pg/localhost/db_name/table_name | rec-view | less
|
48
|
+
</pre>
|
49
|
+
|
50
|
+
h1. Supported Drivers
|
51
|
+
|
52
|
+
h2. tab
|
53
|
+
|
54
|
+
Native Ruby for now.
|
55
|
+
|
56
|
+
h3. col_sep
|
57
|
+
|
58
|
+
Defaults to a tab character.
|
59
|
+
|
60
|
+
h2. csv
|
61
|
+
|
62
|
+
Via the "FasterCSV":http://fastercsv.rubyforge.org/ ruby gem.
|
63
|
+
|
64
|
+
h3. col_sep
|
65
|
+
|
66
|
+
Override the default ',' column separator.
|
67
|
+
|
68
|
+
h3. quote_char
|
69
|
+
|
70
|
+
Override the default quote_char (").
|
71
|
+
|
72
|
+
h1. License
|
73
|
+
|
74
|
+
h1. Authors
|
75
|
+
|
76
|
+
Kyle Burton <kyle.burton@gmail.com>
|
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
|
3
|
+
# Gem specification for abstract-tables.
#
# Changes relative to the original spec:
# * the built artifact (abstract-tables-1.0.0.gem) is no longer listed in
#   +files+ -- a gem must not package a previous build of itself;
# * the debugging +puts+ of the file list was removed so that loading the
#   spec does not write to stdout;
# * +fastercsv+ is declared as a runtime dependency because
#   lib/abtab/drivers/csv_driver.rb requires it when the library is loaded;
# * +has_rdoc+ is no longer set (the generated metadata shows the value was
#   ignored by RubyGems).
SPEC = Gem::Specification.new do |s|
  s.name        = "abstract-tables"
  s.version     = "1.0.0"
  s.date        = '2011-02-09'
  s.authors     = ["Kyle Burton"]
  s.email       = "kyle.burton@gmail.com"
  s.platform    = Gem::Platform::RUBY
  s.description = <<DESC
The best I could come up with was to just show you:

$ atcat dbi://pg/localhost/database_name/table_name csv://table_name.csv

That exports a table from Postgres into a comma separated value file. You can
read from or write to: tab, csv, dbi, etc. You can opaquely treat any of those
'table of records' based sources as an opaque URI. Want to read more?

https://github.com/kyleburton/abstract-tables

DESC
  s.summary     = "Table Abstraction as a URI : Record Streams, Filters, ETL Ginsu"
  s.homepage    = "http://github.com/kyleburton/abstract-tables"
  s.files       = %w[
    abstract-tables.gemspec
    bin/atcat
    bin/atview
    introducing-abtab/README.textile
    lib/abtab/driver.rb
    lib/abtab/drivers/csv_driver.rb
    lib/abtab/drivers/dbi_driver.rb
    lib/abtab/drivers/tab_driver.rb
    lib/abtab.rb
    README.textile
    test/fixtures/files/file1.csv
    test/fixtures/files/file1.tab
  ]
  s.executables      = %w[atcat atview]
  s.require_paths    = %w[lib bin]
  s.extra_rdoc_files = %w[README.textile] # LICENSE]
  s.add_runtime_dependency('dbi', [">= 0.4.5"])
  # Required at load time by the csv driver.
  s.add_runtime_dependency('fastercsv')
end
|
data/bin/atcat
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'rubygems'
|
3
|
+
require 'abtab'
|
4
|
+
|
5
|
+
# Copy every record from an input table to an output table.
#
# Input:  when stdin is a pipe it is read as a tab-delimited table
#         (tab:///dev/stdin); otherwise the first argument is the input URI.
# Output: the next argument, or tab-delimited stdout when none is given.
input_uri =
  if File.pipe?('/dev/stdin')
    'tab:///dev/stdin'
  else
    ARGV.shift || raise("You must specify a URI to cat")
  end

inp  = Abtab.read_handle input_uri
outp = Abtab.write_handle(ARGV.shift || 'tab:///dev/stdout')

begin
  # The header row goes out first, then every record of the input.
  outp.set_columns inp.columns
  outp.import inp
ensure
  # Release file/database handles even when the copy fails part way.
  outp.close
  inp.close
end
|
25
|
+
|
data/bin/atview
ADDED
@@ -0,0 +1,136 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'rubygems'
|
3
|
+
require 'abtab'
|
4
|
+
|
5
|
+
#
|
6
|
+
# Pipe mysql through this:
|
7
|
+
#
|
8
|
+
# mysql -uuser database_name -e 'select * from some_table' | rec-view.rb
|
9
|
+
#
|
10
|
+
# and it will produce a 'portrait' view of the records
|
11
|
+
#
|
12
|
+
# Record[1]
|
13
|
+
# [ 1] ID : 1
|
14
|
+
# [ 2] FNAME: Kyle
|
15
|
+
# [ 3] LNAME: Burton
|
16
|
+
# [ 4] CITY : Philadelphia
|
17
|
+
# [ 5] ST : PA
|
18
|
+
# [ 6] ZIP : 19101
|
19
|
+
#
|
20
|
+
# Record[2]
|
21
|
+
# [ 1] ID : 2
|
22
|
+
# [ 2] FNAME: Alan
|
23
|
+
# [ 3] LNAME: Barton
|
24
|
+
# [ 4] CITY : Newark
|
25
|
+
# [ 5] ST : DE
|
26
|
+
# [ 6] ZIP : 10817
|
27
|
+
#
|
28
|
+
|
29
|
+
require 'optparse'
|
30
|
+
|
31
|
+
# $options = {:no_hdr => false}
|
32
|
+
# OptionParser.new do |opts|
|
33
|
+
# opts.banner = "Usage: #$0 [options]"
|
34
|
+
#
|
35
|
+
# opts.on("-s", "--sort", "Sort the fields before display") do |v|
|
36
|
+
# $options[:sort_fields] = v
|
37
|
+
# end
|
38
|
+
#
|
39
|
+
# opts.on("-f", "--fields FIELDS", "Specify the fields as a comma delimited string") do |v|
|
40
|
+
# $options[:fields] = v
|
41
|
+
# end
|
42
|
+
#
|
43
|
+
# opts.on("-c", "--const N:V,N:V,...", "Specify constant field values, always prefixed, comma separated") do |v|
|
44
|
+
# $options[:constant_fields] = v
|
45
|
+
# end
|
46
|
+
#
|
47
|
+
# opts.on("-r", "--raw", "Raw output, just tab delimited") do |v|
|
48
|
+
# $options[:raw] = v
|
49
|
+
# end
|
50
|
+
#
|
51
|
+
# opts.on("-n", "--no-header", "Suppress printing the header for --raw") do |v|
|
52
|
+
# $options[:no_hdr] = true
|
53
|
+
# end
|
54
|
+
#
|
55
|
+
# end.parse!
|
56
|
+
#
|
57
|
+
# def constant_field_names
|
58
|
+
# $options[:constant_fields].split(/,/).map { |pair|
|
59
|
+
# pair.split(/:/)[0]
|
60
|
+
# }
|
61
|
+
# end
|
62
|
+
#
|
63
|
+
# def constant_field_values
|
64
|
+
# $options[:constant_fields].split(/,/).map { |pair|
|
65
|
+
# pair.split(/:/)[1]
|
66
|
+
# }
|
67
|
+
# end
|
68
|
+
#
|
69
|
+
# fields = []
|
70
|
+
# if $options[:fields]
|
71
|
+
# fields = $options[:fields].split(/,/)
|
72
|
+
# else
|
73
|
+
# header_line = $stdin.readline
|
74
|
+
# fields = header_line.split(/\t/)
|
75
|
+
# fields[-1].chomp!
|
76
|
+
# end
|
77
|
+
#
|
78
|
+
# fields.unshift(*constant_field_names) if $options[:constant_fields]
|
79
|
+
#
|
80
|
+
# orig_order_map = {}
|
81
|
+
# fields.each_with_index do |field,idx|
|
82
|
+
# orig_order_map[field] = idx
|
83
|
+
# end
|
84
|
+
# sorted_fields = $options[:sort_fields] ? fields.sort : fields
|
85
|
+
#
|
86
|
+
# max_width = fields.map {|f| f.size}.max
|
87
|
+
# recno = 0
|
88
|
+
#
|
89
|
+
# if $options[:raw]
|
90
|
+
# puts fields.join("\t") if !$options[:no_hdr]
|
91
|
+
# $stdin.each do |line|
|
92
|
+
# recno = recno + 1
|
93
|
+
# rec = line.split(/\t/)
|
94
|
+
# rec[-1].chomp!
|
95
|
+
# rec.unshift(*constant_field_values) if $options[:constant_fields]
|
96
|
+
# puts rec.join("\t")
|
97
|
+
# end
|
98
|
+
# exit 0
|
99
|
+
# end
|
100
|
+
#
|
101
|
+
# $stdin.each do |line|
|
102
|
+
# recno = recno + 1
|
103
|
+
# puts "Record[#{recno}]"
|
104
|
+
# rec = line.split(/\t/)
|
105
|
+
# rec[-1].chomp!
|
106
|
+
# rec.unshift(*constant_field_values) if $options[:constant_fields]
|
107
|
+
# rows = []
|
108
|
+
# sorted_fields.each_with_index do |field,idx|
|
109
|
+
# actual_idx = orig_order_map[field]
|
110
|
+
# rows << sprintf( "[% 3d] %-*s: %s\n", 1+idx, max_width, field, rec[actual_idx] )
|
111
|
+
# end
|
112
|
+
# if $options[:sort_fields]
|
113
|
+
# puts rows.sort
|
114
|
+
# else
|
115
|
+
# puts rows
|
116
|
+
# end
|
117
|
+
# puts ""
|
118
|
+
# end
|
119
|
+
|
120
|
+
# Print every record of the table named by the first argument in
# 'portrait' form: one "[ n] FIELD: value" line per column, with the field
# names padded to the width of the longest column name.
uri = ARGV.shift || raise("You must specify a URI to view")
inp = Abtab.read_handle uri

begin
  columns   = inp.columns
  max_width = columns.map { |f| f.size }.max
  recno     = 0

  # next_record returns nil once the input is exhausted, which ends the loop.
  while (rec = inp.next_record)
    recno += 1
    puts "Record[#{recno}]"
    columns.each_with_index do |field, idx|
      printf("[% 3d] %-*s: %s\n", idx + 1, max_width, field, rec[idx])
    end
    puts ""
  end
ensure
  inp.close
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
I came to Ruby by way of Perl.
|
2
|
+
|
3
|
+
The Perl archipelago, Wall Island, Christiansen, Schwartz, Conway
|
4
|
+
|
5
|
+
* gasp *
|
6
|
+
|
7
|
+
I started with Perl 4 in order to customize the installation of Windows NT so
|
8
|
+
that it could be automated.
|
9
|
+
|
10
|
+
This involved heavy use of regular expressions.
|
11
|
+
|
12
|
+
Mom: "Bobby, we need to talk"
|
13
|
+
"What is it, Mom?" : Bobby
|
14
|
+
Mom: "A friend's mother said
|
15
|
+
other kids at school
|
16
|
+
have been experimenting
|
17
|
+
with the regexes..."
|
18
|
+
"It's no big deal Mom, they're : Bobby
|
19
|
+
just =~ /(posix|pcre)/ims"
|
20
|
+
Mom: "OMG, you're taking the regex!"
|
21
|
+
|
22
|
+
Why regexes?
|
23
|
+
|
24
|
+
Some people, when confronted with a problem, think "I know, I'll use regular
|
25
|
+
expressions." Now they have two problems.
|
26
|
+
|
27
|
+
-- Jamie Zawinski
|
28
|
+
|
29
|
+
Regexes are a key that unlocks data
|
30
|
+
|
31
|
+
Find/Commission a graphic of a key-shaped word 'regex' and the word 'data' shaped
|
32
|
+
as an old style padlock (where the key goes in the front/middle)?
|
33
|
+
|
34
|
+
When I worked at Health Market Science, Ben Kennedy and I were asked to help
|
35
|
+
out our data delivery department with a client job that was taking at the time
|
36
|
+
about 48hrs to process.
|
37
|
+
|
38
|
+
During that time we happened upon the idea of a suite of command line tools and
|
39
|
+
a library which abstracted away file encodings and how tables of data were to
|
40
|
+
be represented.
|
41
|
+
|
42
|
+
Some clients wanted tab files, some csv, some pipe delimited, some Excel and
|
43
|
+
others MS Access database files.
|
44
|
+
|
45
|
+
The other idea that we stumbled upon was to use a URL to represent both the
|
46
|
+
location of the table's data and its encoding. This meant the command line utilities and the libraries could be written to outputs,
|
47
|
+
|
48
|
+
Picture(s):
|
49
|
+
URI: Yuri Gagarin
|
50
|
+
URL: the Ural Mountains
|
data/lib/abtab.rb
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
require 'abtab/driver'
|
2
|
+
|
3
|
+
# Entry point of the library: a registry that maps URL schemas ('tab', 'csv',
# 'dbi', ...) to driver classes, plus helpers that turn a URL or a plain file
# name into an opened driver instance.
module Abtab
  # schema name (String) => driver class
  REGISTERED_DRIVERS = {}

  # Register +driver+ (a class whose constructor takes the url) as the
  # handler for urls of the form "<schema>://...".
  def self.register(schema, driver)
    REGISTERED_DRIVERS[schema] = driver
  end

  # Build a driver instance for +thing+.
  #
  # +thing+ is either a url ("schema://rest") or a plain file name, in which
  # case the schema is guessed by open_file.  The schema must match a
  # registered one exactly; the previous prefix comparison let "tabx://..."
  # pick the 'tab' driver and returned nil when several schemas shared a
  # prefix.
  #
  # Raises RuntimeError when no driver is registered for the schema.
  def self.open(thing)
    return open_file(thing) if thing !~ %r{\A.+?://}

    schema = thing.split('://', 2).first
    driver = REGISTERED_DRIVERS[schema]
    raise "Error: there is no registered driver for url: #{thing}" unless driver

    driver.new thing
  end

  # Return a driver for +uri+ that has been opened for reading.
  def self.read_handle(uri)
    driver = self.open uri
    driver.open_for_reading
    driver
  end

  # Return a driver for +uri+ that has been opened for writing.
  def self.write_handle(uri)
    driver = self.open uri
    driver.open_for_writing
    driver
  end

  # Open a plain file name by guessing its schema from the extension:
  # "*.csv" is csv, everything else (including "*.tab") is tab delimited.
  def self.open_file(thing)
    # TODO: put in some more 'magick' -- use some heuristics to figure out the likely format by looking at the first few lines...
    schema = thing =~ /\.csv\z/ ? 'csv' : 'tab'
    self.open "#{schema}://#{thing}"
  end
end
|
54
|
+
|
55
|
+
Dir[File.dirname(__FILE__) + '/abtab/drivers/**/*.rb' ].each do |f|
|
56
|
+
require f
|
57
|
+
end
|
data/lib/abtab/driver.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
module Abtab
  # Base class for all table drivers.  Subclasses supply next_record (for
  # reading) and write_record (for writing); this class supplies the generic
  # record copy loop and url parsing.
  class Driver
    # Copy every record from +inp+ into this driver.
    #
    # +inp+ must respond to next_record and return nil (or false) once it is
    # exhausted; each record is handed to write_record.
    def import(inp)
      while (rec = inp.next_record)
        write_record rec
      end
    end

    # Split a url of the form "schema://path?k=v&k2=v2" into its parts.
    #
    # Query-string pairs (separated by '&' or ';') are percent-decoded and
    # stored into +options+, overriding any defaults already present.  Only
    # the first '=' separates key from value, so values may themselves
    # contain '='.  A key without '=' is stored with an empty string value.
    #
    # Returns [schema, path, options].
    # Raises ArgumentError when +url+ contains no "://".
    def url_parse(url, options = {})
      schema, rest = url.split('://', 2)
      if rest.nil?
        raise ArgumentError, "Error: not a table url (expected schema://path): #{url}"
      end

      path, qs = rest.split('?', 2)
      (qs || '').split(/[;&]/).each do |pair|
        next if pair.empty?
        k, v = pair.split('=', 2)
        options[percent_decode(k)] = percent_decode(v || '')
      end
      return schema, path, options
    end

    private

    # Replace every %XX escape in +str+ with the byte it encodes.  Done by
    # hand so this file does not depend on URI.unescape, which was never
    # required here and no longer exists in current Ruby versions.
    def percent_decode(str)
      str.gsub(/%([0-9a-fA-F]{2})/) do
        byte = [Regexp.last_match(1)].pack('H2')
        # Keep the decoded byte in the encoding of the surrounding string
        # (String#force_encoding is unavailable on Ruby 1.8).
        byte.respond_to?(:force_encoding) ? byte.force_encoding(str.encoding) : byte
      end
    end
  end
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
require 'fastercsv'
|
2
|
+
require 'uri'
|
3
|
+
|
4
|
+
# Driver for comma separated value files, parsed and formatted with
# FasterCSV.  Recognised url options: "col_sep" (default ',') and
# "quote_char" (default '"').  The paths /dev/stdin and /dev/stdout map to
# the process's standard streams.
class Abtab::Driver::CSVDriver < Abtab::Driver
  # +url+ looks like "csv://path/to/file.csv?col_sep=;"
  def initialize(url)
    defaults = {
      "quote_char" => '"',
      "col_sep"    => ','
    }
    @schema, @file, @options = url_parse url, defaults
  end

  # Open the file (or stdin) and consume the header line into @columns.
  # Raises RuntimeError when the file does not exist.
  def open_for_reading
    if @file == '/dev/stdin'
      @read_fh = $stdin
    else
      unless File.exist? @file
        raise "Error: can not open for reading, file does not exist: #{@file}"
      end
      @read_fh = File.open(@file, 'r')
    end

    @columns = parse_line @read_fh.readline
  end

  # Column names read from the header, or set through set_columns.
  def columns
    @columns
  end

  # Return the next record as an array of fields, or nil at end of input.
  # A blank line yields an empty record rather than nil, so it no longer
  # ends an import early.
  #
  # NOTE(review): input is parsed one physical line at a time, so a quoted
  # field containing an embedded newline is not handled -- confirm whether
  # such input needs to be supported.
  def next_record
    return nil if @read_fh.eof?
    parse_line @read_fh.readline
  end

  # Close whichever handles are open.  The process's own stdin/stdout are
  # left open (stdout is flushed instead).
  def close
    if @read_fh
      @read_fh.close unless @read_fh.equal?($stdin)
      @read_fh = nil
    end
    if @write_fh
      if @write_fh.equal?($stdout)
        @write_fh.flush
      else
        @write_fh.close
      end
      @write_fh = nil
    end
  end

  # Open the output for writing.
  # NB: truncates the output file
  def open_for_writing
    # The stdout handle used to be stored in a misspelt variable
    # (@write_fn), which left @write_fh nil for "csv:///dev/stdout".
    @write_fh = @file == '/dev/stdout' ? $stdout : File.open(@file, 'w')
    set_columns(@columns) if @columns && !@columns.empty?
  end

  # Write one record (an array of fields) as a csv line.
  def write_record(rec)
    @write_fh.puts rec.to_csv(csv_options)
  end

  # Remember +cols+ as the column names and write them as the header line.
  def set_columns(cols)
    @columns = cols
    write_record @columns
  end

  private

  # The url options translated into the option hash FasterCSV expects.
  def csv_options
    { :quote_char => @options["quote_char"], :col_sep => @options["col_sep"] }
  end

  # Parse one line of csv text into an array of fields.
  def parse_line(line)
    FasterCSV.parse(line.chomp, csv_options).first || []
  end
end
|
67
|
+
|
68
|
+
Abtab.register 'csv', Abtab::Driver::CSVDriver
|
@@ -0,0 +1,149 @@
|
|
1
|
+
require 'dbi'
|
2
|
+
require 'yaml'
|
3
|
+
|
4
|
+
# Read-only driver for database tables reached through DBI.
#
# Url form: dbi://[user[:pass]@]driver/host/database[/table]
#
# With a table, the records are the rows of "select * from table"; without
# one, the records are [table_name, row_count] pairs for every table in the
# database.  Credentials missing from the url are looked up in
# ~/.abtab.dbrc, a YAML file nested as host -> database -> "user"/"pass".
class Abtab::Driver::DbiDriver < Abtab::Driver
  CREDENTIALS_RC_FILE = "#{ENV['HOME']}/.abtab.dbrc"

  # driver name as written in the url => file to require and DBI driver name
  # NOTE(review): the :require value 'Pg' is kept from the original; confirm
  # it resolves on case-sensitive file systems.
  DB_MODULES = {
    'Pg' => { :require => 'Pg', :name => 'Pg' },
    'pg' => { :require => 'Pg', :name => 'Pg' },
    # TODO: test these
    #'mysql'   => 'mysql',
    #'Mysql'   => 'mysql',
    #'sqlite'  => 'sqlite3',
    #'SQLite'  => 'sqlite3',
    #'sqlite3' => 'sqlite3',
    #'SQLite3' => 'sqlite3',
  }

  # Walk the nested rc-file hash along +keys+; returns nil as soon as a
  # level is missing.
  def lookup_rc_default(*keys)
    keys.inject(@rc_defaults) { |level, key| level && level[key] }
  end

  # Parse +url+, resolve credentials and connect to the database.
  # Raises RuntimeError when the url names a driver that is not in DB_MODULES.
  def initialize(url)
    @options = {
      :dbi_driver => nil,
      :host       => nil,
      :user       => nil,
      :pass       => nil,
      :database   => nil,
      :table      => nil,
    }

    @rc_defaults = {}
    if File.exist? CREDENTIALS_RC_FILE
      # An empty rc file does not load as a Hash; ignore anything that is
      # not one instead of crashing in merge!.
      # NOTE(review): YAML.load_file is only appropriate because the file is
      # the user's own configuration.
      loaded = YAML.load_file(CREDENTIALS_RC_FILE)
      @rc_defaults.merge!(loaded) if loaded.is_a?(Hash)
    end

    @url = url
    _schema, rest = url.split '://', 2

    # expect 'driver' name, eg: pg
    driver, host, database_name, table = rest.split '/', 4

    if driver =~ /@/
      user_pass, driver = driver.split '@', 2
      # "user@driver" (no password) is accepted as well; the password then
      # falls back to the rc file below.
      user, pass = user_pass.split ':', 2
      @options[:user] = user unless user.nil? || user.empty?
      @options[:pass] = pass
    end

    @options[:dbi_driver] = driver
    @options[:database]   = database_name
    @options[:host]       = host
    @options[:table]      = table

    @options[:user] ||= lookup_rc_default @options[:host], @options[:database], "user"
    @options[:pass] ||= lookup_rc_default @options[:host], @options[:database], "pass"

    db_module = DB_MODULES[@options[:dbi_driver]]
    unless db_module
      raise "Error: unsupported dbi driver '#{@options[:dbi_driver]}' in url: #{url} " +
            "(supported: #{DB_MODULES.keys.sort.join(', ')})"
    end

    # Loaded lazily so only the database module actually used is needed.
    require db_module[:require]

    @conn = DBI.connect("DBI:#{db_module[:name]}:database=#{@options[:database]};host=#{@options[:host]}",
                        @options[:user], @options[:pass])
  end

  # Prepare the record stream: a select over the table, or the list of
  # tables when the url named none.
  #
  # NOTE(review): the table name from the url is interpolated into the SQL
  # text unquoted; only pass trusted urls.
  def open_for_reading
    close_statement_handle
    if @options[:table]
      @columns = get_col_names @options[:table]
      @statement_handle = @conn.prepare "select * from #{@options[:table]}"
      @statement_handle.execute
    else
      @columns = ['NAME','ROWS']
      @rows = @conn.tables
    end
  end

  # TODO: implement insertion/append and truncation
  # def open_for_writing
  #   close_statement_handle
  #   @columns = get_col_names @options[:table]
  #   sql_stmt = sprintf("INSERT INTO #{@options[:table]} VALUES (%s)", get_col_names.map {'?'}.join(","))
  #   puts "sql_stmt: #{sql_stmt}"
  #   @statement_handle = @conn.prepare sql_stmt
  # end

  # Column names of the current record stream.
  def columns
    @columns
  end

  # Column names of +table+, obtained from a query that returns no rows.
  def get_col_names(table)
    sth = @conn.prepare "SELECT * FROM #{table} where 1=0"
    sth.execute
    col_names = sth.column_names
    sth.finish
    col_names
  end

  # Next row of the table, or the next [table_name, row_count] pair when
  # listing tables; nil once exhausted.
  def next_record
    return @statement_handle.fetch if @statement_handle
    if @rows && !@rows.empty?
      table = @rows.shift
      sth = @conn.prepare("SELECT COUNT('x') from #{table}")
      sth.execute
      count = sth.fetch_array.first
      sth.finish
      return [table,count]
    end
    return nil
  end

  # Restart the record stream from the beginning.  Only the statement is
  # reset: the previous version closed the connection and then tried to
  # prepare a statement on it.
  def rewind
    close_statement_handle
    open_for_reading
  end

  # Finish the statement and disconnect.
  def close
    close_statement_handle
    close_connection
  end

  # Disconnect from the database if connected.
  def close_connection
    if @conn
      @conn.close
      @conn = nil
    end
  end

  # Finish the active statement, if any.
  def close_statement_handle
    if @statement_handle
      @statement_handle.finish
      @statement_handle = nil
    end
  end
end
|
148
|
+
|
149
|
+
Abtab.register 'dbi', Abtab::Driver::DbiDriver
|
@@ -0,0 +1,76 @@
|
|
1
|
+
|
2
|
+
# Driver for delimiter separated text files (tab by default; override with
# the "col_sep" url option, e.g. 'tab://file.txt?col_sep=|').
#
# Tab, newline and carriage-return characters inside a field are stored as
# the two-character sequences \t, \n and \r.
class Abtab::Driver::TabDriver < Abtab::Driver
  # +url+ looks like "tab://path/to/file.tab"
  def initialize(url)
    @options = {}
    @options["col_sep"] = "\t"
    @schema, @file, @options = url_parse url, @options
  end

  # Open the file and consume the header line into @columns.
  # Raises RuntimeError when the file does not exist.
  def open_for_reading
    unless File.exist? @file
      raise "Error: can not open for reading, file does not exist: #{@file}"
    end

    @read_fh = File.open(@file, 'r')
    @columns = parse_line @read_fh.readline
  end

  # Split one line of text into unescaped fields.
  #
  # The -1 limit keeps trailing empty fields, so a record whose last columns
  # are empty keeps its column count.  A blank line yields an empty record.
  # The argument is not modified.
  def parse_line(l)
    l.chomp.split(@options["col_sep"], -1).map do |f|
      f.gsub("\\t", "\t").gsub("\\n", "\n").gsub("\\r", "\r")
    end
  end

  # Join the fields of +r+ into one output line, escaping tab, newline and
  # carriage return.  Fields are converted with to_s first, so nil and
  # non-String values (e.g. numbers coming from a database) are accepted,
  # and the caller's record is left untouched.
  def format_rec(r)
    r.map do |f|
      f.to_s.gsub("\t", "\\t").gsub("\n", "\\n").gsub("\r", "\\r")
    end.join(@options["col_sep"])
  end

  # Column names read from the header, or set through set_columns.
  def columns
    @columns
  end

  # Return the next record as an array of fields, or nil at end of file.
  def next_record
    return nil if @read_fh.eof?
    parse_line @read_fh.readline
  end

  # Close whichever of the read and write handles are open.
  def close
    if @read_fh
      @read_fh.close
      @read_fh = nil
    end
    if @write_fh
      @write_fh.close
      @write_fh = nil
    end
  end

  # Open the file for writing, truncating it; the header is written
  # immediately when the columns are already known.
  def open_for_writing
    @write_fh = File.open(@file, 'w')
    set_columns(@columns) if @columns && !@columns.empty?
  end

  # Write one record (an array of fields) as a line.
  def write_record(rec)
    @write_fh.puts format_rec(rec)
  end

  # Remember +cols+ as the column names and write them as the header line.
  def set_columns(cols)
    @columns = cols
    write_record @columns
  end
end
|
75
|
+
|
76
|
+
Abtab.register 'tab', Abtab::Driver::TabDriver
|
metadata
ADDED
@@ -0,0 +1,106 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: abstract-tables
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 23
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 1
|
8
|
+
- 0
|
9
|
+
- 0
|
10
|
+
version: 1.0.0
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Kyle Burton
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2011-02-09 00:00:00 -05:00
|
19
|
+
default_executable:
|
20
|
+
dependencies:
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
name: dbi
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 5
|
30
|
+
segments:
|
31
|
+
- 0
|
32
|
+
- 4
|
33
|
+
- 5
|
34
|
+
version: 0.4.5
|
35
|
+
type: :runtime
|
36
|
+
version_requirements: *id001
|
37
|
+
description: |+
|
38
|
+
The best I could come up with was to just show you:
|
39
|
+
|
40
|
+
$ atcat dbi://pg/localhost/database_name/table_name csv://table_name.csv
|
41
|
+
|
42
|
+
That exports a table from Postgres into a comma separated value file. You can
|
43
|
+
read from or write to: tab, csv, dbi, etc. You can opaquely treat any of those
|
44
|
+
'table of records' based sources as an opaque URI. Want to read more?
|
45
|
+
|
46
|
+
https://github.com/kyleburton/abstract-tables
|
47
|
+
|
48
|
+
email: kyle.burton@gmail.com
|
49
|
+
executables:
|
50
|
+
- atcat
|
51
|
+
- atview
|
52
|
+
extensions: []
|
53
|
+
|
54
|
+
extra_rdoc_files:
|
55
|
+
- README.textile
|
56
|
+
files:
|
57
|
+
- abstract-tables-1.0.0.gem
|
58
|
+
- abstract-tables.gemspec
|
59
|
+
- bin/atcat
|
60
|
+
- bin/atview
|
61
|
+
- introducing-abtab/README.textile
|
62
|
+
- lib/abtab/driver.rb
|
63
|
+
- lib/abtab/drivers/csv_driver.rb
|
64
|
+
- lib/abtab/drivers/dbi_driver.rb
|
65
|
+
- lib/abtab/drivers/tab_driver.rb
|
66
|
+
- lib/abtab.rb
|
67
|
+
- README.textile
|
68
|
+
- test/fixtures/files/file1.csv
|
69
|
+
- test/fixtures/files/file1.tab
|
70
|
+
has_rdoc: true
|
71
|
+
homepage: http://github.com/kyleburton/abstract-tables
|
72
|
+
licenses: []
|
73
|
+
|
74
|
+
post_install_message:
|
75
|
+
rdoc_options: []
|
76
|
+
|
77
|
+
require_paths:
|
78
|
+
- lib
|
79
|
+
- bin
|
80
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
82
|
+
requirements:
|
83
|
+
- - ">="
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
hash: 3
|
86
|
+
segments:
|
87
|
+
- 0
|
88
|
+
version: "0"
|
89
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
90
|
+
none: false
|
91
|
+
requirements:
|
92
|
+
- - ">="
|
93
|
+
- !ruby/object:Gem::Version
|
94
|
+
hash: 3
|
95
|
+
segments:
|
96
|
+
- 0
|
97
|
+
version: "0"
|
98
|
+
requirements: []
|
99
|
+
|
100
|
+
rubyforge_project:
|
101
|
+
rubygems_version: 1.5.0
|
102
|
+
signing_key:
|
103
|
+
specification_version: 3
|
104
|
+
summary: "Table Abstraction as a URI : Record Streams, Filters, ETL Ginsu"
|
105
|
+
test_files: []
|
106
|
+
|