abstract-tables 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.textile +76 -0
- data/abstract-tables.gemspec +45 -0
- data/bin/atcat +25 -0
- data/bin/atview +136 -0
- data/introducing-abtab/README.textile +50 -0
- data/lib/abtab.rb +57 -0
- data/lib/abtab/driver.rb +24 -0
- data/lib/abtab/drivers/csv_driver.rb +68 -0
- data/lib/abtab/drivers/dbi_driver.rb +149 -0
- data/lib/abtab/drivers/tab_driver.rb +76 -0
- data/test/fixtures/files/file1.csv +3 -0
- data/test/fixtures/files/file1.tab +3 -0
- metadata +106 -0
data/README.textile
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
h1. Abstract Table Library
|
2
|
+
|
3
|
+
In the spirit of the standard Unix utilities (like cat, grep, cut, etc.), this library creates an abstraction over tabular data. The library implements a series of drivers for sources with different encodings that act like sequences.
|
4
|
+
|
5
|
+
|
6
|
+
h1. Command Line Utilities
|
7
|
+
|
8
|
+
h2. atcat
|
9
|
+
|
10
|
+
<pre>
|
11
|
+
# dump a postgres table to the console (default is tab delimited)
|
12
|
+
atcat dbi://pg/localhost/database_name/table_name
|
13
|
+
|
14
|
+
# export a table to a tab file:
|
15
|
+
atcat dbi://pg/localhost/database_name/table_name tab://table_name.tab
|
16
|
+
|
17
|
+
# you can omit the schema for files, the schema will be guessed based on the file extension
|
18
|
+
atcat dbi://pg/localhost/database_name/table_name table_name.tab
|
19
|
+
|
20
|
+
# dump to csv
|
21
|
+
atcat dbi://pg/localhost/database_name/table_name csv://table_name.csv
|
22
|
+
# you can omit the schema
|
23
|
+
atcat dbi://pg/localhost/database_name/table_name table_name.csv
|
24
|
+
|
25
|
+
# convert from csv to tab
|
26
|
+
atcat csv://some-file.csv tab://some-file.tab
|
27
|
+
|
28
|
+
atcat some-file.csv some-file.tab
|
29
|
+
|
30
|
+
# convert from csv to pipe
|
31
|
+
atcat csv://some-file.csv 'tab://some-file.tab?col_sep=|'
|
32
|
+
</pre>
|
33
|
+
|
34
|
+
h2. rec-view
|
35
|
+
|
36
|
+
<pre>
|
37
|
+
rec-view tab://some-file.tab | less
|
38
|
+
rec-view csv://some-file.csv | less
|
39
|
+
rec-view dbi://Pg/some-file.csv | less
|
40
|
+
</pre>
|
41
|
+
|
42
|
+
h2. atcat
|
43
|
+
|
44
|
+
Limitations: only implemented 'driver' is dbi. Only supported output port is stdout as tab delimited.
|
45
|
+
|
46
|
+
<pre>
|
47
|
+
atcat dbi://user:pass@Pg/localhost/db_name/table_name | rec-view | less
|
48
|
+
</pre>
|
49
|
+
|
50
|
+
h1. Supported Drivers
|
51
|
+
|
52
|
+
h2. tab
|
53
|
+
|
54
|
+
Native Ruby for now.
|
55
|
+
|
56
|
+
h3. col_sep
|
57
|
+
|
58
|
+
Defaults to a tab character.
|
59
|
+
|
60
|
+
h2. csv
|
61
|
+
|
62
|
+
Via the "FasterCSV":http://fastercsv.rubyforge.org/ ruby gem.
|
63
|
+
|
64
|
+
h3. col_sep
|
65
|
+
|
66
|
+
Override the default ',' column separator.
|
67
|
+
|
68
|
+
h3. quote_char
|
69
|
+
|
70
|
+
Override the default quote_char (").
|
71
|
+
|
72
|
+
h1. License
|
73
|
+
|
74
|
+
h1. Authors
|
75
|
+
|
76
|
+
Kyle Burton <kyle.burton@gmail.com>
|
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
|
3
|
+
# Gem specification for abstract-tables: the abtab library (lib/) and the
# atcat / atview command line tools (bin/).
SPEC = Gem::Specification.new do |s|
  s.name = "abstract-tables"
  s.version = "1.0.0"
  s.date = '2011-02-09'
  s.authors = ["Kyle Burton"]
  s.email = "kyle.burton@gmail.com"
  s.platform = Gem::Platform::RUBY
  s.description = <<DESC
The best I could come up with was to just show you:

$ atcat dbi://pg/localhost/database_name/table_name csv://table_name.csv

That exports a table from Postgres into a comma separated value file. You can
read from or write to: tab, csv, dbi, etc. You can opaquely treat any of those
'table of records' based sources as an opaque URI. Want to read more?

https://github.com/kyleburton/abstract-tables

DESC
  s.summary = "Table Abstraction as a URI : Record Streams, Filters, ETL Ginsu"
  s.homepage = "http://github.com/kyleburton/abstract-tables"
  # The built .gem archive is deliberately NOT listed here: a gem must not
  # package a copy of itself, and the archive does not exist on a first build.
  s.files = %w[
    abstract-tables.gemspec
    bin/atcat
    bin/atview
    introducing-abtab/README.textile
    lib/abtab/driver.rb
    lib/abtab/drivers/csv_driver.rb
    lib/abtab/drivers/dbi_driver.rb
    lib/abtab/drivers/tab_driver.rb
    lib/abtab.rb
    README.textile
    test/fixtures/files/file1.csv
    test/fixtures/files/file1.tab
  ]
  s.executables = %w[atcat atview]
  s.require_paths = %w[lib bin]
  s.extra_rdoc_files = %w[README.textile] # LICENSE]
  s.add_runtime_dependency('dbi', [">= 0.4.5"])
  # lib/abtab/drivers/csv_driver.rb requires 'fastercsv' at load time, and
  # lib/abtab.rb loads every driver, so the gem cannot be used without it.
  s.add_runtime_dependency('fastercsv')
end
|
data/bin/atcat
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'rubygems'
|
3
|
+
require 'abtab'
|
4
|
+
|
5
|
+
# Copy every record of an input table to an output table.
#
# Input:  when stdin is a pipe it is read as a tab delimited table; otherwise
#         the first command line argument is taken as the input URI.
# Output: the next command line argument, or tab delimited stdout when none
#         is given.
inp = nil
outp = nil

begin
  input_uri = if File.pipe?('/dev/stdin')
    'tab:///dev/stdin'
  else
    ARGV.shift || raise("You must specify a URI to cat")
  end

  inp = Abtab.read_handle input_uri
  outp = Abtab.write_handle(ARGV.shift || 'tab:///dev/stdout')

  # The header row must be written before any data rows are copied.
  outp.set_columns inp.columns
  outp.import inp
ensure
  # Release file handles / database connections even when the copy fails.
  outp.close if outp.respond_to?(:close)
  inp.close if inp.respond_to?(:close)
end
|
25
|
+
|
data/bin/atview
ADDED
@@ -0,0 +1,136 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'rubygems'
|
3
|
+
require 'abtab'
|
4
|
+
|
5
|
+
#
|
6
|
+
# Pipe mysql through this:
|
7
|
+
#
|
8
|
+
# mysql -uuser database_name -e 'select * from some_table' | rec-view.rb
|
9
|
+
#
|
10
|
+
# and it will produce a 'portrait' view of the records
|
11
|
+
#
|
12
|
+
# Record[1]
|
13
|
+
# [ 1] ID : 1
|
14
|
+
# [ 2] FNAME: Kyle
|
15
|
+
# [ 3] LNAME: Burton
|
16
|
+
# [ 4] CITY : Philadelphia
|
17
|
+
# [ 5] ST : PA
|
18
|
+
# [ 6] ZIP : 19101
|
19
|
+
#
|
20
|
+
# Record[2]
|
21
|
+
# [ 1] ID : 2
|
22
|
+
# [ 2] FNAME: Alan
|
23
|
+
# [ 3] LNAME: Barton
|
24
|
+
# [ 4] CITY : Newark
|
25
|
+
# [ 5] ST : DE
|
26
|
+
# [ 6] ZIP : 10817
|
27
|
+
#
|
28
|
+
|
29
|
+
require 'optparse'
|
30
|
+
|
31
|
+
# $options = {:no_hdr => false}
|
32
|
+
# OptionParser.new do |opts|
|
33
|
+
# opts.banner = "Usage: #$0 [options]"
|
34
|
+
#
|
35
|
+
# opts.on("-s", "--sort", "Sort the fields before display") do |v|
|
36
|
+
# $options[:sort_fields] = v
|
37
|
+
# end
|
38
|
+
#
|
39
|
+
# opts.on("-f", "--fields FIELDS", "Specify the fields as a comma delimited string") do |v|
|
40
|
+
# $options[:fields] = v
|
41
|
+
# end
|
42
|
+
#
|
43
|
+
# opts.on("-c", "--const N:V,N:V,...", "Specify constant field values, always prefixed, comma separated") do |v|
|
44
|
+
# $options[:constant_fields] = v
|
45
|
+
# end
|
46
|
+
#
|
47
|
+
# opts.on("-r", "--raw", "Raw output, just tab delimited") do |v|
|
48
|
+
# $options[:raw] = v
|
49
|
+
# end
|
50
|
+
#
|
51
|
+
# opts.on("-n", "--no-header", "Suppress printing the header for --raw") do |v|
|
52
|
+
# $options[:no_hdr] = true
|
53
|
+
# end
|
54
|
+
#
|
55
|
+
# end.parse!
|
56
|
+
#
|
57
|
+
# def constant_field_names
|
58
|
+
# $options[:constant_fields].split(/,/).map { |pair|
|
59
|
+
# pair.split(/:/)[0]
|
60
|
+
# }
|
61
|
+
# end
|
62
|
+
#
|
63
|
+
# def constant_field_values
|
64
|
+
# $options[:constant_fields].split(/,/).map { |pair|
|
65
|
+
# pair.split(/:/)[1]
|
66
|
+
# }
|
67
|
+
# end
|
68
|
+
#
|
69
|
+
# fields = []
|
70
|
+
# if $options[:fields]
|
71
|
+
# fields = $options[:fields].split(/,/)
|
72
|
+
# else
|
73
|
+
# header_line = $stdin.readline
|
74
|
+
# fields = header_line.split(/\t/)
|
75
|
+
# fields[-1].chomp!
|
76
|
+
# end
|
77
|
+
#
|
78
|
+
# fields.unshift(*constant_field_names) if $options[:constant_fields]
|
79
|
+
#
|
80
|
+
# orig_order_map = {}
|
81
|
+
# fields.each_with_index do |field,idx|
|
82
|
+
# orig_order_map[field] = idx
|
83
|
+
# end
|
84
|
+
# sorted_fields = $options[:sort_fields] ? fields.sort : fields
|
85
|
+
#
|
86
|
+
# max_width = fields.map {|f| f.size}.max
|
87
|
+
# recno = 0
|
88
|
+
#
|
89
|
+
# if $options[:raw]
|
90
|
+
# puts fields.join("\t") if !$options[:no_hdr]
|
91
|
+
# $stdin.each do |line|
|
92
|
+
# recno = recno + 1
|
93
|
+
# rec = line.split(/\t/)
|
94
|
+
# rec[-1].chomp!
|
95
|
+
# rec.unshift(*constant_field_values) if $options[:constant_fields]
|
96
|
+
# puts rec.join("\t")
|
97
|
+
# end
|
98
|
+
# exit 0
|
99
|
+
# end
|
100
|
+
#
|
101
|
+
# $stdin.each do |line|
|
102
|
+
# recno = recno + 1
|
103
|
+
# puts "Record[#{recno}]"
|
104
|
+
# rec = line.split(/\t/)
|
105
|
+
# rec[-1].chomp!
|
106
|
+
# rec.unshift(*constant_field_values) if $options[:constant_fields]
|
107
|
+
# rows = []
|
108
|
+
# sorted_fields.each_with_index do |field,idx|
|
109
|
+
# actual_idx = orig_order_map[field]
|
110
|
+
# rows << sprintf( "[% 3d] %-*s: %s\n", 1+idx, max_width, field, rec[actual_idx] )
|
111
|
+
# end
|
112
|
+
# if $options[:sort_fields]
|
113
|
+
# puts rows.sort
|
114
|
+
# else
|
115
|
+
# puts rows
|
116
|
+
# end
|
117
|
+
# puts ""
|
118
|
+
# end
|
119
|
+
|
120
|
+
# Print each record of the table named by the URI in ARGV[0] in 'portrait'
# form: a "Record[n]" heading followed by one "[idx] NAME: value" line per
# column, with column names padded to the widest name.
uri = ARGV.shift || raise("You must specify a URI to view")
inp = Abtab.read_handle uri

begin
  columns = inp.columns
  # max is nil for an empty column list; fall back to 0 so sprintf's '*'
  # width argument is always an Integer.
  max_width = columns.map { |f| f.size }.max || 0
  recno = 0

  # next_record returns nil once the source is exhausted, which ends the loop.
  while rec = inp.next_record
    recno += 1
    puts "Record[#{recno}]"
    rows = []
    columns.each_with_index do |field, idx|
      rows << sprintf("[% 3d] %-*s: %s\n", 1 + idx, max_width, field, rec[idx])
    end

    puts rows
    puts ""
  end
ensure
  # Release the file handle / database connection even if printing fails.
  inp.close if inp.respond_to?(:close)
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
I came to Ruby by way of Perl.
|
2
|
+
|
3
|
+
The Perl archipelago, Wall Island, Christiansen, Schwartz, Conway
|
4
|
+
|
5
|
+
* gasp *
|
6
|
+
|
7
|
+
I started with Perl 4 in order to customize the installation of Windows NT so
|
8
|
+
that it could be automated.
|
9
|
+
|
10
|
+
This involved heavy use of regular expressions.
|
11
|
+
|
12
|
+
Mom: "Bobby, we need to talk"
|
13
|
+
"What is it, Mom?" : Bobby
|
14
|
+
Mom: "A friend's mother said
|
15
|
+
other kids at school
|
16
|
+
have been experimenting
|
17
|
+
with the regexes..."
|
18
|
+
"It's no big deal Mom, they're : Bobby
|
19
|
+
just =~ /(posix|pcre)/ims"
|
20
|
+
Mom: "OMG, you're taking the regex!"
|
21
|
+
|
22
|
+
Why regexes?
|
23
|
+
|
24
|
+
Some people, when confronted with a problem, think "I know, I'll use regular
|
25
|
+
expressions." Now they have two problems.
|
26
|
+
|
27
|
+
-- Jamie Zawinski
|
28
|
+
|
29
|
+
Regexes are a key that unlocks data
|
30
|
+
|
31
|
+
Find/Commission a graphic of a key-shaped word 'regex' and the word 'data' shaped
|
32
|
+
as an old style padlock (where the key goes in the front/middle)?
|
33
|
+
|
34
|
+
When I worked at Health Market Science, Ben Kennedy and I were asked to help
|
35
|
+
out our data delivery department with a client job that was taking at the time
|
36
|
+
about 48hrs to process.
|
37
|
+
|
38
|
+
During that time we happened upon the idea of a suite of command line tools and
|
39
|
+
a library which abstracted away file encodings and how tables of data were to
|
40
|
+
be represented.
|
41
|
+
|
42
|
+
Some clients wanted tab files, some csv, some pipe delimited, some Excel and
|
43
|
+
others MS Access database files.
|
44
|
+
|
45
|
+
The other idea that we stumbled upon was to use a URL to represent both the
|
46
|
+
location of the table's data and its encoding. This meant the command line utilities and the libraries could be written to outputs,
|
47
|
+
|
48
|
+
Picture(s):
|
49
|
+
URI: Yuri Gagarin
|
50
|
+
URL: the Ural Mountains
|
data/lib/abtab.rb
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
require 'abtab/driver'
|
2
|
+
|
3
|
+
# Registry and factory for table drivers.
#
# Drivers register themselves under a scheme name ('tab', 'csv', ...); callers
# then open a table by URL ("csv://file.csv") or by bare file name, in which
# case the driver is chosen from the file extension.
module Abtab
  # Maps a scheme name to the driver class registered for it.
  REGISTERED_DRIVERS = {}

  # Register +driver+ (a class whose constructor takes the URL) for +schema+.
  def self.register schema, driver
    REGISTERED_DRIVERS[schema] = driver
  end

  # Build a driver instance for +thing+, which is either a "scheme://..." URL
  # or a plain file name.
  #
  # Raises RuntimeError when the URL's scheme has no registered driver.
  def self.open thing
    scheme = thing[/\A(.+?):\/\//, 1]
    return open_file(thing) unless scheme

    # Look the scheme up exactly: a prefix test would let 'tab' claim a
    # 'table://' URL and could match several drivers at once.
    driver = REGISTERED_DRIVERS[scheme]
    raise "Error: there is no registered driver for url: #{thing}" unless driver

    driver.new thing
  end

  # Open +uri+ and prepare the driver for reading; returns the driver.
  def self.read_handle uri
    driver = self.open uri
    driver.open_for_reading
    driver
  end

  # Open +uri+ and prepare the driver for writing; returns the driver.
  def self.write_handle uri
    driver = self.open uri
    driver.open_for_writing
    driver
  end

  # Open a plain file name, picking the driver from its extension.
  # Files ending in ".csv" use the csv driver; everything else, including
  # ".tab", is treated as tab delimited.
  def self.open_file thing
    # TODO: put in some more 'magick' -- use some heuristics to figure out the likely format by looking at the first few lines...
    scheme = thing =~ /\.csv\z/ ? 'csv' : 'tab'
    self.open "#{scheme}://#{thing}"
  end
end
|
54
|
+
|
55
|
+
# Load every bundled driver file; each one registers itself with Abtab.
driver_glob = File.dirname(__FILE__) + '/abtab/drivers/**/*.rb'
Dir.glob(driver_glob).each { |driver_file| require driver_file }
|
data/lib/abtab/driver.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
module Abtab
  # Base class for table drivers. Subclasses supply next_record (reading) and
  # write_record (writing); this class provides record copying and URL parsing.
  class Driver
    # Copy every record from +inp+ to this driver.
    # +inp+ must respond to next_record, returning nil when exhausted.
    def import inp
      while rec = inp.next_record
        write_record rec
      end
    end

    # Split "schema://path?k=v&k2=v2" into its parts.
    #
    # url     - the URL to parse; must contain "://".
    # options - hash of defaults; query string pairs are merged into it
    #           (keys and values are percent-decoded, a key without "=" maps
    #           to nil).
    #
    # Returns [schema, path, options].
    # Raises ArgumentError when +url+ has no "://".
    def url_parse url, options={}
      schema, rest = url.split '://', 2
      raise ArgumentError, "Error: not a url (missing '://'): #{url}" if rest.nil?

      path, qs = rest.split '?', 2
      (qs || '').split(/[;&]/).each do |pair|
        next if pair.empty?
        # Limit of 2 keeps any further '=' characters inside the value.
        k, v = pair.split '=', 2
        options[percent_decode(k)] = percent_decode(v)
      end
      return schema, path, options
    end

    private

    # Replace each %XX escape in +text+ with the byte it encodes; nil passes
    # through. Done locally so the driver needs neither `require 'uri'` nor
    # URI.unescape, which newer Rubies no longer provide.
    def percent_decode text
      return text if text.nil?
      text.gsub(/%[a-fA-F\d]{2}/) do |esc|
        [esc[1, 2]].pack('H2').force_encoding(text.encoding)
      end
    end
  end
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
require 'fastercsv'
|
2
|
+
require 'uri'
|
3
|
+
|
4
|
+
# Driver for comma separated value files, parsed and formatted with FasterCSV.
#
# URL form: csv://path/to/file.csv?col_sep=X&quote_char=Y
# The paths /dev/stdin and /dev/stdout map to the process's own streams.
# The first line of a file is its header (column names).
class Abtab::Driver::CSVDriver < Abtab::Driver
  # url - the csv:// URL; col_sep defaults to ',' and quote_char to '"'.
  def initialize url
    defaults = {
      "quote_char" => '"',
      "col_sep" => ','
    }
    @schema, @file, @options = url_parse url, defaults
  end

  # Open the source and read the header line into the column list.
  # Raises RuntimeError when the file does not exist.
  def open_for_reading
    if @file == '/dev/stdin'
      @read_fh = $stdin
    else
      unless File.exist? @file
        raise "Error: can not open for reading, file does not exist: #{@file}"
      end
      @read_fh = File.open(@file, 'r')
    end

    @columns = parse_line @read_fh.readline
  end

  # Column names, from the header line or from set_columns.
  def columns
    @columns
  end

  # Returns the next record as an array of fields, or nil at end of input.
  def next_record
    return nil if @read_fh.eof?
    parse_line @read_fh.readline
  end

  # Close whichever handles are open. The process's own stdin/stdout are left
  # open (stdout is flushed) since this driver did not open them.
  def close
    if @read_fh
      @read_fh.close unless @read_fh.equal?($stdin)
      @read_fh = nil
    end
    if @write_fh
      if @write_fh.equal?($stdout)
        @write_fh.flush
      else
        @write_fh.close
      end
      @write_fh = nil
    end
  end

  # Open the destination for writing.
  # NB: truncates the output file.
  def open_for_writing
    @write_fh = @file == '/dev/stdout' ? $stdout : File.open(@file, 'w')
    set_columns(@columns) if @columns && !@columns.empty?
  end

  # Write one record (an array of fields) as a CSV line.
  def write_record rec
    @write_fh.puts rec.to_csv(csv_options)
  end

  # Record the column names and write them as the header line.
  def set_columns cols
    @columns = cols
    write_record @columns
  end

  private

  # FasterCSV options derived from the URL's query string.
  def csv_options
    { :quote_char => @options["quote_char"], :col_sep => @options["col_sep"] }
  end

  # Parse a single line of CSV text into an array of fields.
  def parse_line line
    FasterCSV.parse(line.chomp, csv_options).first
  end
end

Abtab.register 'csv', Abtab::Driver::CSVDriver
|
@@ -0,0 +1,149 @@
|
|
1
|
+
require 'dbi'
|
2
|
+
require 'yaml'
|
3
|
+
|
4
|
+
# Read-only driver for database tables reached through Ruby DBI.
#
# URL form: dbi://[user:pass@]driver/host/database[/table]
# With a table, records are the rows of that table. Without one, records are
# [table_name, row_count] pairs for every table in the database.
# Credentials missing from the URL are looked up in CREDENTIALS_RC_FILE, a
# YAML file nested as host -> database -> "user" / "pass".
class Abtab::Driver::DbiDriver < Abtab::Driver
  CREDENTIALS_RC_FILE = "#{ENV['HOME']}/.abtab.dbrc"

  # Driver names accepted in the URL, mapped to the library to require and
  # the name used in the DBI connection string.
  DB_MODULES = {
    'Pg' => { :require => 'Pg', :name => 'Pg' },
    'pg' => { :require => 'Pg', :name => 'Pg' },
    # TODO: test these
    #'mysql' => 'mysql',
    #'Mysql' => 'mysql',
    #'sqlite' => 'sqlite3',
    #'SQLite' => 'sqlite3',
    #'sqlite3' => 'sqlite3',
    #'SQLite3' => 'sqlite3',
  }

  # Walk the rc-file defaults along +keys+; returns nil as soon as a level
  # is missing.
  def lookup_rc_default *keys
    keys.inject(@rc_defaults) { |level, key| level ? level[key] : level }
  end

  # Parse +url+, resolve credentials and connect to the database.
  # Raises RuntimeError when the URL names a DBI driver not in DB_MODULES.
  def initialize url
    @options = {
      :dbi_driver => nil,
      :host => nil,
      :user => nil,
      :pass => nil,
      :database => nil,
      :table => nil,
    }

    @rc_defaults = {}
    if File.exist? CREDENTIALS_RC_FILE
      loaded = YAML.load_file(CREDENTIALS_RC_FILE)
      # An empty or malformed rc file does not parse to a Hash; ignore it.
      @rc_defaults.merge!(loaded) if loaded.is_a?(Hash)
    end

    @url = url
    _schema, rest = url.split '://', 2

    # expect 'driver' name, eg: pg
    driver, host, database_name, table = rest.split '/', 4

    if driver =~ /@/
      user_pass, driver = driver.split '@', 2
      # "user@driver" (no password) still supplies the user; the password
      # then falls back to the rc file below.
      user, pass = user_pass.split ':', 2
      @options[:user] = user unless user.to_s.empty?
      @options[:pass] = pass
    end

    @options[:dbi_driver] = driver
    @options[:database] = database_name
    @options[:host] = host
    @options[:table] = table

    @options[:user] ||= lookup_rc_default @options[:host], @options[:database], "user"
    @options[:pass] ||= lookup_rc_default @options[:host], @options[:database], "pass"

    db_module = DB_MODULES[@options[:dbi_driver]]
    unless db_module
      raise "Error: unsupported dbi driver '#{@options[:dbi_driver]}' in url: #{url}"
    end
    require db_module[:require]

    @conn = DBI.connect("DBI:#{db_module[:name]}:database=#{@options[:database]};host=#{@options[:host]}", @options[:user], @options[:pass])
  end

  # Prepare for reading: run a select over the table, or, when the URL named
  # no table, fetch the list of tables to report on.
  def open_for_reading
    if @options[:table]
      @columns = get_col_names @options[:table]
      # NOTE(review): the table name comes straight from the URL and is
      # interpolated into the SQL text (identifiers cannot be bound as
      # parameters). Do not pass URLs from untrusted sources.
      @statement_handle = @conn.prepare "select * from #{@options[:table]}"
      @statement_handle.execute
    else
      @columns = ['NAME','ROWS']
      @rows = @conn.tables
    end
  end

  # TODO: implement insertion/append and truncation
  # def open_for_writing
  #   close_statement_handle
  #   @columns = get_col_names @options[:table]
  #   sql_stmt = sprintf("INSERT INTO #{@options[:table]} VALUES (%s)", get_col_names.map {'?'}.join(","))
  #   puts "sql_stmt: #{sql_stmt}"
  #   @statement_handle = @conn.prepare sql_stmt
  # end

  # Column names of the table, or ['NAME','ROWS'] in table-listing mode.
  def columns
    @columns
  end

  # Column names of +table+, obtained from a query that selects no rows.
  def get_col_names table
    sth = @conn.prepare "SELECT * FROM #{table} where 1=0"
    sth.execute
    col_names = sth.column_names
    sth.finish
    col_names
  end

  # Returns the next row of the table, or in table-listing mode the next
  # [table_name, row_count] pair; nil when there is nothing left.
  def next_record
    return @statement_handle.fetch if @statement_handle
    if @rows && !@rows.empty?
      table = @rows.shift
      sth = @conn.prepare("SELECT COUNT('x') from #{table}")
      sth.execute
      count = sth.fetch_array.first
      sth.finish
      return [table,count]
    end
    return nil
  end

  # Restart reading from the first record. Only the statement is discarded;
  # the connection must stay open because open_for_reading uses it.
  def rewind
    close_statement_handle
    open_for_reading
  end

  # Finish the active statement, if any, and disconnect.
  def close
    close_statement_handle
    close_connection
  end

  # Disconnect from the database if connected.
  def close_connection
    if @conn
      @conn.close
      @conn = nil
    end
  end

  # Finish the active statement handle, if any.
  def close_statement_handle
    if @statement_handle
      @statement_handle.finish
      @statement_handle = nil
    end
  end

end

Abtab.register 'dbi', Abtab::Driver::DbiDriver
|
@@ -0,0 +1,76 @@
|
|
1
|
+
|
2
|
+
# Driver for delimiter separated text files (tab delimited by default).
#
# URL form: tab://path/to/file.tab?col_sep=X
# The first line of a file is its header (column names). Tab, newline and
# carriage return characters inside a field are stored as the two-character
# sequences \t, \n and \r.
class Abtab::Driver::TabDriver < Abtab::Driver
  # url - the tab:// URL; col_sep defaults to a tab character.
  def initialize url
    @schema, @file, @options = url_parse(url, "col_sep" => "\t")
  end

  # Open the file and read the header line into the column list.
  # Raises RuntimeError when the file does not exist.
  def open_for_reading
    unless File.exist? @file
      raise "Error: can not open for reading, file does not exist: #{@file}"
    end

    @read_fh = File.open(@file, 'r')
    @columns = parse_line @read_fh.readline
  end

  # Split one line of text into fields and undo the \t, \n, \r escaping.
  # The line is not modified. An empty line yields an empty array.
  def parse_line l
    # Limit -1 keeps trailing empty fields, so every row has all its columns.
    l.chomp.split(@options["col_sep"], -1).map do |f|
      f.gsub("\\t", "\t").gsub("\\n", "\n").gsub("\\r", "\r")
    end
  end

  # Join a record into one output line, escaping tab, newline and carriage
  # return inside each field. Fields are converted with to_s first, so
  # non-String values (numbers, nil) are accepted; the record itself is left
  # unmodified.
  def format_rec r
    r.map do |f|
      f.to_s.gsub("\t", "\\t").gsub("\n", "\\n").gsub("\r", "\\r")
    end.join(@options["col_sep"])
  end

  # Column names, from the header line or from set_columns.
  def columns
    @columns
  end

  # Returns the next record as an array of fields, or nil at end of file.
  def next_record
    return nil if @read_fh.eof?
    parse_line @read_fh.readline
  end

  # Close the read and write handles, whichever are open.
  def close
    if @read_fh
      @read_fh.close
      @read_fh = nil
    end
    if @write_fh
      @write_fh.close
      @write_fh = nil
    end
  end

  # Open the file for writing (truncating it) and, if the columns are
  # already known, write the header line.
  def open_for_writing
    @write_fh = File.open(@file, 'w')
    set_columns(@columns) if @columns && !@columns.empty?
  end

  # Write one record (an array of fields) as a line.
  def write_record rec
    @write_fh.puts format_rec(rec)
  end

  # Record the column names and write them as the header line.
  def set_columns cols
    @columns = cols
    write_record @columns
  end
end

Abtab.register 'tab', Abtab::Driver::TabDriver
|
metadata
ADDED
@@ -0,0 +1,106 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: abstract-tables
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 23
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 1
|
8
|
+
- 0
|
9
|
+
- 0
|
10
|
+
version: 1.0.0
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Kyle Burton
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2011-02-09 00:00:00 -05:00
|
19
|
+
default_executable:
|
20
|
+
dependencies:
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
name: dbi
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 5
|
30
|
+
segments:
|
31
|
+
- 0
|
32
|
+
- 4
|
33
|
+
- 5
|
34
|
+
version: 0.4.5
|
35
|
+
type: :runtime
|
36
|
+
version_requirements: *id001
|
37
|
+
description: |+
|
38
|
+
The best I could come up with was to just show you:
|
39
|
+
|
40
|
+
$ atcat dbi://pg/localhost/database_name/table_name csv://table_name.csv
|
41
|
+
|
42
|
+
That exports a table from Postgres into a comma separated value file. You can
|
43
|
+
read from or write to: tab, csv, dbi, etc. You can opaquely treat any of those
|
44
|
+
'table of records' based sources as an opaque URI. Want to read more?
|
45
|
+
|
46
|
+
https://github.com/kyleburton/abstract-tables
|
47
|
+
|
48
|
+
email: kyle.burton@gmail.com
|
49
|
+
executables:
|
50
|
+
- atcat
|
51
|
+
- atview
|
52
|
+
extensions: []
|
53
|
+
|
54
|
+
extra_rdoc_files:
|
55
|
+
- README.textile
|
56
|
+
files:
|
57
|
+
- abstract-tables-1.0.0.gem
|
58
|
+
- abstract-tables.gemspec
|
59
|
+
- bin/atcat
|
60
|
+
- bin/atview
|
61
|
+
- introducing-abtab/README.textile
|
62
|
+
- lib/abtab/driver.rb
|
63
|
+
- lib/abtab/drivers/csv_driver.rb
|
64
|
+
- lib/abtab/drivers/dbi_driver.rb
|
65
|
+
- lib/abtab/drivers/tab_driver.rb
|
66
|
+
- lib/abtab.rb
|
67
|
+
- README.textile
|
68
|
+
- test/fixtures/files/file1.csv
|
69
|
+
- test/fixtures/files/file1.tab
|
70
|
+
has_rdoc: true
|
71
|
+
homepage: http://github.com/kyleburton/abstract-tables
|
72
|
+
licenses: []
|
73
|
+
|
74
|
+
post_install_message:
|
75
|
+
rdoc_options: []
|
76
|
+
|
77
|
+
require_paths:
|
78
|
+
- lib
|
79
|
+
- bin
|
80
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
82
|
+
requirements:
|
83
|
+
- - ">="
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
hash: 3
|
86
|
+
segments:
|
87
|
+
- 0
|
88
|
+
version: "0"
|
89
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
90
|
+
none: false
|
91
|
+
requirements:
|
92
|
+
- - ">="
|
93
|
+
- !ruby/object:Gem::Version
|
94
|
+
hash: 3
|
95
|
+
segments:
|
96
|
+
- 0
|
97
|
+
version: "0"
|
98
|
+
requirements: []
|
99
|
+
|
100
|
+
rubyforge_project:
|
101
|
+
rubygems_version: 1.5.0
|
102
|
+
signing_key:
|
103
|
+
specification_version: 3
|
104
|
+
summary: "Table Abstraction as a URI : Record Streams, Filters, ETL Ginsu"
|
105
|
+
test_files: []
|
106
|
+
|