chicago-etl 0.0.9 → 0.0.10
Sign up to get free protection for your applications and to get access to all the features.
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.9
|
1
|
+
0.0.10
|
data/chicago-etl.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "chicago-etl"
|
8
|
-
s.version = "0.0.
|
8
|
+
s.version = "0.0.10"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Roland Swingler"]
|
12
|
-
s.date = "2013-02-
|
12
|
+
s.date = "2013-02-25"
|
13
13
|
s.description = "ETL tools for Chicago"
|
14
14
|
s.email = "roland.swingler@gmail.com"
|
15
15
|
s.extra_rdoc_files = [
|
@@ -63,6 +63,7 @@ Gem::Specification.new do |s|
|
|
63
63
|
"spec/etl/screens/out_of_bounds_spec.rb",
|
64
64
|
"spec/etl/sequel/dependant_tables_spec.rb",
|
65
65
|
"spec/etl/sequel/filter_to_etl_batch_spec.rb",
|
66
|
+
"spec/etl/sequel/load_data_infile_expression_spec.rb",
|
66
67
|
"spec/etl/sequel/load_data_infile_spec.rb",
|
67
68
|
"spec/etl/sink_spec.rb",
|
68
69
|
"spec/etl/table_builder_spec.rb",
|
@@ -1,15 +1,137 @@
|
|
1
1
|
module Chicago
  module ETL
    module SequelExtensions
      # Builds the text of a MySQL LOAD DATA INFILE statement from a
      # file path, a target table, a list of columns and some options.
      #
      # NOTE(review): path, table, character_set and the column names
      # are interpolated into the statement without any escaping, so
      # they should only come from trusted code - confirm at call
      # sites. Only the :set keys and values go through db.literal.
      #
      # @api private
      class LoadDataInfileExpression
        attr_reader :path, :table, :columns, :ignore, :character_set

        # path    - the file to load.
        # table   - the name of the table to load into.
        # columns - the columns to load; names starting with an @
        #           symbol are emitted unquoted, as variables.
        #
        # Options:
        #   :ignore - the number of lines to ignore in the source file
        #   :update - nil, :ignore or :replace
        #   :set - a hash specifying autopopulation of columns
        #   :character_set - the character set of the file, "utf8" default
        #   :format - either nil or :csv
        def initialize(path, table, columns, opts={})
          @path = path
          @table = table
          @columns = columns
          @ignore = opts[:ignore]
          @update = opts[:update]
          @set = opts[:set] || {}
          @character_set = opts[:character_set] || "utf8"

          # Fragments for these are only emitted when they are set,
          # which only happens for the :csv format.
          @field_terminator = @enclosed_by = @escaped_by = nil
          if opts[:format] == :csv
            @field_terminator = ","
            @enclosed_by = '"'
            @escaped_by = '"'
          end
        end

        # Returns true if the statement was built with :update => :replace.
        def replace?
          @update == :replace
        end

        # Returns true if the statement was built with :update => :ignore.
        #
        # Not to be confused with the +ignore+ reader, which is the
        # number of leading lines to skip.
        def ignore?
          @update == :ignore
        end

        # Returns the complete statement as a String.
        #
        # db is used to produce literals for the :set clause (it must
        # respond to +literal+).
        def to_sql(db)
          @db = db
          fragments = [load_fragment,
                       replace_fragment,
                       table_fragment,
                       character_set_fragment,
                       field_terminator_fragment,
                       field_enclosure_fragment,
                       escape_fragment,
                       ignore_fragment,
                       column_fragment,
                       set_fragment]
          # Optional fragments return nil when they don't apply.
          fragments.compact.join(" ")
        end

        private

        def load_fragment
          "LOAD DATA INFILE '#{path}'"
        end

        def replace_fragment
          @update.to_s.upcase if replace? || ignore?
        end

        def table_fragment
          "INTO TABLE `#{table}`"
        end

        def character_set_fragment
          "CHARACTER SET '#{character_set}'"
        end

        def field_terminator_fragment
          "FIELDS TERMINATED BY '#{@field_terminator}'" if @field_terminator
        end

        def field_enclosure_fragment
          "OPTIONALLY ENCLOSED BY '#{@enclosed_by}'" if @enclosed_by
        end

        def escape_fragment
          "ESCAPED BY '#{@escaped_by}'" if @escaped_by
        end

        def ignore_fragment
          "IGNORE #{ignore} LINES" if ignore
        end

        def column_fragment
          "(" + columns.map {|c| format_column(c) }.join(",") + ")"
        end

        def set_fragment
          return if @set.empty?

          assignments = @set.map do |k, v|
            "#{@db.literal(k)} = #{@db.literal(v)}"
          end
          "SET " + assignments.join(", ")
        end

        # Variables (names starting with @) are left bare, everything
        # else is back-quoted.
        #
        # start_with? is used rather than comparing String#[] to "@",
        # because on Ruby 1.8 "@x"[0] is an Integer and the comparison
        # would never match.
        def format_column(column)
          name = column.to_s
          name.start_with?("@") ? name : "`#{name}`"
        end
      end

      module LoadDataInfile
        # Load data in file specified at path.
        #
        # Columns is a list of columns to load - column names starting
        # with an @ symbol will be treated as variables.
        #
        # By default, this will generate a REPLACE INTO TABLE
        # statement.
        #
        # Options:
        #   :ignore - the number of lines to ignore in the source file
        #   :update - nil, :ignore or :replace
        #   :set - a hash specifying autopopulation of columns
        #   :character_set - the character set of the file, UTF8 default
        #   :format - either nil or :csv
        def load_infile(path, columns, options={})
          execute_dui(load_infile_sql(path, columns, options))
        end

        # Returns the SQL that load_infile would execute.
        #
        # Unless an :update option is given, the statement uses IGNORE
        # when this dataset's opts has :insert_ignore set, and REPLACE
        # otherwise.
        def load_infile_sql(path, columns, options={})
          replacement = opts[:insert_ignore] ? :ignore : :replace
          options = {:update => replacement}.merge(options)
          LoadDataInfileExpression.new(path,
                                       opts[:from].first,
                                       columns,
                                       options).
            to_sql(db)
        end

        # Loads the CSV data columns in path into this dataset's
        # table.
        #
        # See load_infile for more options.
        def load_csv_infile(path, columns, options={})
          execute_dui(load_csv_infile_sql(path, columns, options))
        end

        # Returns the SQL that load_csv_infile would execute.
        def load_csv_infile_sql(path, columns, options={})
          load_infile_sql(path, columns, options.merge(:format => :csv))
        end
      end
    end
  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'spec_helper'

# Checks the SQL fragments produced by LoadDataInfileExpression#to_sql
# for each supported option.
describe Chicago::ETL::SequelExtensions::LoadDataInfileExpression do
  it "loads the data in the file into the table" do
    described_class.new("bar.csv", :foo, ['bar', 'quux']).
      to_sql(TEST_DB).should include("LOAD DATA INFILE 'bar.csv' INTO TABLE `foo`")
  end

  it "loads the data with replacement" do
    described_class.new("bar.csv", :foo, ['bar', 'quux'],
                        :update => :replace).
      to_sql(TEST_DB).should include("REPLACE INTO TABLE")
  end

  it "loads the data ignoring rows" do
    described_class.new("bar.csv", :foo, ['bar', 'quux'], :update => :ignore).
      to_sql(TEST_DB).should include("IGNORE INTO TABLE")
  end

  it "should be in UTF-8 character set by default" do
    described_class.new("bar.csv", :foo, ['bar', 'quux']).
      to_sql(TEST_DB).should include("CHARACTER SET 'utf8'")
  end

  it "may be in other character sets" do
    described_class.new("bar.csv", :foo, ['bar', 'quux'], :character_set => "ascii").
      to_sql(TEST_DB).should include("CHARACTER SET 'ascii'")
  end

  it "should load columns" do
    described_class.new("bar.csv", :foo, ['bar', 'quux']).
      to_sql(TEST_DB).should include("(`bar`,`quux`)")
  end

  it "should load into variables if column begins with @" do
    described_class.new("bar.csv", :foo, ['@bar', 'quux']).
      to_sql(TEST_DB).should include("(@bar,`quux`)")
  end

  it "can ignore lines" do
    described_class.new("bar.csv", :foo, ['bar', 'quux'], :ignore => 2).
      to_sql(TEST_DB).should include("IGNORE 2 LINES")
  end

  it "can be in csv format" do
    described_class.new("bar.csv", :foo, ['bar', 'quux'], :format => :csv).
      to_sql(TEST_DB).should include("FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '\"' ESCAPED BY '\"'")
  end

  it "can set column values" do
    described_class.new("bar.csv", :foo, ['@bar', 'quux'],
                        :set => {:bar => :unhex.sql_function("@bar".lit),
                                 :etl_batch_id => 3}).
      to_sql(TEST_DB).should include("SET `bar` = unhex(@bar), `etl_batch_id` = 3")
  end
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: chicago-etl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 11
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
- 9
|
10
|
-
version: 0.0.9
|
9
|
+
- 10
|
10
|
+
version: 0.0.10
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Roland Swingler
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2013-02-
|
18
|
+
date: 2013-02-25 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
version_requirements: &id001 !ruby/object:Gem::Requirement
|
@@ -200,6 +200,7 @@ files:
|
|
200
200
|
- spec/etl/screens/out_of_bounds_spec.rb
|
201
201
|
- spec/etl/sequel/dependant_tables_spec.rb
|
202
202
|
- spec/etl/sequel/filter_to_etl_batch_spec.rb
|
203
|
+
- spec/etl/sequel/load_data_infile_expression_spec.rb
|
203
204
|
- spec/etl/sequel/load_data_infile_spec.rb
|
204
205
|
- spec/etl/sink_spec.rb
|
205
206
|
- spec/etl/table_builder_spec.rb
|