chicago-etl 0.0.9 → 0.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.9
|
1
|
+
0.0.10
|
data/chicago-etl.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "chicago-etl"
|
8
|
-
s.version = "0.0.9"
|
8
|
+
s.version = "0.0.10"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Roland Swingler"]
|
12
|
-
s.date = "2013-02-
|
12
|
+
s.date = "2013-02-25"
|
13
13
|
s.description = "ETL tools for Chicago"
|
14
14
|
s.email = "roland.swingler@gmail.com"
|
15
15
|
s.extra_rdoc_files = [
|
@@ -63,6 +63,7 @@ Gem::Specification.new do |s|
|
|
63
63
|
"spec/etl/screens/out_of_bounds_spec.rb",
|
64
64
|
"spec/etl/sequel/dependant_tables_spec.rb",
|
65
65
|
"spec/etl/sequel/filter_to_etl_batch_spec.rb",
|
66
|
+
"spec/etl/sequel/load_data_infile_expression_spec.rb",
|
66
67
|
"spec/etl/sequel/load_data_infile_spec.rb",
|
67
68
|
"spec/etl/sink_spec.rb",
|
68
69
|
"spec/etl/table_builder_spec.rb",
|
@@ -1,15 +1,137 @@
|
|
1
1
|
module Chicago
|
2
2
|
module ETL
|
3
3
|
module SequelExtensions
|
4
|
+
# @api private
|
5
|
+
class LoadDataInfileExpression
|
6
|
+
attr_reader :path, :table, :columns, :ignore, :character_set
|
7
|
+
|
8
|
+
def initialize(path, table, columns, opts={})
|
9
|
+
@path = path
|
10
|
+
@table = table
|
11
|
+
@columns = columns
|
12
|
+
@ignore = opts[:ignore]
|
13
|
+
@update = opts[:update]
|
14
|
+
@set = opts[:set] || {}
|
15
|
+
@character_set = opts[:character_set] || "utf8"
|
16
|
+
if opts[:format] == :csv
|
17
|
+
@field_terminator = ","
|
18
|
+
@enclosed_by = '"'
|
19
|
+
@escaped_by = '"'
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
def replace?
|
24
|
+
@update == :replace
|
25
|
+
end
|
26
|
+
|
27
|
+
def ignore?
|
28
|
+
@update == :ignore
|
29
|
+
end
|
30
|
+
|
31
|
+
def to_sql(db)
|
32
|
+
@db = db
|
33
|
+
[load_fragment,
|
34
|
+
replace_fragment,
|
35
|
+
table_fragment,
|
36
|
+
character_set_fragment,
|
37
|
+
field_terminator_fragment,
|
38
|
+
field_enclosure_fragment,
|
39
|
+
escape_fragment,
|
40
|
+
ignore_fragment,
|
41
|
+
column_fragment,
|
42
|
+
set_fragment].compact.join(" ")
|
43
|
+
end
|
44
|
+
|
45
|
+
private
|
46
|
+
|
47
|
+
def load_fragment
|
48
|
+
"LOAD DATA INFILE '#{path}'"
|
49
|
+
end
|
50
|
+
|
51
|
+
def replace_fragment
|
52
|
+
@update.to_s.upcase if replace? || ignore?
|
53
|
+
end
|
54
|
+
|
55
|
+
def table_fragment
|
56
|
+
"INTO TABLE `#{table}`"
|
57
|
+
end
|
58
|
+
|
59
|
+
def character_set_fragment
|
60
|
+
"CHARACTER SET '#{character_set}'"
|
61
|
+
end
|
62
|
+
|
63
|
+
def field_terminator_fragment
|
64
|
+
"FIELDS TERMINATED BY '#{@field_terminator}'" if @field_terminator
|
65
|
+
end
|
66
|
+
|
67
|
+
def field_enclosure_fragment
|
68
|
+
"OPTIONALLY ENCLOSED BY '#{@enclosed_by}'" if @enclosed_by
|
69
|
+
end
|
70
|
+
|
71
|
+
def escape_fragment
|
72
|
+
"ESCAPED BY '#{@escaped_by}'" if @escaped_by
|
73
|
+
end
|
74
|
+
|
75
|
+
def ignore_fragment
|
76
|
+
"IGNORE #{ignore} LINES" if ignore
|
77
|
+
end
|
78
|
+
|
79
|
+
def column_fragment
|
80
|
+
"(" + columns.map {|c| format_column(c) }.join(",") + ")"
|
81
|
+
end
|
82
|
+
|
83
|
+
def set_fragment
|
84
|
+
unless @set.empty?
|
85
|
+
"SET " + @set.map do |k, v|
|
86
|
+
"#{@db.literal(k)} = #{@db.literal(v)}"
|
87
|
+
end.join(", ")
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
91
|
+
def format_column(column)
|
92
|
+
column.to_s[0] == "@" ? column : "`#{column}`"
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
4
96
|
module LoadDataInfile
|
5
|
-
#
|
6
|
-
|
7
|
-
|
97
|
+
# Load data in file specified at path.
|
98
|
+
#
|
99
|
+
# Columns is a list of columns to load - column names starting
|
100
|
+
# with an @ symbol will be treated as variables.
|
101
|
+
#
|
102
|
+
# By default, this will generate a REPLACE INTO TABLE
|
103
|
+
# statement.
|
104
|
+
#
|
105
|
+
# Options:
|
106
|
+
# :ignore - the number of lines to ignore in the source file
|
107
|
+
# :update - nil, :ignore or :replace
|
108
|
+
# :set - a hash specifying autopopulation of columns
|
109
|
+
# :character_set - the character set of the file, UTF8 default
|
110
|
+
# :format - either nil or :csv
|
111
|
+
def load_infile(path, columns, options={})
|
112
|
+
execute_dui(load_infile_sql(filepath, columns, options))
|
113
|
+
end
|
114
|
+
|
115
|
+
def load_infile_sql(path, columns, options={})
|
116
|
+
replacement = opts[:insert_ignore] ? :ignore : :replace
|
117
|
+
options = {:update => replacement}.merge(options)
|
118
|
+
LoadDataInfileExpression.new(path,
|
119
|
+
opts[:from].first,
|
120
|
+
columns,
|
121
|
+
options).
|
122
|
+
to_sql(db)
|
123
|
+
end
|
124
|
+
|
125
|
+
# Loads the CSV data columns in path into this dataset's
|
126
|
+
# table.
|
127
|
+
#
|
128
|
+
# See load_infile for more options.
|
129
|
+
def load_csv_infile(path, columns, options={})
|
130
|
+
execute_dui(load_csv_infile_sql(filepath, columns, options))
|
8
131
|
end
|
9
132
|
|
10
|
-
def load_csv_infile_sql(
|
11
|
-
|
12
|
-
"LOAD DATA INFILE '#{filepath}' #{replacement} INTO TABLE `#{opts[:from].first}` CHARACTER SET 'utf8' FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '\"' ESCAPED BY '\"' (`#{columns.join('`,`')}`);"
|
133
|
+
def load_csv_infile_sql(path, columns, options={})
|
134
|
+
load_infile_sql(path, columns, options.merge(:format => :csv))
|
13
135
|
end
|
14
136
|
end
|
15
137
|
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Chicago::ETL::SequelExtensions::LoadDataInfileExpression do
|
4
|
+
it "loads the data in the file into the table" do
|
5
|
+
described_class.new("bar.csv", :foo, ['bar', 'quux']).
|
6
|
+
to_sql(TEST_DB).should include("LOAD DATA INFILE 'bar.csv' INTO TABLE `foo`")
|
7
|
+
end
|
8
|
+
|
9
|
+
it "loads the data with replacment" do
|
10
|
+
described_class.new("bar.csv", :foo, ['bar', 'quux'],
|
11
|
+
:update => :replace).
|
12
|
+
to_sql(TEST_DB).should include("REPLACE INTO TABLE")
|
13
|
+
end
|
14
|
+
|
15
|
+
it "loads the data ignoring rows" do
|
16
|
+
described_class.new("bar.csv", :foo, ['bar', 'quux'], :update => :ignore).
|
17
|
+
to_sql(TEST_DB).should include("IGNORE INTO TABLE")
|
18
|
+
end
|
19
|
+
|
20
|
+
it "should be in UTF-8 character set by default" do
|
21
|
+
described_class.new("bar.csv", :foo, ['bar', 'quux']).
|
22
|
+
to_sql(TEST_DB).should include("CHARACTER SET 'utf8'")
|
23
|
+
end
|
24
|
+
|
25
|
+
it "may be in other character sets" do
|
26
|
+
described_class.new("bar.csv", :foo, ['bar', 'quux'], :character_set => "ascii").
|
27
|
+
to_sql(TEST_DB).should include("CHARACTER SET 'ascii'")
|
28
|
+
end
|
29
|
+
|
30
|
+
it "should load columns" do
|
31
|
+
described_class.new("bar.csv", :foo, ['bar', 'quux']).
|
32
|
+
to_sql(TEST_DB).should include("(`bar`,`quux`)")
|
33
|
+
end
|
34
|
+
|
35
|
+
it "should load into variables if column begins with @" do
|
36
|
+
described_class.new("bar.csv", :foo, ['@bar', 'quux']).
|
37
|
+
to_sql(TEST_DB).should include("(@bar,`quux`)")
|
38
|
+
end
|
39
|
+
|
40
|
+
it "can ignore lines" do
|
41
|
+
described_class.new("bar.csv", :foo, ['bar', 'quux'], :ignore => 2).
|
42
|
+
to_sql(TEST_DB).should include("IGNORE 2 LINES")
|
43
|
+
end
|
44
|
+
|
45
|
+
it "can be in csv format" do
|
46
|
+
described_class.new("bar.csv", :foo, ['bar', 'quux'], :format => :csv).
|
47
|
+
to_sql(TEST_DB).should include("FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '\"' ESCAPED BY '\"'")
|
48
|
+
end
|
49
|
+
|
50
|
+
it "can set column values" do
|
51
|
+
described_class.new("bar.csv", :foo, ['@bar', 'quux'],
|
52
|
+
:set => {:bar => :unhex.sql_function("@bar".lit),
|
53
|
+
:etl_batch_id => 3}).
|
54
|
+
to_sql(TEST_DB).should include("SET `bar` = unhex(@bar), `etl_batch_id` = 3")
|
55
|
+
end
|
56
|
+
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: chicago-etl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 11
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
- 9
|
10
|
-
version: 0.0.9
|
9
|
+
- 10
|
10
|
+
version: 0.0.10
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Roland Swingler
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2013-02-
|
18
|
+
date: 2013-02-25 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
version_requirements: &id001 !ruby/object:Gem::Requirement
|
@@ -200,6 +200,7 @@ files:
|
|
200
200
|
- spec/etl/screens/out_of_bounds_spec.rb
|
201
201
|
- spec/etl/sequel/dependant_tables_spec.rb
|
202
202
|
- spec/etl/sequel/filter_to_etl_batch_spec.rb
|
203
|
+
- spec/etl/sequel/load_data_infile_expression_spec.rb
|
203
204
|
- spec/etl/sequel/load_data_infile_spec.rb
|
204
205
|
- spec/etl/sink_spec.rb
|
205
206
|
- spec/etl/table_builder_spec.rb
|