chicago-etl 0.0.9 → 0.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.9
1
+ 0.0.10
data/chicago-etl.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "chicago-etl"
8
- s.version = "0.0.9"
8
+ s.version = "0.0.10"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Roland Swingler"]
12
- s.date = "2013-02-19"
12
+ s.date = "2013-02-25"
13
13
  s.description = "ETL tools for Chicago"
14
14
  s.email = "roland.swingler@gmail.com"
15
15
  s.extra_rdoc_files = [
@@ -63,6 +63,7 @@ Gem::Specification.new do |s|
63
63
  "spec/etl/screens/out_of_bounds_spec.rb",
64
64
  "spec/etl/sequel/dependant_tables_spec.rb",
65
65
  "spec/etl/sequel/filter_to_etl_batch_spec.rb",
66
+ "spec/etl/sequel/load_data_infile_expression_spec.rb",
66
67
  "spec/etl/sequel/load_data_infile_spec.rb",
67
68
  "spec/etl/sink_spec.rb",
68
69
  "spec/etl/table_builder_spec.rb",
@@ -1,15 +1,137 @@
1
1
  module Chicago
2
2
  module ETL
3
3
  module SequelExtensions
4
# Builds the text of a MySQL LOAD DATA INFILE statement from its
# parts. Fragments that do not apply are left out, and the rest are
# joined with single spaces in a fixed order.
#
# NOTE(review): path, table and character_set are interpolated into
# the statement without any escaping, so they must only ever come
# from trusted code. Only the :set keys and values go through
# db.literal.
#
# @api private
class LoadDataInfileExpression
  # ignore is the number of lines to skip (the :ignore option); it is
  # unrelated to ignore?, which reports the :update mode.
  attr_reader :path, :table, :columns, :ignore, :character_set

  # path    - file to load; placed between single quotes as-is.
  # table   - target table; placed between backticks as-is.
  # columns - list of column names; a name starting with "@" is
  #           emitted unquoted, every other name is backtick-quoted.
  #
  # Options:
  #   :ignore        - number of lines to skip (IGNORE n LINES)
  #   :update        - :replace or :ignore; any other value emits no
  #                    keyword
  #   :set           - hash of column => value for the SET clause
  #   :character_set - character set of the file, "utf8" by default
  #   :format        - :csv selects comma-terminated fields that are
  #                    optionally enclosed by, and escaped by, '"'
  def initialize(path, table, columns, opts={})
    @path          = path
    @table         = table
    @columns       = columns
    @ignore        = opts[:ignore]
    @update        = opts[:update]
    @set           = opts[:set] || {}
    @character_set = opts[:character_set] || "utf8"

    if opts[:format] == :csv
      @field_terminator = ","
      @enclosed_by      = '"'
      @escaped_by       = '"'
    end
  end

  # True when existing rows are to be replaced (:update => :replace).
  def replace?
    @update == :replace
  end

  # True when conflicting rows are to be skipped (:update => :ignore).
  def ignore?
    @update == :ignore
  end

  # Returns the complete statement as a String.
  #
  # db - any object responding to #literal; it is used only to render
  #      the keys and values of the :set option.
  def to_sql(db)
    [load_fragment,
     replace_fragment,
     table_fragment,
     character_set_fragment,
     field_terminator_fragment,
     field_enclosure_fragment,
     escape_fragment,
     ignore_fragment,
     column_fragment,
     set_fragment(db)].compact.join(" ")
  end

  private

  def load_fragment
    "LOAD DATA INFILE '#{path}'"
  end

  # REPLACE / IGNORE keyword, or nil for any other :update value.
  def replace_fragment
    @update.to_s.upcase if replace? || ignore?
  end

  def table_fragment
    "INTO TABLE `#{table}`"
  end

  def character_set_fragment
    "CHARACTER SET '#{character_set}'"
  end

  def field_terminator_fragment
    "FIELDS TERMINATED BY '#{@field_terminator}'" if @field_terminator
  end

  def field_enclosure_fragment
    "OPTIONALLY ENCLOSED BY '#{@enclosed_by}'" if @enclosed_by
  end

  def escape_fragment
    "ESCAPED BY '#{@escaped_by}'" if @escaped_by
  end

  def ignore_fragment
    "IGNORE #{ignore} LINES" if ignore
  end

  def column_fragment
    "(#{columns.map { |c| format_column(c) }.join(',')})"
  end

  # The db handle is passed in rather than stored on the instance, so
  # rendering leaves no state behind between calls.
  def set_fragment(db)
    return if @set.empty?

    assignments = @set.map do |column, value|
      "#{db.literal(column)} = #{db.literal(value)}"
    end
    "SET #{assignments.join(', ')}"
  end

  # start_with? is used instead of comparing to_s[0] with "@" because
  # indexing a String with an integer yields a Fixnum on Ruby 1.8,
  # which would never equal "@".
  def format_column(column)
    name = column.to_s
    name.start_with?("@") ? name : "`#{name}`"
  end
end
95
+
4
96
# Dataset-level helpers for issuing LOAD DATA INFILE statements.
#
# The including object is expected to supply opts, db and
# execute_dui (presumably a Sequel dataset - confirm against the
# place this module is mixed in).
module LoadDataInfile
  # Load data in file specified at path.
  #
  # Columns is a list of columns to load - column names starting
  # with an @ symbol will be treated as variables.
  #
  # By default, this will generate a REPLACE INTO TABLE
  # statement, or IGNORE INTO TABLE when the dataset's
  # opts[:insert_ignore] is set.
  #
  # Options:
  #   :ignore - the number of lines to ignore in the source file
  #   :update - nil, :ignore or :replace
  #   :set - a hash specifying autopopulation of columns
  #   :character_set - the character set of the file, UTF8 default
  #   :format - either nil or :csv
  def load_infile(path, columns, options={})
    # The parameter is named path; the previous body passed an
    # undefined `filepath` here and raised NameError on every call.
    execute_dui(load_infile_sql(path, columns, options))
  end

  # Returns the SQL string that load_infile would execute.
  #
  # The target table is the first entry of opts[:from]. An explicit
  # :update key in options (including nil) overrides the default
  # derived from opts[:insert_ignore].
  def load_infile_sql(path, columns, options={})
    replacement = opts[:insert_ignore] ? :ignore : :replace
    options = {:update => replacement}.merge(options)
    LoadDataInfileExpression.new(path,
                                 opts[:from].first,
                                 columns,
                                 options).
      to_sql(db)
  end

  # Loads the CSV data columns in path into this dataset's
  # table.
  #
  # See load_infile for more options.
  def load_csv_infile(path, columns, options={})
    # Same fix as load_infile: forward path, not the undefined
    # `filepath`.
    execute_dui(load_csv_infile_sql(path, columns, options))
  end

  # Returns the SQL string that load_csv_infile would execute; this
  # is load_infile_sql with :format forced to :csv.
  def load_csv_infile_sql(path, columns, options={})
    load_infile_sql(path, columns, options.merge(:format => :csv))
  end
end
15
137
  end
@@ -0,0 +1,56 @@
1
+ require 'spec_helper'
2
+
3
describe Chicago::ETL::SequelExtensions::LoadDataInfileExpression do
  # Renders the statement for the file and table shared by every
  # example ("bar.csv" into :foo), so each example only states the
  # columns and options it cares about.
  def sql_for(columns, opts={})
    described_class.new("bar.csv", :foo, columns, opts).to_sql(TEST_DB)
  end

  it "loads the data in the file into the table" do
    sql_for(['bar', 'quux']).
      should include("LOAD DATA INFILE 'bar.csv' INTO TABLE `foo`")
  end

  it "loads the data with replacement" do
    sql_for(['bar', 'quux'], :update => :replace).
      should include("REPLACE INTO TABLE")
  end

  it "loads the data ignoring rows" do
    sql_for(['bar', 'quux'], :update => :ignore).
      should include("IGNORE INTO TABLE")
  end

  it "should be in UTF-8 character set by default" do
    sql_for(['bar', 'quux']).should include("CHARACTER SET 'utf8'")
  end

  it "may be in other character sets" do
    sql_for(['bar', 'quux'], :character_set => "ascii").
      should include("CHARACTER SET 'ascii'")
  end

  it "should load columns" do
    sql_for(['bar', 'quux']).should include("(`bar`,`quux`)")
  end

  it "should load into variables if column begins with @" do
    sql_for(['@bar', 'quux']).should include("(@bar,`quux`)")
  end

  it "can ignore lines" do
    sql_for(['bar', 'quux'], :ignore => 2).should include("IGNORE 2 LINES")
  end

  it "can be in csv format" do
    sql_for(['bar', 'quux'], :format => :csv).
      should include("FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '\"' ESCAPED BY '\"'")
  end

  it "can set column values" do
    sql_for(['@bar', 'quux'],
            :set => {:bar => :unhex.sql_function("@bar".lit),
                     :etl_batch_id => 3}).
      should include("SET `bar` = unhex(@bar), `etl_batch_id` = 3")
  end
end
@@ -26,7 +26,7 @@ describe Chicago::ETL::SequelExtensions::LoadDataInfile do
26
26
  end
27
27
 
28
28
  it "loads into the columns specified" do
  # NOTE(review): the expectation drops the trailing ";" - the statement builder introduced in this diff does not append one; confirm @sql is produced by load_csv_infile_sql.
29
- @sql.should include("(`bar`,`baz`);")
29
+ @sql.should include("(`bar`,`baz`)")
30
30
  end
31
31
 
32
32
  it "can ignore instead of replacing rows" do
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chicago-etl
3
3
  version: !ruby/object:Gem::Version
4
- hash: 13
4
+ hash: 11
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 9
10
- version: 0.0.9
9
+ - 10
10
+ version: 0.0.10
11
11
  platform: ruby
12
12
  authors:
13
13
  - Roland Swingler
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2013-02-19 00:00:00 Z
18
+ date: 2013-02-25 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  version_requirements: &id001 !ruby/object:Gem::Requirement
@@ -200,6 +200,7 @@ files:
200
200
  - spec/etl/screens/out_of_bounds_spec.rb
201
201
  - spec/etl/sequel/dependant_tables_spec.rb
202
202
  - spec/etl/sequel/filter_to_etl_batch_spec.rb
203
+ - spec/etl/sequel/load_data_infile_expression_spec.rb
203
204
  - spec/etl/sequel/load_data_infile_spec.rb
204
205
  - spec/etl/sink_spec.rb
205
206
  - spec/etl/table_builder_spec.rb