chicago-etl 0.0.9 → 0.0.10

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.9
1
+ 0.0.10
data/chicago-etl.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "chicago-etl"
8
- s.version = "0.0.9"
8
+ s.version = "0.0.10"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Roland Swingler"]
12
- s.date = "2013-02-19"
12
+ s.date = "2013-02-25"
13
13
  s.description = "ETL tools for Chicago"
14
14
  s.email = "roland.swingler@gmail.com"
15
15
  s.extra_rdoc_files = [
@@ -63,6 +63,7 @@ Gem::Specification.new do |s|
63
63
  "spec/etl/screens/out_of_bounds_spec.rb",
64
64
  "spec/etl/sequel/dependant_tables_spec.rb",
65
65
  "spec/etl/sequel/filter_to_etl_batch_spec.rb",
66
+ "spec/etl/sequel/load_data_infile_expression_spec.rb",
66
67
  "spec/etl/sequel/load_data_infile_spec.rb",
67
68
  "spec/etl/sink_spec.rb",
68
69
  "spec/etl/table_builder_spec.rb",
@@ -1,15 +1,137 @@
1
1
  module Chicago
2
2
  module ETL
3
3
  module SequelExtensions
4
# @api private
#
# Builds the text of a LOAD DATA INFILE statement from a file path, a
# target table, a column list and a hash of options.
#
# NOTE(review): path, table, character_set and the column names are
# interpolated into the statement verbatim (no escaping); only the
# keys and values of the :set option are passed through db.literal.
# Callers are presumably expected to supply trusted values - confirm.
class LoadDataInfileExpression
  attr_reader :path, :table, :columns, :ignore, :character_set

  # path    - file path, emitted inside single quotes.
  # table   - table name, emitted inside backticks.
  # columns - list of column names; a name starting with "@" is
  #           emitted unquoted, every other name inside backticks.
  #
  # Options:
  #   :ignore        - number of lines to skip (IGNORE n LINES)
  #   :update        - :replace or :ignore; any other value emits
  #                    no keyword
  #   :set           - hash rendered as a SET clause
  #   :character_set - defaults to "utf8"
  #   :format        - :csv selects "," as the field terminator and
  #                    '"' as both enclosure and escape character
  def initialize(path, table, columns, opts={})
    @path = path
    @table = table
    @columns = columns
    @ignore = opts[:ignore]
    @update = opts[:update]
    @set = opts[:set] || {}
    @character_set = opts[:character_set] || "utf8"
    if opts[:format] == :csv
      @field_terminator = ","
      @enclosed_by = '"'
      @escaped_by = '"'
    end
  end

  # True when the :update option was :replace.
  def replace?
    @update == :replace
  end

  # True when the :update option was :ignore. Unrelated to the
  # +ignore+ reader, which holds the number of lines to skip.
  def ignore?
    @update == :ignore
  end

  # Returns the statement as a String. +db+ must respond to
  # +literal+; it is only consulted when the :set option is non-empty.
  def to_sql(db)
    @db = db
    # Fragments that do not apply return nil and are dropped by compact.
    [load_fragment,
     replace_fragment,
     table_fragment,
     character_set_fragment,
     field_terminator_fragment,
     field_enclosure_fragment,
     escape_fragment,
     ignore_fragment,
     column_fragment,
     set_fragment].compact.join(" ")
  end

  private

  def load_fragment
    "LOAD DATA INFILE '#{path}'"
  end

  def replace_fragment
    @update.to_s.upcase if replace? || ignore?
  end

  def table_fragment
    "INTO TABLE `#{table}`"
  end

  def character_set_fragment
    "CHARACTER SET '#{character_set}'"
  end

  def field_terminator_fragment
    "FIELDS TERMINATED BY '#{@field_terminator}'" if @field_terminator
  end

  def field_enclosure_fragment
    "OPTIONALLY ENCLOSED BY '#{@enclosed_by}'" if @enclosed_by
  end

  def escape_fragment
    "ESCAPED BY '#{@escaped_by}'" if @escaped_by
  end

  def ignore_fragment
    "IGNORE #{ignore} LINES" if ignore
  end

  def column_fragment
    "(" + columns.map {|c| format_column(c) }.join(",") + ")"
  end

  def set_fragment
    return if @set.empty?

    assignments = @set.map {|k, v| "#{@db.literal(k)} = #{@db.literal(v)}" }
    "SET " + assignments.join(", ")
  end

  # Variables (names starting with "@") are passed through untouched;
  # everything else is quoted as an identifier.
  #
  # start_with? is used rather than comparing to_s[0] with "@":
  # on Ruby 1.8 String#[] with an Integer returns a character code,
  # so that comparison is never true there.
  def format_column(column)
    column.to_s.start_with?("@") ? column : "`#{column}`"
  end
end
95
+
4
96
  module LoadDataInfile
5
- # Loads the CSV data columns in filepath into this dataset's table.
6
- def load_csv_infile(filepath, columns)
7
- execute_dui(load_csv_infile_sql(filepath, columns))
97
# Load data in file specified at path.
#
# Columns is a list of columns to load - column names starting
# with an @ symbol will be treated as variables.
#
# By default, this will generate a REPLACE INTO TABLE
# statement.
#
# Options:
#   :ignore - the number of lines to ignore in the source file
#   :update - nil, :ignore or :replace
#   :set - a hash specifying autopopulation of columns
#   :character_set - the character set of the file, UTF8 default
#   :format - either nil or :csv
#
# Returns the result of execute_dui for the generated statement.
def load_infile(path, columns, options={})
  # The argument is named +path+; forwarding any other local name
  # here raises NameError before a statement is ever built.
  execute_dui(load_infile_sql(path, columns, options))
end
114
+
115
# Returns the LOAD DATA INFILE statement for loading +path+ into the
# first table in this dataset's :from option.
#
# Unless +options+ carries its own :update key, :update is :ignore
# when the dataset's :insert_ignore option is set and :replace
# otherwise.
def load_infile_sql(path, columns, options={})
  default_update = if opts[:insert_ignore] then :ignore else :replace end
  merged_options = {:update => default_update}.merge(options)
  target_table = opts[:from].first

  expression = LoadDataInfileExpression.new(path, target_table, columns, merged_options)
  expression.to_sql(db)
end
124
+
125
# Loads the CSV data columns in path into this dataset's
# table.
#
# See load_infile for more options.
#
# Returns the result of execute_dui for the generated statement.
def load_csv_infile(path, columns, options={})
  # The argument is named +path+; forwarding any other local name
  # here raises NameError before a statement is ever built.
  execute_dui(load_csv_infile_sql(path, columns, options))
end
9
132
 
10
- def load_csv_infile_sql(filepath, columns)
11
- replacement = opts[:insert_ignore] ? "IGNORE" : "REPLACE"
12
- "LOAD DATA INFILE '#{filepath}' #{replacement} INTO TABLE `#{opts[:from].first}` CHARACTER SET 'utf8' FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '\"' ESCAPED BY '\"' (`#{columns.join('`,`')}`);"
133
# Returns the statement load_infile_sql builds for the same
# arguments, with the :format option forced to :csv.
def load_csv_infile_sql(path, columns, options={})
  csv_options = options.merge(:format => :csv)
  load_infile_sql(path, columns, csv_options)
end
14
136
  end
15
137
  end
@@ -0,0 +1,56 @@
1
+ require 'spec_helper'
2
+
3
# Checks the SQL fragments produced by LoadDataInfileExpression#to_sql
# for each supported option.
describe Chicago::ETL::SequelExtensions::LoadDataInfileExpression do
  it "loads the data in the file into the table" do
    described_class.new("bar.csv", :foo, ['bar', 'quux']).
      to_sql(TEST_DB).should include("LOAD DATA INFILE 'bar.csv' INTO TABLE `foo`")
  end

  it "loads the data with replacement" do
    described_class.new("bar.csv", :foo, ['bar', 'quux'],
                        :update => :replace).
      to_sql(TEST_DB).should include("REPLACE INTO TABLE")
  end

  it "loads the data ignoring rows" do
    described_class.new("bar.csv", :foo, ['bar', 'quux'], :update => :ignore).
      to_sql(TEST_DB).should include("IGNORE INTO TABLE")
  end

  it "should be in UTF-8 character set by default" do
    described_class.new("bar.csv", :foo, ['bar', 'quux']).
      to_sql(TEST_DB).should include("CHARACTER SET 'utf8'")
  end

  it "may be in other character sets" do
    described_class.new("bar.csv", :foo, ['bar', 'quux'], :character_set => "ascii").
      to_sql(TEST_DB).should include("CHARACTER SET 'ascii'")
  end

  it "should load columns" do
    described_class.new("bar.csv", :foo, ['bar', 'quux']).
      to_sql(TEST_DB).should include("(`bar`,`quux`)")
  end

  it "should load into variables if column begins with @" do
    described_class.new("bar.csv", :foo, ['@bar', 'quux']).
      to_sql(TEST_DB).should include("(@bar,`quux`)")
  end

  it "can ignore lines" do
    described_class.new("bar.csv", :foo, ['bar', 'quux'], :ignore => 2).
      to_sql(TEST_DB).should include("IGNORE 2 LINES")
  end

  it "can be in csv format" do
    described_class.new("bar.csv", :foo, ['bar', 'quux'], :format => :csv).
      to_sql(TEST_DB).should include("FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '\"' ESCAPED BY '\"'")
  end

  it "can set column values" do
    described_class.new("bar.csv", :foo, ['@bar', 'quux'],
                        :set => {:bar => :unhex.sql_function("@bar".lit),
                                 :etl_batch_id => 3}).
      to_sql(TEST_DB).should include("SET `bar` = unhex(@bar), `etl_batch_id` = 3")
  end
end
@@ -26,7 +26,7 @@ describe Chicago::ETL::SequelExtensions::LoadDataInfile do
26
26
  end
27
27
 
28
28
  it "loads into the columns specified" do
29
- @sql.should include("(`bar`,`baz`);")
29
+ @sql.should include("(`bar`,`baz`)")
30
30
  end
31
31
 
32
32
  it "can ignore instead of replacing rows" do
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chicago-etl
3
3
  version: !ruby/object:Gem::Version
4
- hash: 13
4
+ hash: 11
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 9
10
- version: 0.0.9
9
+ - 10
10
+ version: 0.0.10
11
11
  platform: ruby
12
12
  authors:
13
13
  - Roland Swingler
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2013-02-19 00:00:00 Z
18
+ date: 2013-02-25 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  version_requirements: &id001 !ruby/object:Gem::Requirement
@@ -200,6 +200,7 @@ files:
200
200
  - spec/etl/screens/out_of_bounds_spec.rb
201
201
  - spec/etl/sequel/dependant_tables_spec.rb
202
202
  - spec/etl/sequel/filter_to_etl_batch_spec.rb
203
+ - spec/etl/sequel/load_data_infile_expression_spec.rb
203
204
  - spec/etl/sequel/load_data_infile_spec.rb
204
205
  - spec/etl/sink_spec.rb
205
206
  - spec/etl/table_builder_spec.rb