factorylabs-activewarehouse-etl 0.9.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +198 -0
- data/LICENSE +7 -0
- data/README +85 -0
- data/Rakefile +153 -0
- data/TODO +28 -0
- data/bin/etl +28 -0
- data/bin/etl.cmd +8 -0
- data/examples/database.example.yml +16 -0
- data/lib/etl.rb +78 -0
- data/lib/etl/batch.rb +2 -0
- data/lib/etl/batch/batch.rb +111 -0
- data/lib/etl/batch/directives.rb +55 -0
- data/lib/etl/builder.rb +2 -0
- data/lib/etl/builder/date_dimension_builder.rb +96 -0
- data/lib/etl/builder/time_dimension_builder.rb +31 -0
- data/lib/etl/commands/etl.rb +89 -0
- data/lib/etl/control.rb +3 -0
- data/lib/etl/control/control.rb +405 -0
- data/lib/etl/control/destination.rb +420 -0
- data/lib/etl/control/destination/database_destination.rb +95 -0
- data/lib/etl/control/destination/file_destination.rb +124 -0
- data/lib/etl/control/source.rb +109 -0
- data/lib/etl/control/source/database_source.rb +220 -0
- data/lib/etl/control/source/enumerable_source.rb +11 -0
- data/lib/etl/control/source/file_source.rb +90 -0
- data/lib/etl/control/source/model_source.rb +39 -0
- data/lib/etl/core_ext.rb +1 -0
- data/lib/etl/core_ext/time.rb +5 -0
- data/lib/etl/core_ext/time/calculations.rb +42 -0
- data/lib/etl/engine.rb +556 -0
- data/lib/etl/execution.rb +20 -0
- data/lib/etl/execution/base.rb +9 -0
- data/lib/etl/execution/batch.rb +8 -0
- data/lib/etl/execution/job.rb +8 -0
- data/lib/etl/execution/migration.rb +85 -0
- data/lib/etl/execution/record.rb +18 -0
- data/lib/etl/generator.rb +2 -0
- data/lib/etl/generator/generator.rb +20 -0
- data/lib/etl/generator/surrogate_key_generator.rb +39 -0
- data/lib/etl/http_tools.rb +139 -0
- data/lib/etl/parser.rb +11 -0
- data/lib/etl/parser/apache_combined_log_parser.rb +49 -0
- data/lib/etl/parser/delimited_parser.rb +74 -0
- data/lib/etl/parser/fixed_width_parser.rb +65 -0
- data/lib/etl/parser/parser.rb +41 -0
- data/lib/etl/parser/sax_parser.rb +218 -0
- data/lib/etl/parser/xml_parser.rb +65 -0
- data/lib/etl/processor.rb +11 -0
- data/lib/etl/processor/block_processor.rb +14 -0
- data/lib/etl/processor/bulk_import_processor.rb +81 -0
- data/lib/etl/processor/check_exist_processor.rb +80 -0
- data/lib/etl/processor/check_unique_processor.rb +35 -0
- data/lib/etl/processor/copy_field_processor.rb +26 -0
- data/lib/etl/processor/encode_processor.rb +55 -0
- data/lib/etl/processor/hierarchy_exploder_processor.rb +55 -0
- data/lib/etl/processor/print_row_processor.rb +12 -0
- data/lib/etl/processor/processor.rb +25 -0
- data/lib/etl/processor/rename_processor.rb +24 -0
- data/lib/etl/processor/require_non_blank_processor.rb +26 -0
- data/lib/etl/processor/row_processor.rb +17 -0
- data/lib/etl/processor/sequence_processor.rb +23 -0
- data/lib/etl/processor/surrogate_key_processor.rb +53 -0
- data/lib/etl/processor/truncate_processor.rb +35 -0
- data/lib/etl/row.rb +20 -0
- data/lib/etl/screen.rb +14 -0
- data/lib/etl/screen/row_count_screen.rb +20 -0
- data/lib/etl/transform.rb +2 -0
- data/lib/etl/transform/block_transform.rb +13 -0
- data/lib/etl/transform/date_to_string_transform.rb +20 -0
- data/lib/etl/transform/decode_transform.rb +51 -0
- data/lib/etl/transform/default_transform.rb +20 -0
- data/lib/etl/transform/foreign_key_lookup_transform.rb +151 -0
- data/lib/etl/transform/hierarchy_lookup_transform.rb +49 -0
- data/lib/etl/transform/ordinalize_transform.rb +12 -0
- data/lib/etl/transform/sha1_transform.rb +13 -0
- data/lib/etl/transform/string_to_date_transform.rb +16 -0
- data/lib/etl/transform/string_to_datetime_transform.rb +14 -0
- data/lib/etl/transform/string_to_time_transform.rb +11 -0
- data/lib/etl/transform/transform.rb +61 -0
- data/lib/etl/transform/trim_transform.rb +26 -0
- data/lib/etl/transform/type_transform.rb +35 -0
- data/lib/etl/util.rb +59 -0
- data/lib/etl/version.rb +9 -0
- metadata +195 -0
data/bin/etl
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
#--
|
4
|
+
# Copyright (c) 2006 Anthony Eden
|
5
|
+
#
|
6
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
7
|
+
# a copy of this software and associated documentation files (the
|
8
|
+
# "Software"), to deal in the Software without restriction, including
|
9
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
10
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
11
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
12
|
+
# the following conditions:
|
13
|
+
#
|
14
|
+
# The above copyright notice and this permission notice shall be
|
15
|
+
# included in all copies or substantial portions of the Software.
|
16
|
+
#
|
17
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
18
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
19
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
20
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
21
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
22
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
23
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
24
|
+
#++
|
25
|
+
|
26
|
+
$:.unshift(File.dirname(__FILE__) + '/../lib/')
|
27
|
+
require 'etl'
|
28
|
+
require 'etl/commands/etl'
|
data/bin/etl.cmd
ADDED
@@ -0,0 +1,8 @@
|
|
1
|
+
@echo off
|
2
|
+
|
3
|
+
rem The purpose of this Windows script is to let you use the etl command line with a non-gem version of AW-ETL (e.g. an unpacked gem or a pistoned trunk).
|
4
|
+
rem Just add the current folder on top of your PATH variable to use it instead of the etl command provided with the gem release.
|
5
|
+
|
6
|
+
rem %~dp0 returns the absolute path where the current script is. We just append 'etl' to it, and forward all the arguments with %*
|
7
|
+
|
8
|
+
ruby "%~dp0etl" %*
|
@@ -0,0 +1,16 @@
|
|
1
|
+
etl_execution:
|
2
|
+
adapter: mysql
|
3
|
+
username: root
|
4
|
+
host: localhost
|
5
|
+
database: etl_execution
|
6
|
+
encoding: utf8
|
7
|
+
datawarehouse:
|
8
|
+
adapter: mysql
|
9
|
+
username: root
|
10
|
+
host: localhost
|
11
|
+
database: datawarehouse_development
|
12
|
+
operational:
|
13
|
+
adapter: mysql
|
14
|
+
username: root
|
15
|
+
host: localhost
|
16
|
+
database: operational_production
|
data/lib/etl.rb
ADDED
@@ -0,0 +1,78 @@
|
|
1
|
+
# This source file requires all of the necessary gems and source files for ActiveWarehouse ETL. If you
|
2
|
+
# load this source file all of the other required files and gems will also be brought into the
|
3
|
+
# runtime.
|
4
|
+
|
5
|
+
#--
|
6
|
+
# Copyright (c) 2006-2007 Anthony Eden
|
7
|
+
#
|
8
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
9
|
+
# a copy of this software and associated documentation files (the
|
10
|
+
# "Software"), to deal in the Software without restriction, including
|
11
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
12
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
13
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
14
|
+
# the following conditions:
|
15
|
+
#
|
16
|
+
# The above copyright notice and this permission notice shall be
|
17
|
+
# included in all copies or substantial portions of the Software.
|
18
|
+
#
|
19
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
20
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
21
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
22
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
23
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
24
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
25
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
26
|
+
#++
|
27
|
+
|
28
|
+
require 'logger'
|
29
|
+
require 'yaml'
|
30
|
+
require 'erb'
|
31
|
+
|
32
|
+
require 'rubygems'
|
33
|
+
|
34
|
+
unless defined?(REXML::VERSION)
|
35
|
+
require 'rexml/rexml'
|
36
|
+
REXML::VERSION = REXML::Version
|
37
|
+
end
|
38
|
+
|
39
|
+
require 'active_support'
|
40
|
+
require 'active_record'
|
41
|
+
require 'adapter_extensions'
|
42
|
+
require 'faster_csv'
|
43
|
+
|
44
|
+
$:.unshift(File.dirname(__FILE__))
|
45
|
+
|
46
|
+
require 'etl/core_ext'
|
47
|
+
require 'etl/util'
|
48
|
+
require 'etl/http_tools'
|
49
|
+
require 'etl/builder'
|
50
|
+
require 'etl/version'
|
51
|
+
require 'etl/engine'
|
52
|
+
require 'etl/control'
|
53
|
+
require 'etl/batch'
|
54
|
+
require 'etl/row'
|
55
|
+
require 'etl/parser'
|
56
|
+
require 'etl/transform'
|
57
|
+
require 'etl/processor'
|
58
|
+
require 'etl/generator'
|
59
|
+
require 'etl/screen'
|
60
|
+
|
61
|
+
# Exception hierarchy for ActiveWarehouse ETL. Every error class defined here
# descends from ETL::ETLError, which is itself a StandardError.
module ETL #:nodoc:
  # Root of the hierarchy.
  class ETLError < StandardError; end #:nodoc:

  # Control errors and their two specialisations.
  class ControlError < ETLError; end #:nodoc:
  class DefinitionError < ControlError; end #:nodoc:
  class ConfigurationError < ControlError; end #:nodoc:

  # Errors that derive directly from ETLError.
  class MismatchError < ETLError; end #:nodoc:
  class ResolverError < ETLError; end #:nodoc:

  # Screen errors; FatalScreenError is a specialised ScreenError.
  class ScreenError < ETLError; end #:nodoc:
  class FatalScreenError < ScreenError; end #:nodoc:
end
|
data/lib/etl/batch.rb
ADDED
@@ -0,0 +1,111 @@
|
|
1
|
+
module ETL #:nodoc:
  module Batch
    # Evaluation context for a batch file. The batch file's source is
    # evaluated against the binding of a Context instance, so the methods
    # defined here (+run+, +use_temp_tables+) are what a batch file can call.
    class Context
      # The ETL::Batch::Batch being populated
      attr_reader :batch

      class << self
        # Create a context that is used when evaluating the batch file.
        # Returns the Binding of a new Context wrapping +batch+.
        def create(batch)
          Context.new(batch).get_binding
        end
      end

      # Initialize the context with the batch that will receive directives
      def initialize(batch)
        @batch = batch
      end

      # The file of the underlying batch
      def file
        batch.file
      end

      # Get the binding used to evaluate the batch file
      def get_binding
        binding
      end

      # Add a run directive for +file+, which is taken relative to the
      # directory containing the batch file.
      def run(file)
        batch.run(File.dirname(self.file) + "/" + file)
      end

      # Add a directive that turns temp tables on (default) or off
      def use_temp_tables(value=true)
        batch.use_temp_tables(value)
      end
    end

    # Directive-like object queued by Batch#use_temp_tables when it is called
    # with a false value. It only needs to respond to +execute+, which is the
    # single method Batch#execute calls on each queued directive.
    class DisableTempTables
      # The batch this directive belongs to
      attr_reader :batch

      # Initialize the directive with the given batch object
      def initialize(batch)
        @batch = batch
      end

      # Switch temp tables off for whatever is processed next
      def execute
        ETL::Engine.use_temp_tables = false
      end
    end

    # A batch: an ordered list of directives read from a batch file and
    # executed against an engine.
    class Batch
      # The path of the batch file
      attr_accessor :file
      # The engine used to execute the batch
      attr_accessor :engine

      class << self
        # Resolve the given object to an ETL::Batch::Batch instance and attach
        # the given engine to it. Acceptable arguments are:
        # * The path to a batch file as a String
        # * A File object referencing the batch file
        # * The ETL::Batch::Batch object (which will just be returned)
        #
        # Raises a RuntimeError if any other type is given
        def resolve(batch, engine)
          batch = do_resolve(batch)
          batch.engine = engine
          batch
        end

        protected

        # Build a Batch by evaluating the batch file. +batch_file+ is either a
        # path or a File, of which only the path is used.
        def parse(batch_file)
          batch_file = batch_file.path if batch_file.instance_of?(File)
          batch = ETL::Batch::Batch.new(batch_file)
          # The source is read verbatim so that line numbers reported for
          # errors match the file and multi-line literals keep their content.
          # NOTE: the batch file is executed as Ruby code, so only trusted
          # batch files should ever be passed in.
          eval(File.read(batch_file), Context.create(batch), batch_file)
          batch
        end

        # Dispatch on the type of the argument given to resolve
        def do_resolve(batch)
          case batch
          when String
            # The path is handed over as-is; opening a File here would leave
            # a handle that nothing ever closes.
            parse(batch)
          when File
            parse(batch)
          when ETL::Batch::Batch
            batch
          else
            raise RuntimeError, "Batch must be a String, File or Batch object"
          end
        end
      end

      # Initialize the batch with the path of its batch file
      def initialize(file)
        @file = file
      end

      # Queue a directive that runs the given control file
      def run(file)
        directives << Run.new(self, file)
      end

      # Queue a directive that turns temp tables on (the default) or, when
      # +value+ is false or nil, off.
      def use_temp_tables(value = true)
        directives << (value ? UseTempTables.new(self) : DisableTempTables.new(self))
      end

      # Execute all queued directives in order, then run the after-execute
      # hook. Progress is reported through the engine.
      def execute
        engine.say "Executing batch"
        before_execute
        directives.each do |directive|
          directive.execute
        end
        engine.say "Finishing batch"
        after_execute
        engine.say "Batch complete"
      end

      # The directives queued so far, in the order they were added
      def directives
        @directives ||= []
      end

      # Hook invoked before the directives are executed. Does nothing.
      def before_execute
      end

      # Hook invoked after the directives have been executed
      def after_execute
        ETL::Engine.finish # TODO: should be moved to the directive?
        ETL::Engine.use_temp_tables = false # reset the temp tables
      end
    end
  end
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
module ETL #:nodoc:
  module Batch #:nodoc:
    # Common superclass of the batch directives. The public +execute+ method
    # delegates to the protected +do_execute+ hook that subclasses override.
    class Directive
      # The batch this directive belongs to
      attr_reader :batch

      # Remember the owning batch
      def initialize(batch)
        @batch = batch
      end

      # Run the directive by invoking the +do_execute+ hook
      def execute
        do_execute
      end

      protected

      # Hook for subclasses; the base implementation always raises
      def do_execute
        raise "Directive must implement do_execute method"
      end
    end

    # Directive that has the batch's engine process one ETL control file
    class Run < Directive
      # The control file handed to the engine
      attr_reader :file

      # Remember the owning batch and the file to process
      def initialize(batch, file)
        super(batch)
        @file = file
      end

      protected

      # Hand the file to the engine of the owning batch
      def do_execute
        batch.engine.process(file)
      end
    end

    # Directive that switches the engine to temp tables. Construction is
    # inherited unchanged from Directive (a single batch argument).
    class UseTempTables < Directive
      protected

      # Turn temp tables on
      def do_execute
        ETL::Engine.use_temp_tables = true
      end
    end
  end
end
|
data/lib/etl/builder.rb
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
module ETL #:nodoc:
  module Builder #:nodoc:
    # Builds the rows of a date dimension: one hash per day between the start
    # and end dates, keyed by commonly used date dimension column names.
    class DateDimensionBuilder
      # Date of the first record
      attr_accessor :start_date

      # Date of the last record
      attr_accessor :end_date

      # Dates to be flagged as holidays
      attr_accessor :holiday_indicators

      # Offset month used for the fiscal year calculations
      attr_accessor :fiscal_year_offset_month

      # Weekday indicators, indexed by day of week. The default array begins
      # on Sunday and goes to Saturday.
      cattr_accessor :weekday_indicators
      @@weekday_indicators = ['Weekend','Weekday','Weekday','Weekday','Weekday','Weekday','Weekend']

      # Initialize the builder.
      #
      # * <tt>start_date</tt>: The start date. Defaults to 5 years ago from today.
      # * <tt>end_date</tt>: The end date. Defaults to now.
      # * <tt>fiscal_year_offset_month</tt>: The fiscal year offset month. Defaults to 10.
      def initialize(start_date=Time.now.years_ago(5), end_date=Time.now, fiscal_year_offset_month=10)
        @holiday_indicators = []
        @fiscal_year_offset_month = fiscal_year_offset_month.to_i
        @start_date = start_date.to_date
        @end_date = end_date.to_date
      end

      # Returns an array of hashes, one per date from start_date through
      # end_date. The +options+ argument is accepted but not used.
      def build(options={})
        (start_date..end_date).collect do |day|
          record_from_date(day)
        end
      end

      private

      # Returns a hash representing a single record in the dimension. The
      # values of the record are accessed by name.
      def record_from_date(date)
        time = date.to_time # need methods only available in Time
        offset = fiscal_year_offset_month

        # Values that feed more than one column are computed once
        day_name = time.strftime("%A")
        week = time.week
        quarter = time.quarter
        quarter_label = "Q#{quarter}"
        fy = time.fiscal_year(offset)
        fy_week = time.fiscal_year_week(offset)
        fy_month = time.fiscal_year_month(offset)
        fy_quarter = time.fiscal_year_quarter(offset)

        # Columns that are not populated: day/week/month number in epoch,
        # last day in week/month indicators, calendar week ending date,
        # calendar half year and fiscal half year.
        {
          :date => time.strftime("%m/%d/%Y"),
          :full_date_description => time.strftime("%B %d,%Y"),
          :day_of_week => day_name,
          :day_in_week => day_name, # alias
          :day_number_in_calendar_month => time.day,
          :day_number_in_calendar_year => time.yday,
          :day_number_in_fiscal_month => time.day, # should this be different from CY?
          :day_number_in_fiscal_year => time.fiscal_year_yday(offset),
          :calendar_week => "Week #{week}",
          :calendar_week_number => week,
          :calendar_week_number_in_year => week, # DEPRECATED
          :calendar_month_name => time.strftime("%B"),
          :calendar_month_number_in_year => time.month, # DEPRECATED
          :calendar_month_number => time.month,
          :calendar_year_month => time.strftime("%Y-%m"),
          :calendar_quarter => quarter_label,
          :calendar_quarter_number => quarter,
          :calendar_quarter_number_in_year => quarter, # DEPRECATED
          :calendar_year_quarter => "#{time.strftime('%Y')}-#{quarter_label}",
          :calendar_year => "#{time.year}",
          :fiscal_week => "FY Week #{fy_week}",
          :fiscal_week_number_in_year => fy_week, # DEPRECATED
          :fiscal_week_number => fy_week,
          :fiscal_month => fy_month,
          :fiscal_month_number => fy_month,
          :fiscal_month_number_in_year => fy_month, # DEPRECATED
          :fiscal_year_month => "FY#{fy}-" + fy_month.to_s.rjust(2, '0'),
          :fiscal_quarter => "FY Q#{fy_quarter}",
          :fiscal_year_quarter => "FY#{fy}-Q#{fy_quarter}",
          :fiscal_quarter_number => fy_quarter, # DEPRECATED
          :fiscal_year_quarter_number => fy_quarter,
          :fiscal_year => "FY#{fy}",
          :fiscal_year_number => fy,
          :holiday_indicator => holiday_indicators.include?(date) ? 'Holiday' : 'Nonholiday',
          :weekday_indicator => weekday_indicators[time.wday],
          :selling_season => 'None',
          :major_event => 'None',
          :sql_date_stamp => date
        }
      end
    end
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module ETL #:nodoc:
  module Builder #:nodoc:
    # Builder that creates a simple time dimension.
    class TimeDimensionBuilder
      # The builder takes no configuration.
      def initialize
      end

      # Returns an array of hashes representing records in the dimension, one
      # per second of the day from 00:00:00 through 23:59:59. The values for
      # each record are accessed by name: :hour, :minute, :second,
      # :minute_description ("HH:MM") and :full_description ("HH:MM:SS").
      #
      # The +options+ argument is accepted but not used.
      def build(options={})
        records = []
        0.upto(23) do |t_hour|
          t_hour_string = t_hour.to_s.rjust(2, '0')
          0.upto(59) do |t_minute|
            t_minute_string = t_minute.to_s.rjust(2, '0')
            minute_description = "#{t_hour_string}:#{t_minute_string}"
            0.upto(59) do |t_second|
              t_second_string = t_second.to_s.rjust(2, '0')
              record = {}
              record[:hour] = t_hour
              record[:minute] = t_minute
              record[:second] = t_second
              # Each record gets its own string so rows never share objects
              record[:minute_description] = minute_description.dup
              record[:full_description] = "#{minute_description}:#{t_second_string}"
              records << record
            end
          end
        end
        records
      end
    end
  end
end
|
@@ -0,0 +1,89 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2006 Anthony Eden
|
3
|
+
#
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
5
|
+
# a copy of this software and associated documentation files (the
|
6
|
+
# "Software"), to deal in the Software without restriction, including
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
10
|
+
# the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be
|
13
|
+
# included in all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
18
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
19
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
20
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
21
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
|
+
#++
|
23
|
+
|
24
|
+
require 'benchmark'
|
25
|
+
require 'getoptlong'
|
26
|
+
|
27
|
+
# Print a usage statement listing every command line option that the etl
# command accepts. The option names and the arguments they take mirror the
# GetoptLong definition used by the command.
def usage #:nodoc:
  puts "Usage: etl [options] file [file file ...]",
       "Options:",
       "  -v, --version            print the version and exit",
       "  -h, --help               print this message and exit",
       "  -c, --config CONFIG",
       "  -l, --limit N",
       "  -o, --offset N",
       "  -n, --newlog",
       "  -s, --skip-bulk-import",
       "      --read-locally",
       "      --rails-root PATH"
end
|
31
|
+
|
32
|
+
# Entry point of the etl command. Parses the command line options, then
# initialises the ETL engine with them and processes every remaining
# argument as a file. --version and --help print their output and return
# immediately; with no file arguments left the usage statement is printed.
def execute
  parser = GetoptLong.new(
    ['--version', '-v', GetoptLong::NO_ARGUMENT],
    ['--help', '-h', GetoptLong::NO_ARGUMENT],
    ['--config', '-c', GetoptLong::REQUIRED_ARGUMENT],
    ['--limit', '-l', GetoptLong::REQUIRED_ARGUMENT],
    ['--offset', '-o', GetoptLong::REQUIRED_ARGUMENT],
    ['--newlog', '-n', GetoptLong::NO_ARGUMENT],
    ['--skip-bulk-import', '-s', GetoptLong::NO_ARGUMENT],
    ['--read-locally', GetoptLong::NO_ARGUMENT],
    ['--rails-root', GetoptLong::REQUIRED_ARGUMENT]
  )

  settings = {}
  parser.each do |flag, value|
    if flag == '--version'
      puts "ActiveWarehouse ETL version #{ETL::VERSION::STRING}"
      return
    elsif flag == '--help'
      usage
      return
    elsif flag == '--config'
      settings[:config] = value
    elsif flag == '--limit'
      settings[:limit] = value.to_i
    elsif flag == '--offset'
      settings[:offset] = value.to_i
    elsif flag == '--newlog'
      settings[:newlog] = true
    elsif flag == '--skip-bulk-import'
      puts "skip bulk import enabled"
      settings[:skip_bulk_import] = true
    elsif flag == '--read-locally'
      puts "read locally enabled"
      settings[:read_locally] = true
    elsif flag == '--rails-root'
      settings[:rails_root] = value
      puts "rails root set to #{settings[:rails_root]}"
    end
  end

  # Whatever is left in ARGV after option parsing is the list of files
  return usage if ARGV.empty?

  puts "Starting ETL process"

  ETL::Engine.init(settings)
  ARGV.each do |control_file|
    ETL::Engine.realtime_activity = true
    ETL::Engine.process(control_file)
  end

  puts "ETL process complete\n\n"
end
|
88
|
+
|
89
|
+
execute
|