activewarehouse-etl 0.8.4 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +98 -62
- data/Rakefile +11 -0
- data/TODO +2 -1
- data/lib/etl.rb +9 -0
- data/lib/etl/batch.rb +2 -0
- data/lib/etl/batch/batch.rb +111 -0
- data/lib/etl/batch/directives.rb +55 -0
- data/lib/etl/builder.rb +1 -0
- data/lib/etl/builder/date_dimension_builder.rb +83 -0
- data/lib/etl/commands/etl.rb +56 -43
- data/lib/etl/control/control.rb +58 -9
- data/lib/etl/control/destination.rb +29 -4
- data/lib/etl/control/destination/database_destination.rb +17 -27
- data/lib/etl/control/source/database_source.rb +17 -40
- data/lib/etl/control/source/file_source.rb +8 -5
- data/lib/etl/control/source/model_source.rb +39 -0
- data/lib/etl/core_ext.rb +1 -0
- data/lib/etl/core_ext/time.rb +5 -0
- data/lib/etl/core_ext/time/calculations.rb +40 -0
- data/lib/etl/engine.rb +184 -83
- data/lib/etl/execution.rb +1 -0
- data/lib/etl/execution/base.rb +1 -1
- data/lib/etl/execution/batch.rb +8 -0
- data/lib/etl/execution/job.rb +1 -0
- data/lib/etl/execution/migration.rb +16 -4
- data/lib/etl/generator/surrogate_key_generator.rb +20 -4
- data/lib/etl/http_tools.rb +1 -1
- data/lib/etl/processor/bulk_import_processor.rb +16 -19
- data/lib/etl/processor/check_exist_processor.rb +16 -7
- data/lib/etl/processor/hierarchy_exploder_processor.rb +2 -1
- data/lib/etl/processor/require_non_blank_processor.rb +26 -0
- data/lib/etl/processor/surrogate_key_processor.rb +22 -2
- data/lib/etl/processor/truncate_processor.rb +13 -13
- data/lib/etl/screen.rb +14 -0
- data/lib/etl/screen/row_count_screen.rb +7 -2
- data/lib/etl/transform/foreign_key_lookup_transform.rb +15 -5
- data/lib/etl/transform/hierarchy_lookup_transform.rb +7 -14
- data/lib/etl/util.rb +59 -0
- data/lib/etl/version.rb +2 -2
- metadata +19 -2
@@ -3,26 +3,24 @@ module ETL #:nodoc:
|
|
3
3
|
# Transform which walks up the hierarchy tree to find a value if the current level's value
|
4
4
|
# is nil.
|
5
5
|
#
|
6
|
-
# Configuration options:
|
7
|
-
# * <tt>:table</tt>: The name of the table to use for lookup (required)
|
8
|
-
# * <tt>:connection</tt>: The database adapter connection (required)
|
9
|
-
# * <tt>:parent_id_field</tt>: The name of the parent ID field (defaults to :parent_id)
|
10
|
-
#
|
11
6
|
# TODO: Let the resolver be implemented in a class so different resolution methods are
|
12
7
|
# possible.
|
13
8
|
class HierarchyLookupTransform < ETL::Transform::Transform
|
14
9
|
# The name of the field to use for the parent ID
|
15
10
|
attr_accessor :parent_id_field
|
16
11
|
|
12
|
+
# The target connection name
|
13
|
+
attr_accessor :target
|
14
|
+
|
17
15
|
# Initialize the transform
|
18
16
|
#
|
19
17
|
# Configuration options:
|
20
|
-
# * <tt>:
|
21
|
-
# * <tt>:connection</tt>: The ActiveRecord adapter (required)
|
18
|
+
# * <tt>:target</tt>: The target connection name (required)
|
22
19
|
# * <tt>:parent_id_field</tt>: The name of the field to use for the parent ID (defaults to :parent_id)
|
23
20
|
def initialize(control, name, configuration={})
|
24
21
|
super
|
25
22
|
@parent_id_field = configuration[:parent_id_field] || :parent_id
|
23
|
+
@target = configuration[:target]
|
26
24
|
end
|
27
25
|
|
28
26
|
# Transform the value.
|
@@ -40,15 +38,10 @@ module ETL #:nodoc:
|
|
40
38
|
value
|
41
39
|
end
|
42
40
|
|
43
|
-
# Lookup the parent value.
|
44
|
-
# is specified
|
41
|
+
# Lookup the parent value.
|
45
42
|
def lookup(field, table, parent_id, parent_id_field)
|
46
|
-
unless configuration.has_key?(:connection)
|
47
|
-
raise ETL::ControlError, "The configuration hash must include the database connection"
|
48
|
-
end
|
49
|
-
|
50
43
|
q = "SELECT #{parent_id_field}, #{field} FROM #{table} WHERE id = #{parent_id}"
|
51
|
-
row =
|
44
|
+
row = ETL::Engine.connection(target).select_one(q)
|
52
45
|
return row[parent_id_field.to_s], row[field.to_s]
|
53
46
|
end
|
54
47
|
end
|
data/lib/etl/util.rb
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
module ETL
|
2
|
+
module Util
|
3
|
+
# Return the distance of time in words from the given from_time to the specified to_time. If to_time
|
4
|
+
# is not specified then Time.now is used. Seconds are always included in the result;
|
5
|
+
# this method does not take an include_seconds argument.
|
6
|
+
def distance_of_time_in_words(from_time, to_time=Time.now)
|
7
|
+
from_time = from_time.to_time if from_time.respond_to?(:to_time)
|
8
|
+
to_time = to_time.to_time if to_time.respond_to?(:to_time)
|
9
|
+
seconds = (to_time - from_time).round
|
10
|
+
distance_in_days = (seconds/(60*60*24)).round
|
11
|
+
seconds = seconds % (60*60*24)
|
12
|
+
distance_in_hours = (seconds/(60*60)).round
|
13
|
+
seconds = seconds % (60*60)
|
14
|
+
distance_in_minutes = (seconds/60).round
|
15
|
+
seconds = seconds % 60
|
16
|
+
distance_in_seconds = seconds
|
17
|
+
|
18
|
+
s = ''
|
19
|
+
s << "#{distance_in_days} days," if distance_in_days > 0
|
20
|
+
s << "#{distance_in_hours} hours, " if distance_in_hours > 0
|
21
|
+
s << "#{distance_in_minutes} minutes, " if distance_in_minutes > 0
|
22
|
+
s << "#{distance_in_seconds} seconds"
|
23
|
+
s
|
24
|
+
end
|
25
|
+
|
26
|
+
# Get the approximate distance of time in words from the given from_time
|
27
|
+
# to the given to_time. If to_time is not specified then it is set
|
28
|
+
# to Time.now. By default seconds are included...set the include_seconds
|
29
|
+
# argument to false to disable the seconds.
|
30
|
+
def approximate_distance_of_time_in_words(from_time, to_time=Time.now, include_seconds=true)
|
31
|
+
from_time = from_time.to_time if from_time.respond_to?(:to_time)
|
32
|
+
to_time = to_time.to_time if to_time.respond_to?(:to_time)
|
33
|
+
distance_in_minutes = (((to_time - from_time).abs)/60).round
|
34
|
+
distance_in_seconds = ((to_time - from_time).abs).round
|
35
|
+
|
36
|
+
case distance_in_minutes
|
37
|
+
when 0..1
|
38
|
+
return (distance_in_minutes == 0) ? 'less than a minute' : '1 minute' unless include_seconds
|
39
|
+
case distance_in_seconds
|
40
|
+
when 0..4 then 'less than 5 seconds'
|
41
|
+
when 5..9 then 'less than 10 seconds'
|
42
|
+
when 10..19 then 'less than 20 seconds'
|
43
|
+
when 20..39 then 'half a minute'
|
44
|
+
when 40..59 then 'less than a minute'
|
45
|
+
else '1 minute'
|
46
|
+
end
|
47
|
+
when 2..44 then "#{distance_in_minutes} minutes"
|
48
|
+
when 45..89 then 'about 1 hour'
|
49
|
+
when 90..1439 then "about #{(distance_in_minutes.to_f / 60.0).round} hours"
|
50
|
+
when 1440..2879 then '1 day'
|
51
|
+
when 2880..43199 then "#{(distance_in_minutes / 1440).round} days"
|
52
|
+
when 43200..86399 then 'about 1 month'
|
53
|
+
when 86400..525959 then "#{(distance_in_minutes / 43200).round} months"
|
54
|
+
when 525960..1051919 then 'about 1 year'
|
55
|
+
else "over #{(distance_in_minutes / 525960).round} years"
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
data/lib/etl/version.rb
CHANGED
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.2
|
|
3
3
|
specification_version: 1
|
4
4
|
name: activewarehouse-etl
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.8.4
|
7
|
-
date: 2007-
|
6
|
+
version: 0.9.0
|
7
|
+
date: 2007-08-09 00:00:00 -04:00
|
8
8
|
summary: Pure Ruby ETL package.
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -36,6 +36,13 @@ files:
|
|
36
36
|
- Rakefile
|
37
37
|
- bin/etl
|
38
38
|
- lib/etl
|
39
|
+
- lib/etl/batch
|
40
|
+
- lib/etl/batch/batch.rb
|
41
|
+
- lib/etl/batch/directives.rb
|
42
|
+
- lib/etl/batch.rb
|
43
|
+
- lib/etl/builder
|
44
|
+
- lib/etl/builder/date_dimension_builder.rb
|
45
|
+
- lib/etl/builder.rb
|
39
46
|
- lib/etl/commands
|
40
47
|
- lib/etl/commands/etl.rb
|
41
48
|
- lib/etl/control
|
@@ -48,11 +55,18 @@ files:
|
|
48
55
|
- lib/etl/control/source/database_source.rb
|
49
56
|
- lib/etl/control/source/enumerable_source.rb
|
50
57
|
- lib/etl/control/source/file_source.rb
|
58
|
+
- lib/etl/control/source/model_source.rb
|
51
59
|
- lib/etl/control/source.rb
|
52
60
|
- lib/etl/control.rb
|
61
|
+
- lib/etl/core_ext
|
62
|
+
- lib/etl/core_ext/time
|
63
|
+
- lib/etl/core_ext/time/calculations.rb
|
64
|
+
- lib/etl/core_ext/time.rb
|
65
|
+
- lib/etl/core_ext.rb
|
53
66
|
- lib/etl/engine.rb
|
54
67
|
- lib/etl/execution
|
55
68
|
- lib/etl/execution/base.rb
|
69
|
+
- lib/etl/execution/batch.rb
|
56
70
|
- lib/etl/execution/job.rb
|
57
71
|
- lib/etl/execution/migration.rb
|
58
72
|
- lib/etl/execution/record.rb
|
@@ -79,6 +93,7 @@ files:
|
|
79
93
|
- lib/etl/processor/print_row_processor.rb
|
80
94
|
- lib/etl/processor/processor.rb
|
81
95
|
- lib/etl/processor/rename_processor.rb
|
96
|
+
- lib/etl/processor/require_non_blank_processor.rb
|
82
97
|
- lib/etl/processor/row_processor.rb
|
83
98
|
- lib/etl/processor/sequence_processor.rb
|
84
99
|
- lib/etl/processor/surrogate_key_processor.rb
|
@@ -87,6 +102,7 @@ files:
|
|
87
102
|
- lib/etl/row.rb
|
88
103
|
- lib/etl/screen
|
89
104
|
- lib/etl/screen/row_count_screen.rb
|
105
|
+
- lib/etl/screen.rb
|
90
106
|
- lib/etl/transform
|
91
107
|
- lib/etl/transform/block_transform.rb
|
92
108
|
- lib/etl/transform/date_to_string_transform.rb
|
@@ -103,6 +119,7 @@ files:
|
|
103
119
|
- lib/etl/transform/trim_transform.rb
|
104
120
|
- lib/etl/transform/type_transform.rb
|
105
121
|
- lib/etl/transform.rb
|
122
|
+
- lib/etl/util.rb
|
106
123
|
- lib/etl/version.rb
|
107
124
|
- lib/etl.rb
|
108
125
|
- examples/database.example.yml
|