affiliate_window_etl 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +109 -0
- data/lib/affiliate_window/etl.rb +11 -0
- data/lib/affiliate_window/etl/base.rb +78 -0
- data/lib/affiliate_window/etl/config.rb +37 -0
- data/lib/affiliate_window/etl/database.rb +89 -0
- data/lib/affiliate_window/etl/extracter.rb +183 -0
- data/lib/affiliate_window/etl/loader.rb +25 -0
- data/lib/affiliate_window/etl/normaliser.rb +22 -0
- data/lib/affiliate_window/etl/scheduler.rb +67 -0
- data/lib/affiliate_window/etl/tasks.rake +12 -0
- data/lib/affiliate_window/etl/transformer.rb +135 -0
- data/lib/affiliate_window/etl/version.rb +5 -0
- data/lib/affiliate_window_etl.rb +1 -0
- metadata +211 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 3c47d9293527c807d7c97c7c549ae5b29604a6a9
|
4
|
+
data.tar.gz: 07b406b3253d7b040b2c925167ac9ef5fd0c387e
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 65c1afd3e2086c82ca1f383e468e5f6522fb8d3372304f441beeee7e66beed14647040301f10681bdb90c3214343098c222bfbe2036504b70adb6f6dfb663be2
|
7
|
+
data.tar.gz: 83334372aef519037ca82a0920bbbb1253ec0642e583b6ec150759bf8afca3a93b442032aa178a196e9d5d73454ce985e462ded16fa78d1c9b6c4f336b87c694
|
data/README.md
ADDED
@@ -0,0 +1,109 @@
|
|
1
|
+
## Affiliate Window ETL
|
2
|
+
|
3
|
+
[Build status](https://travis-ci.org/reevoo/affiliate_window_etl)
|
4
|
+
|
5
|
+
This gem provides an extract-transform-load process for retrieving records from
|
6
|
+
the [Affiliate Window](http://www.affiliatewindow.com/) API and loading them
|
7
|
+
into a Postgres database. It works incrementally, updating existing records
|
8
|
+
rather than creating duplicates. It handles its own scheduling and determines
|
9
|
+
which records need to be retrieved from the API based on the current state of
|
10
|
+
the database.
|
11
|
+
|
12
|
+
## How to run it
|
13
|
+
|
14
|
+
You can either clone this repository and run the ETL with rake or add it as a
|
15
|
+
dependency of another application.
|
16
|
+
|
17
|
+
**To run from this repository:**
|
18
|
+
|
19
|
+
1. Create the database: `createdb affiliate_window`
|
20
|
+
2. Migrate the database: `rake db:migrate`
|
21
|
+
3. Run the ETL: `rake run`
|
22
|
+
|
23
|
+
**To run as a dependency:**
|
24
|
+
|
25
|
+
```ruby
|
26
|
+
require "affiliate_window/etl"
|
27
|
+
etl = AffiliateWindow::ETL.new
|
28
|
+
|
29
|
+
etl.migrate
|
30
|
+
etl.run
|
31
|
+
```
|
32
|
+
|
33
|
+
By default, it will inherit config from your environment variables. If you don't
|
34
|
+
want it to do this, you can pass in a hash:
|
35
|
+
|
36
|
+
```ruby
|
37
|
+
AffiliateWindow::ETL.new(env: {
|
38
|
+
"START_DATE" => "2016-01-01",
|
39
|
+
"END_DATE" => "2016-01-07",
|
40
|
+
# ...
|
41
|
+
})
|
42
|
+
```
|
43
|
+
|
44
|
+
If you would like, you can add the gem's rake tasks to your app:
|
45
|
+
|
46
|
+
```ruby
|
47
|
+
# Rakefile
|
48
|
+
namespace :etl do
|
49
|
+
spec = Gem::Specification.find_by_name "affiliate_window_etl"
|
50
|
+
load "#{spec.gem_dir}/lib/affiliate_window/etl/tasks.rake"
|
51
|
+
end
|
52
|
+
```
|
53
|
+
|
54
|
+
If you are incorporating the ETL into an app that has its own migrations, it is
|
55
|
+
recommended you copy the migrations into the container app:
|
56
|
+
|
57
|
+
```ruby
|
58
|
+
etl.migration_filenames.each do |filename|
|
59
|
+
# copy to db/migrate/
|
60
|
+
end
|
61
|
+
```
|
62
|
+
|
63
|
+
## How to configure it
|
64
|
+
|
65
|
+
The library tries to be [twelve-factor](https://12factor.net/) compliant and is
|
66
|
+
configured via environment variables. When not specified, the configuration
|
67
|
+
defaults to something suitable for development purposes. If the library is used
|
68
|
+
as a dependency of something else, this configuration can be passed into the
|
69
|
+
`AffiliateWindow::ETL` initializer.
|
70
|
+
|
71
|
+
`ACCOUNT_ID`
|
72
|
+
|
73
|
+
The id of the Affiliate Window account for which to retrieve records.
|
74
|
+
|
75
|
+
`AFFILIATE_API_PASSWORD`
|
76
|
+
|
77
|
+
The API token for the Publisher Service. Can be retrieved from
|
78
|
+
[this page](https://www.affiliatewindow.com/affiliates/accountdetails.php).
|
79
|
+
|
80
|
+
`DATABASE_URL`
|
81
|
+
|
82
|
+
The connection to the database may be configured with `DATABASE_URL`.
|
83
|
+
|
84
|
+
e.g. `postgres://user:password@database_host:1234/my-database?pool=5&encoding=unicode`
|
85
|
+
|
86
|
+
`LAST_N_DAYS`
|
87
|
+
|
88
|
+
The number of days to retrieve. When the ETL runs, it will fetch this many days
|
89
|
+
of data, prior to today. It can take a while for transactions to appear in the
|
90
|
+
API. It is recommended this be set to 60 in production. Defaults to 7.
|
91
|
+
|
92
|
+
`DEBUG_STREAM`
|
93
|
+
|
94
|
+
The stream to write debug output. Defaults to `stdout`.
|
95
|
+
|
96
|
+
Valid options are: `stdout`, `stderr` and `none`.
|
97
|
+
|
98
|
+
## How to contribute
|
99
|
+
|
100
|
+
Bug reports and pull requests are welcome on
|
101
|
+
[GitHub](https://github.com/reevoo/affiliate_window_etl). This project is
|
102
|
+
intended to be a safe, welcoming space for collaboration, and contributors are
|
103
|
+
expected to adhere to the
|
104
|
+
[Contributor Covenant](http://contributor-covenant.org/) code of conduct.
|
105
|
+
|
106
|
+
## License
|
107
|
+
|
108
|
+
The gem is available as open-source under the terms of the
|
109
|
+
[MIT License](http://opensource.org/licenses/MIT).
|
@@ -0,0 +1,11 @@
|
|
1
|
+
require "active_record"
|
2
|
+
require "affiliate_window"
|
3
|
+
|
4
|
+
require "affiliate_window/etl/base"
|
5
|
+
require "affiliate_window/etl/scheduler"
|
6
|
+
require "affiliate_window/etl/database"
|
7
|
+
require "affiliate_window/etl/extracter"
|
8
|
+
require "affiliate_window/etl/transformer"
|
9
|
+
require "affiliate_window/etl/normaliser"
|
10
|
+
require "affiliate_window/etl/loader"
|
11
|
+
require "affiliate_window/etl/config"
|
@@ -0,0 +1,78 @@
|
|
1
|
+
class AffiliateWindow
  # Entry point of the extract-transform-load process. Wires the scheduler,
  # extracter, transformer and loader together using settings from Config.
  class ETL
    attr_accessor :config

    # @param env [#fetch] environment-like object handed to Config; defaults
    #   to the process environment.
    def initialize(env: ENV)
      self.config = Config.new(env: env)
    end

    # Connects to the database, then for every scheduled job extracts the
    # records, transforms each one and loads every transformed record.
    def run
      database.connect!

      scheduler.jobs.each do |job|
        extracter.extract(job.type, job.args).each do |record|
          transformer.transform(record).each do |transformed_record|
            loader.load(transformed_record)
          end
        end
      end
    end

    # Connects to the database and hands the gem's migration directory to
    # ActiveRecord::Migrator.migrate.
    def migrate
      database.connect!
      ActiveRecord::Migrator.migrate(migrations_path)
    end

    # @return [Enumerator] the paths of the entries inside the migration
    #   directory, in sorted order.
    def migration_filenames
      Dir[File.join(migrations_path, "*")].sort.each
    end

    private

    # Absolute path of db/migrate, three directories above this file's
    # directory. Previously `migrate` referenced this name without it being
    # defined, which raised NameError.
    def migrations_path
      File.expand_path("../../../db/migrate", File.dirname(__FILE__))
    end

    def scheduler
      Scheduler.new(
        database: database,
        last_n_days: config.last_n_days,
      )
    end

    def extracter
      Extracter.new(
        client: client,
        output: config.output_stream,
      )
    end

    def transformer
      Transformer.new(normaliser: normaliser)
    end

    def loader
      Loader.new(database: database)
    end

    def database
      Database.new(config.database_url)
    end

    def client
      AffiliateWindow.login(
        account_id: config.account_id,
        affiliate_api_password: config.affiliate_api_password,
      )
    end

    # NOTE(review): no Schema class is defined in the files this gem
    # requires, and nothing here calls this helper -- confirm and remove.
    def schema
      Schema.new
    end

    def normaliser
      Normaliser.new
    end

    # NOTE(review): no Migrator class is defined in this namespace, and
    # nothing here calls this helper -- confirm and remove.
    def migrator
      Migrator.new
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
class AffiliateWindow
  class ETL
    # Reads the ETL's settings from an environment-like object (anything that
    # responds to `fetch`, such as ENV or a Hash with string keys) and
    # supplies development defaults for missing keys.
    class Config
      attr_accessor :env

      # @param env [#fetch] source of the configuration values.
      def initialize(env:)
        self.env = env
      end

      # @return [String] DATABASE_URL, or a local Postgres URL built from the
      #   current user name when the variable is absent.
      def database_url
        # Block form: the `whoami` subprocess is only spawned when
        # DATABASE_URL is missing, not on every call.
        env.fetch("DATABASE_URL") do
          "postgres://#{`whoami`.strip}@localhost:5432/affiliate_window?pool=5&encoding=unicode"
        end
      end

      # @return [String, Integer] ACCOUNT_ID as given, or 1234 by default.
      def account_id
        env.fetch("ACCOUNT_ID", 1234)
      end

      # @return [String] AFFILIATE_API_PASSWORD, or "password" by default.
      def affiliate_api_password
        env.fetch("AFFILIATE_API_PASSWORD", "password")
      end

      # @return [Integer] LAST_N_DAYS converted with to_i; 7 by default.
      def last_n_days
        env.fetch("LAST_N_DAYS", "7").to_i
      end

      # Maps DEBUG_STREAM (case-insensitive; "stdout" by default) to a stream.
      #
      # @return [IO, nil] $stdout, $stderr, or nil for "none".
      # @raise [KeyError] when the value is not one of the three options.
      def output_stream
        name = env.fetch("DEBUG_STREAM", "stdout")
        name = name.downcase.to_sym

        { stdout: $stdout, stderr: $stderr, none: nil }.fetch(name)
      end
    end
  end
end
|
@@ -0,0 +1,89 @@
|
|
1
|
+
class AffiliateWindow
  class ETL
    # Holds the database URL, opens the ActiveRecord connection and maps
    # record-type symbols to the model classes declared below. Each model
    # exposes `identity`: the attribute names used to look up an existing row.
    class Database
      attr_accessor :database_url

      # @param database_url [String] URL passed to establish_connection.
      def initialize(database_url)
        @database_url = database_url
      end

      # Establishes the ActiveRecord connection for the configured URL.
      def connect!
        ActiveRecord::Base.establish_connection(database_url)
      end

      # @param record_type [Symbol] one of the keys of MODELS.
      # @return [Class] the matching model class.
      # @raise [KeyError] for an unknown record type.
      def model(record_type)
        MODELS.fetch(record_type)
      end

      class ClickStat < ActiveRecord::Base
        def self.identity
          %i[date merchant_name]
        end
      end

      class CommissionGroup < ActiveRecord::Base
        def self.identity
          %i[merchant_id commission_group_code]
        end
      end

      class CommissionRange < ActiveRecord::Base
        # Points the inheritance column at a name other than the default.
        self.inheritance_column = :_disabled

        def self.identity
          %i[merchant_id]
        end
      end

      class ImpressionStat < ActiveRecord::Base
        def self.identity
          %i[date merchant_name]
        end
      end

      class Merchant < ActiveRecord::Base
        def self.identity
          %i[id]
        end
      end

      class MerchantSector < ActiveRecord::Base
        def self.identity
          %i[merchant_id sector_id]
        end
      end

      class Transaction < ActiveRecord::Base
        # Points the inheritance column at a name other than the default.
        self.inheritance_column = :_disabled

        def self.identity
          %i[id]
        end
      end

      class TransactionPart < ActiveRecord::Base
        def self.identity
          %i[transaction_id commission_group_name]
        end
      end

      class TransactionProduct < ActiveRecord::Base
        def self.identity
          %i[id]
        end
      end

      # Lookup table used by #model.
      MODELS = {
        click_stat: ClickStat,
        commission_group: CommissionGroup,
        commission_range: CommissionRange,
        impression_stat: ImpressionStat,
        merchant: Merchant,
        merchant_sector: MerchantSector,
        transaction: Transaction,
        transaction_part: TransactionPart,
        transaction_product: TransactionProduct,
      }
    end
  end
end
|
@@ -0,0 +1,183 @@
|
|
1
|
+
class AffiliateWindow
  class ETL
    # Fetches raw records through the API client and streams them to the
    # caller, each tagged with a :record_type key. Progress messages are
    # written to `output` when one is given.
    class Extracter # rubocop:disable Metrics/ClassLength
      # Maximum number of ids sent to the client in a single call.
      CHUNK_SIZE = 100

      attr_accessor :client, :output

      # @param client [Object] API client; must respond to the get_* methods
      #   used below and to remaining_quota.
      # @param output [#puts, nil] stream for progress messages; nil disables
      #   them.
      def initialize(client:, output: nil)
        self.client = client
        self.output = output
      end

      # Lazily runs the extract_<type> method and yields its records.
      #
      # @param type [Symbol, String] suffix of the extract_* method to call.
      # @param params [Hash{Symbol=>Object}] keyword arguments for that method.
      # @return [Enumerator] yields one Hash per record.
      def extract(type, params = {})
        Enumerator.new do |yielder|
          # Splat explicitly: the extract_* methods declare required keywords
          # and Ruby 3 no longer converts a trailing positional Hash.
          public_send("extract_#{type}", yielder, **params)
        end
      end

      # Yields every merchant (fetched in chunks of CHUNK_SIZE ids), then the
      # commission groups of those merchants.
      def extract_merchants(yielder, _params = {}) # rubocop:disable Metrics/AbcSize
        response = client.get_merchant_list
        merchant_ids = as_list(response.fetch(:merchant)).map { |m| m.fetch(:i_id) }

        count = 0
        merchant_ids.each_slice(CHUNK_SIZE) do |ids|
          response = client.get_merchant(merchant_ids: ids)

          as_list(response.fetch(:merchant)).each do |record|
            yielder.yield(record.merge(record_type: :merchant))
          end

          count += ids.count
          write "Extracted #{count} / #{merchant_ids.count} merchants"
        end

        extract_commission_groups(yielder, merchant_ids: merchant_ids)
      end

      # Yields the commission groups of each merchant, tagged with the
      # merchant's id. Merchants rejected with the invalid-relationship error
      # are skipped.
      def extract_commission_groups(yielder, merchant_ids:)
        merchant_ids.each.with_index do |id, index|
          response = catch_invalid_relationship_error do
            client.get_commission_group_list(merchant_id: id)
          end

          next unless response

          as_list(response.fetch(:commission_group)).each do |record|
            yielder.yield(record.merge(
              record_type: :commission_group,
              merchant_id: id,
            ))
          end

          write "Extracted commission groups for #{index + 1} / #{merchant_ids.count} merchants"
        end
      end

      # Yields the transactions of one day followed by their products.
      #
      # @param date [String] day to fetch; interpolated into the timestamps.
      # @raise [RuntimeError] when the response does not contain all rows.
      def extract_daily_transactions(yielder, date:)
        response = client.get_transaction_list(
          start_date: "#{date}T00:00:00",
          end_date: "#{date}T23:59:59",
          date_type: "transaction",
        )
        results = response.fetch(:results)
        pagination = response.fetch(:pagination)

        check_all_records_received!(pagination)

        transactions = as_list(results.fetch(:transaction))
        transactions.each do |record|
          yielder.yield(record.merge(record_type: :transaction))
        end

        write "Extracted #{transactions.count} transactions for #{date}"

        transaction_ids = transactions.map { |t| t.fetch(:i_id) }
        extract_transaction_products(yielder, transaction_ids: transaction_ids)
      end

      # Yields the given transactions (fetched in chunks of CHUNK_SIZE ids)
      # followed by their products.
      def extract_transactions(yielder, transaction_ids:)
        count = 0
        transaction_ids.each_slice(CHUNK_SIZE) do |ids|
          response = client.get_transaction(transaction_ids: ids)

          as_list(response.fetch(:transaction)).each do |record|
            yielder.yield(record.merge(record_type: :transaction))
          end

          count += ids.count
          write "Extracted #{count} / #{transaction_ids.count} transactions"
        end

        extract_transaction_products(yielder, transaction_ids: transaction_ids)
      end

      # Yields the product records of the given transactions, fetched in
      # chunks of CHUNK_SIZE ids.
      def extract_transaction_products(yielder, transaction_ids:)
        count = 0
        transaction_ids.each_slice(CHUNK_SIZE) do |ids|
          response = client.get_transaction_product(transaction_ids: ids)
          transaction_products = as_list(response.fetch(:transaction_product))

          transaction_products.each do |record|
            yielder.yield(record.merge(record_type: :transaction_product))
          end

          count += ids.count
          write "Extracted #{transaction_products.count} products for #{count} / #{transaction_ids.count} transactions"
        end
      end

      # Yields the click stats of one day, each tagged with the date.
      #
      # @raise [RuntimeError] when the response does not contain all rows.
      def extract_daily_clicks(yielder, date:)
        response = client.get_click_stats(
          start_date: "#{date}T00:00:00",
          end_date: "#{date}T23:59:59",
          date_type: "transaction",
        )
        results = response.fetch(:results)
        pagination = response.fetch(:pagination)

        check_all_records_received!(pagination)

        click_stats = as_list(results.fetch(:click_stats))
        click_stats.each do |record|
          yielder.yield(record.merge(
            record_type: :click_stat,
            date: date,
          ))
        end

        write "Extracted #{click_stats.count} click stats for #{date}"
      end

      # Yields the impression stats of one day, each tagged with the date.
      #
      # @raise [RuntimeError] when the response does not contain all rows.
      def extract_daily_impressions(yielder, date:)
        response = client.get_impression_stats(
          start_date: "#{date}T00:00:00",
          end_date: "#{date}T23:59:59",
          date_type: "transaction",
        )
        results = response.fetch(:results)
        pagination = response.fetch(:pagination)

        check_all_records_received!(pagination)

        impression_stats = as_list(results.fetch(:impression_stats))
        impression_stats.each do |record|
          yielder.yield(record.merge(
            record_type: :impression_stat,
            date: date,
          ))
        end

        write "Extracted #{impression_stats.count} impression stats for #{date}"
      end

      # @raise [RuntimeError] unless rows returned equals rows available.
      def check_all_records_received!(pagination)
        retrieved = pagination.fetch(:i_rows_returned)
        total = pagination.fetch(:i_rows_available)

        fail "Did not receive all records: #{retrieved} retrieved out of #{total}" unless total == retrieved
      end

      # If the current account is not affiliated with the merchant, the API does
      # not let you retrieve commission groups for that merchant.
      #
      # @return [Object, nil] the block's value, or nil when that specific
      #   error was raised; any other AffiliateWindow::Error is re-raised.
      def catch_invalid_relationship_error(&block)
        block.call
      rescue AffiliateWindow::Error => e
        raise unless e.message.match(/Invalid merchant \/ affiliate relationship/)
        nil
      end

      # Writes a progress line prefixed with the client's remaining quota;
      # does nothing when no output stream is configured.
      def write(message)
        return unless output
        message_with_quota = "[quota:#{client.remaining_quota}] #{message}"
        output.puts(message_with_quota)
      end

      private

      # Wraps a lone element in an Array and leaves Arrays as they are.
      # The commission-group and transaction-product endpoints were already
      # guarded this way; the same guard is applied to every endpoint for
      # consistency.
      # NOTE(review): presumably the client returns a bare Hash when a
      # response holds exactly one element -- confirm for each endpoint.
      def as_list(value)
        [value].flatten
      end
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
class AffiliateWindow
  class ETL
    # Writes one transformed record to the database, updating the existing
    # row when one matches the model's identity attributes and creating a
    # new row otherwise.
    class Loader
      attr_accessor :database

      # @param database [#model] maps a record type to its model class.
      def initialize(database:)
        self.database = database
      end

      # @param attributes [Hash{Symbol=>Object}] record attributes including
      #   :record_type. Entries with nil values are not written. The caller's
      #   Hash is left untouched.
      # @return [Object] whatever the model's update!/create! returns.
      def load(attributes)
        # Work on a copy so the caller's Hash keeps :record_type and nils.
        attributes = attributes.reject { |_, value| value.nil? }
        record_type = attributes.delete(:record_type)

        model = database.model(record_type)
        identity = attributes.slice(*model.identity)

        if (record = model.find_by(identity))
          record.update!(attributes)
        else
          model.create!(attributes)
        end
      end
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
class AffiliateWindow
  class ETL
    # Pulls a nested collection out of a parent record so it can be stored as
    # separate rows that point back at the parent.
    class Normaliser
      # Removes `field_name` from `record` (the record is modified in place)
      # and returns the elements found under `nested_name`, each merged with a
      # record type and a foreign key holding the parent's id.
      #
      # @param record [Hash] parent record.
      # @param field_name [Symbol] key of the nested container to remove.
      # @param nested_name [Symbol] key inside the container holding one
      #   element or an Array of elements.
      # @param foreign_name [Symbol] key under which the parent id is stored.
      # @param id_name [Symbol] key of the parent's id in `record`.
      # @param record_type [Symbol, nil] type tag; `nested_name` when nil.
      # @return [Array<Hash>] the tagged elements; empty when the field is
      #   absent or falsy.
      def normalise!(record, field_name:, nested_name:, foreign_name:, id_name: :i_id, record_type: nil) # rubocop:disable Metrics/ParameterLists, Metrics/LineLength
        container = record.delete(field_name)
        return [] unless container

        # A lone element and a list of elements are treated alike.
        children = [container.fetch(nested_name)].flatten
        link = {
          record_type: record_type || nested_name,
          foreign_name => record.fetch(id_name),
        }

        children.map { |child| child.merge(link) }
      end
    end
  end
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
class AffiliateWindow
  class ETL
    # Decides which extraction jobs a run consists of: the daily reports for
    # the last N days, a refresh of older transactions still marked pending,
    # and the merchant list.
    class Scheduler
      attr_accessor :database, :last_n_days

      # @param database [#model] used to look up the transaction model.
      # @param last_n_days [Integer] number of days, counting today, covered
      #   by the daily jobs.
      def initialize(database:, last_n_days:)
        self.database = database
        self.last_n_days = last_n_days
      end

      # @return [Array<Job>] the jobs in execution order.
      def jobs
        jobs = []

        schedule_last_n_days(:daily_transactions, jobs)
        schedule_last_n_days(:daily_clicks, jobs)
        schedule_last_n_days(:daily_impressions, jobs)
        schedule_old_pending_transactions(jobs)
        schedule_merchants(jobs)

        jobs
      end

      private

      # Adds one job of the given type per day, oldest day first.
      def schedule_last_n_days(type, jobs)
        # Read the clock once so both ends of the range agree even when the
        # date changes while the schedule is being built.
        today = Date.today

        (n_days_ago(today)..today).each do |date|
          jobs.push(Job.new(type, date: date.to_s))
        end
      end

      # Adds a job that re-fetches stored transactions whose status is
      # "pending" and whose transaction_date precedes the daily window.
      def schedule_old_pending_transactions(jobs)
        model = database.model(:transaction)
        pending = model.where(status: "pending")
        old_pending = pending.where("transaction_date < ?", n_days_ago)

        transaction_ids = old_pending.pluck(:id)

        job = Job.new(:transactions, transaction_ids: transaction_ids)
        jobs.push(job)
      end

      def schedule_merchants(jobs)
        jobs.push(Job.new(:merchants))
      end

      # @return [Date] first day of the window, so that the window ending
      #   at `today` spans last_n_days days.
      def n_days_ago(today = Date.today)
        today - last_n_days + 1
      end

      # A unit of extraction work: a type plus the arguments for it.
      class Job
        attr_accessor :type, :args

        def initialize(type, args = {})
          self.type = type
          self.args = args
        end

        # Jobs are equal when their type and args match. Objects that do not
        # expose type/args compare unequal instead of raising NoMethodError.
        def ==(other)
          other.respond_to?(:type) && other.respond_to?(:args) &&
            type == other.type && args == other.args
        end
      end
    end
  end
end
|
@@ -0,0 +1,135 @@
|
|
1
|
+
class AffiliateWindow
  class ETL
    # Turns one extracted record into flat attribute hashes: nested
    # collections become separate records via the normaliser, nested hashes
    # are flattened into prefixed keys, and the one-letter type prefix of
    # field names is dropped.
    class Transformer # rubocop:disable Metrics/ClassLength
      # Raised when a field still holds an Array after normalisation.
      class TypeError < StandardError; end

      attr_accessor :normaliser

      # @param normaliser [#normalise!] extracts nested collections.
      def initialize(normaliser:)
        self.normaliser = normaliser
      end

      # @param record [Hash] extracted record containing :record_type.
      # @return [Array<Hash>] the child records first, then the record
      #   itself; for :transaction_product only the children are returned.
      # @raise [TypeError] when a remaining field value is an Array.
      def transform(record)
        # Shallow copy: normalisation removes keys and must not touch the
        # caller's Hash.
        working = record.dup
        collected = []

        case working.fetch(:record_type)
        when :merchant
          collect_children(
            working, collected,
            field_name: :a_commission_ranges,
            nested_name: :commission_range,
            foreign_name: :merchant_id,
          )
          collect_children(
            working, collected,
            field_name: :a_sectors,
            nested_name: :merchant_sector,
            foreign_name: :merchant_id,
          )
        when :transaction
          collect_children(
            working, collected,
            field_name: :a_transaction_parts,
            nested_name: :transaction_part,
            foreign_name: :transaction_id,
          )
        when :transaction_product
          collect_children(
            working, collected,
            field_name: :a_products,
            nested_name: :product,
            foreign_name: :transaction_id,
            id_name: :i_transaction_id,
            record_type: :transaction_product,
          )

          # transaction_product has no other top-level attributes
          return collected
        end

        collected << infer_field_names(working)
      end

      private

      # Normalises one nested collection out of `record` and appends the
      # flattened children to `collected`.
      def collect_children(record, collected, **options)
        normaliser.normalise!(record, **options).each do |child|
          collected << infer_field_names(child)
        end
      end

      # Flattens `record` into a Hash with symbol keys. Nested hashes
      # contribute their entries under "<parent>_" prefixed names.
      def infer_field_names(record, prefix = nil)
        record.each_with_object({}) do |(field_name, value), flattened|
          stripped = new_field_name(field_name)

          case value
          when Hash
            flattened.merge!(infer_field_names(value, "#{prefix}#{stripped}_"))
          when Array
            raise arrays_unsupported_error(field_name, value)
          else
            flattened["#{prefix}#{stripped}".to_sym] = value
          end
        end
      end

      # Drops a leading "<letter>_" (e.g. :i_id becomes "id"); any other
      # name is returned unchanged.
      def new_field_name(field_name)
        text = field_name.to_s
        text[1] == "_" ? text[2..-1] : field_name
      end

      def arrays_unsupported_error(field_name, _array)
        lines = [
          "Unable to transform '#{field_name}' because its value is an array.\n",
          "To cope with this, normalise elements of the array into a separate table.\n",
          "Then, add a foreign key from the normalised record to this one.",
        ]

        TypeError.new(lines.join)
      end
    end
  end
end
|
@@ -0,0 +1 @@
|
|
1
|
+
require "affiliate_window/etl"
|
metadata
ADDED
@@ -0,0 +1,211 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: affiliate_window_etl
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Reevoo Developers
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2016-10-12 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: affiliate_window
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0.1'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0.1'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: activerecord
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '5.0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '5.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: pg
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0.19'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0.19'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rspec
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '3.5'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '3.5'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: pry
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0.10'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0.10'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: rake
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '11.3'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '11.3'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: timecop
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - "~>"
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0.8'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - "~>"
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0.8'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: bundler-audit
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - "~>"
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0.5'
|
118
|
+
type: :development
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - "~>"
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0.5'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: reevoocop
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - "~>"
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: 0.0.8
|
132
|
+
type: :development
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - "~>"
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: 0.0.8
|
139
|
+
- !ruby/object:Gem::Dependency
|
140
|
+
name: simplecov
|
141
|
+
requirement: !ruby/object:Gem::Requirement
|
142
|
+
requirements:
|
143
|
+
- - "~>"
|
144
|
+
- !ruby/object:Gem::Version
|
145
|
+
version: '0.11'
|
146
|
+
type: :development
|
147
|
+
prerelease: false
|
148
|
+
version_requirements: !ruby/object:Gem::Requirement
|
149
|
+
requirements:
|
150
|
+
- - "~>"
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: '0.11'
|
153
|
+
- !ruby/object:Gem::Dependency
|
154
|
+
name: simplecov-console
|
155
|
+
requirement: !ruby/object:Gem::Requirement
|
156
|
+
requirements:
|
157
|
+
- - "~>"
|
158
|
+
- !ruby/object:Gem::Version
|
159
|
+
version: '0.3'
|
160
|
+
type: :development
|
161
|
+
prerelease: false
|
162
|
+
version_requirements: !ruby/object:Gem::Requirement
|
163
|
+
requirements:
|
164
|
+
- - "~>"
|
165
|
+
- !ruby/object:Gem::Version
|
166
|
+
version: '0.3'
|
167
|
+
description: An ETL for retrieving records from the Affiliate Window API and loading
|
168
|
+
them into a Postgres database.
|
169
|
+
email: developers@reevoo.com
|
170
|
+
executables: []
|
171
|
+
extensions: []
|
172
|
+
extra_rdoc_files: []
|
173
|
+
files:
|
174
|
+
- README.md
|
175
|
+
- lib/affiliate_window/etl.rb
|
176
|
+
- lib/affiliate_window/etl/base.rb
|
177
|
+
- lib/affiliate_window/etl/config.rb
|
178
|
+
- lib/affiliate_window/etl/database.rb
|
179
|
+
- lib/affiliate_window/etl/extracter.rb
|
180
|
+
- lib/affiliate_window/etl/loader.rb
|
181
|
+
- lib/affiliate_window/etl/normaliser.rb
|
182
|
+
- lib/affiliate_window/etl/scheduler.rb
|
183
|
+
- lib/affiliate_window/etl/tasks.rake
|
184
|
+
- lib/affiliate_window/etl/transformer.rb
|
185
|
+
- lib/affiliate_window/etl/version.rb
|
186
|
+
- lib/affiliate_window_etl.rb
|
187
|
+
homepage: https://github.com/reevoo/affiliate_window_etl
|
188
|
+
licenses:
|
189
|
+
- MIT
|
190
|
+
metadata: {}
|
191
|
+
post_install_message:
|
192
|
+
rdoc_options: []
|
193
|
+
require_paths:
|
194
|
+
- lib
|
195
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
196
|
+
requirements:
|
197
|
+
- - ">="
|
198
|
+
- !ruby/object:Gem::Version
|
199
|
+
version: '0'
|
200
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
201
|
+
requirements:
|
202
|
+
- - ">="
|
203
|
+
- !ruby/object:Gem::Version
|
204
|
+
version: '0'
|
205
|
+
requirements: []
|
206
|
+
rubyforge_project:
|
207
|
+
rubygems_version: 2.6.7
|
208
|
+
signing_key:
|
209
|
+
specification_version: 4
|
210
|
+
summary: Affiliate Window ETL
|
211
|
+
test_files: []
|