adops_report_scrapper 0.2.9 → 0.2.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +1 -0
- data/Rakefile +5 -0
- data/adops_report_scrapper.gemspec +1 -0
- data/lib/adops_report_scrapper/email_client.rb +72 -0
- data/lib/adops_report_scrapper/tremor_client.rb +2 -0
- data/lib/adops_report_scrapper/version.rb +1 -1
- data/lib/adops_report_scrapper.rb +1 -0
- data/secret.sample.yml +10 -1
- metadata +17 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 10d81f7b1c8357521bbf4a3e9be25f6cb8e6e8d1
|
4
|
+
data.tar.gz: 2faf4bef4bccdc0125e666fbbf532fa3c6cf62e5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 87438a0eaf28f6a35942db7e43ec8d76f593f4d644cdf9c45913fe4f0dbd8bf1418033accf67d55c05b9f75f8bd4a29fa28fc3dfdaa3938b665f5b09e234d25e
|
7
|
+
data.tar.gz: a198b3cbe5db8eb61cf4ed1886c72d242f3a33ceb26b9cbf0487334b76a32ebd38887f2d742a2005046761c6a03f9e7dae753bde79dcd12b7e841eec45eff521
|
data/Gemfile
CHANGED
data/Rakefile
CHANGED
@@ -196,6 +196,11 @@ task :spotxchange do
|
|
196
196
|
save_as_csv :spotxchange, :spotxchange
|
197
197
|
end
|
198
198
|
|
199
|
+
# Rake task `mediabong`: hands off to `save_as_csv` (defined elsewhere in this
# Rakefile) with the :mediabong and :email symbols.
# NOTE(review): :email presumably selects the EmailClient -- confirm against
# save_as_csv.
desc 'Collect email data'
task(:mediabong) { save_as_csv :mediabong, :email }
|
203
|
+
|
199
204
|
# Report date used by the tasks, memoized in @date.
#
# When the `date` environment variable is unset the result is yesterday;
# otherwise it is today minus ENV['date'] days (parsed with String#to_i, so a
# non-numeric value counts as 0, i.e. today).
def date
  return @date if @date

  days_back_setting = ENV['date']
  days_back = days_back_setting.nil? ? 1 : days_back_setting.to_i
  @date = Date.today - days_back
end
|
@@ -31,6 +31,7 @@ Gem::Specification.new do |spec|
|
|
31
31
|
spec.add_dependency 'cheddar', '~> 1.0'
|
32
32
|
spec.add_dependency 'roo', '~> 2.4.0'
|
33
33
|
spec.add_dependency 'selenium-webdriver', '~> 3.3.0'
|
34
|
+
spec.add_dependency 'mail', '~> 2.6.4'
|
34
35
|
|
35
36
|
spec.add_development_dependency "bundler", "~> 1.8"
|
36
37
|
spec.add_development_dependency "rake", "~> 10.0"
|
@@ -0,0 +1,72 @@
|
|
1
|
+
require 'date'
|
2
|
+
require_relative 'base_client'
|
3
|
+
require 'net/imap'
|
4
|
+
require 'mail'
|
5
|
+
require 'csv'
|
6
|
+
|
7
|
+
# Scraper client that reads a report delivered as an email attachment over IMAP.
#
# Reads these instance variables, none of which are assigned in this class
# (presumably provided by BaseClient -- confirm): @options, @login, @secret,
# @date.
#
# Recognised @options keys:
#   imap_server - IMAP host (required)
#   imap_port   - IMAP port (required)
#   imap_ssl    - SSL flag passed to Net::IMAP.new; must be present, and an
#                 explicit `false` is accepted
#   title       - email subject (required). Supports date macros: it is run
#                 through Date#strftime, e.g. `XXX Report %Y-%m-%d` will match
#                 `XXX Report 2017-04-26`
#   date_column - optional, `<column index>||<strftime format>`, e.g.
#                 `0||%Y-%m-%d` keeps only rows that have `2017-04-26` in
#                 their first column
class AdopsReportScrapper::EmailClient < AdopsReportScrapper::BaseClient
  # Whether a report can be scraped for +date+ (defaults to @date): true when
  # the date is no older than 30 days before today.
  def date_supported?(date = nil)
    (date || @date) >= Date.today - 30
  end

  private

  # Validates the required options and copies them into instance variables.
  # Raises RuntimeError naming the first missing option.
  def init_client
    fail 'please specify email imap_server' unless @options['imap_server']
    fail 'please specify email imap_port' unless @options['imap_port']
    # Only a missing value is an error: `imap_ssl: false` is a legitimate
    # setting and must not be rejected by a truthiness check.
    fail 'please specify email imap_ssl' if @options['imap_ssl'].nil?
    fail 'please specify email title' unless @options['title']

    @imap_server = @options['imap_server']
    @imap_port = @options['imap_port']
    @imap_ssl = @options['imap_ssl']
    @title = @options['title']
    @date_column = @options['date_column']
  end

  # Intentionally empty.
  # NOTE(review): presumably a hook invoked by BaseClient on failure -- confirm.
  def before_quit_with_error
  end

  # Downloads the report attachment, parses it as CSV into @data and, when
  # date_column is configured, keeps only the rows for @date.
  def scrap
    @data = []
    @data = CSV.parse(fetch_report_attachment)
    keep_rows_for_report_date if @date_column
  end

  # Returns the decoded body of the first attachment of the first email in
  # INBOX that was received on @date + 1 and whose subject matches the
  # configured title. Raises RuntimeError when no such email exists or it has
  # no attachment. The IMAP session is always closed, including on errors.
  def fetch_report_attachment
    # NOTE(review): the search uses the day after @date -- presumably the
    # report for a given day is mailed the following day; confirm.
    email_received_date = Net::IMAP.format_date(@date + 1)
    title = @date.strftime(@title)

    imap = Net::IMAP.new(@imap_server, @imap_port, @imap_ssl)
    begin
      imap.login(@login, @secret)
      imap.select('INBOX')
      report_email_ids = imap.search(['ON', email_received_date, 'SUBJECT', title])
      fail 'no email found with the given date and title' if report_email_ids.empty?
      if report_email_ids.count > 1
        puts 'more than one email found with the given date and title, try to use the first one'
      end

      body = imap.fetch(report_email_ids.first, 'RFC822')[0].attr['RFC822']
      mail = Mail.new(body)
      # `empty?` is core Ruby; avoids relying on a `blank?` extension.
      fail 'no attachment found for the given report' if mail.attachments.empty?

      mail.attachments.first.body.decoded
    ensure
      close_imap(imap)
    end
  end

  # Logs out and disconnects. Best-effort: a failing LOGOUT (e.g. after a
  # failed login or a dropped connection) must not mask the error that is
  # already propagating, and the socket is closed regardless.
  def close_imap(imap)
    imap.logout
  rescue StandardError
    nil
  ensure
    imap.disconnect unless imap.disconnected?
  end

  # Filters @data in place to the header row plus the rows whose configured
  # column equals @date formatted with the configured strftime pattern.
  def keep_rows_for_report_date
    column_index, date_format_str = @date_column.split('||')
    column_index = column_index.to_i
    date_str = @date.strftime(date_format_str)

    header = @data.shift
    @data = @data.select { |row| row[column_index] == date_str }
    # An empty CSV has no header; do not insert a nil row in that case.
    @data.unshift header if header
  end
end
|
@@ -14,6 +14,7 @@ class AdopsReportScrapper::TremorClient < AdopsReportScrapper::BaseClient
|
|
14
14
|
def login
|
15
15
|
@client.visit 'https://console.tremorhub.com/ssp'
|
16
16
|
@client.driver.browser.manage.window.resize_to(1366,768)
|
17
|
+
byebug
|
17
18
|
@client.fill_in 'username', :with => @login
|
18
19
|
@client.fill_in 'password', :with => @secret
|
19
20
|
@client.click_button 'Sign In'
|
@@ -77,6 +78,7 @@ class AdopsReportScrapper::TremorClient < AdopsReportScrapper::BaseClient
|
|
77
78
|
end
|
78
79
|
|
79
80
|
def extract_data_from_report
|
81
|
+
byebug
|
80
82
|
page = Nokogiri::HTML @client.html
|
81
83
|
rows = page.xpath '//table[@id="DataTables_Table_1"]/*/tr'
|
82
84
|
@data = rows.map { |tr| tr.css('td,th').map { |td| td.text } }
|
data/secret.sample.yml
CHANGED
@@ -136,4 +136,13 @@ spotxchange:
|
|
136
136
|
secret: ------
|
137
137
|
options:
|
138
138
|
client_id: ------
|
139
|
-
client_secret: ------
|
139
|
+
client_secret: ------
|
140
|
+
mediabong:
|
141
|
+
login: ------
|
142
|
+
secret: ------
|
143
|
+
options:
|
144
|
+
imap_server: ------
|
145
|
+
imap_port: ------
|
146
|
+
imap_ssl: true/false
|
147
|
+
title: XXX Report
|
148
|
+
date_column: 1||%Y-%m-%d
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: adops_report_scrapper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.9
|
4
|
+
version: 0.2.10
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Stayman Hou
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-04-
|
11
|
+
date: 2017-04-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: httpclient
|
@@ -178,6 +178,20 @@ dependencies:
|
|
178
178
|
- - "~>"
|
179
179
|
- !ruby/object:Gem::Version
|
180
180
|
version: 3.3.0
|
181
|
+
- !ruby/object:Gem::Dependency
|
182
|
+
name: mail
|
183
|
+
requirement: !ruby/object:Gem::Requirement
|
184
|
+
requirements:
|
185
|
+
- - "~>"
|
186
|
+
- !ruby/object:Gem::Version
|
187
|
+
version: 2.6.4
|
188
|
+
type: :runtime
|
189
|
+
prerelease: false
|
190
|
+
version_requirements: !ruby/object:Gem::Requirement
|
191
|
+
requirements:
|
192
|
+
- - "~>"
|
193
|
+
- !ruby/object:Gem::Version
|
194
|
+
version: 2.6.4
|
181
195
|
- !ruby/object:Gem::Dependency
|
182
196
|
name: bundler
|
183
197
|
requirement: !ruby/object:Gem::Requirement
|
@@ -247,6 +261,7 @@ files:
|
|
247
261
|
- lib/adops_report_scrapper/contentad_client.rb
|
248
262
|
- lib/adops_report_scrapper/conversant_client.rb
|
249
263
|
- lib/adops_report_scrapper/criteo_client.rb
|
264
|
+
- lib/adops_report_scrapper/email_client.rb
|
250
265
|
- lib/adops_report_scrapper/facebookaudience_client.rb
|
251
266
|
- lib/adops_report_scrapper/gcs_client.rb
|
252
267
|
- lib/adops_report_scrapper/gumgum_client.rb
|