adops_report_scrapper 0.2.9 → 0.2.10
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +1 -0
- data/Rakefile +5 -0
- data/adops_report_scrapper.gemspec +1 -0
- data/lib/adops_report_scrapper/email_client.rb +72 -0
- data/lib/adops_report_scrapper/tremor_client.rb +2 -0
- data/lib/adops_report_scrapper/version.rb +1 -1
- data/lib/adops_report_scrapper.rb +1 -0
- data/secret.sample.yml +10 -1
- metadata +17 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 10d81f7b1c8357521bbf4a3e9be25f6cb8e6e8d1
|
4
|
+
data.tar.gz: 2faf4bef4bccdc0125e666fbbf532fa3c6cf62e5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 87438a0eaf28f6a35942db7e43ec8d76f593f4d644cdf9c45913fe4f0dbd8bf1418033accf67d55c05b9f75f8bd4a29fa28fc3dfdaa3938b665f5b09e234d25e
|
7
|
+
data.tar.gz: a198b3cbe5db8eb61cf4ed1886c72d242f3a33ceb26b9cbf0487334b76a32ebd38887f2d742a2005046761c6a03f9e7dae753bde79dcd12b7e841eec45eff521
|
data/Gemfile
CHANGED
data/Rakefile
CHANGED
@@ -196,6 +196,11 @@ task :spotxchange do
|
|
196
196
|
save_as_csv :spotxchange, :spotxchange
|
197
197
|
end
|
198
198
|
|
199
|
+
desc 'Collect email data'
|
200
|
+
task :mediabong do
|
201
|
+
save_as_csv :mediabong, :email
|
202
|
+
end
|
203
|
+
|
199
204
|
def date
|
200
205
|
@date ||= ENV['date'].nil? ? Date.today - 1 : Date.today - ENV['date'].to_i
|
201
206
|
end
|
@@ -31,6 +31,7 @@ Gem::Specification.new do |spec|
|
|
31
31
|
spec.add_dependency 'cheddar', '~> 1.0'
|
32
32
|
spec.add_dependency 'roo', '~> 2.4.0'
|
33
33
|
spec.add_dependency 'selenium-webdriver', '~> 3.3.0'
|
34
|
+
spec.add_dependency 'mail', '~> 2.6.4'
|
34
35
|
|
35
36
|
spec.add_development_dependency "bundler", "~> 1.8"
|
36
37
|
spec.add_development_dependency "rake", "~> 10.0"
|
@@ -0,0 +1,72 @@
|
|
1
|
+
require 'date'
|
2
|
+
require_relative 'base_client'
|
3
|
+
require 'net/imap'
|
4
|
+
require 'mail'
|
5
|
+
require 'csv'
|
6
|
+
|
7
|
+
# Report client that reads a CSV report delivered as an e-mail attachment
# over IMAP.
#
# Settings read from @options in init_client:
#   imap_server - IMAP host (required)
#   imap_port   - IMAP port (required)
#   imap_ssl    - true or false (required; `false` is a valid value)
#   title       - subject to search for; passed through Date#strftime, so
#                 `XXX Report %Y-%m-%d` matches `XXX Report 2017-04-26`
#   date_column - optional `<column index>||<strftime format>`; when given,
#                 only rows whose column equals the formatted @date are kept,
#                 e.g. `0||%Y-%m-%d` keeps rows whose first column is
#                 `2017-04-26`
#
# @login, @secret, @date and @options are not assigned in this class;
# presumably BaseClient sets them -- confirm against base_client.rb.
class AdopsReportScrapper::EmailClient < AdopsReportScrapper::BaseClient
  # Returns true when the given date (or @date when none is given) is no
  # more than 30 days before today.
  def date_supported?(date = nil)
    (date || @date) >= Date.today - 30
  end

  private

  # Validates the required options and copies them into instance variables.
  # Raises RuntimeError naming the first missing option.
  def init_client
    fail 'please specify email imap_server' unless @options['imap_server']
    fail 'please specify email imap_port' unless @options['imap_port']
    # imap_ssl is a boolean flag: only a missing value is an error. A plain
    # truthiness check would reject the legitimate setting `imap_ssl: false`.
    fail 'please specify email imap_ssl' if @options['imap_ssl'].nil?
    fail 'please specify email title' unless @options['title']

    @imap_server = @options['imap_server']
    @imap_port = @options['imap_port']
    @imap_ssl = @options['imap_ssl']
    @title = @options['title']
    @date_column = @options['date_column']
  end

  # Intentionally empty: the IMAP connection is closed inside
  # fetch_report_attachment, so nothing is left to clean up here.
  # NOTE(review): presumably a hook called by BaseClient on failure -- confirm.
  def before_quit_with_error
  end

  # Downloads the report attachment, parses it as CSV into @data and, when
  # date_column is configured, keeps only the rows for @date.
  def scrap
    @data = []
    @data = CSV.parse(fetch_report_attachment)
    filter_rows_by_date if @date_column
  end

  # Returns the decoded body of the first attachment of the first e-mail that
  # was received on the day after @date and whose subject matches the title.
  # Raises RuntimeError when no such e-mail exists or it has no attachment.
  def fetch_report_attachment
    imap = Net::IMAP.new(@imap_server, @imap_port, @imap_ssl)
    begin
      imap.login(@login, @secret)
      imap.select('INBOX')

      received_on = Net::IMAP.format_date(@date + 1)
      subject = @date.strftime(@title)
      email_ids = imap.search(['ON', received_on, 'SUBJECT', subject])
      fail 'no email found with the given date and title' if email_ids.empty?
      if email_ids.size > 1
        puts 'more than one email found with the given date and title, try to use the first one'
      end

      raw_message = imap.fetch(email_ids.first, 'RFC822')[0].attr['RFC822']
      attachments = Mail.new(raw_message).attachments
      # `empty?` avoids relying on a `blank?` core extension being loaded.
      fail 'no attachment found for the given report' if attachments.empty?

      attachments.first.body.decoded
    ensure
      # Runs on success and on every failure path, so the connection is
      # never left open.
      close_imap(imap)
    end
  end

  # Logs out and disconnects. Errors raised by LOGOUT are ignored on purpose:
  # this runs from an `ensure`, and raising here would hide the real failure.
  def close_imap(imap)
    imap.logout
  rescue StandardError
    nil
  ensure
    imap.disconnect unless imap.disconnected?
  end

  # Keeps the header row plus the rows whose date column equals @date
  # formatted with the configured strftime pattern. Returns @data.
  def filter_rows_by_date
    column_index, date_format_str = @date_column.split('||')
    column_index = column_index.to_i
    date_str = @date.strftime(date_format_str)

    header, *rows = @data
    # An empty CSV has no header; leave @data empty instead of adding a nil row.
    return @data if header.nil?

    @data = [header] + rows.select { |row| row[column_index] == date_str }
  end
end
|
@@ -14,6 +14,7 @@ class AdopsReportScrapper::TremorClient < AdopsReportScrapper::BaseClient
|
|
14
14
|
def login
|
15
15
|
@client.visit 'https://console.tremorhub.com/ssp'
|
16
16
|
@client.driver.browser.manage.window.resize_to(1366,768)
|
17
|
+
byebug
|
17
18
|
@client.fill_in 'username', :with => @login
|
18
19
|
@client.fill_in 'password', :with => @secret
|
19
20
|
@client.click_button 'Sign In'
|
@@ -77,6 +78,7 @@ class AdopsReportScrapper::TremorClient < AdopsReportScrapper::BaseClient
|
|
77
78
|
end
|
78
79
|
|
79
80
|
def extract_data_from_report
|
81
|
+
byebug
|
80
82
|
page = Nokogiri::HTML @client.html
|
81
83
|
rows = page.xpath '//table[@id="DataTables_Table_1"]/*/tr'
|
82
84
|
@data = rows.map { |tr| tr.css('td,th').map { |td| td.text } }
|
data/secret.sample.yml
CHANGED
@@ -136,4 +136,13 @@ spotxchange:
|
|
136
136
|
secret: ------
|
137
137
|
options:
|
138
138
|
client_id: ------
|
139
|
-
client_secret: ------
|
139
|
+
client_secret: ------
|
140
|
+
mediabong:
|
141
|
+
login: ------
|
142
|
+
secret: ------
|
143
|
+
options:
|
144
|
+
imap_server: ------
|
145
|
+
imap_port: ------
|
146
|
+
imap_ssl: true/false
|
147
|
+
title: XXX Report
|
148
|
+
date_column: 1||%Y-%m-%d
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: adops_report_scrapper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.9
|
4
|
+
version: 0.2.10
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Stayman Hou
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-04-
|
11
|
+
date: 2017-04-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: httpclient
|
@@ -178,6 +178,20 @@ dependencies:
|
|
178
178
|
- - "~>"
|
179
179
|
- !ruby/object:Gem::Version
|
180
180
|
version: 3.3.0
|
181
|
+
- !ruby/object:Gem::Dependency
|
182
|
+
name: mail
|
183
|
+
requirement: !ruby/object:Gem::Requirement
|
184
|
+
requirements:
|
185
|
+
- - "~>"
|
186
|
+
- !ruby/object:Gem::Version
|
187
|
+
version: 2.6.4
|
188
|
+
type: :runtime
|
189
|
+
prerelease: false
|
190
|
+
version_requirements: !ruby/object:Gem::Requirement
|
191
|
+
requirements:
|
192
|
+
- - "~>"
|
193
|
+
- !ruby/object:Gem::Version
|
194
|
+
version: 2.6.4
|
181
195
|
- !ruby/object:Gem::Dependency
|
182
196
|
name: bundler
|
183
197
|
requirement: !ruby/object:Gem::Requirement
|
@@ -247,6 +261,7 @@ files:
|
|
247
261
|
- lib/adops_report_scrapper/contentad_client.rb
|
248
262
|
- lib/adops_report_scrapper/conversant_client.rb
|
249
263
|
- lib/adops_report_scrapper/criteo_client.rb
|
264
|
+
- lib/adops_report_scrapper/email_client.rb
|
250
265
|
- lib/adops_report_scrapper/facebookaudience_client.rb
|
251
266
|
- lib/adops_report_scrapper/gcs_client.rb
|
252
267
|
- lib/adops_report_scrapper/gumgum_client.rb
|