adops_report_scrapper 0.2.9 → 0.2.10

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2062161736aa199788f24a787649cc654fce4f67
4
- data.tar.gz: dd424d62acb31fc097d8a4dd6951aa9b75e37d5b
3
+ metadata.gz: 10d81f7b1c8357521bbf4a3e9be25f6cb8e6e8d1
4
+ data.tar.gz: 2faf4bef4bccdc0125e666fbbf532fa3c6cf62e5
5
5
  SHA512:
6
- metadata.gz: f8e3e4d325b51ff318ab3a944d2c2fbcdc29bd7583a0e5287000417fec93377fc7bc1f5ebdf5edd07894337c059bc6307efac91ec5edc88a5c3189bd203d065f
7
- data.tar.gz: 9090366aabcb84f92dc3c86127059e5d29d70cac949a0a98c88b6dcca2759ad65dd29baa4042da5b555df047e13f159e329150bc05ac6e86ccb0acc4c2baba2d
6
+ metadata.gz: 87438a0eaf28f6a35942db7e43ec8d76f593f4d644cdf9c45913fe4f0dbd8bf1418033accf67d55c05b9f75f8bd4a29fa28fc3dfdaa3938b665f5b09e234d25e
7
+ data.tar.gz: a198b3cbe5db8eb61cf4ed1886c72d242f3a33ceb26b9cbf0487334b76a32ebd38887f2d742a2005046761c6a03f9e7dae753bde79dcd12b7e841eec45eff521
data/Gemfile CHANGED
@@ -17,3 +17,4 @@ gem 'google-api-client', '~> 0.8.2'
17
17
  gem 'cheddar'
18
18
  gem 'roo', '~> 2.4.0'
19
19
  gem 'selenium-webdriver'
20
+ gem 'mail'
data/Rakefile CHANGED
@@ -196,6 +196,11 @@ task :spotxchange do
196
196
  save_as_csv :spotxchange, :spotxchange
197
197
  end
198
198
 
199
+ desc 'Collect email data'
200
+ task :mediabong do
201
+ save_as_csv :mediabong, :email
202
+ end
203
+
199
204
  def date
200
205
  @date ||= ENV['date'].nil? ? Date.today - 1 : Date.today - ENV['date'].to_i
201
206
  end
@@ -31,6 +31,7 @@ Gem::Specification.new do |spec|
31
31
  spec.add_dependency 'cheddar', '~> 1.0'
32
32
  spec.add_dependency 'roo', '~> 2.4.0'
33
33
  spec.add_dependency 'selenium-webdriver', '~> 3.3.0'
34
+ spec.add_dependency 'mail', '~> 2.6.4'
34
35
 
35
36
  spec.add_development_dependency "bundler", "~> 1.8"
36
37
  spec.add_development_dependency "rake", "~> 10.0"
@@ -0,0 +1,72 @@
1
+ require 'date'
2
+ require_relative 'base_client'
3
+ require 'net/imap'
4
+ require 'mail'
5
+ require 'csv'
6
+
7
# Report client that reads a CSV report from an email attachment over IMAP.
#
# Settings read from @options (string keys):
#   imap_server - IMAP host name (required)
#   imap_port   - IMAP port (required)
#   imap_ssl    - true/false, whether to connect over SSL (required; an
#                 explicit false is a valid value)
#   title       - subject to search for; expanded with Date#strftime, e.g.
#                 `XXX Report %Y-%m-%d` matches `XXX Report 2017-04-26` (required)
#   date_column - optional `<zero-based column index>||<strftime format>`, e.g.
#                 `0||%Y-%m-%d` keeps only rows whose first column equals the
#                 formatted report date
#
# NOTE(review): @options, @login, @secret and @date are not assigned in this
# class; presumably AdopsReportScrapper::BaseClient sets them - confirm.
class AdopsReportScrapper::EmailClient < AdopsReportScrapper::BaseClient
  # Oldest accepted report date, in days before today.
  MAX_REPORT_AGE_DAYS = 30

  # Tells whether a report can be requested for the given date.
  #
  # @param date [Date, nil] date to check; falls back to @date when nil
  # @return [Boolean] true when the date is no older than 30 days before today
  def date_supported?(date = nil)
    (date || @date) >= Date.today - MAX_REPORT_AGE_DAYS
  end

  private

  # Validates the required options and copies them into instance variables.
  #
  # @raise [RuntimeError] when a required option is missing
  def init_client
    fail 'please specify email imap_server' unless @options['imap_server']
    fail 'please specify email imap_port' unless @options['imap_port']
    # `false` is a legitimate setting (plain, non-SSL connection), so only a
    # missing value is rejected here.
    fail 'please specify email imap_ssl' if @options['imap_ssl'].nil?
    fail 'please specify email title' unless @options['title']

    @imap_server = @options['imap_server']
    @imap_port = @options['imap_port']
    @imap_ssl = @options['imap_ssl']
    @title = @options['title'] # supports date macro, see class comment
    @date_column = @options['date_column'] # optional, see class comment
  end

  # Intentionally empty: this client has nothing to clean up here.
  # NOTE(review): presumably a hook invoked by BaseClient on failure - confirm.
  def before_quit_with_error
  end

  # Downloads the report attachment, parses it as CSV into @data and, when
  # date_column is configured, keeps only the rows for the report date.
  #
  # @raise [RuntimeError] when no matching email or no attachment is found
  def scrap
    @data = []
    @data = CSV.parse(fetch_report_csv)
    filter_rows_by_date_column if @date_column
  end

  # Finds the report email in INBOX and returns its first attachment decoded.
  # The IMAP session is always closed, including when an error is raised.
  #
  # @return [String] decoded body of the first attachment
  # @raise [RuntimeError] when no matching email or no attachment is found
  def fetch_report_csv
    # The search targets the day after the report date.
    # NOTE(review): presumably because the report is mailed the following day - confirm.
    email_received_date = Net::IMAP.format_date(@date + 1)
    title = @date.strftime(@title)

    imap = Net::IMAP.new(@imap_server, @imap_port, @imap_ssl)
    begin
      imap.login(@login, @secret)
      imap.select('INBOX')
      report_email_ids = imap.search(['ON', email_received_date, 'SUBJECT', title])
      fail 'no email found with the given date and title' if report_email_ids.empty?
      if report_email_ids.count > 1
        puts 'more than one email found with the given date and title, try to use the first one'
      end

      body = imap.fetch(report_email_ids.first, 'RFC822')[0].attr['RFC822']
      mail = Mail.new(body)
      fail 'no attachment found for the given report' if mail.attachments.empty?

      mail.attachments.first.body.decoded
    ensure
      close_imap(imap)
    end
  end

  # Logs out and disconnects without letting a cleanup failure replace the
  # error (if any) that is already propagating.
  #
  # @param imap [Net::IMAP] the session to close
  def close_imap(imap)
    begin
      imap.logout
    rescue StandardError => e
      warn "IMAP logout failed: #{e.message}"
    end
    imap.disconnect unless imap.disconnected?
  end

  # Keeps the header row plus the rows whose configured column equals the
  # report date formatted with the configured pattern.
  #
  # @return [Array<Array>] the filtered @data
  # @raise [RuntimeError] when date_column has no `||<format>` part
  def filter_rows_by_date_column
    column_index, date_format_str = @date_column.split('||')
    fail 'date_column must look like `<column index>||<date format>`' if date_format_str.nil?

    column_index = column_index.to_i
    date_str = @date.strftime(date_format_str)
    header = @data.shift
    @data = @data.select { |row| row[column_index] == date_str }
    # An empty attachment has no header row; do not push a nil row back in.
    @data.unshift(header) if header
    @data
  end
end
@@ -14,6 +14,7 @@ class AdopsReportScrapper::TremorClient < AdopsReportScrapper::BaseClient
14
14
  def login
15
15
  @client.visit 'https://console.tremorhub.com/ssp'
16
16
  @client.driver.browser.manage.window.resize_to(1366,768)
17
+ byebug
17
18
  @client.fill_in 'username', :with => @login
18
19
  @client.fill_in 'password', :with => @secret
19
20
  @client.click_button 'Sign In'
@@ -77,6 +78,7 @@ class AdopsReportScrapper::TremorClient < AdopsReportScrapper::BaseClient
77
78
  end
78
79
 
79
80
  def extract_data_from_report
81
+ byebug
80
82
  page = Nokogiri::HTML @client.html
81
83
  rows = page.xpath '//table[@id="DataTables_Table_1"]/*/tr'
82
84
  @data = rows.map { |tr| tr.css('td,th').map { |td| td.text } }
@@ -1,3 +1,3 @@
1
1
  module AdopsReportScrapper
2
- VERSION = "0.2.9"
2
+ VERSION = "0.2.10"
3
3
  end
@@ -50,3 +50,4 @@ require 'adops_report_scrapper/appnexus_client'
50
50
  require 'adops_report_scrapper/sovrn_client'
51
51
  require 'adops_report_scrapper/gumgum_client'
52
52
  require 'adops_report_scrapper/spotxchange_client'
53
+ require 'adops_report_scrapper/email_client'
data/secret.sample.yml CHANGED
@@ -136,4 +136,13 @@ spotxchange:
136
136
  secret: ------
137
137
  options:
138
138
  client_id: ------
139
- client_secret: ------
139
+ client_secret: ------
140
+ mediabong:
141
+ login: ------
142
+ secret: ------
143
+ options:
144
+ imap_server: ------
145
+ imap_port: ------
146
+ imap_ssl: true/false
147
+ title: XXX Report
148
+ date_column: 1||%Y-%m-%d
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: adops_report_scrapper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.9
4
+ version: 0.2.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Stayman Hou
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-04-05 00:00:00.000000000 Z
11
+ date: 2017-04-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: httpclient
@@ -178,6 +178,20 @@ dependencies:
178
178
  - - "~>"
179
179
  - !ruby/object:Gem::Version
180
180
  version: 3.3.0
181
+ - !ruby/object:Gem::Dependency
182
+ name: mail
183
+ requirement: !ruby/object:Gem::Requirement
184
+ requirements:
185
+ - - "~>"
186
+ - !ruby/object:Gem::Version
187
+ version: 2.6.4
188
+ type: :runtime
189
+ prerelease: false
190
+ version_requirements: !ruby/object:Gem::Requirement
191
+ requirements:
192
+ - - "~>"
193
+ - !ruby/object:Gem::Version
194
+ version: 2.6.4
181
195
  - !ruby/object:Gem::Dependency
182
196
  name: bundler
183
197
  requirement: !ruby/object:Gem::Requirement
@@ -247,6 +261,7 @@ files:
247
261
  - lib/adops_report_scrapper/contentad_client.rb
248
262
  - lib/adops_report_scrapper/conversant_client.rb
249
263
  - lib/adops_report_scrapper/criteo_client.rb
264
+ - lib/adops_report_scrapper/email_client.rb
250
265
  - lib/adops_report_scrapper/facebookaudience_client.rb
251
266
  - lib/adops_report_scrapper/gcs_client.rb
252
267
  - lib/adops_report_scrapper/gumgum_client.rb