adops_report_scrapper 0.2.9 → 0.2.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2062161736aa199788f24a787649cc654fce4f67
4
- data.tar.gz: dd424d62acb31fc097d8a4dd6951aa9b75e37d5b
3
+ metadata.gz: 10d81f7b1c8357521bbf4a3e9be25f6cb8e6e8d1
4
+ data.tar.gz: 2faf4bef4bccdc0125e666fbbf532fa3c6cf62e5
5
5
  SHA512:
6
- metadata.gz: f8e3e4d325b51ff318ab3a944d2c2fbcdc29bd7583a0e5287000417fec93377fc7bc1f5ebdf5edd07894337c059bc6307efac91ec5edc88a5c3189bd203d065f
7
- data.tar.gz: 9090366aabcb84f92dc3c86127059e5d29d70cac949a0a98c88b6dcca2759ad65dd29baa4042da5b555df047e13f159e329150bc05ac6e86ccb0acc4c2baba2d
6
+ metadata.gz: 87438a0eaf28f6a35942db7e43ec8d76f593f4d644cdf9c45913fe4f0dbd8bf1418033accf67d55c05b9f75f8bd4a29fa28fc3dfdaa3938b665f5b09e234d25e
7
+ data.tar.gz: a198b3cbe5db8eb61cf4ed1886c72d242f3a33ceb26b9cbf0487334b76a32ebd38887f2d742a2005046761c6a03f9e7dae753bde79dcd12b7e841eec45eff521
data/Gemfile CHANGED
@@ -17,3 +17,4 @@ gem 'google-api-client', '~> 0.8.2'
17
17
  gem 'cheddar'
18
18
  gem 'roo', '~> 2.4.0'
19
19
  gem 'selenium-webdriver'
20
+ gem 'mail'
data/Rakefile CHANGED
@@ -196,6 +196,11 @@ task :spotxchange do
196
196
  save_as_csv :spotxchange, :spotxchange
197
197
  end
198
198
 
199
# Rake task for the `mediabong` report. It delegates to save_as_csv with
# :mediabong and :email -- presumably (source name, scrapper type), mirroring
# the neighbouring tasks; confirm against save_as_csv's definition.
desc 'Collect email data'
task(:mediabong) { save_as_csv(:mediabong, :email) }
203
+
199
204
  def date
200
205
  @date ||= ENV['date'].nil? ? Date.today - 1 : Date.today - ENV['date'].to_i
201
206
  end
@@ -31,6 +31,7 @@ Gem::Specification.new do |spec|
31
31
  spec.add_dependency 'cheddar', '~> 1.0'
32
32
  spec.add_dependency 'roo', '~> 2.4.0'
33
33
  spec.add_dependency 'selenium-webdriver', '~> 3.3.0'
34
+ spec.add_dependency 'mail', '~> 2.6.4'
34
35
 
35
36
  spec.add_development_dependency "bundler", "~> 1.8"
36
37
  spec.add_development_dependency "rake", "~> 10.0"
@@ -0,0 +1,72 @@
1
+ require 'date'
2
+ require_relative 'base_client'
3
+ require 'net/imap'
4
+ require 'mail'
5
+ require 'csv'
6
+
7
# Scrapper that reads a report delivered as an email attachment over IMAP.
#
# It looks in INBOX for a message received the day after the report date whose
# subject matches the configured title, parses the first attachment as CSV and
# optionally keeps only the rows belonging to the report date.
#
# Options read from @options:
#   imap_server - IMAP host (required)
#   imap_port   - IMAP port (required)
#   imap_ssl    - true/false, whether to connect over SSL (required)
#   title       - subject to search for; supports date macros, e.g.
#                 `XXX Report %Y-%m-%d` matches `XXX Report 2017-04-26` (required)
#   date_column - optional `<index>||<strftime format>`, e.g. `0||%Y-%m-%d`
#                 keeps rows whose first column equals `2017-04-26`
#
# NOTE(review): @options, @login, @secret and @date are presumably populated by
# BaseClient -- confirm there.
class AdopsReportScrapper::EmailClient < AdopsReportScrapper::BaseClient
  # Reports are only looked up for dates within the last 30 days.
  #
  # @param date [Date, nil] date to check; falls back to @date when nil
  # @return [Boolean]
  def date_supported?(date = nil)
    (date || @date) >= Date.today - 30
  end

  private

  # Validates the required options and copies them into instance variables.
  #
  # @raise [RuntimeError] when a required option is missing
  def init_client
    fail 'please specify email imap_server' unless @options['imap_server']
    fail 'please specify email imap_port' unless @options['imap_port']
    # `false` is a legitimate value for imap_ssl, so only a missing key is an error.
    fail 'please specify email imap_ssl' if @options['imap_ssl'].nil?
    fail 'please specify email title' unless @options['title']

    @imap_server = @options['imap_server']
    @imap_port = @options['imap_port']
    @imap_ssl = @options['imap_ssl']
    @title = @options['title']
    @date_column = @options['date_column']
  end

  # Nothing to clean up on error for this client.
  def before_quit_with_error
  end

  # Downloads the report attachment and stores the parsed CSV rows in @data.
  #
  # @raise [RuntimeError] when no matching email or no attachment is found
  def scrap
    @data = []
    @data = CSV.parse(fetch_report_attachment)
    filter_rows_by_date if @date_column
  end

  # Fetches the decoded body of the first attachment of the report email.
  # The IMAP session is always closed, including when a step raises.
  def fetch_report_attachment
    # The search targets mail received the day after the report date.
    email_received_date = Net::IMAP.format_date(@date + 1)
    title = @date.strftime(@title)

    imap = Net::IMAP.new(@imap_server, port: @imap_port, ssl: @imap_ssl)
    begin
      imap.login(@login, @secret)
      imap.select('INBOX')
      report_email_ids = imap.search(['ON', email_received_date, 'SUBJECT', title])
      fail 'no email found with the given date and title' if report_email_ids.empty?
      if report_email_ids.count > 1
        puts 'more than one email found with the given date and title, try to use the first one'
      end

      body = imap.fetch(report_email_ids.first, 'RFC822')[0].attr['RFC822']
      attachments = Mail.new(body).attachments
      fail 'no attachment found for the given report' if attachments.empty?

      attachments.first.body.decoded
    ensure
      close_connection(imap)
    end
  end

  # Logs out and disconnects. A failing logout is reported but must not mask
  # the error (or result) of the fetch itself.
  def close_connection(imap)
    imap.logout
  rescue StandardError => e
    warn "IMAP logout failed: #{e.message}"
  ensure
    imap.disconnect unless imap.disconnected?
  end

  # Keeps the header row plus the rows whose configured column equals the
  # report date formatted with the configured strftime pattern.
  #
  # @raise [RuntimeError] when date_column has no `||<format>` part
  def filter_rows_by_date
    column_index, date_format_str = @date_column.split('||')
    fail 'date_column must look like `<column index>||<date format>`' unless date_format_str
    return if @data.empty? # no header row to preserve

    column_index = column_index.to_i
    date_str = @date.strftime(date_format_str)
    header = @data.shift
    @data = @data.select { |row| row[column_index] == date_str }
    @data.unshift header
  end
end
@@ -14,6 +14,7 @@ class AdopsReportScrapper::TremorClient < AdopsReportScrapper::BaseClient
14
14
  def login
15
15
  @client.visit 'https://console.tremorhub.com/ssp'
16
16
  @client.driver.browser.manage.window.resize_to(1366,768)
17
+ byebug
17
18
  @client.fill_in 'username', :with => @login
18
19
  @client.fill_in 'password', :with => @secret
19
20
  @client.click_button 'Sign In'
@@ -77,6 +78,7 @@ class AdopsReportScrapper::TremorClient < AdopsReportScrapper::BaseClient
77
78
  end
78
79
 
79
80
  def extract_data_from_report
81
+ byebug
80
82
  page = Nokogiri::HTML @client.html
81
83
  rows = page.xpath '//table[@id="DataTables_Table_1"]/*/tr'
82
84
  @data = rows.map { |tr| tr.css('td,th').map { |td| td.text } }
@@ -1,3 +1,3 @@
1
1
  module AdopsReportScrapper
2
- VERSION = "0.2.9"
2
+ VERSION = "0.2.10"
3
3
  end
@@ -50,3 +50,4 @@ require 'adops_report_scrapper/appnexus_client'
50
50
  require 'adops_report_scrapper/sovrn_client'
51
51
  require 'adops_report_scrapper/gumgum_client'
52
52
  require 'adops_report_scrapper/spotxchange_client'
53
+ require 'adops_report_scrapper/email_client'
data/secret.sample.yml CHANGED
@@ -136,4 +136,13 @@ spotxchange:
136
136
  secret: ------
137
137
  options:
138
138
  client_id: ------
139
- client_secret: ------
139
+ client_secret: ------
140
+ mediabong:
141
+ login: ------
142
+ secret: ------
143
+ options:
144
+ imap_server: ------
145
+ imap_port: ------
146
+ imap_ssl: true/false
147
+ title: XXX Report
148
+ date_column: 1||%Y-%m-%d
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: adops_report_scrapper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.9
4
+ version: 0.2.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Stayman Hou
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-04-05 00:00:00.000000000 Z
11
+ date: 2017-04-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: httpclient
@@ -178,6 +178,20 @@ dependencies:
178
178
  - - "~>"
179
179
  - !ruby/object:Gem::Version
180
180
  version: 3.3.0
181
+ - !ruby/object:Gem::Dependency
182
+ name: mail
183
+ requirement: !ruby/object:Gem::Requirement
184
+ requirements:
185
+ - - "~>"
186
+ - !ruby/object:Gem::Version
187
+ version: 2.6.4
188
+ type: :runtime
189
+ prerelease: false
190
+ version_requirements: !ruby/object:Gem::Requirement
191
+ requirements:
192
+ - - "~>"
193
+ - !ruby/object:Gem::Version
194
+ version: 2.6.4
181
195
  - !ruby/object:Gem::Dependency
182
196
  name: bundler
183
197
  requirement: !ruby/object:Gem::Requirement
@@ -247,6 +261,7 @@ files:
247
261
  - lib/adops_report_scrapper/contentad_client.rb
248
262
  - lib/adops_report_scrapper/conversant_client.rb
249
263
  - lib/adops_report_scrapper/criteo_client.rb
264
+ - lib/adops_report_scrapper/email_client.rb
250
265
  - lib/adops_report_scrapper/facebookaudience_client.rb
251
266
  - lib/adops_report_scrapper/gcs_client.rb
252
267
  - lib/adops_report_scrapper/gumgum_client.rb