adops_report_scrapper 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +16 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +3 -0
  5. data/CODE_OF_CONDUCT.md +13 -0
  6. data/Gemfile +18 -0
  7. data/LICENSE.txt +21 -0
  8. data/README.md +51 -0
  9. data/Rakefile +152 -0
  10. data/adops_report_scrapper.gemspec +24 -0
  11. data/bin/console +14 -0
  12. data/bin/setup +7 -0
  13. data/lib/adops_report_scrapper.rb +38 -0
  14. data/lib/adops_report_scrapper/adaptv_client.rb +80 -0
  15. data/lib/adops_report_scrapper/adforge_client.rb +35 -0
  16. data/lib/adops_report_scrapper/adiply_client.rb +49 -0
  17. data/lib/adops_report_scrapper/adsense_client.rb +55 -0
  18. data/lib/adops_report_scrapper/adsupply_client.rb +27 -0
  19. data/lib/adops_report_scrapper/adx_client.rb +55 -0
  20. data/lib/adops_report_scrapper/base_client.rb +74 -0
  21. data/lib/adops_report_scrapper/brightroll_client.rb +47 -0
  22. data/lib/adops_report_scrapper/browsi_client.rb +44 -0
  23. data/lib/adops_report_scrapper/contentad_client.rb +91 -0
  24. data/lib/adops_report_scrapper/conversant_client.rb +66 -0
  25. data/lib/adops_report_scrapper/criteo_client.rb +25 -0
  26. data/lib/adops_report_scrapper/facebookaudience_client.rb +56 -0
  27. data/lib/adops_report_scrapper/gcs_client.rb +56 -0
  28. data/lib/adops_report_scrapper/liveintent_client.rb +90 -0
  29. data/lib/adops_report_scrapper/marfeel_client.rb +59 -0
  30. data/lib/adops_report_scrapper/nativo_client.rb +60 -0
  31. data/lib/adops_report_scrapper/netseer_client.rb +73 -0
  32. data/lib/adops_report_scrapper/openx_client.rb +161 -0
  33. data/lib/adops_report_scrapper/revcontent_client.rb +51 -0
  34. data/lib/adops_report_scrapper/sonobi_client.rb +91 -0
  35. data/lib/adops_report_scrapper/springserve_client.rb +52 -0
  36. data/lib/adops_report_scrapper/tremor_client.rb +52 -0
  37. data/lib/adops_report_scrapper/triplelift_client.rb +72 -0
  38. data/lib/adops_report_scrapper/version.rb +3 -0
  39. data/secret.sample.yml +77 -0
  40. data/tmp/.keep +0 -0
  41. metadata +117 -0
@@ -0,0 +1,35 @@
1
+ require 'date'
2
+ require_relative 'base_client'
3
+
4
# Scrapes the "Yesterday" report table from the Adforge admin UI using the
# browser session stored in @client by the base class.
class AdopsReportScrapper::AdforgeClient < AdopsReportScrapper::BaseClient
  private

  # Signs in with @login / @secret and confirms success by waiting for an
  # element whose text is "Reports".
  #
  # Raises the underlying lookup error, re-labelled 'Adforge login error',
  # when that element cannot be found.
  def login
    @client.visit 'http://495.as.adforgeinc.com/www/admin/index.php'
    @client.fill_in 'username', :with => @login
    @client.fill_in 'password', :with => @secret
    @client.click_button 'Login'
    begin
      @client.find :xpath, '//*[text()="Reports"]'
    rescue StandardError => e
      # StandardError rather than Exception: interrupts and exit requests
      # must propagate untouched instead of being reported as login failures.
      raise e, 'Adforge login error'
    end
  end

  # Selects the report period, then reads the resulting table into @data.
  def scrap
    request_report
    extract_data_from_report
  end

  # Chooses the "Yesterday" option and pauses for a fixed 5 seconds.
  # NOTE(review): the pause presumably gives the page time to reload the
  # report; there is no explicit readiness check.
  def request_report
    @client.find(:xpath, '//option[text()="Yesterday"]').select_option
    sleep 5
  end

  # Stores the text of every td/th cell, row by row, in @data.
  def extract_data_from_report
    rows = @client.find_all(:xpath, '//td/table/*/tr').to_a
    # Drop the second row (index 1).
    # NOTE(review): presumably a totals row, which the base class contract
    # says must not be included -- confirm against the live page.
    rows.delete_at 1
    @data = rows.map do |tr|
      tr.find_css('td,th').map { |cell| cell.visible_text }
    end
  end
end
@@ -0,0 +1,49 @@
1
+ require 'date'
2
+ require_relative 'base_client'
3
+
4
# Scrapes per-zone performance rows for @date from the Adiply publisher UI
# using the browser session stored in @client by the base class.
class AdopsReportScrapper::AdiplyClient < AdopsReportScrapper::BaseClient
  private

  # Signs in with @login / @secret and confirms success by waiting for an
  # element whose text is "Go".
  #
  # Raises the underlying lookup error, re-labelled 'Adiply login error',
  # when that element cannot be found.
  def login
    @client.visit 'http://pub.adiply.com/login'
    @client.fill_in '_username', :with => @login
    @client.fill_in '_password', :with => @secret
    @client.click_button 'Sign me in'
    begin
      @client.find :xpath, '//*[text()="Go"]'
    rescue StandardError => e
      # StandardError rather than Exception: interrupts and exit requests
      # must propagate untouched instead of being reported as login failures.
      raise e, 'Adiply login error'
    end
  end

  # Collects the zone names from the zone filter, opens the Performance page
  # with the "Last 30 days" preset, then gathers one row per zone into @data.
  def scrap
    zones = @client.find_all(:css, '#AppBundle_filtersForm_zone > option').to_a
    # The first option is discarded.
    # NOTE(review): presumably an "all zones"/placeholder entry -- confirm.
    zones.shift
    zones = zones.map { |zone| zone.text }
    @client.find(:xpath, '//a[contains(text(),"Performance")]').click
    @client.find(:css, '.dr-presets').click
    @client.find(:xpath, '//*[contains(text(),"Last 30 days")]').click
    @data = []
    zones.each do |zone|
      extract_data zone
    end
  end

  # Filters the report by +zone+ and appends the row for @date to @data.
  # The header row is appended first if @data is still empty. The last
  # column of both header and row is overwritten with the zone ('Zone' /
  # zone name), and the first cell of the row with the formatted date.
  #
  # A zone with no row for @date contributes nothing.
  def extract_data(zone)
    @client.find(:xpath, "//option[text()=\"#{zone}\"]").select_option
    @client.find(:xpath, '//*[text()="Go"]').click
    sleep 2
    date_str = @date.strftime '%m/%d/%Y'
    if @data.empty?
      header = @client.find_all(:xpath, '//table/thead/tr/th').map { |th| th.text }
      header[-1] = 'Zone'
      @data << header
    end
    # All cells of any table row that has a cell containing the date.
    tds = @client.find_all :xpath, "//td[../td[contains(text(),\"#{date_str}\")]]"
    row = tds.map { |td| td.text }
    # Without this guard `row[-1] = zone` raises IndexError on an empty array,
    # aborting the whole scrape because one zone has no data for the date.
    return if row.empty?

    row[-1] = zone
    row[0] = date_str
    @data << row
  end
end
@@ -0,0 +1,55 @@
1
+ require 'date'
2
+ require_relative 'base_client'
3
+ require 'google/api_client'
4
+ require 'google/api_client/service'
5
+ require 'google/api_client/client_secrets'
6
+ require 'google/api_client/auth/file_storage'
7
+ require 'google/api_client/auth/installed_app'
8
+ require 'csv'
9
+
10
# Fetches yesterday's AdSense report as CSV through the Google API client.
#
# Requires an 'account_id' entry in the options hash passed to the
# constructor (String or Symbol key). @login / @secret are used as the OAuth
# client id / client secret.
class AdopsReportScrapper::AdsenseClient < AdopsReportScrapper::BaseClient
  API_NAME = 'adsense'
  API_VERSION = 'v1.4'
  CREDENTIAL_STORE_FILE = "#{API_NAME}-oauth2.json"
  API_SCOPE = 'https://www.googleapis.com/auth/adsense.readonly'

  private

  # Builds the Google API service object in @client.
  #
  # Stored credentials are read from CREDENTIAL_STORE_FILE; when none are
  # present the installed-app OAuth flow is run and given that same storage.
  #
  # Raises RuntimeError when no account id was supplied.
  def init_client
    # Accept both key styles: the base class documents Symbol-keyed options
    # (:date) while this option was historically looked up by String key.
    @account_id = @options['account_id'] || @options[:account_id]
    fail 'please specify adsense account id' unless @account_id

    file_storage = Google::APIClient::FileStorage.new(CREDENTIAL_STORE_FILE)
    authorization = file_storage.authorization
    if authorization.nil?
      flow = Google::APIClient::InstalledAppFlow.new(
        :client_id => @login,
        :client_secret => @secret,
        :scope => [API_SCOPE]
      )
      authorization = flow.authorize(file_storage)
    end

    @client = Google::APIClient::Service.new(API_NAME, API_VERSION,
      {
        :application_name => "Ruby #{API_NAME} ad report scrapper",
        :application_version => '1.0.0',
        :authorization => authorization
      }
    )
  end

  # Generates the report for 'today-1d' and parses the CSV body into @data.
  # The period is fixed and does not read @date.
  def scrap
    result = @client.accounts.reports.generate(
      :accountId => @account_id,
      :startDate => 'today-1d',
      :endDate => 'today-1d',
      :metric => ['AD_REQUESTS', 'INDIVIDUAL_AD_IMPRESSIONS', 'CLICKS', 'EARNINGS'],
      :dimension => ['DATE', 'AD_UNIT_CODE', 'AD_UNIT_NAME', 'COUNTRY_CODE', 'PLATFORM_TYPE_NAME'],
      :alt => 'csv').execute
    @data = CSV.parse(result.body)
  end
end
@@ -0,0 +1,27 @@
1
+ require 'date'
2
+ require_relative 'base_client'
3
+ require 'rest-client'
4
+
5
# Fetches the AdSupply publisher report for @date with a single HTTP POST.
# @login is used as the publisher id and @secret as the API key.
class AdopsReportScrapper::AdsupplyClient < AdopsReportScrapper::BaseClient
  # Intentionally empty: this client never creates a browser session.
  def init_client
  end

  # Intentionally empty: the base implementation screenshots @client, which
  # is never assigned by this class.
  def before_quit_with_error
  end

  private

  # Posts the export request and converts the JSON reply into @data:
  # a header row (keys of the first record) followed by one row per record.
  # An empty reply yields an empty @data.
  def scrap
    date_str = @date.strftime('%-m/%-d/%Y')
    time_zone_id = 'Eastern Standard Time'

    response = RestClient.post(
      "https://ui.adsupply.com/PublicPortal/Publisher/#{@login}/Report/Export",
      SqlCommandId: '',
      ExportToExcel: 'False',
      IsOLAP: 'False',
      DateFilter: date_str,
      TimeZoneId: time_zone_id,
      Grouping: '1',
      'DimPublisher.Value': "#{@login}~",
      'DimPublisher.IsActive': 'True',
      'DimSiteName.Value': '',
      'DimSiteName.IsActive': 'True',
      'DimCountry.Value': '',
      'DimCountry.IsActive': 'True',
      'DimMediaType.Value': '',
      'DimMediaType.IsActive': 'True',
      ApiKey: @secret
    )

    records = JSON.parse response
    # With no records there is no first element to take the header from;
    # previously this path crashed with NoMethodError on nil.
    if records.empty?
      @data = []
      return
    end

    header = records[0].keys
    @data = [header]
    @data += records.map do |record|
      header.map { |key| record[key] }
    end
  end
end
@@ -0,0 +1,55 @@
1
+ require 'date'
2
+ require_relative 'base_client'
3
+ require 'google/api_client'
4
+ require 'google/api_client/service'
5
+ require 'google/api_client/client_secrets'
6
+ require 'google/api_client/auth/file_storage'
7
+ require 'google/api_client/auth/installed_app'
8
+ require 'csv'
9
+
10
# Fetches yesterday's Ad Exchange Seller report as CSV through the Google API
# client.
#
# Requires an 'account_id' entry in the options hash passed to the
# constructor (String or Symbol key). @login / @secret are used as the OAuth
# client id / client secret.
class AdopsReportScrapper::AdxClient < AdopsReportScrapper::BaseClient
  API_NAME = 'adexchangeseller'
  API_VERSION = 'v2.0'
  CREDENTIAL_STORE_FILE = "#{API_NAME}-oauth2.json"
  API_SCOPE = 'https://www.googleapis.com/auth/adexchange.seller.readonly'

  private

  # Builds the Google API service object in @client.
  #
  # Stored credentials are read from CREDENTIAL_STORE_FILE; when none are
  # present the installed-app OAuth flow is run and given that same storage.
  #
  # Raises RuntimeError when no account id was supplied.
  def init_client
    # Accept both key styles: the base class documents Symbol-keyed options
    # (:date) while this option was historically looked up by String key.
    @account_id = @options['account_id'] || @options[:account_id]
    fail 'please specify adx account id' unless @account_id

    file_storage = Google::APIClient::FileStorage.new(CREDENTIAL_STORE_FILE)
    authorization = file_storage.authorization
    if authorization.nil?
      flow = Google::APIClient::InstalledAppFlow.new(
        :client_id => @login,
        :client_secret => @secret,
        :scope => [API_SCOPE]
      )
      authorization = flow.authorize(file_storage)
    end

    @client = Google::APIClient::Service.new(API_NAME, API_VERSION,
      {
        :application_name => "Ruby #{API_NAME} ad report scrapper",
        :application_version => '1.0.0',
        :authorization => authorization
      }
    )
  end

  # Generates the report for 'today-1d' and parses the CSV body into @data.
  # The period is fixed and does not read @date.
  def scrap
    result = @client.accounts.reports.generate(
      :accountId => @account_id,
      :startDate => 'today-1d',
      :endDate => 'today-1d',
      :metric => ['AD_REQUESTS', 'AD_IMPRESSIONS', 'CLICKS', 'EARNINGS'],
      :dimension => ['DATE', 'DFP_AD_UNITS', 'DFP_AD_UNIT_ID', 'COUNTRY_CODE', 'PLATFORM_TYPE_NAME'],
      :alt => 'csv').execute
    @data = CSV.parse(result.body)
  end
end
@@ -0,0 +1,74 @@
1
+ require 'date'
2
+ require 'phantomjs'
3
+ require 'capybara'
4
+ require 'capybara/poltergeist'
5
+
6
# Common skeleton for all report scrapers: set up a client, log in, scrape,
# log out. Subclasses override the individual steps.
class AdopsReportScrapper::BaseClient
  # login: username, id, email, or api token
  # secret: password or api secret
  # options: {
  #   :date => (optional) default: yesterday
  # }
  def initialize(login, secret, options = nil)
    @login = login
    @secret = secret
    @options = options || {}
    @date = @options[:date] || Date.today.prev_day
  end

  # Runs the full scrape cycle and returns the collected data.
  #
  # date: (optional) overrides the date chosen at construction time
  #
  # Returns @data: an array of arrays, the first array being the headers, no
  # total included.
  #
  # Raises RuntimeError when the date is rejected by #date_supported?.
  # Errors raised by #scrap are re-raised after a best-effort
  # #before_quit_with_error and #logout.
  def get_data(date = nil)
    @date = date if date
    fail "specified date is not supported by this scrapper #{self.class.name}" unless date_supported?
    init_client
    login
    begin
      scrap
    rescue StandardError => e
      # StandardError rather than Exception in both handlers: an interrupt or
      # exit request should stop the process immediately, and must never be
      # silently discarded by the cleanup handler below.
      begin
        before_quit_with_error
        logout
      rescue StandardError => _e
        # cleanup is best effort; the original scrape error is what matters
      end
      raise e
    end
    logout
    @data
  end

  # Creates a Poltergeist/PhantomJS backed Capybara session in @client with
  # JavaScript errors ignored, a 1920x985 window and a 10 second wait limit.
  def init_client
    Capybara.register_driver :poltergeist do |app|
      Capybara::Poltergeist::Driver.new(app, :phantomjs => Phantomjs.path)
    end
    Capybara.default_max_wait_time = 10
    @client = Capybara::Session.new(:poltergeist)
    @client.driver.browser.js_errors = false
    @client.driver.resize(1920, 985)
  end

  def login
    # do nothing by default
  end

  def scrap
    # do nothing by default
  end

  # logout can be optional
  def logout
    # do nothing by default
  end

  # Hook invoked by #get_data when #scrap fails; saves a screenshot of the
  # current page.
  def before_quit_with_error
    @client.save_screenshot
  end

  # by default only support yesterday
  #
  # date: (optional) date to check; defaults to @date
  def date_supported?(date = nil)
    (date || @date) == Date.today.prev_day
  end
end
@@ -0,0 +1,47 @@
1
+ require 'date'
2
+ require_relative 'base_client'
3
+
4
# Scrapes the "Yesterday" Tags table from the Brightroll UI using the browser
# session stored in @client by the base class.
class AdopsReportScrapper::BrightrollClient < AdopsReportScrapper::BaseClient
  private

  # Signs in with @login / @secret and confirms success by waiting for an
  # element whose text is "Tags".
  #
  # Raises the underlying lookup error, re-labelled 'Brightroll login error',
  # when that element cannot be found.
  def login
    @client.visit 'https://login.brightroll.com/login'
    @client.fill_in 'user_login', :with => @login
    @client.fill_in 'user_password', :with => @secret
    @client.click_button 'Sign In'
    begin
      @client.find :xpath, '//*[text()="Tags"]'
    rescue StandardError => e
      # StandardError rather than Exception: interrupts and exit requests
      # must propagate untouched instead of being reported as login failures.
      raise e, 'Brightroll login error'
    end
  end

  # Requests the report, then reads the resulting table into @data.
  def scrap
    request_report
    extract_data_from_report
  end

  # Opens the Tags view, picks "Yesterday" in the date filter and waits until
  # the loading indicator can no longer be found.
  def request_report
    @client.find(:xpath, '//*[text()="Tags"]').click
    @client.find(:css, '.details-date-filter').click
    # select date
    @client.find(:xpath, '//*[text()="Yesterday"]').click

    30.times do |_i| # wait 5 min
      begin
        @client.find(:css, '.bubble-loader.bubble-loader-3')
      rescue StandardError
        # Loader not found => loading is over. Only StandardError is treated
        # as that signal so an interrupt still stops the wait.
        break
      end
      sleep 10
    end
    sleep 10
  end

  # Stores the text of every td/th cell, row by row, in @data.
  def extract_data_from_report
    rows = @client.find_all(:xpath, '//table[1]/*/tr').to_a
    # Drop the second row (index 1).
    # NOTE(review): presumably a totals row, which the base class contract
    # says must not be included -- confirm against the live page.
    rows.delete_at 1
    @data = rows.map do |tr|
      tr.find_css('td,th').map { |cell| cell.visible_text }
    end
  end
end
@@ -0,0 +1,44 @@
1
+ require 'date'
2
+ require_relative 'base_client'
3
+ require 'open-uri'
4
+
5
# Scrapes per-site revenue and page views for @date from the Brow.si reports
# UI using the browser session stored in @client by the base class.
class AdopsReportScrapper::BrowsiClient < AdopsReportScrapper::BaseClient
  private

  # Signs in with @login / @secret and confirms success by waiting for the
  # '.ico-calendar' element.
  #
  # Raises the underlying lookup error, re-labelled 'Browsi login error',
  # when that element cannot be found.
  def login
    @client.visit 'https://reports.brow.si'
    @client.fill_in 'Email', :with => @login
    @client.fill_in 'Password', :with => @secret
    @client.find(:xpath, '//*[text()="Login"]').click
    begin
      @client.find :css, '.ico-calendar'
    rescue StandardError => e
      # StandardError rather than Exception: interrupts and exit requests
      # must propagate untouched instead of being reported as login failures.
      raise e, 'Browsi login error'
    end
  end

  # Requests the report, then reads the per-site figures into @data.
  def scrap
    request_report
    extract_data_from_report
  end

  # Opens the report page with a dateRange query value covering @date from
  # local midnight to one second before the next midnight, expressed in UTC,
  # then pauses for a fixed 10 seconds.
  def request_report
    date_range_obj = {
      startDate: @date.to_time.utc.strftime('%FT%T.000Z'),
      endDate: (@date.to_time + 86400 - 1).utc.strftime('%FT%T.999Z')
    }
    # URI::encode was removed in Ruby 3.0. encode_www_form_component
    # percent-encodes the same JSON and additionally escapes ':' and ',',
    # which decode back to the identical value.
    encoded_range = URI.encode_www_form_component(date_range_obj.to_json)
    @client.visit "https://reports.brow.si/client/app/index.html#/report/home?dateRange=#{encoded_range}"
    sleep 10
  end

  # Builds @data as a fixed header row followed by one
  # [site, revenues, page views] row per site list item.
  def extract_data_from_report
    rows = @client.find_all :xpath, '//li[@ng-class="{opened:site.isGraphOpen}"]'
    @data = [['Site', 'Revenues', 'Page Views']]
    @data += rows.map do |row|
      site = row.find_css('.header-full').first.visible_text
      # The figures are looked up relative to this row's own XPath: the first
      # element that has a sibling <span> with the given label.
      rev = @client.find_all(:xpath, row.path + '//*[../span[text()="Revenues"]]').first.text
      pv = @client.find_all(:xpath, row.path + '//*[../span[text()="Page Views"]]').first.text
      [site, rev, pv]
    end
  end
end
@@ -0,0 +1,91 @@
1
+ require 'date'
2
+ require_relative 'base_client'
3
+
4
# Scrapes yesterday's device/geo report for every widget from the Content.ad
# UI.
#
# Please ensure that all live widgets have distinct names: each widget is
# selected in the report filter by its name.
class AdopsReportScrapper::ContentadClient < AdopsReportScrapper::BaseClient
  # Same browser session setup as the base class, but with a 3 second wait
  # limit instead of 10.
  def init_client
    Capybara.register_driver :poltergeist do |app|
      Capybara::Poltergeist::Driver.new(app, :phantomjs => Phantomjs.path)
    end
    Capybara.default_max_wait_time = 3
    @client = Capybara::Session.new(:poltergeist)
    @client.driver.browser.js_errors = false
    @client.driver.resize(1920, 985)
  end

  private

  # Signs in with @login / @secret and confirms success by waiting for an
  # element whose text is "Widget Report".
  #
  # Raises the underlying lookup error, re-labelled 'Contentad login error',
  # when that element cannot be found.
  def login
    @client.visit 'https://www.content.ad/Login.aspx'
    @client.fill_in 'Email', :with => @login
    @client.fill_in 'Password', :with => @secret
    @client.click_button 'Log In'
    begin
      @client.find :xpath, '//*[text()="Widget Report"]'
    rescue StandardError => e
      # StandardError rather than Exception: interrupts and exit requests
      # must propagate untouched instead of being reported as login failures.
      raise e, 'Contentad login error'
    end
  end

  # Reads the widget list (name and domain) from the Widgets view for
  # "Yesterday", then opens the device/geo report and collects the rows of
  # each widget into @data.
  def scrap
    @client.find(:xpath, '//option[text()="Yesterday"]').select_option
    @client.click_link 'Apply'
    wait_for_loading

    @client.find(:xpath, '//span[text()="Widgets"]').click
    wait_for_loading

    rows = @client.find_all :xpath, '//table/tbody/tr'
    widgets = rows.map do |row|
      cells = row.find_css('td').map { |td| td.visible_text }
      { widget_name: cells[0], domain_name: cells[1] }
    end

    @client.visit 'https://app.content.ad/Publisher/DeviceGeoReport'
    @client.find(:xpath, '//option[text()="Yesterday"]').select_option
    @client.click_link 'Apply'
    wait_for_loading

    @data = []
    widgets.each do |widget|
      request_report widget
    end
  end

  # Filters the report by the widget's name and appends its rows to @data.
  def request_report(widget)
    @client.find(:xpath, "//option[text()=\"#{widget[:widget_name]}\"]").select_option
    @client.click_link 'Apply'
    wait_for_loading
    extract_data_from_report widget
  end

  # Appends the current report table to @data, prefixing every row with the
  # date, the widget's domain and the widget's name. The table's first row is
  # used as header (only when @data is still empty) and its last row is
  # discarded.
  # NOTE(review): the discarded last row is presumably a totals row --
  # confirm against the live page.
  def extract_data_from_report(widget)
    rows = @client.find_all(:xpath, '//table/*/tr').to_a
    header = rows.shift
    if @data.empty?
      header = ['Date', 'Domain', 'Widget'] + header.find_css('td,th').map { |td| td.visible_text }
      @data << header
    end
    rows.pop
    @data += rows.map do |tr|
      cells = tr.find_css('td,th').map { |td| td.visible_text }
      [@date.to_s, widget[:domain_name], widget[:widget_name]] + cells
    end
  end

  # Polls for the '#loadingProgress' element up to 18 times, sleeping 3
  # seconds after each sighting, and stops as soon as it can no longer be
  # found. Always finishes with a 1 second pause.
  def wait_for_loading
    18.times do |_i|
      begin
        @client.find(:css, '#loadingProgress')
      rescue StandardError
        # Indicator not found => loading is over. Only StandardError is
        # treated as that signal so an interrupt still stops the wait.
        break
      end
      sleep 3
    end
    sleep 1
  end
end