adops_report_scrapper 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (41) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +16 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +3 -0
  5. data/CODE_OF_CONDUCT.md +13 -0
  6. data/Gemfile +18 -0
  7. data/LICENSE.txt +21 -0
  8. data/README.md +51 -0
  9. data/Rakefile +152 -0
  10. data/adops_report_scrapper.gemspec +24 -0
  11. data/bin/console +14 -0
  12. data/bin/setup +7 -0
  13. data/lib/adops_report_scrapper.rb +38 -0
  14. data/lib/adops_report_scrapper/adaptv_client.rb +80 -0
  15. data/lib/adops_report_scrapper/adforge_client.rb +35 -0
  16. data/lib/adops_report_scrapper/adiply_client.rb +49 -0
  17. data/lib/adops_report_scrapper/adsense_client.rb +55 -0
  18. data/lib/adops_report_scrapper/adsupply_client.rb +27 -0
  19. data/lib/adops_report_scrapper/adx_client.rb +55 -0
  20. data/lib/adops_report_scrapper/base_client.rb +74 -0
  21. data/lib/adops_report_scrapper/brightroll_client.rb +47 -0
  22. data/lib/adops_report_scrapper/browsi_client.rb +44 -0
  23. data/lib/adops_report_scrapper/contentad_client.rb +91 -0
  24. data/lib/adops_report_scrapper/conversant_client.rb +66 -0
  25. data/lib/adops_report_scrapper/criteo_client.rb +25 -0
  26. data/lib/adops_report_scrapper/facebookaudience_client.rb +56 -0
  27. data/lib/adops_report_scrapper/gcs_client.rb +56 -0
  28. data/lib/adops_report_scrapper/liveintent_client.rb +90 -0
  29. data/lib/adops_report_scrapper/marfeel_client.rb +59 -0
  30. data/lib/adops_report_scrapper/nativo_client.rb +60 -0
  31. data/lib/adops_report_scrapper/netseer_client.rb +73 -0
  32. data/lib/adops_report_scrapper/openx_client.rb +161 -0
  33. data/lib/adops_report_scrapper/revcontent_client.rb +51 -0
  34. data/lib/adops_report_scrapper/sonobi_client.rb +91 -0
  35. data/lib/adops_report_scrapper/springserve_client.rb +52 -0
  36. data/lib/adops_report_scrapper/tremor_client.rb +52 -0
  37. data/lib/adops_report_scrapper/triplelift_client.rb +72 -0
  38. data/lib/adops_report_scrapper/version.rb +3 -0
  39. data/secret.sample.yml +77 -0
  40. data/tmp/.keep +0 -0
  41. metadata +117 -0
@@ -0,0 +1,35 @@
1
+ require 'date'
2
+ require_relative 'base_client'
3
+
4
# Scrapes the "Yesterday" report table from the Adforge admin UI, driving the
# browser session held in @client.
class AdopsReportScrapper::AdforgeClient < AdopsReportScrapper::BaseClient
  private

  # Signs in with @login / @secret and confirms success by locating the
  # "Reports" element.
  #
  # Raises the underlying lookup error, re-labelled 'Adforge login error',
  # when that element cannot be found.
  def login
    @client.visit 'http://495.as.adforgeinc.com/www/admin/index.php'
    @client.fill_in 'username', :with => @login
    @client.fill_in 'password', :with => @secret
    @client.click_button 'Login'
    begin
      @client.find :xpath, '//*[text()="Reports"]'
    rescue StandardError => e
      # StandardError rather than Exception: Interrupt / SystemExit must not be
      # re-labelled as a login failure.
      raise e, 'Adforge login error'
    end
  end

  # Selects the report period, then reads the resulting table into @data.
  def scrap
    request_report
    extract_data_from_report
  end

  # Picks the "Yesterday" option of the period selector.
  def request_report
    @client.find(:xpath, '//option[text()="Yesterday"]').select_option
    # Fixed pause; presumably gives the page time to reload the report -- confirm.
    sleep 5
  end

  # Stores the visible text of every table cell in @data as an array of rows.
  def extract_data_from_report
    rows = @client.find_all(:xpath, '//td/table/*/tr').to_a
    # The second row is dropped; presumably it is a totals row -- confirm.
    rows.delete_at 1
    @data = rows.map do |tr|
      tr.find_css('td,th').map { |cell| cell.visible_text }
    end
  end
end
@@ -0,0 +1,49 @@
1
+ require 'date'
2
+ require_relative 'base_client'
3
+
4
# Scrapes per-zone performance figures for @date from the Adiply publisher UI,
# driving the browser session held in @client.
class AdopsReportScrapper::AdiplyClient < AdopsReportScrapper::BaseClient
  private

  # Signs in with @login / @secret and confirms success by locating the "Go"
  # element.
  #
  # Raises the underlying lookup error, re-labelled 'Adiply login error', when
  # that element cannot be found.
  def login
    @client.visit 'http://pub.adiply.com/login'
    @client.fill_in '_username', :with => @login
    @client.fill_in '_password', :with => @secret
    @client.click_button 'Sign me in'
    begin
      @client.find :xpath, '//*[text()="Go"]'
    rescue StandardError => e
      # StandardError rather than Exception: Interrupt / SystemExit must not be
      # re-labelled as a login failure.
      raise e, 'Adiply login error'
    end
  end

  # Collects the zone names from the zone filter, switches the performance
  # view to the "Last 30 days" preset and extracts one row per zone into @data.
  def scrap
    zone_options = @client.find_all(:css, '#AppBundle_filtersForm_zone > option').to_a
    # The first option is skipped; presumably it is an "all zones" entry -- confirm.
    zones = zone_options.drop(1).map { |option| option.text }
    @client.find(:xpath, '//a[contains(text(),"Performance")]').click
    @client.find(:css, '.dr-presets').click
    @client.find(:xpath, '//*[contains(text(),"Last 30 days")]').click
    @data = []
    zones.each { |zone| extract_data zone }
  end

  # Filters the report by +zone+ and appends the row for @date to @data.
  # The header row is appended first when @data is still empty.
  #
  # zone - zone name exactly as shown in the zone filter.
  def extract_data(zone)
    @client.find(:xpath, "//option[text()=\"#{zone}\"]").select_option
    @client.find(:xpath, '//*[text()="Go"]').click
    # Fixed pause; presumably gives the table time to refresh -- confirm.
    sleep 2
    date_str = @date.strftime '%m/%d/%Y'
    if @data.empty?
      header = @client.find_all(:xpath, '//table/thead/tr/th').map { |th| th.text }
      # The last column is repurposed to carry the zone name.
      header[-1] = 'Zone'
      @data << header
    end
    cells = @client.find_all :xpath, "//td[../td[contains(text(),\"#{date_str}\")]]"
    row = cells.map { |td| td.text }
    # No row for this date in this zone: skip it. Previously the negative-index
    # assignment below raised IndexError on the empty array.
    return if row.empty?
    row[-1] = zone
    row[0] = date_str
    @data << row
  end
end
@@ -0,0 +1,55 @@
1
+ require 'date'
2
+ require_relative 'base_client'
3
+ require 'google/api_client'
4
+ require 'google/api_client/service'
5
+ require 'google/api_client/client_secrets'
6
+ require 'google/api_client/auth/file_storage'
7
+ require 'google/api_client/auth/installed_app'
8
+ require 'csv'
9
+
10
+ # requires an options hash containing 'account_id' to be passed into the constructor
11
# Pulls yesterday's AdSense report through the Google API client.
# @login / @secret are used as the OAuth client id / client secret, and
# @options['account_id'] names the AdSense account to report on.
class AdopsReportScrapper::AdsenseClient < AdopsReportScrapper::BaseClient
  API_NAME = 'adsense'
  API_VERSION = 'v1.4'
  CREDENTIAL_STORE_FILE = "#{API_NAME}-oauth2.json"
  API_SCOPE = 'https://www.googleapis.com/auth/adsense.readonly'

  private

  # Builds the API service in @client, reusing the authorization stored in
  # CREDENTIAL_STORE_FILE or running the installed-app OAuth flow when the
  # store holds none.
  #
  # Raises RuntimeError when no 'account_id' option was supplied.
  def init_client
    account_id = @options['account_id']
    fail 'please specify adsense account id' unless account_id
    @account_id = account_id

    storage = Google::APIClient::FileStorage.new(CREDENTIAL_STORE_FILE)
    authorization =
      if storage.authorization.nil?
        Google::APIClient::InstalledAppFlow.new(
          client_id: @login,
          client_secret: @secret,
          scope: [API_SCOPE]
        ).authorize(storage)
      else
        storage.authorization
      end

    service_options = {
      application_name: "Ruby #{API_NAME} ad report scrapper",
      application_version: '1.0.0',
      authorization: authorization
    }
    @client = Google::APIClient::Service.new(API_NAME, API_VERSION, service_options)
  end

  # Requests the report for 'today-1d' as CSV and parses it into @data.
  def scrap
    report = @client.accounts.reports.generate(
      accountId: @account_id,
      startDate: 'today-1d',
      endDate: 'today-1d',
      metric: %w[AD_REQUESTS INDIVIDUAL_AD_IMPRESSIONS CLICKS EARNINGS],
      dimension: %w[DATE AD_UNIT_CODE AD_UNIT_NAME COUNTRY_CODE PLATFORM_TYPE_NAME],
      alt: 'csv'
    ).execute
    @data = CSV.parse(report.body)
  end
end
@@ -0,0 +1,27 @@
1
+ require 'date'
2
+ require_relative 'base_client'
3
+ require 'rest-client'
4
+
5
# Fetches the AdSupply publisher report for @date with a single HTTP POST.
# @login is used as the publisher id and @secret as the API key.
class AdopsReportScrapper::AdsupplyClient < AdopsReportScrapper::BaseClient
  # Intentionally empty: this client does not create a browser session.
  def init_client
  end

  # Intentionally empty: there is no browser session to take a screenshot of.
  def before_quit_with_error
  end

  private

  # Posts the export request and converts the JSON answer into @data: a header
  # row (the keys of the first record) followed by one row per record.
  # An empty answer yields an empty @data.
  def scrap
    date_str = @date.strftime('%-m/%-d/%Y')
    time_zone_id = 'Eastern Standard Time'

    export_url = "https://ui.adsupply.com/PublicPortal/Publisher/#{@login}/Report/Export"
    payload = {
      SqlCommandId: '',
      ExportToExcel: 'False',
      IsOLAP: 'False',
      DateFilter: date_str,
      TimeZoneId: time_zone_id,
      Grouping: '1',
      'DimPublisher.Value': "#{@login}~",
      'DimPublisher.IsActive': 'True',
      'DimSiteName.Value': '',
      'DimSiteName.IsActive': 'True',
      'DimCountry.Value': '',
      'DimCountry.IsActive': 'True',
      'DimMediaType.Value': '',
      'DimMediaType.IsActive': 'True',
      ApiKey: @secret
    }
    response = RestClient.post export_url, payload

    records = JSON.parse response
    # Nothing reported: previously records[0].keys raised NoMethodError on nil.
    if records.empty?
      @data = []
      return
    end

    header = records[0].keys
    @data = [header]
    @data += records.map do |record|
      header.map { |key| record[key] }
    end
  end
end
@@ -0,0 +1,55 @@
1
+ require 'date'
2
+ require_relative 'base_client'
3
+ require 'google/api_client'
4
+ require 'google/api_client/service'
5
+ require 'google/api_client/client_secrets'
6
+ require 'google/api_client/auth/file_storage'
7
+ require 'google/api_client/auth/installed_app'
8
+ require 'csv'
9
+
10
+ # requires an options hash containing 'account_id' to be passed into the constructor
11
# Pulls yesterday's Ad Exchange seller report through the Google API client.
# @login / @secret are used as the OAuth client id / client secret, and
# @options['account_id'] names the account to report on.
class AdopsReportScrapper::AdxClient < AdopsReportScrapper::BaseClient
  API_NAME = 'adexchangeseller'
  API_VERSION = 'v2.0'
  CREDENTIAL_STORE_FILE = "#{API_NAME}-oauth2.json"
  API_SCOPE = 'https://www.googleapis.com/auth/adexchange.seller.readonly'

  private

  # Builds the API service in @client, reusing the authorization stored in
  # CREDENTIAL_STORE_FILE or running the installed-app OAuth flow when the
  # store holds none.
  #
  # Raises RuntimeError when no 'account_id' option was supplied.
  def init_client
    account_id = @options['account_id']
    fail 'please specify adx account id' unless account_id
    @account_id = account_id

    storage = Google::APIClient::FileStorage.new(CREDENTIAL_STORE_FILE)
    authorization =
      if storage.authorization.nil?
        Google::APIClient::InstalledAppFlow.new(
          client_id: @login,
          client_secret: @secret,
          scope: [API_SCOPE]
        ).authorize(storage)
      else
        storage.authorization
      end

    service_options = {
      application_name: "Ruby #{API_NAME} ad report scrapper",
      application_version: '1.0.0',
      authorization: authorization
    }
    @client = Google::APIClient::Service.new(API_NAME, API_VERSION, service_options)
  end

  # Requests the report for 'today-1d' as CSV and parses it into @data.
  def scrap
    report = @client.accounts.reports.generate(
      accountId: @account_id,
      startDate: 'today-1d',
      endDate: 'today-1d',
      metric: %w[AD_REQUESTS AD_IMPRESSIONS CLICKS EARNINGS],
      dimension: %w[DATE DFP_AD_UNITS DFP_AD_UNIT_ID COUNTRY_CODE PLATFORM_TYPE_NAME],
      alt: 'csv'
    ).execute
    @data = CSV.parse(report.body)
  end
end
@@ -0,0 +1,74 @@
1
+ require 'date'
2
+ require 'phantomjs'
3
+ require 'capybara'
4
+ require 'capybara/poltergeist'
5
+
6
# Common skeleton for all report scrapers: validates the date, creates a
# client, logs in, scrapes and logs out. Subclasses override the hook methods
# (init_client, login, scrap, logout, before_quit_with_error, date_supported?).
class AdopsReportScrapper::BaseClient
  # login: username, id, email, or api token
  # secret: password or api secret
  # options: {
  #   :date => (optional) default: yesterday
  # }
  def initialize(login, secret, options = nil)
    @login = login
    @secret = secret
    @options = options || {}
    @date = @options[:date] || Date.today.prev_day
  end

  # date: (optional) overrides the date chosen at construction time
  # return data in array of array, first array is the headers, no total included
  #
  # Raises RuntimeError when date_supported? rejects the date, and re-raises
  # any error raised by scrap after attempting the cleanup hooks.
  def get_data(date = nil)
    @date = date if date
    fail "specified date is not supported by this scrapper #{self.class.name}" unless date_supported?
    init_client
    login
    begin
      scrap
    rescue StandardError => e
      # Each cleanup step is best-effort on its own, so a failing screenshot no
      # longer prevents the logout attempt. send is used because subclasses may
      # declare their hook overrides private.
      [:before_quit_with_error, :logout].each do |cleanup_step|
        begin
          send(cleanup_step)
        rescue StandardError
          # ignore: the scrape error below is the one worth reporting
        end
      end
      raise e
    end
    logout
    @data
  end

  # Creates a PhantomJS-backed Capybara session in @client.
  # Note that default_max_wait_time is a global Capybara setting.
  def init_client
    Capybara.register_driver :poltergeist do |app|
      Capybara::Poltergeist::Driver.new(app, :phantomjs => Phantomjs.path)
    end
    Capybara.default_max_wait_time = 10
    @client = Capybara::Session.new(:poltergeist)
    @client.driver.browser.js_errors = false
    @client.driver.resize(1920, 985)
  end

  def login
    # do nothing by default
  end

  def scrap
    # do nothing by default
  end

  # logout can be optional
  def logout
    # do nothing by default
  end

  # Hook run when scrap fails; by default saves a screenshot of the session.
  def before_quit_with_error
    @client.save_screenshot
  end

  # by default only support yesterday
  #
  # date: (optional) date to check; defaults to @date
  def date_supported?(date = nil)
    candidate = date || @date
    candidate == Date.today.prev_day ? true : false
  end
end
@@ -0,0 +1,47 @@
1
+ require 'date'
2
+ require_relative 'base_client'
3
+
4
# Scrapes the "Yesterday" tag report table from the Brightroll UI, driving the
# browser session held in @client.
class AdopsReportScrapper::BrightrollClient < AdopsReportScrapper::BaseClient
  private

  # Signs in with @login / @secret and confirms success by locating the "Tags"
  # element.
  #
  # Raises the underlying lookup error, re-labelled 'Brightroll login error',
  # when that element cannot be found.
  def login
    @client.visit 'https://login.brightroll.com/login'
    @client.fill_in 'user_login', :with => @login
    @client.fill_in 'user_password', :with => @secret
    @client.click_button 'Sign In'
    begin
      @client.find :xpath, '//*[text()="Tags"]'
    rescue StandardError => e
      # StandardError rather than Exception: Interrupt / SystemExit must not be
      # re-labelled as a login failure.
      raise e, 'Brightroll login error'
    end
  end

  # Requests the report, then reads the resulting table into @data.
  def scrap
    request_report
    extract_data_from_report
  end

  # Opens the "Tags" view, selects "Yesterday" in the date filter and waits
  # until the loading indicator can no longer be found.
  def request_report
    @client.find(:xpath, '//*[text()="Tags"]').click
    @client.find(:css, '.details-date-filter').click
    # select date
    @client.find(:xpath, '//*[text()="Yesterday"]').click

    # At most 30 polls with a 10 s pause after each one (about 5 minutes).
    30.times do
      begin
        @client.find(:css, '.bubble-loader.bubble-loader-3')
      rescue StandardError
        # The loader cannot be found any more: stop waiting. StandardError
        # rather than Exception, so an Interrupt aborts the run instead of
        # merely ending the wait.
        break
      end
      sleep 10
    end
    # Extra fixed pause after the loader is gone.
    sleep 10
  end

  # Stores the visible text of every table cell in @data as an array of rows.
  def extract_data_from_report
    rows = @client.find_all(:xpath, '//table[1]/*/tr').to_a
    # The second row is dropped; presumably it is a totals row -- confirm.
    rows.delete_at 1
    @data = rows.map do |tr|
      tr.find_css('td,th').map { |cell| cell.visible_text }
    end
  end
end
@@ -0,0 +1,44 @@
1
+ require 'date'
2
+ require_relative 'base_client'
3
+ require 'open-uri'
4
+
5
# Scrapes per-site revenue and page views for @date from the Brow.si reports
# UI, driving the browser session held in @client.
class AdopsReportScrapper::BrowsiClient < AdopsReportScrapper::BaseClient
  private

  # Signs in with @login / @secret and confirms success by locating the
  # calendar icon.
  #
  # Raises the underlying lookup error, re-labelled 'Browsi login error', when
  # that element cannot be found.
  def login
    @client.visit 'https://reports.brow.si'
    @client.fill_in 'Email', :with => @login
    @client.fill_in 'Password', :with => @secret
    @client.find(:xpath, '//*[text()="Login"]').click
    begin
      @client.find :css, '.ico-calendar'
    rescue StandardError => e
      # StandardError rather than Exception: Interrupt / SystemExit must not be
      # re-labelled as a login failure.
      raise e, 'Browsi login error'
    end
  end

  # Opens the report for @date, then reads it into @data.
  def scrap
    request_report
    extract_data_from_report
  end

  # Visits the report page with a dateRange parameter spanning @date, from
  # local midnight to one second before the next midnight, expressed in UTC.
  def request_report
    date_range_obj = {
      startDate: @date.to_time.utc.strftime('%FT%T.000Z'),
      endDate: (@date.to_time + 86400 - 1).utc.strftime('%FT%T.999Z')
    }
    # URI.encode was removed in Ruby 3.0; the default parser's escape applies
    # the same escaping and is available on old and new Rubies alike.
    date_range = URI::DEFAULT_PARSER.escape(date_range_obj.to_json)
    @client.visit "https://reports.brow.si/client/app/index.html#/report/home?dateRange=#{date_range}"
    # Fixed pause; presumably gives the single-page app time to render -- confirm.
    sleep 10
  end

  # Builds @data as a header row followed by one [site, revenues, page views]
  # row per site list item.
  def extract_data_from_report
    site_items = @client.find_all :xpath, '//li[@ng-class="{opened:site.isGraphOpen}"]'
    @data = [['Site', 'Revenues', 'Page Views']]
    @data += site_items.map do |item|
      site = item.find_css('.header-full').first.visible_text
      # First element under this item whose parent has a span with the given label.
      rev = @client.find_all(:xpath, item.path + '//*[../span[text()="Revenues"]]').first.text
      pv = @client.find_all(:xpath, item.path + '//*[../span[text()="Page Views"]]').first.text
      [site, rev, pv]
    end
  end
end
@@ -0,0 +1,91 @@
1
+ require 'date'
2
+ require_relative 'base_client'
3
+
4
+ # Please ensure that all live widgets have distinct names.
5
+
6
# Scrapes yesterday's device/geo report for every widget from the Content.ad
# publisher UI, driving the browser session held in @client. Widgets are
# selected by their name.
class AdopsReportScrapper::ContentadClient < AdopsReportScrapper::BaseClient
  # Creates a PhantomJS-backed Capybara session in @client, with the global
  # Capybara wait time set to 3 seconds.
  def init_client
    Capybara.register_driver :poltergeist do |app|
      Capybara::Poltergeist::Driver.new(app, :phantomjs => Phantomjs.path)
    end
    Capybara.default_max_wait_time = 3
    @client = Capybara::Session.new(:poltergeist)
    @client.driver.browser.js_errors = false
    @client.driver.resize(1920, 985)
  end

  private

  # Signs in with @login / @secret and confirms success by locating the
  # "Widget Report" element.
  #
  # Raises the underlying lookup error, re-labelled 'Contentad login error',
  # when that element cannot be found.
  def login
    @client.visit 'https://www.content.ad/Login.aspx'
    @client.fill_in 'Email', :with => @login
    @client.fill_in 'Password', :with => @secret
    @client.click_button 'Log In'
    begin
      @client.find :xpath, '//*[text()="Widget Report"]'
    rescue StandardError => e
      # StandardError rather than Exception: Interrupt / SystemExit must not be
      # re-labelled as a login failure.
      raise e, 'Contentad login error'
    end
  end

  # Reads the widget list (name and domain) from the "Widgets" view, then
  # opens the device/geo report and extracts it once per widget into @data.
  def scrap
    @client.find(:xpath, '//option[text()="Yesterday"]').select_option
    @client.click_link 'Apply'
    wait_for_loading

    @client.find(:xpath, '//span[text()="Widgets"]').click
    wait_for_loading

    widget_rows = @client.find_all :xpath, '//table/tbody/tr'
    widgets = widget_rows.map do |tr|
      cells = tr.find_css('td').map { |td| td.visible_text }
      { widget_name: cells[0], domain_name: cells[1] }
    end

    @client.visit 'https://app.content.ad/Publisher/DeviceGeoReport'
    @client.find(:xpath, '//option[text()="Yesterday"]').select_option
    @client.click_link 'Apply'
    wait_for_loading

    @data = []
    widgets.each { |widget| request_report widget }
  end

  # Filters the report by the widget's name and extracts the resulting table.
  #
  # widget - hash with :widget_name and :domain_name.
  def request_report(widget)
    @client.find(:xpath, "//option[text()=\"#{widget[:widget_name]}\"]").select_option
    @client.click_link 'Apply'
    wait_for_loading
    extract_data_from_report widget
  end

  # Appends the current report table to @data, prefixing every row with the
  # date, domain and widget name. The header row is added once, when @data is
  # still empty.
  #
  # widget - hash with :widget_name and :domain_name.
  def extract_data_from_report(widget)
    rows = @client.find_all(:xpath, '//table/*/tr').to_a
    header = rows.shift
    if @data.empty?
      header_cells = header.find_css('td,th').map { |cell| cell.visible_text }
      @data << (['Date', 'Domain', 'Widget'] + header_cells)
    end
    # The last row is dropped; presumably it is a totals row -- confirm.
    rows.pop
    @data += rows.map do |tr|
      cells = tr.find_css('td,th').map { |cell| cell.visible_text }
      [@date.to_s, widget[:domain_name], widget[:widget_name]] + cells
    end
  end

  # Waits until the '#loadingProgress' element can no longer be found:
  # at most 18 polls with a 3 s pause after each one (roughly one minute).
  def wait_for_loading
    18.times do
      begin
        @client.find(:css, '#loadingProgress')
      rescue StandardError
        # The indicator cannot be found any more: stop waiting. StandardError
        # rather than Exception, so an Interrupt aborts the run instead of
        # merely ending the wait.
        break
      end
      sleep 3
    end
    sleep 1
  end
end