adops_report_scrapper 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (41) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +16 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +3 -0
  5. data/CODE_OF_CONDUCT.md +13 -0
  6. data/Gemfile +18 -0
  7. data/LICENSE.txt +21 -0
  8. data/README.md +51 -0
  9. data/Rakefile +152 -0
  10. data/adops_report_scrapper.gemspec +24 -0
  11. data/bin/console +14 -0
  12. data/bin/setup +7 -0
  13. data/lib/adops_report_scrapper.rb +38 -0
  14. data/lib/adops_report_scrapper/adaptv_client.rb +80 -0
  15. data/lib/adops_report_scrapper/adforge_client.rb +35 -0
  16. data/lib/adops_report_scrapper/adiply_client.rb +49 -0
  17. data/lib/adops_report_scrapper/adsense_client.rb +55 -0
  18. data/lib/adops_report_scrapper/adsupply_client.rb +27 -0
  19. data/lib/adops_report_scrapper/adx_client.rb +55 -0
  20. data/lib/adops_report_scrapper/base_client.rb +74 -0
  21. data/lib/adops_report_scrapper/brightroll_client.rb +47 -0
  22. data/lib/adops_report_scrapper/browsi_client.rb +44 -0
  23. data/lib/adops_report_scrapper/contentad_client.rb +91 -0
  24. data/lib/adops_report_scrapper/conversant_client.rb +66 -0
  25. data/lib/adops_report_scrapper/criteo_client.rb +25 -0
  26. data/lib/adops_report_scrapper/facebookaudience_client.rb +56 -0
  27. data/lib/adops_report_scrapper/gcs_client.rb +56 -0
  28. data/lib/adops_report_scrapper/liveintent_client.rb +90 -0
  29. data/lib/adops_report_scrapper/marfeel_client.rb +59 -0
  30. data/lib/adops_report_scrapper/nativo_client.rb +60 -0
  31. data/lib/adops_report_scrapper/netseer_client.rb +73 -0
  32. data/lib/adops_report_scrapper/openx_client.rb +161 -0
  33. data/lib/adops_report_scrapper/revcontent_client.rb +51 -0
  34. data/lib/adops_report_scrapper/sonobi_client.rb +91 -0
  35. data/lib/adops_report_scrapper/springserve_client.rb +52 -0
  36. data/lib/adops_report_scrapper/tremor_client.rb +52 -0
  37. data/lib/adops_report_scrapper/triplelift_client.rb +72 -0
  38. data/lib/adops_report_scrapper/version.rb +3 -0
  39. data/secret.sample.yml +77 -0
  40. data/tmp/.keep +0 -0
  41. metadata +117 -0
@@ -0,0 +1,66 @@
1
+ require 'date'
2
+ require_relative 'base_client'
3
+ require 'nokogiri'
4
+
5
# Scrapes per-site earnings from the Conversant (ValueClick Media) publisher
# site and stores them in @data, split by device and by US / international.
class AdopsReportScrapper::ConversantClient < AdopsReportScrapper::BaseClient
  # Query-string ids used by the detailed earnings grid endpoint.
  SUPPLY_TYPE_IDS = { desktop: 1, mobile: 2 }.freeze
  COUNTRY_IDS = { all: 0, us: 254 }.freeze

  private

  # Submits the login form and verifies that an element containing "Reports"
  # shows up afterwards.
  #
  # Raises the lookup error (same class) with the message
  # 'Conversant login error' when that element cannot be found.
  def login
    @client.visit 'https://admin.valueclickmedia.com/corp/login'
    @client.fill_in 'user_name', :with => @login
    @client.fill_in 'password', :with => @secret
    @client.click_button 'Submit'
    begin
      @client.find :xpath, '//*[contains(text(),"Reports")]'
    rescue StandardError => e
      # StandardError rather than Exception so Interrupt/SystemExit are not
      # intercepted and relabelled as login failures.
      raise e, 'Conversant login error'
    end
  end

  # Collects every site offered by the site selector, then fills @data with a
  # header row followed by the rows produced by extract_data for each site.
  def scrap
    @client.click_link 'Earnings'

    # Read all options up front: get_line_data navigates away from this page.
    options = @client.find_all(:css, '#search-site_id > option')
    sites = options.each_with_object([]) do |option, found|
      site_id = option[:value].to_i
      next if site_id <= 0 # non-numeric / placeholder option values
      found << { site_id: site_id, site_name: option.text }
    end

    @data = [['Site', 'Device', 'Country', 'Impressions', 'Clicks', 'Earnings']]
    sites.each { |site| extract_data site }
  end

  # Appends up to four rows for +site+ to @data: Desktop/Mobile for 'US' and
  # for 'Intl' (computed as "all countries" minus US).
  #
  # site - Hash with :site_id and :site_name.
  #
  # Rows whose three metrics are all zero are skipped. The original check
  # sliced from index 2, which included the country label and therefore never
  # matched, so empty rows were always emitted.
  def extract_data(site)
    devices = { desktop: 'Desktop', mobile: 'Mobile' }
    us_metrics = devices.keys.map { |device| get_line_data(site, device, :us) }
    all_metrics = devices.keys.map { |device| get_line_data(site, device, :all) }
    intl_metrics = all_metrics.zip(us_metrics).map do |all, us|
      all.zip(us).map { |total, domestic| total - domestic }
    end

    { 'US' => us_metrics, 'Intl' => intl_metrics }.each do |country, per_device|
      devices.values.zip(per_device).each do |device_label, metrics|
        next if metrics.all?(&:zero?)
        @data << [site[:site_name], device_label, country] + metrics
      end
    end
  end

  # Fetches the earnings grid for one site/device/country on @date.
  #
  # site    - Hash with :site_id.
  # device  - :desktop or :mobile.
  # country - :all or :us.
  #
  # Returns [impressions (Integer), clicks (Integer), earnings (Float)], read
  # from the 2nd, 3rd and last <cell> elements of the response.
  # NOTE(review): assumes the response always contains at least three <cell>
  # elements; fewer would raise NoMethodError on nil - confirm.
  def get_line_data(site, device, country)
    date_str = @date.strftime '%Y-%m-%d'
    @client.visit "https://pub.valueclickmedia.com/reports/earnings/detailed_media_grid?start_date=#{date_str}&end_date=#{date_str}&site_id=#{site[:site_id]}&media_type_id=&supply_type_id=#{SUPPLY_TYPE_IDS[device]}&country_id=#{COUNTRY_IDS[country]}"
    cells = Nokogiri::XML(@client.body).css('cell')
    [cells[1].content.to_i, cells[2].content.to_i, cells[-1].content.to_f]
  end
end
@@ -0,0 +1,25 @@
1
+ require 'date'
2
+ require_relative 'base_client'
3
+ require 'httpclient'
4
+
5
# Pulls publisher stats from the Criteo HTTP API and stores them in @data as
# a header row followed by one row per returned record.
class AdopsReportScrapper::CriteoClient < AdopsReportScrapper::BaseClient
  private

  # Overridden as a no-op: scrap talks to the API through HTTPClient directly
  # and never touches @client.
  def init_client
  end

  # Overridden as a no-op.
  # NOTE(review): presumably the base implementation relies on the client that
  # init_client would have created - confirm against BaseClient.
  def before_quit_with_error
  end

  # Requests the stats for @date (used as both begin and end date),
  # authenticating with @secret as the API token.
  #
  # @data is left as [] when the API returns no records; the original crashed
  # with NoMethodError on data[0].keys in that case. A response that is not a
  # JSON array raises a RuntimeError that includes the parsed payload.
  def scrap
    @data = []
    date_str = @date.strftime('%Y-%m-%d')

    response = HTTPClient.get "https://publishers.criteo.com/api/2.0/stats.json", apitoken: @secret, begindate: date_str, enddate: date_str

    records = JSON.parse response.body
    fail "Criteo unexpected response: #{records.inspect}" unless records.is_a?(Array)
    return @data if records.empty?

    # Column order comes from the first record; wrapped values are unwrapped
    # through their 'value' key.
    header = records.first.keys
    body = records.map do |record|
      header.map do |key|
        value = record[key]
        value.is_a?(Hash) ? value['value'] : value
      end
    end
    @data = [header] + body
  end
end
@@ -0,0 +1,56 @@
1
+ require 'date'
2
+ require_relative 'base_client'
3
+
4
# Scrapes the Facebook Audience Network placement report for one app and
# stores one row per placement (for @date) in @data.
class AdopsReportScrapper::FacebookaudienceClient < AdopsReportScrapper::BaseClient
  private

  # Requires the 'app_id' option, remembers it, then defers to the base
  # implementation.
  #
  # Raises RuntimeError when @options['app_id'] is missing.
  def init_client
    fail 'please specify facebook app id' unless @options['app_id']
    @app_id = @options['app_id']
    super
  end

  # Opens the app's placement page, submits the login form and verifies that
  # a "Dashboard" element shows up.
  #
  # Raises the lookup error (same class) with the message
  # 'Facebookaudience login error' when it does not.
  def login
    @client.visit "https://developers.facebook.com/apps/#{@app_id}/audience-network/placement"
    @client.fill_in 'email', :with => @login
    @client.fill_in 'pass', :with => @secret
    @client.click_button 'Log In'
    begin
      @client.find :xpath, '//*[text()="Dashboard"]'
    rescue StandardError => e
      # StandardError rather than Exception so Interrupt/SystemExit pass through.
      raise e, 'Facebookaudience login error'
    end
  end

  # Reads the placement names from the placement menu, then selects each
  # placement in turn and extracts its row.
  def scrap
    # Open the menu, read its entries, then click the toggle again.
    @client.find(:xpath, '//*[text()="All Ad Placements"]').click
    placements = @client.find_all(:xpath, '//span[../../a[@role="menuitem"]]').map(&:text)
    @client.find(:xpath, '//*[text()="All Ad Placements"]').click

    @prev_placement = 'All Ad Placements'
    @data = []
    placements.each do |placement|
      request_report placement
      extract_data_from_report placement
    end
  end

  # Switches the report to +placement+ by clicking the element labelled with
  # the previously selected placement and then the requested entry.
  #
  # placement - menu entry text, normally of the form "Name (123)".
  def request_report(placement)
    @client.find(:xpath, "//*[text()=\"#{@prev_placement}\"]").click
    @client.find(:xpath, "//*[text()=\"#{placement}\"]").click
    # Remember the label without its trailing " (N)" counter. If the entry
    # has no such suffix, keep the text as is instead of crashing on a nil
    # match as the original did.
    # NOTE(review): assumes the selected label is shown without the counter.
    matched = placement.match(/^(.+) \(\d+\)/)
    @prev_placement = matched ? matched[1] : placement
    sleep 1
  end

  # Appends the table row for @date to @data, prefixed with +placement+.
  # The header row (prefixed with 'Placement') is captured on the first call.
  # Nothing is appended when the table has no row for @date.
  def extract_data_from_report(placement)
    if @data.empty?
      header = @client.find :xpath, '//table/thead/tr'
      @data << ['Placement'] + header.find_css('td,th').map { |cell| cell.visible_text }
    end

    date_str = @date.strftime '%a %b %d, %Y'
    row = @client.find_all(:xpath, "//table/*/tr[./td[text()=\"#{date_str}\"]]").first
    return if row.nil?

    @data << [placement] + row.find_css('td,th').map { |cell| cell.visible_text }
  end
end
@@ -0,0 +1,56 @@
1
+ require 'date'
2
+ require_relative 'base_client'
3
+
4
+ # gcs sometimes doesn't update data in 24 hours
5
+
6
# Scrapes the Google Consumer Surveys per-site report and stores one row per
# site (for @date) in @data.
class AdopsReportScrapper::GcsClient < AdopsReportScrapper::BaseClient
  private

  # Walks through the two-step Google sign-in form and verifies that a
  # "Sites" element shows up.
  #
  # Raises the lookup error (same class) with the message 'Gcs login error'
  # when it does not.
  def login
    @client.visit 'https://www.google.com/insights/consumersurveys/your-surveys'
    @client.fill_in 'Email', :with => @login
    @client.click_button 'Next'
    @client.fill_in 'Passwd', :with => @secret
    @client.click_button 'Sign in'
    begin
      @client.find :xpath, '//*[text()="Sites"]'
    rescue StandardError => e
      # StandardError rather than Exception so Interrupt/SystemExit pass through.
      raise e, 'Gcs login error'
    end
  end

  # Builds the list of sites from the (possibly hidden) site menu items and
  # extracts the report row of each one.
  def scrap
    menu_items = @client.find_all(:xpath, '//*[contains(@class,"sites-menu-item")]', visible: false).to_a
    # The last menu item is dropped.
    # NOTE(review): presumably it is not a site entry - confirm.
    menu_items.pop

    # Resolve names and URLs before navigating away from the menu page.
    sites = menu_items.map do |item|
      { name: item.text(:all), url: item[:href].sub('settings', 'report') }
    end

    @data = []
    sites.each do |site|
      request_report site
      extract_data_from_report site
    end
  end

  # Opens the report page of +site+ (Hash with :url).
  def request_report(site)
    @client.visit site[:url]
  end

  # Appends the table row for @date to @data, prefixed with the site name.
  # The header row (prefixed with 'Site') is added together with the first
  # row found. Nothing is appended when the table has no row for @date.
  def extract_data_from_report(site)
    # Month abbreviation and "day, year" are matched separately.
    rows = @client.find_all :xpath, %Q{//table/*/tr[./td[contains(text(),"#{@date.strftime('%b')}") and contains(text(),"#{@date.strftime('%e, %Y')}")]]}
    return if rows.count == 0

    if @data.empty?
      header = @client.find :xpath, '//table/thead/tr'
      @data << ['Site'] + header.find_css('td,th').map { |cell| cell.visible_text }
    end

    @data << [site[:name]] + rows[0].find_css('td,th').map { |cell| cell.visible_text }
  end
end
@@ -0,0 +1,90 @@
1
+ require 'date'
2
+ require_relative 'base_client'
3
+ require 'httpclient'
4
+ require 'roo'
5
+
6
# Builds a report in the LiveIntent UI, replays the download request with
# HTTPClient and stores the spreadsheet rows in @data.
class AdopsReportScrapper::LiveintentClient < AdopsReportScrapper::BaseClient
  private

  # Submits the login form and verifies that an "Analysis" element shows up.
  #
  # Raises the lookup error (same class) with the message
  # 'Liveintent login error' when it does not.
  def login
    @client.visit 'https://lfm.liveintent.com/'
    @client.fill_in 'username', :with => @login
    @client.fill_in 'password', :with => @secret
    @client.click_button 'Login'
    begin
      @client.find :xpath, '//*[text()="Analysis"]'
    rescue StandardError => e
      # StandardError rather than Exception so Interrupt/SystemExit pass through.
      raise e, 'Liveintent login error'
    end
  end

  def scrap
    request_report
  end

  # Configures the report in the browser, triggers the download, then fetches
  # the same report over plain HTTP and loads it into @data.
  def request_report
    configure_report
    @client.click_button 'Download'
    wait_for_spin

    response = replay_download(@client.driver.network_traffic.last)
    load_spreadsheet response.body
  end

  # Clicks through the report builder: splits by Publisher ID, Ad Slot ID,
  # Year/Month and Device Type, restricted to @date.
  def configure_report
    @client.find(:xpath, '//*[text()="Analysis"]').click
    @client.find(:xpath, '//*[contains(text(),"Reporting")]').click
    @client.find(:xpath, '//*[text()="New"]').click
    sleep 1
    @client.find(:xpath, '//*[text()="Publisher ID"]').click
    @client.find(:xpath, '//*[text()="Ad Slot ID"]').click
    @client.find(:xpath, '//*[text()="Add additional split"]').click
    @client.find(:xpath, '//*[text()="Year/Month"]').click
    @client.find(:xpath, '//*[text()="Device Type (inexact values)"]').click
    @client.find(:xpath, '//input[@id="intervalBegin"]').set @date.strftime('%Y-%m-%d / %Y-%m-%d')
  end

  # Re-issues the browser's last request (the report download) with
  # HTTPClient, carrying over the browser cookies.
  #
  # last_request - the last entry of the driver's network traffic.
  #
  # Returns the HTTP response.
  #
  # NOTE(review): @client is replaced by the HTTPClient instance from here on,
  # as in the original; any later code that expects the browser session in
  # @client will not find it - confirm against BaseClient.
  def replay_download(last_request)
    # The driver exposes no public reader for the raw request payload, so the
    # original reads its @data ivar; kept as is.
    request_data = last_request.instance_variable_get(:@data)
    report_file_url = last_request.url

    cookies = @client.driver.cookies
    @client = HTTPClient.new
    @client.cookie_manager.cookies = cookies.values.map do |cookie|
      HTTP::Cookie.new cookie.instance_variable_get(:@attributes)
    end

    header = {
      Accept: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
      Referer: 'https://lfm.liveintent.com/reporting/',
      Origin: 'https://lfm.liveintent.com',
      'User-Agent': 'Mozilla/5.0 (Unknown; Linux x86_64) AppleWebKit/538.1 (KHTML, like Gecko) PhantomJS/2.1.1 Safari/538.1',
      'Content-Type': 'application/json',
    }

    @client.receive_timeout = 300
    @client.post(report_file_url, header: header, body: request_data['postData'])
  end

  # Writes +content+ to a temporary .xlsx file, parses it with Roo and hands
  # the workbook to extract_data_from_report. The file is always removed.
  def load_spreadsheet(content)
    # Array form keeps the ".xlsx" suffix at the end of the generated name.
    tmpfile = Tempfile.new(['liveintent', '.xlsx'])
    begin
      tmpfile.binmode
      tmpfile.write(content)
      tmpfile.close

      xlsx = Roo::Spreadsheet.open(tmpfile.path, extension: :xlsx)
      extract_data_from_report xlsx
    ensure
      tmpfile.close
      tmpfile.unlink # deletes the temp file
    end
  end

  # Stores every spreadsheet row in @data except those whose first or second
  # cell is '(totals)'.
  def extract_data_from_report(xlsx)
    @data = xlsx.to_a.reject { |row| row[1] == '(totals)' || row[0] == '(totals)' }
  end

  # Polls until the spinner element can no longer be found, checking up to
  # 30 times with a 10 second pause between checks, then waits 5 more seconds.
  def wait_for_spin
    30.times do
      begin
        @client.find(:css, '.fa.fa-spinner.fa-spin')
      rescue StandardError
        # Spinner not found: loading has finished. StandardError rather than
        # Exception so Ctrl-C / exit are not mistaken for "finished".
        break
      end
      sleep 10
    end
    sleep 5
  end
end
@@ -0,0 +1,59 @@
1
+ require 'date'
2
+ require_relative 'base_client'
3
+
4
# Scrapes the Marfeel one-day reporting table and stores it in @data with
# added 'Device' and 'Site' columns.
class AdopsReportScrapper::MarfeelClient < AdopsReportScrapper::BaseClient
  private

  # Submits the login form and verifies that a "REPORTING" element shows up.
  #
  # Raises the lookup error (same class) with the message
  # 'Marfeel login error' when it does not.
  def login
    @client.visit 'https://insight.marfeel.com/hub/login'
    @client.fill_in 'j_username', :with => @login
    @client.fill_in 'Password', :with => @secret
    @client.click_button 'Login'
    begin
      @client.find :xpath, '//*[text()="REPORTING"]'
    rescue StandardError => e
      # StandardError rather than Exception so Interrupt/SystemExit pass through.
      raise e, 'Marfeel login error'
    end
  end

  def scrap
    request_report
    extract_data_from_report
  end

  # Opens the reporting page with the "1d" date filter.
  def request_report
    @client.visit 'https://insight.marfeel.com/hub/insight/reporting?date=1d'
    sleep 1
  end

  # Reads every table row into @data.
  #
  # * Cells ending in ' K' / ' M' are expanded to plain numbers.
  # * Rows as wide as the header start a new site (the text after ' $' in
  #   the first cell is dropped); narrower rows inherit the last site seen.
  # * Data rows are labelled alternately 'Smartphone', 'Tablet' in a new
  #   leading 'Device' column.
  #
  # @data is set to [] when the page has no table rows; the original crashed
  # with NoMethodError on n_rows[0][0] in that case.
  def extract_data_from_report
    table = @client.find_all(:xpath, '//table/*/tr').map do |tr|
      tr.find_css('td,th').map { |cell| expand_magnitude(cell.visible_text).to_s }
    end
    if table.empty?
      @data = []
      return
    end

    table[0][0] = 'Site'
    full_width = table[0].count
    current_site = nil
    @data = table.map do |row|
      if row.count == full_width
        row[0] = row[0].split(' $')[0]
        current_site = row[0]
      else
        row.unshift current_site
      end
      row
    end

    # Data rows alternate between devices, starting with Smartphone.
    @data.drop(1).each_with_index do |row, index|
      row.unshift(index.even? ? 'Smartphone' : 'Tablet')
    end
    @data[0].unshift 'Device'
  end

  # Converts abbreviated figures ("1.5 K", "2 M") to Floats; any other text
  # is returned unchanged.
  def expand_magnitude(text)
    if text.end_with? ' K'
      text.to_f * 1_000
    elsif text.end_with? ' M'
      text.to_f * 1_000_000
    else
      text
    end
  end
end
@@ -0,0 +1,60 @@
1
+ require 'date'
2
+ require_relative 'base_client'
3
+
4
# Scrapes the Nativo "Marketplace Campaigns" report for yesterday and stores
# the table rows in @data.
class AdopsReportScrapper::NativoClient < AdopsReportScrapper::BaseClient
  private

  # Submits the login form and verifies that a "Reports" element shows up.
  #
  # Raises the lookup error (same class) with the message
  # 'Nativo login error' when it does not.
  def login
    @client.visit 'https://admin.nativo.net/'
    @client.fill_in 'Email', :with => @login
    @client.fill_in 'Password', :with => @secret
    @client.click_button 'Log In'
    begin
      @client.find :xpath, '//*[text()="Reports"]'
    rescue StandardError => e
      # StandardError rather than Exception so Interrupt/SystemExit pass through.
      raise e, 'Nativo login error'
    end
  end

  def scrap
    request_report
    extract_data_from_report
  end

  # Opens the report, picks the "Yesterday" range, ticks the wanted
  # dimensions and metrics, and waits for the table to load.
  def request_report
    @client.click_link 'Reports'
    @client.click_link 'Marketplace Campaigns'

    sleep 1
    @client.find(:css, '.date-range').click
    sleep 1
    @client.find(:xpath, '//*[text()="Yesterday"]').click

    # Dimensions first, then metrics, in the original order.
    ['Publisher', 'Device', 'Clicks', 'Publisher Revenue', 'Video Views', 'Video Views to 100%'].each do |label|
      @client.check label
    end
    sleep 1
    wait_for_loading
  end

  # Stores the visible text of every header and body row cell in @data.
  def extract_data_from_report
    rows = @client.find_all :xpath, '//div/table/*[self::thead|self::tbody]/tr'
    @data = rows.map { |tr| tr.find_css('td,th').map { |cell| cell.visible_text } }
  end

  # Polls until the '.loading' element can no longer be found, checking up to
  # 30 times with a 10 second pause between checks, then waits 1 more second.
  def wait_for_loading
    30.times do
      begin
        @client.find(:css, '.loading')
      rescue StandardError
        # Indicator not found: loading has finished. StandardError rather than
        # Exception so Ctrl-C / exit are not mistaken for "finished".
        break
      end
      sleep 10
    end
    sleep 1
  end
end
@@ -0,0 +1,73 @@
1
+ require 'date'
2
+ require_relative 'base_client'
3
+
4
# Runs the Netseer publisher report for @date and stores all of its pages in
# @data (one header row followed by the data rows).
class AdopsReportScrapper::NetseerClient < AdopsReportScrapper::BaseClient
  private

  # Submits the login form and verifies that the calendar icon shows up.
  #
  # Raises the lookup error (same class) with the message
  # 'Netseer login error' when it does not.
  #
  # NOTE(review): the login page is requested over plain http, so the
  # credentials are sent unencrypted unless the site redirects - check
  # whether an https endpoint exists.
  def login
    @client.visit 'http://publisher.netseer.com/login'
    @client.fill_in 'Email', :with => @login
    @client.fill_in 'Password', :with => @secret
    @client.click_button 'Login'
    begin
      @client.find :css, '.icon-calender'
    rescue StandardError => e
      # StandardError rather than Exception so Interrupt/SystemExit pass through.
      raise e, 'Netseer login error'
    end
  end

  def scrap
    request_report
    extract_data_from_report
  end

  # Opens "Run Reports", restricts the date range to @date, selects the
  # Day / Tag / Delivery Medium options and runs the report.
  def request_report
    sleep 3
    @client.find(:xpath, '//*[contains(text(),"Run Reports")]').click
    wait_for_loading

    @client.find(:css, '.icon-calender').click
    day = @date.strftime('%m/%d/%Y')
    @client.fill_in 'daterangepicker_start', :with => day
    @client.fill_in 'daterangepicker_end', :with => day
    @client.click_button 'Apply'
    sleep 3

    @client.choose 'Day'
    @client.choose 'Tag'
    @client.check 'Delivery Medium'

    @client.click_link_or_button 'Run Report'
    wait_for_loading
  end

  # Walks through every result page and accumulates the rows in @data. The
  # header row is taken from the first page only.
  #
  # Raises RuntimeError when a page has no table rows or when the pager text
  # cannot be parsed; the original failed with NoMethodError on nil instead.
  def extract_data_from_report
    @data = []
    loop do
      header, *body = @client.find_all(:xpath, '//table/*/tr').to_a
      fail 'Netseer report table is empty' if header.nil?

      @data << header.find_css('td,th').map { |cell| cell.visible_text } if @data.empty?
      @data += body.map { |tr| tr.find_css('td,th').map { |cell| cell.visible_text } }

      # Pager text contains "... to <last shown> of <total>".
      pager = @client.find(:xpath, '//*[contains(text(),"Showing ")]').text
      position = pager.match(/to (\d+) of (\d+)/)
      fail "Netseer unexpected pager text: #{pager.inspect}" if position.nil?

      last_shown, total = position.captures
      break if last_shown == total

      @client.find(:css, 'a > .fa.fa-angle-right').click
      wait_for_loading
    end
  end

  # Polls until the "Loading ..." element can no longer be found, checking up
  # to 30 times with a 10 second pause between checks, then waits 5 more
  # seconds.
  def wait_for_loading
    30.times do
      begin
        @client.find(:xpath, '//*[text()="Loading ..."]')
      rescue StandardError
        # Indicator not found: loading has finished. StandardError rather than
        # Exception so Ctrl-C / exit are not mistaken for "finished".
        break
      end
      sleep 10
    end
    sleep 5
  end
end