adops_report_scrapper 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. checksums.yaml +7 -0
  2. data/.gitignore +16 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +3 -0
  5. data/CODE_OF_CONDUCT.md +13 -0
  6. data/Gemfile +18 -0
  7. data/LICENSE.txt +21 -0
  8. data/README.md +51 -0
  9. data/Rakefile +152 -0
  10. data/adops_report_scrapper.gemspec +24 -0
  11. data/bin/console +14 -0
  12. data/bin/setup +7 -0
  13. data/lib/adops_report_scrapper.rb +38 -0
  14. data/lib/adops_report_scrapper/adaptv_client.rb +80 -0
  15. data/lib/adops_report_scrapper/adforge_client.rb +35 -0
  16. data/lib/adops_report_scrapper/adiply_client.rb +49 -0
  17. data/lib/adops_report_scrapper/adsense_client.rb +55 -0
  18. data/lib/adops_report_scrapper/adsupply_client.rb +27 -0
  19. data/lib/adops_report_scrapper/adx_client.rb +55 -0
  20. data/lib/adops_report_scrapper/base_client.rb +74 -0
  21. data/lib/adops_report_scrapper/brightroll_client.rb +47 -0
  22. data/lib/adops_report_scrapper/browsi_client.rb +44 -0
  23. data/lib/adops_report_scrapper/contentad_client.rb +91 -0
  24. data/lib/adops_report_scrapper/conversant_client.rb +66 -0
  25. data/lib/adops_report_scrapper/criteo_client.rb +25 -0
  26. data/lib/adops_report_scrapper/facebookaudience_client.rb +56 -0
  27. data/lib/adops_report_scrapper/gcs_client.rb +56 -0
  28. data/lib/adops_report_scrapper/liveintent_client.rb +90 -0
  29. data/lib/adops_report_scrapper/marfeel_client.rb +59 -0
  30. data/lib/adops_report_scrapper/nativo_client.rb +60 -0
  31. data/lib/adops_report_scrapper/netseer_client.rb +73 -0
  32. data/lib/adops_report_scrapper/openx_client.rb +161 -0
  33. data/lib/adops_report_scrapper/revcontent_client.rb +51 -0
  34. data/lib/adops_report_scrapper/sonobi_client.rb +91 -0
  35. data/lib/adops_report_scrapper/springserve_client.rb +52 -0
  36. data/lib/adops_report_scrapper/tremor_client.rb +52 -0
  37. data/lib/adops_report_scrapper/triplelift_client.rb +72 -0
  38. data/lib/adops_report_scrapper/version.rb +3 -0
  39. data/secret.sample.yml +77 -0
  40. data/tmp/.keep +0 -0
  41. metadata +117 -0
data/lib/adops_report_scrapper/conversant_client.rb
@@ -0,0 +1,66 @@
+ require 'date'
+ require_relative 'base_client'
+ require 'nokogiri'
+
+ class AdopsReportScrapper::ConversantClient < AdopsReportScrapper::BaseClient
+   private
+
+   def login
+     @client.visit 'https://admin.valueclickmedia.com/corp/login'
+     @client.fill_in 'user_name', :with => @login
+     @client.fill_in 'password', :with => @secret
+     @client.click_button 'Submit'
+     begin
+       @client.find :xpath, '//*[contains(text(),"Reports")]'
+     rescue Exception => e
+       raise e, 'Conversant login error'
+     end
+   end
+
+   def scrap
+     @client.click_link 'Earnings'
+
+     sites = @client.find_all(:css, '#search-site_id > option')
+     n_sites = []
+     sites.each do |site|
+       site_id = site[:value].to_i
+       next if site_id <= 0
+       site_name = site.text
+       n_sites << { site_id: site_id, site_name: site_name }
+     end
+
+     @data = [['Site', 'Device', 'Country', 'Impressions', 'Clicks', 'Earnings']]
+     n_sites.each do |site|
+       extract_data site
+     end
+   end
+
+   def extract_data(site)
+     d_us_datum = get_line_data(site, :desktop, :us)
+     m_us_datum = get_line_data(site, :mobile, :us)
+     d_all_datum = get_line_data(site, :desktop, :all)
+     m_all_datum = get_line_data(site, :mobile, :all)
+     d_intl_datum = d_all_datum.zip(d_us_datum).map { |x, y| x - y }
+     m_intl_datum = m_all_datum.zip(m_us_datum).map { |x, y| x - y }
+
+     d_us_datum = [site[:site_name], 'Desktop', 'US'] + d_us_datum
+     m_us_datum = [site[:site_name], 'Mobile', 'US'] + m_us_datum
+     d_intl_datum = [site[:site_name], 'Desktop', 'Intl'] + d_intl_datum
+     m_intl_datum = [site[:site_name], 'Mobile', 'Intl'] + m_intl_datum
+
+     [d_us_datum, m_us_datum, d_intl_datum, m_intl_datum].each do |datum|
+       next if datum[3..-1] == [0, 0, 0]
+       @data << datum
+     end
+   end
+
+   def get_line_data(site, device, country)
+     date_str = @date.strftime '%Y-%m-%d'
+     supply_type_id_map = { desktop: 1, mobile: 2 }
+     country_id_map = { all: 0, us: 254 }
+     @client.visit "https://pub.valueclickmedia.com/reports/earnings/detailed_media_grid?start_date=#{date_str}&end_date=#{date_str}&site_id=#{site[:site_id]}&media_type_id=&supply_type_id=#{supply_type_id_map[device]}&country_id=#{country_id_map[country]}"
+     doc = Nokogiri::XML(@client.body)
+     cells = doc.css('cell')
+     return [cells[1].content.to_i, cells[2].content.to_i, cells[-1].content.to_f] # [imp, click, earning]
+   end
+ end
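A note on the Intl rows above: get_line_data returns [impressions, clicks, earnings], and the international figures are never fetched directly; they are the element-wise difference between the all-countries pull and the US-only pull. A minimal sketch of that step, with made-up numbers:

    # Element-wise difference of two metric triples, as in extract_data above.
    d_all_datum = [10_000, 50, 12.5] # all countries: [impressions, clicks, earnings]
    d_us_datum  = [7_500, 30, 9.0]   # US only
    d_intl_datum = d_all_datum.zip(d_us_datum).map { |all, us| all - us }
    # => [2500, 20, 3.5]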
data/lib/adops_report_scrapper/criteo_client.rb
@@ -0,0 +1,25 @@
+ require 'date'
+ require_relative 'base_client'
+ require 'httpclient'
+
+ class AdopsReportScrapper::CriteoClient < AdopsReportScrapper::BaseClient
+   private
+
+   def init_client
+   end
+
+   def before_quit_with_error
+   end
+
+   def scrap
+     @data = []
+     date_str = @date.strftime('%Y-%m-%d')
+
+     response = HTTPClient.get "https://publishers.criteo.com/api/2.0/stats.json", apitoken: @secret, begindate: date_str, enddate: date_str
+
+     data = JSON.parse response.body
+     header = data[0].keys
+     @data = [header]
+     @data += data.map { |datum| header.map { |key| datum[key].is_a?(Hash) ? datum[key]['value'] : datum[key] } }
+   end
+ end
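The Criteo client builds its table from the stats JSON by taking the first record's keys as the header and unwrapping any nested {'value' => ...} hashes. A minimal sketch of that flattening, using a made-up payload in the shape the code expects (the real API response may differ):

    require 'json'

    # Hypothetical payload: an array of records whose values are scalars or
    # {'value' => ...} hashes, mirroring the shape scrap assumes.
    data = JSON.parse('[{"date":"2016-01-01","clicks":12,"revenue":{"value":3.4,"currency":"USD"}}]')
    header = data[0].keys
    rows = data.map { |datum| header.map { |key| datum[key].is_a?(Hash) ? datum[key]['value'] : datum[key] } }
    # header => ["date", "clicks", "revenue"]
    # rows   => [["2016-01-01", 12, 3.4]]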
data/lib/adops_report_scrapper/facebookaudience_client.rb
@@ -0,0 +1,56 @@
+ require 'date'
+ require_relative 'base_client'
+
+ class AdopsReportScrapper::FacebookaudienceClient < AdopsReportScrapper::BaseClient
+   private
+
+   def init_client
+     fail 'please specify facebook app id' unless @options['app_id']
+     @app_id = @options['app_id']
+     super
+   end
+
+   def login
+     @client.visit "https://developers.facebook.com/apps/#{@app_id}/audience-network/placement"
+     @client.fill_in 'email', :with => @login
+     @client.fill_in 'pass', :with => @secret
+     @client.click_button 'Log In'
+     begin
+       @client.find :xpath, '//*[text()="Dashboard"]'
+     rescue Exception => e
+       raise e, 'Facebookaudience login error'
+     end
+   end
+
+   def scrap
+     @client.find(:xpath, '//*[text()="All Ad Placements"]').click
+     placements = @client.find_all(:xpath, '//span[../../a[@role="menuitem"]]')
+     placements = placements.map(&:text)
+     @client.find(:xpath, '//*[text()="All Ad Placements"]').click
+     @prev_placement = 'All Ad Placements'
+     @data = []
+     placements.each do |placement|
+       request_report placement
+       extract_data_from_report placement
+     end
+   end
+
+   def request_report(placement)
+     @client.find(:xpath, "//*[text()=\"#{@prev_placement}\"]").click
+     @client.find(:xpath, "//*[text()=\"#{placement}\"]").click
+     @prev_placement = placement.match(/^(.+) \(\d+\)/).captures[0]
+     sleep 1
+   end
+
+   def extract_data_from_report(placement)
+     if @data.count == 0
+       header = @client.find :xpath, '//table/thead/tr'
+       @data << ['Placement'] + header.find_css('td,th').map { |td| td.visible_text }
+     end
+     data_str = @date.strftime '%a %b %d, %Y'
+     rows = @client.find_all :xpath, "//table/*/tr[./td[text()=\"#{data_str}\"]]"
+     return if rows.count == 0
+     row = rows.first
+     @data << [placement] + row.find_css('td,th').map { |td| td.visible_text }
+   end
+ end
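request_report relies on each placement menu entry carrying a numeric id suffix; the regex strips it so the previously selected entry can be clicked again on the next pass. A minimal sketch with a made-up entry:

    # "Sidebar (123456789)" is a hypothetical menu label in the format the regex expects.
    placement = 'Sidebar (123456789)'
    prev_placement = placement.match(/^(.+) \(\d+\)/).captures[0]
    # => "Sidebar"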
data/lib/adops_report_scrapper/gcs_client.rb
@@ -0,0 +1,56 @@
+ require 'date'
+ require_relative 'base_client'
+
+ # gcs sometimes doesn't update data in 24 hours
+
+ class AdopsReportScrapper::GcsClient < AdopsReportScrapper::BaseClient
+   private
+
+   def login
+     @client.visit 'https://www.google.com/insights/consumersurveys/your-surveys'
+     @client.fill_in 'Email', :with => @login
+     @client.click_button 'Next'
+     @client.fill_in 'Passwd', :with => @secret
+     @client.click_button 'Sign in'
+     begin
+       @client.find :xpath, '//*[text()="Sites"]'
+     rescue Exception => e
+       raise e, 'Gcs login error'
+     end
+   end
+
+   def scrap
+     sites = @client.find_all(:xpath, '//*[contains(@class,"sites-menu-item")]', visible: false)
+     sites = sites.to_a
+     sites.pop
+     n_sites = sites.map do |site|
+       {
+         name: site.text(:all),
+         url: site[:href].sub('settings', 'report')
+       }
+     end
+     @data = []
+     n_sites.each do |site|
+       request_report site
+       extract_data_from_report site
+     end
+   end
+
+   def request_report(site)
+     @client.visit site[:url]
+   end
+
+   def extract_data_from_report(site)
+     rows = @client.find_all :xpath, %Q{//table/*/tr[./td[contains(text(),"#{@date.strftime('%b')}") and contains(text(),"#{@date.strftime('%e, %Y')}")]]}
+     return if rows.count == 0
+     if @data.count == 0
+       header = @client.find :xpath, '//table/thead/tr'
+       n_header = header.find_css('td,th').map { |td| td.visible_text }
+       n_header.unshift 'Site'
+       @data << n_header
+     end
+     row = rows[0].find_css('td,th').map { |td| td.visible_text }
+     row.unshift site[:name]
+     @data << row
+   end
+ end
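extract_data_from_report locates the report row by date, interpolating two strftime fragments into the XPath: %b gives the abbreviated month and %e the space-padded day, so the two contains() clauses together match a cell such as "Feb 3, 2016" (the displayed format is inferred from the code). A minimal sketch of the fragments:

    require 'date'

    date = Date.new(2016, 2, 3) # made-up report date
    month_part = date.strftime('%b')        # => "Feb"
    day_year_part = date.strftime('%e, %Y') # => " 3, 2016" (note the leading space from %e)
    xpath = %Q{//table/*/tr[./td[contains(text(),"#{month_part}") and contains(text(),"#{day_year_part}")]]}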
data/lib/adops_report_scrapper/liveintent_client.rb
@@ -0,0 +1,90 @@
+ require 'date'
+ require_relative 'base_client'
+ require 'httpclient'
+ require 'roo'
+
+ class AdopsReportScrapper::LiveintentClient < AdopsReportScrapper::BaseClient
+   private
+
+   def login
+     @client.visit 'https://lfm.liveintent.com/'
+     @client.fill_in 'username', :with => @login
+     @client.fill_in 'password', :with => @secret
+     @client.click_button 'Login'
+     begin
+       @client.find :xpath, '//*[text()="Analysis"]'
+     rescue Exception => e
+       raise e, 'Liveintent login error'
+     end
+   end
+
+   def scrap
+     request_report
+   end
+
+   def request_report
+     @client.find(:xpath, '//*[text()="Analysis"]').click
+     @client.find(:xpath, '//*[contains(text(),"Reporting")]').click
+     @client.find(:xpath, '//*[text()="New"]').click
+     sleep 1
+     @client.find(:xpath, '//*[text()="Publisher ID"]').click
+     @client.find(:xpath, '//*[text()="Ad Slot ID"]').click
+     @client.find(:xpath, '//*[text()="Add additional split"]').click
+     @client.find(:xpath, '//*[text()="Year/Month"]').click
+     @client.find(:xpath, '//*[text()="Device Type (inexact values)"]').click
+     @client.find(:xpath, '//input[@id="intervalBegin"]').set @date.strftime('%Y-%m-%d / %Y-%m-%d')
+     @client.click_button 'Download'
+     wait_for_spin
+
+     request_data = @client.driver.network_traffic.last.instance_variable_get(:@data)
+     report_file_url = @client.driver.network_traffic.last.url
+
+     cookies = @client.driver.cookies
+     @client = HTTPClient.new
+     @client.cookie_manager.cookies = cookies.values.map do |cookie|
+       cookie = cookie.instance_variable_get(:@attributes)
+       HTTP::Cookie.new cookie
+     end
+
+     header = {
+       Accept: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
+       Referer: 'https://lfm.liveintent.com/reporting/',
+       Origin: 'https://lfm.liveintent.com',
+       'User-Agent': 'Mozilla/5.0 (Unknown; Linux x86_64) AppleWebKit/538.1 (KHTML, like Gecko) PhantomJS/2.1.1 Safari/538.1',
+       'Content-Type': 'application/json',
+     }
+
+     @client.receive_timeout = 300
+     response = @client.post(report_file_url, header: header, body: request_data['postData'])
+
+     tmpfile = Tempfile.new('liveintent.xlsx')
+     begin
+       tmpfile.binmode
+       tmpfile.write(response.body)
+       tmpfile.close
+
+       xlsx = Roo::Spreadsheet.open(tmpfile.path, extension: :xlsx)
+       extract_data_from_report xlsx
+     ensure
+       tmpfile.close
+       tmpfile.unlink # deletes the temp file
+     end
+
+   end
+
+   def extract_data_from_report(xlsx)
+     @data = xlsx.to_a.reject { |row| row[1] == '(totals)' || row[0] == '(totals)' }
+   end
+
+   def wait_for_spin
+     30.times do |_i| # wait 5 min
+       begin
+         @client.find(:css, '.fa.fa-spinner.fa-spin')
+       rescue Exception => e
+         break
+       end
+       sleep 10
+     end
+     sleep 5
+   end
+ end
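Once the report request has been replayed through HTTPClient, the xlsx body is parsed by writing it to a temporary file and handing the path to Roo. A minimal sketch of that handoff, with response_body standing in for the downloaded bytes and the stdlib tempfile require made explicit:

    require 'roo'
    require 'tempfile'

    # response_body is assumed to hold the raw xlsx bytes returned by the POST above.
    def rows_from_xlsx(response_body)
      tmpfile = Tempfile.new(['liveintent', '.xlsx'])
      begin
        tmpfile.binmode
        tmpfile.write(response_body)
        tmpfile.close
        Roo::Spreadsheet.open(tmpfile.path, extension: :xlsx).to_a
      ensure
        tmpfile.close
        tmpfile.unlink # remove the temp file
      end
    end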
data/lib/adops_report_scrapper/marfeel_client.rb
@@ -0,0 +1,59 @@
+ require 'date'
+ require_relative 'base_client'
+
+ class AdopsReportScrapper::MarfeelClient < AdopsReportScrapper::BaseClient
+   private
+
+   def login
+     @client.visit 'https://insight.marfeel.com/hub/login'
+     @client.fill_in 'j_username', :with => @login
+     @client.fill_in 'Password', :with => @secret
+     @client.click_button 'Login'
+     begin
+       @client.find :xpath, '//*[text()="REPORTING"]'
+     rescue Exception => e
+       raise e, 'Marfeel login error'
+     end
+   end
+
+   def scrap
+     request_report
+     extract_data_from_report
+   end
+
+   def request_report
+     @client.visit 'https://insight.marfeel.com/hub/insight/reporting?date=1d'
+     sleep 1
+   end
+
+   def extract_data_from_report
+     rows = @client.find_all :xpath, '//table/*/tr'
+     n_rows = rows.map do |tr|
+       tr.find_css('td,th').map do |td|
+         datum = td.visible_text
+         if datum.end_with? ' K'
+           datum = datum.to_f * 1_000
+         elsif datum.end_with? ' M'
+           datum = datum.to_f * 1_000_000
+         end
+         datum.to_s
+       end
+     end
+     n_rows[0][0] = 'Site'
+     site = nil
+     l = n_rows[0].count
+     @data = n_rows.map do |row|
+       if row.count == l
+         row[0] = row[0].split(' $')[0]
+         site = row[0]
+       else
+         row.unshift site
+       end
+       row
+     end
+     (@data.count - 1).times do |i|
+       i.even? ? @data[i+1].unshift('Smartphone') : @data[i+1].unshift('Tablet')
+     end
+     @data[0].unshift 'Device'
+   end
+ end
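Marfeel abbreviates large metrics in its table, so cells ending in ' K' or ' M' are expanded to plain numerals before being stored back as strings. A minimal sketch of that conversion with made-up values:

    # Expand "1.2 K" / "3.4 M" style cells the way extract_data_from_report does.
    def expand_metric(datum)
      if datum.end_with?(' K')
        datum = datum.to_f * 1_000
      elsif datum.end_with?(' M')
        datum = datum.to_f * 1_000_000
      end
      datum.to_s
    end

    expand_metric('1.2 K') # => "1200.0"
    expand_metric('3.4 M') # => "3400000.0"
    expand_metric('987')   # => "987"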
data/lib/adops_report_scrapper/nativo_client.rb
@@ -0,0 +1,60 @@
+ require 'date'
+ require_relative 'base_client'
+
+ class AdopsReportScrapper::NativoClient < AdopsReportScrapper::BaseClient
+   private
+
+   def login
+     @client.visit 'https://admin.nativo.net/'
+     @client.fill_in 'Email', :with => @login
+     @client.fill_in 'Password', :with => @secret
+     @client.click_button 'Log In'
+     begin
+       @client.find :xpath, '//*[text()="Reports"]'
+     rescue Exception => e
+       raise e, 'Nativo login error'
+     end
+   end
+
+   def scrap
+     request_report
+     extract_data_from_report
+   end
+
+   def request_report
+     @client.click_link 'Reports'
+     @client.click_link 'Marketplace Campaigns'
+
+     sleep 1
+     @client.find(:css, '.date-range').click
+     sleep 1
+     @client.find(:xpath, '//*[text()="Yesterday"]').click
+
+     @client.check 'Publisher'
+     @client.check 'Device'
+
+     @client.check 'Clicks'
+     @client.check 'Publisher Revenue'
+     @client.check 'Video Views'
+     @client.check 'Video Views to 100%'
+     sleep 1
+     wait_for_loading
+   end
+
+   def extract_data_from_report
+     rows = @client.find_all :xpath, '//div/table/*[self::thead|self::tbody]/tr'
+     @data = rows.map { |tr| tr.find_css('td,th').map { |td| td.visible_text } }
+   end
+
+   def wait_for_loading
+     30.times do |_i| # wait 5 min
+       begin
+         @client.find(:css, '.loading')
+       rescue Exception => e
+         break
+       end
+       sleep 10
+     end
+     sleep 1
+   end
+ end
data/lib/adops_report_scrapper/netseer_client.rb
@@ -0,0 +1,73 @@
+ require 'date'
+ require_relative 'base_client'
+
+ class AdopsReportScrapper::NetseerClient < AdopsReportScrapper::BaseClient
+   private
+
+   def login
+     @client.visit 'http://publisher.netseer.com/login'
+     @client.fill_in 'Email', :with => @login
+     @client.fill_in 'Password', :with => @secret
+     @client.click_button 'Login'
+     begin
+       @client.find :css, '.icon-calender'
+     rescue Exception => e
+       raise e, 'Netseer login error'
+     end
+   end
+
+   def scrap
+     request_report
+     extract_data_from_report
+   end
+
+   def request_report
+     sleep 3
+     @client.find(:xpath, '//*[contains(text(),"Run Reports")]').click
+     wait_for_loading
+
+     @client.find(:css, '.icon-calender').click
+     @client.fill_in 'daterangepicker_start', :with => @date.strftime('%m/%d/%Y')
+     @client.fill_in 'daterangepicker_end', :with => @date.strftime('%m/%d/%Y')
+     @client.click_button 'Apply'
+     sleep 3
+
+     @client.choose 'Day'
+     @client.choose 'Tag'
+     @client.check 'Delivery Medium'
+
+     @client.click_link_or_button 'Run Report'
+     wait_for_loading
+
+   end
+
+   def extract_data_from_report
+     @data = []
+     loop do
+       rows = @client.find_all :xpath, '//table/*/tr'
+       rows = rows.to_a
+       header = rows.shift
+       if @data.count == 0
+         n_header = header.find_css('td,th').map { |td| td.visible_text }
+         @data << n_header
+       end
+       @data += rows.map { |tr| tr.find_css('td,th').map { |td| td.visible_text } }
+       pagee = @client.find(:xpath, '//*[contains(text(),"Showing ")]').text.match(/to (\d+) of (\d+)/).captures
+       break if pagee[0] == pagee[1]
+       @client.find(:css, 'a > .fa.fa-angle-right').click
+       wait_for_loading
+     end
+   end
+
+   def wait_for_loading
+     30.times do |_i| # wait 5 min
+       begin
+         @client.find(:xpath, '//*[text()="Loading ..."]')
+       rescue Exception => e
+         break
+       end
+       sleep 10
+     end
+     sleep 5
+   end
+ end
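Pagination in extract_data_from_report stops when the pager label reports that the last visible row equals the total; otherwise the scraper clicks the next-page arrow and waits for the grid to reload. A minimal sketch of the label check with a made-up string:

    # Hypothetical pager text in the format the regex expects.
    text = 'Showing 1 to 50 of 120'
    pagee = text.match(/to (\d+) of (\d+)/).captures # => ["50", "120"]
    done = pagee[0] == pagee[1]                      # => false, so advance to the next page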