adops_report_scrapper 0.1.13 → 0.1.14

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1ca67e7025786adde54c380f4e623e8989e9789f
4
- data.tar.gz: e6fc196fdedaaa407b4f98546de5fc0320d0f147
3
+ metadata.gz: a8602cb3bc3c3466d5cd6f67b9cba9ed0002f75e
4
+ data.tar.gz: 8b37b2be038428dc7d31792a5e179aeb5cbb13f6
5
5
  SHA512:
6
- metadata.gz: 6edd51ce4a9953513989ff11c8b80fa7678847288eccfeebde43d79d62210e148f90dd6e5aad2aae92cdaaa91c89e2d7a330c901d026c515962f2914be1a7827
7
- data.tar.gz: aad764f4dfec0aec24418ed7d40b2534a7f4f8f50d920303ccd0fee0313428e07c63c93cca4fe68d73c9724e73d495025b64e271a647f4348ac658561d12e5d7
6
+ metadata.gz: 04a02dd62bbb3261fc3ec6f8039c4ed10d9386001d48d6f3b7f640b4fd45c897b8048d64148eef1b4969f3e33c9d444eacfdebd0c6c01cac388ec4038e284379
7
+ data.tar.gz: cc2bba6c3399ce0996b8014a8e4ce714df5313df839dfc4b1a9bcc910eef552685796c7de121fb0904ee183d24729898acb37709113b45921af2784910071114
@@ -26,7 +26,7 @@ class AdopsReportScrapper::OpenxClient < AdopsReportScrapper::BaseClient
26
26
  end
27
27
 
28
28
  def request_report
29
- @client.visit 'http://cmci-ui.openx.net/#/reports?tab=my_reports'
29
+ @client.visit "http://#{@account_prefix}.openx.net/#/reports?tab=my_reports"
30
30
  sleep 5
31
31
 
32
32
  begin
@@ -4,12 +4,25 @@ require_relative 'base_client'
4
4
  class AdopsReportScrapper::SonobiClient < AdopsReportScrapper::BaseClient
5
5
  def date_supported?(date = nil)
6
6
  _date = date || @date
7
- return true if _date >= Date.today - 2
7
+ return true if _date >= Date.today - 3
8
8
  false
9
9
  end
10
10
 
11
11
  private
12
12
 
13
+ def init_client
14
+ fail 'please specify sonobi key' unless @options['key']
15
+ fail 'please specify sonobi code' unless @options['code']
16
+ fail 'please specify sonobi userid' unless @options['userid']
17
+ @key = @options['key']
18
+ @code = @options['code']
19
+ @userid = @options['userid']
20
+ super
21
+ end
22
+
23
+ def before_quit_with_error
24
+ end
25
+
13
26
  def login
14
27
  @client.visit 'https://jetstream.sonobi.com/welcome/login.php'
15
28
  @client.fill_in 'user name', :with => @login
@@ -20,6 +33,14 @@ class AdopsReportScrapper::SonobiClient < AdopsReportScrapper::BaseClient
20
33
  rescue Exception => e
21
34
  raise e, 'Sonobi login error'
22
35
  end
36
+ cookies = @client.driver.cookies
37
+ @client.driver.close_window('0')
38
+
39
+ @client = HTTPClient.new
40
+ @client.cookie_manager.cookies = cookies.values.map do |cookie|
41
+ cookie = cookie.instance_variable_get(:@attributes)
42
+ HTTP::Cookie.new cookie
43
+ end
23
44
  end
24
45
 
25
46
  def scrap
@@ -33,74 +54,53 @@ class AdopsReportScrapper::SonobiClient < AdopsReportScrapper::BaseClient
33
54
  def request_report(country)
34
55
  date_str = @date.strftime('%Y-%m-%d')
35
56
  is_us = country == :us
36
- @client.find(:xpath, '//*[text()="Reports"]').click
37
- sleep 2
38
- # set date
39
- @client.select 'Custom'
40
- sleep 1
41
- @client.find(:xpath, '//input[@name="_range_start_date"]').click
42
- @client.find(:xpath, "//*[@date=\"#{date_str}\"]").click
43
- @client.find(:xpath, '//input[@name="_range_end_date"]').click
44
- @client.find(:xpath, "//*[@date=\"#{date_str}\"]").click
45
57
 
46
- # all sites
47
- @client.find(:xpath, '//div[@name="_siteid"]').click
48
- sleep 2
49
- @client.find(:xpath, '//*[text()="Select All"]').click
58
+ response = @client.post('https://jetstream.sonobi.com/public/', cm: 'category.list', key: @key, code: @code, _userid: @userid, _parentid: 'locations')
59
+ countries = JSON.parse response.body
50
60
 
51
- # select country
52
- @client.find(:xpath, '//*[text()="Add New Filter"]').click
53
- @client.find_all(:xpath, '//option[text()="Country"]').last.select_option
54
- sleep 2
55
- @client.find(:xpath, '//div[@name="_country"]').click
56
- sleep 2
57
- if is_us
58
- @client.find(:xpath, '//*[text()="United States"]').click
59
- else
60
- @client.find(:xpath, '//*[text()="Select All"]').click
61
- @client.find(:xpath, '//*[@class="remove_icon"][../*[text()="United States"]]').click
62
- end
61
+ report_body = {
62
+ '_userid': @userid,
63
+ '_country' => is_us ? 'US' : 'AF,AX,AL,DZ,AS,AD,AO,AI,AQ,AG,AR,AM,AW,AP,AU,AT,AZ,BS,BH,BD,BB,BY,BE,BZ,BJ,BM,BT,BO,BA,BW,BV,BR,IO,BN,BG,BF,BI,KH,CM,CA,CV,KY,CF,TD,CL,CN,CX,CC,CO,KM,CG,CD,CK,CR,CI,HR,CU,CY,CZ,DK,DJ,DM,DO,EC,EG,SV,GQ,ER,EE,ET,EU,FK,FO,FJ,FI,FR,GF,PF,TF,GA,GM,GE,DE,GH,GI,GR,GL,GD,GP,GU,GT,GG,GN,GW,GY,HT,HM,VA,HN,HK,HU,IS,IN,ID,IR,IQ,IE,IM,IL,IT,JM,JP,JE,JO,KZ,KE,KI,KP,KR,KW,KG,LA,LV,LB,LS,LR,LY,LI,LT,LU,MO,MK,MG,MW,MY,MV,ML,MT,MH,MQ,MR,MU,YT,MX,FM,MD,MC,MN,ME,MS,MA,MZ,MM,NA,NR,NP,NL,AN,NC,NZ,NI,NE,NG,NU,NF,MP,NO,OM,PK,PW,PS,PA,PG,PY,PE,PH,PN,PL,PT,PR,QA,RE,RO,RU,RW,SH,KN,LC,PM,VC,WS,SM,ST,SA,SN,RS,SC,SL,SG,SK,SI,SB,SO,ZA,GS,ES,LK,SD,SR,SJ,SZ,SE,CH,SY,TW,TJ,TZ,TH,TL,TG,TK,TO,TT,TN,TR,TM,TC,TV,UG,UA,AE,GB,UM,UY,UZ,VU,VE,VN,VG,VI,WF,EH,YE,ZM,ZW,--',
64
+ 'groupby' => 'day',
65
+ 'row_per' => '_date,_placementid,_siteid',
66
+ 'columns' => '_date,_placementid_name,_siteid_name,_impression_count,_impression_count_viewed,_impression_count_clicked,_unfilled_impressions,_revenue,_ecpm,_device_type,_placementid,_siteid',
67
+ 'tz_offset' => 'UTC',
68
+ '__column_info' => '[{\"name\":\"_date\",\"label\":\"Date\",\"no_limit\":true,\"tip\":\"Shows+the+date+that+impressions+were+served+on\"},{\"name\":\"_placementid_name\",\"label\":\"Placement\",\"tip\":\"Shows+the+Placement+name+that+impressions+were+served+on\"},{\"name\":\"_siteid_name\",\"label\":\"Site\",\"tip\":\"Shows+the+name+of+the+site+that+impressions+were+served+on\"},{\"name\":\"_impression_count\",\"label\":\"Impressions\",\"format\":true,\"tip\":\"The+number+of+impressions+that+were+served\"},{\"name\":\"_impression_count_viewed\",\"label\":\"Viewable+Impressions\",\"format\":true,\"tip\":\"The+number+of+times+that+the+ad+was+viewed\"},{\"name\":\"_impression_count_clicked\",\"label\":\"Clicks\",\"format\":true,\"tip\":\"The+number+of+clicks+that+were+recorded\"},{\"name\":\"_unfilled_impressions\",\"label\":\"Unfilled+Impressions\",\"format\":true,\"tip\":\"The+number+of+impressions+could+not+be+served+due+to+ad+server+decisioning\"},{\"name\":\"_revenue\",\"label\":\"Revenue\",\"format\":\"currency\",\"pre\":\"$\",\"tip\":\"Gross+revenue+of+impressions+served\"},{\"name\":\"_ecpm\",\"label\":\"Delivered+CPM\",\"format\":\"currency\",\"pre\":\"$\",\"tip\":\"Average+CPM+of+impressions+served\"},{\"name\":\"_device_type\",\"label\":\"Device+Type\",\"tip\":\"Device+Types+include:+Desktop,+Mobile\"}]'
69
+ }
70
+
71
+ response = @client.post('https://jetstream.sonobi.com/public/', cm: 'report.request', key: @key, code: @code, _report: report_body.to_json, _report_type: 'publisher_report', _report_origin: 'publisher_reporting', _range: 'custom', _range_start_date: date_str, _range_end_date: date_str)
72
+ report = JSON.parse response.body
73
+ report_id = report['package']['_reportid']
74
+ sleep 5
63
75
 
64
- # check group by
65
- @client.check 'Date'
66
- @client.check 'Placement'
67
- @client.check 'Site'
68
- @client.check 'Clicks'
69
- @client.check 'Views'
70
- @client.check 'Unfilled Impressions'
71
- @client.check 'Device Type'
76
+ 30.times do # pull report 30 times
77
+ response = @client.post('https://jetstream.sonobi.com/public/', cm: 'report.get', key: @key, code: @code, _reportid: report_id, _wait: 'true')
78
+ report = JSON.parse response.body
79
+ report_status = report['package']['status']
80
+ case report_status
81
+ when 'complete'
82
+ break
83
+ when 'processing'
84
+ sleep 10
85
+ else
86
+ fail 'sonobi scrapper: unknown report status'
87
+ end
88
+ end
72
89
 
73
- @client.click_button 'Run Report'
74
- sleep 1
75
- wait_for_spin
90
+ @response = response
76
91
  end
77
92
 
78
93
  def extract_data_from_report(country)
79
- rows = @client.find_all :xpath, '//*[@class="reports_tab_item_body"]//table/*/tr'
80
- rows = rows.to_a
81
- header = rows.shift
94
+ report = JSON.parse @response.body
95
+ rows = report['package']['result']
82
96
  if @data.count == 0
83
- n_header = header.find_css('td,th').map { |td| td.visible_text }
84
- n_header << 'Country'
85
- @data << n_header
97
+ @data << ['Date', 'Placement', 'Site', 'Impressions', 'Clicks', 'Views', 'Unfilled Impressions', 'Revenue', 'Device Type', 'Country']
86
98
  end
87
- rows.shift
88
99
  @data += rows.map do |row|
89
- n_row = row.find_css('td,th').map { |td| td.visible_text }
100
+ n_keys = ["_date", "_placementid_name", "_siteid_name", "_impression_count", "_impression_count_clicked", "_impression_count_viewed", "_unfilled_impressions", "_revenue", "_device_type"]
101
+ n_row = n_keys.map { |k| row[k] }
90
102
  n_row << country.to_s.upcase
91
103
  n_row
92
104
  end
93
105
  end
94
-
95
- def wait_for_spin
96
- 30.times do |_i| # wait 5 min
97
- begin
98
- @client.find(:css, '.circle xlarge')
99
- rescue Exception => e
100
- break
101
- end
102
- sleep 10
103
- end
104
- sleep 5
105
- end
106
106
  end
@@ -1,3 +1,3 @@
1
1
  module AdopsReportScrapper
2
- VERSION = "0.1.13"
2
+ VERSION = "0.1.14"
3
3
  end
data/secret.sample.yml CHANGED
@@ -38,6 +38,10 @@ netseer:
38
38
  sonobi:
39
39
  login: ------
40
40
  secret: ------
41
+ options:
42
+ key: ------
43
+ code: ------
44
+ userid: ------
41
45
  nativo:
42
46
  login: ------
43
47
  secret: ------
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: adops_report_scrapper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.13
4
+ version: 0.1.14
5
5
  platform: ruby
6
6
  authors:
7
7
  - Stayman Hou
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-09-02 00:00:00.000000000 Z
11
+ date: 2016-09-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: httpclient