adops_report_scrapper 0.1.13 → 0.1.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1ca67e7025786adde54c380f4e623e8989e9789f
4
- data.tar.gz: e6fc196fdedaaa407b4f98546de5fc0320d0f147
3
+ metadata.gz: a8602cb3bc3c3466d5cd6f67b9cba9ed0002f75e
4
+ data.tar.gz: 8b37b2be038428dc7d31792a5e179aeb5cbb13f6
5
5
  SHA512:
6
- metadata.gz: 6edd51ce4a9953513989ff11c8b80fa7678847288eccfeebde43d79d62210e148f90dd6e5aad2aae92cdaaa91c89e2d7a330c901d026c515962f2914be1a7827
7
- data.tar.gz: aad764f4dfec0aec24418ed7d40b2534a7f4f8f50d920303ccd0fee0313428e07c63c93cca4fe68d73c9724e73d495025b64e271a647f4348ac658561d12e5d7
6
+ metadata.gz: 04a02dd62bbb3261fc3ec6f8039c4ed10d9386001d48d6f3b7f640b4fd45c897b8048d64148eef1b4969f3e33c9d444eacfdebd0c6c01cac388ec4038e284379
7
+ data.tar.gz: cc2bba6c3399ce0996b8014a8e4ce714df5313df839dfc4b1a9bcc910eef552685796c7de121fb0904ee183d24729898acb37709113b45921af2784910071114
@@ -26,7 +26,7 @@ class AdopsReportScrapper::OpenxClient < AdopsReportScrapper::BaseClient
26
26
  end
27
27
 
28
28
  def request_report
29
- @client.visit 'http://cmci-ui.openx.net/#/reports?tab=my_reports'
29
+ @client.visit "http://#{@account_prefix}.openx.net/#/reports?tab=my_reports"
30
30
  sleep 5
31
31
 
32
32
  begin
@@ -4,12 +4,25 @@ require_relative 'base_client'
4
4
  class AdopsReportScrapper::SonobiClient < AdopsReportScrapper::BaseClient
5
5
  def date_supported?(date = nil)
6
6
  _date = date || @date
7
- return true if _date >= Date.today - 2
7
+ return true if _date >= Date.today - 3
8
8
  false
9
9
  end
10
10
 
11
11
  private
12
12
 
13
+ def init_client
14
+ fail 'please specify sonobi key' unless @options['key']
15
+ fail 'please specify sonobi code' unless @options['code']
16
+ fail 'please specify sonobi userid' unless @options['userid']
17
+ @key = @options['key']
18
+ @code = @options['code']
19
+ @userid = @options['userid']
20
+ super
21
+ end
22
+
23
+ def before_quit_with_error
24
+ end
25
+
13
26
  def login
14
27
  @client.visit 'https://jetstream.sonobi.com/welcome/login.php'
15
28
  @client.fill_in 'user name', :with => @login
@@ -20,6 +33,14 @@ class AdopsReportScrapper::SonobiClient < AdopsReportScrapper::BaseClient
20
33
  rescue Exception => e
21
34
  raise e, 'Sonobi login error'
22
35
  end
36
+ cookies = @client.driver.cookies
37
+ @client.driver.close_window('0')
38
+
39
+ @client = HTTPClient.new
40
+ @client.cookie_manager.cookies = cookies.values.map do |cookie|
41
+ cookie = cookie.instance_variable_get(:@attributes)
42
+ HTTP::Cookie.new cookie
43
+ end
23
44
  end
24
45
 
25
46
  def scrap
@@ -33,74 +54,53 @@ class AdopsReportScrapper::SonobiClient < AdopsReportScrapper::BaseClient
33
54
  def request_report(country)
34
55
  date_str = @date.strftime('%Y-%m-%d')
35
56
  is_us = country == :us
36
- @client.find(:xpath, '//*[text()="Reports"]').click
37
- sleep 2
38
- # set date
39
- @client.select 'Custom'
40
- sleep 1
41
- @client.find(:xpath, '//input[@name="_range_start_date"]').click
42
- @client.find(:xpath, "//*[@date=\"#{date_str}\"]").click
43
- @client.find(:xpath, '//input[@name="_range_end_date"]').click
44
- @client.find(:xpath, "//*[@date=\"#{date_str}\"]").click
45
57
 
46
- # all sites
47
- @client.find(:xpath, '//div[@name="_siteid"]').click
48
- sleep 2
49
- @client.find(:xpath, '//*[text()="Select All"]').click
58
+ response = @client.post('https://jetstream.sonobi.com/public/', cm: 'category.list', key: @key, code: @code, _userid: @userid, _parentid: 'locations')
59
+ countries = JSON.parse response.body
50
60
 
51
- # select country
52
- @client.find(:xpath, '//*[text()="Add New Filter"]').click
53
- @client.find_all(:xpath, '//option[text()="Country"]').last.select_option
54
- sleep 2
55
- @client.find(:xpath, '//div[@name="_country"]').click
56
- sleep 2
57
- if is_us
58
- @client.find(:xpath, '//*[text()="United States"]').click
59
- else
60
- @client.find(:xpath, '//*[text()="Select All"]').click
61
- @client.find(:xpath, '//*[@class="remove_icon"][../*[text()="United States"]]').click
62
- end
61
+ report_body = {
62
+ '_userid': @userid,
63
+ '_country' => is_us ? 'US' : 'AF,AX,AL,DZ,AS,AD,AO,AI,AQ,AG,AR,AM,AW,AP,AU,AT,AZ,BS,BH,BD,BB,BY,BE,BZ,BJ,BM,BT,BO,BA,BW,BV,BR,IO,BN,BG,BF,BI,KH,CM,CA,CV,KY,CF,TD,CL,CN,CX,CC,CO,KM,CG,CD,CK,CR,CI,HR,CU,CY,CZ,DK,DJ,DM,DO,EC,EG,SV,GQ,ER,EE,ET,EU,FK,FO,FJ,FI,FR,GF,PF,TF,GA,GM,GE,DE,GH,GI,GR,GL,GD,GP,GU,GT,GG,GN,GW,GY,HT,HM,VA,HN,HK,HU,IS,IN,ID,IR,IQ,IE,IM,IL,IT,JM,JP,JE,JO,KZ,KE,KI,KP,KR,KW,KG,LA,LV,LB,LS,LR,LY,LI,LT,LU,MO,MK,MG,MW,MY,MV,ML,MT,MH,MQ,MR,MU,YT,MX,FM,MD,MC,MN,ME,MS,MA,MZ,MM,NA,NR,NP,NL,AN,NC,NZ,NI,NE,NG,NU,NF,MP,NO,OM,PK,PW,PS,PA,PG,PY,PE,PH,PN,PL,PT,PR,QA,RE,RO,RU,RW,SH,KN,LC,PM,VC,WS,SM,ST,SA,SN,RS,SC,SL,SG,SK,SI,SB,SO,ZA,GS,ES,LK,SD,SR,SJ,SZ,SE,CH,SY,TW,TJ,TZ,TH,TL,TG,TK,TO,TT,TN,TR,TM,TC,TV,UG,UA,AE,GB,UM,UY,UZ,VU,VE,VN,VG,VI,WF,EH,YE,ZM,ZW,--',
64
+ 'groupby' => 'day',
65
+ 'row_per' => '_date,_placementid,_siteid',
66
+ 'columns' => '_date,_placementid_name,_siteid_name,_impression_count,_impression_count_viewed,_impression_count_clicked,_unfilled_impressions,_revenue,_ecpm,_device_type,_placementid,_siteid',
67
+ 'tz_offset' => 'UTC',
68
+ '__column_info' => '[{\"name\":\"_date\",\"label\":\"Date\",\"no_limit\":true,\"tip\":\"Shows+the+date+that+impressions+were+served+on\"},{\"name\":\"_placementid_name\",\"label\":\"Placement\",\"tip\":\"Shows+the+Placement+name+that+impressions+were+served+on\"},{\"name\":\"_siteid_name\",\"label\":\"Site\",\"tip\":\"Shows+the+name+of+the+site+that+impressions+were+served+on\"},{\"name\":\"_impression_count\",\"label\":\"Impressions\",\"format\":true,\"tip\":\"The+number+of+impressions+that+were+served\"},{\"name\":\"_impression_count_viewed\",\"label\":\"Viewable+Impressions\",\"format\":true,\"tip\":\"The+number+of+times+that+the+ad+was+viewed\"},{\"name\":\"_impression_count_clicked\",\"label\":\"Clicks\",\"format\":true,\"tip\":\"The+number+of+clicks+that+were+recorded\"},{\"name\":\"_unfilled_impressions\",\"label\":\"Unfilled+Impressions\",\"format\":true,\"tip\":\"The+number+of+impressions+could+not+be+served+due+to+ad+server+decisioning\"},{\"name\":\"_revenue\",\"label\":\"Revenue\",\"format\":\"currency\",\"pre\":\"$\",\"tip\":\"Gross+revenue+of+impressions+served\"},{\"name\":\"_ecpm\",\"label\":\"Delivered+CPM\",\"format\":\"currency\",\"pre\":\"$\",\"tip\":\"Average+CPM+of+impressions+served\"},{\"name\":\"_device_type\",\"label\":\"Device+Type\",\"tip\":\"Device+Types+include:+Desktop,+Mobile\"}]'
69
+ }
70
+
71
+ response = @client.post('https://jetstream.sonobi.com/public/', cm: 'report.request', key: @key, code: @code, _report: report_body.to_json, _report_type: 'publisher_report', _report_origin: 'publisher_reporting', _range: 'custom', _range_start_date: date_str, _range_end_date: date_str)
72
+ report = JSON.parse response.body
73
+ report_id = report['package']['_reportid']
74
+ sleep 5
63
75
 
64
- # check group by
65
- @client.check 'Date'
66
- @client.check 'Placement'
67
- @client.check 'Site'
68
- @client.check 'Clicks'
69
- @client.check 'Views'
70
- @client.check 'Unfilled Impressions'
71
- @client.check 'Device Type'
76
+ 30.times do # pull report 30 times
77
+ response = @client.post('https://jetstream.sonobi.com/public/', cm: 'report.get', key: @key, code: @code, _reportid: report_id, _wait: 'true')
78
+ report = JSON.parse response.body
79
+ report_status = report['package']['status']
80
+ case report_status
81
+ when 'complete'
82
+ break
83
+ when 'processing'
84
+ sleep 10
85
+ else
86
+ fail 'sonobi scrapper: unknown report status'
87
+ end
88
+ end
72
89
 
73
- @client.click_button 'Run Report'
74
- sleep 1
75
- wait_for_spin
90
+ @response = response
76
91
  end
77
92
 
78
93
  def extract_data_from_report(country)
79
- rows = @client.find_all :xpath, '//*[@class="reports_tab_item_body"]//table/*/tr'
80
- rows = rows.to_a
81
- header = rows.shift
94
+ report = JSON.parse @response.body
95
+ rows = report['package']['result']
82
96
  if @data.count == 0
83
- n_header = header.find_css('td,th').map { |td| td.visible_text }
84
- n_header << 'Country'
85
- @data << n_header
97
+ @data << ['Date', 'Placement', 'Site', 'Impressions', 'Clicks', 'Views', 'Unfilled Impressions', 'Revenue', 'Device Type', 'Country']
86
98
  end
87
- rows.shift
88
99
  @data += rows.map do |row|
89
- n_row = row.find_css('td,th').map { |td| td.visible_text }
100
+ n_keys = ["_date", "_placementid_name", "_siteid_name", "_impression_count", "_impression_count_clicked", "_impression_count_viewed", "_unfilled_impressions", "_revenue", "_device_type"]
101
+ n_row = n_keys.map { |k| row[k] }
90
102
  n_row << country.to_s.upcase
91
103
  n_row
92
104
  end
93
105
  end
94
-
95
- def wait_for_spin
96
- 30.times do |_i| # wait 5 min
97
- begin
98
- @client.find(:css, '.circle xlarge')
99
- rescue Exception => e
100
- break
101
- end
102
- sleep 10
103
- end
104
- sleep 5
105
- end
106
106
  end
@@ -1,3 +1,3 @@
1
1
  module AdopsReportScrapper
2
- VERSION = "0.1.13"
2
+ VERSION = "0.1.14"
3
3
  end
data/secret.sample.yml CHANGED
@@ -38,6 +38,10 @@ netseer:
38
38
  sonobi:
39
39
  login: ------
40
40
  secret: ------
41
+ options:
42
+ key: ------
43
+ code: ------
44
+ userid: ------
41
45
  nativo:
42
46
  login: ------
43
47
  secret: ------
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: adops_report_scrapper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.13
4
+ version: 0.1.14
5
5
  platform: ruby
6
6
  authors:
7
7
  - Stayman Hou
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-09-02 00:00:00.000000000 Z
11
+ date: 2016-09-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: httpclient