adops_report_scrapper 0.1.13 → 0.1.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a8602cb3bc3c3466d5cd6f67b9cba9ed0002f75e
|
4
|
+
data.tar.gz: 8b37b2be038428dc7d31792a5e179aeb5cbb13f6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 04a02dd62bbb3261fc3ec6f8039c4ed10d9386001d48d6f3b7f640b4fd45c897b8048d64148eef1b4969f3e33c9d444eacfdebd0c6c01cac388ec4038e284379
|
7
|
+
data.tar.gz: cc2bba6c3399ce0996b8014a8e4ce714df5313df839dfc4b1a9bcc910eef552685796c7de121fb0904ee183d24729898acb37709113b45921af2784910071114
|
@@ -26,7 +26,7 @@ class AdopsReportScrapper::OpenxClient < AdopsReportScrapper::BaseClient
|
|
26
26
|
end
|
27
27
|
|
28
28
|
def request_report
|
29
|
-
@client.visit
|
29
|
+
@client.visit "http://#{@account_prefix}.openx.net/#/reports?tab=my_reports"
|
30
30
|
sleep 5
|
31
31
|
|
32
32
|
begin
|
@@ -4,12 +4,25 @@ require_relative 'base_client'
|
|
4
4
|
class AdopsReportScrapper::SonobiClient < AdopsReportScrapper::BaseClient
|
5
5
|
def date_supported?(date = nil)
|
6
6
|
_date = date || @date
|
7
|
-
return true if _date >= Date.today -
|
7
|
+
return true if _date >= Date.today - 3
|
8
8
|
false
|
9
9
|
end
|
10
10
|
|
11
11
|
private
|
12
12
|
|
13
|
+
def init_client
|
14
|
+
fail 'please specify sonobi key' unless @options['key']
|
15
|
+
fail 'please specify sonobi code' unless @options['code']
|
16
|
+
fail 'please specify sonobi userid' unless @options['userid']
|
17
|
+
@key = @options['key']
|
18
|
+
@code = @options['code']
|
19
|
+
@userid = @options['userid']
|
20
|
+
super
|
21
|
+
end
|
22
|
+
|
23
|
+
def before_quit_with_error
|
24
|
+
end
|
25
|
+
|
13
26
|
def login
|
14
27
|
@client.visit 'https://jetstream.sonobi.com/welcome/login.php'
|
15
28
|
@client.fill_in 'user name', :with => @login
|
@@ -20,6 +33,14 @@ class AdopsReportScrapper::SonobiClient < AdopsReportScrapper::BaseClient
|
|
20
33
|
rescue Exception => e
|
21
34
|
raise e, 'Sonobi login error'
|
22
35
|
end
|
36
|
+
cookies = @client.driver.cookies
|
37
|
+
@client.driver.close_window('0')
|
38
|
+
|
39
|
+
@client = HTTPClient.new
|
40
|
+
@client.cookie_manager.cookies = cookies.values.map do |cookie|
|
41
|
+
cookie = cookie.instance_variable_get(:@attributes)
|
42
|
+
HTTP::Cookie.new cookie
|
43
|
+
end
|
23
44
|
end
|
24
45
|
|
25
46
|
def scrap
|
@@ -33,74 +54,53 @@ class AdopsReportScrapper::SonobiClient < AdopsReportScrapper::BaseClient
|
|
33
54
|
def request_report(country)
|
34
55
|
date_str = @date.strftime('%Y-%m-%d')
|
35
56
|
is_us = country == :us
|
36
|
-
@client.find(:xpath, '//*[text()="Reports"]').click
|
37
|
-
sleep 2
|
38
|
-
# set date
|
39
|
-
@client.select 'Custom'
|
40
|
-
sleep 1
|
41
|
-
@client.find(:xpath, '//input[@name="_range_start_date"]').click
|
42
|
-
@client.find(:xpath, "//*[@date=\"#{date_str}\"]").click
|
43
|
-
@client.find(:xpath, '//input[@name="_range_end_date"]').click
|
44
|
-
@client.find(:xpath, "//*[@date=\"#{date_str}\"]").click
|
45
57
|
|
46
|
-
|
47
|
-
|
48
|
-
sleep 2
|
49
|
-
@client.find(:xpath, '//*[text()="Select All"]').click
|
58
|
+
response = @client.post('https://jetstream.sonobi.com/public/', cm: 'category.list', key: @key, code: @code, _userid: @userid, _parentid: 'locations')
|
59
|
+
countries = JSON.parse response.body
|
50
60
|
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
61
|
+
report_body = {
|
62
|
+
'_userid': @userid,
|
63
|
+
'_country' => is_us ? 'US' : 'AF,AX,AL,DZ,AS,AD,AO,AI,AQ,AG,AR,AM,AW,AP,AU,AT,AZ,BS,BH,BD,BB,BY,BE,BZ,BJ,BM,BT,BO,BA,BW,BV,BR,IO,BN,BG,BF,BI,KH,CM,CA,CV,KY,CF,TD,CL,CN,CX,CC,CO,KM,CG,CD,CK,CR,CI,HR,CU,CY,CZ,DK,DJ,DM,DO,EC,EG,SV,GQ,ER,EE,ET,EU,FK,FO,FJ,FI,FR,GF,PF,TF,GA,GM,GE,DE,GH,GI,GR,GL,GD,GP,GU,GT,GG,GN,GW,GY,HT,HM,VA,HN,HK,HU,IS,IN,ID,IR,IQ,IE,IM,IL,IT,JM,JP,JE,JO,KZ,KE,KI,KP,KR,KW,KG,LA,LV,LB,LS,LR,LY,LI,LT,LU,MO,MK,MG,MW,MY,MV,ML,MT,MH,MQ,MR,MU,YT,MX,FM,MD,MC,MN,ME,MS,MA,MZ,MM,NA,NR,NP,NL,AN,NC,NZ,NI,NE,NG,NU,NF,MP,NO,OM,PK,PW,PS,PA,PG,PY,PE,PH,PN,PL,PT,PR,QA,RE,RO,RU,RW,SH,KN,LC,PM,VC,WS,SM,ST,SA,SN,RS,SC,SL,SG,SK,SI,SB,SO,ZA,GS,ES,LK,SD,SR,SJ,SZ,SE,CH,SY,TW,TJ,TZ,TH,TL,TG,TK,TO,TT,TN,TR,TM,TC,TV,UG,UA,AE,GB,UM,UY,UZ,VU,VE,VN,VG,VI,WF,EH,YE,ZM,ZW,--',
|
64
|
+
'groupby' => 'day',
|
65
|
+
'row_per' => '_date,_placementid,_siteid',
|
66
|
+
'columns' => '_date,_placementid_name,_siteid_name,_impression_count,_impression_count_viewed,_impression_count_clicked,_unfilled_impressions,_revenue,_ecpm,_device_type,_placementid,_siteid',
|
67
|
+
'tz_offset' => 'UTC',
|
68
|
+
'__column_info' => '[{\"name\":\"_date\",\"label\":\"Date\",\"no_limit\":true,\"tip\":\"Shows+the+date+that+impressions+were+served+on\"},{\"name\":\"_placementid_name\",\"label\":\"Placement\",\"tip\":\"Shows+the+Placement+name+that+impressions+were+served+on\"},{\"name\":\"_siteid_name\",\"label\":\"Site\",\"tip\":\"Shows+the+name+of+the+site+that+impressions+were+served+on\"},{\"name\":\"_impression_count\",\"label\":\"Impressions\",\"format\":true,\"tip\":\"The+number+of+impressions+that+were+served\"},{\"name\":\"_impression_count_viewed\",\"label\":\"Viewable+Impressions\",\"format\":true,\"tip\":\"The+number+of+times+that+the+ad+was+viewed\"},{\"name\":\"_impression_count_clicked\",\"label\":\"Clicks\",\"format\":true,\"tip\":\"The+number+of+clicks+that+were+recorded\"},{\"name\":\"_unfilled_impressions\",\"label\":\"Unfilled+Impressions\",\"format\":true,\"tip\":\"The+number+of+impressions+could+not+be+served+due+to+ad+server+decisioning\"},{\"name\":\"_revenue\",\"label\":\"Revenue\",\"format\":\"currency\",\"pre\":\"$\",\"tip\":\"Gross+revenue+of+impressions+served\"},{\"name\":\"_ecpm\",\"label\":\"Delivered+CPM\",\"format\":\"currency\",\"pre\":\"$\",\"tip\":\"Average+CPM+of+impressions+served\"},{\"name\":\"_device_type\",\"label\":\"Device+Type\",\"tip\":\"Device+Types+include:+Desktop,+Mobile\"}]'
|
69
|
+
}
|
70
|
+
|
71
|
+
response = @client.post('https://jetstream.sonobi.com/public/', cm: 'report.request', key: @key, code: @code, _report: report_body.to_json, _report_type: 'publisher_report', _report_origin: 'publisher_reporting', _range: 'custom', _range_start_date: date_str, _range_end_date: date_str)
|
72
|
+
report = JSON.parse response.body
|
73
|
+
report_id = report['package']['_reportid']
|
74
|
+
sleep 5
|
63
75
|
|
64
|
-
#
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
76
|
+
30.times do # pull report 30 times
|
77
|
+
response = @client.post('https://jetstream.sonobi.com/public/', cm: 'report.get', key: @key, code: @code, _reportid: report_id, _wait: 'true')
|
78
|
+
report = JSON.parse response.body
|
79
|
+
report_status = report['package']['status']
|
80
|
+
case report_status
|
81
|
+
when 'complete'
|
82
|
+
break
|
83
|
+
when 'processing'
|
84
|
+
sleep 10
|
85
|
+
else
|
86
|
+
fail 'sonobi scrapper: unknown report status'
|
87
|
+
end
|
88
|
+
end
|
72
89
|
|
73
|
-
@
|
74
|
-
sleep 1
|
75
|
-
wait_for_spin
|
90
|
+
@response = response
|
76
91
|
end
|
77
92
|
|
78
93
|
def extract_data_from_report(country)
|
79
|
-
|
80
|
-
rows =
|
81
|
-
header = rows.shift
|
94
|
+
report = JSON.parse @response.body
|
95
|
+
rows = report['package']['result']
|
82
96
|
if @data.count == 0
|
83
|
-
|
84
|
-
n_header << 'Country'
|
85
|
-
@data << n_header
|
97
|
+
@data << ['Date', 'Placement', 'Site', 'Impressions', 'Clicks', 'Views', 'Unfilled Impressions', 'Revenue', 'Device Type', 'Country']
|
86
98
|
end
|
87
|
-
rows.shift
|
88
99
|
@data += rows.map do |row|
|
89
|
-
|
100
|
+
n_keys = ["_date", "_placementid_name", "_siteid_name", "_impression_count", "_impression_count_clicked", "_impression_count_viewed", "_unfilled_impressions", "_revenue", "_device_type"]
|
101
|
+
n_row = n_keys.map { |k| row[k] }
|
90
102
|
n_row << country.to_s.upcase
|
91
103
|
n_row
|
92
104
|
end
|
93
105
|
end
|
94
|
-
|
95
|
-
def wait_for_spin
|
96
|
-
30.times do |_i| # wait 5 min
|
97
|
-
begin
|
98
|
-
@client.find(:css, '.circle xlarge')
|
99
|
-
rescue Exception => e
|
100
|
-
break
|
101
|
-
end
|
102
|
-
sleep 10
|
103
|
-
end
|
104
|
-
sleep 5
|
105
|
-
end
|
106
106
|
end
|
data/secret.sample.yml
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: adops_report_scrapper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.13
|
4
|
+
version: 0.1.14
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Stayman Hou
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-09-
|
11
|
+
date: 2016-09-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: httpclient
|