adops_report_scrapper 0.1.29 → 0.1.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Rakefile +6 -1
- data/lib/adops_report_scrapper.rb +1 -0
- data/lib/adops_report_scrapper/littlethings_client.rb +50 -0
- data/lib/adops_report_scrapper/version.rb +1 -1
- data/secret.sample.yml +3 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7c70e5a5ebc549c6f187dc54438c39e6c1030dc5
|
4
|
+
data.tar.gz: b546f11751c3e55fae68847bea85a12547599875
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 463417877c7112eefe8adf41e1781232e25d8599108898d2d90ff1f4fa1e03ccb4b8767032bcf84bbdda1b946ebe0b1b5599aa9a6d545fc83bcc3716233fde62
|
7
|
+
data.tar.gz: 00c69ea686a7dae418294da3e3fe46857f04f1d69a8ace9f9685f70e4e0e66483b5a19e61e91d0981939a4917d12b5ca817b05cba226c50003cf6467c021547b
|
data/Rakefile
CHANGED
@@ -7,7 +7,7 @@ require 'adops_report_scrapper'
|
|
7
7
|
require 'byebug'
|
8
8
|
|
9
9
|
desc 'Collect all data'
|
10
|
-
task :all => [:openx, :tremor, :brightroll, :yellowhammer, :adaptv, :fourninefive, :adx, :revcontent, :gcs, :browsi, :netseer, :sonobi, :nativo, :adsupply, :marfeel, :adsense, :criteo, :triplelift, :conversant, :liveintent, :adiply, :contentad, :facebookaudience, :adtechus, :adtomation, :rhythmone] do # openx is the most unstable one, run it first
|
10
|
+
task :all => [:openx, :tremor, :brightroll, :yellowhammer, :adaptv, :fourninefive, :adx, :revcontent, :gcs, :browsi, :netseer, :sonobi, :nativo, :adsupply, :marfeel, :adsense, :criteo, :triplelift, :conversant, :liveintent, :adiply, :contentad, :facebookaudience, :adtechus, :adtomation, :rhythmone, :littlethings] do # openx is the most unstable one, run it first
|
11
11
|
puts '========== You are all set'
|
12
12
|
end
|
13
13
|
|
@@ -146,6 +146,11 @@ task :rhythmone do
|
|
146
146
|
save_as_csv :rhythmone, :rhythmone
|
147
147
|
end
|
148
148
|
|
149
|
+
desc 'Collect littlethings data'
|
150
|
+
task :littlethings do
|
151
|
+
save_as_csv :littlethings, :littlethings
|
152
|
+
end
|
153
|
+
|
149
154
|
def date
|
150
155
|
@date ||= ENV['date'].nil? ? Date.today - 1 : Date.today - ENV['date'].to_i
|
151
156
|
end
|
data/lib/adops_report_scrapper/littlethings_client.rb
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
require 'date'
|
2
|
+
require_relative 'base_client'
|
3
|
+
|
4
|
+
class AdopsReportScrapper::LittlethingsClient < AdopsReportScrapper::BaseClient
|
5
|
+
def date_supported?(date = nil)
|
6
|
+
_date = date || @date
|
7
|
+
return true if _date >= Date.today - 4
|
8
|
+
false
|
9
|
+
end
|
10
|
+
|
11
|
+
private
|
12
|
+
|
13
|
+
def login
|
14
|
+
@client.visit 'http://www.reportingthings.com'
|
15
|
+
@client.fill_in 'email', :with => @login
|
16
|
+
@client.fill_in 'password', :with => @secret
|
17
|
+
@client.click_button 'Sign In'
|
18
|
+
begin
|
19
|
+
@client.find :xpath, '//*[contains(text(),"Report")]'
|
20
|
+
rescue Exception => e
|
21
|
+
raise e, 'Littlethings login error'
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
def scrap
|
26
|
+
request_report
|
27
|
+
extract_data_from_report
|
28
|
+
end
|
29
|
+
|
30
|
+
def request_report
|
31
|
+
@client.find(:xpath, '//*[contains(text(),"Report")]').click
|
32
|
+
pub_id = @client.body.match(/\/report\/story\/(\d+)\/all\/all/).captures[0]
|
33
|
+
daterange_end_str = Date.today.strftime('%m/%d/%Y')
|
34
|
+
daterange_begin_str = (Date.today - 5).strftime('%m/%d/%Y')
|
35
|
+
daterange_str = "#{daterange_begin_str} - #{daterange_end_str}"
|
36
|
+
@client.visit "http://www.reportingthings.com/report/story/#{pub_id}/all/all?type=revenue&daterange=#{URI.encode(daterange_str)}"
|
37
|
+
end
|
38
|
+
|
39
|
+
def extract_data_from_report
|
40
|
+
date_str = @date.strftime('%m/%d/%Y')
|
41
|
+
rows = @client.find_all :xpath, '//table[@id="report-table"]/*/tr'
|
42
|
+
rows = rows.map { |tr| tr.find_css('td,th').map { |td| td.visible_text } }
|
43
|
+
header = rows.shift
|
44
|
+
@data = [header]
|
45
|
+
rows.each do |row|
|
46
|
+
next unless row[0] == date_str
|
47
|
+
@data << row
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
data/secret.sample.yml
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: adops_report_scrapper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.29
|
4
|
+
version: 0.1.30
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Stayman Hou
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-10-
|
11
|
+
date: 2016-10-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: httpclient
|
@@ -233,6 +233,7 @@ files:
|
|
233
233
|
- lib/adops_report_scrapper/criteo_client.rb
|
234
234
|
- lib/adops_report_scrapper/facebookaudience_client.rb
|
235
235
|
- lib/adops_report_scrapper/gcs_client.rb
|
236
|
+
- lib/adops_report_scrapper/littlethings_client.rb
|
236
237
|
- lib/adops_report_scrapper/liveintent_client.rb
|
237
238
|
- lib/adops_report_scrapper/marfeel_client.rb
|
238
239
|
- lib/adops_report_scrapper/nativo_client.rb
|