datahen 0.11.2 → 0.12.0
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 915387ff86d8c0f0c92a1f103dc9d7d346df5d281aecec611e4ced0f37498253
|
|
4
|
+
data.tar.gz: 5565fa02cf9c234fc3ac4e5ccaafea64316981569d500e1cf13d2d3f5b80ffa8
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: bb66f09cfb02c96cc31f6659b00185bd407b40ba60457fd919f1efa5fee51c5f412fdac8febe897ecb370ce4bfe9881f76700b076802f4608c7d065eaff904d6
|
|
7
|
+
data.tar.gz: a5979d502604f16af80bd431782b4ba7275b470476733f0af6be5a0c1d0bfeb77f3a09ec3cec03791e47ddee1531f0135684214a1cd3c92ef521e09d61cbf71a
|
|
@@ -12,28 +12,15 @@ module Datahen
|
|
|
12
12
|
def content(gid)
|
|
13
13
|
client = Client::GlobalPage.new(options)
|
|
14
14
|
result = JSON.parse(client.find_content(gid).to_s)
|
|
15
|
-
|
|
15
|
+
|
|
16
16
|
if result['available'] == true
|
|
17
17
|
puts "Preview content url: \"#{result['preview_url']}\""
|
|
18
18
|
`open "#{result['preview_url']}"`
|
|
19
19
|
else
|
|
20
20
|
puts "Content does not exist"
|
|
21
|
-
end
|
|
21
|
+
end
|
|
22
22
|
end
|
|
23
23
|
|
|
24
|
-
desc "failedcontent <gid>", "Show failed content of a globalpage"
|
|
25
|
-
def failedcontent(gid)
|
|
26
|
-
client = Client::GlobalPage.new(options)
|
|
27
|
-
result = JSON.parse(client.find_failed_content(gid).to_s)
|
|
28
|
-
|
|
29
|
-
if result['available'] == true
|
|
30
|
-
puts "Preview failed content url: \"#{result['preview_url']}\""
|
|
31
|
-
`open "#{result['preview_url']}"`
|
|
32
|
-
else
|
|
33
|
-
puts "Failed Content does not exist"
|
|
34
|
-
end
|
|
35
|
-
end
|
|
36
|
-
|
|
37
24
|
end
|
|
38
25
|
end
|
|
39
26
|
end
|
|
@@ -17,6 +17,7 @@ module Datahen
|
|
|
17
17
|
option :per_page, :aliases => :P, type: :numeric, desc: 'Number of records per page. Max 500 per page.'
|
|
18
18
|
option :fetch_fail, type: :boolean, desc: 'Returns only pages that fails fetching.'
|
|
19
19
|
option :parse_fail, type: :boolean, desc: 'Returns only pages that fails parsing.'
|
|
20
|
+
option :status, type: :string, desc: 'Returns only pages with specific status.'
|
|
20
21
|
def list(scraper_name)
|
|
21
22
|
if options[:job]
|
|
22
23
|
client = Client::JobPage.new(options)
|
|
@@ -197,6 +198,46 @@ module Datahen
|
|
|
197
198
|
end
|
|
198
199
|
end
|
|
199
200
|
|
|
201
|
+
desc "content <scraper_name> <gid>", "Show a page's content in scraper's current job"
|
|
202
|
+
option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
|
|
203
|
+
def content(scraper_name, gid)
|
|
204
|
+
result = nil
|
|
205
|
+
if options[:job]
|
|
206
|
+
client = Client::JobPage.new(options)
|
|
207
|
+
result = JSON.parse(client.find_content(options[:job], gid).to_s)
|
|
208
|
+
else
|
|
209
|
+
client = Client::ScraperJobPage.new(options)
|
|
210
|
+
result = JSON.parse(client.find_content(scraper_name, gid).to_s)
|
|
211
|
+
end
|
|
212
|
+
|
|
213
|
+
if result['available'] == true
|
|
214
|
+
puts "Preview content url: \"#{result['preview_url']}\""
|
|
215
|
+
`open "#{result['preview_url']}"`
|
|
216
|
+
else
|
|
217
|
+
puts "Content does not exist"
|
|
218
|
+
end
|
|
219
|
+
end
|
|
220
|
+
|
|
221
|
+
desc "failedcontent <gid>", "Show a page's failed content in scraper's current job"
|
|
222
|
+
option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
|
|
223
|
+
def failedcontent(scraper_name, gid)
|
|
224
|
+
result = nil
|
|
225
|
+
if options[:job]
|
|
226
|
+
client = Client::JobPage.new(options)
|
|
227
|
+
result = JSON.parse(client.find_failed_content(options[:job], gid).to_s)
|
|
228
|
+
else
|
|
229
|
+
client = Client::ScraperJobPage.new(options)
|
|
230
|
+
result = JSON.parse(client.find_failed_content(scraper_name, gid).to_s)
|
|
231
|
+
end
|
|
232
|
+
|
|
233
|
+
if result['available'] == true
|
|
234
|
+
puts "Preview failed content url: \"#{result['preview_url']}\""
|
|
235
|
+
`open "#{result['preview_url']}"`
|
|
236
|
+
else
|
|
237
|
+
puts "Failed Content does not exist"
|
|
238
|
+
end
|
|
239
|
+
end
|
|
240
|
+
|
|
200
241
|
end
|
|
201
242
|
end
|
|
202
243
|
|
|
@@ -53,6 +53,14 @@ module Datahen
|
|
|
53
53
|
|
|
54
54
|
self.class.put("/jobs/#{job_id}/pages/#{gid}/parsing_update", params)
|
|
55
55
|
end
|
|
56
|
+
|
|
57
|
+
def find_content(job_id, gid)
|
|
58
|
+
self.class.get("/jobs/#{job_id}/pages/#{gid}/content", @options)
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
def find_failed_content(job_id, gid)
|
|
62
|
+
self.class.get("/jobs/#{job_id}/pages/#{gid}/failed_content", @options)
|
|
63
|
+
end
|
|
56
64
|
end
|
|
57
65
|
end
|
|
58
66
|
end
|
|
@@ -62,6 +62,14 @@ module Datahen
|
|
|
62
62
|
self.class.post("/scrapers/#{scraper_name}/current_job/pages", params)
|
|
63
63
|
end
|
|
64
64
|
|
|
65
|
+
def find_content(scraper_name, gid)
|
|
66
|
+
self.class.get("/scrapers/#{scraper_name}/current_job/pages/#{gid}/content", @options)
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
def find_failed_content(scraper_name, gid)
|
|
70
|
+
self.class.get("/scrapers/#{scraper_name}/current_job/pages/#{gid}/failed_content", @options)
|
|
71
|
+
end
|
|
72
|
+
|
|
65
73
|
end
|
|
66
74
|
end
|
|
67
75
|
end
|
data/lib/datahen/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: datahen
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.11.2
|
|
4
|
+
version: 0.12.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Parama Danoesubroto
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2020-
|
|
11
|
+
date: 2020-02-27 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: thor
|