datahen 1.2.2 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8acff6d62851c24f1bd86d993255ca3c2233ec9b45f933964bf804b49e637611
|
4
|
+
data.tar.gz: 978f1ae63011cd20ad978640b5fa437416cc65930b29f143f6431c14bd059840
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4531cadcddab26fd218bd1c7462114985d72460dd1d68aaca2338bea2f7d9e16278b639795d379feaab62f68924376fd17fd93df2916069cb47797210ad6b370
|
7
|
+
data.tar.gz: dcd734cb8ed3876bbf7f62ed6a66004c8d21b7f8009f7b7591144887ddf5a1d12c07b6768a54e84daeaf09d20db6ea7702adaf9c526c83043351a7cf04612388
|
@@ -133,6 +133,21 @@ module Datahen
|
|
133
133
|
end
|
134
134
|
end
|
135
135
|
|
136
|
+
desc "sync_schema <scraper_name>", "deploy schema config"
|
137
|
+
long_desc <<-LONGDESC
|
138
|
+
Deploy a scraper's schema config
|
139
|
+
LONGDESC
|
140
|
+
option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
|
141
|
+
def sync_schema(scraper_name)
|
142
|
+
if options[:job]
|
143
|
+
client = Client::Job.new(options)
|
144
|
+
puts "#{client.sync_schema(options[:job])}"
|
145
|
+
else
|
146
|
+
client = Client::ScraperJob.new(options)
|
147
|
+
puts "#{client.sync_schema(scraper_name)}"
|
148
|
+
end
|
149
|
+
end
|
150
|
+
|
136
151
|
desc "var SUBCOMMAND ...ARGS", "for managing scraper's job variables"
|
137
152
|
subcommand "var", ScraperJobVar
|
138
153
|
|
data/lib/datahen/client/job.rb
CHANGED
@@ -75,6 +75,11 @@ module Datahen
|
|
75
75
|
params = @options.merge(opts)
|
76
76
|
self.class.delete("/scrapers/#{scraper_name}/current_job", params)
|
77
77
|
end
|
78
|
+
|
79
|
+
def sync_schema(scraper_name, opts={})
|
80
|
+
params = @options.merge(opts)
|
81
|
+
self.class.put("/scrapers/#{scraper_name}/current_job/sync/schema", params)
|
82
|
+
end
|
78
83
|
end
|
79
84
|
end
|
80
85
|
end
|
@@ -108,16 +108,32 @@ module Datahen
|
|
108
108
|
end
|
109
109
|
end
|
110
110
|
|
111
|
+
def update_parsing_status page_gid, status
|
112
|
+
return unless save
|
113
|
+
|
114
|
+
response = parsing_update(
|
115
|
+
job_id: job_id,
|
116
|
+
gid: page_gid,
|
117
|
+
parsing_status: status)
|
118
|
+
|
119
|
+
if response.code == 200
|
120
|
+
puts "Page #{page_gid} status changed to #{status}."
|
121
|
+
else
|
122
|
+
puts "Error: Unable to change page #{page_gid} status: #{response.body} to #{status}"
|
123
|
+
raise "Unable to change page #{page_gid} status: #{response.body} to #{status}"
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
111
127
|
def save_type
|
112
128
|
:parsing
|
113
129
|
end
|
114
130
|
|
115
|
-
def refetch_page
|
131
|
+
def refetch_page page_gid
|
116
132
|
if save
|
117
|
-
|
118
|
-
puts "Refetch page #{
|
133
|
+
update_parsing_status page_gid, :to_refetch
|
134
|
+
puts "Refetch page #{page_gid}"
|
119
135
|
else
|
120
|
-
puts "Would have refetch page #{
|
136
|
+
puts "Would have refetch page #{page_gid}"
|
121
137
|
end
|
122
138
|
end
|
123
139
|
|
@@ -130,12 +146,12 @@ module Datahen
|
|
130
146
|
refetch_page page_gid
|
131
147
|
end
|
132
148
|
|
133
|
-
def reparse_page
|
149
|
+
def reparse_page page_gid
|
134
150
|
if save
|
135
|
-
|
136
|
-
puts "Reparse page #{
|
151
|
+
update_parsing_status page_gid, :to_reparse
|
152
|
+
puts "Reparse page #{page_gid}"
|
137
153
|
else
|
138
|
-
puts "Would have reparse page #{
|
154
|
+
puts "Would have reparse page #{page_gid}"
|
139
155
|
end
|
140
156
|
end
|
141
157
|
|
@@ -148,12 +164,12 @@ module Datahen
|
|
148
164
|
reparse_page page_gid
|
149
165
|
end
|
150
166
|
|
151
|
-
def limbo_page
|
167
|
+
def limbo_page page_gid
|
152
168
|
if save
|
153
|
-
|
154
|
-
puts "Limbo page #{
|
169
|
+
update_parsing_status page_gid, :limbo
|
170
|
+
puts "Limbo page #{page_gid}"
|
155
171
|
else
|
156
|
-
puts "Would have limbo page #{
|
172
|
+
puts "Would have limbo page #{page_gid}"
|
157
173
|
end
|
158
174
|
end
|
159
175
|
|
@@ -204,13 +220,13 @@ module Datahen
|
|
204
220
|
end
|
205
221
|
|
206
222
|
if refetch_self
|
207
|
-
|
223
|
+
update_parsing_status gid, :to_refetch
|
208
224
|
elsif reparse_self
|
209
|
-
|
225
|
+
update_parsing_status gid, :to_reparse
|
210
226
|
elsif limbo_self
|
211
|
-
|
227
|
+
update_parsing_status gid, :limbo
|
212
228
|
else
|
213
|
-
|
229
|
+
update_parsing_status gid, :done
|
214
230
|
end
|
215
231
|
end
|
216
232
|
proc.call
|
data/lib/datahen/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: datahen
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Parama Danoesubroto
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-05-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: thor
|
@@ -278,7 +278,7 @@ metadata:
|
|
278
278
|
allowed_push_host: https://rubygems.org
|
279
279
|
homepage_uri: https://datahen.com
|
280
280
|
source_code_uri: https://github.com/DataHenOfficial/datahen-ruby
|
281
|
-
post_install_message:
|
281
|
+
post_install_message:
|
282
282
|
rdoc_options: []
|
283
283
|
require_paths:
|
284
284
|
- lib
|
@@ -293,8 +293,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
293
293
|
- !ruby/object:Gem::Version
|
294
294
|
version: '0'
|
295
295
|
requirements: []
|
296
|
-
rubygems_version: 3.
|
297
|
-
signing_key:
|
296
|
+
rubygems_version: 3.1.4
|
297
|
+
signing_key:
|
298
298
|
specification_version: 4
|
299
299
|
summary: DataHen toolbelt for developers
|
300
300
|
test_files: []
|