lockss-pyclient 0.1.0.dev2__py3-none-any.whl → 0.1.0.dev3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lockss/pyclient/__init__.py +8 -29
- lockss/pyclient/__main__.py +37 -0
- lockss/pyclient/_internal_common.py +395 -0
- lockss/pyclient/_internal_config.py +207 -0
- lockss/pyclient/_internal_crawler.py +251 -0
- lockss/pyclient/_internal_md.py +111 -0
- lockss/pyclient/_internal_poller.py +174 -0
- lockss/pyclient/_internal_rs.py +232 -0
- lockss/pyclient/cli.py +839 -0
- lockss/pyclient/config/__init__.py +15 -0
- lockss/pyclient/config/api/aus_api.py +14 -6
- lockss/pyclient/config/api/config_api.py +12 -12
- lockss/pyclient/config/api/utils_api.py +2 -2
- lockss/pyclient/config/configuration.py +1 -1
- lockss/pyclient/config/models/__init__.py +15 -0
- lockss/pyclient/config/models/access_type.py +90 -0
- lockss/pyclient/config/models/au_agreements.py +136 -0
- lockss/pyclient/config/models/au_config_page_info.py +140 -0
- lockss/pyclient/config/models/au_state_bean.py +942 -0
- lockss/pyclient/config/models/au_status.py +837 -8
- lockss/pyclient/config/models/au_suspect_url_versions.py +136 -0
- lockss/pyclient/config/models/check_substance_result.py +6 -22
- lockss/pyclient/config/models/dated_peer_id_set_impl.py +162 -0
- lockss/pyclient/config/models/hash_result.py +136 -0
- lockss/pyclient/config/models/page_info.py +226 -0
- lockss/pyclient/config/models/peer_agreement.py +188 -0
- lockss/pyclient/config/models/peer_agreements.py +136 -0
- lockss/pyclient/config/models/platform_configuration_ws_result.py +421 -8
- lockss/pyclient/config/models/platform_configuration_ws_result_daemon_version.py +188 -0
- lockss/pyclient/config/models/platform_configuration_ws_result_java_version.py +188 -0
- lockss/pyclient/config/models/platform_configuration_ws_result_platform.py +162 -0
- lockss/pyclient/config/models/substance_checker_state.py +91 -0
- lockss/pyclient/config/models/suspect_url_version.py +214 -0
- lockss/pyclient/config/swagger.yaml +2031 -0
- lockss/pyclient/crawler/__init__.py +1 -0
- lockss/pyclient/crawler/api/crawls_api.py +2 -2
- lockss/pyclient/crawler/configuration.py +1 -1
- lockss/pyclient/crawler/models/__init__.py +1 -0
- lockss/pyclient/crawler/models/crawl_desc.py +4 -12
- lockss/pyclient/crawler/models/crawl_kind_enum.py +90 -0
- lockss/pyclient/crawler/models/page_info.py +22 -24
- lockss/pyclient/crawler/swagger.yaml +1197 -0
- lockss/pyclient/md/configuration.py +1 -1
- lockss/pyclient/md/models/page_info.py +22 -24
- lockss/pyclient/md/swagger.yaml +583 -0
- lockss/pyclient/output.py +131 -0
- lockss/pyclient/poller/__init__.py +11 -5
- lockss/pyclient/poller/api/export_api.py +5 -5
- lockss/pyclient/poller/api/hash_api.py +3 -3
- lockss/pyclient/poller/api/poll_detail_api.py +42 -42
- lockss/pyclient/poller/api/poller_polls_api.py +18 -18
- lockss/pyclient/poller/api/service_api.py +2 -2
- lockss/pyclient/poller/api/voter_polls_api.py +18 -18
- lockss/pyclient/poller/configuration.py +1 -1
- lockss/pyclient/poller/models/__init__.py +11 -5
- lockss/pyclient/poller/models/export_file_type_enum.py +93 -0
- lockss/pyclient/poller/models/export_filename_translation_enum.py +91 -0
- lockss/pyclient/poller/models/page_info.py +226 -0
- lockss/pyclient/poller/models/poll_desc.py +3 -11
- lockss/pyclient/poller/models/poll_variant_enum.py +92 -0
- lockss/pyclient/poller/models/poller_page_info.py +140 -0
- lockss/pyclient/poller/models/repair_page_info.py +140 -0
- lockss/pyclient/poller/models/repair_type_enum.py +91 -0
- lockss/pyclient/poller/models/tally_type_enum.py +93 -0
- lockss/pyclient/poller/models/url_page_info.py +140 -0
- lockss/pyclient/poller/models/voter_page_info.py +140 -0
- lockss/pyclient/poller/models/voter_urls_enum.py +92 -0
- lockss/pyclient/poller/swagger.yaml +1658 -0
- lockss/pyclient/rs/__init__.py +6 -0
- lockss/pyclient/rs/api/artifacts_api.py +20 -20
- lockss/pyclient/rs/api/aus_api.py +5 -5
- lockss/pyclient/rs/api/repo_api.py +4 -4
- lockss/pyclient/rs/api/status_api.py +1 -1
- lockss/pyclient/rs/api/wayback_api.py +12 -12
- lockss/pyclient/rs/configuration.py +8 -1
- lockss/pyclient/rs/models/__init__.py +6 -0
- lockss/pyclient/rs/models/artifact.py +111 -81
- lockss/pyclient/rs/models/au_size.py +6 -0
- lockss/pyclient/rs/models/auid_page_info.py +2 -2
- lockss/pyclient/rs/models/bulk_au_op_enum.py +90 -0
- lockss/pyclient/rs/models/include_content_enum.py +91 -0
- lockss/pyclient/rs/models/page_info.py +26 -29
- lockss/pyclient/rs/models/pywb_match_enum.py +93 -0
- lockss/pyclient/rs/models/pywb_output_enum.py +90 -0
- lockss/pyclient/rs/models/pywb_sort_enum.py +91 -0
- lockss/pyclient/rs/models/storage_info.py +131 -80
- lockss/pyclient/rs/models/versions_enum.py +90 -0
- lockss/pyclient/rs/swagger.yaml +1306 -0
- {lockss_pyclient-0.1.0.dev2.dist-info → lockss_pyclient-0.1.0.dev3.dist-info}/METADATA +10 -3
- {lockss_pyclient-0.1.0.dev2.dist-info → lockss_pyclient-0.1.0.dev3.dist-info}/RECORD +93 -45
- {lockss_pyclient-0.1.0.dev2.dist-info → lockss_pyclient-0.1.0.dev3.dist-info}/WHEEL +1 -1
- lockss_pyclient-0.1.0.dev3.dist-info/entry_points.txt +3 -0
- {lockss_pyclient-0.1.0.dev2.dist-info → lockss_pyclient-0.1.0.dev3.dist-info/licenses}/LICENSE +0 -0
|
@@ -0,0 +1,1197 @@
|
|
|
1
|
+
# Copyright (c) 2000-2026, Board of Trustees of Leland Stanford Jr. University
|
|
2
|
+
#
|
|
3
|
+
# Redistribution and use in source and binary forms, with or without
|
|
4
|
+
# modification, are permitted provided that the following conditions are met:
|
|
5
|
+
#
|
|
6
|
+
# 1. Redistributions of source code must retain the above copyright notice,
|
|
7
|
+
# this list of conditions and the following disclaimer.
|
|
8
|
+
#
|
|
9
|
+
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
|
10
|
+
# this list of conditions and the following disclaimer in the documentation
|
|
11
|
+
# and/or other materials provided with the distribution.
|
|
12
|
+
#
|
|
13
|
+
# 3. Neither the name of the copyright holder nor the names of its contributors
|
|
14
|
+
# may be used to endorse or promote products derived from this software without
|
|
15
|
+
# specific prior written permission.
|
|
16
|
+
#
|
|
17
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
18
|
+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
19
|
+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
20
|
+
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
|
21
|
+
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
22
|
+
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
23
|
+
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
24
|
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
25
|
+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
26
|
+
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
27
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
|
28
|
+
|
|
29
|
+
openapi: 3.0.3
|
|
30
|
+
info:
|
|
31
|
+
title: LOCKSS Crawler Service REST API
|
|
32
|
+
description: REST API of the LOCKSS Crawler Service
|
|
33
|
+
version: 2.0.0
|
|
34
|
+
contact:
|
|
35
|
+
name: LOCKSS Support
|
|
36
|
+
url: https://www.lockss.org/
|
|
37
|
+
email: lockss-support@lockss.org
|
|
38
|
+
license:
|
|
39
|
+
name: BSD-3-Clause
|
|
40
|
+
url: https://opensource.org/licenses/BSD-3-Clause
|
|
41
|
+
servers:
|
|
42
|
+
- url: "{proto}://{hostname}:{port}/"
|
|
43
|
+
description: LOCKSS Crawler Service
|
|
44
|
+
variables:
|
|
45
|
+
proto:
|
|
46
|
+
description: "The protocol (default: http)."
|
|
47
|
+
enum:
|
|
48
|
+
- http
|
|
49
|
+
- https
|
|
50
|
+
default: http
|
|
51
|
+
hostname:
|
|
52
|
+
description: The service host name (or IP address).
|
|
53
|
+
default: localhost
|
|
54
|
+
port:
|
|
55
|
+
description: "The service port (default: 24614)."
|
|
56
|
+
default: 24614
|
|
57
|
+
security:
|
|
58
|
+
- basicAuth: []
|
|
59
|
+
tags:
|
|
60
|
+
- name: crawlers
|
|
61
|
+
description: Crawler operations
|
|
62
|
+
- name: crawls
|
|
63
|
+
description: Crawl operations
|
|
64
|
+
- name: jobs
|
|
65
|
+
description: Crawl job operations
|
|
66
|
+
- name: status
|
|
67
|
+
description: Status operations
|
|
68
|
+
- name: ws
|
|
69
|
+
description: Legacy SOAP compatibility operations
|
|
70
|
+
paths:
|
|
71
|
+
/crawlers:
|
|
72
|
+
get:
|
|
73
|
+
tags:
|
|
74
|
+
- crawlers
|
|
75
|
+
summary: Get the list of supported crawlers.
|
|
76
|
+
description: Return the list of supported crawlers.
|
|
77
|
+
operationId: getCrawlers
|
|
78
|
+
# roles: ROLE_AU_ADMIN
|
|
79
|
+
responses:
|
|
80
|
+
200:
|
|
81
|
+
description: The Status of supported Crawlers.
|
|
82
|
+
content:
|
|
83
|
+
application/json:
|
|
84
|
+
schema:
|
|
85
|
+
$ref: '#/components/schemas/crawlerStatuses'
|
|
86
|
+
default:
|
|
87
|
+
description: The resulting error payload.
|
|
88
|
+
content:
|
|
89
|
+
application/json:
|
|
90
|
+
schema:
|
|
91
|
+
$ref: '#/components/schemas/errorResult'
|
|
92
|
+
/crawlers/{crawlerId}:
|
|
93
|
+
get:
|
|
94
|
+
tags:
|
|
95
|
+
- crawlers
|
|
96
|
+
summary: Return information about a crawler.
|
|
97
|
+
description: Get information related to an installed crawler.
|
|
98
|
+
operationId: getCrawlerConfig
|
|
99
|
+
# roles: ROLE_AU_ADMIN
|
|
100
|
+
parameters:
|
|
101
|
+
- name: crawlerId
|
|
102
|
+
in: path
|
|
103
|
+
description: Identifier for the crawler
|
|
104
|
+
required: true
|
|
105
|
+
schema:
|
|
106
|
+
type: string
|
|
107
|
+
responses:
|
|
108
|
+
200:
|
|
109
|
+
description: Crawler Configuration Found
|
|
110
|
+
content:
|
|
111
|
+
application/json:
|
|
112
|
+
schema:
|
|
113
|
+
$ref: '#/components/schemas/crawlerConfig'
|
|
114
|
+
default:
|
|
115
|
+
description: The resulting error payload.
|
|
116
|
+
content:
|
|
117
|
+
application/json:
|
|
118
|
+
schema:
|
|
119
|
+
$ref: '#/components/schemas/errorResult'
|
|
120
|
+
/crawls:
|
|
121
|
+
get:
|
|
122
|
+
tags:
|
|
123
|
+
- crawls
|
|
124
|
+
summary: Get the list of crawls.
|
|
125
|
+
description: Get a list of crawls a pageful at a time as defined by limit.
|
|
126
|
+
operationId: getCrawls
|
|
127
|
+
# roles: ROLE_AU_ADMIN
|
|
128
|
+
parameters:
|
|
129
|
+
- name: limit
|
|
130
|
+
in: query
|
|
131
|
+
description: The number of crawls per page
|
|
132
|
+
schema:
|
|
133
|
+
type: integer
|
|
134
|
+
default: 50
|
|
135
|
+
- name: continuationToken
|
|
136
|
+
in: query
|
|
137
|
+
description: The continuation token of the next page of crawl status data
|
|
138
|
+
to be returned.
|
|
139
|
+
schema:
|
|
140
|
+
type: string
|
|
141
|
+
responses:
|
|
142
|
+
200:
|
|
143
|
+
description: The requested crawls
|
|
144
|
+
content:
|
|
145
|
+
application/json:
|
|
146
|
+
schema:
|
|
147
|
+
$ref: '#/components/schemas/crawlPager'
|
|
148
|
+
default:
|
|
149
|
+
description: The resulting error payload.
|
|
150
|
+
content:
|
|
151
|
+
application/json:
|
|
152
|
+
schema:
|
|
153
|
+
$ref: '#/components/schemas/errorResult'
|
|
154
|
+
/crawls/{jobId}:
|
|
155
|
+
get:
|
|
156
|
+
tags:
|
|
157
|
+
- crawls
|
|
158
|
+
summary: Get the crawl status of this job
|
|
159
|
+
description: Get the job represented by this crawl id
|
|
160
|
+
operationId: getCrawlById
|
|
161
|
+
# roles: ROLE_AU_ADMIN
|
|
162
|
+
parameters:
|
|
163
|
+
- name: jobId
|
|
164
|
+
in: path
|
|
165
|
+
required: true
|
|
166
|
+
schema:
|
|
167
|
+
type: string
|
|
168
|
+
responses:
|
|
169
|
+
200:
|
|
170
|
+
description: The crawl status of the requested crawl
|
|
171
|
+
content:
|
|
172
|
+
application/json:
|
|
173
|
+
schema:
|
|
174
|
+
$ref: '#/components/schemas/crawlStatus'
|
|
175
|
+
default:
|
|
176
|
+
description: The resulting error payload.
|
|
177
|
+
content:
|
|
178
|
+
application/json:
|
|
179
|
+
schema:
|
|
180
|
+
$ref: '#/components/schemas/errorResult'
|
|
181
|
+
/crawls/{jobId}/fetched:
|
|
182
|
+
get:
|
|
183
|
+
tags:
|
|
184
|
+
- crawls
|
|
185
|
+
summary: A pageable list of fetched urls.
|
|
186
|
+
description: Get a list of fetched urls.
|
|
187
|
+
operationId: getCrawlFetched
|
|
188
|
+
# roles: ROLE_AU_ADMIN
|
|
189
|
+
parameters:
|
|
190
|
+
- name: jobId
|
|
191
|
+
in: path
|
|
192
|
+
required: true
|
|
193
|
+
schema:
|
|
194
|
+
type: string
|
|
195
|
+
- name: limit
|
|
196
|
+
in: query
|
|
197
|
+
description: The number of urls per page.
|
|
198
|
+
schema:
|
|
199
|
+
type: integer
|
|
200
|
+
- name: continuationToken
|
|
201
|
+
in: query
|
|
202
|
+
description: The continuation token of the next page of urls to be returned.
|
|
203
|
+
schema:
|
|
204
|
+
type: string
|
|
205
|
+
responses:
|
|
206
|
+
200:
|
|
207
|
+
description: The requested fetched urls.
|
|
208
|
+
content:
|
|
209
|
+
application/json:
|
|
210
|
+
schema:
|
|
211
|
+
$ref: '#/components/schemas/urlPager'
|
|
212
|
+
default:
|
|
213
|
+
description: The resulting error payload.
|
|
214
|
+
content:
|
|
215
|
+
application/json:
|
|
216
|
+
schema:
|
|
217
|
+
$ref: '#/components/schemas/errorResult'
|
|
218
|
+
/crawls/{jobId}/excluded:
|
|
219
|
+
get:
|
|
220
|
+
tags:
|
|
221
|
+
- crawls
|
|
222
|
+
summary: A pageable list of excluded urls.
|
|
223
|
+
description: Get a list of excluded urls.
|
|
224
|
+
operationId: getCrawlExcluded
|
|
225
|
+
# roles: ROLE_AU_ADMIN
|
|
226
|
+
parameters:
|
|
227
|
+
- name: jobId
|
|
228
|
+
in: path
|
|
229
|
+
description: identifier used to identify a specific crawl.
|
|
230
|
+
required: true
|
|
231
|
+
schema:
|
|
232
|
+
type: string
|
|
233
|
+
- name: limit
|
|
234
|
+
in: query
|
|
235
|
+
description: The number of urls per page.
|
|
236
|
+
schema:
|
|
237
|
+
type: integer
|
|
238
|
+
- name: continuationToken
|
|
239
|
+
in: query
|
|
240
|
+
description: The continuation token of the next page of urls to be returned.
|
|
241
|
+
schema:
|
|
242
|
+
type: string
|
|
243
|
+
responses:
|
|
244
|
+
200:
|
|
245
|
+
description: The requested excluded urls.
|
|
246
|
+
content:
|
|
247
|
+
application/json:
|
|
248
|
+
schema:
|
|
249
|
+
$ref: '#/components/schemas/urlPager'
|
|
250
|
+
default:
|
|
251
|
+
description: The resulting error payload.
|
|
252
|
+
content:
|
|
253
|
+
application/json:
|
|
254
|
+
schema:
|
|
255
|
+
$ref: '#/components/schemas/errorResult'
|
|
256
|
+
/crawls/{jobId}/notmodified:
|
|
257
|
+
get:
|
|
258
|
+
tags:
|
|
259
|
+
- crawls
|
|
260
|
+
summary: A pageable list of not modified urls.
|
|
261
|
+
description: Get a list of not modified urls.
|
|
262
|
+
operationId: getCrawlNotModified
|
|
263
|
+
# roles: ROLE_AU_ADMIN
|
|
264
|
+
parameters:
|
|
265
|
+
- name: jobId
|
|
266
|
+
in: path
|
|
267
|
+
required: true
|
|
268
|
+
schema:
|
|
269
|
+
type: string
|
|
270
|
+
- name: limit
|
|
271
|
+
in: query
|
|
272
|
+
description: The number of urls per page.
|
|
273
|
+
schema:
|
|
274
|
+
type: integer
|
|
275
|
+
- name: continuationToken
|
|
276
|
+
in: query
|
|
277
|
+
description: The continuation token of the next page of urls to be returned.
|
|
278
|
+
schema:
|
|
279
|
+
type: string
|
|
280
|
+
responses:
|
|
281
|
+
200:
|
|
282
|
+
description: The requested not modified urls.
|
|
283
|
+
content:
|
|
284
|
+
application/json:
|
|
285
|
+
schema:
|
|
286
|
+
$ref: '#/components/schemas/urlPager'
|
|
287
|
+
default:
|
|
288
|
+
description: The resulting error payload.
|
|
289
|
+
content:
|
|
290
|
+
application/json:
|
|
291
|
+
schema:
|
|
292
|
+
$ref: '#/components/schemas/errorResult'
|
|
293
|
+
/crawls/{jobId}/pending:
|
|
294
|
+
get:
|
|
295
|
+
tags:
|
|
296
|
+
- crawls
|
|
297
|
+
summary: A pageable list of pending urls.
|
|
298
|
+
description: Get a list of pending urls.
|
|
299
|
+
operationId: getCrawlPending
|
|
300
|
+
# roles: ROLE_AU_ADMIN
|
|
301
|
+
parameters:
|
|
302
|
+
- name: jobId
|
|
303
|
+
in: path
|
|
304
|
+
required: true
|
|
305
|
+
schema:
|
|
306
|
+
type: string
|
|
307
|
+
- name: limit
|
|
308
|
+
in: query
|
|
309
|
+
description: The number of urls per page.
|
|
310
|
+
schema:
|
|
311
|
+
type: integer
|
|
312
|
+
- name: continuationToken
|
|
313
|
+
in: query
|
|
314
|
+
description: The continuation token of the next page of urls to be returned.
|
|
315
|
+
schema:
|
|
316
|
+
type: string
|
|
317
|
+
responses:
|
|
318
|
+
200:
|
|
319
|
+
description: The requested pending urls.
|
|
320
|
+
content:
|
|
321
|
+
application/json:
|
|
322
|
+
schema:
|
|
323
|
+
$ref: '#/components/schemas/urlPager'
|
|
324
|
+
default:
|
|
325
|
+
description: The resulting error payload.
|
|
326
|
+
content:
|
|
327
|
+
application/json:
|
|
328
|
+
schema:
|
|
329
|
+
$ref: '#/components/schemas/errorResult'
|
|
330
|
+
/crawls/{jobId}/parsed:
|
|
331
|
+
get:
|
|
332
|
+
tags:
|
|
333
|
+
- crawls
|
|
334
|
+
summary: A pageable list of parsed urls.
|
|
335
|
+
description: Get a list of parsed urls.
|
|
336
|
+
operationId: getCrawlParsed
|
|
337
|
+
# roles: ROLE_AU_ADMIN
|
|
338
|
+
parameters:
|
|
339
|
+
- name: jobId
|
|
340
|
+
in: path
|
|
341
|
+
required: true
|
|
342
|
+
schema:
|
|
343
|
+
type: string
|
|
344
|
+
- name: limit
|
|
345
|
+
in: query
|
|
346
|
+
description: The number of urls per page.
|
|
347
|
+
schema:
|
|
348
|
+
type: integer
|
|
349
|
+
- name: continuationToken
|
|
350
|
+
in: query
|
|
351
|
+
description: The continuation token of the next page of urls to be returned.
|
|
352
|
+
schema:
|
|
353
|
+
type: string
|
|
354
|
+
responses:
|
|
355
|
+
200:
|
|
356
|
+
description: The requested parsed urls.
|
|
357
|
+
content:
|
|
358
|
+
application/json:
|
|
359
|
+
schema:
|
|
360
|
+
$ref: '#/components/schemas/urlPager'
|
|
361
|
+
default:
|
|
362
|
+
description: The resulting error payload.
|
|
363
|
+
content:
|
|
364
|
+
application/json:
|
|
365
|
+
schema:
|
|
366
|
+
$ref: '#/components/schemas/errorResult'
|
|
367
|
+
/crawls/{jobId}/errors:
|
|
368
|
+
get:
|
|
369
|
+
tags:
|
|
370
|
+
- crawls
|
|
371
|
+
summary: A pageable list of urls with errors.
|
|
372
|
+
description: Get a list of urls with errors.
|
|
373
|
+
operationId: getCrawlErrors
|
|
374
|
+
# roles: ROLE_AU_ADMIN
|
|
375
|
+
parameters:
|
|
376
|
+
- name: jobId
|
|
377
|
+
in: path
|
|
378
|
+
required: true
|
|
379
|
+
schema:
|
|
380
|
+
type: string
|
|
381
|
+
- name: limit
|
|
382
|
+
in: query
|
|
383
|
+
description: The number of urls per page.
|
|
384
|
+
schema:
|
|
385
|
+
type: integer
|
|
386
|
+
- name: continuationToken
|
|
387
|
+
in: query
|
|
388
|
+
description: The continuation token of the next page of urls to be returned.
|
|
389
|
+
schema:
|
|
390
|
+
type: string
|
|
391
|
+
responses:
|
|
392
|
+
200:
|
|
393
|
+
description: The requested urls with errors.
|
|
394
|
+
content:
|
|
395
|
+
application/json:
|
|
396
|
+
schema:
|
|
397
|
+
$ref: '#/components/schemas/urlPager'
|
|
398
|
+
default:
|
|
399
|
+
description: The resulting error payload.
|
|
400
|
+
content:
|
|
401
|
+
application/json:
|
|
402
|
+
schema:
|
|
403
|
+
$ref: '#/components/schemas/errorResult'
|
|
404
|
+
/crawls/{jobId}/mediatypes/{type}:
|
|
405
|
+
get:
|
|
406
|
+
tags:
|
|
407
|
+
- crawls
|
|
408
|
+
summary: A pageable list of urls of mimetype.
|
|
409
|
+
description: Get a list of urls of mimetype.
|
|
410
|
+
operationId: getCrawlByMimeType
|
|
411
|
+
# roles: ROLE_AU_ADMIN
|
|
412
|
+
parameters:
|
|
413
|
+
- name: jobId
|
|
414
|
+
in: path
|
|
415
|
+
required: true
|
|
416
|
+
schema:
|
|
417
|
+
type: string
|
|
418
|
+
- name: type
|
|
419
|
+
in: path
|
|
420
|
+
required: true
|
|
421
|
+
schema:
|
|
422
|
+
type: string
|
|
423
|
+
- name: limit
|
|
424
|
+
in: query
|
|
425
|
+
description: The number of urls per page.
|
|
426
|
+
schema:
|
|
427
|
+
type: integer
|
|
428
|
+
- name: continuationToken
|
|
429
|
+
in: query
|
|
430
|
+
description: The continuation token of the next page of urls to be returned.
|
|
431
|
+
schema:
|
|
432
|
+
type: string
|
|
433
|
+
responses:
|
|
434
|
+
200:
|
|
435
|
+
description: The requested urls.
|
|
436
|
+
content:
|
|
437
|
+
application/json:
|
|
438
|
+
schema:
|
|
439
|
+
$ref: '#/components/schemas/urlPager'
|
|
440
|
+
default:
|
|
441
|
+
description: The resulting error payload.
|
|
442
|
+
content:
|
|
443
|
+
application/json:
|
|
444
|
+
schema:
|
|
445
|
+
$ref: '#/components/schemas/errorResult'
|
|
446
|
+
/jobs:
|
|
447
|
+
get:
|
|
448
|
+
tags:
|
|
449
|
+
- jobs
|
|
450
|
+
summary: Get the list of crawl jobs.
|
|
451
|
+
description: Get a list of crawl jobs a pageful at a time as defined by the
|
|
452
|
+
continuation token and limit.
|
|
453
|
+
operationId: getJobs
|
|
454
|
+
# roles: ROLE_AU_ADMIN
|
|
455
|
+
parameters:
|
|
456
|
+
- name: limit
|
|
457
|
+
in: query
|
|
458
|
+
description: The number of jobs per page
|
|
459
|
+
schema:
|
|
460
|
+
type: integer
|
|
461
|
+
default: 50
|
|
462
|
+
- name: continuationToken
|
|
463
|
+
in: query
|
|
464
|
+
description: The continuation token of the next page of jobs to be returned.
|
|
465
|
+
schema:
|
|
466
|
+
type: string
|
|
467
|
+
responses:
|
|
468
|
+
200:
|
|
469
|
+
description: The requested crawl jobs
|
|
470
|
+
content:
|
|
471
|
+
application/json:
|
|
472
|
+
schema:
|
|
473
|
+
$ref: '#/components/schemas/jobPager'
|
|
474
|
+
default:
|
|
475
|
+
description: The resulting error payload.
|
|
476
|
+
content:
|
|
477
|
+
application/json:
|
|
478
|
+
schema:
|
|
479
|
+
$ref: '#/components/schemas/errorResult'
|
|
480
|
+
post:
|
|
481
|
+
tags:
|
|
482
|
+
- jobs
|
|
483
|
+
summary: Request a crawl as defined by the descriptor
|
|
484
|
+
description: Enqueue a new crawl job as defined by the crawl descriptor and
|
|
485
|
+
return it.
|
|
486
|
+
operationId: queueJob
|
|
487
|
+
# roles: ROLE_AU_ADMIN
|
|
488
|
+
requestBody:
|
|
489
|
+
description: crawl request
|
|
490
|
+
content:
|
|
491
|
+
'*/*':
|
|
492
|
+
schema:
|
|
493
|
+
$ref: '#/components/schemas/crawlDesc'
|
|
494
|
+
required: true
|
|
495
|
+
responses:
|
|
496
|
+
202:
|
|
497
|
+
description: The crawl request has been queued for operation.
|
|
498
|
+
content:
|
|
499
|
+
application/json:
|
|
500
|
+
schema:
|
|
501
|
+
$ref: '#/components/schemas/crawlJob'
|
|
502
|
+
default:
|
|
503
|
+
description: The resulting error payload.
|
|
504
|
+
content:
|
|
505
|
+
application/json:
|
|
506
|
+
schema:
|
|
507
|
+
$ref: '#/components/schemas/errorResult'
|
|
508
|
+
x-codegen-request-body-name: crawlDesc
|
|
509
|
+
delete:
|
|
510
|
+
tags:
|
|
511
|
+
- jobs
|
|
512
|
+
summary: Delete all of the currently queued and active jobs
|
|
513
|
+
description: Halt and delete all of the currently queued and active crawl jobs
|
|
514
|
+
operationId: deleteJobs
|
|
515
|
+
# roles: ROLE_AU_ADMIN
|
|
516
|
+
responses:
|
|
517
|
+
200:
|
|
518
|
+
description: All crawl jobs have been stopped and deleted.
|
|
519
|
+
content: {}
|
|
520
|
+
default:
|
|
521
|
+
description: The resulting error payload.
|
|
522
|
+
content:
|
|
523
|
+
application/json:
|
|
524
|
+
schema:
|
|
525
|
+
$ref: '#/components/schemas/errorResult'
|
|
526
|
+
/jobs/{jobId}:
|
|
527
|
+
get:
|
|
528
|
+
tags:
|
|
529
|
+
- jobs
|
|
530
|
+
summary: Get the crawl status of this job
|
|
531
|
+
description: Get the crawl job with a given crawl id
|
|
532
|
+
operationId: getCrawlJob
|
|
533
|
+
# roles: ROLE_AU_ADMIN
|
|
534
|
+
parameters:
|
|
535
|
+
- name: jobId
|
|
536
|
+
in: path
|
|
537
|
+
required: true
|
|
538
|
+
schema:
|
|
539
|
+
type: string
|
|
540
|
+
responses:
|
|
541
|
+
200:
|
|
542
|
+
description: The crawl Job of the requested crawl
|
|
543
|
+
content:
|
|
544
|
+
application/json:
|
|
545
|
+
schema:
|
|
546
|
+
$ref: '#/components/schemas/crawlJob'
|
|
547
|
+
default:
|
|
548
|
+
description: The resulting error payload.
|
|
549
|
+
content:
|
|
550
|
+
application/json:
|
|
551
|
+
schema:
|
|
552
|
+
$ref: '#/components/schemas/errorResult'
|
|
553
|
+
delete:
|
|
554
|
+
tags:
|
|
555
|
+
- jobs
|
|
556
|
+
summary: Remove or stop a crawl job.
|
|
557
|
+
description: Delete a crawl job with the given job id, stopping any current
|
|
558
|
+
processing, if necessary.
|
|
559
|
+
operationId: deleteCrawlJob
|
|
560
|
+
# roles: ROLE_AU_ADMIN
|
|
561
|
+
parameters:
|
|
562
|
+
- name: jobId
|
|
563
|
+
in: path
|
|
564
|
+
description: The identifier used to identify a specific crawl job.
|
|
565
|
+
required: true
|
|
566
|
+
schema:
|
|
567
|
+
type: string
|
|
568
|
+
responses:
|
|
569
|
+
200:
|
|
570
|
+
description: The crawlJob of the deleted crawl.
|
|
571
|
+
content:
|
|
572
|
+
application/json:
|
|
573
|
+
schema:
|
|
574
|
+
$ref: '#/components/schemas/crawlJob'
|
|
575
|
+
default:
|
|
576
|
+
description: The resulting error payload.
|
|
577
|
+
content:
|
|
578
|
+
application/json:
|
|
579
|
+
schema:
|
|
580
|
+
$ref: '#/components/schemas/errorResult'
|
|
581
|
+
/status:
|
|
582
|
+
get:
|
|
583
|
+
tags:
|
|
584
|
+
- status
|
|
585
|
+
summary: Get the status of the service
|
|
586
|
+
description: Get the status of the service
|
|
587
|
+
operationId: getStatus
|
|
588
|
+
# no credentials required
|
|
589
|
+
responses:
|
|
590
|
+
200:
|
|
591
|
+
description: The status of the service
|
|
592
|
+
content:
|
|
593
|
+
application/json:
|
|
594
|
+
schema:
|
|
595
|
+
$ref: '#/components/schemas/apiStatus'
|
|
596
|
+
default:
|
|
597
|
+
description: The resulting error payload.
|
|
598
|
+
content:
|
|
599
|
+
application/json:
|
|
600
|
+
schema:
|
|
601
|
+
$ref: '#/components/schemas/errorResult'
|
|
602
|
+
/ws/crawls:
|
|
603
|
+
get:
|
|
604
|
+
tags:
|
|
605
|
+
- ws
|
|
606
|
+
summary: Query for list of crawls based on subset defined by query string
|
|
607
|
+
description: Query for crawls that meet a set of specified conditions
|
|
608
|
+
operationId: getWsCrawls
|
|
609
|
+
# roles: ROLE_AU_ADMIN
|
|
610
|
+
parameters:
|
|
611
|
+
- name: crawlQuery
|
|
612
|
+
in: query
|
|
613
|
+
description: The query that specifies the crawls to be returned
|
|
614
|
+
required: true
|
|
615
|
+
schema:
|
|
616
|
+
type: string
|
|
617
|
+
responses:
|
|
618
|
+
200:
|
|
619
|
+
description: Information about the requested crawls
|
|
620
|
+
content:
|
|
621
|
+
application/json:
|
|
622
|
+
schema:
|
|
623
|
+
$ref: '#/components/schemas/crawlWsResult'
|
|
624
|
+
default:
|
|
625
|
+
description: The resulting error payload.
|
|
626
|
+
content:
|
|
627
|
+
application/json:
|
|
628
|
+
schema:
|
|
629
|
+
$ref: '#/components/schemas/errorResult'
|
|
630
|
+
components:
|
|
631
|
+
schemas:
|
|
632
|
+
apiStatus:
|
|
633
|
+
required:
|
|
634
|
+
- apiVersion
|
|
635
|
+
- ready
|
|
636
|
+
type: object
|
|
637
|
+
properties:
|
|
638
|
+
apiVersion:
|
|
639
|
+
type: string
|
|
640
|
+
description: The version of the API
|
|
641
|
+
componentName:
|
|
642
|
+
type: string
|
|
643
|
+
description: The name of the component
|
|
644
|
+
componentVersion:
|
|
645
|
+
type: string
|
|
646
|
+
description: The version of the component software
|
|
647
|
+
lockssVersion:
|
|
648
|
+
type: string
|
|
649
|
+
description: The version of the LOCKSS system
|
|
650
|
+
ready:
|
|
651
|
+
type: boolean
|
|
652
|
+
description: The indication of whether the service is available
|
|
653
|
+
serviceName:
|
|
654
|
+
type: string
|
|
655
|
+
description: The name of the service
|
|
656
|
+
readyTime:
|
|
657
|
+
type: integer
|
|
658
|
+
description: The time the service last became ready.
|
|
659
|
+
format: int64
|
|
660
|
+
reason:
|
|
661
|
+
type: string
|
|
662
|
+
description: The reason the service isn't ready.
|
|
663
|
+
startupStatus:
|
|
664
|
+
type: string
|
|
665
|
+
description: Enum indicating progress of plugin/AU processing at startup.
|
|
666
|
+
enum:
|
|
667
|
+
- NONE
|
|
668
|
+
- PLUGINS_CRAWLING
|
|
669
|
+
- PLUGINS_COLLECTED
|
|
670
|
+
- PLUGINS_LOADING
|
|
671
|
+
- PLUGINS_LOADED
|
|
672
|
+
- AUS_STARTING
|
|
673
|
+
- AUS_STARTED
|
|
674
|
+
description: The status information of the service
|
|
675
|
+
counter:
|
|
676
|
+
required:
|
|
677
|
+
- count
|
|
678
|
+
- itemsLink
|
|
679
|
+
type: object
|
|
680
|
+
properties:
|
|
681
|
+
count:
|
|
682
|
+
type: integer
|
|
683
|
+
description: The number of elements
|
|
684
|
+
format: int32
|
|
685
|
+
itemsLink:
|
|
686
|
+
type: string
|
|
687
|
+
description: A link to the list of count items or to a pager with count
|
|
688
|
+
items.
|
|
689
|
+
description: A counter for urls.
|
|
690
|
+
crawlDesc:
|
|
691
|
+
required:
|
|
692
|
+
- auId
|
|
693
|
+
- crawlKind
|
|
694
|
+
type: object
|
|
695
|
+
properties:
|
|
696
|
+
auId:
|
|
697
|
+
type: string
|
|
698
|
+
description: The identifier of the archival unit to be crawled.
|
|
699
|
+
crawlKind:
|
|
700
|
+
$ref: '#/components/schemas/crawlKindEnum'
|
|
701
|
+
crawlerId:
|
|
702
|
+
type: string
|
|
703
|
+
description: The crawler to be used for this crawl.
|
|
704
|
+
default: classic
|
|
705
|
+
forceCrawl:
|
|
706
|
+
type: boolean
|
|
707
|
+
description: An indication of whether the crawl is to be forced, suppressing
|
|
708
|
+
conditions that might otherwise prevent the crawl from happening.
|
|
709
|
+
default: false
|
|
710
|
+
refetchDepth:
|
|
711
|
+
type: integer
|
|
712
|
+
description: The refetch depth to use for a deep crawl.
|
|
713
|
+
format: int32
|
|
714
|
+
default: -1
|
|
715
|
+
priority:
|
|
716
|
+
type: integer
|
|
717
|
+
description: The priority for the crawl.
|
|
718
|
+
format: int32
|
|
719
|
+
nullable: true
|
|
720
|
+
default: null
|
|
721
|
+
crawlList:
|
|
722
|
+
type: array
|
|
723
|
+
description: The list of URLs to crawl.
|
|
724
|
+
items:
|
|
725
|
+
type: string
|
|
726
|
+
crawlDepth:
|
|
727
|
+
type: integer
|
|
728
|
+
description: The depth to which the links should be followed. 0 means
|
|
729
|
+
do not follow links.
|
|
730
|
+
format: int32
|
|
731
|
+
extraCrawlerData:
|
|
732
|
+
type: object
|
|
733
|
+
additionalProperties:
|
|
734
|
+
type: object
|
|
735
|
+
properties: {}
|
|
736
|
+
description: A map of additional properties for a crawl on a given crawler.
|
|
737
|
+
description: A descriptor for a crawl.
|
|
738
|
+
crawlJob:
|
|
739
|
+
required:
|
|
740
|
+
- crawlDesc
|
|
741
|
+
- jobId
|
|
742
|
+
- jobStatus
|
|
743
|
+
- requestDate
|
|
744
|
+
type: object
|
|
745
|
+
properties:
|
|
746
|
+
crawlDesc:
|
|
747
|
+
$ref: '#/components/schemas/crawlDesc'
|
|
748
|
+
requestDate:
|
|
749
|
+
type: integer
|
|
750
|
+
description: The timestamp when the crawl was requested.
|
|
751
|
+
format: int64
|
|
752
|
+
jobId:
|
|
753
|
+
type: string
|
|
754
|
+
description: Identifier of the crawl job.
|
|
755
|
+
jobStatus:
|
|
756
|
+
$ref: '#/components/schemas/jobStatus'
|
|
757
|
+
startDate:
|
|
758
|
+
type: integer
|
|
759
|
+
description: The timestamp when the crawl began.
|
|
760
|
+
format: int64
|
|
761
|
+
endDate:
|
|
762
|
+
type: integer
|
|
763
|
+
description: The timestamp when the crawl ended.
|
|
764
|
+
format: int64
|
|
765
|
+
result:
|
|
766
|
+
type: string
|
|
767
|
+
description: A URI which can be used to retrieve the crawl data.
|
|
768
|
+
description: The job resulting from a request to perform a crawl.
|
|
769
|
+
crawlKindEnum:
|
|
770
|
+
title: Crawl Kind
|
|
771
|
+
description: The kind of crawl being performed, either 'newContent' or 'repair'.
|
|
772
|
+
type: string
|
|
773
|
+
enum:
|
|
774
|
+
- newContent
|
|
775
|
+
- repair
|
|
776
|
+
crawlPager:
|
|
777
|
+
required:
|
|
778
|
+
- crawls
|
|
779
|
+
- pageInfo
|
|
780
|
+
type: object
|
|
781
|
+
properties:
|
|
782
|
+
crawls:
|
|
783
|
+
type: array
|
|
784
|
+
description: The crawls displayed in the page
|
|
785
|
+
items:
|
|
786
|
+
$ref: '#/components/schemas/crawlStatus'
|
|
787
|
+
pageInfo:
|
|
788
|
+
$ref: '#/components/schemas/pageInfo'
|
|
789
|
+
description: A display page of crawl status
|
|
790
|
+
crawlStatus:
|
|
791
|
+
required:
|
|
792
|
+
- auId
|
|
793
|
+
- auName
|
|
794
|
+
- crawlerId
|
|
795
|
+
- endTime
|
|
796
|
+
- jobId
|
|
797
|
+
- jobStatus
|
|
798
|
+
- priority
|
|
799
|
+
- startTime
|
|
800
|
+
- startUrls
|
|
801
|
+
- type
|
|
802
|
+
type: object
|
|
803
|
+
properties:
|
|
804
|
+
jobId:
|
|
805
|
+
type: string
|
|
806
|
+
description: The id for the crawl.
|
|
807
|
+
auId:
|
|
808
|
+
type: string
|
|
809
|
+
description: The id for the au.
|
|
810
|
+
auName:
|
|
811
|
+
type: string
|
|
812
|
+
description: The name for the au.
|
|
813
|
+
type:
|
|
814
|
+
type: string
|
|
815
|
+
description: The type of crawl.
|
|
816
|
+
startUrls:
|
|
817
|
+
type: array
|
|
818
|
+
description: The array of start urls.
|
|
819
|
+
items:
|
|
820
|
+
type: string
|
|
821
|
+
priority:
|
|
822
|
+
type: integer
|
|
823
|
+
description: The priority for this crawl.
|
|
824
|
+
format: int32
|
|
825
|
+
crawlerId:
|
|
826
|
+
type: string
|
|
827
|
+
description: The id of the crawler used for this crawl.
|
|
828
|
+
default: classic
|
|
829
|
+
sources:
|
|
830
|
+
type: array
|
|
831
|
+
description: The sources to use for the crawl.
|
|
832
|
+
items:
|
|
833
|
+
type: string
|
|
834
|
+
depth:
|
|
835
|
+
type: integer
|
|
836
|
+
description: The depth of the crawl.
|
|
837
|
+
format: int32
|
|
838
|
+
refetchDepth:
|
|
839
|
+
type: integer
|
|
840
|
+
description: The refetch depth of the crawl.
|
|
841
|
+
format: int32
|
|
842
|
+
proxy:
|
|
843
|
+
type: string
|
|
844
|
+
description: The proxy used for crawling.
|
|
845
|
+
startTime:
|
|
846
|
+
type: integer
|
|
847
|
+
description: The timestamp for the start of crawl.
|
|
848
|
+
format: int64
|
|
849
|
+
endTime:
|
|
850
|
+
type: integer
|
|
851
|
+
description: The timestamp for the end of the crawl.
|
|
852
|
+
format: int64
|
|
853
|
+
jobStatus:
|
|
854
|
+
$ref: '#/components/schemas/jobStatus'
|
|
855
|
+
isWaiting:
|
|
856
|
+
type: boolean
|
|
857
|
+
description: True if the crawl is waiting to start.
|
|
858
|
+
isActive:
|
|
859
|
+
type: boolean
|
|
860
|
+
description: True if the crawl is active.
|
|
861
|
+
isError:
|
|
862
|
+
type: boolean
|
|
863
|
+
description: True if the crawl has errored.
|
|
864
|
+
bytesFetched:
|
|
865
|
+
type: integer
|
|
866
|
+
description: The number of bytes fetched.
|
|
867
|
+
format: int64
|
|
868
|
+
fetchedItems:
|
|
869
|
+
$ref: '#/components/schemas/counter'
|
|
870
|
+
excludedItems:
|
|
871
|
+
$ref: '#/components/schemas/counter'
|
|
872
|
+
notModifiedItems:
|
|
873
|
+
$ref: '#/components/schemas/counter'
|
|
874
|
+
parsedItems:
|
|
875
|
+
$ref: '#/components/schemas/counter'
|
|
876
|
+
pendingItems:
|
|
877
|
+
$ref: '#/components/schemas/counter'
|
|
878
|
+
errors:
|
|
879
|
+
$ref: '#/components/schemas/counter'
|
|
880
|
+
mimeTypes:
|
|
881
|
+
type: array
|
|
882
|
+
description: The list of urls by mimeType.
|
|
883
|
+
items:
|
|
884
|
+
$ref: '#/components/schemas/mimeCounter'
|
|
885
|
+
description: The status of a single crawl.
|
|
886
|
+
crawlWsResult:
|
|
887
|
+
required:
|
|
888
|
+
- auId
|
|
889
|
+
- auName
|
|
890
|
+
type: object
|
|
891
|
+
properties:
|
|
892
|
+
auId:
|
|
893
|
+
type: string
|
|
894
|
+
auName:
|
|
895
|
+
type: string
|
|
896
|
+
priority:
|
|
897
|
+
type: integer
|
|
898
|
+
format: int32
|
|
899
|
+
crawlKey:
|
|
900
|
+
type: string
|
|
901
|
+
crawlType:
|
|
902
|
+
type: string
|
|
903
|
+
startTime:
|
|
904
|
+
type: integer
|
|
905
|
+
format: int32
|
|
906
|
+
duration:
|
|
907
|
+
type: integer
|
|
908
|
+
format: int32
|
|
909
|
+
crawlStatus:
|
|
910
|
+
type: string
|
|
911
|
+
bytesFetchedCount:
|
|
912
|
+
type: integer
|
|
913
|
+
format: int32
|
|
914
|
+
pagesFetchedCount:
|
|
915
|
+
type: integer
|
|
916
|
+
format: int32
|
|
917
|
+
pagesFetched:
|
|
918
|
+
type: array
|
|
919
|
+
items:
|
|
920
|
+
type: string
|
|
921
|
+
pagesParsedCount:
|
|
922
|
+
type: integer
|
|
923
|
+
format: int32
|
|
924
|
+
pagesParsed:
|
|
925
|
+
type: array
|
|
926
|
+
items:
|
|
927
|
+
type: string
|
|
928
|
+
pagesPendingCount:
|
|
929
|
+
type: integer
|
|
930
|
+
format: int32
|
|
931
|
+
pagesPending:
|
|
932
|
+
type: array
|
|
933
|
+
items:
|
|
934
|
+
type: string
|
|
935
|
+
pagesExcludedCount:
|
|
936
|
+
type: integer
|
|
937
|
+
format: int32
|
|
938
|
+
pagesExcluded:
|
|
939
|
+
type: array
|
|
940
|
+
items:
|
|
941
|
+
type: string
|
|
942
|
+
offSiteUrlsExcludedCount:
|
|
943
|
+
type: integer
|
|
944
|
+
format: int32
|
|
945
|
+
pagesNotModifiedCount:
|
|
946
|
+
type: integer
|
|
947
|
+
format: int32
|
|
948
|
+
pagesNotModified:
|
|
949
|
+
type: array
|
|
950
|
+
items:
|
|
951
|
+
type: string
|
|
952
|
+
pagesWithErrorsCount:
|
|
953
|
+
type: integer
|
|
954
|
+
format: int32
|
|
955
|
+
pagesWithErrors:
|
|
956
|
+
type: array
|
|
957
|
+
items:
|
|
958
|
+
type: object
|
|
959
|
+
properties:
|
|
960
|
+
url:
|
|
961
|
+
type: string
|
|
962
|
+
severity:
|
|
963
|
+
type: string
|
|
964
|
+
message:
|
|
965
|
+
type: string
|
|
966
|
+
mimeTypeCount:
|
|
967
|
+
type: integer
|
|
968
|
+
format: int32
|
|
969
|
+
mimeTypes:
|
|
970
|
+
type: array
|
|
971
|
+
items:
|
|
972
|
+
type: string
|
|
973
|
+
sources:
|
|
974
|
+
type: array
|
|
975
|
+
items:
|
|
976
|
+
type: string
|
|
977
|
+
startingUrls:
|
|
978
|
+
type: array
|
|
979
|
+
items:
|
|
980
|
+
type: string
|
|
981
|
+
refetchDepth:
|
|
982
|
+
type: integer
|
|
983
|
+
format: int32
|
|
984
|
+
linkDepth:
|
|
985
|
+
type: integer
|
|
986
|
+
format: int32
|
|
987
|
+
crawlerConfig:
|
|
988
|
+
required:
|
|
989
|
+
- attributes
|
|
990
|
+
- crawlerId
|
|
991
|
+
type: object
|
|
992
|
+
properties:
|
|
993
|
+
crawlerId:
|
|
994
|
+
type: string
|
|
995
|
+
description: The identifier for this crawler
|
|
996
|
+
example: classic
|
|
997
|
+
attributes:
|
|
998
|
+
type: object
|
|
999
|
+
additionalProperties:
|
|
1000
|
+
type: string
|
|
1001
|
+
description: Key-value pairs providing crawler-specific attributes and configuration
|
|
1002
|
+
information.
|
|
1003
|
+
description: Configuration information about a specific crawler.
|
|
1004
|
+
crawlerStatus:
|
|
1005
|
+
required:
|
|
1006
|
+
- isEnabled
|
|
1007
|
+
type: object
|
|
1008
|
+
properties:
|
|
1009
|
+
isEnabled:
|
|
1010
|
+
type: boolean
|
|
1011
|
+
description: Is the crawler enabled
|
|
1012
|
+
isAutoCrawlEnabled:
|
|
1013
|
+
type: boolean
|
|
1014
|
+
description: Does crawler autocrawl AUs when needed.
|
|
1015
|
+
numJobsActive:
|
|
1016
|
+
type: integer
|
|
1017
|
+
description: The number of jobs running.
|
|
1018
|
+
format: int32
|
|
1019
|
+
numJobsFailed:
|
|
1020
|
+
type: integer
|
|
1021
|
+
description: The number of jobs failed.
|
|
1022
|
+
format: int32
|
|
1023
|
+
numJobsSuccessful:
|
|
1024
|
+
type: integer
|
|
1025
|
+
description: The number of jobs succeeded
|
|
1026
|
+
format: int32
|
|
1027
|
+
numJobsPending:
|
|
1028
|
+
type: integer
|
|
1029
|
+
description: The number of pending jobs.
|
|
1030
|
+
format: int32
|
|
1031
|
+
errMessage:
|
|
1032
|
+
type: string
|
|
1033
|
+
description: Status about a specific crawler.
|
|
1034
|
+
crawlerStatuses:
|
|
1035
|
+
type: object
|
|
1036
|
+
properties:
|
|
1037
|
+
crawlerMap:
|
|
1038
|
+
type: object
|
|
1039
|
+
additionalProperties:
|
|
1040
|
+
$ref: '#/components/schemas/crawlerStatus'
|
|
1041
|
+
description: A map of crawler status objects
|
|
1042
|
+
description: A collection of crawler status objects.
|
|
1043
|
+
errorResult:
|
|
1044
|
+
type: object
|
|
1045
|
+
required:
|
|
1046
|
+
- message
|
|
1047
|
+
- code
|
|
1048
|
+
properties:
|
|
1049
|
+
message:
|
|
1050
|
+
type: string
|
|
1051
|
+
code:
|
|
1052
|
+
type: integer
|
|
1053
|
+
rootCause:
|
|
1054
|
+
type: string
|
|
1055
|
+
jobPager:
|
|
1056
|
+
required:
|
|
1057
|
+
- jobs
|
|
1058
|
+
- pageInfo
|
|
1059
|
+
type: object
|
|
1060
|
+
properties:
|
|
1061
|
+
jobs:
|
|
1062
|
+
type: array
|
|
1063
|
+
description: The jobs displayed in the page
|
|
1064
|
+
items:
|
|
1065
|
+
$ref: '#/components/schemas/crawlJob'
|
|
1066
|
+
pageInfo:
|
|
1067
|
+
$ref: '#/components/schemas/pageInfo'
|
|
1068
|
+
description: A display page of jobs
|
|
1069
|
+
jobStatus:
|
|
1070
|
+
required:
|
|
1071
|
+
- statusCode
|
|
1072
|
+
type: object
|
|
1073
|
+
properties:
|
|
1074
|
+
statusCode:
|
|
1075
|
+
type: string
|
|
1076
|
+
description: The code for this status, one of the enumerated values.
|
|
1077
|
+
enum:
|
|
1078
|
+
- STATUS_UNKNOWN
|
|
1079
|
+
- STATUS_QUEUED
|
|
1080
|
+
- STATUS_ACTIVE
|
|
1081
|
+
- STATUS_SUCCESSFUL
|
|
1082
|
+
- STATUS_ERROR
|
|
1083
|
+
- STATUS_ABORTED
|
|
1084
|
+
- STATUS_WINDOW_CLOSED
|
|
1085
|
+
- STATUS_FETCH_ERROR
|
|
1086
|
+
- STATUS_NO_PUB_PERMISSION
|
|
1087
|
+
- STATUS_PLUGIN_ERROR
|
|
1088
|
+
- STATUS_REPO_ERR
|
|
1089
|
+
- STATUS_RUNNING_AT_CRASH
|
|
1090
|
+
- STATUS_EXTRACTOR_ERROR
|
|
1091
|
+
- STATUS_CRAWL_TEST_SUCCESSFUL
|
|
1092
|
+
- STATUS_CRAWL_TEST_FAIL
|
|
1093
|
+
- STATUS_INELIGIBLE
|
|
1094
|
+
- STATUS_INACTIVE_REQUEST
|
|
1095
|
+
- STATUS_INTERRUPTED
|
|
1096
|
+
msg:
|
|
1097
|
+
type: string
|
|
1098
|
+
description: A text message explaining this status.
|
|
1099
|
+
description: A status which includes a code and a message.
|
|
1100
|
+
mimeCounter:
|
|
1101
|
+
required:
|
|
1102
|
+
- mimeType
|
|
1103
|
+
type: object
|
|
1104
|
+
properties:
|
|
1105
|
+
mimeType:
|
|
1106
|
+
type: string
|
|
1107
|
+
description: The mime type to count.
|
|
1108
|
+
count:
|
|
1109
|
+
type: integer
|
|
1110
|
+
description: The number of elements of mime type
|
|
1111
|
+
format: int32
|
|
1112
|
+
counterLink:
|
|
1113
|
+
type: string
|
|
1114
|
+
description: A link to the list of count elements or to a pager with
|
|
1115
|
+
count elements.
|
|
1116
|
+
description: A counter for mimeTypes seen during a crawl.
|
|
1117
|
+
pageInfo:
|
|
1118
|
+
required:
|
|
1119
|
+
- continuationToken
|
|
1120
|
+
- curLink
|
|
1121
|
+
- itemsInPage
|
|
1122
|
+
- totalCount
|
|
1123
|
+
type: object
|
|
1124
|
+
properties:
|
|
1125
|
+
totalCount:
|
|
1126
|
+
type: integer
|
|
1127
|
+
description: The total number of elements to be paginated
|
|
1128
|
+
format: int32
|
|
1129
|
+
nullable: true
|
|
1130
|
+
itemsInPage:
|
|
1131
|
+
type: integer
|
|
1132
|
+
description: The number of items in page.
|
|
1133
|
+
format: int32
|
|
1134
|
+
continuationToken:
|
|
1135
|
+
type: string
|
|
1136
|
+
description: The continuation token.
|
|
1137
|
+
nullable: true
|
|
1138
|
+
curLink:
|
|
1139
|
+
type: string
|
|
1140
|
+
description: The link to the current page.
|
|
1141
|
+
nextLink:
|
|
1142
|
+
type: string
|
|
1143
|
+
description: The link to the next page.
|
|
1144
|
+
nullable: true
|
|
1145
|
+
description: The information related to pagination of content
|
|
1146
|
+
urlError:
|
|
1147
|
+
required:
|
|
1148
|
+
- message
|
|
1149
|
+
- severity
|
|
1150
|
+
type: object
|
|
1151
|
+
properties:
|
|
1152
|
+
message:
|
|
1153
|
+
type: string
|
|
1154
|
+
description: The error message
|
|
1155
|
+
severity:
|
|
1156
|
+
type: string
|
|
1157
|
+
description: the severity of the error.
|
|
1158
|
+
enum:
|
|
1159
|
+
- Warning
|
|
1160
|
+
- Error
|
|
1161
|
+
- Fatal
|
|
1162
|
+
description: information related to an error for a url.
|
|
1163
|
+
urlInfo:
|
|
1164
|
+
required:
|
|
1165
|
+
- url
|
|
1166
|
+
type: object
|
|
1167
|
+
properties:
|
|
1168
|
+
url:
|
|
1169
|
+
type: string
|
|
1170
|
+
description: The url string
|
|
1171
|
+
error:
|
|
1172
|
+
$ref: '#/components/schemas/urlError'
|
|
1173
|
+
referrers:
|
|
1174
|
+
type: array
|
|
1175
|
+
description: An optional list of referrers.
|
|
1176
|
+
items:
|
|
1177
|
+
type: string
|
|
1178
|
+
description: Information related to a URL.
|
|
1179
|
+
urlPager:
|
|
1180
|
+
required:
|
|
1181
|
+
- pageInfo
|
|
1182
|
+
- urls
|
|
1183
|
+
type: object
|
|
1184
|
+
properties:
|
|
1185
|
+
pageInfo:
|
|
1186
|
+
$ref: '#/components/schemas/pageInfo'
|
|
1187
|
+
urls:
|
|
1188
|
+
type: array
|
|
1189
|
+
description: A list of URLs with related info.
|
|
1190
|
+
items:
|
|
1191
|
+
$ref: '#/components/schemas/urlInfo'
|
|
1192
|
+
description: A Pager for urls with maps.
|
|
1193
|
+
securitySchemes:
|
|
1194
|
+
basicAuth:
|
|
1195
|
+
type: http
|
|
1196
|
+
description: HTTP Basic Authentication. Works over `HTTP` and `HTTPS`
|
|
1197
|
+
scheme: basic
|