lockss-pyclient 0.1.0.dev2__py3-none-any.whl → 0.1.0.dev3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. lockss/pyclient/__init__.py +8 -29
  2. lockss/pyclient/__main__.py +37 -0
  3. lockss/pyclient/_internal_common.py +395 -0
  4. lockss/pyclient/_internal_config.py +207 -0
  5. lockss/pyclient/_internal_crawler.py +251 -0
  6. lockss/pyclient/_internal_md.py +111 -0
  7. lockss/pyclient/_internal_poller.py +174 -0
  8. lockss/pyclient/_internal_rs.py +232 -0
  9. lockss/pyclient/cli.py +839 -0
  10. lockss/pyclient/config/__init__.py +15 -0
  11. lockss/pyclient/config/api/aus_api.py +14 -6
  12. lockss/pyclient/config/api/config_api.py +12 -12
  13. lockss/pyclient/config/api/utils_api.py +2 -2
  14. lockss/pyclient/config/configuration.py +1 -1
  15. lockss/pyclient/config/models/__init__.py +15 -0
  16. lockss/pyclient/config/models/access_type.py +90 -0
  17. lockss/pyclient/config/models/au_agreements.py +136 -0
  18. lockss/pyclient/config/models/au_config_page_info.py +140 -0
  19. lockss/pyclient/config/models/au_state_bean.py +942 -0
  20. lockss/pyclient/config/models/au_status.py +837 -8
  21. lockss/pyclient/config/models/au_suspect_url_versions.py +136 -0
  22. lockss/pyclient/config/models/check_substance_result.py +6 -22
  23. lockss/pyclient/config/models/dated_peer_id_set_impl.py +162 -0
  24. lockss/pyclient/config/models/hash_result.py +136 -0
  25. lockss/pyclient/config/models/page_info.py +226 -0
  26. lockss/pyclient/config/models/peer_agreement.py +188 -0
  27. lockss/pyclient/config/models/peer_agreements.py +136 -0
  28. lockss/pyclient/config/models/platform_configuration_ws_result.py +421 -8
  29. lockss/pyclient/config/models/platform_configuration_ws_result_daemon_version.py +188 -0
  30. lockss/pyclient/config/models/platform_configuration_ws_result_java_version.py +188 -0
  31. lockss/pyclient/config/models/platform_configuration_ws_result_platform.py +162 -0
  32. lockss/pyclient/config/models/substance_checker_state.py +91 -0
  33. lockss/pyclient/config/models/suspect_url_version.py +214 -0
  34. lockss/pyclient/config/swagger.yaml +2031 -0
  35. lockss/pyclient/crawler/__init__.py +1 -0
  36. lockss/pyclient/crawler/api/crawls_api.py +2 -2
  37. lockss/pyclient/crawler/configuration.py +1 -1
  38. lockss/pyclient/crawler/models/__init__.py +1 -0
  39. lockss/pyclient/crawler/models/crawl_desc.py +4 -12
  40. lockss/pyclient/crawler/models/crawl_kind_enum.py +90 -0
  41. lockss/pyclient/crawler/models/page_info.py +22 -24
  42. lockss/pyclient/crawler/swagger.yaml +1197 -0
  43. lockss/pyclient/md/configuration.py +1 -1
  44. lockss/pyclient/md/models/page_info.py +22 -24
  45. lockss/pyclient/md/swagger.yaml +583 -0
  46. lockss/pyclient/output.py +131 -0
  47. lockss/pyclient/poller/__init__.py +11 -5
  48. lockss/pyclient/poller/api/export_api.py +5 -5
  49. lockss/pyclient/poller/api/hash_api.py +3 -3
  50. lockss/pyclient/poller/api/poll_detail_api.py +42 -42
  51. lockss/pyclient/poller/api/poller_polls_api.py +18 -18
  52. lockss/pyclient/poller/api/service_api.py +2 -2
  53. lockss/pyclient/poller/api/voter_polls_api.py +18 -18
  54. lockss/pyclient/poller/configuration.py +1 -1
  55. lockss/pyclient/poller/models/__init__.py +11 -5
  56. lockss/pyclient/poller/models/export_file_type_enum.py +93 -0
  57. lockss/pyclient/poller/models/export_filename_translation_enum.py +91 -0
  58. lockss/pyclient/poller/models/page_info.py +226 -0
  59. lockss/pyclient/poller/models/poll_desc.py +3 -11
  60. lockss/pyclient/poller/models/poll_variant_enum.py +92 -0
  61. lockss/pyclient/poller/models/poller_page_info.py +140 -0
  62. lockss/pyclient/poller/models/repair_page_info.py +140 -0
  63. lockss/pyclient/poller/models/repair_type_enum.py +91 -0
  64. lockss/pyclient/poller/models/tally_type_enum.py +93 -0
  65. lockss/pyclient/poller/models/url_page_info.py +140 -0
  66. lockss/pyclient/poller/models/voter_page_info.py +140 -0
  67. lockss/pyclient/poller/models/voter_urls_enum.py +92 -0
  68. lockss/pyclient/poller/swagger.yaml +1658 -0
  69. lockss/pyclient/rs/__init__.py +6 -0
  70. lockss/pyclient/rs/api/artifacts_api.py +20 -20
  71. lockss/pyclient/rs/api/aus_api.py +5 -5
  72. lockss/pyclient/rs/api/repo_api.py +4 -4
  73. lockss/pyclient/rs/api/status_api.py +1 -1
  74. lockss/pyclient/rs/api/wayback_api.py +12 -12
  75. lockss/pyclient/rs/configuration.py +8 -1
  76. lockss/pyclient/rs/models/__init__.py +6 -0
  77. lockss/pyclient/rs/models/artifact.py +111 -81
  78. lockss/pyclient/rs/models/au_size.py +6 -0
  79. lockss/pyclient/rs/models/auid_page_info.py +2 -2
  80. lockss/pyclient/rs/models/bulk_au_op_enum.py +90 -0
  81. lockss/pyclient/rs/models/include_content_enum.py +91 -0
  82. lockss/pyclient/rs/models/page_info.py +26 -29
  83. lockss/pyclient/rs/models/pywb_match_enum.py +93 -0
  84. lockss/pyclient/rs/models/pywb_output_enum.py +90 -0
  85. lockss/pyclient/rs/models/pywb_sort_enum.py +91 -0
  86. lockss/pyclient/rs/models/storage_info.py +131 -80
  87. lockss/pyclient/rs/models/versions_enum.py +90 -0
  88. lockss/pyclient/rs/swagger.yaml +1306 -0
  89. {lockss_pyclient-0.1.0.dev2.dist-info → lockss_pyclient-0.1.0.dev3.dist-info}/METADATA +10 -3
  90. {lockss_pyclient-0.1.0.dev2.dist-info → lockss_pyclient-0.1.0.dev3.dist-info}/RECORD +93 -45
  91. {lockss_pyclient-0.1.0.dev2.dist-info → lockss_pyclient-0.1.0.dev3.dist-info}/WHEEL +1 -1
  92. lockss_pyclient-0.1.0.dev3.dist-info/entry_points.txt +3 -0
  93. {lockss_pyclient-0.1.0.dev2.dist-info → lockss_pyclient-0.1.0.dev3.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,1197 @@
1
+ # Copyright (c) 2000-2026, Board of Trustees of Leland Stanford Jr. University
2
+ #
3
+ # Redistribution and use in source and binary forms, with or without
4
+ # modification, are permitted provided that the following conditions are met:
5
+ #
6
+ # 1. Redistributions of source code must retain the above copyright notice,
7
+ # this list of conditions and the following disclaimer.
8
+ #
9
+ # 2. Redistributions in binary form must reproduce the above copyright notice,
10
+ # this list of conditions and the following disclaimer in the documentation
11
+ # and/or other materials provided with the distribution.
12
+ #
13
+ # 3. Neither the name of the copyright holder nor the names of its contributors
14
+ # may be used to endorse or promote products derived from this software without
15
+ # specific prior written permission.
16
+ #
17
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18
+ # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
+ # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
21
+ # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22
+ # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23
+ # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26
+ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
+ # POSSIBILITY OF SUCH DAMAGE.
28
+
29
+ openapi: 3.0.3
30
+ info:
31
+ title: LOCKSS Crawler Service REST API
32
+ description: REST API of the LOCKSS Crawler Service
33
+ version: 2.0.0
34
+ contact:
35
+ name: LOCKSS Support
36
+ url: https://www.lockss.org/
37
+ email: lockss-support@lockss.org
38
+ license:
39
+ name: BSD-3-Clause
40
+ url: https://opensource.org/licenses/BSD-3-Clause
41
+ servers:
42
+ - url: "{proto}://{hostname}:{port}/"
43
+ description: LOCKSS Crawler Service
44
+ variables:
45
+ proto:
46
+ description: "The protocol (default: http)."
47
+ enum:
48
+ - http
49
+ - https
50
+ default: http
51
+ hostname:
52
+ description: The service host name (or IP address).
53
+ default: localhost
54
+ port:
55
+ description: "The service port (default: 24614)."
56
+ default: 24614
57
+ security:
58
+ - basicAuth: []
59
+ tags:
60
+ - name: crawlers
61
+ description: Crawler operations
62
+ - name: crawls
63
+ description: Crawl operations
64
+ - name: jobs
65
+ description: Crawl job operations
66
+ - name: status
67
+ description: Status operations
68
+ - name: ws
69
+ description: Legacy SOAP compatibility operations
70
+ paths:
71
+ /crawlers:
72
+ get:
73
+ tags:
74
+ - crawlers
75
+ summary: Get the list of supported crawlers.
76
+ description: Return the list of supported crawlers.
77
+ operationId: getCrawlers
78
+ # roles: ROLE_AU_ADMIN
79
+ responses:
80
+ 200:
81
+ description: The Status of supported Crawlers.
82
+ content:
83
+ application/json:
84
+ schema:
85
+ $ref: '#/components/schemas/crawlerStatuses'
86
+ default:
87
+ description: The resulting error payload.
88
+ content:
89
+ application/json:
90
+ schema:
91
+ $ref: '#/components/schemas/errorResult'
92
+ /crawlers/{crawlerId}:
93
+ get:
94
+ tags:
95
+ - crawlers
96
+ summary: Return information about a crawler.
97
+ description: Get information related to an installed crawler.
98
+ operationId: getCrawlerConfig
99
+ # roles: ROLE_AU_ADMIN
100
+ parameters:
101
+ - name: crawlerId
102
+ in: path
103
+ description: Identifier for the crawler
104
+ required: true
105
+ schema:
106
+ type: string
107
+ responses:
108
+ 200:
109
+ description: Crawler Configuration Found
110
+ content:
111
+ application/json:
112
+ schema:
113
+ $ref: '#/components/schemas/crawlerConfig'
114
+ default:
115
+ description: The resulting error payload.
116
+ content:
117
+ application/json:
118
+ schema:
119
+ $ref: '#/components/schemas/errorResult'
120
+ /crawls:
121
+ get:
122
+ tags:
123
+ - crawls
124
+ summary: Get the list of crawls.
125
+ description: Get a list of crawls a pageful at a time as defined by limit.
126
+ operationId: getCrawls
127
+ # roles: ROLE_AU_ADMIN
128
+ parameters:
129
+ - name: limit
130
+ in: query
131
+ description: The number of jobs per page
132
+ schema:
133
+ type: integer
134
+ default: 50
135
+ - name: continuationToken
136
+ in: query
137
+ description: The continuation token of the next page of crawl status data
138
+ to be returned.
139
+ schema:
140
+ type: string
141
+ responses:
142
+ 200:
143
+ description: The requested crawls
144
+ content:
145
+ application/json:
146
+ schema:
147
+ $ref: '#/components/schemas/crawlPager'
148
+ default:
149
+ description: The resulting error payload.
150
+ content:
151
+ application/json:
152
+ schema:
153
+ $ref: '#/components/schemas/errorResult'
154
+ /crawls/{jobId}:
155
+ get:
156
+ tags:
157
+ - crawls
158
+ summary: Get the crawl status of this job
159
+ description: Get the job represented by this crawl id
160
+ operationId: getCrawlById
161
+ # roles: ROLE_AU_ADMIN
162
+ parameters:
163
+ - name: jobId
164
+ in: path
165
+ required: true
166
+ schema:
167
+ type: string
168
+ responses:
169
+ 200:
170
+ description: The crawl status of the requested crawl
171
+ content:
172
+ application/json:
173
+ schema:
174
+ $ref: '#/components/schemas/crawlStatus'
175
+ default:
176
+ description: The resulting error payload.
177
+ content:
178
+ application/json:
179
+ schema:
180
+ $ref: '#/components/schemas/errorResult'
181
+ /crawls/{jobId}/fetched:
182
+ get:
183
+ tags:
184
+ - crawls
185
+ summary: A pageable list of fetched urls.
186
+ description: Get a list of fetched urls.
187
+ operationId: getCrawlFetched
188
+ # roles: ROLE_AU_ADMIN
189
+ parameters:
190
+ - name: jobId
191
+ in: path
192
+ required: true
193
+ schema:
194
+ type: string
195
+ - name: limit
196
+ in: query
197
+ description: The number of urls per page.
198
+ schema:
199
+ type: integer
200
+ - name: continuationToken
201
+ in: query
202
+ description: The continuation token of the next page of urls to be returned.
203
+ schema:
204
+ type: string
205
+ responses:
206
+ 200:
207
+ description: The requested fetched urls.
208
+ content:
209
+ application/json:
210
+ schema:
211
+ $ref: '#/components/schemas/urlPager'
212
+ default:
213
+ description: The resulting error payload.
214
+ content:
215
+ application/json:
216
+ schema:
217
+ $ref: '#/components/schemas/errorResult'
218
+ /crawls/{jobId}/excluded:
219
+ get:
220
+ tags:
221
+ - crawls
222
+ summary: A pageable list of excluded urls.
223
+ description: Get a list of excluded urls.
224
+ operationId: getCrawlExcluded
225
+ # roles: ROLE_AU_ADMIN
226
+ parameters:
227
+ - name: jobId
228
+ in: path
229
+ description: identifier used to identify a specific crawl.
230
+ required: true
231
+ schema:
232
+ type: string
233
+ - name: limit
234
+ in: query
235
+ description: The number of urls per page.
236
+ schema:
237
+ type: integer
238
+ - name: continuationToken
239
+ in: query
240
+ description: The continuation token of the next page of urls to be returned.
241
+ schema:
242
+ type: string
243
+ responses:
244
+ 200:
245
+ description: The requested excluded urls.
246
+ content:
247
+ application/json:
248
+ schema:
249
+ $ref: '#/components/schemas/urlPager'
250
+ default:
251
+ description: The resulting error payload.
252
+ content:
253
+ application/json:
254
+ schema:
255
+ $ref: '#/components/schemas/errorResult'
256
+ /crawls/{jobId}/notmodified:
257
+ get:
258
+ tags:
259
+ - crawls
260
+ summary: A pageable list of not modified urls.
261
+ description: Get a list of not modified urls.
262
+ operationId: getCrawlNotModified
263
+ # roles: ROLE_AU_ADMIN
264
+ parameters:
265
+ - name: jobId
266
+ in: path
267
+ required: true
268
+ schema:
269
+ type: string
270
+ - name: limit
271
+ in: query
272
+ description: The number of urls per page.
273
+ schema:
274
+ type: integer
275
+ - name: continuationToken
276
+ in: query
277
+ description: The continuation token of the next page of urls to be returned.
278
+ schema:
279
+ type: string
280
+ responses:
281
+ 200:
282
+ description: The requested not modified urls.
283
+ content:
284
+ application/json:
285
+ schema:
286
+ $ref: '#/components/schemas/urlPager'
287
+ default:
288
+ description: The resulting error payload.
289
+ content:
290
+ application/json:
291
+ schema:
292
+ $ref: '#/components/schemas/errorResult'
293
+ /crawls/{jobId}/pending:
294
+ get:
295
+ tags:
296
+ - crawls
297
+ summary: A pageable list of pending urls.
298
+ description: Get a list of pending urls.
299
+ operationId: getCrawlPending
300
+ # roles: ROLE_AU_ADMIN
301
+ parameters:
302
+ - name: jobId
303
+ in: path
304
+ required: true
305
+ schema:
306
+ type: string
307
+ - name: limit
308
+ in: query
309
+ description: The number of urls per page.
310
+ schema:
311
+ type: integer
312
+ - name: continuationToken
313
+ in: query
314
+ description: The continuation token of the next page of urls to be returned.
315
+ schema:
316
+ type: string
317
+ responses:
318
+ 200:
319
+ description: The requested pending urls.
320
+ content:
321
+ application/json:
322
+ schema:
323
+ $ref: '#/components/schemas/urlPager'
324
+ default:
325
+ description: The resulting error payload.
326
+ content:
327
+ application/json:
328
+ schema:
329
+ $ref: '#/components/schemas/errorResult'
330
+ /crawls/{jobId}/parsed:
331
+ get:
332
+ tags:
333
+ - crawls
334
+ summary: A pageable list of parsed urls.
335
+ description: Get a list of parsed urls.
336
+ operationId: getCrawlParsed
337
+ # roles: ROLE_AU_ADMIN
338
+ parameters:
339
+ - name: jobId
340
+ in: path
341
+ required: true
342
+ schema:
343
+ type: string
344
+ - name: limit
345
+ in: query
346
+ description: The number of urls per page.
347
+ schema:
348
+ type: integer
349
+ - name: continuationToken
350
+ in: query
351
+ description: The continuation token of the next page of urls to be returned.
352
+ schema:
353
+ type: string
354
+ responses:
355
+ 200:
356
+ description: The requested parsed urls.
357
+ content:
358
+ application/json:
359
+ schema:
360
+ $ref: '#/components/schemas/urlPager'
361
+ default:
362
+ description: The resulting error payload.
363
+ content:
364
+ application/json:
365
+ schema:
366
+ $ref: '#/components/schemas/errorResult'
367
+ /crawls/{jobId}/errors:
368
+ get:
369
+ tags:
370
+ - crawls
371
+ summary: A pageable list of urls with errors.
372
+ description: Get a list of urls with errors.
373
+ operationId: getCrawlErrors
374
+ # roles: ROLE_AU_ADMIN
375
+ parameters:
376
+ - name: jobId
377
+ in: path
378
+ required: true
379
+ schema:
380
+ type: string
381
+ - name: limit
382
+ in: query
383
+ description: The number of urls per page.
384
+ schema:
385
+ type: integer
386
+ - name: continuationToken
387
+ in: query
388
+ description: The continuation token of the next page of urls to be returned.
389
+ schema:
390
+ type: string
391
+ responses:
392
+ 200:
393
+ description: The requested urls with errors.
394
+ content:
395
+ application/json:
396
+ schema:
397
+ $ref: '#/components/schemas/urlPager'
398
+ default:
399
+ description: The resulting error payload.
400
+ content:
401
+ application/json:
402
+ schema:
403
+ $ref: '#/components/schemas/errorResult'
404
+ /crawls/{jobId}/mediatypes/{type}:
405
+ get:
406
+ tags:
407
+ - crawls
408
+ summary: A pageable list of urls of mimetype.
409
+ description: Get a list of urls of mimetype.
410
+ operationId: getCrawlByMimeType
411
+ # roles: ROLE_AU_ADMIN
412
+ parameters:
413
+ - name: jobId
414
+ in: path
415
+ required: true
416
+ schema:
417
+ type: string
418
+ - name: type
419
+ in: path
420
+ required: true
421
+ schema:
422
+ type: string
423
+ - name: limit
424
+ in: query
425
+ description: The number of urls per page.
426
+ schema:
427
+ type: integer
428
+ - name: continuationToken
429
+ in: query
430
+ description: The continuation token of the next page of urls to be returned.
431
+ schema:
432
+ type: string
433
+ responses:
434
+ 200:
435
+ description: The requested urls.
436
+ content:
437
+ application/json:
438
+ schema:
439
+ $ref: '#/components/schemas/urlPager'
440
+ default:
441
+ description: The resulting error payload.
442
+ content:
443
+ application/json:
444
+ schema:
445
+ $ref: '#/components/schemas/errorResult'
446
+ /jobs:
447
+ get:
448
+ tags:
449
+ - jobs
450
+ summary: Get the list of crawl jobs.
451
+ description: Get a list of crawl jobs a pageful at a time as defined by the
452
+ continuation token and limit.
453
+ operationId: getJobs
454
+ # roles: ROLE_AU_ADMIN
455
+ parameters:
456
+ - name: limit
457
+ in: query
458
+ description: The number of jobs per page
459
+ schema:
460
+ type: integer
461
+ default: 50
462
+ - name: continuationToken
463
+ in: query
464
+ description: The continuation token of the next page of jobs to be returned.
465
+ schema:
466
+ type: string
467
+ responses:
468
+ 200:
469
+ description: The requested crawl jobs
470
+ content:
471
+ application/json:
472
+ schema:
473
+ $ref: '#/components/schemas/jobPager'
474
+ default:
475
+ description: The resulting error payload.
476
+ content:
477
+ application/json:
478
+ schema:
479
+ $ref: '#/components/schemas/errorResult'
480
+ post:
481
+ tags:
482
+ - jobs
483
+ summary: Request a crawl as defined by the descriptor
484
+ description: Enqueue a new crawl job as defined by the crawl descriptor and
485
+ return it.
486
+ operationId: queueJob
487
+ # roles: ROLE_AU_ADMIN
488
+ requestBody:
489
+ description: crawl request
490
+ content:
491
+ '*/*':
492
+ schema:
493
+ $ref: '#/components/schemas/crawlDesc'
494
+ required: true
495
+ responses:
496
+ 202:
497
+ description: The crawl request has been queued for operation.
498
+ content:
499
+ application/json:
500
+ schema:
501
+ $ref: '#/components/schemas/crawlJob'
502
+ default:
503
+ description: The resulting error payload.
504
+ content:
505
+ application/json:
506
+ schema:
507
+ $ref: '#/components/schemas/errorResult'
508
+ x-codegen-request-body-name: crawlDesc
509
+ delete:
510
+ tags:
511
+ - jobs
512
+ summary: Delete all of the currently queued and active jobs
513
+ description: Halt and delete all of the currently queued and active crawl jobs
514
+ operationId: deleteJobs
515
+ # roles: ROLE_AU_ADMIN
516
+ responses:
517
+ 200:
518
+ description: All crawl jobs have been stopped and deleted.
519
+ content: {}
520
+ default:
521
+ description: The resulting error payload.
522
+ content:
523
+ application/json:
524
+ schema:
525
+ $ref: '#/components/schemas/errorResult'
526
+ /jobs/{jobId}:
527
+ get:
528
+ tags:
529
+ - jobs
530
+ summary: Get the crawl status of this job
531
+ description: Get the crawl job with a given crawl id
532
+ operationId: getCrawlJob
533
+ # roles: ROLE_AU_ADMIN
534
+ parameters:
535
+ - name: jobId
536
+ in: path
537
+ required: true
538
+ schema:
539
+ type: string
540
+ responses:
541
+ 200:
542
+ description: The crawl Job of the requested crawl
543
+ content:
544
+ application/json:
545
+ schema:
546
+ $ref: '#/components/schemas/crawlJob'
547
+ default:
548
+ description: The resulting error payload.
549
+ content:
550
+ application/json:
551
+ schema:
552
+ $ref: '#/components/schemas/errorResult'
553
+ delete:
554
+ tags:
555
+ - jobs
556
+ summary: Remove or stop a crawl job.
557
+ description: Delete a crawl job with the given job id, stopping any current
558
+ processing, if necessary.
559
+ operationId: deleteCrawlJob
560
+ # roles: ROLE_AU_ADMIN
561
+ parameters:
562
+ - name: jobId
563
+ in: path
564
+ description: The identifier used to identify a specific crawl job.
565
+ required: true
566
+ schema:
567
+ type: string
568
+ responses:
569
+ 200:
570
+ description: The crawlJob of the deleted crawl.
571
+ content:
572
+ application/json:
573
+ schema:
574
+ $ref: '#/components/schemas/crawlJob'
575
+ default:
576
+ description: The resulting error payload.
577
+ content:
578
+ application/json:
579
+ schema:
580
+ $ref: '#/components/schemas/errorResult'
581
+ /status:
582
+ get:
583
+ tags:
584
+ - status
585
+ summary: Get the status of the service
586
+ description: Get the status of the service
587
+ operationId: getStatus
588
+ # no credentials required
589
+ responses:
590
+ 200:
591
+ description: The status of the service
592
+ content:
593
+ application/json:
594
+ schema:
595
+ $ref: '#/components/schemas/apiStatus'
596
+ default:
597
+ description: The resulting error payload.
598
+ content:
599
+ application/json:
600
+ schema:
601
+ $ref: '#/components/schemas/errorResult'
602
+ /ws/crawls:
603
+ get:
604
+ tags:
605
+ - ws
606
+ summary: Query for list of crawls based on subset defined by query string
607
+ description: Query for crawls that meet a set of specified conditions
608
+ operationId: getWsCrawls
609
+ # roles: ROLE_AU_ADMIN
610
+ parameters:
611
+ - name: crawlQuery
612
+ in: query
613
+ description: The query that specifies the crawls to be returned
614
+ required: true
615
+ schema:
616
+ type: string
617
+ responses:
618
+ 200:
619
+ description: Information about the requested crawls
620
+ content:
621
+ application/json:
622
+ schema:
623
+ $ref: '#/components/schemas/crawlWsResult'
624
+ default:
625
+ description: The resulting error payload.
626
+ content:
627
+ application/json:
628
+ schema:
629
+ $ref: '#/components/schemas/errorResult'
630
+ components:
631
+ schemas:
632
+ apiStatus:
633
+ required:
634
+ - apiVersion
635
+ - ready
636
+ type: object
637
+ properties:
638
+ apiVersion:
639
+ type: string
640
+ description: The version of the API
641
+ componentName:
642
+ type: string
643
+ description: The name of the component
644
+ componentVersion:
645
+ type: string
646
+ description: The version of the component software
647
+ lockssVersion:
648
+ type: string
649
+ description: The version of the LOCKSS system
650
+ ready:
651
+ type: boolean
652
+ description: The indication of whether the service is available
653
+ serviceName:
654
+ type: string
655
+ description: The name of the service
656
+ readyTime:
657
+ type: integer
658
+ description: The time the service last became ready.
659
+ format: int64
660
+ reason:
661
+ type: string
662
+ description: The reason the service isn't ready.
663
+ startupStatus:
664
+ type: string
665
+ description: Enum indicating progress of plugin/AU processing at startup.
666
+ enum:
667
+ - NONE
668
+ - PLUGINS_CRAWLING
669
+ - PLUGINS_COLLECTED
670
+ - PLUGINS_LOADING
671
+ - PLUGINS_LOADED
672
+ - AUS_STARTING
673
+ - AUS_STARTED
674
+ description: The status information of the service
675
+ counter:
676
+ required:
677
+ - count
678
+ - itemsLink
679
+ type: object
680
+ properties:
681
+ count:
682
+ type: integer
683
+ description: The number of elements
684
+ format: int32
685
+ itemsLink:
686
+ type: string
687
+ description: A link to the list of count items or to a pager with count
688
+ items.
689
+ description: A counter for urls.
690
+ crawlDesc:
691
+ required:
692
+ - auId
693
+ - crawlKind
694
+ type: object
695
+ properties:
696
+ auId:
697
+ type: string
698
+ description: The identifier of the archival unit to be crawled.
699
+ crawlKind:
700
+ $ref: '#/components/schemas/crawlKindEnum'
701
+ crawlerId:
702
+ type: string
703
+ description: The crawler to be used for this crawl.
704
+ default: classic
705
+ forceCrawl:
706
+ type: boolean
707
+ description: An indication of whether the crawl is to be forced, suppressing
708
+ conditions that might otherwise prevent the crawl from happening.
709
+ default: false
710
+ refetchDepth:
711
+ type: integer
712
+ description: The refetch depth to use for a deep crawl.
713
+ format: int32
714
+ default: -1
715
+ priority:
716
+ type: integer
717
+ description: The priority for the crawl.
718
+ format: int32
719
+ nullable: true
720
+ default: null
721
+ crawlList:
722
+ type: array
723
+ description: The list of URLs to crawl.
724
+ items:
725
+ type: string
726
+ crawlDepth:
727
+ type: integer
728
+ description: The depth to which the links should be followed. 0 means
729
+ do not follow links.
730
+ format: int32
731
+ extraCrawlerData:
732
+ type: object
733
+ additionalProperties:
734
+ type: object
735
+ properties: {}
736
+ description: A map of additional properties for a crawl on a given crawler.
737
+ description: A descriptor for a crawl.
738
+ crawlJob:
739
+ required:
740
+ - crawlDesc
741
+ - jobId
742
+ - jobStatus
743
+ - requestDate
744
+ type: object
745
+ properties:
746
+ crawlDesc:
747
+ $ref: '#/components/schemas/crawlDesc'
748
+ requestDate:
749
+ type: integer
750
+ description: The timestamp when the crawl was requested.
751
+ format: int64
752
+ jobId:
753
+ type: string
754
+ description: Identifier of the crawl job.
755
+ jobStatus:
756
+ $ref: '#/components/schemas/jobStatus'
757
+ startDate:
758
+ type: integer
759
+ description: The timestamp when the crawl began.
760
+ format: int64
761
+ endDate:
762
+ type: integer
763
+ description: The timestamp when the crawl ended.
764
+ format: int64
765
+ result:
766
+ type: string
767
+ description: A URI which can be used to retrieve the crawl data.
768
+ description: The job resulting from a request to perform a crawl.
769
+ crawlKindEnum:
770
+ title: Crawl Kind
771
+ description: The kind of crawl being performed, either 'newContent' or 'repair'.
772
+ type: string
773
+ enum:
774
+ - newContent
775
+ - repair
776
+ crawlPager:
777
+ required:
778
+ - crawls
779
+ - pageInfo
780
+ type: object
781
+ properties:
782
+ crawls:
783
+ type: array
784
+ description: The crawls displayed in the page
785
+ items:
786
+ $ref: '#/components/schemas/crawlStatus'
787
+ pageInfo:
788
+ $ref: '#/components/schemas/pageInfo'
789
+ description: A display page of crawl status
790
+ crawlStatus:
791
+ required:
792
+ - auId
793
+ - auName
794
+ - crawlerId
795
+ - endTime
796
+ - jobId
797
+ - jobStatus
798
+ - priority
799
+ - startTime
800
+ - startUrls
801
+ - type
802
+ type: object
803
+ properties:
804
+ jobId:
805
+ type: string
806
+ description: The id for the crawl.
807
+ auId:
808
+ type: string
809
+ description: The id for the au.
810
+ auName:
811
+ type: string
812
+ description: The name for the au.
813
+ type:
814
+ type: string
815
+ description: The type of crawl.
816
+ startUrls:
817
+ type: array
818
+ description: The array of start urls.
819
+ items:
820
+ type: string
821
+ priority:
822
+ type: integer
823
+ description: The priority for this crawl.
824
+ format: int32
825
+ crawlerId:
826
+ type: string
827
+ description: The id of the crawler used for this crawl.
828
+ default: classic
829
+ sources:
830
+ type: array
831
+ description: The sources to use for the crawl.
832
+ items:
833
+ type: string
834
+ depth:
835
+ type: integer
836
+ description: The depth of the crawl.
837
+ format: int32
838
+ refetchDepth:
839
+ type: integer
840
+ description: The refetch depth of the crawl.
841
+ format: int32
842
+ proxy:
843
+ type: string
844
+ description: The proxy used for crawling.
845
+ startTime:
846
+ type: integer
847
+ description: The timestamp for the start of crawl.
848
+ format: int64
849
+ endTime:
850
+ type: integer
851
+ description: The timestamp for the end of the crawl.
852
+ format: int64
853
+ jobStatus:
854
+ $ref: '#/components/schemas/jobStatus'
855
+ isWaiting:
856
+ type: boolean
857
+ description: True if the crawl is waiting to start.
858
+ isActive:
859
+ type: boolean
860
+ description: True if the crawl is active.
861
+ isError:
862
+ type: boolean
863
+ description: True if the crawl has errored.
864
+ bytesFetched:
865
+ type: integer
866
+ description: The number of bytes fetched.
867
+ format: int64
868
+ fetchedItems:
869
+ $ref: '#/components/schemas/counter'
870
+ excludedItems:
871
+ $ref: '#/components/schemas/counter'
872
+ notModifiedItems:
873
+ $ref: '#/components/schemas/counter'
874
+ parsedItems:
875
+ $ref: '#/components/schemas/counter'
876
+ pendingItems:
877
+ $ref: '#/components/schemas/counter'
878
+ errors:
879
+ $ref: '#/components/schemas/counter'
880
+ mimeTypes:
881
+ type: array
882
+ description: The list of urls by mimeType.
883
+ items:
884
+ $ref: '#/components/schemas/mimeCounter'
885
+ description: The status of a single crawl.
886
+ crawlWsResult:
887
+ required:
888
+ - auId
889
+ - auName
890
+ type: object
891
+ properties:
892
+ auId:
893
+ type: string
894
+ auName:
895
+ type: string
896
+ priority:
897
+ type: integer
898
+ format: int32
899
+ crawlKey:
900
+ type: string
901
+ crawlType:
902
+ type: string
903
+ startTime:
904
+ type: integer
905
+ format: int32
906
+ duration:
907
+ type: integer
908
+ format: int32
909
+ crawlStatus:
910
+ type: string
911
+ bytesFetchedCount:
912
+ type: integer
913
+ format: int32
914
+ pagesFetchedCount:
915
+ type: integer
916
+ format: int32
917
+ pagesFetched:
918
+ type: array
919
+ items:
920
+ type: string
921
+ pagesParsedCount:
922
+ type: integer
923
+ format: int32
924
+ pagesParsed:
925
+ type: array
926
+ items:
927
+ type: string
928
+ pagesPendingCount:
929
+ type: integer
930
+ format: int32
931
+ pagesPending:
932
+ type: array
933
+ items:
934
+ type: string
935
+ pagesExcludedCount:
936
+ type: integer
937
+ format: int32
938
+ pagesExcluded:
939
+ type: array
940
+ items:
941
+ type: string
942
+ offSiteUrlsExcludedCount:
943
+ type: integer
944
+ format: int32
945
+ pagesNotModifiedCount:
946
+ type: integer
947
+ format: int32
948
+ pagesNotModified:
949
+ type: array
950
+ items:
951
+ type: string
952
+ pagesWithErrorsCount:
953
+ type: integer
954
+ format: int32
955
+ pagesWithErrors:
956
+ type: array
957
+ items:
958
+ type: object
959
+ properties:
960
+ url:
961
+ type: string
962
+ severity:
963
+ type: string
964
+ message:
965
+ type: string
966
+ mimeTypeCount:
967
+ type: integer
968
+ format: int32
969
+ mimeTypes:
970
+ type: array
971
+ items:
972
+ type: string
973
+ sources:
974
+ type: array
975
+ items:
976
+ type: string
977
+ startingUrls:
978
+ type: array
979
+ items:
980
+ type: string
981
+ refetchDepth:
982
+ type: integer
983
+ format: int32
984
+ linkDepth:
985
+ type: integer
986
+ format: int32
987
+ crawlerConfig:
988
+ required:
989
+ - attributes
990
+ - crawlerId
991
+ type: object
992
+ properties:
993
+ crawlerId:
994
+ type: string
995
+ description: The identifier for this crawler
996
+ example: classic
997
+ attributes:
998
+ type: object
999
+ additionalProperties:
1000
+ type: string
1001
+ description: Key/value pairs providing crawler-specific attributes and configuration
1002
+ information.
1003
+ description: Configuration information about a specific crawler.
1004
+ crawlerStatus:
1005
+ required:
1006
+ - isEnabled
1007
+ type: object
1008
+ properties:
1009
+ isEnabled:
1010
+ type: boolean
1011
+ description: Is the crawler enabled
1012
+ isAutoCrawlEnabled:
1013
+ type: boolean
1014
+ description: Does crawler autocrawl AUs when needed.
1015
+ numJobsActive:
1016
+ type: integer
1017
+ description: The number of jobs running.
1018
+ format: int32
1019
+ numJobsFailed:
1020
+ type: integer
1021
+ description: The number of jobs failed.
1022
+ format: int32
1023
+ numJobsSuccessful:
1024
+ type: integer
1025
+ description: The number of jobs succeeded
1026
+ format: int32
1027
+ numJobsPending:
1028
+ type: integer
1029
+ description: The number of pending jobs.
1030
+ format: int32
1031
+ errMessage:
1032
+ type: string
1033
+ description: Status about a specific crawler.
1034
+ crawlerStatuses:
1035
+ type: object
1036
+ properties:
1037
+ crawlerMap:
1038
+ type: object
1039
+ additionalProperties:
1040
+ $ref: '#/components/schemas/crawlerStatus'
1041
+ description: A map of crawler status objects
1042
+ description: The statuses of the crawlers, held as a map of crawler status objects.
1043
+ errorResult:
1044
+ type: object
1045
+ required:
1046
+ - message
1047
+ - code
1048
+ properties:
1049
+ message:
1050
+ type: string
1051
+ code:
1052
+ type: integer
1053
+ rootCause:
1054
+ type: string
1055
+ jobPager:
1056
+ required:
1057
+ - jobs
1058
+ - pageInfo
1059
+ type: object
1060
+ properties:
1061
+ jobs:
1062
+ type: array
1063
+ description: The jobs displayed in the page
1064
+ items:
1065
+ $ref: '#/components/schemas/crawlJob'
1066
+ pageInfo:
1067
+ $ref: '#/components/schemas/pageInfo'
1068
+ description: A display page of jobs
1069
+ jobStatus:
1070
+ required:
1071
+ - statusCode
1072
+ type: object
1073
+ properties:
1074
+ statusCode:
1075
+ type: string
1076
+ description: The code identifying this status.
1077
+ enum:
1078
+ - STATUS_UNKNOWN
1079
+ - STATUS_QUEUED
1080
+ - STATUS_ACTIVE
1081
+ - STATUS_SUCCESSFUL
1082
+ - STATUS_ERROR
1083
+ - STATUS_ABORTED
1084
+ - STATUS_WINDOW_CLOSED
1085
+ - STATUS_FETCH_ERROR
1086
+ - STATUS_NO_PUB_PERMISSION
1087
+ - STATUS_PLUGIN_ERROR
1088
+ - STATUS_REPO_ERR
1089
+ - STATUS_RUNNING_AT_CRASH
1090
+ - STATUS_EXTRACTOR_ERROR
1091
+ - STATUS_CRAWL_TEST_SUCCESSFUL
1092
+ - STATUS_CRAWL_TEST_FAIL
1093
+ - STATUS_INELIGIBLE
1094
+ - STATUS_INACTIVE_REQUEST
1095
+ - STATUS_INTERRUPTED
1096
+ msg:
1097
+ type: string
1098
+ description: A text message explaining this status.
1099
+ description: A status which includes a code and a message.
1100
+ mimeCounter:
1101
+ required:
1102
+ - mimeType
1103
+ type: object
1104
+ properties:
1105
+ mimeType:
1106
+ type: string
1107
+ description: The mime type to count.
1108
+ count:
1109
+ type: integer
1110
+ description: The number of elements of this mime type.
1111
+ format: int32
1112
+ counterLink:
1113
+ type: string
1114
+ description: A link to the list of count elements or to a pager with
1115
+ count elements.
1116
+ description: A counter for mimeTypes seen during a crawl.
1117
+ pageInfo:
1118
+ required:
1119
+ - continuationToken
1120
+ - curLink
1121
+ - itemsInPage
1122
+ - totalCount
1123
+ type: object
1124
+ properties:
1125
+ totalCount:
1126
+ type: integer
1127
+ description: The total number of elements to be paginated
1128
+ format: int32
1129
+ nullable: true
1130
+ itemsInPage:
1131
+ type: integer
1132
+ description: The number of items in page.
1133
+ format: int32
1134
+ continuationToken:
1135
+ type: string
1136
+ description: The continuation token.
1137
+ nullable: true
1138
+ curLink:
1139
+ type: string
1140
+ description: The link to the current page.
1141
+ nextLink:
1142
+ type: string
1143
+ description: The link to the next page.
1144
+ nullable: true
1145
+ description: The information related to pagination of content
1146
+ urlError:
1147
+ required:
1148
+ - message
1149
+ - severity
1150
+ type: object
1151
+ properties:
1152
+ message:
1153
+ type: string
1154
+ description: The error message
1155
+ severity:
1156
+ type: string
1157
+ description: the severity of the error.
1158
+ enum:
1159
+ - Warning
1160
+ - Error
1161
+ - Fatal
1162
+ description: information related to an error for a url.
1163
+ urlInfo:
1164
+ required:
1165
+ - url
1166
+ type: object
1167
+ properties:
1168
+ url:
1169
+ type: string
1170
+ description: The url string
1171
+ error:
1172
+ $ref: '#/components/schemas/urlError'
1173
+ referrers:
1174
+ type: array
1175
+ description: An optional list of referrers.
1176
+ items:
1177
+ type: string
1178
+ description: Information related to a URL.
1179
+ urlPager:
1180
+ required:
1181
+ - pageInfo
1182
+ - urls
1183
+ type: object
1184
+ properties:
1185
+ pageInfo:
1186
+ $ref: '#/components/schemas/pageInfo'
1187
+ urls:
1188
+ type: array
1189
+ description: A list of URLs with related info.
1190
+ items:
1191
+ $ref: '#/components/schemas/urlInfo'
1192
+ description: A Pager for urls with maps.
1193
+ securitySchemes:
1194
+ basicAuth:
1195
+ type: http
1196
+ description: HTTP Basic Authentication. Works over `HTTP` and `HTTPS`
1197
+ scheme: basic