@biblioteksentralen/oai-pmh-client 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +37 -0
- package/dist/index.d.ts +161 -0
- package/dist/index.js +293 -0
- package/dist/index.mjs +284 -0
- package/package.json +43 -0
package/README.md
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# `@biblioteksentralen/oai-pmh-client`
|
|
2
|
+
|
|
3
|
+
## Usage
|
|
4
|
+
|
|
5
|
+
```ts
|
|
6
|
+
import { OaiPmhClient } from "@biblioteksentralen/oai-pmh-client";
|
|
7
|
+
import { pino } from "pino";
|
|
8
|
+
|
|
9
|
+
const log = pino(); // Any logger implementing the TypeScript Abstract Logger interface can be used.
|
|
10
|
+
|
|
11
|
+
const oaiPmhClient = new OaiPmhClient({
|
|
12
|
+
url: "https://stord.bib.no/cgi-bin/oai",
|
|
13
|
+
metadataPrefix: "marc21",
|
|
14
|
+
log,
|
|
15
|
+
});
|
|
16
|
+
|
|
17
|
+
const options = { from: "2023-05-10" };
|
|
18
|
+
for await (const { record } of oaiPmhClient.listRecords(options)) {
|
|
19
|
+
console.log(`Fetched record ${record.header.identifier}`);
|
|
20
|
+
 // doSomethingWithRecord(record)
|
|
21
|
+
}
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
The `listRecords()` method is implemented as an async generator and handles resumption and automatic retries under the hood. Custom timeout and retry options can be set when constructing the client. For example, to disable automatic retries, set `attempts` to `1`:
|
|
25
|
+
|
|
26
|
+
```ts
|
|
27
|
+
const oaiPmhClient = new OaiPmhClient({
|
|
28
|
+
url: "https://stord.bib.no/cgi-bin/oai",
|
|
29
|
+
metadataPrefix: "marc21",
|
|
30
|
+
log,
|
|
31
|
+
retry: { attempts: 1 },
|
|
32
|
+
});
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## Error handling
|
|
36
|
+
|
|
37
|
+
If the OAI-PMH server responds with an error while listing records, an `OaiPmhError` is thrown, carrying the [OAI-PMH error](http://www.openarchives.org/OAI/openarchivesprotocol.html#ErrorConditions) code and description. `getRecord()` and `listSets()` return the error as part of their result instead of throwing it. These errors are typically returned for invalid argument values or other client errors.
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
import { Logger } from 'ts-log';
|
|
2
|
+
import { z } from 'zod';
|
|
3
|
+
import { XmlElement } from '@biblioteksentralen/marc';
|
|
4
|
+
|
|
5
|
+
interface RetryOptions {
|
|
6
|
+
/**
|
|
7
|
+
* Number of times to try running the function. Setting this to 1 means to not retry.
|
|
8
|
+
*/
|
|
9
|
+
attempts?: number;
|
|
10
|
+
/**
|
|
11
|
+
* Delay can be specified either as constant number of milliseconds or as a function
|
|
12
|
+
* that returns the number of milliseconds based on the attempt number.
|
|
13
|
+
*/
|
|
14
|
+
delay?: number | ((attempt: number) => number);
|
|
15
|
+
/**
|
|
16
|
+
* Callback function to be called on each retry. Can be used e.g. to conditionally
|
|
17
|
+
* abort retrying or do logging. If the function returns true, the retry will be aborted.
|
|
18
|
+
*/
|
|
19
|
+
onRetry?: (props: {
|
|
20
|
+
error: Error;
|
|
21
|
+
attempt: number;
|
|
22
|
+
delay: number;
|
|
23
|
+
}) => Promise<boolean | void> | boolean | void;
|
|
24
|
+
}
|
|
25
|
+
/**
 * Options accepted by `exponentialBackoff`.
 */
interface BackoffOptions {
    /** When true, each delay is multiplied by a random factor between 1 and 2. Defaults to false. */
    randomize?: boolean;
    /** Base delay in milliseconds that is scaled by `factor` per attempt. Defaults to 500; values below 1 are treated as 1. */
    min?: number;
    /** Upper bound for the returned delay, in milliseconds. Defaults to 30000. */
    max?: number;
    /** Base of the exponential growth. Defaults to 2. */
    factor?: number;
}
|
|
31
|
+
/**
|
|
32
|
+
 * Creates an exponential backoff delay function that can be passed as the `delay` retry option.
|
|
33
|
+
**/
|
|
34
|
+
declare const exponentialBackoff: ({ min, max, factor, randomize, }?: BackoffOptions) => (attempt: number) => number;
|
|
35
|
+
|
|
36
|
+
type RequestParams = {
|
|
37
|
+
metadataPrefix?: string;
|
|
38
|
+
set?: string;
|
|
39
|
+
identifier?: string;
|
|
40
|
+
resumptionToken?: string;
|
|
41
|
+
verb?: "ListRecords" | "ListSets" | "ListMetadataFormats" | "ListIdentifiers" | "GetRecord";
|
|
42
|
+
};
|
|
43
|
+
declare const OaiPmhHeader: z.ZodObject<{
|
|
44
|
+
identifier: z.ZodString;
|
|
45
|
+
deleted: z.ZodBoolean;
|
|
46
|
+
datestamp: z.ZodDate;
|
|
47
|
+
setSpec: z.ZodArray<z.ZodString, "many">;
|
|
48
|
+
}, "strip", z.ZodTypeAny, {
|
|
49
|
+
identifier: string;
|
|
50
|
+
deleted: boolean;
|
|
51
|
+
datestamp: Date;
|
|
52
|
+
setSpec: string[];
|
|
53
|
+
}, {
|
|
54
|
+
identifier: string;
|
|
55
|
+
deleted: boolean;
|
|
56
|
+
datestamp: Date;
|
|
57
|
+
setSpec: string[];
|
|
58
|
+
}>;
|
|
59
|
+
type OaiPmhHeader = z.infer<typeof OaiPmhHeader>;
|
|
60
|
+
declare const OaiPmhSet: z.ZodObject<{
|
|
61
|
+
setSpec: z.ZodString;
|
|
62
|
+
setName: z.ZodString;
|
|
63
|
+
}, "strip", z.ZodTypeAny, {
|
|
64
|
+
setSpec: string;
|
|
65
|
+
setName: string;
|
|
66
|
+
}, {
|
|
67
|
+
setSpec: string;
|
|
68
|
+
setName: string;
|
|
69
|
+
}>;
|
|
70
|
+
type OaiPmhSet = z.infer<typeof OaiPmhSet>;
|
|
71
|
+
type OaiPmhRecord = {
|
|
72
|
+
header: OaiPmhHeader;
|
|
73
|
+
metadata: XmlElement | undefined;
|
|
74
|
+
};
|
|
75
|
+
type ListRecordsParams = {
|
|
76
|
+
metadataPrefix?: string;
|
|
77
|
+
resumptionToken?: string;
|
|
78
|
+
from?: string;
|
|
79
|
+
until?: string;
|
|
80
|
+
set?: string;
|
|
81
|
+
};
|
|
82
|
+
/**
 * Configuration accepted by the `OaiPmhClient` constructor.
 */
type OaiPmhClientConfig = {
    /** URL of the OAI-PMH endpoint; request parameters are appended as a query string. */
    url: string;
    /** Logger that receives a debug line per request. Defaults to `dummyLogger` from `ts-log`. */
    log?: Logger;
    /** Extra HTTP headers sent with every request; these can override the default `User-Agent`. */
    headers?: Record<string, string>;
    /** Per-request timeout in milliseconds. Defaults to 10000. */
    timeoutMs?: number;
    /** Retry behaviour used when listing records. Defaults to 10 attempts with exponential backoff. */
    retry?: RetryOptions;
    /** `metadataPrefix` sent with `ListRecords` and `GetRecord` requests. */
    metadataPrefix?: string;
    /** `set` sent with `ListRecords` requests. */
    set?: string;
    /** When true, OAI-PMH error responses to `ListRecords` are retried like other failures. Defaults to false. */
    retryOnOaiPmhError?: boolean;
};
|
|
92
|
+
|
|
93
|
+
/**
|
|
94
|
+
* Error thrown when the OAI-PMH server returns an error response.
|
|
95
|
+
* Ref: http://www.openarchives.org/OAI/openarchivesprotocol.html#ErrorConditions
|
|
96
|
+
* There is usually no point in retrying when we get one of these.
|
|
97
|
+
*/
|
|
98
|
+
declare class OaiPmhError extends Error {
|
|
99
|
+
name: string;
|
|
100
|
+
code: string;
|
|
101
|
+
description?: string;
|
|
102
|
+
constructor({ code, description }: {
|
|
103
|
+
code: string;
|
|
104
|
+
description?: string;
|
|
105
|
+
});
|
|
106
|
+
}
|
|
107
|
+
/**
|
|
108
|
+
 * Error thrown when the server responds with a non-OK HTTP status. A 404 is reported the same way but is never retried.
|
|
109
|
+
*/
|
|
110
|
+
declare class HttpError extends Error {
|
|
111
|
+
response: Response;
|
|
112
|
+
name: string;
|
|
113
|
+
constructor(response: Response, msg?: string);
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
type OaiPmhResponse = {
|
|
117
|
+
records: OaiPmhRecord[];
|
|
118
|
+
resumptionToken?: string;
|
|
119
|
+
};
|
|
120
|
+
declare const parseListRecordsResponse: (doc: XmlElement) => OaiPmhResponse & {
|
|
121
|
+
error?: OaiPmhError | undefined;
|
|
122
|
+
};
|
|
123
|
+
type ListSetsResponse = {
|
|
124
|
+
sets: OaiPmhSet[];
|
|
125
|
+
error?: OaiPmhError;
|
|
126
|
+
};
|
|
127
|
+
declare const parseListSetsResponse: (doc: XmlElement) => ListSetsResponse;
|
|
128
|
+
declare class OaiPmhClient {
|
|
129
|
+
url: string;
|
|
130
|
+
metadataPrefix?: string;
|
|
131
|
+
set?: string;
|
|
132
|
+
log: Logger;
|
|
133
|
+
headers: Record<string, string>;
|
|
134
|
+
retryOptions: RetryOptions;
|
|
135
|
+
retryOnOaiPmhError: boolean;
|
|
136
|
+
timeoutMs: number;
|
|
137
|
+
constructor(config: OaiPmhClientConfig);
|
|
138
|
+
request(params: RequestParams): Promise<Response>;
|
|
139
|
+
private parseXmlResponse;
|
|
140
|
+
listRecordBatches({ resumptionToken, ...params }?: ListRecordsParams): AsyncGenerator<OaiPmhResponse>;
|
|
141
|
+
listRecords(params?: ListRecordsParams): AsyncGenerator<ListRecordsResponse>;
|
|
142
|
+
getRecord(recordId: string): Promise<{
|
|
143
|
+
record: {
|
|
144
|
+
header: {
|
|
145
|
+
identifier: string;
|
|
146
|
+
deleted: boolean;
|
|
147
|
+
datestamp: Date;
|
|
148
|
+
setSpec: string[];
|
|
149
|
+
};
|
|
150
|
+
metadata: XmlElement | undefined;
|
|
151
|
+
} | undefined;
|
|
152
|
+
error: OaiPmhError | undefined;
|
|
153
|
+
}>;
|
|
154
|
+
listSets(): Promise<ListSetsResponse>;
|
|
155
|
+
}
|
|
156
|
+
interface ListRecordsResponse {
|
|
157
|
+
record: OaiPmhRecord;
|
|
158
|
+
resumptionToken: string | undefined;
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
export { HttpError, type ListRecordsParams, type ListRecordsResponse, OaiPmhClient, type OaiPmhClientConfig, OaiPmhError, OaiPmhHeader, type OaiPmhRecord, OaiPmhSet, type RequestParams, exponentialBackoff, parseListRecordsResponse, parseListSetsResponse };
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,293 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
var radash = require('radash');
|
|
4
|
+
var tsLog = require('ts-log');
|
|
5
|
+
var url = require('url');
|
|
6
|
+
var zod = require('zod');
|
|
7
|
+
var marc = require('@biblioteksentralen/marc');
|
|
8
|
+
|
|
9
|
+
// src/OaiPmhClient.ts
|
|
10
|
+
/**
 * Zod schema for the parsed header of an OAI-PMH record.
 * `parseOaiPmhRecord` validates every header it builds against this schema.
 */
var OaiPmhHeader = zod.z.object({
  // Text of the header's identifier element.
  identifier: zod.z.string(),
  // True when the header element carries status="deleted".
  deleted: zod.z.boolean(),
  // Datestamp already converted to a Date by parseDatestamp.
  datestamp: zod.z.date(),
  // One entry per setSpec element found in the header.
  setSpec: zod.z.array(zod.z.string())
});
|
|
16
|
+
/**
 * Zod schema for one set entry of a ListSets response.
 * `parseListSetsResponse` validates every set it builds against this schema.
 */
var OaiPmhSet = zod.z.object({
  // Text of the set's setSpec element.
  setSpec: zod.z.string(),
  // Text of the set's setName element.
  setName: zod.z.string()
});
|
|
20
|
+
|
|
21
|
+
// src/errors.ts
|
|
22
|
+
var OaiPmhError = class extends Error {
|
|
23
|
+
name = "OaiPmhError";
|
|
24
|
+
code;
|
|
25
|
+
description;
|
|
26
|
+
constructor({ code, description }) {
|
|
27
|
+
super(
|
|
28
|
+
`OAI-PMH server returned error response: ${code}${description ? ` - ${description}` : ""}`
|
|
29
|
+
);
|
|
30
|
+
this.code = code;
|
|
31
|
+
this.description = description;
|
|
32
|
+
}
|
|
33
|
+
};
|
|
34
|
+
var HttpError = class extends Error {
|
|
35
|
+
constructor(response, msg) {
|
|
36
|
+
super(msg);
|
|
37
|
+
this.response = response;
|
|
38
|
+
}
|
|
39
|
+
name = "HttpError";
|
|
40
|
+
};
|
|
41
|
+
|
|
42
|
+
// src/util/retry.ts
|
|
43
|
+
var retry = async (func, { attempts = 3, delay = 500, onRetry } = {}) => {
|
|
44
|
+
for (const attempt of range(1, attempts)) {
|
|
45
|
+
try {
|
|
46
|
+
return await func();
|
|
47
|
+
} catch (error) {
|
|
48
|
+
if (!(error instanceof Error)) {
|
|
49
|
+
throw new TypeError(
|
|
50
|
+
`Non-error was thrown: "${String(
|
|
51
|
+
error
|
|
52
|
+
)}". You should only throw errors.`
|
|
53
|
+
);
|
|
54
|
+
}
|
|
55
|
+
if (error instanceof AbortRetry)
|
|
56
|
+
throw error._error;
|
|
57
|
+
if (attempt === attempts)
|
|
58
|
+
throw error;
|
|
59
|
+
const resolvedDelay = delay instanceof Function ? delay(attempt) : delay;
|
|
60
|
+
if (onRetry) {
|
|
61
|
+
const shouldAbort = await onRetry({
|
|
62
|
+
error,
|
|
63
|
+
attempt,
|
|
64
|
+
delay: resolvedDelay
|
|
65
|
+
});
|
|
66
|
+
if (shouldAbort === true)
|
|
67
|
+
throw error;
|
|
68
|
+
}
|
|
69
|
+
await new Promise((resolve) => setTimeout(resolve, resolvedDelay));
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
throw new Error("Congratulations, you reached supposedly unreachable code");
|
|
73
|
+
};
|
|
74
|
+
/**
 * Builds a delay function for `retry` that grows exponentially per attempt.
 *
 * @param options.min Delay for the first attempt in milliseconds (default 500,
 *   values below 1 are treated as 1).
 * @param options.max Upper bound for any returned delay (default 30000).
 * @param options.factor Growth factor between consecutive attempts (default 2).
 * @param options.randomize When true, multiplies each delay by a random factor
 *   in the range [1, 2) (default false).
 * @returns Function mapping a 1-based attempt number to a delay in milliseconds.
 */
var exponentialBackoff = ({
  min = 500,
  max = 3e4,
  factor = 2,
  randomize = false
} = {}) => (attempt) => {
  const randomFactor = randomize ? Math.random() + 1 : 1;
  // retry() numbers attempts from 1, so the first wait is `min` itself
  // rather than `min * factor`.
  const exponent = Math.max(attempt - 1, 0);
  const delay = Math.round(
    randomFactor * Math.max(min, 1) * Math.pow(factor, exponent)
  );
  return Math.min(delay, max);
};
|
|
86
|
+
var AbortRetry = class extends Error {
|
|
87
|
+
name = "AbortRetry";
|
|
88
|
+
_error;
|
|
89
|
+
constructor(error) {
|
|
90
|
+
super();
|
|
91
|
+
this._error = error;
|
|
92
|
+
}
|
|
93
|
+
};
|
|
94
|
+
function range(start, end) {
|
|
95
|
+
return [...Array(end - start + 1).keys()].map((i) => i + start);
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
// src/util/fetchWithTimeout.ts
|
|
99
|
+
async function fetchWithTimeout(input, init = {}, timeout = 6e4) {
|
|
100
|
+
try {
|
|
101
|
+
return await fetch(input, {
|
|
102
|
+
...init,
|
|
103
|
+
signal: AbortSignal.timeout(timeout)
|
|
104
|
+
});
|
|
105
|
+
} catch (error) {
|
|
106
|
+
if (error instanceof Error && error.name === "AbortError") {
|
|
107
|
+
throw new TimeoutError(`Request timed out after ${timeout} ms`);
|
|
108
|
+
}
|
|
109
|
+
throw error;
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
var TimeoutError = class extends Error {
|
|
113
|
+
name = "TimeoutError";
|
|
114
|
+
};
|
|
115
|
+
|
|
116
|
+
// src/OaiPmhClient.ts
|
|
117
|
+
var parseDatestamp = (datestamp) => {
|
|
118
|
+
if (!datestamp)
|
|
119
|
+
return void 0;
|
|
120
|
+
if (datestamp.match(/^\d{4}-\d{2}-\d{2}$/)) {
|
|
121
|
+
return /* @__PURE__ */ new Date(`${datestamp}T00:00:00Z`);
|
|
122
|
+
}
|
|
123
|
+
if (datestamp.match(/^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z$/)) {
|
|
124
|
+
return new Date(datestamp);
|
|
125
|
+
}
|
|
126
|
+
throw new Error(`Unknown datestamp format: ${datestamp}`);
|
|
127
|
+
};
|
|
128
|
+
/**
 * Extracts the first OAI-PMH error element from a parsed response document.
 *
 * @param doc Parsed response document.
 * @returns An `OaiPmhError` for the first error element, or `undefined` when
 *   the document contains none.
 */
var parseErrorResponse = (doc) => {
  const errorElements = doc.elements("/oai:OAI-PMH/oai:error");
  const errorElement = errorElements[0];
  if (!errorElement) {
    return void 0;
  }
  // Ref: http://www.openarchives.org/OAI/openarchivesprotocol.html#ErrorConditions
  const code = errorElement.attr("code") ?? "noErrorCode";
  const description = errorElement.text();
  return new OaiPmhError({ code, description });
};
|
|
136
|
+
/**
 * Converts a record element into `{ header, metadata }`.
 *
 * The header is validated with the `OaiPmhHeader` schema; `metadata` is the
 * first child element of the record's metadata element, if any.
 *
 * @param record The record element to convert.
 */
var parseOaiPmhRecord = (record) => {
  const identifier = record.text("oai:header/oai:identifier");
  const headerElement = record.elements("oai:header")[0];
  const deleted = headerElement?.attr("status") === "deleted";
  const datestamp = parseDatestamp(record.text("oai:header/oai:datestamp"));
  const setSpec = record.elements("oai:header/oai:setSpec").map((node) => node.text());
  const header = OaiPmhHeader.parse({ identifier, deleted, datestamp, setSpec });
  const metadata = record.elements("oai:metadata/*")[0];
  return { header, metadata };
};
|
|
145
|
+
/**
 * Parses a ListRecords response document.
 *
 * @param doc Parsed response document.
 * @returns The server error (if any), the resumption token text and the
 *   parsed records.
 */
var parseListRecordsResponse = (doc) => {
  const error = parseErrorResponse(doc);
  const tokenPath = "/oai:OAI-PMH/oai:ListRecords/oai:resumptionToken";
  const resumptionToken = doc.text(tokenPath);
  const recordElements = doc.elements("/oai:OAI-PMH/oai:ListRecords/oai:record");
  const records = recordElements.map((element) => parseOaiPmhRecord(element));
  return { error, resumptionToken, records };
};
|
|
153
|
+
/**
 * Parses a ListSets response document.
 *
 * @param doc Parsed response document.
 * @returns The server error (if any) and the sets, each validated with the
 *   `OaiPmhSet` schema.
 */
var parseListSetsResponse = (doc) => {
  const error = parseErrorResponse(doc);
  const toSet = (element) => {
    const setSpec = element.text("oai:setSpec");
    const setName = element.text("oai:setName");
    return OaiPmhSet.parse({ setSpec, setName });
  };
  const setElements = doc.elements("/oai:OAI-PMH/oai:ListSets/oai:set");
  const sets = setElements.map((element) => toSet(element));
  return { error, sets };
};
|
|
163
|
+
/**
 * Parses a GetRecord response document.
 *
 * @param doc Parsed response document.
 * @returns The server error (if any) and the parsed records.
 */
var parseGetRecordResponse = (doc) => {
  const error = parseErrorResponse(doc);
  const recordElements = doc.elements("/oai:OAI-PMH/oai:GetRecord/oai:record");
  const records = recordElements.map((element) => parseOaiPmhRecord(element));
  return { error, records };
};
|
|
168
|
+
// Turns the internal AbortRetry marker back into the error it wraps, so that
// calls made outside retry() still surface the real failure.
var unwrapAbortRetry = (error) => error instanceof AbortRetry ? error._error : error;

/**
 * Client for an OAI-PMH endpoint.
 *
 * Record listing goes through `retry` with the configured retry options;
 * `getRecord()` and `listSets()` perform a single request.
 */
var OaiPmhClient = class {
  url;
  metadataPrefix;
  set;
  log;
  headers;
  retryOptions;
  retryOnOaiPmhError;
  timeoutMs;
  /**
   * @param config Client configuration. Defaults: no-op style `dummyLogger`,
   *   no extra headers, 10000 ms timeout, 10 attempts with exponential
   *   backoff, and no retry on OAI-PMH error responses.
   */
  constructor(config) {
    this.url = config.url;
    this.log = config.log ?? tsLog.dummyLogger;
    this.metadataPrefix = config.metadataPrefix;
    this.set = config.set;
    this.headers = config.headers ?? {};
    this.timeoutMs = config.timeoutMs ?? 1e4;
    this.retryOptions = {
      attempts: config.retry?.attempts ?? 10,
      delay: config.retry?.delay ?? exponentialBackoff({ min: 2e3, max: 3e4 }),
      onRetry: config.retry?.onRetry ?? void 0
    };
    this.retryOnOaiPmhError = config.retryOnOaiPmhError ?? false;
  }
  /**
   * Sends one GET request with `params` as the query string.
   *
   * @param params Request parameters; undefined values are dropped.
   * @returns The response when its status is OK.
   * @throws AbortRetry wrapping an HttpError for a 404, so that `retry` stops
   *   immediately; HttpError for any other non-OK status.
   */
  async request(params) {
    const searchParams = new url.URLSearchParams(radash.shake({ ...params }));
    // Base URLs that already carry a query string need "&" rather than "?".
    const separator = this.url.includes("?") ? "&" : "?";
    const url$1 = `${this.url}${separator}${searchParams.toString()}`;
    this.log.debug(`OAI-PMH request: ${url$1}`);
    const response = await fetchWithTimeout(
      url$1,
      {
        headers: {
          "User-Agent": "bs-metadata-oai-harvester/0.1.0 (Biblioteksentralen; drift@bibsent.no)",
          // Caller-supplied headers win over the default User-Agent.
          ...this.headers
        }
      },
      this.timeoutMs
    );
    if (response.status === 404) {
      throw new AbortRetry(
        new HttpError(
          response,
          `OAI-PMH endpoint does not exist (got ${response.status} response)`
        )
      );
    }
    if (!response.ok) {
      const responseText = await response.text();
      throw new HttpError(
        response,
        `Request failed with ${response.status} response: ${responseText}`
      );
    }
    return response;
  }
  /**
   * Parses response text as XML with the `oai` namespace prefix registered.
   */
  parseXmlResponse(response) {
    return marc.parseXml(response, {
      log: this.log,
      namespaces: {
        oai: "http://www.openarchives.org/OAI/2.0/"
      }
    });
  }
  /**
   * Yields one parsed ListRecords response per request, following
   * resumption tokens until the server stops returning one.
   *
   * @param params Optional ListRecords parameters; a given `resumptionToken`
   *   is used for the first request instead of the other parameters.
   * @throws OaiPmhError when a response contains an OAI-PMH error.
   */
  async *listRecordBatches({
    resumptionToken,
    ...params
  } = {}) {
    do {
      const result = await retry(async () => {
        const response = await this.request(
          resumptionToken ? { verb: "ListRecords", resumptionToken } : {
            verb: "ListRecords",
            metadataPrefix: this.metadataPrefix,
            set: this.set,
            ...params
          }
        );
        const responseText = await response.text();
        const responseXml = this.parseXmlResponse(responseText);
        const listResponse = parseListRecordsResponse(responseXml);
        // Throwing inside the retried function is what makes the error retryable.
        if (listResponse.error && this.retryOnOaiPmhError) {
          throw new OaiPmhError(listResponse.error);
        }
        return listResponse;
      }, this.retryOptions);
      if (result.error) {
        throw new OaiPmhError(result.error);
      }
      yield result;
      resumptionToken = result.resumptionToken;
    } while (resumptionToken);
  }
  /**
   * Yields `{ record, resumptionToken }` for every record of every batch
   * produced by `listRecordBatches`.
   */
  async *listRecords(params = {}) {
    for await (const response of this.listRecordBatches(params)) {
      const { records, resumptionToken } = response;
      for (const record of records) {
        yield { record, resumptionToken };
      }
    }
  }
  /**
   * Fetches a single record with one GetRecord request (no retries).
   *
   * @param recordId Identifier of the record.
   * @returns The first record of the response (if any) and the OAI-PMH error
   *   (if any); the error is returned, not thrown.
   * @throws HttpError for a non-OK response, including 404.
   */
  async getRecord(recordId) {
    let response;
    try {
      response = await this.request({
        verb: "GetRecord",
        metadataPrefix: this.metadataPrefix,
        identifier: recordId
      });
    } catch (error) {
      throw unwrapAbortRetry(error);
    }
    const responseText = await response.text();
    const responseXml = this.parseXmlResponse(responseText);
    const { records, error } = parseGetRecordResponse(responseXml);
    return { record: records[0], error };
  }
  /**
   * Fetches the sets with one ListSets request (no retries).
   *
   * @returns The parsed sets and the OAI-PMH error (if any).
   * @throws HttpError for a non-OK response, including 404.
   */
  async listSets() {
    let response;
    try {
      response = await this.request({ verb: "ListSets" });
    } catch (error) {
      throw unwrapAbortRetry(error);
    }
    const responseText = await response.text();
    const responseXml = this.parseXmlResponse(responseText);
    return parseListSetsResponse(responseXml);
  }
};
|
|
285
|
+
|
|
286
|
+
exports.HttpError = HttpError;
|
|
287
|
+
exports.OaiPmhClient = OaiPmhClient;
|
|
288
|
+
exports.OaiPmhError = OaiPmhError;
|
|
289
|
+
exports.OaiPmhHeader = OaiPmhHeader;
|
|
290
|
+
exports.OaiPmhSet = OaiPmhSet;
|
|
291
|
+
exports.exponentialBackoff = exponentialBackoff;
|
|
292
|
+
exports.parseListRecordsResponse = parseListRecordsResponse;
|
|
293
|
+
exports.parseListSetsResponse = parseListSetsResponse;
|
package/dist/index.mjs
ADDED
|
@@ -0,0 +1,284 @@
|
|
|
1
|
+
import { shake } from 'radash';
|
|
2
|
+
import { dummyLogger } from 'ts-log';
|
|
3
|
+
import { URLSearchParams } from 'url';
|
|
4
|
+
import { z } from 'zod';
|
|
5
|
+
import { parseXml } from '@biblioteksentralen/marc';
|
|
6
|
+
|
|
7
|
+
// src/OaiPmhClient.ts
|
|
8
|
+
/**
 * Zod schema for the parsed header of an OAI-PMH record.
 * `parseOaiPmhRecord` validates every header it builds against this schema.
 */
var OaiPmhHeader = z.object({
  // Text of the header's identifier element.
  identifier: z.string(),
  // True when the header element carries status="deleted".
  deleted: z.boolean(),
  // Datestamp already converted to a Date by parseDatestamp.
  datestamp: z.date(),
  // One entry per setSpec element found in the header.
  setSpec: z.array(z.string())
});
|
|
14
|
+
/**
 * Zod schema for one set entry of a ListSets response.
 * `parseListSetsResponse` validates every set it builds against this schema.
 */
var OaiPmhSet = z.object({
  // Text of the set's setSpec element.
  setSpec: z.string(),
  // Text of the set's setName element.
  setName: z.string()
});
|
|
18
|
+
|
|
19
|
+
// src/errors.ts
|
|
20
|
+
var OaiPmhError = class extends Error {
|
|
21
|
+
name = "OaiPmhError";
|
|
22
|
+
code;
|
|
23
|
+
description;
|
|
24
|
+
constructor({ code, description }) {
|
|
25
|
+
super(
|
|
26
|
+
`OAI-PMH server returned error response: ${code}${description ? ` - ${description}` : ""}`
|
|
27
|
+
);
|
|
28
|
+
this.code = code;
|
|
29
|
+
this.description = description;
|
|
30
|
+
}
|
|
31
|
+
};
|
|
32
|
+
var HttpError = class extends Error {
|
|
33
|
+
constructor(response, msg) {
|
|
34
|
+
super(msg);
|
|
35
|
+
this.response = response;
|
|
36
|
+
}
|
|
37
|
+
name = "HttpError";
|
|
38
|
+
};
|
|
39
|
+
|
|
40
|
+
// src/util/retry.ts
|
|
41
|
+
var retry = async (func, { attempts = 3, delay = 500, onRetry } = {}) => {
|
|
42
|
+
for (const attempt of range(1, attempts)) {
|
|
43
|
+
try {
|
|
44
|
+
return await func();
|
|
45
|
+
} catch (error) {
|
|
46
|
+
if (!(error instanceof Error)) {
|
|
47
|
+
throw new TypeError(
|
|
48
|
+
`Non-error was thrown: "${String(
|
|
49
|
+
error
|
|
50
|
+
)}". You should only throw errors.`
|
|
51
|
+
);
|
|
52
|
+
}
|
|
53
|
+
if (error instanceof AbortRetry)
|
|
54
|
+
throw error._error;
|
|
55
|
+
if (attempt === attempts)
|
|
56
|
+
throw error;
|
|
57
|
+
const resolvedDelay = delay instanceof Function ? delay(attempt) : delay;
|
|
58
|
+
if (onRetry) {
|
|
59
|
+
const shouldAbort = await onRetry({
|
|
60
|
+
error,
|
|
61
|
+
attempt,
|
|
62
|
+
delay: resolvedDelay
|
|
63
|
+
});
|
|
64
|
+
if (shouldAbort === true)
|
|
65
|
+
throw error;
|
|
66
|
+
}
|
|
67
|
+
await new Promise((resolve) => setTimeout(resolve, resolvedDelay));
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
throw new Error("Congratulations, you reached supposedly unreachable code");
|
|
71
|
+
};
|
|
72
|
+
/**
 * Builds a delay function for `retry` that grows exponentially per attempt.
 *
 * @param options.min Delay for the first attempt in milliseconds (default 500,
 *   values below 1 are treated as 1).
 * @param options.max Upper bound for any returned delay (default 30000).
 * @param options.factor Growth factor between consecutive attempts (default 2).
 * @param options.randomize When true, multiplies each delay by a random factor
 *   in the range [1, 2) (default false).
 * @returns Function mapping a 1-based attempt number to a delay in milliseconds.
 */
var exponentialBackoff = ({
  min = 500,
  max = 3e4,
  factor = 2,
  randomize = false
} = {}) => (attempt) => {
  const randomFactor = randomize ? Math.random() + 1 : 1;
  // retry() numbers attempts from 1, so the first wait is `min` itself
  // rather than `min * factor`.
  const exponent = Math.max(attempt - 1, 0);
  const delay = Math.round(
    randomFactor * Math.max(min, 1) * Math.pow(factor, exponent)
  );
  return Math.min(delay, max);
};
|
|
84
|
+
var AbortRetry = class extends Error {
|
|
85
|
+
name = "AbortRetry";
|
|
86
|
+
_error;
|
|
87
|
+
constructor(error) {
|
|
88
|
+
super();
|
|
89
|
+
this._error = error;
|
|
90
|
+
}
|
|
91
|
+
};
|
|
92
|
+
function range(start, end) {
|
|
93
|
+
return [...Array(end - start + 1).keys()].map((i) => i + start);
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
// src/util/fetchWithTimeout.ts
|
|
97
|
+
async function fetchWithTimeout(input, init = {}, timeout = 6e4) {
|
|
98
|
+
try {
|
|
99
|
+
return await fetch(input, {
|
|
100
|
+
...init,
|
|
101
|
+
signal: AbortSignal.timeout(timeout)
|
|
102
|
+
});
|
|
103
|
+
} catch (error) {
|
|
104
|
+
if (error instanceof Error && error.name === "AbortError") {
|
|
105
|
+
throw new TimeoutError(`Request timed out after ${timeout} ms`);
|
|
106
|
+
}
|
|
107
|
+
throw error;
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
var TimeoutError = class extends Error {
|
|
111
|
+
name = "TimeoutError";
|
|
112
|
+
};
|
|
113
|
+
|
|
114
|
+
// src/OaiPmhClient.ts
|
|
115
|
+
var parseDatestamp = (datestamp) => {
|
|
116
|
+
if (!datestamp)
|
|
117
|
+
return void 0;
|
|
118
|
+
if (datestamp.match(/^\d{4}-\d{2}-\d{2}$/)) {
|
|
119
|
+
return /* @__PURE__ */ new Date(`${datestamp}T00:00:00Z`);
|
|
120
|
+
}
|
|
121
|
+
if (datestamp.match(/^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z$/)) {
|
|
122
|
+
return new Date(datestamp);
|
|
123
|
+
}
|
|
124
|
+
throw new Error(`Unknown datestamp format: ${datestamp}`);
|
|
125
|
+
};
|
|
126
|
+
/**
 * Extracts the first OAI-PMH error element from a parsed response document.
 *
 * @param doc Parsed response document.
 * @returns An `OaiPmhError` for the first error element, or `undefined` when
 *   the document contains none.
 */
var parseErrorResponse = (doc) => {
  const errorElements = doc.elements("/oai:OAI-PMH/oai:error");
  const errorElement = errorElements[0];
  if (!errorElement) {
    return void 0;
  }
  // Ref: http://www.openarchives.org/OAI/openarchivesprotocol.html#ErrorConditions
  const code = errorElement.attr("code") ?? "noErrorCode";
  const description = errorElement.text();
  return new OaiPmhError({ code, description });
};
|
|
134
|
+
/**
 * Converts a record element into `{ header, metadata }`.
 *
 * The header is validated with the `OaiPmhHeader` schema; `metadata` is the
 * first child element of the record's metadata element, if any.
 *
 * @param record The record element to convert.
 */
var parseOaiPmhRecord = (record) => {
  const identifier = record.text("oai:header/oai:identifier");
  const headerElement = record.elements("oai:header")[0];
  const deleted = headerElement?.attr("status") === "deleted";
  const datestamp = parseDatestamp(record.text("oai:header/oai:datestamp"));
  const setSpec = record.elements("oai:header/oai:setSpec").map((node) => node.text());
  const header = OaiPmhHeader.parse({ identifier, deleted, datestamp, setSpec });
  const metadata = record.elements("oai:metadata/*")[0];
  return { header, metadata };
};
|
|
143
|
+
/**
 * Parses a ListRecords response document.
 *
 * @param doc Parsed response document.
 * @returns The server error (if any), the resumption token text and the
 *   parsed records.
 */
var parseListRecordsResponse = (doc) => {
  const error = parseErrorResponse(doc);
  const tokenPath = "/oai:OAI-PMH/oai:ListRecords/oai:resumptionToken";
  const resumptionToken = doc.text(tokenPath);
  const recordElements = doc.elements("/oai:OAI-PMH/oai:ListRecords/oai:record");
  const records = recordElements.map((element) => parseOaiPmhRecord(element));
  return { error, resumptionToken, records };
};
|
|
151
|
+
/**
 * Parses a ListSets response document.
 *
 * @param doc Parsed response document.
 * @returns The server error (if any) and the sets, each validated with the
 *   `OaiPmhSet` schema.
 */
var parseListSetsResponse = (doc) => {
  const error = parseErrorResponse(doc);
  const toSet = (element) => {
    const setSpec = element.text("oai:setSpec");
    const setName = element.text("oai:setName");
    return OaiPmhSet.parse({ setSpec, setName });
  };
  const setElements = doc.elements("/oai:OAI-PMH/oai:ListSets/oai:set");
  const sets = setElements.map((element) => toSet(element));
  return { error, sets };
};
|
|
161
|
+
/**
 * Parses a GetRecord response document.
 *
 * @param doc Parsed response document.
 * @returns The server error (if any) and the parsed records.
 */
var parseGetRecordResponse = (doc) => {
  const error = parseErrorResponse(doc);
  const recordElements = doc.elements("/oai:OAI-PMH/oai:GetRecord/oai:record");
  const records = recordElements.map((element) => parseOaiPmhRecord(element));
  return { error, records };
};
|
|
166
|
+
// Turns the internal AbortRetry marker back into the error it wraps, so that
// calls made outside retry() still surface the real failure.
var unwrapAbortRetry = (error) => error instanceof AbortRetry ? error._error : error;

/**
 * Client for an OAI-PMH endpoint.
 *
 * Record listing goes through `retry` with the configured retry options;
 * `getRecord()` and `listSets()` perform a single request.
 */
var OaiPmhClient = class {
  url;
  metadataPrefix;
  set;
  log;
  headers;
  retryOptions;
  retryOnOaiPmhError;
  timeoutMs;
  /**
   * @param config Client configuration. Defaults: `dummyLogger`, no extra
   *   headers, 10000 ms timeout, 10 attempts with exponential backoff, and
   *   no retry on OAI-PMH error responses.
   */
  constructor(config) {
    this.url = config.url;
    this.log = config.log ?? dummyLogger;
    this.metadataPrefix = config.metadataPrefix;
    this.set = config.set;
    this.headers = config.headers ?? {};
    this.timeoutMs = config.timeoutMs ?? 1e4;
    this.retryOptions = {
      attempts: config.retry?.attempts ?? 10,
      delay: config.retry?.delay ?? exponentialBackoff({ min: 2e3, max: 3e4 }),
      onRetry: config.retry?.onRetry ?? void 0
    };
    this.retryOnOaiPmhError = config.retryOnOaiPmhError ?? false;
  }
  /**
   * Sends one GET request with `params` as the query string.
   *
   * @param params Request parameters; undefined values are dropped.
   * @returns The response when its status is OK.
   * @throws AbortRetry wrapping an HttpError for a 404, so that `retry` stops
   *   immediately; HttpError for any other non-OK status.
   */
  async request(params) {
    const searchParams = new URLSearchParams(shake({ ...params }));
    // Base URLs that already carry a query string need "&" rather than "?".
    const separator = this.url.includes("?") ? "&" : "?";
    const url = `${this.url}${separator}${searchParams.toString()}`;
    this.log.debug(`OAI-PMH request: ${url}`);
    const response = await fetchWithTimeout(
      url,
      {
        headers: {
          "User-Agent": "bs-metadata-oai-harvester/0.1.0 (Biblioteksentralen; drift@bibsent.no)",
          // Caller-supplied headers win over the default User-Agent.
          ...this.headers
        }
      },
      this.timeoutMs
    );
    if (response.status === 404) {
      throw new AbortRetry(
        new HttpError(
          response,
          `OAI-PMH endpoint does not exist (got ${response.status} response)`
        )
      );
    }
    if (!response.ok) {
      const responseText = await response.text();
      throw new HttpError(
        response,
        `Request failed with ${response.status} response: ${responseText}`
      );
    }
    return response;
  }
  /**
   * Parses response text as XML with the `oai` namespace prefix registered.
   */
  parseXmlResponse(response) {
    return parseXml(response, {
      log: this.log,
      namespaces: {
        oai: "http://www.openarchives.org/OAI/2.0/"
      }
    });
  }
  /**
   * Yields one parsed ListRecords response per request, following
   * resumption tokens until the server stops returning one.
   *
   * @param params Optional ListRecords parameters; a given `resumptionToken`
   *   is used for the first request instead of the other parameters.
   * @throws OaiPmhError when a response contains an OAI-PMH error.
   */
  async *listRecordBatches({
    resumptionToken,
    ...params
  } = {}) {
    do {
      const result = await retry(async () => {
        const response = await this.request(
          resumptionToken ? { verb: "ListRecords", resumptionToken } : {
            verb: "ListRecords",
            metadataPrefix: this.metadataPrefix,
            set: this.set,
            ...params
          }
        );
        const responseText = await response.text();
        const responseXml = this.parseXmlResponse(responseText);
        const listResponse = parseListRecordsResponse(responseXml);
        // Throwing inside the retried function is what makes the error retryable.
        if (listResponse.error && this.retryOnOaiPmhError) {
          throw new OaiPmhError(listResponse.error);
        }
        return listResponse;
      }, this.retryOptions);
      if (result.error) {
        throw new OaiPmhError(result.error);
      }
      yield result;
      resumptionToken = result.resumptionToken;
    } while (resumptionToken);
  }
  /**
   * Yields `{ record, resumptionToken }` for every record of every batch
   * produced by `listRecordBatches`.
   */
  async *listRecords(params = {}) {
    for await (const response of this.listRecordBatches(params)) {
      const { records, resumptionToken } = response;
      for (const record of records) {
        yield { record, resumptionToken };
      }
    }
  }
  /**
   * Fetches a single record with one GetRecord request (no retries).
   *
   * @param recordId Identifier of the record.
   * @returns The first record of the response (if any) and the OAI-PMH error
   *   (if any); the error is returned, not thrown.
   * @throws HttpError for a non-OK response, including 404.
   */
  async getRecord(recordId) {
    let response;
    try {
      response = await this.request({
        verb: "GetRecord",
        metadataPrefix: this.metadataPrefix,
        identifier: recordId
      });
    } catch (error) {
      throw unwrapAbortRetry(error);
    }
    const responseText = await response.text();
    const responseXml = this.parseXmlResponse(responseText);
    const { records, error } = parseGetRecordResponse(responseXml);
    return { record: records[0], error };
  }
  /**
   * Fetches the sets with one ListSets request (no retries).
   *
   * @returns The parsed sets and the OAI-PMH error (if any).
   * @throws HttpError for a non-OK response, including 404.
   */
  async listSets() {
    let response;
    try {
      response = await this.request({ verb: "ListSets" });
    } catch (error) {
      throw unwrapAbortRetry(error);
    }
    const responseText = await response.text();
    const responseXml = this.parseXmlResponse(responseText);
    return parseListSetsResponse(responseXml);
  }
};
|
|
283
|
+
|
|
284
|
+
export { HttpError, OaiPmhClient, OaiPmhError, OaiPmhHeader, OaiPmhSet, exponentialBackoff, parseListRecordsResponse, parseListSetsResponse };
|
package/package.json
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@biblioteksentralen/oai-pmh-client",
|
|
3
|
+
"version": "0.0.1",
|
|
4
|
+
"private": false,
|
|
5
|
+
"description": "OAI-PMH client",
|
|
6
|
+
"author": "Biblioteksentralen",
|
|
7
|
+
"license": "MIT",
|
|
8
|
+
"main": "dist/index.js",
|
|
9
|
+
"types": "./dist/index.d.ts",
|
|
10
|
+
"exports": {
|
|
11
|
+
".": {
|
|
12
|
+
"import": "./dist/index.mjs",
|
|
13
|
+
"require": "./dist/index.js",
|
|
14
|
+
"types": "./dist/index.d.ts"
|
|
15
|
+
}
|
|
16
|
+
},
|
|
17
|
+
"files": [
|
|
18
|
+
"README.md",
|
|
19
|
+
"LICENSE",
|
|
20
|
+
"dist/**/*.{js,mjs,d.ts}"
|
|
21
|
+
],
|
|
22
|
+
"devDependencies": {
|
|
23
|
+
"@types/node": "^18.19.31",
|
|
24
|
+
"rimraf": "^5.0.5",
|
|
25
|
+
"tsup": "^8.0.2",
|
|
26
|
+
"typescript": "^5.3.3",
|
|
27
|
+
"@dataplattform/eslint-config": "1.0.0"
|
|
28
|
+
},
|
|
29
|
+
"dependencies": {
|
|
30
|
+
"pino": "^8.20.0",
|
|
31
|
+
"radash": "^12.1.0",
|
|
32
|
+
"ts-log": "^2.2.5",
|
|
33
|
+
"xpath": "^0.0.32",
|
|
34
|
+
"zod": "^3.22.4",
|
|
35
|
+
"@biblioteksentralen/marc": "0.0.1"
|
|
36
|
+
},
|
|
37
|
+
"scripts": {
|
|
38
|
+
"build": "tsup src/index.ts --format cjs,esm --dts --treeshake",
|
|
39
|
+
"dev": "tsc --watch --preserveWatchOutput",
|
|
40
|
+
"clean": "rimraf dist",
|
|
41
|
+
"lint": "eslint ."
|
|
42
|
+
}
|
|
43
|
+
}
|