arxiv-api-wrapper 2.0.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/oaiParser.ts CHANGED
@@ -1,264 +1,264 @@
1
- /**
2
- * Parser for OAI-PMH XML responses from the arXiv OAI endpoint.
3
- */
4
-
5
- import {
6
- type OaiErrorCode,
7
- type OaiIdentifyResponse,
8
- type OaiMetadataFormat,
9
- type OaiMetadataPrefix,
10
- type OaiSet,
11
- type OaiResumptionToken,
12
- type OaiHeader,
13
- type OaiRecord,
14
- type OaiMetadata,
15
- OaiError,
16
- } from './oaiTypes.js';
17
- import { XMLParser } from 'fast-xml-parser';
18
-
19
- const parser = new XMLParser({
20
- ignoreAttributes: false,
21
- attributeNamePrefix: '',
22
- removeNSPrefix: true,
23
- trimValues: true,
24
- parseTagValue: false,
25
- });
26
-
27
- const VALID_ERROR_CODES: OaiErrorCode[] = [
28
- 'badArgument',
29
- 'badResumptionToken',
30
- 'badVerb',
31
- 'cannotDisseminateFormat',
32
- 'idDoesNotExist',
33
- 'noMetadataFormats',
34
- 'noRecordsMatch',
35
- 'noSetHierarchy',
36
- ];
37
-
38
- const VALID_METADATA_PREFIXES: OaiMetadataPrefix[] = ['oai_dc', 'arXiv', 'arXivOld', 'arXivRaw'];
39
-
40
- function asArray<T>(x: T | T[] | undefined): T[] {
41
- if (x == null) return [];
42
- return Array.isArray(x) ? x : [x];
43
- }
44
-
45
- function str(v: unknown): string {
46
- if (v == null) return '';
47
- return String(v).trim();
48
- }
49
-
50
- function parseErrors(root: Record<string, unknown>): OaiError[] {
51
- const errors: OaiError[] = [];
52
- const raw = root.error;
53
- if (raw == null) return errors;
54
- const arr = asArray(raw);
55
- for (const e of arr) {
56
- const code = (e && typeof e === 'object' && 'code' in e && e.code) as string | undefined;
57
- const msg = (e && typeof e === 'object' && '#text' in e ? e['#text'] : e) as string | unknown;
58
- const messageText = typeof msg === 'string' ? msg : typeof msg !== 'undefined' ? String(msg) : '';
59
- const codeStr = (code ?? 'badArgument') as OaiErrorCode;
60
- if (VALID_ERROR_CODES.includes(codeStr)) {
61
- errors.push(new OaiError(codeStr, messageText));
62
- } else {
63
- errors.push(new OaiError('badArgument', messageText || codeStr));
64
- }
65
- }
66
- return errors;
67
- }
68
-
69
- function parseResumptionToken(el: unknown): OaiResumptionToken | undefined {
70
- if (el == null || typeof el !== 'object') return undefined;
71
- const o = el as Record<string, unknown>;
72
- const value = str(o['#text'] ?? o['_'] ?? '');
73
- if (!value) return undefined;
74
- const token: OaiResumptionToken = { value };
75
- if (o.expirationDate != null) token.expirationDate = str(o.expirationDate);
76
- if (o.completeListSize != null) token.completeListSize = Number(o.completeListSize);
77
- if (o.cursor != null) token.cursor = Number(o.cursor);
78
- return token;
79
- }
80
-
81
- function parseHeader(el: unknown): OaiHeader {
82
- const o = (el != null && typeof el === 'object' ? el : {}) as Record<string, unknown>;
83
- const setSpec = asArray(o.setSpec).map((s) => str(s));
84
- const status = o.status != null ? str(o.status) : undefined;
85
- return {
86
- identifier: str(o.identifier),
87
- datestamp: str(o.datestamp),
88
- setSpec,
89
- ...(status === 'deleted' ? { status: 'deleted' as const } : {}),
90
- };
91
- }
92
-
93
- /** Extract metadata as a plain object (first child of metadata is format-specific, e.g. dc or arXiv). */
94
- function parseMetadata(el: unknown): OaiMetadata | undefined {
95
- if (el == null || typeof el !== 'object') return undefined;
96
- const o = el as Record<string, unknown>;
97
- // metadata has a single child (e.g. dc, arXiv) - flatten one level for convenience
98
- const keys = Object.keys(o).filter((k) => !k.startsWith('@') && k !== '#text' && k !== '_');
99
- if (keys.length === 0) return undefined;
100
- const out: Record<string, unknown> = {};
101
- for (const key of keys) {
102
- const val = o[key];
103
- if (val != null && typeof val === 'object' && !Array.isArray(val)) {
104
- out[key] = val;
105
- } else {
106
- out[key] = val;
107
- }
108
- }
109
- return out as unknown as OaiMetadata;
110
- }
111
-
112
- function parseRecord(el: unknown): OaiRecord {
113
- const o = (el != null && typeof el === 'object' ? el : {}) as Record<string, unknown>;
114
- const header = parseHeader(o.header);
115
- const metadata = o.metadata != null ? parseMetadata(o.metadata) : undefined;
116
- const about = o.about != null ? asArray(o.about) : undefined;
117
- return { header, ...(metadata != null ? { metadata } : {}), ...(about != null ? { about } : {}) };
118
- }
119
-
120
- function getRoot(xml: string): Record<string, unknown> {
121
- const doc = parser.parse(xml) as Record<string, unknown>;
122
- const root = doc['OAI-PMH'] ?? doc['OAIPMH'] ?? doc;
123
- if (root == null || typeof root !== 'object') {
124
- throw new OaiError('badArgument', 'Invalid OAI-PMH response: no root element');
125
- }
126
- return root as Record<string, unknown>;
127
- }
128
-
129
- /**
130
- * Parse OAI-PMH response and throw OaiError if the response contains error elements.
131
- * Returns the verb-specific payload (e.g. Identify, ListRecords).
132
- */
133
- export function parseOaiResponse(xml: string): Record<string, unknown> {
134
- const root = getRoot(xml);
135
- const errors = parseErrors(root);
136
- if (errors.length > 0) {
137
- const first = errors[0];
138
- throw first;
139
- }
140
- return root;
141
- }
142
-
143
- /**
144
- * Parse an Identify response body.
145
- */
146
- export function parseIdentify(xml: string): OaiIdentifyResponse {
147
- const root = parseOaiResponse(xml);
148
- const id = (root.Identify ?? root.identify) as Record<string, unknown> | undefined;
149
- if (!id || typeof id !== 'object') {
150
- throw new OaiError('badArgument', 'Invalid Identify response: missing Identify element');
151
- }
152
- return {
153
- repositoryName: str(id.repositoryName),
154
- baseURL: str(id.baseURL),
155
- protocolVersion: str(id.protocolVersion),
156
- adminEmail: asArray(id.adminEmail).map((e) => str(e)),
157
- earliestDatestamp: str(id.earliestDatestamp),
158
- deletedRecord: (str(id.deletedRecord) || 'no') as 'no' | 'persistent' | 'transient',
159
- granularity: (str(id.granularity) || 'YYYY-MM-DD') as 'YYYY-MM-DD' | 'YYYY-MM-DDThh:mm:ssZ',
160
- compression: asArray(id.compression).map((c) => str(c)).filter(Boolean),
161
- description: id.description != null ? asArray(id.description) : undefined,
162
- };
163
- }
164
-
165
- /**
166
- * Parse a ListMetadataFormats response body.
167
- */
168
- export function parseListMetadataFormats(xml: string): OaiMetadataFormat[] {
169
- const root = parseOaiResponse(xml);
170
- const list = root.ListMetadataFormats ?? root.listMetadataFormats;
171
- if (!list || typeof list !== 'object') {
172
- throw new OaiError('badArgument', 'Invalid ListMetadataFormats response');
173
- }
174
- const arr = (list as Record<string, unknown>).metadataFormat;
175
- const formats = asArray(arr);
176
- return formats.map((f: unknown) => {
177
- const o = (f && typeof f === 'object' ? f : {}) as Record<string, unknown>;
178
- const metadataPrefix = str(o.metadataPrefix);
179
- if (!VALID_METADATA_PREFIXES.includes(metadataPrefix as OaiMetadataPrefix)) {
180
- throw new OaiError(
181
- 'cannotDisseminateFormat',
182
- `Unsupported metadataPrefix in ListMetadataFormats response: ${metadataPrefix}`
183
- );
184
- }
185
- return {
186
- metadataPrefix: metadataPrefix as OaiMetadataPrefix,
187
- schema: str(o.schema),
188
- metadataNamespace: str(o.metadataNamespace),
189
- };
190
- });
191
- }
192
-
193
- /**
194
- * Parse a ListSets response body.
195
- */
196
- export function parseListSets(xml: string): { sets: OaiSet[]; resumptionToken?: OaiResumptionToken } {
197
- const root = parseOaiResponse(xml);
198
- const list = root.ListSets ?? root.listSets;
199
- if (!list || typeof list !== 'object') {
200
- throw new OaiError('badArgument', 'Invalid ListSets response');
201
- }
202
- const o = list as Record<string, unknown>;
203
- const setArr = asArray(o.set);
204
- const sets: OaiSet[] = setArr.map((s: unknown) => {
205
- const set = (s && typeof s === 'object' ? s : {}) as Record<string, unknown>;
206
- return {
207
- setSpec: str(set.setSpec),
208
- setName: str(set.setName),
209
- ...(set.setDescription != null ? { setDescription: set.setDescription } : {}),
210
- };
211
- });
212
- const resumptionToken = parseResumptionToken(o.resumptionToken);
213
- return { sets, ...(resumptionToken ? { resumptionToken } : {}) };
214
- }
215
-
216
- /**
217
- * Parse a GetRecord response body.
218
- */
219
- export function parseGetRecord(xml: string): OaiRecord {
220
- const root = parseOaiResponse(xml);
221
- const getRecord = root.GetRecord ?? root.getRecord;
222
- if (!getRecord || typeof getRecord !== 'object') {
223
- throw new OaiError('badArgument', 'Invalid GetRecord response');
224
- }
225
- const record = (getRecord as Record<string, unknown>).record;
226
- if (!record) throw new OaiError('badArgument', 'Invalid GetRecord response: missing record');
227
- return parseRecord(record);
228
- }
229
-
230
- /**
231
- * Parse a ListIdentifiers response body.
232
- */
233
- export function parseListIdentifiers(
234
- xml: string
235
- ): { headers: OaiHeader[]; resumptionToken?: OaiResumptionToken } {
236
- const root = parseOaiResponse(xml);
237
- const list = root.ListIdentifiers ?? root.listIdentifiers;
238
- if (!list || typeof list !== 'object') {
239
- throw new OaiError('badArgument', 'Invalid ListIdentifiers response');
240
- }
241
- const o = list as Record<string, unknown>;
242
- const headerArr = asArray(o.header);
243
- const headers = headerArr.map((h: unknown) => parseHeader(h));
244
- const resumptionToken = parseResumptionToken(o.resumptionToken);
245
- return { headers, ...(resumptionToken ? { resumptionToken } : {}) };
246
- }
247
-
248
- /**
249
- * Parse a ListRecords response body.
250
- */
251
- export function parseListRecords(
252
- xml: string
253
- ): { records: OaiRecord[]; resumptionToken?: OaiResumptionToken } {
254
- const root = parseOaiResponse(xml);
255
- const list = root.ListRecords ?? root.listRecords;
256
- if (!list || typeof list !== 'object') {
257
- throw new OaiError('badArgument', 'Invalid ListRecords response');
258
- }
259
- const o = list as Record<string, unknown>;
260
- const recordArr = asArray(o.record);
261
- const records = recordArr.map((r: unknown) => parseRecord(r));
262
- const resumptionToken = parseResumptionToken(o.resumptionToken);
263
- return { records, ...(resumptionToken ? { resumptionToken } : {}) };
264
- }
1
+ /**
2
+ * Parser for OAI-PMH XML responses from the arXiv OAI endpoint.
3
+ */
4
+
5
+ import {
6
+ type OaiErrorCode,
7
+ type OaiIdentifyResponse,
8
+ type OaiMetadataFormat,
9
+ type OaiMetadataPrefix,
10
+ type OaiSet,
11
+ type OaiResumptionToken,
12
+ type OaiHeader,
13
+ type OaiRecord,
14
+ type OaiMetadata,
15
+ OaiError,
16
+ } from './oaiTypes.js';
17
+ import { XMLParser } from 'fast-xml-parser';
18
+
19
/**
 * Shared XML parser used for every OAI-PMH response in this module.
 * Attributes are kept and exposed under their bare names (empty prefix), namespace
 * prefixes are stripped from names, values are trimmed, and tag values are not
 * converted to other primitive types.
 */
const parser = new XMLParser({
  ignoreAttributes: false,
  attributeNamePrefix: '',
  removeNSPrefix: true,
  trimValues: true,
  parseTagValue: false,
});

/** Error codes that `parseErrors` passes through unchanged. */
const VALID_ERROR_CODES: OaiErrorCode[] = [
  'badArgument',
  'badResumptionToken',
  'badVerb',
  'cannotDisseminateFormat',
  'idDoesNotExist',
  'noMetadataFormats',
  'noRecordsMatch',
  'noSetHierarchy',
];

/** Metadata prefixes accepted in a ListMetadataFormats response. */
const VALID_METADATA_PREFIXES: OaiMetadataPrefix[] = [
  'oai_dc',
  'arXiv',
  'arXivOld',
  'arXivRaw',
];
39
+
40
/**
 * Normalise a value that may be absent, a single item, or a list into an array.
 * `null`/`undefined` become `[]`; an existing array is returned as-is (not copied).
 */
function asArray<T>(x: T | T[] | undefined): T[] {
  if (Array.isArray(x)) return x;
  return x == null ? [] : [x];
}
44
+
45
/**
 * Convert a parsed XML value to a trimmed string.
 *
 * - `null`/`undefined` become `''`.
 * - A non-array object is treated as an element that carried attributes: its text lives
 *   under `'#text'`, so that entry is used (or `''` when absent) instead of letting
 *   `String()` produce "[object Object]".
 * - Anything else is stringified and trimmed.
 */
function str(v: unknown): string {
  if (v == null) return '';
  if (typeof v === 'object' && !Array.isArray(v)) {
    const text = (v as Record<string, unknown>)['#text'];
    return text == null ? '' : String(text).trim();
  }
  return String(v).trim();
}
49
+
50
/**
 * Collect every `error` entry of a parsed OAI-PMH root as an `OaiError`.
 *
 * An entry is either an object (the element carried attributes: `code`, plus `'#text'`
 * when it has a message) or a bare primitive (message text only).
 *
 * - A code listed in `VALID_ERROR_CODES` is kept together with the message text.
 * - A missing or unrecognised code is reported as `'badArgument'`; when there is no
 *   message text, the unrecognised code itself is used as the message.
 *
 * @param root Parsed root object of the response.
 * @returns One `OaiError` per entry; empty when `root.error` is absent.
 */
function parseErrors(root: Record<string, unknown>): OaiError[] {
  const errors: OaiError[] = [];
  for (const entry of asArray(root.error)) {
    let rawCode: unknown;
    let rawText: unknown = entry;
    if (entry != null && typeof entry === 'object') {
      const fields = entry as Record<string, unknown>;
      rawCode = fields.code;
      // An attribute-only element has no '#text': its message is empty, not "[object Object]".
      rawText = fields['#text'];
    }
    const messageText = rawText == null ? '' : String(rawText);
    // Only a real string counts as a code; never let `false`/`undefined` leak through.
    const code = typeof rawCode === 'string' ? rawCode : '';
    if ((VALID_ERROR_CODES as readonly string[]).includes(code)) {
      errors.push(new OaiError(code as OaiErrorCode, messageText));
    } else {
      errors.push(new OaiError('badArgument', messageText || code));
    }
  }
  return errors;
}
68
+
69
/**
 * Convert a parsed `resumptionToken` element into an `OaiResumptionToken`.
 *
 * The element arrives as an object when it carried attributes (token text under `'#text'`
 * or `'_'`) and as a bare primitive when it had text only; both forms are accepted.
 *
 * @param el Parsed element value, possibly absent.
 * @returns The token, or `undefined` when the element is absent or its text is empty.
 *   `completeListSize` and `cursor` are set only when the attribute is a finite number.
 */
function parseResumptionToken(el: unknown): OaiResumptionToken | undefined {
  if (el == null) return undefined;
  const text = (raw: unknown): string => (raw == null ? '' : String(raw).trim());
  // Empty or non-numeric attributes are treated as absent rather than becoming 0 or NaN.
  const count = (raw: unknown): number | undefined => {
    const digits = text(raw);
    if (digits === '') return undefined;
    const parsed = Number(digits);
    return Number.isFinite(parsed) ? parsed : undefined;
  };

  if (typeof el !== 'object') {
    // Attribute-less element: the parsed value is the token text itself.
    const bare = text(el);
    return bare ? { value: bare } : undefined;
  }

  const o = el as Record<string, unknown>;
  const value = text(o['#text'] ?? o['_']);
  if (!value) return undefined;
  const token: OaiResumptionToken = { value };
  if (o.expirationDate != null) token.expirationDate = text(o.expirationDate);
  const completeListSize = count(o.completeListSize);
  if (completeListSize !== undefined) token.completeListSize = completeListSize;
  const cursor = count(o.cursor);
  if (cursor !== undefined) token.cursor = cursor;
  return token;
}
80
+
81
/**
 * Build an `OaiHeader` from a parsed `header` element.
 * A missing or non-object input yields empty strings and an empty `setSpec` list;
 * `status` is included only when its trimmed value is exactly `'deleted'`.
 */
function parseHeader(el: unknown): OaiHeader {
  const fields = (typeof el === 'object' && el !== null ? el : {}) as Record<string, unknown>;
  const isDeleted = fields.status != null && str(fields.status) === 'deleted';
  return {
    identifier: str(fields.identifier),
    datestamp: str(fields.datestamp),
    setSpec: asArray(fields.setSpec).map((spec) => str(spec)),
    ...(isDeleted ? { status: 'deleted' as const } : {}),
  };
}
92
+
93
/**
 * Copy the child entries of a parsed `metadata` element into a fresh plain object.
 *
 * Keys starting with `'@'` and the text keys `'#text'` / `'_'` are skipped; every other
 * entry is copied by reference under its own key (the copy is shallow and nothing is
 * flattened).
 *
 * @param el Parsed `metadata` element value.
 * @returns The copied object, or `undefined` when `el` is not an object or no entry
 *   remains after filtering.
 */
function parseMetadata(el: unknown): OaiMetadata | undefined {
  if (el == null || typeof el !== 'object') return undefined;
  const source = el as Record<string, unknown>;
  const out: Record<string, unknown> = {};
  let copied = 0;
  for (const key of Object.keys(source)) {
    if (key.startsWith('@') || key === '#text' || key === '_') continue;
    out[key] = source[key];
    copied += 1;
  }
  return copied === 0 ? undefined : (out as unknown as OaiMetadata);
}
111
+
112
/**
 * Build an `OaiRecord` from a parsed `record` element.
 * The header is always present; `metadata` and `about` are included only when the
 * corresponding child exists (and, for metadata, when `parseMetadata` returns a value).
 */
function parseRecord(el: unknown): OaiRecord {
  const fields = (typeof el === 'object' && el !== null ? el : {}) as Record<string, unknown>;
  const metadata = fields.metadata == null ? undefined : parseMetadata(fields.metadata);
  const about = fields.about == null ? undefined : asArray(fields.about);
  return {
    header: parseHeader(fields.header),
    ...(metadata === undefined ? {} : { metadata }),
    ...(about === undefined ? {} : { about }),
  };
}
119
+
120
/**
 * Parse the XML text and return the object to read the response from:
 * the `OAI-PMH` entry, else the `OAIPMH` entry, else the parsed document itself.
 *
 * @throws OaiError (`badArgument`) when the selected value is not an object.
 */
function getRoot(xml: string): Record<string, unknown> {
  const doc = parser.parse(xml) as Record<string, unknown>;
  let root: unknown = doc['OAI-PMH'];
  if (root == null) root = doc['OAIPMH'];
  if (root == null) root = doc;
  if (typeof root !== 'object' || root === null) {
    throw new OaiError('badArgument', 'Invalid OAI-PMH response: no root element');
  }
  return root as Record<string, unknown>;
}
128
+
129
/**
 * Parse an OAI-PMH response and surface protocol errors as exceptions.
 *
 * @param xml Raw response text.
 * @returns The parsed root object; callers read the verb-specific entry
 *   (e.g. `Identify`, `ListRecords`) from it.
 * @throws OaiError The first error found in the response, if any.
 */
export function parseOaiResponse(xml: string): Record<string, unknown> {
  const root = getRoot(xml);
  const [firstError] = parseErrors(root);
  if (firstError !== undefined) {
    throw firstError;
  }
  return root;
}
142
+
143
/**
 * Parse the body of an Identify response.
 *
 * Text fields are trimmed strings (empty when missing). `deletedRecord` falls back to
 * `'no'` and `granularity` to `'YYYY-MM-DD'` when empty; empty `compression` entries
 * are dropped; `description` is `undefined` when the element is absent.
 *
 * @throws OaiError When the response carries an error or has no Identify element.
 */
export function parseIdentify(xml: string): OaiIdentifyResponse {
  const root = parseOaiResponse(xml);
  const id = (root.Identify ?? root.identify) as Record<string, unknown> | undefined;
  if (!id || typeof id !== 'object') {
    throw new OaiError('badArgument', 'Invalid Identify response: missing Identify element');
  }
  const textList = (raw: unknown): string[] => asArray(raw).map((item) => str(item));
  const deletedRecord = str(id.deletedRecord) || 'no';
  const granularity = str(id.granularity) || 'YYYY-MM-DD';
  return {
    repositoryName: str(id.repositoryName),
    baseURL: str(id.baseURL),
    protocolVersion: str(id.protocolVersion),
    adminEmail: textList(id.adminEmail),
    earliestDatestamp: str(id.earliestDatestamp),
    deletedRecord: deletedRecord as 'no' | 'persistent' | 'transient',
    granularity: granularity as 'YYYY-MM-DD' | 'YYYY-MM-DDThh:mm:ssZ',
    compression: textList(id.compression).filter((entry) => entry !== ''),
    description: id.description == null ? undefined : asArray(id.description),
  };
}
164
+
165
/**
 * Parse the body of a ListMetadataFormats response.
 *
 * @returns One entry per `metadataFormat` element, in document order.
 * @throws OaiError When the response carries an error, the list element is missing,
 *   or a format's prefix is not in `VALID_METADATA_PREFIXES`.
 */
export function parseListMetadataFormats(xml: string): OaiMetadataFormat[] {
  const root = parseOaiResponse(xml);
  const list = root.ListMetadataFormats ?? root.listMetadataFormats;
  if (!list || typeof list !== 'object') {
    throw new OaiError('badArgument', 'Invalid ListMetadataFormats response');
  }
  const formats: OaiMetadataFormat[] = [];
  for (const entry of asArray((list as Record<string, unknown>).metadataFormat)) {
    const fields = (entry && typeof entry === 'object' ? entry : {}) as Record<string, unknown>;
    const metadataPrefix = str(fields.metadataPrefix);
    const supported = VALID_METADATA_PREFIXES.includes(metadataPrefix as OaiMetadataPrefix);
    if (!supported) {
      throw new OaiError(
        'cannotDisseminateFormat',
        `Unsupported metadataPrefix in ListMetadataFormats response: ${metadataPrefix}`
      );
    }
    formats.push({
      metadataPrefix: metadataPrefix as OaiMetadataPrefix,
      schema: str(fields.schema),
      metadataNamespace: str(fields.metadataNamespace),
    });
  }
  return formats;
}
192
+
193
/**
 * Parse the body of a ListSets response.
 *
 * @returns The sets in document order, plus `resumptionToken` when one is present.
 *   `setDescription` is passed through as parsed, only when the element exists.
 * @throws OaiError When the response carries an error or the list element is missing.
 */
export function parseListSets(xml: string): { sets: OaiSet[]; resumptionToken?: OaiResumptionToken } {
  const root = parseOaiResponse(xml);
  const list = root.ListSets ?? root.listSets;
  if (!list || typeof list !== 'object') {
    throw new OaiError('badArgument', 'Invalid ListSets response');
  }
  const body = list as Record<string, unknown>;
  const sets: OaiSet[] = [];
  for (const entry of asArray(body.set)) {
    const fields = (entry && typeof entry === 'object' ? entry : {}) as Record<string, unknown>;
    sets.push({
      setSpec: str(fields.setSpec),
      setName: str(fields.setName),
      ...(fields.setDescription != null ? { setDescription: fields.setDescription } : {}),
    });
  }
  const token = parseResumptionToken(body.resumptionToken);
  return token === undefined ? { sets } : { sets, resumptionToken: token };
}
215
+
216
/**
 * Parse the body of a GetRecord response.
 *
 * @returns The single record contained in the response.
 * @throws OaiError When the response carries an error, or the GetRecord element or
 *   its record is missing.
 */
export function parseGetRecord(xml: string): OaiRecord {
  const root = parseOaiResponse(xml);
  const container = root.GetRecord ?? root.getRecord;
  if (!container || typeof container !== 'object') {
    throw new OaiError('badArgument', 'Invalid GetRecord response');
  }
  const { record } = container as Record<string, unknown>;
  if (!record) {
    throw new OaiError('badArgument', 'Invalid GetRecord response: missing record');
  }
  return parseRecord(record);
}
229
+
230
/**
 * Parse the body of a ListIdentifiers response.
 *
 * @returns The record headers in document order, plus `resumptionToken` when present.
 * @throws OaiError When the response carries an error or the list element is missing.
 */
export function parseListIdentifiers(
  xml: string
): { headers: OaiHeader[]; resumptionToken?: OaiResumptionToken } {
  const root = parseOaiResponse(xml);
  const list = root.ListIdentifiers ?? root.listIdentifiers;
  if (!list || typeof list !== 'object') {
    throw new OaiError('badArgument', 'Invalid ListIdentifiers response');
  }
  const body = list as Record<string, unknown>;
  const headers: OaiHeader[] = [];
  for (const entry of asArray(body.header)) {
    headers.push(parseHeader(entry));
  }
  const token = parseResumptionToken(body.resumptionToken);
  return token === undefined ? { headers } : { headers, resumptionToken: token };
}
247
+
248
/**
 * Parse the body of a ListRecords response.
 *
 * @returns The records in document order, plus `resumptionToken` when present.
 * @throws OaiError When the response carries an error or the list element is missing.
 */
export function parseListRecords(
  xml: string
): { records: OaiRecord[]; resumptionToken?: OaiResumptionToken } {
  const root = parseOaiResponse(xml);
  const list = root.ListRecords ?? root.listRecords;
  if (!list || typeof list !== 'object') {
    throw new OaiError('badArgument', 'Invalid ListRecords response');
  }
  const body = list as Record<string, unknown>;
  const records: OaiRecord[] = [];
  for (const entry of asArray(body.record)) {
    records.push(parseRecord(entry));
  }
  const token = parseResumptionToken(body.resumptionToken);
  return token === undefined ? { records } : { records, resumptionToken: token };
}