@longtable/scholar-research 0.1.60
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -0
- package/dist/index.d.ts +7 -0
- package/dist/index.js +7 -0
- package/dist/protocol.d.ts +56 -0
- package/dist/protocol.js +159 -0
- package/dist/publisher-access.d.ts +21 -0
- package/dist/publisher-access.js +564 -0
- package/dist/query.d.ts +6 -0
- package/dist/query.js +179 -0
- package/dist/rank.d.ts +2 -0
- package/dist/rank.js +173 -0
- package/dist/run.d.ts +2 -0
- package/dist/run.js +114 -0
- package/dist/sources.d.ts +5 -0
- package/dist/sources.js +537 -0
- package/dist/types.d.ts +179 -0
- package/dist/types.js +16 -0
- package/package.json +47 -0
|
@@ -0,0 +1,564 @@
|
|
|
1
|
+
import { PUBLISHERS } from "./types.js";
|
|
2
|
+
/**
 * Static setup metadata for each first-party publisher adapter, keyed by
 * publisher id. Every entry carries its own id (`publisher`), a display
 * `label`, the environment variable names treated as required/optional, and
 * the `setupHint` text copied into access records.
 *
 * The table is derived from a list so the key and the `publisher` field can
 * never drift apart; entry order is preserved.
 */
const PUBLISHER_CONFIGS = Object.fromEntries([
    {
        publisher: "elsevier",
        label: "Elsevier / ScienceDirect",
        requiredEnv: ["ELSEVIER_API_KEY"],
        optionalEnv: ["ELSEVIER_INST_TOKEN", "ELSEVIER_AUTHTOKEN"],
        setupHint: "Set ELSEVIER_API_KEY and, when your institution provides one, ELSEVIER_INST_TOKEN or ELSEVIER_AUTHTOKEN."
    },
    {
        publisher: "springer_nature",
        label: "Springer Nature",
        requiredEnv: ["SPRINGER_NATURE_API_KEY"],
        optionalEnv: ["SPRINGER_NATURE_TDM_API_KEY", "SPRINGER_NATURE_TDM_ENDPOINT"],
        setupHint: "Set SPRINGER_NATURE_API_KEY. Add SPRINGER_NATURE_TDM_ENDPOINT when your TDM agreement provides a licensed full-text endpoint."
    },
    {
        publisher: "wiley",
        label: "Wiley",
        requiredEnv: ["WILEY_TDM_TOKEN"],
        optionalEnv: ["WILEY_TDM_CLIENT_TOKEN"],
        setupHint: "Set WILEY_TDM_TOKEN or WILEY_TDM_CLIENT_TOKEN after accepting Wiley's TDM terms."
    },
    {
        publisher: "taylor_francis",
        label: "Taylor & Francis",
        requiredEnv: ["TANDF_TDM_TOKEN"],
        optionalEnv: ["TANDF_TDM_ENDPOINT", "TAYLOR_FRANCIS_TDM_TOKEN", "TAYLOR_FRANCIS_TDM_ENDPOINT"],
        setupHint: "Taylor & Francis TDM often requires an institutional arrangement. Set TANDF_TDM_ENDPOINT and TANDF_TDM_TOKEN when your institution provides them."
    }
].map((config) => [config.publisher, config]));
|
|
32
|
+
/** Returns the current instant as an ISO-8601 timestamp string. */
function now() {
    const current = new Date();
    return current.toISOString();
}
|
|
35
|
+
/**
 * Returns the runtime's global `fetch`.
 *
 * @throws {Error} when no global `fetch` function exists.
 */
function defaultFetch() {
    const available = typeof fetch === "function";
    if (available) {
        return fetch;
    }
    throw new Error("LongTable publisher access probing requires a fetch-capable Node runtime.");
}
|
|
41
|
+
/**
 * Returns `value` unchanged when it is a non-null, non-array object;
 * otherwise returns a fresh empty object so property reads are always safe.
 */
function asRecord(value) {
    if (!value || typeof value !== "object" || Array.isArray(value)) {
        return {};
    }
    return value;
}
|
|
44
|
+
/** Returns `value` unchanged when it is an array, otherwise a new empty array. */
function asArray(value) {
    if (Array.isArray(value)) {
        return value;
    }
    return [];
}
|
|
47
|
+
/**
 * Returns the trimmed text of `value` when it is a string containing
 * non-whitespace characters; returns `undefined` for anything else.
 */
function asString(value) {
    if (typeof value !== "string") {
        return undefined;
    }
    const trimmed = value.trim();
    return trimmed ? trimmed : undefined;
}
|
|
50
|
+
/**
 * Returns the trimmed first element of `value` when `value` is an array whose
 * first element is a non-blank string; returns `undefined` otherwise.
 */
function firstString(value) {
    const head = Array.isArray(value) ? value[0] : undefined;
    if (typeof head !== "string") {
        return undefined;
    }
    const trimmed = head.trim();
    return trimmed ? trimmed : undefined;
}
|
|
53
|
+
/**
 * Normalizes a DOI reference to its bare, lowercase form.
 *
 * Strips a leading `http(s)://doi.org/` or `http(s)://dx.doi.org/` resolver
 * prefix and a leading `doi:` label, then trims and lowercases the result.
 *
 * Surrounding whitespace is removed *before* the prefixes are stripped: the
 * prefix patterns are anchored at the start of the string, so a pasted value
 * such as `" https://doi.org/10.1/x"` would otherwise keep its prefix.
 *
 * @param value DOI, `doi:`-labelled DOI, or doi.org URL (must be a string).
 * @returns the bare lowercase DOI.
 */
export function normalizeDoi(value) {
    return value
        .trim()
        .replace(/^https?:\/\/(dx\.)?doi\.org\//i, "")
        .replace(/^doi:\s*/i, "")
        .trim()
        .toLowerCase();
}
|
|
60
|
+
/**
 * Builds a request URL from `url` plus query parameters.
 *
 * Parameters whose value is `undefined` or the empty string are skipped; all
 * other values are stringified and set (replacing any same-named parameter
 * already present on `url`).
 *
 * @returns the serialized URL string.
 */
function endpoint(url, params) {
    const target = new URL(url);
    Object.keys(params).forEach((name) => {
        const raw = params[name];
        if (raw === undefined || raw === "") {
            return;
        }
        target.searchParams.set(name, String(raw));
    });
    return target.toString();
}
|
|
69
|
+
/**
 * Returns the trimmed value of the first key in `keys` whose entry in `env`
 * is set to something other than blank text, or `undefined` when none is.
 */
function envValue(env, keys) {
    const firstSet = keys.find((name) => {
        const raw = env[name];
        return Boolean(raw && raw.trim());
    });
    return firstSet === undefined ? undefined : env[firstSet].trim();
}
|
|
78
|
+
/**
 * Lists the names from `config.requiredEnv` followed by `config.optionalEnv`
 * that are set to non-blank values in `env`.
 */
function presentEnv(config, env) {
    const candidates = config.requiredEnv.concat(config.optionalEnv);
    return candidates.filter((name) => {
        const raw = env[name];
        return Boolean(raw?.trim());
    });
}
|
|
81
|
+
/**
 * Lists the entries of `config.requiredEnv` that are not satisfied by `env`.
 *
 * Two keys accept an alternative variable name: `WILEY_TDM_TOKEN` is also
 * satisfied by `WILEY_TDM_CLIENT_TOKEN`, and `TANDF_TDM_TOKEN` by
 * `TAYLOR_FRANCIS_TDM_TOKEN`. A variable counts as set only when its value is
 * non-blank.
 */
function missingRequiredEnv(config, env) {
    const acceptedNames = (key) => {
        switch (key) {
            case "WILEY_TDM_TOKEN":
                return ["WILEY_TDM_TOKEN", "WILEY_TDM_CLIENT_TOKEN"];
            case "TANDF_TDM_TOKEN":
                return ["TANDF_TDM_TOKEN", "TAYLOR_FRANCIS_TDM_TOKEN"];
            default:
                return [key];
        }
    };
    const isSet = (name) => {
        const raw = env[name];
        return Boolean(raw && raw.trim());
    };
    return config.requiredEnv.filter((key) => !acceptedNames(key).some(isSet));
}
|
|
88
|
+
/**
 * Guesses a publisher id from free text (a publisher name or a URL) by
 * case-insensitive keyword matching. Rules are tried in order and the first
 * match wins; returns `undefined` when the text is empty or nothing matches.
 */
function inferPublisherFromText(value) {
    const haystack = value?.toLowerCase() ?? "";
    if (!haystack) {
        return undefined;
    }
    const rules = [
        ["elsevier", /elsevier|sciencedirect/],
        ["springer_nature", /springer|nature\.com|springernature/],
        ["wiley", /wiley|onlinelibrary/],
        ["taylor_francis", /taylor\s*&?\s*francis|tandfonline|routledge/]
    ];
    const matched = rules.find(([, pattern]) => pattern.test(haystack));
    return matched ? matched[0] : undefined;
}
|
|
102
|
+
/**
 * Guesses a publisher id from the registrant prefix of a DOI.
 * Returns `undefined` when the prefix is not one of the known ones.
 */
function inferPublisherFromDoi(doi) {
    const prefixOwners = [
        ["10.1016/", "elsevier"],
        ["10.1007/", "springer_nature"],
        ["10.1038/", "springer_nature"],
        ["10.1002/", "wiley"],
        ["10.1111/", "wiley"],
        ["10.1080/", "taylor_francis"],
        ["10.1207/", "taylor_francis"]
    ];
    for (const [prefix, owner] of prefixOwners) {
        if (doi.startsWith(prefix)) {
            return owner;
        }
    }
    return undefined;
}
|
|
113
|
+
/**
 * Validates a user-supplied publisher selector.
 *
 * Non-string values, blank strings and the literal `"auto"` all resolve to
 * `"auto"`. Any other value must be a member of `PUBLISHERS` and is returned
 * as given.
 *
 * @throws {Error} when the value is a non-blank string that is not in `PUBLISHERS`.
 */
export function parsePublisherTarget(value) {
    const unspecified = typeof value !== "string" || value.trim() === "" || value === "auto";
    if (unspecified) {
        return "auto";
    }
    if (!PUBLISHERS.includes(value)) {
        throw new Error(`Unknown publisher: ${value}`);
    }
    return value;
}
|
|
122
|
+
/**
 * Converts the raw `link` array of a Crossref work message into link
 * descriptors. Entries without a usable `URL`/`url` string are dropped;
 * `content-type`, `content-version` and `intended-application` are carried
 * over as optional strings.
 */
function tdmLinksFromCrossref(rawLinks) {
    return asArray(rawLinks).flatMap((entry) => {
        const fields = asRecord(entry);
        const url = asString(fields.URL) ?? asString(fields.url);
        if (!url) {
            return [];
        }
        return [{
                url,
                contentType: asString(fields["content-type"]),
                contentVersion: asString(fields["content-version"]),
                intendedApplication: asString(fields["intended-application"])
            }];
    });
}
|
|
139
|
+
/**
 * Collects the `URL` (or lowercase `url`) string of every entry in the raw
 * Crossref `license` array, skipping entries that have neither.
 */
function licenseUrlsFromCrossref(rawLicenses) {
    const urls = [];
    for (const entry of asArray(rawLicenses)) {
        const fields = asRecord(entry);
        const url = asString(fields.URL) ?? asString(fields.url);
        if (url) {
            urls.push(url);
        }
    }
    return urls;
}
|
|
144
|
+
/**
 * Looks a DOI up in the Crossref works API and summarizes what it says about
 * the publisher and its text-mining links.
 *
 * The publisher id is inferred, in order, from the Crossref publisher name,
 * the work's URL, the first link URL that yields a match, and finally the DOI
 * prefix; `"other"` is used when none of them match.
 *
 * @param doi DOI in any form accepted by `normalizeDoi`.
 * @param env environment map; `LONGTABLE_CONTACT_EMAIL`, when set, is sent as
 *   the `mailto` query parameter.
 * @param httpFetch fetch-compatible function used for the request.
 * @returns `{ doi, publisher, inferredPublisher, title, sourceUrl, licenseUrls, links }`.
 * @throws {Error} when Crossref answers with a non-OK HTTP status.
 */
export async function discoverCrossrefTdm(doi, env = process.env, httpFetch = defaultFetch()) {
    const normalizedDoi = normalizeDoi(doi);
    const requestUrl = endpoint(`https://api.crossref.org/works/${encodeURIComponent(normalizedDoi)}`, {
        mailto: env.LONGTABLE_CONTACT_EMAIL
    });
    const requestHeaders = {
        accept: "application/json",
        "user-agent": "LongTable/0.1.60 (https://github.com/HosungYou/LongTable)"
    };
    const response = await httpFetch(requestUrl, { headers: requestHeaders });
    if (!response.ok) {
        throw new Error(`Crossref DOI discovery failed: HTTP ${response.status} ${response.statusText}`);
    }
    const body = asRecord(await response.json());
    const message = asRecord(body.message);
    const publisher = asString(message.publisher);
    const links = tdmLinksFromCrossref(message.link);
    const title = firstString(message.title);
    const sourceUrl = asString(message.URL);
    // Each guess is evaluated lazily, in priority order; the first one that
    // produces a publisher id wins.
    const guesses = [
        () => inferPublisherFromText(publisher),
        () => inferPublisherFromText(sourceUrl),
        () => links.map((link) => inferPublisherFromText(link.url)).find(Boolean),
        () => inferPublisherFromDoi(normalizedDoi)
    ];
    let inferredPublisher = "other";
    for (const guess of guesses) {
        const candidate = guess();
        if (candidate !== undefined && candidate !== null) {
            inferredPublisher = candidate;
            break;
        }
    }
    return {
        doi: normalizedDoi,
        publisher,
        inferredPublisher,
        title,
        sourceUrl,
        licenseUrls: licenseUrlsFromCrossref(message.license),
        links
    };
}
|
|
179
|
+
/**
 * Builds the access record returned when a publisher's required credential is
 * not configured. All of the publisher's required variables are reported as
 * missing and none as present; collection depth is limited to metadata.
 */
function buildMissingCredentialRecord(publisher, doi, crossref) {
    const { requiredEnv, setupHint } = PUBLISHER_CONFIGS[publisher];
    const record = {
        publisher,
        checkedAt: now(),
        credentialStatus: "missing",
        entitlementStatus: "unknown",
        tdmStatus: "not_configured",
        collectionDepth: "metadata",
        requiredEnv,
        presentEnv: [],
        missingEnv: requiredEnv,
        testedDoi: doi,
        setupHint,
        verificationNote: "Publisher credential is not configured; LongTable can only use metadata or abstract routes.",
        crossref
    };
    return record;
}
|
|
197
|
+
/**
 * Assembles a publisher access record from the caller's status fields.
 *
 * `requiredEnv` comes from `input.config` (empty when no config is given).
 * `presentEnv`/`missingEnv` are computed from `input.env` only when both a
 * config and an env are supplied; otherwise nothing is reported present and
 * every required variable is reported missing. `checkedAt` is stamped with
 * the current time.
 */
function baseRecord(input) {
    const { config, env } = input;
    const requiredEnv = config?.requiredEnv ?? [];
    let present = [];
    let missing = requiredEnv;
    if (config && env) {
        present = presentEnv(config, env);
        missing = missingRequiredEnv(config, env);
    }
    return {
        publisher: input.publisher,
        checkedAt: now(),
        credentialStatus: input.credentialStatus,
        entitlementStatus: input.entitlementStatus,
        tdmStatus: input.tdmStatus,
        collectionDepth: input.collectionDepth,
        requiredEnv,
        presentEnv: present,
        missingEnv: missing,
        testedDoi: input.doi,
        endpoint: input.endpoint,
        setupHint: input.setupHint,
        verificationNote: input.verificationNote,
        licenseNote: input.licenseNote,
        evidenceSnippet: input.evidenceSnippet,
        crossref: input.crossref
    };
}
|
|
220
|
+
/**
 * Picks the first Crossref link suitable for a text probe.
 *
 * A link qualifies when (a) no `publisher` is requested or the link URL maps
 * to that publisher, (b) its intended application is unspecified or mentions
 * "text-mining", and (c) its content type does not mention "pdf".
 *
 * @returns the matching link, or `undefined` when there is no discovery
 *   result or no link qualifies.
 */
function chooseTextLink(discovery, publisher) {
    const qualifies = (link) => {
        if (publisher && inferPublisherFromText(link.url) !== publisher) {
            return false;
        }
        const contentType = link.contentType?.toLowerCase() ?? "";
        if (contentType.includes("pdf")) {
            return false;
        }
        const application = link.intendedApplication?.toLowerCase() ?? "";
        return application === "" || application.includes("text-mining");
    };
    return discovery?.links.find(qualifies);
}
|
|
230
|
+
/**
 * Performs one GET-style probe against a publisher endpoint and, when the
 * response is OK and not a PDF, reads its body as text.
 *
 * Whether the body is a PDF is decided from the *response* `Content-Type`
 * header. The request's `Accept` header only lists what the caller is willing
 * to receive, so relying on it would skip text extraction for every request
 * that merely allows PDF, and would read PDF bytes as text for requests that
 * do not. The `Accept` value is still used as a fallback when the response
 * exposes no content type (for example a minimal fetch stub).
 *
 * @param httpFetch fetch-compatible function.
 * @param url absolute URL to request; echoed back as `endpoint`.
 * @param headers request headers (plain object).
 * @returns `{ ok, status, statusText, text, endpoint }`; `text` is `undefined`
 *   when the response was not OK or was a PDF.
 */
async function fetchTextProbe(httpFetch, url, headers) {
    const response = await httpFetch(url, { headers });
    const acceptEntry = Object.entries(headers ?? {}).find(([key]) => key.toLowerCase() === "accept");
    const requestedTypes = String(acceptEntry?.[1] ?? "").toLowerCase();
    const rawReturnedType = response.headers?.get?.("content-type");
    const returnedType = typeof rawReturnedType === "string" && rawReturnedType.trim()
        ? rawReturnedType.toLowerCase()
        : undefined;
    const looksLikePdf = (returnedType ?? requestedTypes).includes("application/pdf");
    let text;
    if (response.ok && !looksLikePdf) {
        text = await response.text();
    }
    return {
        ok: response.ok,
        status: response.status,
        statusText: response.statusText,
        text,
        endpoint: url
    };
}
|
|
245
|
+
/**
 * Produces a short plain-text excerpt from a response body: markup tags are
 * replaced by spaces, whitespace runs are collapsed, and the result is cut to
 * at most 500 characters.
 *
 * @returns the excerpt, or `undefined` when `value` is nullish or nothing but
 *   markup/whitespace.
 */
function snippetFromText(value) {
    if (value === undefined || value === null) {
        return undefined;
    }
    const withoutTags = value.replace(/<[^>]+>/g, " ");
    const collapsed = withoutTags.replace(/\s+/g, " ").trim();
    return collapsed ? collapsed.slice(0, 500) : undefined;
}
|
|
255
|
+
/**
 * Translates the outcome of a publisher probe into an access record.
 *
 * - OK response: credential valid, licensed full text available; the depth is
 *   `licensed_snippet` when a text excerpt could be extracted, otherwise
 *   `licensed_full_text_local_only`.
 * - HTTP 401: credential invalid, access denied.
 * - HTTP 403: credential accepted as valid but entitlement denied.
 * - Anything else: credential merely "present", entitlement unknown;
 *   `fallbackHint` is used as the setup hint.
 */
function recordFromProbeResult(publisher, doi, env, result, crossref, fallbackHint) {
    const config = PUBLISHER_CONFIGS[publisher];
    const snippet = snippetFromText(result.text);
    // Fields shared by every outcome.
    const shared = {
        publisher,
        doi,
        config,
        env,
        crossref,
        endpoint: result.endpoint
    };
    if (result.ok) {
        const hasSnippet = Boolean(snippet);
        return baseRecord({
            ...shared,
            credentialStatus: "valid",
            entitlementStatus: "licensed_full_text_available",
            tdmStatus: "permitted",
            collectionDepth: hasSnippet ? "licensed_snippet" : "licensed_full_text_local_only",
            setupHint: config.setupHint,
            licenseNote: "Publisher endpoint returned content for this DOI. Follow the applicable publisher and institutional TDM terms.",
            verificationNote: hasSnippet
                ? "Licensed publisher content was reachable and a short local snippet was extracted."
                : "Licensed publisher content was reachable, but LongTable did not extract text from the returned format.",
            evidenceSnippet: snippet
        });
    }
    switch (result.status) {
        case 401:
            return baseRecord({
                ...shared,
                credentialStatus: "invalid",
                entitlementStatus: "no_access",
                tdmStatus: "denied",
                collectionDepth: "metadata",
                setupHint: config.setupHint,
                verificationNote: `Publisher rejected the credential for this DOI: HTTP ${result.status} ${result.statusText}.`
            });
        case 403:
            return baseRecord({
                ...shared,
                credentialStatus: "valid",
                entitlementStatus: "no_access",
                tdmStatus: "denied",
                collectionDepth: "metadata",
                setupHint: config.setupHint,
                verificationNote: `Credential was present, but the publisher denied entitlement for this DOI: HTTP ${result.status} ${result.statusText}.`
            });
        default:
            return baseRecord({
                ...shared,
                credentialStatus: "present",
                entitlementStatus: "unknown",
                tdmStatus: "unknown",
                collectionDepth: "metadata",
                setupHint: fallbackHint,
                verificationNote: `Publisher probe could not confirm access: HTTP ${result.status} ${result.statusText}.`
            });
    }
}
|
|
325
|
+
/**
 * Probes Elsevier full-text access for a DOI.
 *
 * Returns a "missing credential" record when `ELSEVIER_API_KEY` is not set.
 * Otherwise requests either the text-mining link advertised by Crossref or,
 * failing that, the article-by-DOI API endpoint, sending the API key and any
 * configured institutional/auth tokens as `X-ELS-*` headers.
 *
 * Security: the Crossref link comes from externally supplied metadata and is
 * selected by a substring match on the URL, so it is only used when it is an
 * HTTPS URL on `elsevier.com` or one of its subdomains. Anything else falls
 * back to the API endpoint, so credentials are never sent to a host that
 * merely contains "elsevier" somewhere in its URL.
 */
async function probeElsevier(doi, env, httpFetch, crossref) {
    const config = PUBLISHER_CONFIGS.elsevier;
    const apiKey = env.ELSEVIER_API_KEY?.trim();
    if (!apiKey) {
        return buildMissingCredentialRecord("elsevier", doi, crossref);
    }
    const isTrustedElsevierUrl = (candidate) => {
        try {
            const { protocol, hostname } = new URL(candidate);
            const host = hostname.toLowerCase();
            return protocol === "https:" && (host === "elsevier.com" || host.endsWith(".elsevier.com"));
        }
        catch {
            // Not a parseable absolute URL: never attach credentials to it.
            return false;
        }
    };
    const link = chooseTextLink(crossref, "elsevier");
    const url = link && isTrustedElsevierUrl(link.url)
        ? link.url
        : endpoint(`https://api.elsevier.com/content/article/doi/${encodeURIComponent(doi)}`, {
            httpAccept: "text/plain"
        });
    const headers = {
        accept: "text/plain, application/xml, application/json",
        "X-ELS-APIKey": apiKey
    };
    if (env.ELSEVIER_INST_TOKEN?.trim()) {
        headers["X-ELS-Insttoken"] = env.ELSEVIER_INST_TOKEN.trim();
    }
    if (env.ELSEVIER_AUTHTOKEN?.trim()) {
        headers["X-ELS-Authtoken"] = env.ELSEVIER_AUTHTOKEN.trim();
    }
    const result = await fetchTextProbe(httpFetch, url, headers);
    return recordFromProbeResult("elsevier", doi, env, result, crossref, config.setupHint);
}
|
|
348
|
+
/**
 * Probes Springer Nature access for a DOI.
 *
 * - No `SPRINGER_NATURE_API_KEY`: returns a "missing credential" record.
 * - `SPRINGER_NATURE_TDM_ENDPOINT` set: substitutes the DOI for `{doi}` in it
 *   and probes that endpoint for text, authenticating with
 *   `SPRINGER_NATURE_TDM_API_KEY` when it is non-blank, else the API key.
 * - Otherwise: queries the metadata API and reports abstract/metadata-level
 *   access only; licensed full text is not confirmed on this path.
 *
 * The metadata API takes the key as an `api_key` query parameter. The URL
 * stored in the returned record has that parameter replaced by a placeholder
 * so the secret does not end up in records, logs or reports.
 */
async function probeSpringerNature(doi, env, httpFetch, crossref) {
    const config = PUBLISHER_CONFIGS.springer_nature;
    const apiKey = env.SPRINGER_NATURE_API_KEY?.trim();
    if (!apiKey) {
        return buildMissingCredentialRecord("springer_nature", doi, crossref);
    }
    const configuredEndpoint = env.SPRINGER_NATURE_TDM_ENDPOINT?.trim();
    if (configuredEndpoint) {
        const url = configuredEndpoint.replace("{doi}", encodeURIComponent(doi));
        // `||` rather than `??`: a blank TDM key trims to "", which is not
        // nullish, and must still fall back to the main API key.
        const tdmKey = env.SPRINGER_NATURE_TDM_API_KEY?.trim() || apiKey;
        const result = await fetchTextProbe(httpFetch, url, {
            accept: "text/plain, application/xml, application/json",
            "X-Api-Key": tdmKey
        });
        return recordFromProbeResult("springer_nature", doi, env, result, crossref, config.setupHint);
    }
    const metadataBase = "https://api.springernature.com/meta/v2/json";
    const url = endpoint(metadataBase, {
        q: `doi:${doi}`,
        api_key: apiKey,
        p: 1
    });
    // Same URL with the credential masked; this is the one that gets recorded.
    const reportedUrl = endpoint(metadataBase, {
        q: `doi:${doi}`,
        api_key: "REDACTED",
        p: 1
    });
    const response = await httpFetch(url, {
        headers: {
            accept: "application/json"
        }
    });
    if (!response.ok) {
        return recordFromProbeResult("springer_nature", doi, env, {
            ok: response.ok,
            status: response.status,
            statusText: response.statusText,
            endpoint: reportedUrl
        }, crossref, config.setupHint);
    }
    const payload = asRecord(await response.json());
    const records = asArray(payload.records);
    const hasRecord = records.length > 0;
    return baseRecord({
        publisher: "springer_nature",
        doi,
        config,
        env,
        crossref,
        endpoint: reportedUrl,
        credentialStatus: "valid",
        entitlementStatus: hasRecord ? "abstract_available" : "metadata_only",
        tdmStatus: "requires_license_review",
        collectionDepth: hasRecord ? "abstract" : "metadata",
        setupHint: config.setupHint,
        licenseNote: "Springer Nature metadata access was verified. Licensed full-text TDM requires a configured TDM endpoint or subscription arrangement.",
        verificationNote: hasRecord
            ? "Springer Nature metadata API responded for this DOI; licensed full-text access was not confirmed."
            : "Springer Nature credential worked, but no metadata record was returned for this DOI."
    });
}
|
|
402
|
+
/**
 * Probes Wiley TDM access for a DOI.
 *
 * The token is read from `WILEY_TDM_TOKEN` or, failing that,
 * `WILEY_TDM_CLIENT_TOKEN`; without one a "missing credential" record is
 * returned. Otherwise the TDM article endpoint is requested with the token in
 * the `Wiley-TDM-Client-Token` header and the outcome is converted to a record.
 */
async function probeWiley(doi, env, httpFetch, crossref) {
    const { setupHint } = PUBLISHER_CONFIGS.wiley;
    const token = envValue(env, ["WILEY_TDM_TOKEN", "WILEY_TDM_CLIENT_TOKEN"]);
    if (!token) {
        return buildMissingCredentialRecord("wiley", doi, crossref);
    }
    const articleUrl = `https://api.wiley.com/onlinelibrary/tdm/v1/articles/${encodeURIComponent(doi)}`;
    const requestHeaders = {
        accept: "text/plain, application/xml, application/pdf",
        "Wiley-TDM-Client-Token": token
    };
    const outcome = await fetchTextProbe(httpFetch, articleUrl, requestHeaders);
    return recordFromProbeResult("wiley", doi, env, outcome, crossref, setupHint);
}
|
|
415
|
+
/**
 * Probes Taylor & Francis TDM access for a DOI.
 *
 * Both a token (`TANDF_TDM_TOKEN` / `TAYLOR_FRANCIS_TDM_TOKEN`) and an
 * endpoint template (`TANDF_TDM_ENDPOINT` / `TAYLOR_FRANCIS_TDM_ENDPOINT`)
 * are needed before any request is made. When either is absent, a
 * metadata-only record is returned whose note names the piece that is
 * actually missing. Previously the note always blamed the endpoint, even when
 * the endpoint was configured and only the token was absent.
 *
 * With both present, `{doi}` in the endpoint is replaced by the encoded DOI
 * and the request is sent with a `Bearer` authorization header.
 */
async function probeTaylorFrancis(doi, env, httpFetch, crossref) {
    const config = PUBLISHER_CONFIGS.taylor_francis;
    const token = envValue(env, ["TANDF_TDM_TOKEN", "TAYLOR_FRANCIS_TDM_TOKEN"]);
    const configuredEndpoint = envValue(env, ["TANDF_TDM_ENDPOINT", "TAYLOR_FRANCIS_TDM_ENDPOINT"]);
    if (!token || !configuredEndpoint) {
        // Inside this branch a configured endpoint implies the token is the missing piece.
        const verificationNote = configuredEndpoint
            ? "A Taylor & Francis TDM endpoint was configured, but no TDM token was provided, so LongTable did not attempt full-text access."
            : "No Taylor & Francis licensed TDM endpoint was configured, so LongTable did not attempt full-text access.";
        return baseRecord({
            publisher: "taylor_francis",
            doi,
            config,
            env,
            crossref,
            credentialStatus: token ? "present" : "missing",
            entitlementStatus: "unknown",
            tdmStatus: "requires_license_review",
            collectionDepth: "metadata",
            setupHint: config.setupHint,
            licenseNote: "Taylor & Francis indicates TDM access may require institutional support or a license supplement.",
            verificationNote
        });
    }
    const url = configuredEndpoint.replace("{doi}", encodeURIComponent(doi));
    const result = await fetchTextProbe(httpFetch, url, {
        accept: "text/plain, application/xml, application/json",
        authorization: `Bearer ${token}`
    });
    return recordFromProbeResult("taylor_francis", doi, env, result, crossref, config.setupHint);
}
|
|
442
|
+
/**
 * Dispatches a probe to the adapter for one of the supported publishers.
 *
 * @throws {Error} (as a rejected promise) when `publisher` is not a supported
 *   id. Without this the function would resolve to `undefined`, and callers
 *   would fail later with an unrelated error when reading fields of the
 *   missing record.
 */
async function probeKnownPublisher(publisher, doi, env, httpFetch, crossref) {
    switch (publisher) {
        case "elsevier":
            return probeElsevier(doi, env, httpFetch, crossref);
        case "springer_nature":
            return probeSpringerNature(doi, env, httpFetch, crossref);
        case "wiley":
            return probeWiley(doi, env, httpFetch, crossref);
        case "taylor_francis":
            return probeTaylorFrancis(doi, env, httpFetch, crossref);
        default:
            throw new Error(`Unknown publisher: ${publisher}`);
    }
}
|
|
454
|
+
/** Returns the config entry for every id in `PUBLISHERS`, in that order. */
export function publisherConfigs() {
    const configs = [];
    for (const publisher of PUBLISHERS) {
        configs.push(PUBLISHER_CONFIGS[publisher]);
    }
    return configs;
}
|
|
457
|
+
/**
 * Checks what level of access is available for one DOI.
 *
 * Crossref discovery is attempted first on a best-effort basis: any failure
 * there is ignored and the probe continues without Crossref data. With
 * `publisher` unset or `"auto"`, the publisher is taken from the Crossref
 * inference, then from the DOI prefix, and finally defaults to `"other"`.
 * `"other"` yields a metadata-only record without contacting any publisher;
 * every other value is handed to the matching publisher adapter.
 *
 * @param input `{ doi, publisher?, env?, fetch? }`; `env` defaults to
 *   `process.env` and `fetch` to the global fetch.
 */
export async function probePublisherAccess(input) {
    const doi = normalizeDoi(input.doi);
    const env = input.env ?? process.env;
    const httpFetch = input.fetch ?? defaultFetch();
    let crossref;
    try {
        crossref = await discoverCrossrefTdm(doi, env, httpFetch);
    }
    catch {
        // Discovery is optional; continue without it.
        crossref = undefined;
    }
    const requested = input.publisher ?? "auto";
    let publisher = requested;
    if (requested === "auto") {
        publisher = crossref?.inferredPublisher ?? inferPublisherFromDoi(doi) ?? "other";
    }
    if (publisher !== "other") {
        return probeKnownPublisher(publisher, doi, env, httpFetch, crossref);
    }
    const hasLinks = Boolean(crossref?.links.length);
    return baseRecord({
        publisher: "other",
        doi,
        crossref,
        credentialStatus: "present",
        entitlementStatus: hasLinks ? "unknown" : "metadata_only",
        tdmStatus: hasLinks ? "requires_license_review" : "unknown",
        collectionDepth: "metadata",
        setupHint: "No first-party publisher adapter matched this DOI. Use Crossref metadata and the publisher landing page.",
        verificationNote: "LongTable could not map this DOI to Elsevier, Springer Nature, Wiley, or Taylor & Francis."
    });
}
|
|
487
|
+
/**
 * Reports, for every id in `PUBLISHERS`, whether its required credential
 * variables are present in `env`. No network request is made, so entitlement
 * is always reported as unknown.
 */
export function summarizeConfiguredPublisherAccess(env = process.env) {
    const summarize = (publisher) => {
        const config = PUBLISHER_CONFIGS[publisher];
        const configured = missingRequiredEnv(config, env).length === 0;
        let credentialStatus = "missing";
        let tdmStatus = "not_configured";
        let verificationNote = "Required publisher credential environment variables are missing.";
        if (configured) {
            credentialStatus = "present";
            tdmStatus = "unknown";
            verificationNote = "Credential-like environment variables are present; run a DOI probe to confirm entitlement.";
        }
        return baseRecord({
            publisher,
            config,
            env,
            credentialStatus,
            entitlementStatus: "unknown",
            tdmStatus,
            collectionDepth: "metadata",
            setupHint: config.setupHint,
            verificationNote
        });
    };
    return PUBLISHERS.map((publisher) => summarize(publisher));
}
|
|
506
|
+
/**
 * Maps an access record to the card-level access status.
 *
 * Licensed full text counts as "checked" only when a snippet was actually
 * extracted; unrecognized entitlement values fall back to `license_unknown`.
 */
function bestAccessStatus(record) {
    const { entitlementStatus, collectionDepth } = record;
    switch (entitlementStatus) {
        case "licensed_full_text_available":
            return collectionDepth === "licensed_snippet"
                ? "licensed_full_text_checked"
                : "licensed_full_text_available";
        case "no_access":
            return "access_denied";
        case "abstract_available":
            return "abstract_available";
        case "metadata_only":
            return "metadata_only";
        default:
            return "license_unknown";
    }
}
|
|
524
|
+
/**
 * Adds publisher access information to research cards.
 *
 * Cards are processed one at a time, in order. Only cards with a DOI are
 * probed, and at most `input.limit` (default 3) probes are started; every
 * other card is passed through untouched. A probe that throws does not abort
 * the run: that card is marked `license_unknown` with the error message in
 * its verification note.
 *
 * @param input `{ cards, limit?, env?, fetch? }`.
 * @returns a new array with one entry per input card, in the same order.
 */
export async function enrichCardsWithPublisherAccess(input) {
    const env = input.env ?? process.env;
    const httpFetch = input.fetch ?? defaultFetch();
    const maxProbes = input.limit ?? 3;
    const results = [];
    let started = 0;
    for (const card of input.cards) {
        const budgetSpent = started >= maxProbes;
        if (!card.doi || budgetSpent) {
            results.push(card);
            continue;
        }
        started += 1;
        let updated;
        try {
            const access = await probePublisherAccess({
                doi: card.doi,
                publisher: "auto",
                env,
                fetch: httpFetch
            });
            const snippetChecked = access.collectionDepth === "licensed_snippet";
            updated = {
                ...card,
                publisher: access.publisher,
                publisherAccess: access,
                accessStatus: bestAccessStatus(access),
                verificationDepth: snippetChecked ? "licensed_snippet" : card.verificationDepth,
                entitlementSource: access.publisher === "other" ? "crossref_tdm" : "publisher_api",
                collectionDepth: access.collectionDepth,
                licenseNote: access.licenseNote,
                verificationNote: access.verificationNote
            };
        }
        catch (error) {
            updated = {
                ...card,
                accessStatus: "license_unknown",
                verificationNote: `Publisher access probe failed: ${error instanceof Error ? error.message : String(error)}`
            };
        }
        results.push(updated);
    }
    return results;
}
|
package/dist/query.d.ts
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import { type BuildSearchIntentInput, type ResearchSearchIntent, type SearchSource } from "./types.js";
|
|
2
|
+
export declare function normalizeSearchText(value: string): string;
|
|
3
|
+
export declare function splitCsvTerms(value?: string): string[];
|
|
4
|
+
export declare function extractSearchKeywords(text: string, limit?: number): string[];
|
|
5
|
+
export declare function parseSearchSources(value?: string): SearchSource[];
|
|
6
|
+
export declare function buildResearchSearchIntent(input: BuildSearchIntentInput): ResearchSearchIntent;
|