@longtable/cli 0.1.29 → 0.1.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -0
- package/dist/cli.js +202 -6
- package/dist/search/index.d.ts +1 -0
- package/dist/search/index.js +1 -0
- package/dist/search/publisher-access.d.ts +23 -0
- package/dist/search/publisher-access.js +577 -0
- package/dist/search/rank.js +30 -2
- package/dist/search/run.js +9 -1
- package/dist/search/sources.js +29 -2
- package/dist/search/types.d.ts +64 -0
- package/dist/search/types.js +6 -0
- package/package.json +7 -7
package/README.md
CHANGED
|
@@ -200,11 +200,18 @@ deduplicates, ranks, and labels results as evidence cards. Some sources work
|
|
|
200
200
|
without keys, some require a contact email, and some need API keys for reliable
|
|
201
201
|
use.
|
|
202
202
|
|
|
203
|
+
Publisher access is configured separately through environment variables and
|
|
204
|
+
DOI probes. `longtable search setup` checks Elsevier, Springer Nature, Wiley,
|
|
205
|
+
and Taylor & Francis credentials or TDM tokens without storing secrets.
|
|
206
|
+
|
|
203
207
|
Citation support should be checked explicitly. A reference can be useful as
|
|
204
208
|
background while still failing to support the specific claim attached to it.
|
|
205
209
|
|
|
206
210
|
```bash
|
|
211
|
+
longtable search setup
|
|
212
|
+
longtable search probe --doi "10.1016/example" --publisher elsevier
|
|
207
213
|
longtable search --query "trust calibration measurement" --intent measurement
|
|
214
|
+
longtable search --query "trust calibration measurement" --publisher-access --json
|
|
208
215
|
longtable search --query "trust calibration citation support" --intent citation --record
|
|
209
216
|
```
|
|
210
217
|
|
package/dist/cli.js
CHANGED
|
@@ -8,7 +8,7 @@ import { stdin as input, stdout as output, cwd, env, exit } from "node:process";
|
|
|
8
8
|
import { dirname, join, resolve } from "node:path";
|
|
9
9
|
import { homedir } from "node:os";
|
|
10
10
|
import { classifyCheckpointTrigger } from "@longtable/checkpoints";
|
|
11
|
-
import { assessSearchSourceCapabilities, buildResearchSearchIntent, runResearchSearch } from "./search/index.js";
|
|
11
|
+
import { assessSearchSourceCapabilities, buildResearchSearchIntent, buildSearchCapabilitySnapshot, parsePublisherTarget, probePublisherAccess, publisherConfigs, runResearchSearch, searchCapabilitySnapshotPath, summarizeConfiguredPublisherAccess } from "./search/index.js";
|
|
12
12
|
import { buildProviderChoices, buildQuickSetupFlow, createPersistedSetupOutput, installRuntimeConfigFromStoredSetup, loadSetupOutput, renderInstallSummary, renderSetupSummary, resolveDefaultRuntimeConfigPath, resolveDefaultSetupPath, saveSetupOutput, saveSetupAndRuntimeConfig, serializeSetupOutput, writeRuntimeConfig } from "@longtable/setup";
|
|
13
13
|
import { buildCodexSkillSpecs, buildCodexThinWrappedPrompt, installCodexSkills, listInstalledCodexSkills, renderQuestionRecordPrompt, removeCodexSkills, resolveCodexSkillsDir, runCodexThinWrapper } from "@longtable/provider-codex";
|
|
14
14
|
import { buildClaudeSkillSpecs, installClaudeSkills, listInstalledClaudeSkills, renderQuestionRecordInput, removeClaudeSkills, resolveClaudeSkillsDir } from "@longtable/provider-claude";
|
|
@@ -43,7 +43,7 @@ const ANSI = {
|
|
|
43
43
|
green: "\u001B[32m"
|
|
44
44
|
};
|
|
45
45
|
const LONGTABLE_MCP_SERVER_NAME = "longtable-state";
|
|
46
|
-
const LONGTABLE_MCP_PACKAGE_VERSION = "0.1.29";
|
|
46
|
+
const LONGTABLE_MCP_PACKAGE_VERSION = "0.1.30";
|
|
47
47
|
const LONGTABLE_MCP_MARKER_START = "# LongTable state MCP START";
|
|
48
48
|
const LONGTABLE_MCP_MARKER_END = "# LongTable state MCP END";
|
|
49
49
|
function style(text, prefix) {
|
|
@@ -89,7 +89,10 @@ function usage() {
|
|
|
89
89
|
" longtable show [--json] [--path <file>]",
|
|
90
90
|
" longtable install [--json] [--path <file>] [--runtime-path <file>]",
|
|
91
91
|
" longtable mcp install [--provider codex|claude|all] [--write] [--checkpoint-ui off|interactive|strong] [--json] [--codex-config <path>] [--claude-settings <path>] [--package <spec>]",
|
|
92
|
-
" longtable search --query <text> [--intent literature|theory|measurement|citation|metadata|venue] [--field <text>] [--source all|crossref,arxiv,openalex,semantic_scholar,pubmed,eric,doaj,unpaywall] [--must <term[,term]>] [--exclude <term[,term]>] [--limit <n>] [--allow-partial] [--record] [--cwd <path>] [--json]",
|
|
92
|
+
" longtable search --query <text> [--intent literature|theory|measurement|citation|metadata|venue] [--field <text>] [--source all|crossref,arxiv,openalex,semantic_scholar,pubmed,eric,doaj,unpaywall] [--must <term[,term]>] [--exclude <term[,term]>] [--limit <n>] [--allow-partial] [--publisher-access] [--record] [--cwd <path>] [--json]",
|
|
93
|
+
" longtable search setup [--doi <doi>] [--json]",
|
|
94
|
+
" longtable search doctor [--doi <doi>] [--publisher auto|elsevier|springer_nature|wiley|taylor_francis|all] [--json]",
|
|
95
|
+
" longtable search probe --doi <doi> [--publisher auto|elsevier|springer_nature|wiley|taylor_francis] [--json]",
|
|
93
96
|
" longtable sentinel --prompt <text> [--cwd <path>] [--json] [--record]",
|
|
94
97
|
" longtable team --prompt <text> [--role <role[,role]>] [--debate] [--rounds 3|5] [--cwd <path>] [--json]",
|
|
95
98
|
" longtable ask [--prompt <text>] [--print] [--json] [--setup <path>] [--cwd <path>]",
|
|
@@ -136,6 +139,10 @@ function parseArgs(argv) {
|
|
|
136
139
|
else if (command === "codex" || command === "claude" || command === "mcp") {
|
|
137
140
|
startIndex = 2;
|
|
138
141
|
}
|
|
142
|
+
else if (command === "search" && maybeSubcommand && !maybeSubcommand.startsWith("--")) {
|
|
143
|
+
subcommand = maybeSubcommand;
|
|
144
|
+
startIndex = 2;
|
|
145
|
+
}
|
|
139
146
|
else if (directCommand) {
|
|
140
147
|
subcommand = undefined;
|
|
141
148
|
startIndex = 1;
|
|
@@ -2013,6 +2020,7 @@ function renderEvidenceRunSummary(run, recordedPath) {
|
|
|
2013
2020
|
].filter(Boolean).join(", ");
|
|
2014
2021
|
lines.push(` - ${card.title}`);
|
|
2015
2022
|
lines.push(` score: ${card.relevanceScore}; support: ${card.citationSupportStatus}; depth: ${card.evidenceDepth}; sources: ${card.sourceRoutes.join(", ")}`);
|
|
2023
|
+
lines.push(` access: ${card.accessStatus}; verification: ${card.verificationNote}`);
|
|
2016
2024
|
if (identifiers) {
|
|
2017
2025
|
lines.push(` ids: ${identifiers}`);
|
|
2018
2026
|
}
|
|
@@ -2056,7 +2064,194 @@ async function recordEvidenceRun(run, workingDirectory) {
|
|
|
2056
2064
|
await syncCurrentWorkspaceView(context);
|
|
2057
2065
|
return evidencePath;
|
|
2058
2066
|
}
|
|
2059
|
-
|
|
2067
|
+
function renderPublisherAccessRecord(record) {
|
|
2068
|
+
const envSummary = record.missingEnv.length > 0
|
|
2069
|
+
? `missing ${record.missingEnv.join(", ")}`
|
|
2070
|
+
: `configured ${record.presentEnv.join(", ") || "none"}`;
|
|
2071
|
+
const lines = [
|
|
2072
|
+
`${record.publisher}:`,
|
|
2073
|
+
` credential: ${record.credentialStatus} (${envSummary})`,
|
|
2074
|
+
` entitlement: ${record.entitlementStatus}; tdm: ${record.tdmStatus}; depth: ${record.collectionDepth}`,
|
|
2075
|
+
` verification: ${record.verificationNote}`
|
|
2076
|
+
];
|
|
2077
|
+
if (record.testedDoi) {
|
|
2078
|
+
lines.push(` tested doi: ${record.testedDoi}`);
|
|
2079
|
+
}
|
|
2080
|
+
if (record.endpoint) {
|
|
2081
|
+
lines.push(` endpoint: ${record.endpoint}`);
|
|
2082
|
+
}
|
|
2083
|
+
if (record.licenseNote) {
|
|
2084
|
+
lines.push(` license: ${record.licenseNote}`);
|
|
2085
|
+
}
|
|
2086
|
+
if (record.setupHint) {
|
|
2087
|
+
lines.push(` setup: ${record.setupHint}`);
|
|
2088
|
+
}
|
|
2089
|
+
return lines.join("\n");
|
|
2090
|
+
}
|
|
2091
|
+
/**
 * Renders a titled report covering several publisher access records.
 *
 * @param title Heading printed as the first line.
 * @param records Records to render, one block per record, in order.
 * @param capabilityPath Optional capability file path; when truthy it is
 *   listed directly under the title.
 * @returns The report as a single "\n"-joined string.
 */
function renderPublisherAccessRecords(title, records, capabilityPath) {
    const header = capabilityPath
        ? [title, `- capability file: ${capabilityPath}`]
        : [title];
    const body = [];
    for (const entry of records) {
        body.push(renderPublisherAccessRecord(entry));
    }
    return [...header, ...body].join("\n");
}
|
|
2101
|
+
/**
 * Writes a capability snapshot built from the given records to the snapshot
 * path, creating the parent directory first when needed.
 *
 * @param records Publisher access records to include in the snapshot.
 * @returns The path the snapshot was written to.
 */
async function saveSearchCapabilityRecords(records) {
    const target = searchCapabilitySnapshotPath();
    const parentDir = dirname(target);
    await mkdir(parentDir, { recursive: true });
    const snapshot = buildSearchCapabilitySnapshot(records, env);
    await writeJsonFile(target, snapshot);
    return target;
}
|
|
2107
|
+
/**
 * Probes every configured publisher with the same DOI, one after another.
 *
 * @param doi DOI passed to each probe.
 * @returns One access record per publisher, in `publisherConfigs()` order.
 */
async function probeAllPublishers(doi) {
    const results = [];
    // Probes are awaited sequentially so requests are issued one at a time.
    for (const { publisher } of publisherConfigs()) {
        const outcome = await probePublisherAccess({ doi, publisher, env });
        results.push(outcome);
    }
    return results;
}
|
|
2118
|
+
/**
 * Implements `longtable search probe`: probes one publisher for one DOI and
 * prints the result as JSON (`--json`) or as a text block.
 *
 * @param args Parsed CLI flags; reads `doi`, `publisher` and `json`.
 * @returns A single-element array containing the probe record.
 * @throws Error when `--doi` is missing or blank, or when
 *   `parsePublisherTarget` rejects the `--publisher` value.
 */
async function runSearchProbe(args) {
    const hasDoi = typeof args.doi === "string" && args.doi.trim() !== "";
    if (!hasDoi) {
        throw new Error("`longtable search probe` requires --doi <doi>.");
    }
    const target = parsePublisherTarget(args.publisher);
    const record = await probePublisherAccess({ doi: args.doi, publisher: target, env });
    const printable = args.json === true
        ? JSON.stringify({ record }, null, 2)
        : renderPublisherAccessRecord(record);
    console.log(printable);
    return [record];
}
|
|
2136
|
+
/**
 * Implements `longtable search doctor` / `status`.
 *
 * With a non-blank `--doi` it probes either all publishers (`--publisher all`)
 * or the single parsed target; without one it only summarizes the configured
 * environment. The report also states where the capability snapshot lives and
 * whether that file exists yet.
 *
 * @param args Parsed CLI flags; reads `doi`, `publisher` and `json`.
 * @returns The records that were reported.
 */
async function runSearchDoctor(args) {
    const doiProvided = typeof args.doi === "string" && Boolean(args.doi.trim());
    let records;
    if (!doiProvided) {
        records = summarizeConfiguredPublisherAccess(env);
    }
    else if (args.publisher === "all") {
        records = await probeAllPublishers(args.doi);
    }
    else {
        const target = parsePublisherTarget(args.publisher);
        const single = await probePublisherAccess({ doi: args.doi, publisher: target, env });
        records = [single];
    }
    const capabilityFile = searchCapabilitySnapshotPath();
    const capabilityFileExists = existsSync(capabilityFile);
    if (args.json === true) {
        console.log(JSON.stringify({ capabilityFile, capabilityFileExists, records }, null, 2));
        return records;
    }
    console.log(renderPublisherAccessRecords("LongTable Search Publisher Access Doctor", records, capabilityFile));
    if (!capabilityFileExists) {
        console.log("- saved capabilities: none yet; run `longtable search setup` to record non-secret capability status.");
    }
    return records;
}
|
|
2171
|
+
async function promptPublisherDoi(rl, label, defaultDoi) {
|
|
2172
|
+
const prompt = defaultDoi
|
|
2173
|
+
? `${label} test DOI [${defaultDoi}, Enter to reuse, skip to skip]: `
|
|
2174
|
+
: `${label} test DOI (Enter to skip): `;
|
|
2175
|
+
const answer = (await rl.question(prompt)).trim();
|
|
2176
|
+
if (!answer && defaultDoi) {
|
|
2177
|
+
return defaultDoi;
|
|
2178
|
+
}
|
|
2179
|
+
if (!answer || /^skip$/i.test(answer)) {
|
|
2180
|
+
return undefined;
|
|
2181
|
+
}
|
|
2182
|
+
return answer;
|
|
2183
|
+
}
|
|
2184
|
+
/**
 * Walks the user through publisher access setup on an interactive terminal.
 *
 * For each configured publisher it prints the required/optional environment
 * variables and the setup hint, asks for a test DOI, and then either probes
 * the publisher with that DOI or falls back to the environment-only summary.
 *
 * @param defaultDoi Optional DOI offered as the default answer for every
 *   publisher prompt.
 * @returns The records collected, at most one per publisher.
 */
async function runInteractiveSearchSetup(defaultDoi) {
    const rl = createInterface({ input, output });
    const records = [];
    try {
        console.log("LongTable publisher access setup");
        console.log("LongTable does not store API keys or TDM tokens. It reads environment variables and records only non-secret capability results.");
        console.log("");
        for (const publisher of publisherConfigs()) {
            console.log(`${publisher.label}`);
            console.log(` required env: ${publisher.requiredEnv.join(", ")}`);
            if (publisher.optionalEnv.length > 0) {
                console.log(` optional env: ${publisher.optionalEnv.join(", ")}`);
            }
            console.log(` ${publisher.setupHint}`);
            const doi = await promptPublisherDoi(rl, publisher.label, defaultDoi);
            const record = doi
                ? await probePublisherAccess({
                    doi,
                    publisher: publisher.publisher,
                    env
                })
                : summarizeConfiguredPublisherAccess(env)
                    .find((candidate) => candidate.publisher === publisher.publisher);
            // Render only the record produced for THIS publisher. Indexing the
            // last element of `records` would repeat the previous publisher's
            // record (or crash on an empty list) when no summary is found.
            if (record) {
                records.push(record);
                console.log(renderPublisherAccessRecord(record));
            }
            console.log("");
        }
    }
    finally {
        // Always release the terminal, including when a probe throws.
        rl.close();
    }
    return records;
}
|
|
2222
|
+
/**
 * Implements `longtable search setup`.
 *
 * On an interactive terminal (and without `--json`) it runs the guided setup.
 * Otherwise it probes all publishers when a DOI was supplied, or summarizes
 * the configured environment when not. The resulting records are saved as a
 * capability snapshot and then printed.
 *
 * @param args Parsed CLI flags; reads `doi` and `json`.
 */
async function runSearchSetup(args) {
    // A blank --doi counts as "not provided", matching the `.trim()` checks in
    // the probe and doctor subcommands.
    const defaultDoi = typeof args.doi === "string" && args.doi.trim() ? args.doi : undefined;
    const interactive = input.isTTY && output.isTTY && args.json !== true;
    let records;
    if (interactive) {
        records = await runInteractiveSearchSetup(defaultDoi);
    }
    else if (defaultDoi) {
        records = await probeAllPublishers(defaultDoi);
    }
    else {
        records = summarizeConfiguredPublisherAccess(env);
    }
    const snapshotPath = await saveSearchCapabilityRecords(records);
    if (args.json === true) {
        console.log(JSON.stringify({
            capabilityFile: snapshotPath,
            snapshot: buildSearchCapabilitySnapshot(records, env)
        }, null, 2));
        return;
    }
    console.log(renderPublisherAccessRecords("LongTable Search Publisher Access Setup", records, snapshotPath));
}
|
|
2239
|
+
async function runSearch(subcommand, args) {
|
|
2240
|
+
if (subcommand === "probe") {
|
|
2241
|
+
await runSearchProbe(args);
|
|
2242
|
+
return;
|
|
2243
|
+
}
|
|
2244
|
+
if (subcommand === "doctor" || subcommand === "status") {
|
|
2245
|
+
await runSearchDoctor(args);
|
|
2246
|
+
return;
|
|
2247
|
+
}
|
|
2248
|
+
if (subcommand === "setup") {
|
|
2249
|
+
await runSearchSetup(args);
|
|
2250
|
+
return;
|
|
2251
|
+
}
|
|
2252
|
+
if (subcommand) {
|
|
2253
|
+
throw new Error(`Unknown search subcommand: ${subcommand}`);
|
|
2254
|
+
}
|
|
2060
2255
|
const workingDirectory = typeof args.cwd === "string" ? args.cwd : cwd();
|
|
2061
2256
|
const projectContext = await loadProjectContextFromDirectory(workingDirectory);
|
|
2062
2257
|
const searchInput = {
|
|
@@ -2082,7 +2277,8 @@ async function runSearch(args) {
|
|
|
2082
2277
|
const run = await runResearchSearch({
|
|
2083
2278
|
...searchInput,
|
|
2084
2279
|
env,
|
|
2085
|
-
allowPartial
|
|
2280
|
+
allowPartial,
|
|
2281
|
+
publisherAccess: args["publisher-access"] === true
|
|
2086
2282
|
});
|
|
2087
2283
|
let recordedPath;
|
|
2088
2284
|
if (args.record === true && run.status !== "blocked") {
|
|
@@ -2907,7 +3103,7 @@ async function main() {
|
|
|
2907
3103
|
return;
|
|
2908
3104
|
}
|
|
2909
3105
|
if (command === "search") {
|
|
2910
|
-
await runSearch(values);
|
|
3106
|
+
await runSearch(subcommand, values);
|
|
2911
3107
|
return;
|
|
2912
3108
|
}
|
|
2913
3109
|
if (command === "ask") {
|
package/dist/search/index.d.ts
CHANGED
package/dist/search/index.js
CHANGED
|
package/dist/search/publisher-access.d.ts
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import { type CrossrefTdmDiscovery, type EvidenceCard, type Publisher, type PublisherAccessRecord, type PublisherProbeInput, type PublisherProbeTarget, type SearchCapabilitySnapshot, type SearchFetch } from "./types.js";
|
|
2
|
+
/** Static description of one supported publisher. */
interface PublisherConfig {
    /** Publisher identifier. */
    publisher: Publisher;
    /** Human-readable publisher name. */
    label: string;
    /** Names of the environment variables treated as required. */
    requiredEnv: string[];
    /** Names of additional, optional environment variables. */
    optionalEnv: string[];
    /** Short instruction describing how to configure access. */
    setupHint: string;
}
/** Strips a leading doi.org URL or `doi:` prefix, then trims and lowercases the value. */
export declare function normalizeDoi(value: string): string;
/**
 * Parses a `--publisher` flag value. Non-string, blank and "auto" values yield
 * "auto"; an unrecognized publisher name throws.
 */
export declare function parsePublisherTarget(value?: string | boolean): PublisherProbeTarget;
/**
 * Looks the DOI up in the Crossref works API and returns publisher, link and
 * license details. Rejects when Crossref answers with a non-OK status.
 */
export declare function discoverCrossrefTdm(doi: string, env?: Record<string, string | undefined>, httpFetch?: SearchFetch): Promise<CrossrefTdmDiscovery>;
/** Returns the publisher configurations (presumably the built-in table; implementation not shown here). */
export declare function publisherConfigs(): PublisherConfig[];
/** Probes access for the DOI and publisher described by `input`. */
export declare function probePublisherAccess(input: PublisherProbeInput): Promise<PublisherAccessRecord>;
/** Builds access records from the environment alone; the CLI uses it when no DOI is supplied. */
export declare function summarizeConfiguredPublisherAccess(env?: Record<string, string | undefined>): PublisherAccessRecord[];
/** Builds the capability snapshot object from access records. */
export declare function buildSearchCapabilitySnapshot(records: PublisherAccessRecord[], env?: Record<string, string | undefined>): SearchCapabilitySnapshot;
/** Returns the path of the capability snapshot file. */
export declare function searchCapabilitySnapshotPath(home?: string): string;
/** Returns evidence cards enriched with publisher access information. */
export declare function enrichCardsWithPublisherAccess(input: {
    cards: EvidenceCard[];
    env?: Record<string, string | undefined>;
    fetch?: SearchFetch;
    limit?: number;
}): Promise<EvidenceCard[]>;
|
|
23
|
+
export {};
|
|
package/dist/search/publisher-access.js
ADDED
|
@@ -0,0 +1,577 @@
|
|
|
1
|
+
import { join } from "node:path";
|
|
2
|
+
import { homedir } from "node:os";
|
|
3
|
+
import { PUBLISHERS } from "./types.js";
|
|
4
|
+
/**
 * Per-publisher configuration, keyed by publisher identifier. Each entry lists
 * the environment variables treated as required and optional, plus the setup
 * hint shown to the user.
 */
const PUBLISHER_CONFIGS = {
    // Elsevier: API key, optionally combined with an institution/auth token.
    elsevier: {
        publisher: "elsevier",
        label: "Elsevier / ScienceDirect",
        requiredEnv: ["ELSEVIER_API_KEY"],
        optionalEnv: ["ELSEVIER_INST_TOKEN", "ELSEVIER_AUTHTOKEN"],
        setupHint: "Set ELSEVIER_API_KEY and, when your institution provides one, ELSEVIER_INST_TOKEN or ELSEVIER_AUTHTOKEN."
    },
    // Springer Nature: API key, optional TDM key and TDM endpoint.
    springer_nature: {
        publisher: "springer_nature",
        label: "Springer Nature",
        requiredEnv: ["SPRINGER_NATURE_API_KEY"],
        optionalEnv: ["SPRINGER_NATURE_TDM_API_KEY", "SPRINGER_NATURE_TDM_ENDPOINT"],
        setupHint: "Set SPRINGER_NATURE_API_KEY. Add SPRINGER_NATURE_TDM_ENDPOINT when your TDM agreement provides a licensed full-text endpoint."
    },
    // Wiley: TDM token, with a client-token variable listed as optional.
    wiley: {
        publisher: "wiley",
        label: "Wiley",
        requiredEnv: ["WILEY_TDM_TOKEN"],
        optionalEnv: ["WILEY_TDM_CLIENT_TOKEN"],
        setupHint: "Set WILEY_TDM_TOKEN or WILEY_TDM_CLIENT_TOKEN after accepting Wiley's TDM terms."
    },
    // Taylor & Francis: TDM token; both TANDF_* and TAYLOR_FRANCIS_* names are listed.
    taylor_francis: {
        publisher: "taylor_francis",
        label: "Taylor & Francis",
        requiredEnv: ["TANDF_TDM_TOKEN"],
        optionalEnv: ["TANDF_TDM_ENDPOINT", "TAYLOR_FRANCIS_TDM_TOKEN", "TAYLOR_FRANCIS_TDM_ENDPOINT"],
        setupHint: "Taylor & Francis TDM often requires an institutional arrangement. Set TANDF_TDM_ENDPOINT and TANDF_TDM_TOKEN when your institution provides them."
    }
};
|
|
34
|
+
/** Returns the current time as an ISO 8601 string. */
function now() {
    const current = new Date();
    return current.toISOString();
}
|
|
37
|
+
/**
 * Returns the runtime's global `fetch`.
 *
 * @throws Error when the runtime does not provide a `fetch` function.
 */
function defaultFetch() {
    if (typeof fetch === "function") {
        return fetch;
    }
    throw new Error("LongTable publisher access probing requires a fetch-capable Node runtime.");
}
|
|
43
|
+
/** Returns `value` when it is a non-null, non-array object; otherwise a fresh empty object. */
function asRecord(value) {
    if (value === null || typeof value !== "object" || Array.isArray(value)) {
        return {};
    }
    return value;
}
|
|
46
|
+
/** Returns `value` when it is an array; otherwise a fresh empty array. */
function asArray(value) {
    if (!Array.isArray(value)) {
        return [];
    }
    return value;
}
|
|
49
|
+
/** Returns the trimmed string when `value` is a non-blank string; otherwise `undefined`. */
function asString(value) {
    if (typeof value !== "string") {
        return undefined;
    }
    const trimmed = value.trim();
    return trimmed ? trimmed : undefined;
}
|
|
52
|
+
/**
 * Returns the first element of `value`, trimmed, when `value` is an array
 * whose first element is a non-blank string; otherwise `undefined`.
 */
function firstString(value) {
    const head = Array.isArray(value) ? value[0] : undefined;
    if (typeof head !== "string") {
        return undefined;
    }
    const trimmed = head.trim();
    return trimmed ? trimmed : undefined;
}
|
|
55
|
+
/**
 * Normalizes a DOI to its bare, lowercase form.
 *
 * Removes a leading `http(s)://doi.org/` or `http(s)://dx.doi.org/` URL prefix
 * and a leading `doi:` label, trims surrounding whitespace and lowercases the
 * result.
 *
 * @param value DOI as typed or pasted by the user.
 * @returns The normalized DOI.
 */
export function normalizeDoi(value) {
    return value
        // Trim first: the prefix patterns are anchored at the start, so leading
        // whitespace would otherwise leave the prefix in place.
        .trim()
        .replace(/^https?:\/\/(dx\.)?doi\.org\//i, "")
        .replace(/^doi:\s*/i, "")
        .trim()
        .toLowerCase();
}
|
|
62
|
+
/**
 * Builds a URL string from a base URL and query parameters.
 *
 * @param url Absolute base URL.
 * @param params Query parameters; entries whose value is `undefined` or the
 *   empty string are left out, all others are stringified and set.
 * @returns The serialized URL.
 */
function endpoint(url, params) {
    const target = new URL(url);
    Object.keys(params).forEach((name) => {
        const raw = params[name];
        if (raw === undefined || raw === "") {
            return;
        }
        target.searchParams.set(name, String(raw));
    });
    return target.href;
}
|
|
71
|
+
/**
 * Returns the first non-blank value found in `env` for the given keys.
 *
 * @param env Environment-variable map.
 * @param keys Variable names, checked in order.
 * @returns The trimmed value of the first key that is set to something other
 *   than whitespace, or `undefined` when none is.
 */
function envValue(env, keys) {
    for (const name of keys) {
        const raw = env[name];
        const trimmed = raw ? raw.trim() : "";
        if (trimmed) {
            return trimmed;
        }
    }
    return undefined;
}
|
|
80
|
+
function presentEnv(config, env) {
|
|
81
|
+
return [...config.requiredEnv, ...config.optionalEnv].filter((key) => Boolean(env[key]?.trim()));
|
|
82
|
+
}
|
|
83
|
+
/**
 * Lists the required environment variables of a publisher that are not
 * satisfied by `env`.
 *
 * `WILEY_TDM_TOKEN` is also satisfied by `WILEY_TDM_CLIENT_TOKEN`, and
 * `TANDF_TDM_TOKEN` by `TAYLOR_FRANCIS_TDM_TOKEN`. A variable counts as set
 * only when its value is non-blank.
 */
function missingRequiredEnv(config, env) {
    const alternatives = new Map([
        ["WILEY_TDM_TOKEN", ["WILEY_TDM_TOKEN", "WILEY_TDM_CLIENT_TOKEN"]],
        ["TANDF_TDM_TOKEN", ["TANDF_TDM_TOKEN", "TAYLOR_FRANCIS_TDM_TOKEN"]]
    ]);
    const isSet = (name) => {
        const raw = env[name];
        return Boolean(raw && raw.trim());
    };
    return config.requiredEnv.filter((name) => {
        const accepted = alternatives.get(name) ?? [name];
        return !accepted.some(isSet);
    });
}
|
|
90
|
+
/**
 * Guesses the publisher from free text such as a publisher name or a URL.
 *
 * Matching is case-insensitive and the first matching rule wins, in the order
 * Elsevier, Springer Nature, Wiley, Taylor & Francis.
 *
 * @param value Text to inspect; may be `undefined`.
 * @returns The publisher identifier, or `undefined` when nothing matches.
 */
function inferPublisherFromText(value) {
    const haystack = value?.toLowerCase() ?? "";
    if (haystack === "") {
        return undefined;
    }
    const rules = [
        { pattern: /elsevier|sciencedirect/, publisher: "elsevier" },
        { pattern: /springer|nature\.com|springernature/, publisher: "springer_nature" },
        { pattern: /wiley|onlinelibrary/, publisher: "wiley" },
        { pattern: /taylor\s*&?\s*francis|tandfonline|routledge/, publisher: "taylor_francis" }
    ];
    const match = rules.find((rule) => rule.pattern.test(haystack));
    return match ? match.publisher : undefined;
}
|
|
104
|
+
/**
 * Guesses the publisher from a DOI's registrant prefix.
 *
 * @param doi DOI string; the prefix comparison is case-sensitive and expects
 *   the DOI to start with the prefix itself.
 * @returns The publisher identifier, or `undefined` for an unlisted prefix.
 */
function inferPublisherFromDoi(doi) {
    const prefixes = [
        { prefix: "10.1016/", publisher: "elsevier" },
        { prefix: "10.1007/", publisher: "springer_nature" },
        { prefix: "10.1038/", publisher: "springer_nature" },
        { prefix: "10.1002/", publisher: "wiley" },
        { prefix: "10.1111/", publisher: "wiley" },
        { prefix: "10.1080/", publisher: "taylor_francis" },
        { prefix: "10.1207/", publisher: "taylor_francis" }
    ];
    for (const entry of prefixes) {
        if (doi.startsWith(entry.prefix)) {
            return entry.publisher;
        }
    }
    return undefined;
}
|
|
115
|
+
/**
 * Parses the value of a `--publisher` flag.
 *
 * @param value Raw flag value.
 * @returns "auto" when the value is not a string, is blank, or is "auto";
 *   otherwise the value itself when it is listed in `PUBLISHERS`.
 * @throws Error for any other string.
 */
export function parsePublisherTarget(value) {
    const isExplicit = typeof value === "string" && value.trim() !== "" && value !== "auto";
    if (!isExplicit) {
        return "auto";
    }
    if (!PUBLISHERS.includes(value)) {
        throw new Error(`Unknown publisher: ${value}`);
    }
    return value;
}
|
|
124
|
+
/**
 * Converts the raw `link` array of a Crossref work into link descriptors.
 *
 * Entries without a usable `URL` / `url` string are dropped. Non-array input
 * yields an empty list.
 *
 * @returns Objects with `url`, `contentType`, `contentVersion` and
 *   `intendedApplication` (the last three may be `undefined`).
 */
function tdmLinksFromCrossref(rawLinks) {
    return asArray(rawLinks).flatMap((entry) => {
        const raw = asRecord(entry);
        const url = asString(raw.URL) ?? asString(raw.url);
        if (!url) {
            return [];
        }
        return [{
                url,
                contentType: asString(raw["content-type"]),
                contentVersion: asString(raw["content-version"]),
                intendedApplication: asString(raw["intended-application"])
            }];
    });
}
|
|
141
|
+
/**
 * Extracts the license URLs from the raw `license` array of a Crossref work,
 * skipping entries that carry no usable `URL` / `url` string.
 */
function licenseUrlsFromCrossref(rawLicenses) {
    const urls = [];
    for (const entry of asArray(rawLicenses)) {
        const license = asRecord(entry);
        const url = asString(license.URL) ?? asString(license.url);
        if (url) {
            urls.push(url);
        }
    }
    return urls;
}
|
|
146
|
+
/**
 * Looks up a DOI in the Crossref works API and summarizes what it reports
 * about the publisher, full-text links and licenses.
 *
 * The publisher is inferred, in order, from the Crossref publisher name, the
 * work URL, the first link URL that matches a known publisher, and the DOI
 * prefix; "other" is used when none of them match.
 *
 * @param doi DOI in any form accepted by `normalizeDoi`.
 * @param env Environment map; `LONGTABLE_CONTACT_EMAIL` is sent as `mailto`
 *   when set.
 * @param httpFetch Fetch implementation; defaults to the global `fetch`.
 * @returns The discovery summary for the normalized DOI.
 * @throws Error when Crossref responds with a non-OK HTTP status.
 */
export async function discoverCrossrefTdm(doi, env = process.env, httpFetch = defaultFetch()) {
    const cleanDoi = normalizeDoi(doi);
    const requestUrl = endpoint(`https://api.crossref.org/works/${encodeURIComponent(cleanDoi)}`, {
        mailto: env.LONGTABLE_CONTACT_EMAIL
    });
    const requestHeaders = {
        accept: "application/json",
        "user-agent": "LongTable/0.1.30 (https://github.com/HosungYou/LongTable)"
    };
    const response = await httpFetch(requestUrl, { headers: requestHeaders });
    if (!response.ok) {
        throw new Error(`Crossref DOI discovery failed: HTTP ${response.status} ${response.statusText}`);
    }
    const body = asRecord(await response.json());
    const work = asRecord(body.message);
    const publisher = asString(work.publisher);
    const links = tdmLinksFromCrossref(work.link);
    const title = firstString(work.title);
    const sourceUrl = asString(work.URL);
    let inferredPublisher = inferPublisherFromText(publisher) ?? inferPublisherFromText(sourceUrl);
    if (inferredPublisher === undefined) {
        const fromLinks = links.map((link) => inferPublisherFromText(link.url)).find(Boolean);
        inferredPublisher = fromLinks ?? inferPublisherFromDoi(cleanDoi) ?? "other";
    }
    return {
        doi: cleanDoi,
        publisher,
        inferredPublisher,
        title,
        sourceUrl,
        licenseUrls: licenseUrlsFromCrossref(work.license),
        links
    };
}
|
|
181
|
+
/**
 * Builds the access record used when a publisher's credential is not
 * configured: credential "missing", entitlement "unknown", TDM
 * "not_configured", depth "metadata", and every required variable reported as
 * missing.
 *
 * @param publisher Publisher identifier (a key of `PUBLISHER_CONFIGS`).
 * @param doi DOI that was going to be tested.
 * @param crossref Crossref discovery result to attach, if any.
 */
function buildMissingCredentialRecord(publisher, doi, crossref) {
    const { requiredEnv, setupHint } = PUBLISHER_CONFIGS[publisher];
    return {
        publisher,
        checkedAt: now(),
        credentialStatus: "missing",
        entitlementStatus: "unknown",
        tdmStatus: "not_configured",
        collectionDepth: "metadata",
        requiredEnv,
        presentEnv: [],
        missingEnv: requiredEnv,
        testedDoi: doi,
        setupHint,
        verificationNote: "Publisher credential is not configured; LongTable can only use metadata or abstract routes.",
        crossref
    };
}
|
|
199
|
+
/**
 * Assembles a publisher access record from probe details.
 *
 * The present/missing environment lists are computed only when both
 * `input.config` and `input.env` are supplied; otherwise nothing is reported
 * as present and all required variables (if any) are reported as missing.
 *
 * @param input Probe details: publisher, statuses, optional config/env, DOI,
 *   endpoint, hints, notes, snippet and Crossref discovery.
 * @returns The record, stamped with the current time in `checkedAt`.
 */
function baseRecord(input) {
    const { config, env } = input;
    const requiredEnv = config?.requiredEnv ?? [];
    const canInspectEnv = Boolean(config && env);
    const configured = canInspectEnv ? presentEnv(config, env) : [];
    const unmet = canInspectEnv ? missingRequiredEnv(config, env) : requiredEnv;
    return {
        publisher: input.publisher,
        checkedAt: now(),
        credentialStatus: input.credentialStatus,
        entitlementStatus: input.entitlementStatus,
        tdmStatus: input.tdmStatus,
        collectionDepth: input.collectionDepth,
        requiredEnv,
        presentEnv: configured,
        missingEnv: unmet,
        testedDoi: input.doi,
        endpoint: input.endpoint,
        setupHint: input.setupHint,
        verificationNote: input.verificationNote,
        licenseNote: input.licenseNote,
        evidenceSnippet: input.evidenceSnippet,
        crossref: input.crossref
    };
}
|
|
222
|
+
/**
 * Picks the first Crossref link suitable for a text probe.
 *
 * A link qualifies when its URL maps to `publisher` (any URL when no publisher
 * is given), its intended application is empty or mentions "text-mining", and
 * its content type does not mention "pdf".
 *
 * @param discovery Crossref discovery result; may be null/undefined.
 * @param publisher Optional publisher the link URL must belong to.
 * @returns The matching link, or `undefined`.
 */
function chooseTextLink(discovery, publisher) {
    if (discovery == null) {
        return undefined;
    }
    const qualifies = (link) => {
        if (publisher && inferPublisherFromText(link.url) !== publisher) {
            return false;
        }
        const application = link.intendedApplication?.toLowerCase() ?? "";
        if (application !== "" && !application.includes("text-mining")) {
            return false;
        }
        const contentType = link.contentType?.toLowerCase() ?? "";
        return !contentType.includes("pdf");
    };
    return discovery.links.find(qualifies);
}
|
|
232
|
+
async function fetchTextProbe(httpFetch, url, headers) {
|
|
233
|
+
const response = await httpFetch(url, { headers });
|
|
234
|
+
const contentType = Object.entries(headers).find(([key]) => key.toLowerCase() === "accept")?.[1] ?? "";
|
|
235
|
+
let text;
|
|
236
|
+
if (response.ok && !contentType.includes("application/pdf")) {
|
|
237
|
+
text = await response.text();
|
|
238
|
+
}
|
|
239
|
+
return {
|
|
240
|
+
ok: response.ok,
|
|
241
|
+
status: response.status,
|
|
242
|
+
statusText: response.statusText,
|
|
243
|
+
text,
|
|
244
|
+
endpoint: url
|
|
245
|
+
};
|
|
246
|
+
}
|
|
247
|
+
/**
 * Produces a short plain-text snippet from response text.
 *
 * Anything that looks like a markup tag is replaced by a space, runs of
 * whitespace are collapsed, and the result is cut to 500 characters.
 *
 * @param value Response text; may be null/undefined.
 * @returns The snippet, or `undefined` when no text remains.
 */
function snippetFromText(value) {
    if (value == null) {
        return undefined;
    }
    const withoutTags = value.replace(/<[^>]+>/g, " ");
    const collapsed = withoutTags.replace(/\s+/g, " ").trim();
    return collapsed ? collapsed.slice(0, 500) : undefined;
}
|
|
257
|
+
/**
 * Translates the outcome of a publisher probe into an access record.
 *
 * - OK response: credential valid, licensed full text available, TDM
 *   permitted; depth is "licensed_snippet" when a snippet could be extracted,
 *   otherwise "licensed_full_text_local_only".
 * - HTTP 401: credential invalid, no access.
 * - HTTP 403: credential valid but entitlement denied.
 * - Anything else: credential present, access unknown; `fallbackHint` is used
 *   as the setup hint.
 *
 * @param publisher Publisher identifier (a key of `PUBLISHER_CONFIGS`).
 * @param doi DOI that was probed.
 * @param env Environment map used to report present/missing variables.
 * @param result Probe result with `ok`, `status`, `statusText`, `text`, `endpoint`.
 * @param crossref Crossref discovery result to attach, if any.
 * @param fallbackHint Setup hint for the inconclusive case.
 */
function recordFromProbeResult(publisher, doi, env, result, crossref, fallbackHint) {
    const config = PUBLISHER_CONFIGS[publisher];
    const snippet = snippetFromText(result.text);
    // Fields shared by every outcome.
    const shared = { publisher, doi, config, env, crossref, endpoint: result.endpoint };
    const httpStatus = `HTTP ${result.status} ${result.statusText}`;
    if (result.ok) {
        const hasSnippet = Boolean(snippet);
        return baseRecord({
            ...shared,
            credentialStatus: "valid",
            entitlementStatus: "licensed_full_text_available",
            tdmStatus: "permitted",
            collectionDepth: hasSnippet ? "licensed_snippet" : "licensed_full_text_local_only",
            setupHint: config.setupHint,
            licenseNote: "Publisher endpoint returned content for this DOI. Follow the applicable publisher and institutional TDM terms.",
            verificationNote: hasSnippet
                ? "Licensed publisher content was reachable and a short local snippet was extracted."
                : "Licensed publisher content was reachable, but LongTable did not extract text from the returned format.",
            evidenceSnippet: snippet
        });
    }
    switch (result.status) {
        case 401:
            return baseRecord({
                ...shared,
                credentialStatus: "invalid",
                entitlementStatus: "no_access",
                tdmStatus: "denied",
                collectionDepth: "metadata",
                setupHint: config.setupHint,
                verificationNote: `Publisher rejected the credential for this DOI: ${httpStatus}.`
            });
        case 403:
            return baseRecord({
                ...shared,
                credentialStatus: "valid",
                entitlementStatus: "no_access",
                tdmStatus: "denied",
                collectionDepth: "metadata",
                setupHint: config.setupHint,
                verificationNote: `Credential was present, but the publisher denied entitlement for this DOI: ${httpStatus}.`
            });
        default:
            return baseRecord({
                ...shared,
                credentialStatus: "present",
                entitlementStatus: "unknown",
                tdmStatus: "unknown",
                collectionDepth: "metadata",
                setupHint: fallbackHint,
                verificationNote: `Publisher probe could not confirm access: ${httpStatus}.`
            });
    }
}
|
|
327
|
+
/**
 * Probes Elsevier for access to a DOI.
 *
 * Without `ELSEVIER_API_KEY` it returns a missing-credential record and makes
 * no request. Otherwise it requests the Elsevier link advertised by Crossref
 * when there is one, or the Elsevier article-by-DOI endpoint, sending the API
 * key plus the institution and auth tokens when those are configured.
 *
 * @param doi DOI to probe.
 * @param env Environment map holding the Elsevier credentials.
 * @param httpFetch Fetch implementation.
 * @param crossref Crossref discovery result, if any.
 * @returns The access record derived from the probe response.
 */
async function probeElsevier(doi, env, httpFetch, crossref) {
    const config = PUBLISHER_CONFIGS.elsevier;
    const apiKey = env.ELSEVIER_API_KEY?.trim();
    if (!apiKey) {
        return buildMissingCredentialRecord("elsevier", doi, crossref);
    }
    const advertised = chooseTextLink(crossref, "elsevier");
    const target = advertised?.url ?? endpoint(`https://api.elsevier.com/content/article/doi/${encodeURIComponent(doi)}`, {
        httpAccept: "text/plain"
    });
    const requestHeaders = {
        accept: "text/plain, application/xml, application/json",
        "X-ELS-APIKey": apiKey
    };
    const instToken = env.ELSEVIER_INST_TOKEN?.trim();
    if (instToken) {
        requestHeaders["X-ELS-Insttoken"] = instToken;
    }
    const authToken = env.ELSEVIER_AUTHTOKEN?.trim();
    if (authToken) {
        requestHeaders["X-ELS-Authtoken"] = authToken;
    }
    const outcome = await fetchTextProbe(httpFetch, target, requestHeaders);
    return recordFromProbeResult("elsevier", doi, env, outcome, crossref, config.setupHint);
}
|
|
350
|
+
/**
 * Probe Springer Nature for access to a DOI.
 *
 * Requires SPRINGER_NATURE_API_KEY; without it the missing-credential record is
 * returned. When SPRINGER_NATURE_TDM_ENDPOINT is set, that endpoint is probed
 * (every "{doi}" placeholder is replaced by the URL-encoded DOI) and the result
 * is mapped through recordFromProbeResult. Otherwise the metadata API is
 * queried, which can confirm the credential and metadata for the DOI but not
 * licensed full-text access.
 *
 * @param doi DOI to probe.
 * @param env Environment-variable map holding the credentials.
 * @param httpFetch Fetch implementation used for the requests.
 * @param crossref Crossref TDM discovery result, if one was obtained.
 * @returns The publisher access record for this DOI.
 */
async function probeSpringerNature(doi, env, httpFetch, crossref) {
    const config = PUBLISHER_CONFIGS.springer_nature;
    const apiKey = env.SPRINGER_NATURE_API_KEY?.trim();
    if (!apiKey) {
        return buildMissingCredentialRecord("springer_nature", doi, crossref);
    }
    const configuredEndpoint = env.SPRINGER_NATURE_TDM_ENDPOINT?.trim();
    if (configuredEndpoint) {
        // split/join substitutes every placeholder; String.replace with a string
        // pattern would only substitute the first one.
        const url = configuredEndpoint.split("{doi}").join(encodeURIComponent(doi));
        // `||` rather than `??`: a TDM key that is set but blank trims to "" and
        // must fall back to the main API key instead of sending an empty header.
        const tdmKey = env.SPRINGER_NATURE_TDM_API_KEY?.trim() || apiKey;
        const result = await fetchTextProbe(httpFetch, url, {
            accept: "text/plain, application/xml, application/json",
            "X-Api-Key": tdmKey
        });
        return recordFromProbeResult("springer_nature", doi, env, result, crossref, config.setupHint);
    }
    const metadataBase = "https://api.springernature.com/meta/v2/json";
    const query = `doi:${doi}`;
    const url = endpoint(metadataBase, {
        q: query,
        api_key: apiKey,
        p: 1
    });
    // The request URL carries the API key as a query parameter. Records may be
    // printed or persisted, so report a copy of the URL with the key masked.
    // NOTE(review): baseRecord is not visible here; if it already scrubs
    // endpoints this is merely redundant.
    const reportedUrl = endpoint(metadataBase, {
        q: query,
        api_key: "REDACTED",
        p: 1
    });
    const response = await httpFetch(url, {
        headers: {
            accept: "application/json"
        }
    });
    if (!response.ok) {
        return recordFromProbeResult("springer_nature", doi, env, {
            ok: response.ok,
            status: response.status,
            statusText: response.statusText,
            endpoint: reportedUrl
        }, crossref, config.setupHint);
    }
    const payload = asRecord(await response.json());
    const records = asArray(payload.records);
    const hasRecord = records.length > 0;
    return baseRecord({
        publisher: "springer_nature",
        doi,
        config,
        env,
        crossref,
        endpoint: reportedUrl,
        credentialStatus: "valid",
        entitlementStatus: hasRecord ? "abstract_available" : "metadata_only",
        tdmStatus: "requires_license_review",
        collectionDepth: hasRecord ? "abstract" : "metadata",
        setupHint: config.setupHint,
        licenseNote: "Springer Nature metadata access was verified. Licensed full-text TDM requires a configured TDM endpoint or subscription arrangement.",
        verificationNote: hasRecord
            ? "Springer Nature metadata API responded for this DOI; licensed full-text access was not confirmed."
            : "Springer Nature credential worked, but no metadata record was returned for this DOI."
    });
}
|
|
404
|
+
/**
 * Probe the Wiley TDM article endpoint for a DOI.
 *
 * The token is read via envValue from WILEY_TDM_TOKEN or
 * WILEY_TDM_CLIENT_TOKEN. With no token the missing-credential record is
 * returned and no request is made.
 *
 * @param doi DOI to probe; URL-encoded into the request path.
 * @param env Environment-variable map holding the token.
 * @param httpFetch Fetch implementation handed to fetchTextProbe.
 * @param crossref Crossref TDM discovery result, if one was obtained.
 * @returns The access record built by recordFromProbeResult.
 */
async function probeWiley(doi, env, httpFetch, crossref) {
    const config = PUBLISHER_CONFIGS.wiley;
    const token = envValue(env, ["WILEY_TDM_TOKEN", "WILEY_TDM_CLIENT_TOKEN"]);
    if (!token) {
        return buildMissingCredentialRecord("wiley", doi, crossref);
    }
    const articleUrl = "https://api.wiley.com/onlinelibrary/tdm/v1/articles/" + encodeURIComponent(doi);
    const requestHeaders = {
        accept: "text/plain, application/xml, application/pdf",
        "Wiley-TDM-Client-Token": token
    };
    const probe = await fetchTextProbe(httpFetch, articleUrl, requestHeaders);
    return recordFromProbeResult("wiley", doi, env, probe, crossref, config.setupHint);
}
|
|
417
|
+
/**
 * Probe a user-configured Taylor & Francis TDM endpoint for a DOI.
 *
 * Both a token (TANDF_TDM_TOKEN or TAYLOR_FRANCIS_TDM_TOKEN) and an endpoint
 * (TANDF_TDM_ENDPOINT or TAYLOR_FRANCIS_TDM_ENDPOINT) are needed. If either is
 * missing, no request is made and a metadata-only record is returned whose
 * verification note says which piece was missing.
 *
 * @param doi DOI to probe; replaces every "{doi}" placeholder in the endpoint.
 * @param env Environment-variable map holding the token and endpoint.
 * @param httpFetch Fetch implementation handed to fetchTextProbe.
 * @param crossref Crossref TDM discovery result, if one was obtained.
 * @returns The publisher access record for this DOI.
 */
async function probeTaylorFrancis(doi, env, httpFetch, crossref) {
    const config = PUBLISHER_CONFIGS.taylor_francis;
    const token = envValue(env, ["TANDF_TDM_TOKEN", "TAYLOR_FRANCIS_TDM_TOKEN"]);
    const configuredEndpoint = envValue(env, ["TANDF_TDM_ENDPOINT", "TAYLOR_FRANCIS_TDM_ENDPOINT"]);
    if (!token || !configuredEndpoint) {
        // Inside this branch a configured endpoint implies the token is the
        // missing piece, so the note must not claim the endpoint is absent.
        const verificationNote = configuredEndpoint
            ? "A Taylor & Francis TDM endpoint was configured, but no TDM token was found, so LongTable did not attempt full-text access."
            : "No Taylor & Francis licensed TDM endpoint was configured, so LongTable did not attempt full-text access.";
        return baseRecord({
            publisher: "taylor_francis",
            doi,
            config,
            env,
            crossref,
            credentialStatus: token ? "present" : "missing",
            entitlementStatus: "unknown",
            tdmStatus: "requires_license_review",
            collectionDepth: "metadata",
            setupHint: config.setupHint,
            licenseNote: "Taylor & Francis indicates TDM access may require institutional support or a license supplement.",
            verificationNote
        });
    }
    // split/join substitutes every placeholder; String.replace with a string
    // pattern would only substitute the first one.
    const url = configuredEndpoint.split("{doi}").join(encodeURIComponent(doi));
    const result = await fetchTextProbe(httpFetch, url, {
        accept: "text/plain, application/xml, application/json",
        authorization: `Bearer ${token}`
    });
    return recordFromProbeResult("taylor_francis", doi, env, result, crossref, config.setupHint);
}
|
|
444
|
+
/**
 * Dispatch a DOI probe to the adapter for a supported publisher.
 *
 * @param publisher One of "elsevier", "springer_nature", "wiley" or
 *   "taylor_francis".
 * @param doi DOI to probe.
 * @param env Environment-variable map holding publisher credentials.
 * @param httpFetch Fetch implementation passed through to the adapter.
 * @param crossref Crossref TDM discovery result, if one was obtained.
 * @returns The access record produced by the matching adapter.
 * @throws Error (as a rejected promise) when the publisher has no adapter;
 *   previously this case silently resolved to undefined.
 */
async function probeKnownPublisher(publisher, doi, env, httpFetch, crossref) {
    switch (publisher) {
        case "elsevier":
            return probeElsevier(doi, env, httpFetch, crossref);
        case "springer_nature":
            return probeSpringerNature(doi, env, httpFetch, crossref);
        case "wiley":
            return probeWiley(doi, env, httpFetch, crossref);
        case "taylor_francis":
            return probeTaylorFrancis(doi, env, httpFetch, crossref);
        default:
            // Fail loudly instead of handing callers an undefined record.
            throw new Error(`Unsupported publisher for access probe: ${String(publisher)}`);
    }
}
|
|
456
|
+
/**
 * List the configuration entry of every publisher in PUBLISHERS, in that order.
 *
 * @returns A new array of PUBLISHER_CONFIGS entries.
 */
export function publisherConfigs() {
    const configs = [];
    for (const publisher of PUBLISHERS) {
        configs.push(PUBLISHER_CONFIGS[publisher]);
    }
    return configs;
}
|
|
459
|
+
/**
 * Probe publisher access for a single DOI.
 *
 * The DOI is normalized, Crossref TDM discovery is attempted (best effort), the
 * publisher is resolved, and the matching adapter is run. When no supported
 * publisher can be resolved, a metadata-only "other" record is returned without
 * contacting any publisher API.
 *
 * @param input Probe input: `doi`, plus optional `publisher` (defaults to
 *   "auto"), `env` (defaults to process.env) and `fetch` (defaults to
 *   defaultFetch()).
 * @returns The publisher access record for the DOI.
 */
export async function probePublisherAccess(input) {
    const doi = normalizeDoi(input.doi);
    const env = input.env ?? process.env;
    const httpFetch = input.fetch ?? defaultFetch();
    let crossref;
    try {
        crossref = await discoverCrossrefTdm(doi, env, httpFetch);
    }
    catch {
        // Crossref discovery is optional; the probe continues without it.
        crossref = undefined;
    }
    const target = input.publisher ?? "auto";
    let publisher = target;
    if (target === "auto") {
        const fromCrossref = crossref?.inferredPublisher;
        // Crossref's inference may itself be "other". That is not nullish, so a
        // plain `??` chain would never consult the DOI-based inference.
        publisher = fromCrossref && fromCrossref !== "other"
            ? fromCrossref
            : inferPublisherFromDoi(doi) ?? "other";
    }
    if (publisher === "other") {
        const hasCrossrefLinks = Boolean(crossref?.links.length);
        return baseRecord({
            publisher: "other",
            doi,
            crossref,
            credentialStatus: "present",
            entitlementStatus: hasCrossrefLinks ? "unknown" : "metadata_only",
            tdmStatus: hasCrossrefLinks ? "requires_license_review" : "unknown",
            collectionDepth: "metadata",
            setupHint: "No first-party publisher adapter matched this DOI. Use Crossref metadata and the publisher landing page.",
            verificationNote: "LongTable could not map this DOI to Elsevier, Springer Nature, Wiley, or Taylor & Francis."
        });
    }
    return probeKnownPublisher(publisher, doi, env, httpFetch, crossref);
}
|
|
489
|
+
/**
 * Summarize, per publisher, whether its required environment variables are set.
 *
 * No network request is made: entitlement is always reported as "unknown", and
 * credentials are only ever "present" or "missing".
 *
 * @param env Environment-variable map to inspect; defaults to process.env.
 * @returns One record per entry of PUBLISHERS, in that order.
 */
export function summarizeConfiguredPublisherAccess(env = process.env) {
    const summaries = [];
    for (const publisher of PUBLISHERS) {
        const config = PUBLISHER_CONFIGS[publisher];
        const allRequiredPresent = missingRequiredEnv(config, env).length === 0;
        let credentialStatus = "missing";
        let tdmStatus = "not_configured";
        let verificationNote = "Required publisher credential environment variables are missing.";
        if (allRequiredPresent) {
            credentialStatus = "present";
            tdmStatus = "unknown";
            verificationNote = "Credential-like environment variables are present; run a DOI probe to confirm entitlement.";
        }
        summaries.push(baseRecord({
            publisher,
            config,
            env,
            credentialStatus,
            entitlementStatus: "unknown",
            tdmStatus,
            collectionDepth: "metadata",
            setupHint: config.setupHint,
            verificationNote
        }));
    }
    return summaries;
}
|
|
508
|
+
/**
 * Wrap publisher access records in a version-1 capability snapshot.
 *
 * @param records Records to embed as-is.
 * @param env Environment-variable map; only checked for a non-blank
 *   LONGTABLE_CONTACT_EMAIL. Defaults to process.env.
 * @returns Snapshot with `version`, `updatedAt` (from now()),
 *   `contactEmailPresent` and `records`.
 */
export function buildSearchCapabilitySnapshot(records, env = process.env) {
    const updatedAt = now();
    const contactEmail = env.LONGTABLE_CONTACT_EMAIL?.trim();
    const snapshot = {
        version: 1,
        updatedAt,
        contactEmailPresent: Boolean(contactEmail),
        records
    };
    return snapshot;
}
|
|
516
|
+
/**
 * Build the path of the search-capability snapshot file.
 *
 * @param home Base directory; defaults to the current user's home directory.
 * @returns `<home>/.longtable/search-capabilities.json`, joined with the
 *   platform path separator.
 */
export function searchCapabilitySnapshotPath(home = homedir()) {
    const configDir = join(home, ".longtable");
    return join(configDir, "search-capabilities.json");
}
|
|
519
|
+
/**
 * Map a publisher access record onto a card-level access status.
 *
 * Licensed full text counts as "checked" only when the record's collection
 * depth is "licensed_snippet"; any entitlement status not listed below maps to
 * "license_unknown".
 *
 * @param record Record whose `entitlementStatus` and `collectionDepth` are read.
 * @returns The access status string for the card.
 */
function bestAccessStatus(record) {
    const { entitlementStatus, collectionDepth } = record;
    switch (entitlementStatus) {
        case "licensed_full_text_available":
            return collectionDepth === "licensed_snippet"
                ? "licensed_full_text_checked"
                : "licensed_full_text_available";
        case "no_access":
            return "access_denied";
        case "abstract_available":
            return "abstract_available";
        case "metadata_only":
            return "metadata_only";
        default:
            return "license_unknown";
    }
}
|
|
537
|
+
/**
 * Attach publisher access information to evidence cards.
 *
 * Cards are processed one at a time, in order. Cards without a DOI, and cards
 * reached after `limit` probes have been started, are passed through
 * unchanged. A failed probe does not abort the run: that card is kept with
 * accessStatus "license_unknown" and a note carrying the error message.
 *
 * @param input `cards` to enrich, plus optional `env` (defaults to
 *   process.env), `fetch` (defaults to defaultFetch()) and `limit` (maximum
 *   number of probes, default 3).
 * @returns A new array with one entry per input card, in the same order.
 */
export async function enrichCardsWithPublisherAccess(input) {
    const env = input.env ?? process.env;
    const httpFetch = input.fetch ?? defaultFetch();
    const maxProbes = input.limit ?? 3;
    const output = [];
    let started = 0;
    for (const card of input.cards) {
        if (!card.doi || started >= maxProbes) {
            output.push(card);
            continue;
        }
        // Counted before the attempt, so failed probes also use up the budget.
        started += 1;
        try {
            const access = await probePublisherAccess({
                doi: card.doi,
                publisher: "auto",
                env,
                fetch: httpFetch
            });
            const snippetChecked = access.collectionDepth === "licensed_snippet";
            const enrichedCard = {
                ...card,
                publisher: access.publisher,
                publisherAccess: access,
                accessStatus: bestAccessStatus(access),
                verificationDepth: snippetChecked ? "licensed_snippet" : card.verificationDepth,
                entitlementSource: access.publisher === "other" ? "crossref_tdm" : "publisher_api",
                collectionDepth: access.collectionDepth,
                licenseNote: access.licenseNote,
                verificationNote: access.verificationNote
            };
            output.push(enrichedCard);
        }
        catch (error) {
            const reason = error instanceof Error ? error.message : String(error);
            output.push({
                ...card,
                accessStatus: "license_unknown",
                verificationNote: `Publisher access probe failed: ${reason}`
            });
        }
    }
    return output;
}
|
package/dist/search/rank.js
CHANGED
|
@@ -41,11 +41,12 @@ function sourceBoost(card) {
|
|
|
41
41
|
return 1;
|
|
42
42
|
}
|
|
43
43
|
function supportStatus(card, matches, keywords) {
|
|
44
|
-
if (!card.abstractAvailable) {
|
|
44
|
+
if (card.verificationDepth === "metadata" || !card.abstractAvailable) {
|
|
45
45
|
return "not_verified";
|
|
46
46
|
}
|
|
47
47
|
const ratio = keywords.length === 0 ? 0 : matches.length / keywords.length;
|
|
48
|
-
|
|
48
|
+
const fullTextChecked = card.verificationDepth === "licensed_snippet" || card.verificationDepth === "legal_full_text";
|
|
49
|
+
if (fullTextChecked && ratio >= 0.65)
|
|
49
50
|
return "direct_support";
|
|
50
51
|
if (ratio >= 0.35)
|
|
51
52
|
return "indirect_support";
|
|
@@ -92,8 +93,27 @@ function scoreCard(card, intent, matches) {
|
|
|
92
93
|
score += matches.length * 2;
|
|
93
94
|
return Math.max(0, Math.round(score * 10) / 10);
|
|
94
95
|
}
|
|
96
|
+
/**
 * Rank a card's access status, from 6 (licensed full text checked) down to 1
 * (license unknown). Any other status, including "access_denied", ranks 0.
 *
 * @param card Card whose `accessStatus` is read.
 * @returns Integer strength in the range 0..6.
 */
function accessStrength(card) {
    switch (card.accessStatus) {
        case "licensed_full_text_checked":
            return 6;
        case "licensed_full_text_available":
            return 5;
        case "legal_full_text_available":
            return 4;
        case "abstract_available":
            return 3;
        case "metadata_only":
            return 2;
        case "license_unknown":
            return 1;
        default:
            return 0;
    }
}
|
|
111
|
+
/**
 * Pick whichever card has the stronger access status, as ranked by
 * accessStrength. Ties go to `existing`.
 *
 * @param existing Card already held.
 * @param incoming Candidate card.
 * @returns `incoming` only when it ranks strictly higher, else `existing`.
 */
function strongerAccess(existing, incoming) {
    const incomingWins = accessStrength(incoming) > accessStrength(existing);
    if (incomingWins) {
        return incoming;
    }
    return existing;
}
|
|
95
114
|
function mergeCards(existing, incoming) {
|
|
96
115
|
const sourceRoutes = [...new Set([...existing.sourceRoutes, ...incoming.sourceRoutes])];
|
|
116
|
+
const stronger = strongerAccess(existing, incoming);
|
|
97
117
|
return {
|
|
98
118
|
...existing,
|
|
99
119
|
authors: existing.authors.length > 0 ? existing.authors : incoming.authors,
|
|
@@ -110,6 +130,14 @@ function mergeCards(existing, incoming) {
|
|
|
110
130
|
abstractAvailable: existing.abstractAvailable || incoming.abstractAvailable,
|
|
111
131
|
legalFullTextAvailable: existing.legalFullTextAvailable || incoming.legalFullTextAvailable,
|
|
112
132
|
fullTextUrl: existing.fullTextUrl ?? incoming.fullTextUrl,
|
|
133
|
+
publisher: existing.publisher ?? incoming.publisher,
|
|
134
|
+
entitlementSource: stronger.entitlementSource ?? existing.entitlementSource ?? incoming.entitlementSource,
|
|
135
|
+
collectionDepth: stronger.collectionDepth ?? existing.collectionDepth ?? incoming.collectionDepth,
|
|
136
|
+
licenseNote: stronger.licenseNote ?? existing.licenseNote ?? incoming.licenseNote,
|
|
137
|
+
publisherAccess: stronger.publisherAccess ?? existing.publisherAccess ?? incoming.publisherAccess,
|
|
138
|
+
accessStatus: stronger.accessStatus,
|
|
139
|
+
verificationDepth: stronger.verificationDepth,
|
|
140
|
+
verificationNote: stronger.verificationNote,
|
|
113
141
|
citationCount: Math.max(existing.citationCount ?? 0, incoming.citationCount ?? 0) || undefined,
|
|
114
142
|
researchDesign: existing.researchDesign ?? incoming.researchDesign,
|
|
115
143
|
constructsOrMeasures: [...new Set([...(existing.constructsOrMeasures ?? []), ...(incoming.constructsOrMeasures ?? [])])],
|
package/dist/search/run.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { buildResearchSearchIntent } from "./query.js";
|
|
2
2
|
import { dedupeAndRankCards } from "./rank.js";
|
|
3
|
+
import { enrichCardsWithPublisherAccess } from "./publisher-access.js";
|
|
3
4
|
import { assessSearchSourceCapabilities, runSourceSearch } from "./sources.js";
|
|
4
5
|
function runId() {
|
|
5
6
|
return `evidence_${Date.now().toString(36)}_${Math.random().toString(36).slice(2, 8)}`;
|
|
@@ -85,6 +86,13 @@ export async function runResearchSearch(input) {
|
|
|
85
86
|
}
|
|
86
87
|
}
|
|
87
88
|
const rankedCards = dedupeAndRankCards(cards, intent);
|
|
89
|
+
const finalCards = input.publisherAccess === true
|
|
90
|
+
? await enrichCardsWithPublisherAccess({
|
|
91
|
+
cards: rankedCards,
|
|
92
|
+
env,
|
|
93
|
+
fetch: httpFetch
|
|
94
|
+
})
|
|
95
|
+
: rankedCards;
|
|
88
96
|
const hasFailure = sourceReports.some((report) => report.status === "failed" || report.status === "skipped");
|
|
89
97
|
const status = hasFailure ? "partial" : "completed";
|
|
90
98
|
return {
|
|
@@ -94,7 +102,7 @@ export async function runResearchSearch(input) {
|
|
|
94
102
|
status,
|
|
95
103
|
intent,
|
|
96
104
|
sourceReports,
|
|
97
|
-
cards:
|
|
105
|
+
cards: finalCards,
|
|
98
106
|
skippedSources,
|
|
99
107
|
warnings: [
|
|
100
108
|
...skippedSources.map((capability) => capability.reason ?? `${capability.source} unavailable.`),
|
package/dist/search/sources.js
CHANGED
|
@@ -62,6 +62,30 @@ function inferEvidenceDepth(abstract, legalFullTextAvailable = false) {
|
|
|
62
62
|
return "abstract_only";
|
|
63
63
|
return "metadata_only";
|
|
64
64
|
}
|
|
65
|
+
/**
 * Derive a card's access status from what the source returned.
 *
 * A legal full-text flag takes precedence over the abstract.
 *
 * @param abstract Abstract text; any falsy value counts as absent.
 * @param legalFullTextAvailable Whether a legal full-text copy was found.
 * @returns "legal_full_text_available", "abstract_available" or "metadata_only".
 */
function inferAccessStatus(abstract, legalFullTextAvailable = false) {
    if (legalFullTextAvailable) {
        return "legal_full_text_available";
    }
    return abstract ? "abstract_available" : "metadata_only";
}
|
|
72
|
+
/**
 * Report how deeply a freshly built card could be verified.
 *
 * @param abstract Abstract text; any falsy value counts as absent.
 * @returns "abstract" when an abstract is present, otherwise "metadata".
 */
function inferVerificationDepth(abstract) {
    return abstract ? "abstract" : "metadata";
}
|
|
77
|
+
/**
 * Compose the human-readable note describing what a card's citation support is
 * based on.
 *
 * @param abstract Abstract text; any falsy value counts as absent.
 * @param legalFullTextAvailable Whether a legal full-text URL was found.
 * @returns One of four fixed sentences, chosen by the two inputs.
 */
function verificationNote(abstract, legalFullTextAvailable = false) {
    const hasAbstract = Boolean(abstract);
    if (legalFullTextAvailable) {
        return hasAbstract
            ? "Legal full text URL was found, but this card is abstract-based and not full-paper verified."
            : "Legal full text URL was found, but LongTable did not retrieve or verify the full text.";
    }
    return hasAbstract
        ? "Abstract is available; citation support is abstract-based, not full-paper verified."
        : "Metadata exists; citation support has not been verified against abstract or full text.";
}
|
|
65
89
|
function inferResearchDesign(abstract) {
|
|
66
90
|
const normalized = abstract?.toLowerCase() ?? "";
|
|
67
91
|
if (!normalized)
|
|
@@ -111,6 +135,9 @@ function baseCard(input) {
|
|
|
111
135
|
abstract,
|
|
112
136
|
abstractAvailable: Boolean(abstract),
|
|
113
137
|
evidenceDepth: inferEvidenceDepth(abstract, legalFullTextAvailable),
|
|
138
|
+
accessStatus: inferAccessStatus(abstract, legalFullTextAvailable),
|
|
139
|
+
verificationDepth: inferVerificationDepth(abstract),
|
|
140
|
+
verificationNote: verificationNote(abstract, legalFullTextAvailable),
|
|
114
141
|
legalFullTextAvailable,
|
|
115
142
|
fullTextUrl: input.fullTextUrl,
|
|
116
143
|
citationCount: input.citationCount,
|
|
@@ -128,7 +155,7 @@ async function fetchJson(context, url) {
|
|
|
128
155
|
const response = await context.fetch(url, {
|
|
129
156
|
headers: {
|
|
130
157
|
"accept": "application/json",
|
|
131
|
-
"user-agent": "LongTable/0.1.
|
|
158
|
+
"user-agent": "LongTable/0.1.30 (https://github.com/HosungYou/LongTable)"
|
|
132
159
|
}
|
|
133
160
|
});
|
|
134
161
|
if (!response.ok) {
|
|
@@ -140,7 +167,7 @@ async function fetchText(context, url) {
|
|
|
140
167
|
const response = await context.fetch(url, {
|
|
141
168
|
headers: {
|
|
142
169
|
"accept": "application/xml, text/xml, application/atom+xml, text/plain",
|
|
143
|
-
"user-agent": "LongTable/0.1.
|
|
170
|
+
"user-agent": "LongTable/0.1.30 (https://github.com/HosungYou/LongTable)"
|
|
144
171
|
}
|
|
145
172
|
});
|
|
146
173
|
if (!response.ok) {
|
package/dist/search/types.d.ts
CHANGED
|
@@ -1,7 +1,17 @@
|
|
|
1
1
|
export declare const SEARCH_SOURCES: readonly ["crossref", "arxiv", "openalex", "semantic_scholar", "pubmed", "eric", "doaj", "unpaywall"];
|
|
2
2
|
export type SearchSource = typeof SEARCH_SOURCES[number];
|
|
3
|
+
export declare const PUBLISHERS: readonly ["elsevier", "springer_nature", "wiley", "taylor_francis"];
|
|
4
|
+
export type Publisher = typeof PUBLISHERS[number];
|
|
5
|
+
export type PublisherProbeTarget = Publisher | "auto";
|
|
3
6
|
export type ResearchSearchIntentKind = "literature" | "theory" | "measurement" | "citation" | "metadata" | "venue";
|
|
4
7
|
export type EvidenceDepth = "metadata_only" | "abstract_only" | "legal_full_text_available" | "legal_full_text_unavailable" | "secondary_summary_only";
|
|
8
|
+
export type AccessStatus = "metadata_only" | "abstract_available" | "legal_full_text_available" | "licensed_full_text_available" | "licensed_full_text_checked" | "access_denied" | "license_unknown";
|
|
9
|
+
export type VerificationDepth = "metadata" | "abstract" | "legal_full_text" | "licensed_snippet" | "secondary";
|
|
10
|
+
export type EntitlementSource = "crossref_tdm" | "publisher_api" | "institutional_token" | "user_api_key" | "none";
|
|
11
|
+
export type CollectionDepth = "metadata" | "abstract" | "licensed_snippet" | "licensed_full_text_local_only";
|
|
12
|
+
export type CredentialStatus = "missing" | "present" | "valid" | "invalid";
|
|
13
|
+
export type EntitlementStatus = "unknown" | "no_access" | "metadata_only" | "abstract_available" | "licensed_full_text_available";
|
|
14
|
+
export type TdmStatus = "unknown" | "not_configured" | "permitted" | "denied" | "requires_license_review";
|
|
5
15
|
export type CitationSupportStatus = "direct_support" | "indirect_support" | "background" | "questionable_fit" | "not_verified";
|
|
6
16
|
export type SourceRunStatus = "completed" | "skipped" | "failed";
|
|
7
17
|
export type EvidenceRunStatus = "completed" | "partial" | "blocked";
|
|
@@ -39,8 +49,16 @@ export interface EvidenceCard {
|
|
|
39
49
|
abstract?: string;
|
|
40
50
|
abstractAvailable: boolean;
|
|
41
51
|
evidenceDepth: EvidenceDepth;
|
|
52
|
+
accessStatus: AccessStatus;
|
|
53
|
+
verificationDepth: VerificationDepth;
|
|
54
|
+
verificationNote: string;
|
|
42
55
|
legalFullTextAvailable: boolean;
|
|
43
56
|
fullTextUrl?: string;
|
|
57
|
+
publisher?: Publisher | "other";
|
|
58
|
+
entitlementSource?: EntitlementSource;
|
|
59
|
+
collectionDepth?: CollectionDepth;
|
|
60
|
+
licenseNote?: string;
|
|
61
|
+
publisherAccess?: PublisherAccessRecord;
|
|
44
62
|
citationCount?: number;
|
|
45
63
|
researchDesign?: string;
|
|
46
64
|
constructsOrMeasures?: string[];
|
|
@@ -120,4 +138,50 @@ export interface RunResearchSearchInput extends BuildSearchIntentInput {
|
|
|
120
138
|
env?: Record<string, string | undefined>;
|
|
121
139
|
fetch?: SearchFetch;
|
|
122
140
|
allowPartial?: boolean;
|
|
141
|
+
publisherAccess?: boolean;
|
|
142
|
+
}
|
|
143
|
+
/**
 * One link entry attached to a Crossref TDM discovery result.
 *
 * NOTE(review): the optional fields presumably mirror Crossref's
 * `content-type`, `content-version` and `intended-application` link
 * attributes — confirm against the discovery code.
 */
export interface CrossrefTdmLink {
    /** Link target. */
    url: string;
    contentType?: string;
    contentVersion?: string;
    intendedApplication?: string;
}
|
|
149
|
+
/**
 * Result of looking a DOI up in Crossref for text-and-data-mining details.
 */
export interface CrossrefTdmDiscovery {
    /** DOI the discovery was run for. */
    doi: string;
    /** Publisher name as a free-form string, when available. */
    publisher?: string;
    /** Supported publisher inferred for the DOI, or "other". */
    inferredPublisher?: Publisher | "other";
    title?: string;
    sourceUrl?: string;
    licenseUrls: Array<string>;
    links: Array<CrossrefTdmLink>;
}
|
|
158
|
+
/**
 * Outcome of checking publisher credentials and/or probing a DOI.
 *
 * The same shape serves configuration summaries (no `testedDoi`) and real DOI
 * probes.
 */
export interface PublisherAccessRecord {
    publisher: Publisher | "other";
    /** Time of the check. NOTE(review): format is not visible here — confirm. */
    checkedAt: string;

    // Status fields.
    credentialStatus: CredentialStatus;
    entitlementStatus: EntitlementStatus;
    tdmStatus: TdmStatus;
    collectionDepth: CollectionDepth;

    // Environment-variable names only.
    // NOTE(review): presumably never the values — confirm.
    requiredEnv: Array<string>;
    presentEnv: Array<string>;
    missingEnv: Array<string>;

    // Probe details; absent when no DOI was probed.
    testedDoi?: string;
    endpoint?: string;

    // Human-readable guidance and notes.
    setupHint: string;
    licenseNote?: string;
    verificationNote: string;
    evidenceSnippet?: string;

    /** Crossref discovery result, when one was obtained. */
    crossref?: CrossrefTdmDiscovery;
}
|
|
176
|
+
/**
 * Snapshot of publisher access records taken at a point in time.
 */
export interface SearchCapabilitySnapshot {
    /** Schema version; currently always the literal 1. */
    version: 1;
    /** When the snapshot was built. */
    updatedAt: string;
    /** Whether a contact email was configured when the snapshot was built. */
    contactEmailPresent: boolean;
    records: Array<PublisherAccessRecord>;
}
|
|
182
|
+
/**
 * Input for probing publisher access to one DOI.
 */
export interface PublisherProbeInput {
    /** DOI to probe. */
    doi: string;
    /** Specific publisher to probe, or "auto". */
    publisher?: PublisherProbeTarget;
    /** Environment-variable map to read credentials from. */
    env?: Record<string, string | undefined>;
    /** Fetch implementation to use for HTTP requests. */
    fetch?: SearchFetch;
}
|
package/dist/search/types.js
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@longtable/cli",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.30",
|
|
4
4
|
"private": false,
|
|
5
5
|
"description": "Researcher-facing LongTable CLI",
|
|
6
6
|
"type": "module",
|
|
@@ -28,12 +28,12 @@
|
|
|
28
28
|
"typecheck": "tsc -p tsconfig.json --noEmit"
|
|
29
29
|
},
|
|
30
30
|
"dependencies": {
|
|
31
|
-
"@longtable/checkpoints": "0.1.
|
|
32
|
-
"@longtable/core": "0.1.
|
|
33
|
-
"@longtable/memory": "0.1.
|
|
34
|
-
"@longtable/provider-claude": "0.1.
|
|
35
|
-
"@longtable/provider-codex": "0.1.
|
|
36
|
-
"@longtable/setup": "0.1.
|
|
31
|
+
"@longtable/checkpoints": "0.1.30",
|
|
32
|
+
"@longtable/core": "0.1.30",
|
|
33
|
+
"@longtable/memory": "0.1.30",
|
|
34
|
+
"@longtable/provider-claude": "0.1.30",
|
|
35
|
+
"@longtable/provider-codex": "0.1.30",
|
|
36
|
+
"@longtable/setup": "0.1.30"
|
|
37
37
|
},
|
|
38
38
|
"devDependencies": {
|
|
39
39
|
"@types/node": "^22.10.1",
|