@zenalexa/unicli 0.225.1 → 0.225.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +3 -3
- package/README.md +3 -3
- package/README.zh-CN.md +3 -3
- package/dist/adapters/acl-anthology/papers.d.ts +16 -9
- package/dist/adapters/acl-anthology/papers.d.ts.map +1 -1
- package/dist/adapters/acl-anthology/papers.js +322 -58
- package/dist/adapters/acl-anthology/papers.js.map +1 -1
- package/dist/adapters/arxiv/papers.d.ts +22 -4
- package/dist/adapters/arxiv/papers.d.ts.map +1 -1
- package/dist/adapters/arxiv/papers.js +202 -4
- package/dist/adapters/arxiv/papers.js.map +1 -1
- package/dist/adapters/baidu-scholar/search.d.ts +15 -1
- package/dist/adapters/baidu-scholar/search.d.ts.map +1 -1
- package/dist/adapters/baidu-scholar/search.js +72 -8
- package/dist/adapters/baidu-scholar/search.js.map +1 -1
- package/dist/adapters/biorxiv/preprints.d.ts +9 -0
- package/dist/adapters/biorxiv/preprints.d.ts.map +1 -0
- package/dist/adapters/biorxiv/preprints.js +78 -0
- package/dist/adapters/biorxiv/preprints.js.map +1 -0
- package/dist/adapters/cnki/search.d.ts +82 -0
- package/dist/adapters/cnki/search.d.ts.map +1 -0
- package/dist/adapters/cnki/search.js +236 -0
- package/dist/adapters/cnki/search.js.map +1 -0
- package/dist/adapters/cvf/papers.d.ts +12 -7
- package/dist/adapters/cvf/papers.d.ts.map +1 -1
- package/dist/adapters/cvf/papers.js +210 -27
- package/dist/adapters/cvf/papers.js.map +1 -1
- package/dist/adapters/dblp/publications.d.ts +12 -5
- package/dist/adapters/dblp/publications.d.ts.map +1 -1
- package/dist/adapters/dblp/publications.js +31 -8
- package/dist/adapters/dblp/publications.js.map +1 -1
- package/dist/adapters/google-scholar/search.d.ts +22 -1
- package/dist/adapters/google-scholar/search.d.ts.map +1 -1
- package/dist/adapters/google-scholar/search.js +129 -14
- package/dist/adapters/google-scholar/search.js.map +1 -1
- package/dist/adapters/hf/paper.d.ts +12 -3
- package/dist/adapters/hf/paper.d.ts.map +1 -1
- package/dist/adapters/hf/paper.js +65 -5
- package/dist/adapters/hf/paper.js.map +1 -1
- package/dist/adapters/medrxiv/preprints.d.ts +9 -0
- package/dist/adapters/medrxiv/preprints.d.ts.map +1 -0
- package/dist/adapters/medrxiv/preprints.js +78 -0
- package/dist/adapters/medrxiv/preprints.js.map +1 -0
- package/dist/adapters/neurips/proceedings.d.ts +8 -7
- package/dist/adapters/neurips/proceedings.d.ts.map +1 -1
- package/dist/adapters/neurips/proceedings.js +209 -21
- package/dist/adapters/neurips/proceedings.js.map +1 -1
- package/dist/adapters/openalex/works.d.ts +21 -5
- package/dist/adapters/openalex/works.d.ts.map +1 -1
- package/dist/adapters/openalex/works.js +108 -8
- package/dist/adapters/openalex/works.js.map +1 -1
- package/dist/adapters/openreview/papers.d.ts +10 -4
- package/dist/adapters/openreview/papers.d.ts.map +1 -1
- package/dist/adapters/openreview/papers.js +351 -24
- package/dist/adapters/openreview/papers.js.map +1 -1
- package/dist/adapters/pmlr/proceedings.d.ts +6 -6
- package/dist/adapters/pmlr/proceedings.d.ts.map +1 -1
- package/dist/adapters/pmlr/proceedings.js +92 -12
- package/dist/adapters/pmlr/proceedings.js.map +1 -1
- package/dist/adapters/pubmed/articles.d.ts +8 -4
- package/dist/adapters/pubmed/articles.d.ts.map +1 -1
- package/dist/adapters/pubmed/articles.js +272 -39
- package/dist/adapters/pubmed/articles.js.map +1 -1
- package/dist/adapters/rxiv/preprints.d.ts +75 -0
- package/dist/adapters/rxiv/preprints.d.ts.map +1 -0
- package/dist/adapters/rxiv/preprints.js +651 -0
- package/dist/adapters/rxiv/preprints.js.map +1 -0
- package/dist/adapters/scholar-artifacts/pdf-read.d.ts +49 -0
- package/dist/adapters/scholar-artifacts/pdf-read.d.ts.map +1 -0
- package/dist/adapters/scholar-artifacts/pdf-read.js +204 -0
- package/dist/adapters/scholar-artifacts/pdf-read.js.map +1 -0
- package/dist/adapters/scholar-artifacts/pdf.d.ts +16 -0
- package/dist/adapters/scholar-artifacts/pdf.d.ts.map +1 -0
- package/dist/adapters/scholar-artifacts/pdf.js +122 -0
- package/dist/adapters/scholar-artifacts/pdf.js.map +1 -0
- package/dist/adapters/semantic-scholar/papers.d.ts +6 -6
- package/dist/adapters/semantic-scholar/papers.d.ts.map +1 -1
- package/dist/adapters/semantic-scholar/papers.js +80 -6
- package/dist/adapters/semantic-scholar/papers.js.map +1 -1
- package/dist/adapters/unpaywall/works.d.ts +7 -7
- package/dist/adapters/unpaywall/works.d.ts.map +1 -1
- package/dist/adapters/unpaywall/works.js +104 -12
- package/dist/adapters/unpaywall/works.js.map +1 -1
- package/dist/adapters/wanfang/search.d.ts +14 -0
- package/dist/adapters/wanfang/search.d.ts.map +1 -1
- package/dist/adapters/wanfang/search.js +56 -7
- package/dist/adapters/wanfang/search.js.map +1 -1
- package/dist/browser/page.d.ts +2 -0
- package/dist/browser/page.d.ts.map +1 -1
- package/dist/browser/page.js +12 -0
- package/dist/browser/page.js.map +1 -1
- package/dist/browser/protocol.d.ts +6 -1
- package/dist/browser/protocol.d.ts.map +1 -1
- package/dist/browser/protocol.js.map +1 -1
- package/dist/commands/browser/actions.d.ts.map +1 -1
- package/dist/commands/browser/actions.js +487 -8
- package/dist/commands/browser/actions.js.map +1 -1
- package/dist/commands/compute.js +12 -1
- package/dist/commands/compute.js.map +1 -1
- package/dist/commands/schema.d.ts.map +1 -1
- package/dist/commands/schema.js +22 -0
- package/dist/commands/schema.js.map +1 -1
- package/dist/commands/scholar.d.ts +77 -5
- package/dist/commands/scholar.d.ts.map +1 -1
- package/dist/commands/scholar.js +2945 -83
- package/dist/commands/scholar.js.map +1 -1
- package/dist/commands/search.d.ts.map +1 -1
- package/dist/commands/search.js +14 -3
- package/dist/commands/search.js.map +1 -1
- package/dist/compute/contracts.d.ts +55 -0
- package/dist/compute/contracts.d.ts.map +1 -0
- package/dist/compute/contracts.js +487 -0
- package/dist/compute/contracts.js.map +1 -0
- package/dist/core/command-contract.d.ts.map +1 -1
- package/dist/core/command-contract.js +5 -0
- package/dist/core/command-contract.js.map +1 -1
- package/dist/core/schema-v2.d.ts +1 -0
- package/dist/core/schema-v2.d.ts.map +1 -1
- package/dist/core/schema-v2.js +1 -0
- package/dist/core/schema-v2.js.map +1 -1
- package/dist/discovery/aliases.d.ts +8 -1
- package/dist/discovery/aliases.d.ts.map +1 -1
- package/dist/discovery/aliases.js +333 -20
- package/dist/discovery/aliases.js.map +1 -1
- package/dist/discovery/core-catalog.d.ts +2 -0
- package/dist/discovery/core-catalog.d.ts.map +1 -1
- package/dist/discovery/core-catalog.js +525 -66
- package/dist/discovery/core-catalog.js.map +1 -1
- package/dist/discovery/intents.d.ts +1 -0
- package/dist/discovery/intents.d.ts.map +1 -1
- package/dist/discovery/intents.js +299 -2
- package/dist/discovery/intents.js.map +1 -1
- package/dist/discovery/loader.d.ts.map +1 -1
- package/dist/discovery/loader.js +3 -0
- package/dist/discovery/loader.js.map +1 -1
- package/dist/discovery/macos-dynamic.d.ts +1 -0
- package/dist/discovery/macos-dynamic.d.ts.map +1 -1
- package/dist/discovery/macos-dynamic.js +20 -1
- package/dist/discovery/macos-dynamic.js.map +1 -1
- package/dist/discovery/search.d.ts.map +1 -1
- package/dist/discovery/search.js +12 -5
- package/dist/discovery/search.js.map +1 -1
- package/dist/engine/browser/evidence.d.ts +34 -1
- package/dist/engine/browser/evidence.d.ts.map +1 -1
- package/dist/engine/browser/evidence.js +141 -6
- package/dist/engine/browser/evidence.js.map +1 -1
- package/dist/engine/capability-policy.d.ts.map +1 -1
- package/dist/engine/capability-policy.js +30 -4
- package/dist/engine/capability-policy.js.map +1 -1
- package/dist/engine/kernel/stages.d.ts.map +1 -1
- package/dist/engine/kernel/stages.js +3 -0
- package/dist/engine/kernel/stages.js.map +1 -1
- package/dist/engine/operation-policy.d.ts +4 -1
- package/dist/engine/operation-policy.d.ts.map +1 -1
- package/dist/engine/operation-policy.js +23 -0
- package/dist/engine/operation-policy.js.map +1 -1
- package/dist/engine/steps/fetch-text.d.ts.map +1 -1
- package/dist/engine/steps/fetch-text.js +2 -2
- package/dist/engine/steps/fetch-text.js.map +1 -1
- package/dist/engine/steps/fetch.d.ts +1 -0
- package/dist/engine/steps/fetch.d.ts.map +1 -1
- package/dist/engine/steps/fetch.js +24 -4
- package/dist/engine/steps/fetch.js.map +1 -1
- package/dist/fast-path/handlers/discovery.d.ts +5 -5
- package/dist/fast-path/handlers/discovery.d.ts.map +1 -1
- package/dist/fast-path/handlers/discovery.js +61 -8
- package/dist/fast-path/handlers/discovery.js.map +1 -1
- package/dist/fast-path/manifest.d.ts +3 -0
- package/dist/fast-path/manifest.d.ts.map +1 -1
- package/dist/fast-path/manifest.js.map +1 -1
- package/dist/fast-path/policy.d.ts.map +1 -1
- package/dist/fast-path/policy.js +3 -0
- package/dist/fast-path/policy.js.map +1 -1
- package/dist/fast-path/render.d.ts +2 -0
- package/dist/fast-path/render.d.ts.map +1 -1
- package/dist/fast-path/render.js +9 -0
- package/dist/fast-path/render.js.map +1 -1
- package/dist/manifest-compact.txt +2 -2
- package/dist/manifest.json +6977 -1002
- package/dist/mcp/handler.d.ts +2 -16
- package/dist/mcp/handler.d.ts.map +1 -1
- package/dist/mcp/handler.js.map +1 -1
- package/dist/mcp/http-transport.d.ts +7 -1
- package/dist/mcp/http-transport.d.ts.map +1 -1
- package/dist/mcp/http-transport.js +20 -1
- package/dist/mcp/http-transport.js.map +1 -1
- package/dist/mcp/jsonrpc.d.ts +27 -0
- package/dist/mcp/jsonrpc.d.ts.map +1 -0
- package/dist/mcp/jsonrpc.js +12 -0
- package/dist/mcp/jsonrpc.js.map +1 -0
- package/dist/mcp/origin-guard.d.ts +26 -0
- package/dist/mcp/origin-guard.d.ts.map +1 -0
- package/dist/mcp/origin-guard.js +42 -0
- package/dist/mcp/origin-guard.js.map +1 -0
- package/dist/mcp/profiles/computer-use.d.ts.map +1 -1
- package/dist/mcp/profiles/computer-use.js +30 -270
- package/dist/mcp/profiles/computer-use.js.map +1 -1
- package/dist/mcp/streamable-http/session.d.ts +4 -22
- package/dist/mcp/streamable-http/session.d.ts.map +1 -1
- package/dist/mcp/streamable-http/session.js +4 -24
- package/dist/mcp/streamable-http/session.js.map +1 -1
- package/dist/mcp/tools.d.ts.map +1 -1
- package/dist/mcp/tools.js +74 -54
- package/dist/mcp/tools.js.map +1 -1
- package/dist/output/envelope.d.ts +2 -0
- package/dist/output/envelope.d.ts.map +1 -1
- package/dist/output/envelope.js.map +1 -1
- package/dist/output/error-map.d.ts +14 -0
- package/dist/output/error-map.d.ts.map +1 -1
- package/dist/output/error-map.js +20 -0
- package/dist/output/error-map.js.map +1 -1
- package/dist/registry.d.ts +2 -0
- package/dist/registry.d.ts.map +1 -1
- package/dist/registry.js +1 -0
- package/dist/registry.js.map +1 -1
- package/dist/transport/cascade.d.ts.map +1 -1
- package/dist/transport/cascade.js +77 -5
- package/dist/transport/cascade.js.map +1 -1
- package/dist/transport/refs.d.ts +33 -1
- package/dist/transport/refs.d.ts.map +1 -1
- package/dist/transport/refs.js +40 -1
- package/dist/transport/refs.js.map +1 -1
- package/dist/types/scholarly.d.ts +19 -4
- package/dist/types/scholarly.d.ts.map +1 -1
- package/dist/types/scholarly.js +4 -4
- package/dist/types.d.ts +8 -0
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/package.json +1 -1
- package/server.json +2 -2
- package/skills/unicli/SKILL.md +1 -1
- package/skills/unicli-claude-code/SKILL.md +1 -1
- package/skills/unicli-hermes/SKILL.md +1 -1
- package/src/adapters/acl-anthology/papers.test.ts +111 -0
- package/src/adapters/acl-anthology/papers.ts +379 -71
- package/src/adapters/arxiv/papers.test.ts +46 -0
- package/src/adapters/arxiv/papers.ts +251 -4
- package/src/adapters/baidu-scholar/search.ts +74 -11
- package/src/adapters/biorxiv/preprints.ts +112 -0
- package/src/adapters/cnki/search.ts +357 -0
- package/src/adapters/cvf/papers.ts +260 -27
- package/src/adapters/dblp/publications.test.ts +9 -0
- package/src/adapters/dblp/publications.ts +31 -8
- package/src/adapters/defuddle/read.yaml +30 -0
- package/src/adapters/google-scholar/search.ts +165 -17
- package/src/adapters/hf/paper.test.ts +23 -0
- package/src/adapters/hf/paper.ts +89 -5
- package/src/adapters/hf/top.yaml +34 -2
- package/src/adapters/huggingface-papers/daily.yaml +37 -3
- package/src/adapters/huggingface-papers/search.yaml +43 -9
- package/src/adapters/jina/read.yaml +30 -0
- package/src/adapters/markdown-new/read.yaml +50 -0
- package/src/adapters/medrxiv/preprints.ts +112 -0
- package/src/adapters/neurips/proceedings.ts +266 -22
- package/src/adapters/ollama-cloud/fetch.yaml +39 -0
- package/src/adapters/ollama-cloud/search.yaml +43 -0
- package/src/adapters/openalex/works.test.ts +15 -4
- package/src/adapters/openalex/works.ts +136 -8
- package/src/adapters/openreview/papers.test.ts +31 -0
- package/src/adapters/openreview/papers.ts +407 -29
- package/src/adapters/pmlr/proceedings.ts +102 -12
- package/src/adapters/pubmed/articles.test.ts +88 -1
- package/src/adapters/pubmed/articles.ts +343 -44
- package/src/adapters/rxiv/preprints.test.ts +233 -0
- package/src/adapters/rxiv/preprints.ts +849 -0
- package/src/adapters/scholar-artifacts/pdf-read.ts +277 -0
- package/src/adapters/scholar-artifacts/pdf.ts +133 -0
- package/src/adapters/semantic-scholar/papers.ts +98 -6
- package/src/adapters/unpaywall/works.ts +141 -12
- package/src/adapters/wanfang/search.ts +57 -7
- package/src/adapters/cnki/search.yaml +0 -49
package/dist/commands/scholar.js
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* @owner src::commands::scholar
|
|
3
|
-
* @does Top-level `unicli scholar` meta-command for academic source discovery: searches, retrieves, PDF availability, citation/reference traversal, and doctor output across adapters tagged with `scholar.*` capabilities.
|
|
3
|
+
* @does Top-level `unicli scholar` meta-command for academic source discovery: searches, retrieves, non-destructive availability audits, agent workflow/runbook planning, evidence/citation-safety classification, reproducibility/install planning, source coverage comparison, peer-review audit retrieval, PDF availability, source-direct full text, PDF artifact download/read, code/dataset/model resource lookup, citation/reference traversal, and doctor output across adapters tagged with `scholar.*` capabilities.
|
|
4
4
|
* @needs src/registry.ts, src/types/scholarly.ts, src/engine/kernel/execute.ts, src/output/formatter.ts
|
|
5
5
|
* @feeds src/cli.ts, MCP/agent command discovery via list/search/do
|
|
6
|
-
* @breaks Missing capability tags make scholarly sources invisible
|
|
7
|
-
* @invariants --sources default is a conservative first-source set; --sources all is registry capability discovery; DOI is the primary dedupe key;
|
|
8
|
-
* @side-effects Executes adapter commands through the engine kernel;
|
|
6
|
+
* @breaks Missing capability tags make scholarly sources invisible; unfiltered internal ArgBags violate adapter schemas; weak reference routing can send DOI/arXiv/PMID/OpenReview lookups to the wrong first source; weak canonicalized availability can leave title-based Agent runbooks blocked on explicit source subsets; weak workflow/evidence classification can invite citation from metadata-only rows; weak reproducibility planning can encourage running uninspected remote code; weak review command selection can return search rows instead of review-thread evidence; missing fulltext/PDF artifact adapter blocks read; missing resource output fields hide linked code/data.
|
|
7
|
+
* @invariants --sources default is a conservative first-source set; coverage audits inspect registered capabilities without network I/O; workflow and evidence classification are derived from availability rows and never download, execute, or summarize claims; availability/evidence/source-audit/workflow/reproduce may use canonical lookup sources only to resolve an unknown title before rerunning the caller's requested source scope; reproducibility planning never executes clone/install/run commands and requires repository inspection before install; review retrieval prefers forum/thread commands over search commands; resource commands default to sources exposing the requested resource capability; availability audits fetch metadata/PDF/resource evidence without writing artifacts; source-direct fulltext is tried before PDF artifacts for `scholar read`; unknown artifact refs use every scholar.pdf source; --sources all is registry capability discovery; DOI is the primary dedupe key; internal fan-out passes only args declared by the target command.
|
|
8
|
+
* @side-effects Executes adapter commands through the engine kernel; source-direct fulltext may fetch remote XML; artifact subcommands may write PDFs and execute pdftotext through scholar-artifacts.
|
|
9
9
|
* @perf Fan-out is sequential today, O(S * R), where S is source count and R is rows per source.
|
|
10
10
|
* @concurrency safe — Commander handlers run one at a time per process
|
|
11
11
|
* @test tests/unit/commands/scholar.test.ts
|
|
@@ -13,6 +13,7 @@
|
|
|
13
13
|
* @since 2026-05-19
|
|
14
14
|
*/
|
|
15
15
|
import { commandStrategy, getAllAdapters, resolveCommand, } from "../registry.js";
|
|
16
|
+
import { resolveArgs } from "../engine/args.js";
|
|
16
17
|
import { buildInvocation, execute } from "../engine/kernel/execute.js";
|
|
17
18
|
import { makeCtx } from "../output/envelope.js";
|
|
18
19
|
import { detectFormat, format } from "../output/formatter.js";
|
|
@@ -25,6 +26,18 @@ export const DEFAULT_SCHOLAR_SOURCES = [
|
|
|
25
26
|
"dblp",
|
|
26
27
|
"pubmed",
|
|
27
28
|
];
|
|
29
|
+
const CANONICAL_REFERENCE_SOURCES = [
|
|
30
|
+
"semantic-scholar",
|
|
31
|
+
"openalex",
|
|
32
|
+
"crossref",
|
|
33
|
+
"arxiv",
|
|
34
|
+
"pubmed",
|
|
35
|
+
"openreview",
|
|
36
|
+
"huggingface-papers",
|
|
37
|
+
"hf",
|
|
38
|
+
"biorxiv",
|
|
39
|
+
"medrxiv",
|
|
40
|
+
];
|
|
28
41
|
export const SCHOLAR_CAPABILITIES = [
|
|
29
42
|
"scholar.search",
|
|
30
43
|
"scholar.get",
|
|
@@ -36,10 +49,28 @@ export const SCHOLAR_CAPABILITIES = [
|
|
|
36
49
|
"scholar.datasets",
|
|
37
50
|
"scholar.code",
|
|
38
51
|
"scholar.review",
|
|
52
|
+
"scholar.fulltext",
|
|
39
53
|
];
|
|
54
|
+
const SINGLE_RECORD_ARG_NAMES = new Set([
|
|
55
|
+
"id",
|
|
56
|
+
"ref",
|
|
57
|
+
"doi",
|
|
58
|
+
"arxiv_id",
|
|
59
|
+
"pmid",
|
|
60
|
+
"key",
|
|
61
|
+
"forum",
|
|
62
|
+
]);
|
|
40
63
|
function hasAnyScholarCapability(adapter) {
|
|
41
64
|
return Object.values(adapter.commands).some((command) => (command.capabilities ?? []).some((cap) => cap.startsWith("scholar.")));
|
|
42
65
|
}
|
|
66
|
+
function isSingleRecordScholarCommand(command) {
|
|
67
|
+
if ((command.capabilities ?? []).includes("scholar.get"))
|
|
68
|
+
return true;
|
|
69
|
+
return (command.adapterArgs ?? []).some((arg) => arg.required === true && SINGLE_RECORD_ARG_NAMES.has(arg.name));
|
|
70
|
+
}
|
|
71
|
+
function declaresAdapterArg(command, name) {
|
|
72
|
+
return (command.adapterArgs ?? []).some((arg) => arg.name === name);
|
|
73
|
+
}
|
|
43
74
|
export function listScholarAdapters() {
|
|
44
75
|
return getAllAdapters()
|
|
45
76
|
.filter(hasAnyScholarCapability)
|
|
@@ -56,12 +87,91 @@ export function resolveScholarSources(sourcesArg, fallback = DEFAULT_SCHOLAR_SOU
|
|
|
56
87
|
.filter(Boolean);
|
|
57
88
|
}
|
|
58
89
|
export function findScholarCommandByCapability(adapter, capability) {
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
90
|
+
const matches = Object.entries(adapter.commands).filter(([, command]) => (command.capabilities ?? []).includes(capability));
|
|
91
|
+
if (capability === "scholar.pdf" ||
|
|
92
|
+
capability === "scholar.code" ||
|
|
93
|
+
capability === "scholar.datasets") {
|
|
94
|
+
const singleRecord = matches.find(([, command]) => isSingleRecordScholarCommand(command));
|
|
95
|
+
if (singleRecord) {
|
|
96
|
+
return { name: singleRecord[0], command: singleRecord[1] };
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
const first = matches[0];
|
|
100
|
+
if (first)
|
|
101
|
+
return { name: first[0], command: first[1] };
|
|
102
|
+
return undefined;
|
|
103
|
+
}
|
|
104
|
+
function findScholarSingleRecordCommandByCapability(adapter, capability) {
|
|
105
|
+
const match = Object.entries(adapter.commands).find(([, command]) => (command.capabilities ?? []).includes(capability) &&
|
|
106
|
+
isSingleRecordScholarCommand(command));
|
|
107
|
+
return match ? { name: match[0], command: match[1] } : undefined;
|
|
108
|
+
}
|
|
109
|
+
export function findScholarResourceSearchCommandByCapability(adapter, capability) {
|
|
110
|
+
const match = Object.entries(adapter.commands).find(([, command]) => {
|
|
111
|
+
const capabilities = command.capabilities ?? [];
|
|
112
|
+
return (capabilities.includes("scholar.search") &&
|
|
113
|
+
capabilities.includes(capability) &&
|
|
114
|
+
declaresAdapterArg(command, "query"));
|
|
115
|
+
});
|
|
116
|
+
if (!match)
|
|
117
|
+
return undefined;
|
|
118
|
+
return { name: match[0], command: match[1] };
|
|
119
|
+
}
|
|
120
|
+
export function findScholarQueryableSearchCommand(adapter) {
|
|
121
|
+
const match = Object.entries(adapter.commands).find(([, command]) => {
|
|
122
|
+
const capabilities = command.capabilities ?? [];
|
|
123
|
+
return (capabilities.includes("scholar.search") &&
|
|
124
|
+
declaresAdapterArg(command, "query"));
|
|
125
|
+
});
|
|
126
|
+
if (!match)
|
|
127
|
+
return undefined;
|
|
128
|
+
return { name: match[0], command: match[1] };
|
|
129
|
+
}
|
|
130
|
+
export function findScholarReviewThreadCommand(adapter) {
|
|
131
|
+
const matches = Object.entries(adapter.commands).filter(([, command]) => (command.capabilities ?? []).includes("scholar.review"));
|
|
132
|
+
const forumCommand = matches.find(([, command]) => (command.adapterArgs ?? []).some((arg) => arg.required === true && arg.name === "forum"));
|
|
133
|
+
if (forumCommand) {
|
|
134
|
+
return { name: forumCommand[0], command: forumCommand[1] };
|
|
135
|
+
}
|
|
136
|
+
const namedReviewCommand = matches.find(([name]) => /reviews?/i.test(name));
|
|
137
|
+
if (namedReviewCommand) {
|
|
138
|
+
return { name: namedReviewCommand[0], command: namedReviewCommand[1] };
|
|
62
139
|
}
|
|
140
|
+
const first = matches[0];
|
|
141
|
+
if (first)
|
|
142
|
+
return { name: first[0], command: first[1] };
|
|
63
143
|
return undefined;
|
|
64
144
|
}
|
|
145
|
+
export function listScholarSourcesByCapability(capability) {
|
|
146
|
+
return listScholarAdapters()
|
|
147
|
+
.filter((adapter) => findScholarCommandByCapability(adapter, capability))
|
|
148
|
+
.map((adapter) => adapter.name);
|
|
149
|
+
}
|
|
150
|
+
function listScholarReviewSources() {
|
|
151
|
+
return listScholarAdapters()
|
|
152
|
+
.filter((adapter) => findScholarReviewThreadCommand(adapter))
|
|
153
|
+
.map((adapter) => adapter.name);
|
|
154
|
+
}
|
|
155
|
+
function listSingleRecordScholarSourcesByCapability(capability) {
|
|
156
|
+
return listScholarAdapters()
|
|
157
|
+
.filter((adapter) => Boolean(findScholarSingleRecordCommandByCapability(adapter, capability)))
|
|
158
|
+
.map((adapter) => adapter.name);
|
|
159
|
+
}
|
|
160
|
+
function listResourceSearchScholarSourcesByCapability(capability) {
|
|
161
|
+
return listScholarAdapters()
|
|
162
|
+
.filter((adapter) => Boolean(findScholarResourceSearchCommandByCapability(adapter, capability)))
|
|
163
|
+
.map((adapter) => adapter.name);
|
|
164
|
+
}
|
|
165
|
+
function listResourceDetailSourcesForSearchFallback(capability, opts) {
|
|
166
|
+
const singleRecordSources = listSingleRecordScholarSourcesByCapability(capability);
|
|
167
|
+
if (opts.source) {
|
|
168
|
+
return singleRecordSources.includes(opts.source) ? [opts.source] : [];
|
|
169
|
+
}
|
|
170
|
+
if (opts.sources) {
|
|
171
|
+
return resolveScholarSources(opts.sources).filter((source) => singleRecordSources.includes(source));
|
|
172
|
+
}
|
|
173
|
+
return singleRecordSources;
|
|
174
|
+
}
|
|
65
175
|
function bareDoi(value) {
|
|
66
176
|
return value
|
|
67
177
|
.trim()
|
|
@@ -83,7 +193,14 @@ export function resolveScholarReference(ref) {
|
|
|
83
193
|
return {
|
|
84
194
|
kind: "doi",
|
|
85
195
|
value: doi,
|
|
86
|
-
preferredSources: [
|
|
196
|
+
preferredSources: [
|
|
197
|
+
"openalex",
|
|
198
|
+
"crossref",
|
|
199
|
+
"semantic-scholar",
|
|
200
|
+
"unpaywall",
|
|
201
|
+
"biorxiv",
|
|
202
|
+
"medrxiv",
|
|
203
|
+
],
|
|
87
204
|
};
|
|
88
205
|
}
|
|
89
206
|
if (/^(?:arxiv:|https?:\/\/arxiv\.org\/(?:abs|pdf)\/|\d{4}\.\d{4,5})/i.test(raw)) {
|
|
@@ -101,7 +218,7 @@ export function resolveScholarReference(ref) {
|
|
|
101
218
|
preferredSources: ["pubmed", "semantic-scholar", "openalex"],
|
|
102
219
|
};
|
|
103
220
|
}
|
|
104
|
-
const openReview = raw.match(/^openreview:\s
|
|
221
|
+
const openReview = raw.match(/^(?:openreview:\s*|https?:\/\/openreview\.net\/forum\?id=)([A-Za-z0-9_-]{6,20})/i);
|
|
105
222
|
if (openReview) {
|
|
106
223
|
return {
|
|
107
224
|
kind: "openreview",
|
|
@@ -178,9 +295,12 @@ function numberOpt(raw, fallback, max) {
|
|
|
178
295
|
return n;
|
|
179
296
|
}
|
|
180
297
|
function coerceStringArray(value) {
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
298
|
+
const raw = Array.isArray(value)
|
|
299
|
+
? value
|
|
300
|
+
: typeof value === "string"
|
|
301
|
+
? value.split(/\s*(?:,|;|\n)\s*/)
|
|
302
|
+
: [];
|
|
303
|
+
const out = raw.map((item) => String(item ?? "").trim()).filter(Boolean);
|
|
184
304
|
return out.length > 0 ? out : undefined;
|
|
185
305
|
}
|
|
186
306
|
function coerceNumber(value) {
|
|
@@ -192,7 +312,7 @@ function coerceNumber(value) {
|
|
|
192
312
|
}
|
|
193
313
|
return undefined;
|
|
194
314
|
}
|
|
195
|
-
function coerceToScholarlyRecords(rows, source) {
|
|
315
|
+
export function coerceToScholarlyRecords(rows, source) {
|
|
196
316
|
if (!Array.isArray(rows))
|
|
197
317
|
return [];
|
|
198
318
|
const out = [];
|
|
@@ -203,6 +323,26 @@ function coerceToScholarlyRecords(rows, source) {
|
|
|
203
323
|
if (typeof record.id !== "string" || typeof record.title !== "string") {
|
|
204
324
|
continue;
|
|
205
325
|
}
|
|
326
|
+
const sourceUrl = typeof record.source_url === "string" && record.source_url.length > 0
|
|
327
|
+
? record.source_url
|
|
328
|
+
: typeof record.url === "string" && record.url.length > 0
|
|
329
|
+
? record.url
|
|
330
|
+
: undefined;
|
|
331
|
+
const pdfUrl = typeof record.pdf_url === "string" && record.pdf_url.length > 0
|
|
332
|
+
? record.pdf_url
|
|
333
|
+
: typeof record.pdf === "string" && record.pdf.length > 0
|
|
334
|
+
? record.pdf
|
|
335
|
+
: undefined;
|
|
336
|
+
const recordDate = typeof record.date === "string" && record.date.length > 0
|
|
337
|
+
? record.date
|
|
338
|
+
: typeof record.pdate === "string" && record.pdate.length > 0
|
|
339
|
+
? record.pdate
|
|
340
|
+
: typeof record.published === "string" && record.published.length > 0
|
|
341
|
+
? record.published
|
|
342
|
+
: typeof record.publishedAt === "string" &&
|
|
343
|
+
record.publishedAt.length > 0
|
|
344
|
+
? record.publishedAt
|
|
345
|
+
: undefined;
|
|
206
346
|
const work = {
|
|
207
347
|
id: record.id,
|
|
208
348
|
title: record.title,
|
|
@@ -218,34 +358,62 @@ function coerceToScholarlyRecords(rows, source) {
|
|
|
218
358
|
if (authors)
|
|
219
359
|
work.authors = authors;
|
|
220
360
|
const year = coerceNumber(record.year);
|
|
221
|
-
if (year !== undefined)
|
|
361
|
+
if (year !== undefined) {
|
|
222
362
|
work.year = year;
|
|
363
|
+
}
|
|
364
|
+
else if (recordDate) {
|
|
365
|
+
const dateYear = Number(recordDate.slice(0, 4));
|
|
366
|
+
if (Number.isInteger(dateYear))
|
|
367
|
+
work.year = dateYear;
|
|
368
|
+
}
|
|
369
|
+
if (recordDate)
|
|
370
|
+
work.date = recordDate;
|
|
371
|
+
if (sourceUrl)
|
|
372
|
+
work.source_url = sourceUrl;
|
|
373
|
+
if (pdfUrl)
|
|
374
|
+
work.pdf_url = pdfUrl;
|
|
223
375
|
for (const field of [
|
|
224
|
-
"date",
|
|
225
376
|
"venue",
|
|
226
377
|
"type",
|
|
227
378
|
"abstract",
|
|
228
379
|
"doi",
|
|
229
380
|
"arxiv_id",
|
|
230
381
|
"pmid",
|
|
382
|
+
"pmc_id",
|
|
231
383
|
"openalex_id",
|
|
232
384
|
"semantic_scholar_id",
|
|
233
385
|
"dblp_key",
|
|
234
386
|
"openreview_id",
|
|
235
387
|
"oa_status",
|
|
236
|
-
"pdf_url",
|
|
237
388
|
"landing_url",
|
|
238
389
|
"code_url",
|
|
390
|
+
"project_url",
|
|
239
391
|
"dataset_url",
|
|
240
|
-
"
|
|
392
|
+
"model_urls",
|
|
393
|
+
"dataset_urls",
|
|
394
|
+
"space_urls",
|
|
241
395
|
]) {
|
|
242
396
|
if (typeof record[field] === "string" && record[field].length > 0) {
|
|
243
397
|
work[field] = record[field];
|
|
244
398
|
}
|
|
245
399
|
}
|
|
400
|
+
if (!work.openreview_id && source === "openreview") {
|
|
401
|
+
work.openreview_id = work.id;
|
|
402
|
+
}
|
|
403
|
+
if (!work.arxiv_id) {
|
|
404
|
+
const arxivId = work.id
|
|
405
|
+
.replace(/^https?:\/\/arxiv\.org\/abs\//i, "")
|
|
406
|
+
.replace(/v\d+$/i, "");
|
|
407
|
+
if (/^\d{4}\.\d{4,5}/.test(arxivId))
|
|
408
|
+
work.arxiv_id = arxivId;
|
|
409
|
+
}
|
|
246
410
|
for (const [sourceField, targetField] of [
|
|
247
411
|
["cited_by_count", "cited_by_count"],
|
|
248
412
|
["references_count", "references_count"],
|
|
413
|
+
["github_stars", "github_stars"],
|
|
414
|
+
["num_models", "num_models"],
|
|
415
|
+
["num_datasets", "num_datasets"],
|
|
416
|
+
["num_spaces", "num_spaces"],
|
|
249
417
|
]) {
|
|
250
418
|
const n = coerceNumber(record[sourceField]);
|
|
251
419
|
if (n !== undefined)
|
|
@@ -256,10 +424,184 @@ function coerceToScholarlyRecords(rows, source) {
|
|
|
256
424
|
}
|
|
257
425
|
if (record.raw !== undefined)
|
|
258
426
|
work.raw = record.raw;
|
|
427
|
+
const matchedFields = coerceStringArray(record.matched_fields);
|
|
428
|
+
if (matchedFields)
|
|
429
|
+
work.matched_fields = matchedFields;
|
|
430
|
+
for (const field of ["search_scope", "search_window"]) {
|
|
431
|
+
if (typeof record[field] === "string" && record[field].length > 0) {
|
|
432
|
+
work[field] = record[field];
|
|
433
|
+
}
|
|
434
|
+
}
|
|
435
|
+
for (const [sourceField, targetField] of [
|
|
436
|
+
["search_scanned_records", "search_scanned_records"],
|
|
437
|
+
["search_total_records", "search_total_records"],
|
|
438
|
+
]) {
|
|
439
|
+
const n = coerceNumber(record[sourceField]);
|
|
440
|
+
if (n !== undefined)
|
|
441
|
+
work[targetField] = n;
|
|
442
|
+
}
|
|
443
|
+
if (typeof record.search_exhaustive === "boolean") {
|
|
444
|
+
work.search_exhaustive = record.search_exhaustive;
|
|
445
|
+
}
|
|
259
446
|
out.push(work);
|
|
260
447
|
}
|
|
261
448
|
return out;
|
|
262
449
|
}
|
|
450
|
+
function definedEntries(args) {
|
|
451
|
+
return Object.entries(args).filter(([, value]) => value !== undefined);
|
|
452
|
+
}
|
|
453
|
+
export function normalizeScholarCommandArgs(command, args) {
|
|
454
|
+
const adapterArgs = command.adapterArgs ?? [];
|
|
455
|
+
const declared = new Set(adapterArgs.map((arg) => arg.name));
|
|
456
|
+
if (declared.size === 0)
|
|
457
|
+
return Object.fromEntries(definedEntries(args));
|
|
458
|
+
const filtered = Object.fromEntries(definedEntries(args).filter(([name]) => declared.has(name)));
|
|
459
|
+
const internalSchema = adapterArgs.map((arg) => ({
|
|
460
|
+
...arg,
|
|
461
|
+
positional: false,
|
|
462
|
+
}));
|
|
463
|
+
const resolved = resolveArgs({
|
|
464
|
+
opts: filtered,
|
|
465
|
+
positionals: [],
|
|
466
|
+
schema: internalSchema,
|
|
467
|
+
stdinIsTTY: true,
|
|
468
|
+
});
|
|
469
|
+
return Object.fromEntries(definedEntries(resolved.args).filter(([name]) => declared.has(name)));
|
|
470
|
+
}
|
|
471
|
+
/**
 * Build the argument bag handed to adapter commands for a resolved reference.
 *
 * `ref` and `id` always carry the route value; each identifier-specific key
 * (`doi`, `arxiv_id`, `pmid`, `key`, `forum`) carries it only when the route
 * kind matches and is `undefined` otherwise.
 *
 * @param {{kind: string, value: string}} route - Resolved reference.
 * @param {object} [opts] - Options; `unpaywallEmail`, `venue`, `year` and
 *   `volume` are copied through.
 * @returns {object} Argument object (keys may hold `undefined`).
 */
function referenceArgs(route, opts = {}) {
    const { kind, value } = route;
    const whenKind = (expected) => (kind === expected ? value : undefined);
    return {
        ref: value,
        id: value,
        doi: whenKind("doi"),
        arxiv_id: whenKind("arxiv"),
        pmid: whenKind("pmid"),
        key: whenKind("dblp"),
        forum: whenKind("openreview"),
        email: opts.unpaywallEmail,
        venue: opts.venue,
        year: opts.year,
        volume: opts.volume,
    };
}
|
|
486
|
+
/**
 * Choose the sources to query for a PDF/artifact lookup.
 *
 * Precedence: an explicit single source, then an explicit source list, then
 * (for references of kind "unknown") every source listed for `scholar.pdf`,
 * and finally the route's preferred sources.
 *
 * @param {string|undefined} sourceArg - Single source selected by the caller.
 * @param {string|undefined} sourcesArg - Source list selected by the caller.
 * @param {{kind: string, preferredSources?: unknown}} route - Resolved reference.
 * @returns {string[]} Source names to query.
 */
export function resolveScholarArtifactSources(sourceArg, sourcesArg, route) {
    if (sourceArg) {
        return [sourceArg];
    }
    if (sourcesArg) {
        return resolveScholarSources(sourcesArg);
    }
    return route.kind === "unknown"
        ? listScholarSourcesByCapability("scholar.pdf")
        : resolveScholarSources(undefined, route.preferredSources);
}
|
|
495
|
+
/**
 * Choose the sources to query for a full-text lookup.
 *
 * Explicit selections (`sourceArg`, then `sourcesArg`) are returned without
 * further filtering. Otherwise the candidate list is narrowed to sources
 * whose registered adapter has a command for `scholar.fulltext`.
 *
 * @param {string|undefined} sourceArg - Single source selected by the caller.
 * @param {string|undefined} sourcesArg - Source list selected by the caller.
 * @param {{kind: string, preferredSources?: unknown}} route - Resolved reference.
 * @returns {string[]} Source names to query.
 */
export function resolveScholarFulltextSources(sourceArg, sourcesArg, route) {
    if (sourceArg) {
        return [sourceArg];
    }
    if (sourcesArg) {
        return resolveScholarSources(sourcesArg);
    }
    let candidates;
    if (route.kind === "unknown") {
        candidates = listScholarSourcesByCapability("scholar.fulltext");
    }
    else {
        candidates = resolveScholarSources(undefined, route.preferredSources);
    }
    const supportsFulltext = (source) => {
        const adapter = getAllAdapters().find((candidate) => candidate.name === source);
        if (!adapter) {
            return false;
        }
        return findScholarCommandByCapability(adapter, "scholar.fulltext") !== undefined;
    };
    return candidates.filter((source) => supportsFulltext(source));
}
|
|
511
|
+
/**
 * Run one adapter command and return its results as scholarly records.
 *
 * Failures are reported in the returned outcome (`records: []` plus an
 * `error` object) instead of being thrown by this function's own checks.
 *
 * @param {string} source - Adapter name.
 * @param {{name: string, command: object}} found - Command to execute.
 * @param {object} args - Raw argument values; normalized against the command.
 * @param {string} capability - Capability label echoed in the outcome.
 * @returns {Promise<object>} `{ source, capability, records, error? }`.
 */
async function executeScholarAdapterCommand(source, found, args, capability) {
    // Shared shape for both failure paths.
    const failed = (error) => ({ source, capability, records: [], error });
    const invocation = buildInvocation("cli", source, found.name, {
        args: normalizeScholarCommandArgs(found.command, args),
        source: "internal",
    }, { approved: true });
    if (!invocation) {
        return failed({
            code: "build_invocation_failed",
            message: `could not build invocation for ${source}.${found.name}`,
        });
    }
    const result = await execute(invocation);
    if (result.error) {
        return failed({
            code: result.error.code ?? "execution_error",
            message: result.error.message ?? "adapter command failed",
            retryable: result.error.retryable,
        });
    }
    const records = coerceToScholarlyRecords(result.results, source);
    return { source, capability, records };
}
|
|
546
|
+
/**
 * Run one adapter command and return its raw result rows.
 *
 * Only non-null, non-array objects from `result.results` are kept. Each kept
 * row gets a `source_adapter` key that defaults to `source`; a value already
 * present on the row wins because the row is spread last.
 *
 * @param {string} source - Adapter name.
 * @param {{name: string, command: object}} found - Command to execute.
 * @param {object} args - Raw argument values; normalized against the command.
 * @returns {Promise<object>} `{ source, rows, error? }`.
 */
async function executeScholarAdapterRows(source, found, args) {
    // Shared shape for both failure paths.
    const failed = (error) => ({ source, rows: [], error });
    const invocation = buildInvocation("cli", source, found.name, {
        args: normalizeScholarCommandArgs(found.command, args),
        source: "internal",
    }, { approved: true });
    if (!invocation) {
        return failed({
            code: "build_invocation_failed",
            message: `could not build invocation for ${source}.${found.name}`,
        });
    }
    const result = await execute(invocation);
    if (result.error) {
        return failed({
            code: result.error.code ?? "execution_error",
            message: result.error.message ?? "adapter command failed",
            retryable: result.error.retryable,
        });
    }
    const rows = [];
    if (Array.isArray(result.results)) {
        for (const row of result.results) {
            const isObjectRow = typeof row === "object" && row !== null && !Array.isArray(row);
            if (isObjectRow) {
                rows.push({ source_adapter: source, ...row });
            }
        }
    }
    return { source, rows };
}
|
|
580
|
+
/**
 * Look up the review-thread command of `source` and execute it.
 *
 * @param {string} source - Adapter name.
 * @param {object} args - Argument values forwarded to the command.
 * @returns {Promise<object>} `{ source, rows, error? }`; an unknown adapter or
 *   a missing review command yields an empty `rows` with an `error`.
 */
async function runReviewAdapterCommand(source, args) {
    const unavailable = (code, message) => ({
        source,
        rows: [],
        error: { code, message },
    });
    const adapter = getAllAdapters().find((candidate) => candidate.name === source);
    if (!adapter) {
        return unavailable("adapter_not_found", `unknown source: ${source}`);
    }
    const found = findScholarReviewThreadCommand(adapter);
    if (!found) {
        return unavailable("capability_unsupported", `${source} does not expose scholar.review`);
    }
    return executeScholarAdapterRows(source, found, args);
}
|
|
263
605
|
async function runAdapterCommand(source, capability, args) {
|
|
264
606
|
const adapter = getAllAdapters().find((candidate) => candidate.name === source);
|
|
265
607
|
if (!adapter) {
|
|
@@ -283,33 +625,244 @@ async function runAdapterCommand(source, capability, args) {
|
|
|
283
625
|
},
|
|
284
626
|
};
|
|
285
627
|
}
|
|
286
|
-
|
|
287
|
-
|
|
628
|
+
return executeScholarAdapterCommand(source, found, args, capability);
|
|
629
|
+
}
|
|
630
|
+
/**
 * Execute the single-record command of `source` for a resource capability.
 *
 * @param {string} source - Adapter name.
 * @param {string} capability - Capability to look up on the adapter.
 * @param {object} args - Argument values forwarded to the command.
 * @returns {Promise<object>} Outcome with `records` and, on failure, `error`.
 *   Note that the unknown-adapter outcome carries no `capability` key, while
 *   the unsupported-capability outcome does.
 */
async function runSingleRecordResourceCommand(source, capability, args) {
    const adapter = getAllAdapters().find((candidate) => candidate.name === source);
    if (!adapter) {
        const error = {
            code: "adapter_not_found",
            message: `unknown source: ${source}`,
        };
        return { source, records: [], error };
    }
    const found = findScholarSingleRecordCommandByCapability(adapter, capability);
    if (!found) {
        const error = {
            code: "capability_unsupported",
            message: `${source} does not expose single-record ${capability}`,
        };
        return { source, capability, records: [], error };
    }
    return executeScholarAdapterCommand(source, found, args, capability);
}
|
|
656
|
+
/**
 * Query each selected source, one after another, for a single reference and
 * fuse the per-source record lists.
 *
 * @param {string} capability - Capability to run on every source.
 * @param {string} ref - Reference as given by the caller.
 * @param {object} opts - Options; `source` wins over `sources`.
 * @param {unknown} [sourceFallback] - Fallback sources used instead of the
 *   route's preferred sources when provided.
 * @returns {Promise<{sourceList: string[], outcomes: object[], records: object[]}>}
 *   Fused records are capped at 10 for `scholar.pdf` and 50 otherwise.
 */
async function collectSingleRecords(capability, ref, opts, sourceFallback) {
    const route = resolveScholarReference(ref);
    let sourceList;
    if (opts.source) {
        sourceList = [opts.source];
    }
    else {
        sourceList = resolveScholarSources(opts.sources, sourceFallback ?? route.preferredSources);
    }
    const outcomes = [];
    for (const source of sourceList) {
        const lookupArgs = { ...referenceArgs(route, opts) };
        outcomes.push(await runAdapterCommand(source, capability, lookupArgs));
    }
    const rankedLists = outcomes.map((outcome) => outcome.records);
    const topN = capability === "scholar.pdf" ? 10 : 50;
    return {
        sourceList,
        outcomes,
        records: reciprocalRankFusion(rankedLists, { topN }),
    };
}
|
|
673
|
+
/**
 * Run the queryable resource-search command of each selected source for
 * `query` (limit "5" per source) and fuse the results.
 *
 * Sources without a registered adapter or without a matching command
 * contribute an error outcome with no records.
 *
 * @param {string} capability - Resource capability to search with.
 * @param {string} query - Query text passed to each command.
 * @param {object} opts - Options; `source` wins over `sources`.
 * @returns {Promise<{sourceList: string[], outcomes: object[], records: object[]}>}
 *   Fused records are capped at 10.
 */
async function collectResourceSearchRecords(capability, query, opts) {
    let sourceList;
    if (opts.source) {
        sourceList = [opts.source];
    }
    else {
        sourceList = resolveScholarSources(opts.sources, listResourceSearchScholarSourcesByCapability(capability));
    }
    const outcomes = [];
    for (const source of sourceList) {
        const adapter = getAllAdapters().find((candidate) => candidate.name === source);
        if (!adapter) {
            outcomes.push({
                source,
                records: [],
                error: {
                    code: "adapter_not_found",
                    message: `unknown source: ${source}`,
                },
            });
            continue;
        }
        const found = findScholarResourceSearchCommandByCapability(adapter, capability);
        if (!found) {
            outcomes.push({
                source,
                records: [],
                error: {
                    code: "capability_unsupported",
                    message: `${source} does not expose queryable ${capability}`,
                },
            });
            continue;
        }
        const searchArgs = { query, limit: "5" };
        outcomes.push(await executeScholarAdapterCommand(source, found, searchArgs, capability));
    }
    const rankedLists = outcomes.map((outcome) => outcome.records);
    return {
        sourceList,
        outcomes,
        records: reciprocalRankFusion(rankedLists, { topN: 10 }),
    };
}
|
|
716
|
+
/**
 * Follow up search hits with per-record detail lookups.
 *
 * Takes `arxiv_id ?? id` from every search record, keeps the distinct values
 * shaped like `NNNN.NNNNN` (optionally with a `vN` suffix), and looks up the
 * first three of them on every detail source.
 *
 * @param {string} capability - Capability to run for each lookup.
 * @param {object[]} searchRecords - Records returned by a prior search.
 * @param {object} opts - Options forwarded to the detail-source selection.
 * @returns {Promise<{sourceList: string[], outcomes: object[], records: object[]}>}
 *   Fused records are capped at 10.
 */
async function collectResourceDetailRecordsFromSearch(capability, searchRecords, opts) {
    const sourceList = listResourceDetailSourcesForSearchFallback(capability, opts);
    const arxivIdShape = /^\d{4}\.\d{4,5}(?:v\d+)?$/i;
    const distinctRefs = new Set();
    for (const record of searchRecords) {
        const candidate = record.arxiv_id ?? record.id;
        if (arxivIdShape.test(candidate)) {
            distinctRefs.add(candidate);
        }
    }
    const refs = Array.from(distinctRefs).slice(0, 3);
    const outcomes = [];
    for (const ref of refs) {
        for (const source of sourceList) {
            const lookupArgs = { ...referenceArgs(resolveScholarReference(ref)) };
            outcomes.push(await runAdapterCommand(source, capability, lookupArgs));
        }
    }
    const rankedLists = outcomes.map((outcome) => outcome.records);
    return {
        sourceList,
        outcomes,
        records: reciprocalRankFusion(rankedLists, { topN: 10 }),
    };
}
|
|
737
|
+
/**
 * Gather PDF candidates for a reference.
 *
 * First runs `scholar.pdf` on every selected source. When the reference kind
 * is "unknown", or when no source returned a record, each source's queryable
 * search command is additionally run with the raw reference as the query
 * (limit "5"). For "unknown" references those search hits are filtered for
 * relevance to the query.
 *
 * @param {string} ref - Reference or free-text query.
 * @param {object} opts - Options; `source`/`sources` select the sources.
 * @returns {Promise<{sourceList: string[], outcomes: object[], records: object[]}>}
 *   Fused records are capped at 10.
 */
async function collectPdfCandidates(ref, opts) {
    const route = resolveScholarReference(ref);
    const sourceList = resolveScholarArtifactSources(opts.source, opts.sources, route);
    const outcomes = [];
    for (const source of sourceList) {
        const lookupArgs = { ...referenceArgs(route, opts) };
        outcomes.push(await runAdapterCommand(source, "scholar.pdf", lookupArgs));
    }
    const needsSearchFallback = route.kind === "unknown" ||
        outcomes.every((outcome) => outcome.records.length === 0);
    if (needsSearchFallback) {
        for (const source of sourceList) {
            const adapter = getAllAdapters().find((candidate) => candidate.name === source);
            if (!adapter) {
                outcomes.push({
                    source,
                    records: [],
                    error: {
                        code: "adapter_not_found",
                        message: `unknown source: ${source}`,
                    },
                });
                continue;
            }
            const found = findScholarQueryableSearchCommand(adapter);
            if (!found) {
                outcomes.push({
                    source,
                    records: [],
                    error: {
                        code: "capability_unsupported",
                        message: `${source} does not expose queryable scholar.search`,
                    },
                });
                continue;
            }
            const searchArgs = { query: ref, limit: "5" };
            const outcome = await executeScholarAdapterCommand(source, found, searchArgs, "scholar.pdf");
            if (route.kind === "unknown") {
                outcomes.push(onlyRelevantUnknownQueryRecords(outcome, ref));
            }
            else {
                outcomes.push(outcome);
            }
        }
    }
    const rankedLists = outcomes.map((outcome) => outcome.records);
    return {
        sourceList,
        outcomes,
        records: reciprocalRankFusion(rankedLists, { topN: 10 }),
    };
}
|
|
790
|
+
/**
 * Return the first record carrying a non-empty string `pdf_url`.
 *
 * @param {object[]} records - Records to scan in order.
 * @returns {object|undefined} The matching record, or `undefined` if none.
 */
function firstPdfRecord(records) {
    for (const record of records) {
        const url = record.pdf_url;
        if (typeof url === "string" && url.length > 0) {
            return record;
        }
    }
    return undefined;
}
|
|
793
|
+
/**
 * Normalize a title or query into a comparison key: lower-cased, with every
 * run of characters outside `a-z0-9` collapsed to one space and the ends
 * trimmed.
 *
 * @param {unknown} value - Title or query text.
 * @returns {string} The normalized key; `""` when `value` is not a string
 *   (for example a record without a title) or contains no ASCII letters or
 *   digits.
 */
function normalizedTitleKey(value) {
    // Records are not guaranteed to carry a string title; treat anything else
    // as "no key" instead of throwing on `.toLowerCase()`.
    if (typeof value !== "string") {
        return "";
    }
    // Whitespace is itself outside a-z0-9, so this single pass already
    // collapses it; no separate whitespace-normalizing pass is needed.
    return value
        .toLowerCase()
        .replace(/[^a-z0-9]+/g, " ")
        .trim();
}
|
|
800
|
+
/**
 * Decide whether a record's title plausibly answers a free-text query.
 *
 * Both sides are compared as normalized title keys. A record is relevant
 * when the query key is empty, when the title equals the query or starts
 * with it followed by a space, when a query key of at least 12 characters
 * occurs inside the title, or when the query has more than three tokens and
 * at least 80% of them appear among the title's tokens.
 *
 * @param {object} record - Record whose `title` is inspected.
 * @param {string} query - Free-text query.
 * @returns {boolean} `true` when the record is considered relevant.
 */
export function isScholarlyRecordRelevantToQuery(record, query) {
    const queryKey = normalizedTitleKey(query);
    if (!queryKey) {
        return true;
    }
    const titleKey = normalizedTitleKey(record.title);
    if (!titleKey) {
        return false;
    }
    const isExactOrPrefix = titleKey === queryKey || titleKey.startsWith(`${queryKey} `);
    if (isExactOrPrefix) {
        return true;
    }
    const isLongSubstring = queryKey.length >= 12 && titleKey.includes(queryKey);
    if (isLongSubstring) {
        return true;
    }
    const queryTokens = queryKey.split(" ").filter(Boolean);
    // Short queries only match through the exact/prefix/substring rules above.
    if (queryTokens.length <= 3) {
        return false;
    }
    const titleTokens = new Set(titleKey.split(" ").filter(Boolean));
    let matched = 0;
    for (const token of queryTokens) {
        if (titleTokens.has(token)) {
            matched += 1;
        }
    }
    return matched / queryTokens.length >= 0.8;
}
|
|
819
|
+
/**
 * Copy an outcome, keeping only the records relevant to a free-text query.
 *
 * @param {object} outcome - Outcome with a `records` array.
 * @param {string} query - Free-text query the records are checked against.
 * @returns {object} Shallow copy of `outcome` with filtered `records`.
 */
function onlyRelevantUnknownQueryRecords(outcome, query) {
    const relevant = outcome.records.filter((record) => isScholarlyRecordRelevantToQuery(record, query));
    return { ...outcome, records: relevant };
}
|
|
825
|
+
/**
 * Decide whether a record refers to the same work as `ref`.
 *
 * The record's identifier and URL fields are compared against the resolved
 * reference: by canonical arXiv id for arXiv references, by canonical DOI for
 * DOI references, and otherwise by case-insensitive substring match on the
 * trimmed field value.
 *
 * @param {object} record - Record to inspect.
 * @param {string} ref - Reference as given by the caller.
 * @returns {boolean} `true` when any inspected field matches.
 */
export function isScholarlyRecordRelevantToRef(record, ref) {
    const route = resolveScholarReference(ref);
    const candidates = [
        record.id,
        record.arxiv_id,
        record.doi,
        record.pmid,
        record.pmc_id,
        record.openreview_id,
        record.semantic_scholar_id,
        record.source_url,
        record.pdf_url,
    ];
    let matches;
    if (route.kind === "arxiv") {
        matches = (candidate) => canonicalArxivId(candidate) === route.value;
    }
    else if (route.kind === "doi") {
        matches = (candidate) => canonicalDoi(candidate) === route.value;
    }
    else {
        const needle = route.value.toLowerCase();
        matches = (candidate) => typeof candidate === "string" &&
            candidate.trim().toLowerCase().includes(needle);
    }
    return candidates.some((candidate) => matches(candidate));
}
|
|
848
|
+
/**
 * Copy an outcome, keeping only the records that match a reference.
 *
 * @param {object} outcome - Outcome with a `records` array.
 * @param {string} ref - Reference the records are checked against.
 * @returns {object} Shallow copy of `outcome` with filtered `records`.
 */
function onlyRelevantRefRecords(outcome, ref) {
    const relevant = outcome.records.filter((record) => isScholarlyRecordRelevantToRef(record, ref));
    return { ...outcome, records: relevant };
}
|
|
854
|
+
/**
 * Tell whether an outcome error is worth retrying.
 *
 * @param {object|null|undefined} error - Error object from an outcome.
 * @returns {boolean} `true` when `retryable` is exactly `true` or the code is
 *   `"rate_limit"` or `"rate_limited"`.
 */
function isRetryableScholarError(error) {
    if (error?.retryable === true) {
        return true;
    }
    const code = error?.code;
    return code === "rate_limit" || code === "rate_limited";
}
|
|
859
|
+
/**
 * Render an outcome's error as `source: code` or `source: code (message)`.
 *
 * @param {object} outcome - Outcome with `source` and an optional `error`.
 * @returns {string} One-line description; the code falls back to
 *   `"unknown_error"` and a blank message is omitted.
 */
function formatScholarOutcomeError(outcome) {
    const code = outcome.error?.code ?? "unknown_error";
    const message = outcome.error?.message?.trim();
    const summary = `${outcome.source}: ${code}`;
    if (message) {
        return `${summary} (${message})`;
    }
    return summary;
}
|
|
313
866
|
function columns(detailed = false) {
|
|
314
867
|
return detailed
|
|
315
868
|
? [
|
|
@@ -327,11 +880,71 @@ function columns(detailed = false) {
|
|
|
327
880
|
"is_open_access",
|
|
328
881
|
"oa_status",
|
|
329
882
|
"pdf_url",
|
|
883
|
+
"code_url",
|
|
884
|
+
"project_url",
|
|
885
|
+
"dataset_url",
|
|
886
|
+
"model_urls",
|
|
887
|
+
"dataset_urls",
|
|
888
|
+
"space_urls",
|
|
889
|
+
"github_stars",
|
|
890
|
+
"num_models",
|
|
891
|
+
"num_datasets",
|
|
892
|
+
"num_spaces",
|
|
330
893
|
"source_adapter",
|
|
331
894
|
"source_url",
|
|
895
|
+
"search_scope",
|
|
896
|
+
"search_window",
|
|
897
|
+
"search_exhaustive",
|
|
332
898
|
]
|
|
333
899
|
: ["id", "title", "year", "venue", "doi", "pdf_url", "source_adapter"];
|
|
334
900
|
}
|
|
901
|
+
/**
 * Column names used when printing code/dataset resource records.
 *
 * @param {boolean} [detailed=false] - Append the extended columns.
 * @returns {string[]} A freshly built list of column names.
 */
function resourceColumns(detailed = false) {
    const names = [
        "id",
        "title",
        "source_adapter",
        "source_url",
        "pdf_url",
        "code_url",
        "project_url",
        "dataset_url",
        "model_urls",
        "dataset_urls",
        "space_urls",
    ];
    if (!detailed) {
        return names;
    }
    return names.concat([
        "authors",
        "year",
        "github_stars",
        "num_models",
        "num_datasets",
        "num_spaces",
        "retrieved_at",
    ]);
}
|
|
928
|
+
/**
 * Pick the column set for a capability: resource columns for `scholar.code`
 * and `scholar.datasets`, the general columns for everything else.
 *
 * @param {string} capability - Capability being printed.
 * @param {boolean} [detailed=false] - Request the extended column set.
 * @returns {string[]} Column names.
 */
function columnsForCapability(capability, detailed = false) {
    if (capability === "scholar.code") {
        return resourceColumns(detailed);
    }
    if (capability === "scholar.datasets") {
        return resourceColumns(detailed);
    }
    return columns(detailed);
}
|
|
933
|
+
/**
 * Column names used when printing review rows.
 *
 * @param {boolean} [detailed=false] - Append `author`, `invitation` and
 *   `text_chars`.
 * @returns {string[]} A freshly built list of column names.
 */
function reviewColumns(detailed = false) {
    const names = [
        "source_adapter",
        "forum",
        "note_id",
        "type",
        "created_at",
        "source_url",
        "rating",
        "confidence",
        "text",
        "text_truncated",
    ];
    if (detailed) {
        return names.concat(["author", "invitation", "text_chars"]);
    }
    return names;
}
|
|
335
948
|
async function runSearch(program, query, opts) {
|
|
336
949
|
const startedAt = Date.now();
|
|
337
950
|
const fmt = detectFormat(program.opts().format);
|
|
@@ -351,9 +964,9 @@ async function runSearch(program, query, opts) {
|
|
|
351
964
|
code: "SCHOLAR_NOT_FOUND",
|
|
352
965
|
message: `no scholarly works returned for "${query}" across [${sources.join(", ")}]`,
|
|
353
966
|
suggestion: errors.length > 0
|
|
354
|
-
? `Per-source errors: ${errors.map(
|
|
967
|
+
? `Per-source errors: ${errors.map(formatScholarOutcomeError).join("; ")}`
|
|
355
968
|
: "Try --sources all or a more specific query.",
|
|
356
|
-
retryable: errors.some((outcome) => outcome.error
|
|
969
|
+
retryable: errors.some((outcome) => isRetryableScholarError(outcome.error)),
|
|
357
970
|
};
|
|
358
971
|
console.error(format(null, undefined, fmt, ctx));
|
|
359
972
|
process.exit(ExitCode.EMPTY_RESULT);
|
|
@@ -364,80 +977,2202 @@ async function runSingle(program, capability, ref, opts) {
|
|
|
364
977
|
const startedAt = Date.now();
|
|
365
978
|
const fmt = detectFormat(program.opts().format);
|
|
366
979
|
const ctx = makeCtx(capability, startedAt);
|
|
980
|
+
const lookupRef = await resolveGraphLookupRef(capability, ref, opts);
|
|
981
|
+
const { sourceList, outcomes, records } = await collectSingleRecords(capability, lookupRef, opts);
|
|
982
|
+
ctx.duration_ms = Date.now() - startedAt;
|
|
983
|
+
ctx.surface = "web";
|
|
984
|
+
if (records.length === 0) {
|
|
985
|
+
const errors = outcomes.filter((outcome) => outcome.error);
|
|
986
|
+
ctx.error = {
|
|
987
|
+
code: "SCHOLAR_NOT_FOUND",
|
|
988
|
+
message: `no scholarly records returned for "${ref}" across [${sourceList.join(", ")}]`,
|
|
989
|
+
suggestion: errors.length > 0
|
|
990
|
+
? `Per-source errors: ${errors.map(formatScholarOutcomeError).join("; ")}`
|
|
991
|
+
: "Run `unicli scholar doctor` to inspect available scholarly sources.",
|
|
992
|
+
retryable: errors.some((outcome) => isRetryableScholarError(outcome.error)),
|
|
993
|
+
};
|
|
994
|
+
console.error(format(null, undefined, fmt, ctx));
|
|
995
|
+
process.exit(ExitCode.EMPTY_RESULT);
|
|
996
|
+
}
|
|
997
|
+
console.log(format(records, columnsForCapability(capability, opts.detailed), fmt, ctx));
|
|
998
|
+
}
|
|
999
|
+
/**
 * CLI handler: fetch review rows for a reference and print them.
 *
 * Sources are `opts.source` when given; otherwise they are resolved from
 * `opts.sources`, falling back to the route's preferred sources for
 * OpenReview references and to the review-capable sources for anything else.
 * When no rows come back, an error context is printed to stderr and the
 * process exits with `ExitCode.EMPTY_RESULT`.
 *
 * @param {object} program - CLI program; `program.opts().format` is read.
 * @param {string} ref - Reference as given by the caller.
 * @param {object} opts - Options (`source`, `sources`, `maxLength`, `detailed`).
 * @returns {Promise<void>}
 */
async function runReviews(program, ref, opts) {
    const startedAt = Date.now();
    const fmt = detectFormat(program.opts().format);
    const ctx = makeCtx("scholar.reviews", startedAt);
    const route = resolveScholarReference(ref);
    let sourceList;
    if (opts.source) {
        sourceList = [opts.source];
    }
    else {
        sourceList = resolveScholarSources(opts.sources, route.kind === "openreview"
            ? route.preferredSources
            : listScholarReviewSources());
    }
    const outcomes = [];
    for (const source of sourceList) {
        const maxLength = opts.maxLength === undefined ? undefined : Number(opts.maxLength);
        const reviewArgs = {
            ...referenceArgs(route),
            // The route value is always sent as the forum, whatever its kind.
            forum: route.value,
            "max-length": maxLength,
        };
        outcomes.push(await runReviewAdapterCommand(source, reviewArgs));
    }
    const rows = outcomes.flatMap((outcome) => outcome.rows);
    ctx.duration_ms = Date.now() - startedAt;
    ctx.surface = "web";
    if (rows.length === 0) {
        const errors = outcomes.filter((outcome) => outcome.error);
        let suggestion;
        if (errors.length > 0) {
            suggestion = `Per-source errors: ${errors.map(formatScholarOutcomeError).join("; ")}`;
        }
        else {
            suggestion = "Use an OpenReview forum id or URL, or run `unicli scholar search <query> --sources openreview` before requesting reviews.";
        }
        ctx.error = {
            code: "SCHOLAR_REVIEWS_NOT_FOUND",
            message: `no scholarly review rows returned for "${ref}" across [${sourceList.join(", ")}]`,
            suggestion,
            retryable: errors.some((outcome) => isRetryableScholarError(outcome.error)),
        };
        console.error(format(null, undefined, fmt, ctx));
        process.exit(ExitCode.EMPTY_RESULT);
    }
    console.log(format(rows, reviewColumns(opts.detailed), fmt, ctx));
}
|
|
399
|
-
|
|
1036
|
+
/**
 * Tell whether a resource field holds usable text.
 *
 * @param {unknown} value - Field value.
 * @returns {boolean} `true` only for strings with non-whitespace content.
 */
function nonEmptyResourceField(value) {
    if (typeof value !== "string") {
        return false;
    }
    return value.trim().length > 0;
}
|
|
1039
|
+
/**
 * Tell whether a resource counter is present and greater than zero.
 *
 * @param {unknown} value - Counter value.
 * @returns {boolean} `false` for `undefined`; otherwise the result of
 *   `value > 0`.
 */
function positiveResourceCount(value) {
    if (value === undefined) {
        return false;
    }
    return value > 0;
}
|
|
1042
|
+
/**
 * Tell whether a record exposes at least one resource for a capability.
 *
 * For `scholar.code` that means a code or project URL. For any other
 * capability it means a dataset/model/space URL field or a positive
 * dataset/model/space count.
 *
 * @param {object} record - Record to inspect.
 * @param {string} capability - Capability being served.
 * @returns {boolean} `true` when a matching resource field is populated.
 */
export function hasResourceForCapability(record, capability) {
    const anyText = (fields) => fields.some((field) => nonEmptyResourceField(record[field]));
    const anyCount = (fields) => fields.some((field) => positiveResourceCount(record[field]));
    if (capability === "scholar.code") {
        return anyText(["code_url", "project_url"]);
    }
    return (anyText(["dataset_url", "dataset_urls", "model_urls", "space_urls"]) ||
        anyCount(["num_datasets", "num_models", "num_spaces"]));
}
|
|
1055
|
+
/**
 * Tell whether a record has a code or project URL.
 *
 * @param {object} record - Record to inspect.
 * @returns {boolean} `true` when `code_url` or `project_url` is non-empty text.
 */
function hasCodeResource(record) {
    if (nonEmptyResourceField(record.code_url)) {
        return true;
    }
    return nonEmptyResourceField(record.project_url);
}
|
|
1059
|
+
/**
 * Tell whether a record points at any dataset.
 *
 * @param {object} record - Record to inspect.
 * @returns {boolean} `true` when `dataset_url` or `dataset_urls` is non-empty
 *   text, or `num_datasets` is positive.
 */
function hasDatasetResource(record) {
    if (nonEmptyResourceField(record.dataset_url)) {
        return true;
    }
    if (nonEmptyResourceField(record.dataset_urls)) {
        return true;
    }
    return positiveResourceCount(record.num_datasets);
}
|
|
1064
|
+
/**
 * Tell whether a record points at any model.
 *
 * @param {object} record - Record to inspect.
 * @returns {boolean} `true` when `model_urls` is non-empty text or
 *   `num_models` is positive.
 */
function hasModelResource(record) {
    if (nonEmptyResourceField(record.model_urls)) {
        return true;
    }
    return positiveResourceCount(record.num_models);
}
|
|
1068
|
+
/**
 * Tell whether a record points at any space.
 *
 * @param {object} record - Record to inspect.
 * @returns {boolean} `true` when `space_urls` is non-empty text or
 *   `num_spaces` is positive.
 */
function hasSpaceResource(record) {
    if (nonEmptyResourceField(record.space_urls)) {
        return true;
    }
    return positiveResourceCount(record.num_spaces);
}
|
|
1072
|
+
/**
 * Collect code or dataset/model resource records for a reference.
 *
 * Identifier references (route kind other than "unknown") are first looked
 * up on each single-record source. If that yields no record with a matching
 * resource, a resource search is run with the raw reference and its hits are
 * filtered to those matching the reference.
 *
 * Free-text references (route kind "unknown") skip the direct lookup: a
 * resource search is run, hits are filtered for relevance to the query, and
 * arXiv-shaped hits are followed up with detail lookups.
 *
 * In both paths the returned `sourceList` is de-duplicated, so a source that
 * took part in more than one phase is listed once.
 *
 * @param {string} capability - `scholar.code` or another resource capability.
 * @param {string} ref - Reference or free-text query.
 * @param {object} opts - Options (`source`, `sources`, ...).
 * @returns {Promise<{sourceList: string[], outcomes: object[], records: object[]}>}
 *   Records that expose a resource for `capability`, at most 10 before
 *   filtering.
 */
async function collectResourceRecords(capability, ref, opts) {
    // Fuses ranked lists and keeps only records carrying a matching resource.
    const rankResources = (rankedLists) => reciprocalRankFusion(rankedLists, { topN: 10 })
        .filter((record) => hasResourceForCapability(record, capability));
    const route = resolveScholarReference(ref);
    let sourceList = route.kind === "unknown"
        ? []
        : opts.source
            ? [opts.source]
            : resolveScholarSources(opts.sources, listSingleRecordScholarSourcesByCapability(capability));
    let outcomes = [];
    if (route.kind !== "unknown") {
        for (const source of sourceList) {
            outcomes.push(await runSingleRecordResourceCommand(source, capability, {
                ...referenceArgs(route),
            }));
        }
    }
    let resourceRecords = rankResources(outcomes.map((outcome) => outcome.records));
    if (resourceRecords.length === 0 && route.kind !== "unknown") {
        const searched = await collectResourceSearchRecords(capability, ref, opts);
        const searchOutcomes = searched.outcomes.map((outcome) => onlyRelevantRefRecords(outcome, ref));
        const searchedSources = new Set(searchOutcomes
            .filter((outcome) => !outcome.error)
            .map((outcome) => outcome.source));
        sourceList = uniqueStrings([...sourceList, ...searched.sourceList]);
        // Drop "capability_unsupported" lookups for sources whose search ran
        // without error, so they are not reported as failures.
        outcomes = [
            ...outcomes.filter((outcome) => outcome.error?.code !== "capability_unsupported" ||
                !searchedSources.has(outcome.source)),
            ...searchOutcomes,
        ];
        resourceRecords = rankResources(outcomes.map((outcome) => outcome.records));
    }
    if (resourceRecords.length === 0 && route.kind === "unknown") {
        const searched = await collectResourceSearchRecords(capability, ref, opts);
        const searchOutcomes = searched.outcomes.map((outcome) => onlyRelevantUnknownQueryRecords(outcome, ref));
        const searchRecords = reciprocalRankFusion(searchOutcomes.map((outcome) => outcome.records), { topN: 10 });
        const enriched = await collectResourceDetailRecordsFromSearch(capability, searchRecords, opts);
        // De-duplicate like the identifier path does: the search and detail
        // phases can name the same source, which previously appeared twice in
        // the result and in the "across [...]" error message.
        sourceList = uniqueStrings([
            ...sourceList,
            ...searched.sourceList,
            ...enriched.sourceList,
        ]);
        outcomes = [...outcomes, ...searchOutcomes, ...enriched.outcomes];
        resourceRecords = rankResources([enriched.records, searchRecords]);
    }
    return { sourceList, outcomes, records: resourceRecords };
}
|
|
1118
|
+
/**
 * CLI handler: collect resource records for a reference and print them.
 *
 * When nothing is found, an error context is printed to stderr and the
 * process exits with `ExitCode.EMPTY_RESULT`.
 *
 * @param {object} program - CLI program; `program.opts().format` is read.
 * @param {string} capability - Resource capability being served.
 * @param {string} ref - Reference or free-text query.
 * @param {object} opts - Options forwarded to the collector; `detailed`
 *   selects the extended columns.
 * @returns {Promise<void>}
 */
async function runResources(program, capability, ref, opts) {
    const startedAt = Date.now();
    const fmt = detectFormat(program.opts().format);
    const ctx = makeCtx(capability, startedAt);
    const collected = await collectResourceRecords(capability, ref, opts);
    const { sourceList, outcomes } = collected;
    const resourceRecords = collected.records;
    ctx.duration_ms = Date.now() - startedAt;
    ctx.surface = "web";
    if (resourceRecords.length === 0) {
        const errors = outcomes.filter((outcome) => outcome.error);
        const label = capability === "scholar.code" ? "code" : "dataset/model";
        let suggestion;
        if (errors.length > 0) {
            suggestion = `Per-source errors: ${errors.map(formatScholarOutcomeError).join("; ")}`;
        }
        else {
            suggestion = "Try --source hf for Hugging Face paper resources, or run `unicli scholar doctor` to inspect resource-capable sources.";
        }
        ctx.error = {
            code: "SCHOLAR_RESOURCE_NOT_FOUND",
            message: `no scholarly ${label} resources returned for "${ref}" across [${sourceList.join(", ")}]`,
            suggestion,
            retryable: errors.some((outcome) => isRetryableScholarError(outcome.error)),
        };
        console.error(format(null, undefined, fmt, ctx));
        process.exit(ExitCode.EMPTY_RESULT);
    }
    console.log(format(resourceRecords, resourceColumns(opts.detailed), fmt, ctx));
}
|
|
1141
|
+
/**
 * De-duplicate an iterable, dropping falsy entries and keeping first-seen
 * order.
 *
 * @param {Iterable<unknown>} values - Values to de-duplicate.
 * @returns {Array<unknown>} Distinct truthy values.
 */
function uniqueStrings(values) {
    const seen = new Set();
    for (const value of values) {
        if (value) {
            seen.add(value);
        }
    }
    return Array.from(seen);
}
|
|
1144
|
+
/**
 * List the distinct `source_adapter` values of the records accepted by
 * `predicate`.
 *
 * @param {object[]} records - Records to scan.
 * @param {Function} [predicate] - Record filter; accepts everything by default.
 * @returns {string[]} Distinct, truthy adapter names in first-seen order.
 */
function sourcesForRecords(records, predicate = () => true) {
    const accepted = records.filter(predicate);
    const adapterNames = accepted.map((record) => record.source_adapter);
    const present = adapterNames.filter((name) => Boolean(name));
    return uniqueStrings(present);
}
|
|
1150
|
+
/**
 * Wrap a string in single quotes for display in a shell command line.
 *
 * Each embedded single quote is rewritten as `'\''` (close quote, escaped
 * quote, reopen quote).
 *
 * @param {string} value - Raw argument text.
 * @returns {string} The single-quoted argument.
 */
function quoteCliArg(value) {
    const escaped = value.split("'").join("'\\''");
    return `'${escaped}'`;
}
|
|
1153
|
+
/**
 * Render a `unicli scholar <subcommand>` command line for a reference.
 *
 * The reference is quoted; `--source`, `--sources` and `--unpaywall-email`
 * are appended, in that order, for each option that is set.
 *
 * @param {string} subcommand - Scholar subcommand name.
 * @param {string} ref - Reference to pass as the positional argument.
 * @param {object} opts - Options (`source`, `sources`, `unpaywallEmail`).
 * @returns {string} The space-joined command line.
 */
function scholarCommand(subcommand, ref, opts) {
    const flags = [];
    if (opts.source) {
        flags.push(`--source ${quoteCliArg(opts.source)}`);
    }
    if (opts.sources) {
        flags.push(`--sources ${quoteCliArg(opts.sources)}`);
    }
    if (opts.unpaywallEmail) {
        flags.push(`--unpaywall-email ${quoteCliArg(opts.unpaywallEmail)}`);
    }
    const parts = [`unicli scholar ${subcommand}`, quoteCliArg(ref)].concat(flags);
    return parts.join(" ");
}
|
|
1163
|
+
/**
 * Extract a version-less arXiv id (`NNNN.NNNN` or `NNNN.NNNNN`) from a value.
 *
 * The id is recognized at the very start of the string, after `arxiv:`, or
 * after `arxiv.org/abs/` or `arxiv.org/pdf/` (case-insensitive). A trailing
 * `vN` version suffix is not included in the result.
 *
 * @param {unknown} value - Candidate id, prefixed id, or URL.
 * @returns {string|undefined} The bare id, or `undefined` for non-strings and
 *   strings without a recognizable id.
 */
function canonicalArxivId(value) {
    if (typeof value !== "string") {
        return undefined;
    }
    const arxivIdPattern = /(?:arxiv(?:\.org\/(?:abs|pdf)\/|:)|^)(\d{4}\.\d{4,5})(?:v\d+)?/i;
    const found = arxivIdPattern.exec(value);
    if (found === null) {
        return undefined;
    }
    return found[1];
}
|
|
1169
|
+
/**
 * Reduce a value to a bare DOI when it looks like one.
 *
 * @param {unknown} value - Candidate DOI text.
 * @returns {string|undefined} The result of `bareDoi(value)` when it matches
 *   `10.<registrant>/<suffix>` without whitespace; otherwise `undefined`
 *   (also for non-strings).
 */
function canonicalDoi(value) {
    if (typeof value !== "string") {
        return undefined;
    }
    const doi = bareDoi(value);
    if (/^10\.\S+\/\S+$/i.test(doi)) {
        return doi;
    }
    return undefined;
}
|
|
1175
|
+
/**
 * Return the first truthy value that `pick` produces over `records`.
 *
 * @param {Iterable<object>} records - Records to scan in order.
 * @param {Function} pick - Extractor called with each record.
 * @returns {unknown} The first truthy picked value, or `undefined` if none.
 */
function firstRecordValue(records, pick) {
    for (const record of records) {
        const picked = pick(record);
        if (!picked) {
            continue;
        }
        return picked;
    }
    return undefined;
}
|
|
1183
|
+
/**
 * Derive the preferred canonical reference for a set of records.
 *
 * Identifier kinds are tried in a fixed order (arXiv id, DOI, PMID,
 * OpenReview id, Semantic Scholar id); the first kind for which any record
 * yields a value wins. If no record yields one, the original route is
 * returned, unless its kind is "unknown", in which case the result is
 * `undefined`.
 *
 * @param {{kind: string, value: string}} route - Resolved input reference.
 * @param {object[]} records - Records to mine for identifiers.
 * @returns {{kind: string, ref: string}|undefined} Canonical reference.
 */
function canonicalScholarReference(route, records) {
    // Trimmed text of a non-blank string field, otherwise undefined.
    const trimmedText = (value) => {
        if (typeof value !== "string") {
            return undefined;
        }
        const text = value.trim();
        return text.length > 0 ? text : undefined;
    };
    const probes = [
        ["arxiv", (record) => canonicalArxivId(record.arxiv_id) ??
                canonicalArxivId(record.id) ??
                canonicalArxivId(record.source_url) ??
                canonicalArxivId(record.pdf_url)],
        ["doi", (record) => canonicalDoi(record.doi) ?? canonicalDoi(record.id)],
        ["pmid", (record) => trimmedText(record.pmid)],
        ["openreview", (record) => {
                const id = trimmedText(record.openreview_id);
                return id === undefined ? undefined : `openreview:${id}`;
            }],
        ["semantic-scholar", (record) => trimmedText(record.semantic_scholar_id)],
    ];
    for (const [kind, pick] of probes) {
        const ref = firstRecordValue(records, pick);
        if (ref) {
            return { kind, ref };
        }
    }
    if (route.kind === "unknown") {
        return undefined;
    }
    return { kind: route.kind, ref: route.value };
}
|
|
1214
|
+
/**
 * Format a source-specific CLI invocation: `unicli <source> <command> [placeholder]`.
 *
 * @param {string} source - Adapter name.
 * @param {string | undefined} command - Command name; a falsy value yields `undefined`.
 * @param {string} [placeholder] - Argument placeholder appended when truthy.
 * @returns {string | undefined}
 */
function sourceCommand(source, command, placeholder) {
    if (command) {
        const invocation = `unicli ${source} ${command}`;
        return placeholder ? `${invocation} ${placeholder}` : invocation;
    }
    return undefined;
}
|
|
1219
|
+
/**
 * Collect the capabilities declared by an adapter's commands, keeping only
 * those listed in `SCHOLAR_CAPABILITIES`.
 *
 * @param {{ commands: Record<string, { capabilities?: Iterable<string> }> }} adapter
 * @returns {Set<string>} Capabilities in first-seen order.
 */
function capabilitySet(adapter) {
    const declared = Object.values(adapter.commands).flatMap((command) => [...(command.capabilities ?? [])]);
    return new Set(declared.filter((capability) => SCHOLAR_CAPABILITIES.includes(capability)));
}
|
|
1230
|
+
/**
 * Name of the adapter command that serves a capability.
 *
 * For `scholar.code` and `scholar.datasets` the single-record lookup is tried
 * first and the resource-search lookup second; every other capability uses the
 * generic capability lookup.
 *
 * @param {object} adapter - Adapter to inspect.
 * @param {string} capability - Capability identifier such as `"scholar.get"`.
 * @returns {string | undefined} Command name, if a matching command exists.
 */
function sourceCommandByCapability(adapter, capability) {
    const isResourceCapability = capability === "scholar.code" || capability === "scholar.datasets";
    if (!isResourceCapability) {
        return findScholarCommandByCapability(adapter, capability)?.name;
    }
    const singleRecordName = findScholarSingleRecordCommandByCapability(adapter, capability)?.name;
    if (singleRecordName != null) {
        return singleRecordName;
    }
    return findScholarResourceSearchCommandByCapability(adapter, capability)?.name;
}
|
|
1237
|
+
/**
 * Whether an adapter can back a source-scoped availability lookup: it has
 * `scholar.get` or `scholar.pdf`, or it resolves a command for `scholar.code`
 * or `scholar.datasets`.
 *
 * @param {object} adapter - Adapter to inspect.
 * @param {Set<string>} [capabilities] - Capability set; derived from the adapter when omitted.
 * @returns {boolean}
 */
function adapterSupportsSourceScopedAvailability(adapter, capabilities = capabilitySet(adapter)) {
    if (capabilities.has("scholar.get") || capabilities.has("scholar.pdf")) {
        return true;
    }
    return ["scholar.code", "scholar.datasets"].some((capability) => sourceCommandByCapability(adapter, capability) !== undefined);
}
|
|
1243
|
+
/**
 * Name of the adapter's review-thread command, if it has one.
 *
 * @param {object} adapter - Adapter to inspect.
 * @returns {string | undefined}
 */
function sourceReviewCommand(adapter) {
    const reviewCommand = findScholarReviewThreadCommand(adapter);
    return reviewCommand?.name;
}
|
|
1246
|
+
/**
 * Command name reported in coverage rows for a capability. `scholar.review`
 * is resolved through the review-thread lookup; everything else through
 * `sourceCommandByCapability`.
 *
 * @param {object} adapter - Adapter to inspect.
 * @param {string} capability - Capability identifier.
 * @returns {string | undefined}
 */
function coverageCommandByCapability(adapter, capability) {
    if (capability === "scholar.review") {
        return sourceReviewCommand(adapter);
    }
    return sourceCommandByCapability(adapter, capability);
}
|
|
1251
|
+
/**
 * CLI hint for searching a source. A queryable search command is rendered
 * with a `<query>` placeholder; otherwise the `scholar.search` command (if
 * any) is rendered without a placeholder.
 *
 * @param {{ name: string }} adapter - Adapter to inspect.
 * @returns {string | undefined}
 */
function sourceSearchCommand(adapter) {
    const queryable = findScholarQueryableSearchCommand(adapter);
    return queryable
        ? sourceCommand(adapter.name, queryable.name, "<query>")
        : sourceCommand(adapter.name, sourceCommandByCapability(adapter, "scholar.search"));
}
|
|
1257
|
+
/**
 * Translate a capability set into the list of recommended-use labels.
 * A label is included when at least one of its capabilities is present;
 * labels are returned in the fixed order of the table below.
 *
 * @param {Set<string>} capabilities - Capability identifiers.
 * @returns {string[]}
 */
function recommendedUses(capabilities) {
    const useTable = [
        [["scholar.search"], "discovery"],
        [["scholar.get"], "metadata"],
        [["scholar.pdf"], "pdf-download/read"],
        [["scholar.fulltext"], "source-fulltext"],
        [["scholar.code"], "code/project"],
        [["scholar.datasets"], "datasets/models/spaces"],
        [["scholar.citations", "scholar.references"], "citation-graph"],
        [["scholar.review"], "peer-review-audit"],
        [["scholar.venue", "scholar.author"], "venue/author-browse"],
    ];
    return useTable
        .filter(([anyOf]) => anyOf.some((capability) => capabilities.has(capability)))
        .map(([, label]) => label);
}
|
|
1281
|
+
/**
 * Classify a source by its capabilities. Rules are evaluated top to bottom
 * and the first match wins; `"metadata-source"` is the fallback.
 *
 * @param {Set<string>} capabilities - Capability identifiers.
 * @returns {string} Role label.
 */
function sourceRole(capabilities) {
    const has = (capability) => capabilities.has(capability);
    const orderedRoles = [
        ["review-fulltext-source", () => has("scholar.review") && has("scholar.fulltext")],
        ["resource-source", () => has("scholar.code") || has("scholar.datasets")],
        ["artifact-source", () => has("scholar.pdf") && has("scholar.get")],
        ["fulltext-source", () => has("scholar.fulltext")],
        ["graph-source", () => has("scholar.citations") || has("scholar.references")],
        ["discovery-source", () => has("scholar.search")],
    ];
    const matched = orderedRoles.find(([, applies]) => applies());
    return matched ? matched[0] : "metadata-source";
}
|
|
1299
|
+
/**
 * Describe how content from a source can be read, preferring source fulltext
 * over PDF, then metadata, then discovery only.
 *
 * @param {Set<string>} capabilities - Capability identifiers.
 * @returns {string} Strategy label.
 */
function readStrategy(capabilities) {
    const pdf = capabilities.has("scholar.pdf");
    const fulltext = capabilities.has("scholar.fulltext");
    if (fulltext) {
        return pdf ? "source-fulltext-then-pdf" : "source-fulltext";
    }
    if (pdf) {
        return "pdf-download";
    }
    return capabilities.has("scholar.get") ? "metadata-only" : "discovery-only";
}
|
|
1312
|
+
/**
 * Capability-only check for source-scoped availability: true when the set
 * contains `scholar.get`, `scholar.pdf`, `scholar.code`, or `scholar.datasets`.
 *
 * @param {Set<string>} capabilities - Capability identifiers.
 * @returns {boolean}
 */
function supportsSourceScopedAvailability(capabilities) {
    const scopedCapabilities = [
        "scholar.get",
        "scholar.pdf",
        "scholar.code",
        "scholar.datasets",
    ];
    return scopedCapabilities.some((capability) => capabilities.has(capability));
}
|
|
1318
|
+
/**
 * Decide how results from a source are handed off to follow-up commands.
 *
 * When an adapter is supplied the adapter-aware availability check is used;
 * otherwise the capability-only check is used.
 *
 * @param {Set<string>} capabilities - Capability identifiers.
 * @param {object} [adapter] - Adapter the capabilities belong to, when known.
 * @returns {string} Handoff strategy label.
 */
function coverageHandoffStrategy(capabilities, adapter) {
    const sourceScoped = adapter
        ? adapterSupportsSourceScopedAvailability(adapter, capabilities)
        : supportsSourceScopedAvailability(capabilities);
    if (sourceScoped) {
        return "source-scoped-evidence";
    }
    if (capabilities.has("scholar.search")) {
        return "discovery-result-to-canonical-workflow";
    }
    const identifierOnly = [
        "scholar.citations",
        "scholar.references",
        "scholar.review",
        "scholar.fulltext",
    ].some((capability) => capabilities.has(capability));
    return identifierOnly ? "identifier-required" : "metadata-only";
}
|
|
1335
|
+
/**
 * List the closed-loop workflow pieces a source lacks. A label is reported
 * when none of its capabilities is present; labels follow the table order.
 *
 * @param {Set<string>} capabilities - Capability identifiers.
 * @returns {string[]}
 */
function missingClosedLoopCapabilities(capabilities) {
    const requirementTable = [
        [["scholar.search"], "search"],
        [["scholar.get"], "metadata-get"],
        [["scholar.pdf", "scholar.fulltext"], "readable-text"],
        [["scholar.pdf"], "pdf-download"],
        [["scholar.fulltext"], "source-fulltext"],
        [["scholar.code"], "code/project"],
        [["scholar.datasets"], "datasets/models/spaces"],
        [["scholar.citations", "scholar.references"], "citation/reference-graph"],
        [["scholar.review"], "peer-review-audit"],
    ];
    return requirementTable
        .filter(([satisfiedBy]) => satisfiedBy.every((capability) => !capabilities.has(capability)))
        .map(([, label]) => label);
}
|
|
1361
|
+
/**
 * Build one coverage row per adapter, sorted by adapter name.
 *
 * Each row reports the adapter's role, capability flags, the closed-loop
 * pieces it lacks, and ready-to-copy CLI hints (`next_*`). Hints are
 * `undefined` when the adapter has no matching command or capability.
 *
 * @param {Iterable<object>} [adapters] - Adapters to describe; defaults to `listScholarAdapters()`.
 * @returns {object[]} Coverage rows; the input collection is copied before sorting.
 */
export function buildScholarCoverageRows(adapters = listScholarAdapters()) {
    const byName = (left, right) => left.name.localeCompare(right.name);
    const toCoverageRow = (adapter) => {
        const capabilities = capabilitySet(adapter);
        const has = (capability) => capabilities.has(capability);
        const commandFor = (capability) => sourceCommandByCapability(adapter, capability);
        const refCommand = (command) => sourceCommand(adapter.name, command, "<id-or-ref>");
        const getCommand = commandFor("scholar.get");
        const pdfCommand = commandFor("scholar.pdf");
        const fulltextCommand = commandFor("scholar.fulltext");
        const codeCommand = commandFor("scholar.code");
        const datasetsCommand = commandFor("scholar.datasets");
        const citationsCommand = commandFor("scholar.citations");
        const referencesCommand = commandFor("scholar.references");
        const reviewCommand = sourceReviewCommand(adapter);
        const authorCommand = commandFor("scholar.author");
        const venueCommand = commandFor("scholar.venue");
        const missingClosedLoop = missingClosedLoopCapabilities(capabilities);
        const sourceScoped = adapterSupportsSourceScopedAvailability(adapter, capabilities);
        const searchable = has("scholar.search");
        const readable = has("scholar.pdf") || has("scholar.fulltext");
        // Short capability name -> command name, omitting capabilities with no command.
        const commandEntries = [];
        for (const capability of SCHOLAR_CAPABILITIES) {
            const command = coverageCommandByCapability(adapter, capability);
            if (command !== undefined) {
                commandEntries.push([capability.replace(/^scholar\./, ""), command]);
            }
        }
        return {
            source: adapter.name,
            role: sourceRole(capabilities),
            recommended_for: recommendedUses(capabilities),
            read_strategy: readStrategy(capabilities),
            handoff_strategy: coverageHandoffStrategy(capabilities, adapter),
            coverage_score: capabilities.size,
            coverage_total: SCHOLAR_CAPABILITIES.length,
            missing_closed_loop: missingClosedLoop,
            has_search: searchable,
            has_get: has("scholar.get"),
            has_pdf: has("scholar.pdf"),
            has_fulltext: has("scholar.fulltext"),
            has_code: has("scholar.code"),
            has_datasets: has("scholar.datasets"),
            has_citations: has("scholar.citations"),
            has_references: has("scholar.references"),
            has_review: has("scholar.review"),
            has_author: has("scholar.author"),
            has_venue: has("scholar.venue"),
            next_availability: sourceScoped
                ? `unicli scholar availability <ref> --source ${adapter.name}`
                : undefined,
            next_read: readable
                ? `unicli scholar read <ref> --source ${adapter.name}`
                : undefined,
            next_search: sourceSearchCommand(adapter),
            next_workflow_from_result: searchable
                ? "unicli scholar workflow <title-or-id>"
                : undefined,
            next_sources_from_result: searchable
                ? "unicli scholar sources <title-or-id>"
                : undefined,
            next_read_from_result: searchable
                ? "unicli scholar read <title-or-id>"
                : undefined,
            next_get: refCommand(getCommand),
            next_pdf: refCommand(pdfCommand),
            next_fulltext: refCommand(fulltextCommand),
            next_code: refCommand(codeCommand),
            next_datasets: refCommand(datasetsCommand),
            next_citations: refCommand(citationsCommand),
            next_references: refCommand(referencesCommand),
            next_review: refCommand(reviewCommand),
            next_author: sourceCommand(adapter.name, authorCommand, "<author>"),
            next_venue: sourceCommand(adapter.name, venueCommand, "<venue>"),
            capabilities: [...capabilities].sort(),
            commands: Object.fromEntries(commandEntries),
        };
    };
    return [...adapters].sort(byName).map(toCoverageRow);
}
|
|
1433
|
+
/**
 * Column names for coverage output.
 *
 * @param {boolean} [detailed=false] - When truthy, append the author/venue flags,
 *   the per-capability `next_*` hints, and the `capabilities` / `commands` columns.
 * @returns {string[]} Column names in display order.
 */
function coverageColumns(detailed = false) {
    const summaryColumns = [
        "source",
        "role",
        "recommended_for",
        "read_strategy",
        "handoff_strategy",
        "coverage_score",
        "coverage_total",
        "missing_closed_loop",
        "has_search",
        "has_get",
        "has_pdf",
        "has_fulltext",
        "has_code",
        "has_datasets",
        "has_citations",
        "has_references",
        "has_review",
        "next_availability",
        "next_read",
        "next_search",
        "next_workflow_from_result",
        "next_sources_from_result",
        "next_read_from_result",
    ];
    if (!detailed) {
        return summaryColumns;
    }
    return summaryColumns.concat([
        "has_author",
        "has_venue",
        "next_get",
        "next_pdf",
        "next_fulltext",
        "next_code",
        "next_datasets",
        "next_citations",
        "next_references",
        "next_review",
        "next_author",
        "next_venue",
        "capabilities",
        "commands",
    ]);
}
|
|
1479
|
+
/**
 * Derive the explicit source selection from CLI options. `opts.source` takes
 * precedence over `opts.sources`; the latter is expanded through
 * `resolveScholarSources`.
 *
 * @param {{ source?: string, sources?: string }} opts - CLI options.
 * @returns {{ explicit: boolean, sources: string[] }}
 */
function sourceFilter(opts) {
    const { source: single } = opts;
    if (single) {
        return { explicit: true, sources: [single] };
    }
    const { sources: many } = opts;
    return many
        ? { explicit: true, sources: resolveScholarSources(many) }
        : { explicit: false, sources: [] };
}
|
|
1486
|
+
/**
 * Sources that could serve a capability for a reference.
 *
 * Candidates come from the explicit source filter when one is given,
 * otherwise from every source with the capability (unknown route) or from the
 * route's preferred sources. Candidates are kept only when a registered
 * adapter of that name has a command for the capability.
 *
 * @param {string} capability - Capability identifier.
 * @param {{ kind: string, preferredSources?: string[] }} route - Parsed reference route.
 * @param {{ source?: string, sources?: string }} opts - CLI options.
 * @returns {string[]}
 */
function resolveAvailabilityCapabilitySources(capability, route, opts) {
    const { explicit, sources: requested } = sourceFilter(opts);
    let candidates;
    if (explicit) {
        candidates = requested;
    }
    else if (route.kind === "unknown") {
        candidates = listScholarSourcesByCapability(capability);
    }
    else {
        candidates = route.preferredSources;
    }
    const servesCapability = (source) => {
        const adapter = getAllAdapters().find((candidate) => candidate.name === source);
        if (!adapter) {
            return false;
        }
        return findScholarCommandByCapability(adapter, capability) !== undefined;
    };
    return candidates.filter(servesCapability);
}
|
|
1500
|
+
/**
 * Summarize source errors worth reporting, capped at 16 unique messages.
 *
 * Errors from a source that returned records in any outcome are dropped.
 * A `capability_unsupported` error is also dropped when the same source has a
 * more specific error.
 *
 * @param {Array<{ source: string, records: unknown[], error?: { code?: string } }>} outcomes
 * @returns {string[]} Formatted, de-duplicated error messages.
 */
function collectSourceErrors(outcomes) {
    const isUnsupported = (outcome) => outcome.error?.code === "capability_unsupported";
    const sourcesWithRecords = new Set();
    for (const outcome of outcomes) {
        if (outcome.records.length > 0) {
            sourcesWithRecords.add(outcome.source);
        }
    }
    const unresolved = outcomes.filter((outcome) => outcome.error && !sourcesWithRecords.has(outcome.source));
    const sourcesWithSpecificErrors = new Set();
    for (const outcome of unresolved) {
        if (!isUnsupported(outcome)) {
            sourcesWithSpecificErrors.add(outcome.source);
        }
    }
    const reportable = unresolved.filter((outcome) => !isUnsupported(outcome) || !sourcesWithSpecificErrors.has(outcome.source));
    const messages = uniqueStrings(reportable.map(formatScholarOutcomeError));
    return messages.slice(0, 16);
}
|
|
1513
|
+
/**
 * Build the availability row for a single reference from per-capability
 * lookup results.
 *
 * The representative record is the top result of a reciprocal-rank fusion of
 * the metadata, PDF, code and dataset records. Identifier and title fields
 * come from that record; resource URLs come from the first record that carries
 * the respective resource. `next_*` fields are CLI hints built from the
 * canonical reference when one can be derived, otherwise from `input.ref`.
 *
 * @param {object} input
 * @param {string} input.ref - Reference as given by the caller.
 * @param {{ kind: string, value: string }} input.route - Parsed route for `ref`.
 * @param {object} input.opts - CLI options (`source`, `sources`, `unpaywallEmail`).
 * @param {object[]} input.metadataRecords
 * @param {object[]} input.pdfRecords
 * @param {object[]} input.codeRecords
 * @param {object[]} input.datasetRecords
 * @param {string[]} input.fulltextCandidateSources
 * @param {string[]} input.citationCandidateSources
 * @param {string[]} input.referenceCandidateSources
 * @param {string[]} input.reviewCandidateSources
 * @param {string[]} input.sourceErrors
 * @returns {object} Availability row; `retrieved_at` is the current time in ISO format.
 */
export function buildScholarAvailabilityRow(input) {
    const allRecords = reciprocalRankFusion([
        input.metadataRecords,
        input.pdfRecords,
        input.codeRecords,
        input.datasetRecords,
    ], { topN: 10 });
    const representative = allRecords[0];
    const firstPdf = firstPdfRecord(input.pdfRecords);
    const codeRecord = input.codeRecords.find(hasCodeResource);
    const datasetRecord = input.datasetRecords.find(hasDatasetResource);
    const modelRecord = input.datasetRecords.find(hasModelResource);
    const spaceRecord = input.datasetRecords.find(hasSpaceResource);
    const hasRecord = representative !== undefined;
    const hasPdf = firstPdf !== undefined;
    const hasCode = codeRecord !== undefined;
    const hasDataset = datasetRecord !== undefined;
    const hasModel = modelRecord !== undefined;
    const hasSpace = spaceRecord !== undefined;
    const codeRecordHasProject = codeRecord !== undefined &&
        nonEmptyResourceField(codeRecord.project_url);
    const hasProject = codeRecordHasProject ||
        (representative !== undefined &&
            nonEmptyResourceField(representative.project_url));
    // Keep project_url consistent with has_project: `??` only skips null and
    // undefined, so an empty project_url on the code record would otherwise
    // hide the representative record's URL that made has_project true.
    const projectUrl = hasProject && !codeRecordHasProject
        ? representative.project_url
        : codeRecord?.project_url ?? representative?.project_url;
    const codeCandidateSources = resolveAvailabilityCapabilitySources("scholar.code", input.route, input.opts);
    const datasetCandidateSources = resolveAvailabilityCapabilitySources("scholar.datasets", input.route, input.opts);
    const canonical = canonicalScholarReference(input.route, allRecords);
    const commandRef = canonical?.ref ?? input.ref;
    // Review hints use the canonical ref only when it is an OpenReview ref.
    const reviewRef = canonical?.kind === "openreview" ? canonical.ref : input.ref;
    // Hints for code/datasets/graph/review commands carry only the source scope.
    const scopeOnlyOpts = () => ({
        source: input.opts.source,
        sources: input.opts.sources,
    });
    return {
        ref: input.ref,
        route_kind: input.route.kind,
        route_value: input.route.value,
        canonical_ref: canonical?.ref,
        canonical_ref_kind: canonical?.kind,
        record_found: hasRecord,
        id: representative?.id,
        title: representative?.title,
        year: representative?.year,
        doi: representative?.doi,
        arxiv_id: representative?.arxiv_id,
        pmid: representative?.pmid,
        pmc_id: representative?.pmc_id,
        openreview_id: representative?.openreview_id,
        source_adapter: representative?.source_adapter,
        source_url: representative?.source_url,
        pdf_url: firstPdf?.pdf_url,
        code_url: codeRecord?.code_url,
        project_url: projectUrl,
        dataset_url: datasetRecord?.dataset_url,
        model_urls: modelRecord?.model_urls,
        dataset_urls: datasetRecord?.dataset_urls,
        space_urls: spaceRecord?.space_urls,
        has_pdf: hasPdf,
        has_fulltext_candidate: input.fulltextCandidateSources.length > 0,
        has_code: hasCode,
        has_project: hasProject,
        has_datasets: hasDataset,
        has_models: hasModel,
        has_spaces: hasSpace,
        metadata_sources: sourcesForRecords(input.metadataRecords),
        pdf_sources: sourcesForRecords(input.pdfRecords, (record) => firstPdfRecord([record]) !== undefined),
        fulltext_candidate_sources: input.fulltextCandidateSources,
        code_sources: sourcesForRecords(input.codeRecords, hasCodeResource),
        dataset_sources: sourcesForRecords(input.datasetRecords, (record) => hasDatasetResource(record) ||
            hasModelResource(record) ||
            hasSpaceResource(record)),
        citation_candidate_sources: input.citationCandidateSources,
        reference_candidate_sources: input.referenceCandidateSources,
        review_candidate_sources: input.reviewCandidateSources,
        next_workflow: scholarCommand("workflow", commandRef, input.opts),
        next_availability: scholarCommand("availability", commandRef, input.opts),
        next_evidence: scholarCommand("evidence", commandRef, input.opts),
        next_reproduce: scholarCommand("reproduce", commandRef, input.opts),
        next_get: scholarCommand("get", commandRef, input.opts),
        next_pdf: scholarCommand("pdf", commandRef, input.opts),
        next_read: hasPdf || input.fulltextCandidateSources.length > 0
            ? scholarCommand("read", commandRef, input.opts)
            : undefined,
        next_download: hasPdf
            ? scholarCommand("download", commandRef, input.opts)
            : undefined,
        next_code: input.codeRecords.length > 0 || codeCandidateSources.length > 0
            ? scholarCommand("code", commandRef, scopeOnlyOpts())
            : undefined,
        next_datasets: input.datasetRecords.length > 0 || datasetCandidateSources.length > 0
            ? scholarCommand("datasets", commandRef, scopeOnlyOpts())
            : undefined,
        next_citations: input.citationCandidateSources.length > 0
            ? scholarCommand("citations", commandRef, scopeOnlyOpts())
            : undefined,
        next_references: input.referenceCandidateSources.length > 0
            ? scholarCommand("references", commandRef, scopeOnlyOpts())
            : undefined,
        next_reviews: input.reviewCandidateSources.length > 0
            ? scholarCommand("reviews", reviewRef, scopeOnlyOpts())
            : undefined,
        source_errors: input.sourceErrors,
        retrieved_at: new Date().toISOString(),
    };
}
|
|
1628
|
+
/**
 * Read a boolean flag from a row. Only the literal `true` counts; truthy
 * non-boolean values do not.
 *
 * @param {Record<string, unknown>} row
 * @param {string} field
 * @returns {boolean}
 */
function rowBoolean(row, field) {
    const flag = row[field];
    return flag === true;
}
|
|
1631
|
+
/**
 * Read a non-blank string from a row. The value is returned as stored (not
 * trimmed); blank strings and non-strings yield `undefined`.
 *
 * @param {Record<string, unknown>} row
 * @param {string} field
 * @returns {string | undefined}
 */
function rowString(row, field) {
    const value = row[field];
    if (typeof value !== "string") {
        return undefined;
    }
    return value.trim() === "" ? undefined : value;
}
|
|
1637
|
+
/**
 * Read an array field from a row, keeping only non-blank string entries.
 * A field that is not an array yields an empty array.
 *
 * @param {Record<string, unknown>} row
 * @param {string} field
 * @returns {string[]} A new array; entries are returned as stored (not trimmed).
 */
function rowStringArray(row, field) {
    const value = row[field];
    const strings = [];
    if (Array.isArray(value)) {
        for (const entry of value) {
            if (typeof entry === "string" && entry.trim() !== "") {
                strings.push(entry);
            }
        }
    }
    return strings;
}
|
|
1643
|
+
/**
 * Options for a lookup against the canonical reference sources: the source
 * list is replaced by `CANONICAL_REFERENCE_SOURCES` (comma-joined) and only
 * `unpaywallEmail` is carried over from the caller's options.
 *
 * @param {{ unpaywallEmail?: string }} opts - Caller options.
 * @returns {{ sources: string, unpaywallEmail: string | undefined }}
 */
function canonicalReferenceLookupOpts(opts) {
    const sources = CANONICAL_REFERENCE_SOURCES.join(",");
    const { unpaywallEmail } = opts;
    return { sources, unpaywallEmail };
}
|
|
1649
|
+
/**
 * Try to turn a reference of unknown kind into a canonical reference by
 * running an availability lookup.
 *
 * References that already resolve to a known kind are returned unchanged.
 * Otherwise the lookup runs with the caller's options; if that yields no
 * canonical reference, and both the fallback is enabled and the caller scoped
 * the lookup with `source`/`sources`, a second lookup runs against the
 * canonical reference sources.
 *
 * @param {string} ref - Reference to resolve.
 * @param {object} opts - CLI options.
 * @param {{ fallbackToCanonicalSources?: boolean }} input
 * @returns {Promise<string>} The canonical reference, or `ref` when none is found.
 */
async function resolveUnknownRefViaAvailability(ref, opts, input) {
    const alreadyRouted = resolveScholarReference(ref).kind !== "unknown";
    if (alreadyRouted) {
        return ref;
    }
    const { row: scopedRow } = await collectAvailabilityEvidence(ref, opts);
    const scopedRef = rowString(scopedRow, "canonical_ref");
    if (scopedRef) {
        return scopedRef;
    }
    // An unscoped lookup gains nothing from the fallback, so skip it.
    if (!(input.fallbackToCanonicalSources && (opts.source || opts.sources))) {
        return ref;
    }
    const { row: broadRow } = await collectAvailabilityEvidence(ref, canonicalReferenceLookupOpts(opts));
    return rowString(broadRow, "canonical_ref") ?? ref;
}
|
|
1662
|
+
/**
 * Reference to use for a citation/reference graph lookup. Only
 * `scholar.citations` and `scholar.references` trigger canonical resolution;
 * any other capability gets `ref` back unchanged.
 *
 * @param {string} capability - Capability identifier.
 * @param {string} ref - Reference as given by the caller.
 * @param {object} opts - CLI options.
 * @returns {Promise<string>}
 */
async function resolveGraphLookupRef(capability, ref, opts) {
    const isGraphCapability = capability === "scholar.citations" ||
        capability === "scholar.references";
    if (!isGraphCapability) {
        return ref;
    }
    return resolveUnknownRefViaAvailability(ref, opts, {
        fallbackToCanonicalSources: true,
    });
}
|
|
1671
|
+
/**
 * Reference to use for an artifact lookup. Canonical resolution runs only
 * when the caller scoped the lookup with `source` or `sources`; otherwise
 * `ref` is returned unchanged.
 *
 * @param {string} ref - Reference as given by the caller.
 * @param {{ source?: string, sources?: string }} opts - CLI options.
 * @returns {Promise<string>}
 */
async function resolveArtifactLookupRef(ref, opts) {
    const isSourceScoped = Boolean(opts.source || opts.sources);
    if (isSourceScoped) {
        return resolveUnknownRefViaAvailability(ref, opts, {
            fallbackToCanonicalSources: true,
        });
    }
    return ref;
}
|
|
1678
|
+
/**
 * List the identifiers present on an availability row as `scheme:value`
 * strings, in the order doi, arxiv, pmid, pmc, openreview. Fields that are
 * missing or blank are omitted.
 *
 * @param {Record<string, unknown>} row - Availability row.
 * @returns {string[]}
 */
function availabilityIdentifiers(row) {
    const schemes = [
        ["doi", "doi"],
        ["arxiv", "arxiv_id"],
        ["pmid", "pmid"],
        ["pmc", "pmc_id"],
        ["openreview", "openreview_id"],
    ];
    const identifiers = [];
    for (const [scheme, field] of schemes) {
        const value = rowString(row, field);
        if (value) {
            identifiers.push(`${scheme}:${value}`);
        }
    }
    return identifiers;
}
|
|
1691
|
+
/**
 * Pick the primary source for an availability row: the row's
 * `source_adapter` if set, otherwise the first entry of the first non-empty
 * source list (metadata, pdf, fulltext candidates, code, datasets).
 *
 * @param {Record<string, unknown>} row - Availability row.
 * @returns {string | undefined}
 */
function availabilityPrimarySource(row) {
    const adapterName = rowString(row, "source_adapter");
    if (adapterName !== undefined) {
        return adapterName;
    }
    const sourceLists = [
        "metadata_sources",
        "pdf_sources",
        "fulltext_candidate_sources",
        "code_sources",
        "dataset_sources",
    ];
    for (const field of sourceLists) {
        const [first] = rowStringArray(row, field);
        if (first !== undefined) {
            return first;
        }
    }
    return undefined;
}
|
|
1699
|
+
/**
 * Pick the primary evidence URL for an availability row: the first non-blank
 * value among `source_url`, `pdf_url`, `project_url`, `code_url`,
 * `dataset_url`.
 *
 * @param {Record<string, unknown>} row - Availability row.
 * @returns {string | undefined}
 */
function availabilityPrimaryEvidenceUrl(row) {
    const urlFields = [
        "source_url",
        "pdf_url",
        "project_url",
        "code_url",
        "dataset_url",
    ];
    for (const field of urlFields) {
        const url = rowString(row, field);
        if (url !== undefined) {
            return url;
        }
    }
    return undefined;
}
|
|
1706
|
+
/**
 * List the evidence categories an availability row lacks. A category is
 * reported when all of its boolean flags are unset, or all of its candidate
 * source lists are empty. Labels follow the order of the checks below.
 *
 * @param {Record<string, unknown>} row - Availability row.
 * @returns {string[]}
 */
function availabilityMissingEvidence(row) {
    const noFlagSet = (...fields) => fields.every((field) => !rowBoolean(row, field));
    const noSourceListed = (...fields) => fields.every((field) => rowStringArray(row, field).length === 0);
    const checks = [
        ["metadata", noFlagSet("record_found")],
        ["readable-text", noFlagSet("has_pdf", "has_fulltext_candidate")],
        ["code/project", noFlagSet("has_code", "has_project")],
        ["datasets/models/spaces", noFlagSet("has_datasets", "has_models", "has_spaces")],
        [
            "citation/reference-graph",
            noSourceListed("citation_candidate_sources", "reference_candidate_sources"),
        ],
        ["peer-review-audit", noSourceListed("review_candidate_sources")],
    ];
    return checks
        .filter(([, isMissing]) => isMissing)
        .map(([label]) => label);
}
|
|
1731
|
+
/**
 * Concatenate the records of every outcome that matches both the source and
 * the capability, preserving outcome order.
 *
 * @param {Array<{ source: string, capability: string, records: object[] }>} outcomes
 * @param {string} source - Adapter name to match.
 * @param {string} capability - Capability identifier to match.
 * @returns {object[]}
 */
function sourceRecordsForCapability(outcomes, source, capability) {
    return outcomes.flatMap((outcome) => {
        const matches = outcome.source === source && outcome.capability === capability;
        return matches ? outcome.records : [];
    });
}
|
|
1736
|
+
/**
 * Unique formatted error messages for every failed outcome of one source.
 *
 * @param {Array<{ source: string, error?: object }>} outcomes
 * @param {string} source - Adapter name to match.
 * @returns {string[]}
 */
function sourceOutcomeErrors(outcomes, source) {
    const failedForSource = outcomes.filter((outcome) => {
        if (outcome.source !== source) {
            return false;
        }
        return outcome.error;
    });
    const messages = failedForSource.map(formatScholarOutcomeError);
    return uniqueStrings(messages);
}
|
|
1741
|
+
/**
 * Unique formatted error messages for one source, ignoring errors whose code
 * is `capability_unsupported`.
 *
 * @param {Array<{ source: string, error?: { code?: string } }>} outcomes
 * @param {string} source - Adapter name to match.
 * @returns {string[]}
 */
function sourceBlockingErrors(outcomes, source) {
    const isBlocking = (outcome) => {
        if (outcome.source !== source || !outcome.error) {
            return false;
        }
        return outcome.error.code !== "capability_unsupported";
    };
    const messages = outcomes.filter(isBlocking).map(formatScholarOutcomeError);
    return uniqueStrings(messages);
}
|
|
1748
|
+
/**
 * Sorted, de-duplicated list of `scholar.*` capabilities declared by the
 * commands of the named adapter. An unknown adapter name yields an empty list.
 *
 * @param {string} source - Adapter name.
 * @returns {string[]}
 */
function sourceCapabilities(source) {
    const adapter = getAllAdapters().find((candidate) => candidate.name === source);
    if (!adapter) {
        return [];
    }
    const declared = Object.values(adapter.commands).flatMap((command) => [...(command.capabilities ?? [])]);
    const scholarOnly = declared.filter((capability) => capability.startsWith("scholar."));
    return [...new Set(scholarOnly)].sort();
}
|
|
1761
|
+
/**
 * Build a capability set from strings, keeping only entries listed in
 * `SCHOLAR_CAPABILITIES`.
 *
 * @param {string[]} capabilities - Candidate capability identifiers.
 * @returns {Set<string>}
 */
function scholarCapabilitySetFromStrings(capabilities) {
    const known = new Set();
    for (const capability of capabilities) {
        if (SCHOLAR_CAPABILITIES.includes(capability)) {
            known.add(capability);
        }
    }
    return known;
}
|
|
1764
|
+
/**
 * Search CLI hint for the adapter with the given name, or `undefined` when no
 * such adapter is registered.
 *
 * @param {string} source - Adapter name.
 * @returns {string | undefined}
 */
function sourceSearchCommandByName(source) {
    const adapter = getAllAdapters().find((candidate) => candidate.name === source);
    if (!adapter) {
        return undefined;
    }
    return sourceSearchCommand(adapter);
}
|
|
1768
|
+
/**
 * Build a `unicli scholar` command hint pinned to a single source. Of the
 * caller's options only `unpaywallEmail` is forwarded.
 *
 * @param {string} subcommand - Scholar subcommand name.
 * @param {string} ref - Reference argument.
 * @param {string} source - Adapter name to pin via the `source` option.
 * @param {{ unpaywallEmail?: string }} [opts={}] - Caller options.
 * @returns {string} Whatever `scholarCommand` returns for these arguments.
 */
function sourceScopedScholarCommand(subcommand, ref, source, opts = {}) {
    const { unpaywallEmail } = opts;
    const scopedOpts = { source, unpaywallEmail };
    return scholarCommand(subcommand, ref, scopedOpts);
}
|
|
1774
|
+
/**
 * Reduce a source's audit flags to a single status label. Returned evidence
 * outranks candidate evidence, which outranks errors.
 *
 * @param {object} input
 * @param {boolean} [input.hasReturnedEvidence]
 * @param {boolean} [input.hasCandidateEvidence]
 * @param {boolean} [input.hasBlockingErrors]
 * @param {boolean} [input.hasOnlyUnsupportedErrors]
 * @returns {string} Status label.
 */
function sourceStatus(input) {
    const { hasReturnedEvidence, hasCandidateEvidence, hasBlockingErrors, hasOnlyUnsupportedErrors, } = input;
    if (hasReturnedEvidence) {
        return "evidence_found";
    }
    if (hasCandidateEvidence) {
        return hasBlockingErrors ? "candidate_with_errors" : "candidate_only";
    }
    if (hasBlockingErrors) {
        return "error";
    }
    return hasOnlyUnsupportedErrors ? "unsupported" : "no_evidence";
}
|
|
1787
|
+
/**
 * Translate a source's evidence flags into evidence-type labels. A label is
 * included when its flag is truthy; labels follow the table order.
 *
 * @param {Record<string, unknown>} input - Flags such as `hasMetadata`, `hasPdf`, `hasReviewCandidate`.
 * @returns {string[]}
 */
function sourceAuditEvidenceTypes(input) {
    const labelByFlag = [
        ["hasMetadata", "metadata"],
        ["hasPdf", "pdf"],
        ["hasFulltextCandidate", "fulltext-candidate"],
        ["hasCode", "code"],
        ["hasProject", "project"],
        ["hasDatasets", "datasets"],
        ["hasModels", "models"],
        ["hasSpaces", "spaces"],
        ["hasCitationCandidate", "citation-candidate"],
        ["hasReferenceCandidate", "reference-candidate"],
        ["hasReviewCandidate", "review-candidate"],
    ];
    return labelByFlag
        .filter(([flag]) => input[flag])
        .map(([, label]) => label);
}
|
|
1813
|
+
export function buildScholarSourceAuditRows(availability, outcomes, opts = {}) {
|
|
1814
|
+
const ref = String(availability.ref ?? "");
|
|
1815
|
+
const commandRef = rowString(availability, "canonical_ref") ?? ref;
|
|
1816
|
+
const canonicalRefKind = rowString(availability, "canonical_ref_kind");
|
|
1817
|
+
const reviewRef = canonicalRefKind === "openreview" ? commandRef : ref;
|
|
1818
|
+
const sources = uniqueStrings([
|
|
1819
|
+
...outcomes.map((outcome) => outcome.source),
|
|
1820
|
+
...rowStringArray(availability, "metadata_sources"),
|
|
1821
|
+
...rowStringArray(availability, "pdf_sources"),
|
|
1822
|
+
...rowStringArray(availability, "fulltext_candidate_sources"),
|
|
1823
|
+
...rowStringArray(availability, "code_sources"),
|
|
1824
|
+
...rowStringArray(availability, "dataset_sources"),
|
|
1825
|
+
...rowStringArray(availability, "citation_candidate_sources"),
|
|
1826
|
+
...rowStringArray(availability, "reference_candidate_sources"),
|
|
1827
|
+
...rowStringArray(availability, "review_candidate_sources"),
|
|
1828
|
+
]);
|
|
1829
|
+
return sources.map((source) => {
|
|
1830
|
+
const metadataRecords = sourceRecordsForCapability(outcomes, source, "scholar.get");
|
|
1831
|
+
const pdfRecords = sourceRecordsForCapability(outcomes, source, "scholar.pdf");
|
|
1832
|
+
const pdfEvidenceRecords = pdfRecords.filter((record) => firstPdfRecord([record]) !== undefined);
|
|
1833
|
+
const codeRecords = sourceRecordsForCapability(outcomes, source, "scholar.code");
|
|
1834
|
+
const codeEvidenceRecords = codeRecords.filter(hasCodeResource);
|
|
1835
|
+
const datasetRecords = sourceRecordsForCapability(outcomes, source, "scholar.datasets");
|
|
1836
|
+
const datasetEvidenceRecords = datasetRecords.filter((record) => hasDatasetResource(record) ||
|
|
1837
|
+
hasModelResource(record) ||
|
|
1838
|
+
hasSpaceResource(record));
|
|
1839
|
+
const sourceRecords = reciprocalRankFusion([
|
|
1840
|
+
metadataRecords,
|
|
1841
|
+
pdfEvidenceRecords,
|
|
1842
|
+
codeEvidenceRecords,
|
|
1843
|
+
datasetEvidenceRecords,
|
|
1844
|
+
], { topN: 10 });
|
|
1845
|
+
const representative = sourceRecords[0];
|
|
1846
|
+
const firstPdf = firstPdfRecord(pdfEvidenceRecords);
|
|
1847
|
+
const codeRecord = codeEvidenceRecords.find(hasCodeResource);
|
|
1848
|
+
const datasetRecord = datasetEvidenceRecords.find(hasDatasetResource);
|
|
1849
|
+
const modelRecord = datasetEvidenceRecords.find(hasModelResource);
|
|
1850
|
+
const spaceRecord = datasetEvidenceRecords.find(hasSpaceResource);
|
|
1851
|
+
const hasMetadata = metadataRecords.length > 0;
|
|
1852
|
+
const hasPdf = firstPdf !== undefined;
|
|
1853
|
+
const hasCode = codeRecord !== undefined;
|
|
1854
|
+
const hasProject = (codeRecord !== undefined &&
|
|
1855
|
+
nonEmptyResourceField(codeRecord.project_url)) ||
|
|
1856
|
+
(representative !== undefined &&
|
|
1857
|
+
nonEmptyResourceField(representative.project_url));
|
|
1858
|
+
const hasDatasets = datasetRecord !== undefined;
|
|
1859
|
+
const hasModels = modelRecord !== undefined;
|
|
1860
|
+
const hasSpaces = spaceRecord !== undefined;
|
|
1861
|
+
const hasFulltextCandidate = rowStringArray(availability, "fulltext_candidate_sources").includes(source);
|
|
1862
|
+
const hasCitationCandidate = rowStringArray(availability, "citation_candidate_sources").includes(source);
|
|
1863
|
+
const hasReferenceCandidate = rowStringArray(availability, "reference_candidate_sources").includes(source);
|
|
1864
|
+
const hasReviewCandidate = rowStringArray(availability, "review_candidate_sources").includes(source);
|
|
1865
|
+
const evidenceTypes = sourceAuditEvidenceTypes({
|
|
1866
|
+
hasMetadata,
|
|
1867
|
+
hasPdf,
|
|
1868
|
+
hasFulltextCandidate,
|
|
1869
|
+
hasCode,
|
|
1870
|
+
hasProject,
|
|
1871
|
+
hasDatasets,
|
|
1872
|
+
hasModels,
|
|
1873
|
+
hasSpaces,
|
|
1874
|
+
hasCitationCandidate,
|
|
1875
|
+
hasReferenceCandidate,
|
|
1876
|
+
hasReviewCandidate,
|
|
1877
|
+
});
|
|
1878
|
+
const errors = sourceOutcomeErrors(outcomes, source);
|
|
1879
|
+
const rawBlockingErrors = sourceBlockingErrors(outcomes, source);
|
|
1880
|
+
const capabilities = sourceCapabilities(source);
|
|
1881
|
+
const capabilitySet = scholarCapabilitySetFromStrings(capabilities);
|
|
1882
|
+
const adapter = getAllAdapters().find((candidate) => candidate.name === source);
|
|
1883
|
+
const hasSourceScopedAvailability = adapter !== undefined
|
|
1884
|
+
? adapterSupportsSourceScopedAvailability(adapter, capabilitySet)
|
|
1885
|
+
: supportsSourceScopedAvailability(capabilitySet);
|
|
1886
|
+
const hasSearch = capabilitySet.has("scholar.search");
|
|
1887
|
+
const candidateCapabilities = [
|
|
1888
|
+
hasFulltextCandidate ? "scholar.fulltext" : undefined,
|
|
1889
|
+
hasCitationCandidate ? "scholar.citations" : undefined,
|
|
1890
|
+
hasReferenceCandidate ? "scholar.references" : undefined,
|
|
1891
|
+
hasReviewCandidate ? "scholar.review" : undefined,
|
|
1892
|
+
].filter((capability) => capability !== undefined);
|
|
1893
|
+
const executedCapabilities = uniqueStrings(outcomes
|
|
1894
|
+
.filter((outcome) => outcome.source === source)
|
|
1895
|
+
.map((outcome) => outcome.capability));
|
|
1896
|
+
const hasReturnedEvidence = hasMetadata ||
|
|
1897
|
+
hasPdf ||
|
|
1898
|
+
hasCode ||
|
|
1899
|
+
hasProject ||
|
|
1900
|
+
hasDatasets ||
|
|
1901
|
+
hasModels ||
|
|
1902
|
+
hasSpaces;
|
|
1903
|
+
const hasCandidateEvidence = candidateCapabilities.length > 0;
|
|
1904
|
+
const blockingErrors = hasReturnedEvidence ? [] : rawBlockingErrors;
|
|
1905
|
+
const recoveredErrors = hasReturnedEvidence ? rawBlockingErrors : [];
|
|
1906
|
+
const hasOnlyUnsupportedErrors = errors.length > 0 && blockingErrors.length === 0;
|
|
1907
|
+
return {
|
|
1908
|
+
ref,
|
|
1909
|
+
route_kind: availability.route_kind,
|
|
1910
|
+
canonical_ref: availability.canonical_ref,
|
|
1911
|
+
canonical_ref_kind: availability.canonical_ref_kind,
|
|
1912
|
+
source,
|
|
1913
|
+
source_status: sourceStatus({
|
|
1914
|
+
hasReturnedEvidence,
|
|
1915
|
+
hasCandidateEvidence,
|
|
1916
|
+
hasBlockingErrors: blockingErrors.length > 0,
|
|
1917
|
+
hasOnlyUnsupportedErrors,
|
|
1918
|
+
}),
|
|
1919
|
+
evidence_types: evidenceTypes,
|
|
1920
|
+
handoff_strategy: coverageHandoffStrategy(capabilitySet, adapter),
|
|
1921
|
+
record_count: sourceRecords.length,
|
|
1922
|
+
has_metadata: hasMetadata,
|
|
1923
|
+
has_pdf: hasPdf,
|
|
1924
|
+
has_fulltext_candidate: hasFulltextCandidate,
|
|
1925
|
+
has_code: hasCode,
|
|
1926
|
+
has_project: hasProject,
|
|
1927
|
+
has_datasets: hasDatasets,
|
|
1928
|
+
has_models: hasModels,
|
|
1929
|
+
has_spaces: hasSpaces,
|
|
1930
|
+
has_citation_candidate: hasCitationCandidate,
|
|
1931
|
+
has_reference_candidate: hasReferenceCandidate,
|
|
1932
|
+
has_review_candidate: hasReviewCandidate,
|
|
1933
|
+
title: representative?.title,
|
|
1934
|
+
year: representative?.year,
|
|
1935
|
+
primary_evidence_url: representative?.source_url ??
|
|
1936
|
+
firstPdf?.pdf_url ??
|
|
1937
|
+
codeRecord?.project_url ??
|
|
1938
|
+
codeRecord?.code_url ??
|
|
1939
|
+
datasetRecord?.dataset_url,
|
|
1940
|
+
pdf_url: firstPdf?.pdf_url,
|
|
1941
|
+
code_url: codeRecord?.code_url,
|
|
1942
|
+
project_url: codeRecord?.project_url ?? representative?.project_url,
|
|
1943
|
+
dataset_url: datasetRecord?.dataset_url,
|
|
1944
|
+
model_urls: modelRecord?.model_urls,
|
|
1945
|
+
dataset_urls: datasetRecord?.dataset_urls,
|
|
1946
|
+
space_urls: spaceRecord?.space_urls,
|
|
1947
|
+
capabilities,
|
|
1948
|
+
executed_capabilities: executedCapabilities,
|
|
1949
|
+
candidate_capabilities: candidateCapabilities,
|
|
1950
|
+
source_errors: errors,
|
|
1951
|
+
blocking_errors: blockingErrors,
|
|
1952
|
+
recovered_errors: recoveredErrors,
|
|
1953
|
+
next_source_availability: hasSourceScopedAvailability
|
|
1954
|
+
? sourceScopedScholarCommand("availability", commandRef, source, opts)
|
|
1955
|
+
: undefined,
|
|
1956
|
+
next_search: hasSearch ? sourceSearchCommandByName(source) : undefined,
|
|
1957
|
+
next_workflow_from_result: hasSearch
|
|
1958
|
+
? "unicli scholar workflow <title-or-id>"
|
|
1959
|
+
: undefined,
|
|
1960
|
+
next_sources_from_result: hasSearch
|
|
1961
|
+
? "unicli scholar sources <title-or-id>"
|
|
1962
|
+
: undefined,
|
|
1963
|
+
next_read_from_result: hasSearch
|
|
1964
|
+
? "unicli scholar read <title-or-id>"
|
|
1965
|
+
: undefined,
|
|
1966
|
+
next_read: hasPdf || hasFulltextCandidate
|
|
1967
|
+
? sourceScopedScholarCommand("read", commandRef, source, opts)
|
|
1968
|
+
: undefined,
|
|
1969
|
+
next_download: hasPdf
|
|
1970
|
+
? sourceScopedScholarCommand("download", commandRef, source, opts)
|
|
1971
|
+
: undefined,
|
|
1972
|
+
next_code: hasCode || capabilities.includes("scholar.code")
|
|
1973
|
+
? sourceScopedScholarCommand("code", commandRef, source, opts)
|
|
1974
|
+
: undefined,
|
|
1975
|
+
next_datasets: hasDatasets ||
|
|
1976
|
+
hasModels ||
|
|
1977
|
+
hasSpaces ||
|
|
1978
|
+
capabilities.includes("scholar.datasets")
|
|
1979
|
+
? sourceScopedScholarCommand("datasets", commandRef, source, opts)
|
|
1980
|
+
: undefined,
|
|
1981
|
+
next_citations: hasCitationCandidate
|
|
1982
|
+
? sourceScopedScholarCommand("citations", commandRef, source, opts)
|
|
1983
|
+
: undefined,
|
|
1984
|
+
next_references: hasReferenceCandidate
|
|
1985
|
+
? sourceScopedScholarCommand("references", commandRef, source, opts)
|
|
1986
|
+
: undefined,
|
|
1987
|
+
next_reviews: hasReviewCandidate
|
|
1988
|
+
? sourceScopedScholarCommand("reviews", reviewRef, source, opts)
|
|
1989
|
+
: undefined,
|
|
1990
|
+
next_reproduce: hasReturnedEvidence || hasCandidateEvidence
|
|
1991
|
+
? sourceScopedScholarCommand("reproduce", commandRef, source, opts)
|
|
1992
|
+
: undefined,
|
|
1993
|
+
retrieved_at: new Date().toISOString(),
|
|
1994
|
+
};
|
|
1995
|
+
});
|
|
1996
|
+
}
|
|
1997
|
+
/**
 * Lists the column keys for source-audit rows.
 *
 * @param {boolean} [detailed=false] - When truthy, the extended columns are
 *   appended after the compact set.
 * @returns {string[]} A newly created array of column keys on every call.
 */
function sourceAuditColumns(detailed = false) {
    const compactColumns = [
        "ref",
        "route_kind",
        "canonical_ref",
        "canonical_ref_kind",
        "source",
        "source_status",
        "evidence_types",
        "handoff_strategy",
        "record_count",
        "has_metadata",
        "has_pdf",
        "has_fulltext_candidate",
        "has_code",
        "has_datasets",
        "has_citation_candidate",
        "has_reference_candidate",
        "has_review_candidate",
        "title",
        "year",
        "next_source_availability",
        "next_search",
        "next_workflow_from_result",
        "next_sources_from_result",
        "next_read_from_result",
        "next_read",
        "next_reproduce",
    ];
    if (!detailed) {
        return compactColumns;
    }
    // Extended columns always follow the compact ones, in this order.
    const extendedColumns = [
        "has_project",
        "has_models",
        "has_spaces",
        "primary_evidence_url",
        "pdf_url",
        "code_url",
        "project_url",
        "dataset_url",
        "model_urls",
        "dataset_urls",
        "space_urls",
        "capabilities",
        "executed_capabilities",
        "candidate_capabilities",
        "source_errors",
        "blocking_errors",
        "recovered_errors",
        "next_download",
        "next_code",
        "next_datasets",
        "next_citations",
        "next_references",
        "next_reviews",
        "retrieved_at",
    ];
    return compactColumns.concat(extendedColumns);
}
|
|
2056
|
+
/**
 * Reports whether an availability row exposes something readable: its
 * `has_pdf` flag or, failing that, its `has_fulltext_candidate` flag.
 *
 * @param {object} row - Availability row handed to `rowBoolean`.
 * @returns {boolean} The first truthy flag value, otherwise the value of the
 *   full-text flag.
 */
function availabilityHasReadableSource(row) {
    let readable = rowBoolean(row, "has_pdf");
    if (!readable) {
        // The full-text flag is only consulted when no PDF is flagged.
        readable = rowBoolean(row, "has_fulltext_candidate");
    }
    return readable;
}
|
|
2059
|
+
/**
 * Reports whether an availability row has a primary anchor: at least one
 * entry from `availabilityIdentifiers`, or a defined result from
 * `availabilityPrimaryEvidenceUrl`.
 *
 * @param {object} row - Availability row.
 * @returns {boolean} True when either anchor is present.
 */
function availabilityHasPrimaryAnchor(row) {
    if (availabilityIdentifiers(row).length > 0) {
        return true;
    }
    // No identifiers: fall back to the primary evidence URL.
    return availabilityPrimaryEvidenceUrl(row) !== undefined;
}
|
|
2063
|
+
/**
 * Reports whether an availability row flags any resource evidence
 * (code, project, datasets, models, or spaces).
 *
 * @param {object} row - Availability row handed to `rowBoolean`.
 * @returns {boolean} The first truthy flag value, otherwise the value of the
 *   last flag checked.
 */
function availabilityHasResourceEvidence(row) {
    const resourceFlags = [
        "has_code",
        "has_project",
        "has_datasets",
        "has_models",
        "has_spaces",
    ];
    let evidence = false;
    for (const flag of resourceFlags) {
        evidence = rowBoolean(row, flag);
        if (evidence) {
            // Stop at the first flag that is set; later flags are not read.
            break;
        }
    }
    return evidence;
}
|
|
2070
|
+
/**
 * Builds one runbook step object.
 *
 * The candidate fields are passed through `definedEntries` and rebuilt with
 * `Object.fromEntries` (presumably dropping undefined fields; confirm
 * against `definedEntries`).
 *
 * @param {number} order - Position of the step in the runbook.
 * @param {string} step - Step identifier.
 * @param {string} status - Step status label.
 * @param {object} input - Step details: `command`, `commands`, `doneWhen`, `guard`.
 * @returns {object} The step object.
 */
function workflowStep(order, step, status, input) {
    const { command, commands, doneWhen, guard } = input;
    // A missing or empty command list is reported as undefined.
    const listedCommands = commands && commands.length > 0 ? commands : undefined;
    const fields = {
        order,
        step,
        status,
        command,
        commands: listedCommands,
        done_when: doneWhen,
        guard,
    };
    return Object.fromEntries(definedEntries(fields));
}
|
|
2083
|
+
/**
 * Maps the four workflow facts to a status label.
 *
 * @param {boolean} recordFound - Whether a source record exists.
 * @param {boolean} hasPrimaryAnchor - Whether a primary anchor exists.
 * @param {boolean} hasReadableSource - Whether a readable source exists.
 * @param {boolean} hasResourceEvidence - Whether resource evidence exists.
 * @returns {string} One of the workflow status labels.
 */
function workflowStatus(recordFound, hasPrimaryAnchor, hasReadableSource, hasResourceEvidence) {
    if (recordFound && hasPrimaryAnchor) {
        if (hasReadableSource) {
            return "ready_for_agent_reading";
        }
        // No readable text: the label depends on whether resources were found.
        return hasResourceEvidence
            ? "resources_found_needs_source_text"
            : "metadata_only_needs_source_text";
    }
    // A missing record takes precedence over a missing anchor.
    return recordFound
        ? "metadata_without_primary_anchor"
        : "blocked_no_source_record";
}
|
|
2094
|
+
/**
 * Picks the next workflow step from the first prerequisite that is not met.
 *
 * @param {boolean} recordFound - Whether a source record exists.
 * @param {boolean} hasPrimaryAnchor - Whether a primary anchor exists.
 * @param {boolean} hasReadableSource - Whether a readable source exists.
 * @returns {string} The next-step label.
 */
function workflowNextStep(recordFound, hasPrimaryAnchor, hasReadableSource) {
    // Prerequisites in priority order, each paired with its remedy step.
    const prerequisites = [
        [recordFound, "resolve_source_record"],
        [hasPrimaryAnchor, "verify_primary_anchor"],
        [hasReadableSource, "find_readable_source"],
    ];
    const firstUnmet = prerequisites.find(([satisfied]) => !satisfied);
    return firstUnmet ? firstUnmet[1] : "run_next_read_before_quoting_claims";
}
|
|
2103
|
+
/**
 * Lists the workflow milestones an availability row has already reached.
 *
 * @param {object} availability - Availability row read via `rowBoolean` / `rowString`.
 * @param {boolean} hasPrimaryAnchor - Whether a primary anchor exists.
 * @param {boolean} hasReadableSource - Whether a readable source exists.
 * @param {boolean} hasResourceEvidence - Whether resource evidence exists.
 * @returns {string[]} Milestone labels in a fixed order.
 */
function completedWorkflowSteps(availability, hasPrimaryAnchor, hasReadableSource, hasResourceEvidence) {
    // Each entry pairs a condition with the milestone it unlocks.
    const milestones = [
        [rowBoolean(availability, "record_found"), "source_record_found"],
        [hasPrimaryAnchor, "primary_anchor_found"],
        [hasReadableSource, "readable_source_found"],
        [rowString(availability, "next_download"), "downloadable_pdf_found"],
        [
            rowString(availability, "next_citations") ||
                rowString(availability, "next_references"),
            "citation_reference_candidate_found",
        ],
        [rowString(availability, "next_reviews"), "peer_review_candidate_found"],
        [hasResourceEvidence, "reproducibility_resource_found"],
    ];
    return milestones
        .filter(([reached]) => reached)
        .map(([, label]) => label);
}
|
|
2123
|
+
/**
 * Lists the workflow items still outstanding for an availability row.
 *
 * @param {object} availability - Availability row read via `rowBoolean` / `rowString`.
 * @param {boolean} hasPrimaryAnchor - Whether a primary anchor exists.
 * @param {boolean} hasReadableSource - Whether a readable source exists.
 * @param {boolean} hasResourceEvidence - Whether resource evidence exists.
 * @returns {string[]} Pending item labels in a fixed order.
 */
function pendingWorkflowSteps(availability, hasPrimaryAnchor, hasReadableSource, hasResourceEvidence) {
    const found = rowBoolean(availability, "record_found");
    // Each entry pairs a condition with the item that is pending when it holds.
    const checklist = [
        [!found, "source_record"],
        [found && !hasPrimaryAnchor, "primary_anchor"],
        [found && !hasReadableSource, "readable_source"],
        [hasReadableSource, "source_text_reading"],
        [
            !rowString(availability, "next_citations") &&
                !rowString(availability, "next_references"),
            "citation_reference_graph",
        ],
        [!rowString(availability, "next_reviews"), "peer_review_audit"],
        [!hasResourceEvidence, "code_data_model_resources"],
        [hasResourceEvidence, "resource_inspection"],
    ];
    return checklist
        .filter(([outstanding]) => outstanding)
        .map(([, label]) => label);
}
|
|
2146
|
+
/**
 * Lists the workflow activities that cannot proceed for an availability row.
 *
 * Without a source record a fixed list of six activities is returned.
 * Otherwise each activity is listed only when its prerequisite is missing.
 *
 * @param {object} availability - Availability row read via `rowBoolean` / `rowString`.
 * @param {boolean} hasReadableSource - Whether a readable source exists.
 * @param {boolean} hasResourceEvidence - Whether resource evidence exists.
 * @returns {string[]} Blocked activity labels in a fixed order.
 */
function blockedWorkflowSteps(availability, hasReadableSource, hasResourceEvidence) {
    if (!rowBoolean(availability, "record_found")) {
        return [
            "source_reading",
            "claim_quotation",
            "artifact_download",
            "citation_reference_audit",
            "peer_review_audit",
            "reproducibility_audit",
        ];
    }
    const readingBlocks = hasReadableSource
        ? []
        : ["source_reading", "claim_quotation", "artifact_download"];
    const lacksGraphCommand = !rowString(availability, "next_citations") &&
        !rowString(availability, "next_references");
    const graphBlocks = lacksGraphCommand ? ["citation_reference_audit"] : [];
    const reviewBlocks = rowString(availability, "next_reviews")
        ? []
        : ["peer_review_audit"];
    const resourceBlocks = hasResourceEvidence
        ? []
        : ["reproducibility_installation"];
    return [...readingBlocks, ...graphBlocks, ...reviewBlocks, ...resourceBlocks];
}
|
|
2174
|
+
/**
 * Builds the ordered eight-step agent runbook for an availability row.
 *
 * Each step is built with `workflowStep` and carries the matching `next_*`
 * command(s) from the row plus fixed `doneWhen` / `guard` text.
 *
 * @param {object} availability - Availability row read via `rowString`.
 * @param {boolean} recordFound - Whether a source record exists.
 * @returns {object[]} The runbook steps in execution order.
 */
function workflowRunbook(availability, recordFound) {
    const commandFor = (key) => rowString(availability, key);
    const isPresent = (command) => command !== undefined;
    const gate = (open) => (open ? "ready" : "blocked");
    const graphCommands = [
        commandFor("next_citations"),
        commandFor("next_references"),
    ].filter(isPresent);
    const resourceCommands = [
        commandFor("next_code"),
        commandFor("next_datasets"),
    ].filter(isPresent);
    return [
        // Step 1 is the only one reported as "complete" rather than "ready".
        workflowStep(1, "availability_audit", recordFound ? "complete" : "blocked", {
            command: commandFor("next_availability"),
            doneWhen: "record_found is true and source_errors are inspected",
            guard: "do not cite, download, or reproduce without a source-backed record",
        }),
        workflowStep(2, "evidence_classification", gate(recordFound), {
            command: commandFor("next_evidence"),
            doneWhen: "citation_safety and claim_boundary are explicit",
            guard: "metadata-only evidence cannot support paper claims",
        }),
        workflowStep(3, "read_source_text", gate(commandFor("next_read")), {
            command: commandFor("next_read"),
            doneWhen: "source-direct full text or extracted PDF text is returned",
            guard: "quote claims only from the returned source text",
        }),
        workflowStep(4, "download_artifact", gate(commandFor("next_download")), {
            command: commandFor("next_download"),
            doneWhen: "local artifact metadata is returned for a source-backed PDF",
            guard: "download only after a PDF candidate exists",
        }),
        workflowStep(5, "citation_reference_audit", gate(graphCommands.length > 0), {
            commands: graphCommands,
            doneWhen: "citation and reference rows are retrieved from graph-capable sources",
            guard: "graph rows are provenance evidence, not a substitute for reading the paper",
        }),
        workflowStep(6, "peer_review_audit", gate(commandFor("next_reviews")), {
            command: commandFor("next_reviews"),
            doneWhen: "review, decision, rebuttal, or comment rows are retrieved when available",
            guard: "review rows qualify venue context but do not replace source text",
        }),
        workflowStep(7, "reproducibility_plan", gate(recordFound), {
            command: commandFor("next_reproduce"),
            doneWhen: "install_readiness and execution_boundary are explicit",
            guard: "never clone, install, or run remote code during planning",
        }),
        workflowStep(8, "inspect_code_and_resources", gate(resourceCommands.length > 0), {
            commands: resourceCommands,
            doneWhen: "code, dataset, model, or Space resource rows are inspected",
            guard: "inspect repository and data provenance before any install command",
        }),
    ];
}
|
|
2226
|
+
/**
 * Derives the workflow row for one availability row.
 *
 * The result combines identifying fields copied from the row, the computed
 * status / next step / boundaries, grouped source lists, the completed,
 * pending and blocked step lists, the agent runbook, and the row's `next_*`
 * commands, stamped with the current time.
 *
 * @param {object} availability - Availability row.
 * @returns {object} The workflow row.
 */
export function buildScholarWorkflowRow(availability) {
    // Copies the listed keys verbatim, keeping keys whose value is undefined.
    const copyFields = (keys) => Object.fromEntries(keys.map((key) => [key, availability[key]]));
    // Merges two source lists from the row into one de-duplicated list.
    const mergedSources = (firstKey, secondKey) => uniqueStrings([
        ...rowStringArray(availability, firstKey),
        ...rowStringArray(availability, secondKey),
    ]);
    const found = rowBoolean(availability, "record_found");
    const readable = availabilityHasReadableSource(availability);
    const anchored = availabilityHasPrimaryAnchor(availability);
    const resourced = availabilityHasResourceEvidence(availability);
    return {
        ...copyFields(["ref", "route_kind", "canonical_ref", "canonical_ref_kind"]),
        workflow_status: workflowStatus(found, anchored, readable, resourced),
        next_step: workflowNextStep(found, anchored, readable),
        claim_boundary: readable
            ? "quote_claims_only_after_next_read_output"
            : "metadata_only_no_claim_extraction",
        execution_boundary: "no_download_clone_install_or_remote_code_execution",
        record_found: found,
        ...copyFields(["title", "year"]),
        primary_source: availabilityPrimarySource(availability),
        primary_evidence_url: availabilityPrimaryEvidenceUrl(availability),
        persistent_identifiers: availabilityIdentifiers(availability),
        readable_sources: mergedSources("fulltext_candidate_sources", "pdf_sources"),
        resource_sources: mergedSources("code_sources", "dataset_sources"),
        graph_sources: mergedSources("citation_candidate_sources", "reference_candidate_sources"),
        review_sources: rowStringArray(availability, "review_candidate_sources"),
        completed_steps: completedWorkflowSteps(availability, anchored, readable, resourced),
        pending_steps: pendingWorkflowSteps(availability, anchored, readable, resourced),
        blocked_steps: blockedWorkflowSteps(availability, readable, resourced),
        agent_runbook: workflowRunbook(availability, found),
        ...copyFields([
            "next_workflow",
            "next_availability",
            "next_evidence",
            "next_read",
            "next_download",
            "next_code",
            "next_datasets",
            "next_citations",
            "next_references",
            "next_reviews",
            "next_reproduce",
            "source_errors",
        ]),
        retrieved_at: new Date().toISOString(),
    };
}
|
|
2280
|
+
/**
 * Lists the column keys for workflow rows.
 *
 * @param {boolean} [detailed=false] - When truthy, the extended columns are
 *   appended after the compact set.
 * @returns {string[]} A newly created array of column keys on every call.
 */
function workflowColumns(detailed = false) {
    const summaryColumns = [
        "ref",
        "route_kind",
        "canonical_ref",
        "canonical_ref_kind",
        "workflow_status",
        "next_step",
        "claim_boundary",
        "execution_boundary",
        "record_found",
        "title",
        "year",
        "completed_steps",
        "pending_steps",
        "blocked_steps",
        "next_read",
        "next_evidence",
        "next_reproduce",
    ];
    if (detailed) {
        // Extended view: provenance, runbook, and the remaining next_* commands.
        summaryColumns.push(
            "primary_source",
            "primary_evidence_url",
            "persistent_identifiers",
            "readable_sources",
            "resource_sources",
            "graph_sources",
            "review_sources",
            "agent_runbook",
            "next_workflow",
            "next_availability",
            "next_download",
            "next_code",
            "next_datasets",
            "next_citations",
            "next_references",
            "next_reviews",
            "source_errors",
            "retrieved_at",
        );
    }
    return summaryColumns;
}
|
|
2324
|
+
/**
 * Derives the evidence-classification row for one availability row.
 *
 * Computes an evidence status, a citation-safety label, and a readiness
 * label, then returns them with provenance fields, grouped source lists, the
 * missing-evidence list, and the row's `next_*` commands, stamped with the
 * current time.
 *
 * @param {object} availability - Availability row.
 * @returns {object} The evidence row.
 */
export function buildScholarEvidenceRow(availability) {
    // Copies the listed keys verbatim, keeping keys whose value is undefined.
    const copyFields = (keys) => Object.fromEntries(keys.map((key) => [key, availability[key]]));
    const recordFound = rowBoolean(availability, "record_found");
    const hasPdf = rowBoolean(availability, "has_pdf");
    const hasFulltextCandidate = rowBoolean(availability, "has_fulltext_candidate");
    const hasReadableSource = hasPdf || hasFulltextCandidate;
    const persistentIdentifiers = availabilityIdentifiers(availability);
    const primaryEvidenceUrl = availabilityPrimaryEvidenceUrl(availability);
    const hasPrimaryAnchor = persistentIdentifiers.length > 0 || primaryEvidenceUrl !== undefined;
    const hasResourceEvidence = [
        "has_code",
        "has_project",
        "has_datasets",
        "has_models",
        "has_spaces",
    ].some((flag) => rowBoolean(availability, flag));
    const classifyEvidence = () => {
        if (!recordFound) {
            return "unverified";
        }
        if (hasReadableSource) {
            // Readable text counts as verified only alongside a primary anchor.
            return hasPrimaryAnchor
                ? "readable_source_verified"
                : "readable_source_candidate";
        }
        if (hasPrimaryAnchor) {
            return "metadata_verified";
        }
        return hasResourceEvidence ? "resource_only" : "metadata_only";
    };
    const classifyCitationSafety = () => {
        if (!recordFound) {
            return "do_not_cite_unverified";
        }
        if (!hasPrimaryAnchor) {
            return "do_not_cite_without_identifier";
        }
        return hasReadableSource
            ? "cite_after_reading_source"
            : "metadata_only_do_not_quote_claims";
    };
    const classifyReadiness = () => {
        if (rowString(availability, "next_read")) {
            return "read_now";
        }
        if (rowString(availability, "next_download")) {
            return "download_then_read";
        }
        return recordFound ? "metadata_or_resource_only" : "not_ready";
    };
    const evidenceStatus = classifyEvidence();
    const citationSafety = classifyCitationSafety();
    const readiness = classifyReadiness();
    const graphSources = uniqueStrings([
        ...rowStringArray(availability, "citation_candidate_sources"),
        ...rowStringArray(availability, "reference_candidate_sources"),
    ]);
    return {
        ...copyFields(["ref", "route_kind"]),
        evidence_status: evidenceStatus,
        citation_safety: citationSafety,
        readiness,
        claim_boundary: hasReadableSource
            ? "quote_claims_only_after_next_read_output"
            : "metadata_only_no_claim_extraction",
        record_found: recordFound,
        ...copyFields(["title", "year"]),
        primary_source: availabilityPrimarySource(availability),
        primary_evidence_url: primaryEvidenceUrl,
        persistent_identifiers: persistentIdentifiers,
        readable_sources: uniqueStrings([
            ...rowStringArray(availability, "fulltext_candidate_sources"),
            ...rowStringArray(availability, "pdf_sources"),
        ]),
        resource_sources: uniqueStrings([
            ...rowStringArray(availability, "code_sources"),
            ...rowStringArray(availability, "dataset_sources"),
        ]),
        graph_sources: graphSources,
        review_sources: rowStringArray(availability, "review_candidate_sources"),
        missing_evidence: availabilityMissingEvidence(availability),
        ...copyFields([
            "next_availability",
            "next_read",
            "next_download",
            "next_code",
            "next_datasets",
            "next_citations",
            "next_references",
            "next_reviews",
            "source_errors",
        ]),
        retrieved_at: new Date().toISOString(),
    };
}
|
|
2404
|
+
/**
 * Lists the column keys for evidence rows.
 *
 * @param {boolean} [detailed=false] - When truthy, four extra columns are
 *   appended after the compact set.
 * @returns {string[]} A newly created array of column keys on every call.
 */
function evidenceColumns(detailed = false) {
    const coreColumns = [
        "ref",
        "route_kind",
        "evidence_status",
        "citation_safety",
        "readiness",
        "claim_boundary",
        "record_found",
        "title",
        "year",
        "primary_source",
        "primary_evidence_url",
        "persistent_identifiers",
        "readable_sources",
        "resource_sources",
        "graph_sources",
        "review_sources",
        "missing_evidence",
        "next_read",
        "next_code",
        "next_datasets",
        "next_citations",
        "next_references",
        "next_reviews",
    ];
    const detailColumns = [
        "next_availability",
        "next_download",
        "source_errors",
        "retrieved_at",
    ];
    return detailed ? coreColumns.concat(detailColumns) : coreColumns;
}
|
|
2440
|
+
/**
 * Returns the row's `code_url` when it points at a recognised code-hosting
 * site, with a single trailing slash removed; otherwise `undefined`.
 *
 * The host check is done on the parsed hostname. A substring match over the
 * whole URL would also accept look-alikes such as
 * `https://evil.example/github.com/x`, `https://github.com.evil.example/x`,
 * or `https://github.com@evil.example/x`.
 *
 * @param {object} row - Row whose `code_url` is read via `rowString`.
 * @returns {string|undefined} The clone candidate URL, or `undefined` when the
 *   URL is missing, is not http(s), cannot be parsed, or is on another host.
 */
function cloneCandidateUrl(row) {
    const trustedHosts = [
        "github.com",
        "gitlab.com",
        "bitbucket.org",
        "huggingface.co",
    ];
    const codeUrl = rowString(row, "code_url");
    if (!codeUrl) {
        return undefined;
    }
    if (!/^https?:\/\//i.test(codeUrl)) {
        return undefined;
    }
    let hostname;
    try {
        // The URL parser lower-cases the hostname and separates it from any
        // userinfo, path, or query text.
        hostname = new URL(codeUrl).hostname;
    }
    catch {
        // An unparseable URL is not a usable clone candidate.
        return undefined;
    }
    const isTrustedHost = trustedHosts.some((host) => hostname === host || hostname.endsWith(`.${host}`));
    if (!isTrustedHost) {
        return undefined;
    }
    return codeUrl.replace(/\/$/, "");
}
|
|
2451
|
+
/**
 * Lists the reproducibility ingredients an availability row lacks.
 *
 * @param {object} row - Availability row read via `rowString`, `rowBoolean`
 *   and `rowStringArray`.
 * @returns {string[]} Labels of the missing ingredients in a fixed order.
 */
function availabilityReproducibilityMissing(row) {
    const hasAnyResource = rowBoolean(row, "has_datasets") ||
        rowBoolean(row, "has_models") ||
        rowBoolean(row, "has_spaces");
    // Each entry pairs an "is absent" condition with the label to report.
    const gaps = [
        [!rowString(row, "code_url"), "code-repository"],
        [!rowBoolean(row, "has_project"), "project-page"],
        [!hasAnyResource, "datasets/models/spaces"],
        [!rowString(row, "next_read"), "readable-paper"],
        [
            rowStringArray(row, "citation_candidate_sources").length === 0,
            "citation-graph",
        ],
    ];
    return gaps
        .filter(([absent]) => absent)
        .map(([, label]) => label);
}
|
|
2469
|
+
/**
 * Derives the reproducibility-planning row for one availability row.
 *
 * Computes a reproducibility status and an install-readiness label from the
 * row's code URL, project flag, resource flags and clone candidate, then
 * returns them with fixed boundary labels, resource URLs, the missing list,
 * and the row's `next_*` commands, stamped with the current time.
 *
 * @param {object} availability - Availability row.
 * @returns {object} The reproducibility row.
 */
export function buildScholarReproducibilityRow(availability) {
    // Copies the listed keys verbatim, keeping keys whose value is undefined.
    const copyFields = (keys) => Object.fromEntries(keys.map((key) => [key, availability[key]]));
    const hasCodeRepository = rowString(availability, "code_url") !== undefined;
    const hasProject = rowBoolean(availability, "has_project");
    const hasResource = ["has_datasets", "has_models", "has_spaces"]
        .some((flag) => rowBoolean(availability, flag));
    const cloneUrl = cloneCandidateUrl(availability);
    const describeStatus = () => {
        if (hasCodeRepository) {
            return hasResource ? "code_and_resources_found" : "code_found";
        }
        if (hasProject) {
            return hasResource
                ? "project_and_resources_found"
                : "project_page_found";
        }
        return hasResource
            ? "resources_without_code"
            : "no_reproducibility_resources";
    };
    const describeInstallReadiness = () => {
        // A clone candidate outranks every other signal.
        if (cloneUrl) {
            return "clone_candidate_requires_inspection";
        }
        if (hasCodeRepository) {
            return "code_url_requires_manual_inspection";
        }
        if (hasProject) {
            return "project_page_requires_manual_inspection";
        }
        return hasResource ? "resource_only_no_install" : "not_ready";
    };
    return {
        ...copyFields(["ref", "route_kind"]),
        reproducibility_status: describeStatus(),
        install_readiness: describeInstallReadiness(),
        execution_boundary: "no_remote_code_executed",
        install_boundary: "inspect_repository_before_running_install_or_training_commands",
        ...copyFields(["record_found", "title", "year"]),
        primary_source: availabilityPrimarySource(availability),
        primary_evidence_url: availabilityPrimaryEvidenceUrl(availability),
        ...copyFields(["code_url", "project_url"]),
        clone_candidate_url: cloneUrl,
        ...copyFields(["dataset_url", "dataset_urls", "model_urls", "space_urls"]),
        resource_sources: uniqueStrings([
            ...rowStringArray(availability, "code_sources"),
            ...rowStringArray(availability, "dataset_sources"),
        ]),
        missing_reproducibility: availabilityReproducibilityMissing(availability),
        ...copyFields([
            "next_evidence",
            "next_read",
            "next_download",
            "next_code",
            "next_datasets",
        ]),
        // The inspect_* fields repeat the code and dataset commands.
        next_inspect_code: availability.next_code,
        next_inspect_resources: availability.next_datasets,
        source_errors: availability.source_errors,
        retrieved_at: new Date().toISOString(),
    };
}
|
|
2531
|
+
/**
 * Column names for the reproducibility report.
 *
 * @param {boolean} [detailed=false] When truthy, the extra detail columns are
 *   appended after the default set.
 * @returns {string[]} A freshly built array of column keys.
 */
function reproducibilityColumns(detailed = false) {
    const columns = [
        "ref",
        "route_kind",
        "reproducibility_status",
        "install_readiness",
        "execution_boundary",
        "install_boundary",
        "record_found",
        "title",
        "year",
        "primary_source",
        "primary_evidence_url",
        "code_url",
        "project_url",
        "clone_candidate_url",
        "dataset_urls",
        "model_urls",
        "space_urls",
        "resource_sources",
        "missing_reproducibility",
        "next_evidence",
        "next_read",
        "next_code",
        "next_datasets",
    ];
    if (!detailed) {
        return columns;
    }
    // Detail-only columns always follow the default ones.
    return columns.concat([
        "dataset_url",
        "next_download",
        "next_inspect_code",
        "next_inspect_resources",
        "source_errors",
        "retrieved_at",
    ]);
}
|
|
2569
|
+
/**
 * Column names for the availability report.
 *
 * @param {boolean} [detailed=false] When truthy, the extra detail columns are
 *   appended after the default set.
 * @returns {string[]} A freshly built array of column keys.
 */
function availabilityColumns(detailed = false) {
    const columns = [
        "ref",
        "route_kind",
        "canonical_ref",
        "canonical_ref_kind",
        "record_found",
        "title",
        "doi",
        "arxiv_id",
        "pmid",
        "openreview_id",
        "has_pdf",
        "has_fulltext_candidate",
        "has_code",
        "has_project",
        "has_datasets",
        "has_models",
        "has_spaces",
        "metadata_sources",
        "pdf_sources",
        "fulltext_candidate_sources",
        "code_sources",
        "dataset_sources",
        "citation_candidate_sources",
        "reference_candidate_sources",
        "review_candidate_sources",
        "next_read",
        "next_download",
        "next_code",
        "next_datasets",
    ];
    if (!detailed) {
        return columns;
    }
    // Detail-only columns always follow the default ones.
    return columns.concat([
        "route_value",
        "id",
        "year",
        "pmc_id",
        "source_adapter",
        "source_url",
        "pdf_url",
        "code_url",
        "project_url",
        "dataset_url",
        "model_urls",
        "dataset_urls",
        "space_urls",
        "next_workflow",
        "next_get",
        "next_pdf",
        "next_evidence",
        "next_reproduce",
        "next_citations",
        "next_references",
        "next_reviews",
        "source_errors",
        "retrieved_at",
    ]);
}
|
|
2630
|
+
/**
 * Gathers metadata, PDF, code and dataset lookups for one reference and folds
 * them into a single availability row.
 *
 * The four lookups are awaited one after another, in a fixed order.
 *
 * @param {string} ref Reference string as given by the caller.
 * @param {object} opts Command options forwarded to every lookup.
 * @returns {Promise<{row: object, outcomes: Array}>} The built row together
 *   with every per-source outcome, in lookup order.
 */
async function collectAvailabilityEvidence(ref, opts) {
    const route = resolveScholarReference(ref);
    const metadataLookup = await collectSingleRecords("scholar.get", ref, opts);
    const pdfLookup = await collectPdfCandidates(ref, opts);
    const codeLookup = await collectResourceRecords("scholar.code", ref, opts);
    const datasetLookup = await collectResourceRecords("scholar.datasets", ref, opts);
    const outcomes = [];
    for (const lookup of [metadataLookup, pdfLookup, codeLookup, datasetLookup]) {
        outcomes.push(...lookup.outcomes);
    }
    // Candidate capabilities are resolved from the route only; nothing is awaited here.
    const candidateSources = (capability) => resolveAvailabilityCapabilitySources(capability, route, opts);
    const row = buildScholarAvailabilityRow({
        ref,
        route,
        metadataRecords: metadataLookup.records,
        pdfRecords: pdfLookup.records,
        codeRecords: codeLookup.records,
        datasetRecords: datasetLookup.records,
        fulltextCandidateSources: candidateSources("scholar.fulltext"),
        citationCandidateSources: candidateSources("scholar.citations"),
        referenceCandidateSources: candidateSources("scholar.references"),
        reviewCandidateSources: candidateSources("scholar.review"),
        sourceErrors: collectSourceErrors(outcomes),
        opts,
    });
    return { row, outcomes };
}
|
|
2658
|
+
/**
 * Re-runs the metadata, PDF, code and dataset lookups against the row's
 * canonical reference and returns only their outcomes.
 *
 * @param {object} availability Availability row; its `ref` and
 *   `canonical_ref` fields are read.
 * @param {object} opts Command options forwarded to every lookup.
 * @returns {Promise<Array>} Outcomes in lookup order, or an empty array when
 *   the row has no canonical reference or it equals the stringified `ref`.
 */
async function collectCanonicalSourceAuditOutcomes(availability, opts) {
    const originalRef = String(availability.ref ?? "");
    const canonicalRef = rowString(availability, "canonical_ref");
    const hasDistinctCanonical = Boolean(canonicalRef) && canonicalRef !== originalRef;
    if (!hasDistinctCanonical) {
        return [];
    }
    const metadataLookup = await collectSingleRecords("scholar.get", canonicalRef, opts);
    const pdfLookup = await collectPdfCandidates(canonicalRef, opts);
    const codeLookup = await collectResourceRecords("scholar.code", canonicalRef, opts);
    const datasetLookup = await collectResourceRecords("scholar.datasets", canonicalRef, opts);
    const collected = [];
    for (const lookup of [metadataLookup, pdfLookup, codeLookup, datasetLookup]) {
        collected.push(...lookup.outcomes);
    }
    return collected;
}
|
|
2674
|
+
/**
 * Combines the availability result for the caller's reference with the result
 * obtained for its canonical reference.
 *
 * The canonical row supplies the fields, but `ref`, `route_kind` and
 * `route_value` are taken from the original row, and `source_errors` is the
 * de-duplicated union of both rows' errors.
 *
 * @param {{row: object, outcomes: Array}} original Result for the caller's reference.
 * @param {{row: object, outcomes: Array}} canonical Result for the canonical reference.
 * @returns {{row: object, outcomes: Array}} Merged row plus original outcomes
 *   followed by canonical outcomes.
 */
function mergeCanonicalAvailability(original, canonical) {
    const { ref, route_kind, route_value } = original.row;
    const source_errors = uniqueStrings([
        ...rowStringArray(original.row, "source_errors"),
        ...rowStringArray(canonical.row, "source_errors"),
    ]);
    const row = {
        ...canonical.row,
        ...{ ref, route_kind, route_value, source_errors },
    };
    const outcomes = [...original.outcomes, ...canonical.outcomes];
    return { row, outcomes };
}
|
|
2689
|
+
/**
 * Collects availability for `ref` and, when that finds nothing for an
 * unrecognised reference under an explicit source selection, retries through
 * the canonical reference.
 *
 * The retry happens only when all of these hold: no record was found, the
 * reference kind resolves to "unknown", and `opts.source` or `opts.sources`
 * is set.
 *
 * @param {string} ref Reference string as given by the caller.
 * @param {object} opts Command options.
 * @returns {Promise<{row: object, outcomes: Array}>} The first-pass result, or
 *   the merge of first-pass and canonical results.
 */
async function collectCanonicalizedAvailability(ref, opts) {
    const firstPass = await collectAvailabilityEvidence(ref, opts);
    const shouldRetry = firstPass.row.record_found !== true &&
        resolveScholarReference(ref).kind === "unknown" &&
        Boolean(opts.source || opts.sources);
    if (!shouldRetry) {
        return firstPass;
    }
    // The lookup pass exists only to discover a canonical reference for `ref`.
    const lookupPass = await collectAvailabilityEvidence(ref, canonicalReferenceLookupOpts(opts));
    const canonicalRef = rowString(lookupPass.row, "canonical_ref");
    if (canonicalRef && canonicalRef !== ref) {
        const canonicalPass = await collectAvailabilityEvidence(canonicalRef, opts);
        return mergeCanonicalAvailability(firstPass, canonicalPass);
    }
    return firstPass;
}
|
|
2703
|
+
/**
 * Runs the availability report for one reference and prints it.
 *
 * When no record is found, an error envelope is written to stderr and
 * `process.exit(ExitCode.EMPTY_RESULT)` is called.
 *
 * @param {object} program Root command object; `program.opts().format` selects the output format.
 * @param {string} ref Reference string as given by the caller.
 * @param {object} opts Sub-command options; `opts.detailed` selects the column set.
 * @returns {Promise<void>}
 */
async function runAvailability(program, ref, opts) {
    const startedAt = Date.now();
    const fmt = detectFormat(program.opts().format);
    const ctx = makeCtx("scholar.availability", startedAt);
    const { row, outcomes } = await collectCanonicalizedAvailability(ref, opts);
    ctx.duration_ms = Date.now() - startedAt;
    ctx.surface = "web";
    if (row.record_found !== true) {
        const sourceErrors = row.source_errors;
        const hasSourceErrors = Array.isArray(sourceErrors) && sourceErrors.length > 0;
        ctx.error = {
            code: "SCHOLAR_AVAILABILITY_NOT_FOUND",
            message: `no scholarly metadata, PDF, or resource evidence returned for "${ref}"`,
            suggestion: hasSourceErrors
                ? `Per-source errors: ${sourceErrors.join("; ")}`
                : "Try a DOI, arXiv id, PMID, OpenReview URL, or run `unicli scholar search` first.",
            retryable: outcomes.some((outcome) => isRetryableScholarError(outcome.error)),
        };
        console.error(format(null, undefined, fmt, ctx));
        process.exit(ExitCode.EMPTY_RESULT);
    }
    // Deliberately not in an `else`: the print stays on the fall-through path.
    console.log(format([row], availabilityColumns(opts.detailed), fmt, ctx));
}
|
|
2725
|
+
/**
 * Builds and prints the per-source audit rows for one reference.
 *
 * When no rows are produced, an error envelope is written to stderr and
 * `process.exit(ExitCode.EMPTY_RESULT)` is called.
 *
 * @param {object} program Root command object; `program.opts().format` selects the output format.
 * @param {string} ref Reference string as given by the caller.
 * @param {object} opts Sub-command options; `unpaywallEmail` and `detailed` are read here.
 * @returns {Promise<void>}
 */
async function runSourceAudit(program, ref, opts) {
    const startedAt = Date.now();
    const fmt = detectFormat(program.opts().format);
    const ctx = makeCtx("scholar.sources", startedAt);
    const availability = await collectCanonicalizedAvailability(ref, opts);
    const canonicalOutcomes = await collectCanonicalSourceAuditOutcomes(availability.row, opts);
    const allOutcomes = [...availability.outcomes, ...canonicalOutcomes];
    const auditOptions = { unpaywallEmail: opts.unpaywallEmail };
    const rows = buildScholarSourceAuditRows(availability.row, allOutcomes, auditOptions);
    ctx.duration_ms = Date.now() - startedAt;
    ctx.surface = "web";
    const nothingAudited = rows.length === 0;
    if (nothingAudited) {
        ctx.error = {
            code: "SCHOLAR_SOURCES_EMPTY",
            message: `no scholarly sources were audited for "${ref}"`,
            suggestion: "Run `unicli scholar doctor --sources all` to inspect registered scholarly sources, or pass --sources all.",
            retryable: false,
        };
        console.error(format(null, undefined, fmt, ctx));
        process.exit(ExitCode.EMPTY_RESULT);
    }
    // Deliberately not in an `else`: the print stays on the fall-through path.
    console.log(format(rows, sourceAuditColumns(opts.detailed), fmt, ctx));
}
|
|
2748
|
+
/**
 * Collects availability for one reference and prints the workflow row derived
 * from it.
 *
 * When no record is found, an error envelope is written to stderr and
 * `process.exit(ExitCode.EMPTY_RESULT)` is called.
 *
 * @param {object} program Root command object; `program.opts().format` selects the output format.
 * @param {string} ref Reference string as given by the caller.
 * @param {object} opts Sub-command options; `opts.detailed` selects the column set.
 * @returns {Promise<void>}
 */
async function runWorkflow(program, ref, opts) {
    const startedAt = Date.now();
    const fmt = detectFormat(program.opts().format);
    const ctx = makeCtx("scholar.workflow", startedAt);
    const { row, outcomes } = await collectCanonicalizedAvailability(ref, opts);
    ctx.duration_ms = Date.now() - startedAt;
    ctx.surface = "web";
    if (row.record_found !== true) {
        const sourceErrors = row.source_errors;
        const hasSourceErrors = Array.isArray(sourceErrors) && sourceErrors.length > 0;
        ctx.error = {
            code: "SCHOLAR_WORKFLOW_NOT_FOUND",
            message: `no source-backed scholarly workflow evidence returned for "${ref}"`,
            suggestion: hasSourceErrors
                ? `Per-source errors: ${sourceErrors.join("; ")}`
                : "Try a DOI, arXiv id, PMID, OpenReview URL, or run `unicli scholar search` first.",
            retryable: outcomes.some((outcome) => isRetryableScholarError(outcome.error)),
        };
        console.error(format(null, undefined, fmt, ctx));
        process.exit(ExitCode.EMPTY_RESULT);
    }
    // Deliberately not in an `else`: the print stays on the fall-through path.
    const workflowRow = buildScholarWorkflowRow(row);
    console.log(format([workflowRow], workflowColumns(opts.detailed), fmt, ctx));
}
|
|
2770
|
+
/**
 * Collects availability for one reference and prints the evidence row derived
 * from it.
 *
 * When no record is found, an error envelope is written to stderr and
 * `process.exit(ExitCode.EMPTY_RESULT)` is called.
 *
 * @param {object} program Root command object; `program.opts().format` selects the output format.
 * @param {string} ref Reference string as given by the caller.
 * @param {object} opts Sub-command options; `opts.detailed` selects the column set.
 * @returns {Promise<void>}
 */
async function runEvidence(program, ref, opts) {
    const startedAt = Date.now();
    const fmt = detectFormat(program.opts().format);
    const ctx = makeCtx("scholar.evidence", startedAt);
    const { row, outcomes } = await collectCanonicalizedAvailability(ref, opts);
    ctx.duration_ms = Date.now() - startedAt;
    ctx.surface = "web";
    if (row.record_found !== true) {
        const sourceErrors = row.source_errors;
        const hasSourceErrors = Array.isArray(sourceErrors) && sourceErrors.length > 0;
        ctx.error = {
            code: "SCHOLAR_EVIDENCE_NOT_FOUND",
            message: `no source-backed scholarly evidence returned for "${ref}"`,
            suggestion: hasSourceErrors
                ? `Per-source errors: ${sourceErrors.join("; ")}`
                : "Try a DOI, arXiv id, PMID, OpenReview URL, or run `unicli scholar search` first.",
            retryable: outcomes.some((outcome) => isRetryableScholarError(outcome.error)),
        };
        console.error(format(null, undefined, fmt, ctx));
        process.exit(ExitCode.EMPTY_RESULT);
    }
    // Deliberately not in an `else`: the print stays on the fall-through path.
    const evidenceRow = buildScholarEvidenceRow(row);
    console.log(format([evidenceRow], evidenceColumns(opts.detailed), fmt, ctx));
}
|
|
2792
|
+
/**
 * Collects availability for one reference and prints the reproducibility row
 * derived from it.
 *
 * When no record is found, an error envelope is written to stderr and
 * `process.exit(ExitCode.EMPTY_RESULT)` is called.
 *
 * @param {object} program Root command object; `program.opts().format` selects the output format.
 * @param {string} ref Reference string as given by the caller.
 * @param {object} opts Sub-command options; `opts.detailed` selects the column set.
 * @returns {Promise<void>}
 */
async function runReproducibility(program, ref, opts) {
    const startedAt = Date.now();
    const fmt = detectFormat(program.opts().format);
    const ctx = makeCtx("scholar.reproduce", startedAt);
    const { row, outcomes } = await collectCanonicalizedAvailability(ref, opts);
    ctx.duration_ms = Date.now() - startedAt;
    ctx.surface = "web";
    if (row.record_found !== true) {
        const sourceErrors = row.source_errors;
        const hasSourceErrors = Array.isArray(sourceErrors) && sourceErrors.length > 0;
        ctx.error = {
            code: "SCHOLAR_REPRODUCIBILITY_NOT_FOUND",
            message: `no source-backed scholarly reproducibility evidence returned for "${ref}"`,
            suggestion: hasSourceErrors
                ? `Per-source errors: ${sourceErrors.join("; ")}`
                : "Try a DOI, arXiv id, OpenReview URL, or run `unicli scholar search` before requesting reproducibility resources.",
            retryable: outcomes.some((outcome) => isRetryableScholarError(outcome.error)),
        };
        console.error(format(null, undefined, fmt, ctx));
        process.exit(ExitCode.EMPTY_RESULT);
    }
    // Deliberately not in an `else`: the print stays on the fall-through path.
    const reproducibilityRow = buildScholarReproducibilityRow(row);
    console.log(format([reproducibilityRow], reproducibilityColumns(opts.detailed), fmt, ctx));
}
|
|
2814
|
+
/**
 * Extracts the permission-related options from the root command.
 *
 * @param {{opts: function(): object}} program Object exposing an `opts()` method.
 * @returns {{permissionProfile: *, approved: boolean, rememberApproval: boolean}}
 *   `approved` and `rememberApproval` are true only when the corresponding
 *   root option is strictly `true`.
 */
function rootPermissionOptions(program) {
    const { permissionProfile, yes, rememberApproval } = program.opts();
    return {
        permissionProfile,
        approved: yes === true,
        rememberApproval: rememberApproval === true,
    };
}
|
|
2822
|
+
/**
 * Builds the argument object passed to an artifact command from a PDF record
 * and the CLI options.
 *
 * @param {object} record Record supplying `pdf_url`, `title`, `id`,
 *   `source_adapter` and `source_url` (falling back to `landing_url`).
 * @param {object} opts Options supplying `output`, `firstPage`, `lastPage`
 *   and `maxChars`; the latter three are passed through `Number()` unless
 *   they are `undefined`.
 * @returns {object} Argument object keyed by the command's argument names.
 */
function artifactArgs(record, opts) {
    // Only `undefined` is left untouched; every other value goes through Number().
    const optionalNumber = (value) => (value === undefined ? undefined : Number(value));
    const { pdf_url, title, id, source_adapter } = record;
    return {
        pdf_url,
        title,
        id,
        source_adapter,
        source_url: record.source_url ?? record.landing_url,
        output: opts.output,
        "first-page": optionalNumber(opts.firstPage),
        "last-page": optionalNumber(opts.lastPage),
        "max-chars": optionalNumber(opts.maxChars),
    };
}
|
|
2838
|
+
/**
 * Builds the argument object for a source's full-text command.
 *
 * Starts from `referenceArgs(route, opts)` and then sets the output path and
 * paging limits, which override any same-named keys.
 *
 * @param {object} route Resolved reference route.
 * @param {object} opts Options supplying `output`, `firstPage`, `lastPage`
 *   and `maxChars`; the latter three are passed through `Number()` unless
 *   they are `undefined`.
 * @returns {object} Argument object for the full-text command.
 */
function fulltextArgs(route, opts) {
    // Only `undefined` is left untouched; every other value goes through Number().
    const optionalNumber = (value) => (value === undefined ? undefined : Number(value));
    const paging = {
        "first-page": optionalNumber(opts.firstPage),
        "last-page": optionalNumber(opts.lastPage),
        "max-chars": optionalNumber(opts.maxChars),
    };
    return {
        ...referenceArgs(route, opts),
        output: opts.output,
        ...paging,
    };
}
|
|
2850
|
+
/**
 * Tries to fetch full text directly from one named source and prints it on
 * success.
 *
 * Each failure mode (unknown source, no `scholar.fulltext` command, invocation
 * could not be built, execution error, empty result) is reported as an
 * unhandled outcome instead of being thrown by this function.
 *
 * @param {object} program Root command object; supplies output format and permission options.
 * @param {string} source Adapter name to query.
 * @param {object} route Resolved reference route.
 * @param {object} opts Sub-command options forwarded to `fulltextArgs`.
 * @returns {Promise<{handled: boolean, outcome: {source: string, error?: object}}>}
 */
async function executeDirectFulltext(program, source, route, opts) {
    // Shared shape for every "this source did not produce full text" result.
    const unhandled = (error) => ({ handled: false, outcome: { source, error } });
    const adapter = getAllAdapters().find((candidate) => candidate.name === source);
    if (!adapter) {
        return unhandled({
            code: "adapter_not_found",
            message: `unknown source: ${source}`,
        });
    }
    const found = findScholarCommandByCapability(adapter, "scholar.fulltext");
    if (!found) {
        return unhandled({
            code: "capability_unsupported",
            message: `${source} does not expose scholar.fulltext`,
        });
    }
    const fmt = detectFormat(program.opts().format);
    const commandArgs = normalizeScholarCommandArgs(found.command, fulltextArgs(route, opts));
    const invocation = buildInvocation("cli", source, found.name, { args: commandArgs, source: "internal" }, rootPermissionOptions(program));
    if (!invocation) {
        return unhandled({
            code: "build_invocation_failed",
            message: `could not build invocation for ${source}.${found.name}`,
        });
    }
    const result = await execute(invocation);
    if (result.error) {
        const { code, message, retryable } = result.error;
        return unhandled({
            code: code ?? "execution_error",
            message: message ?? "source fulltext command failed",
            retryable,
        });
    }
    const hasRows = Array.isArray(result.results) && result.results.length > 0;
    if (!hasRows) {
        return unhandled({
            code: "empty_result",
            message: `${source}.${found.name} returned no fulltext rows`,
        });
    }
    console.log(format(result.results, invocation.command.columns, fmt, result.envelope));
    return { handled: true, outcome: { source } };
}
|
|
2923
|
+
/**
 * Tries each candidate full-text source in turn and stops at the first one
 * that handles the reference.
 *
 * @param {object} program Root command object.
 * @param {string} ref Reference string to resolve.
 * @param {object} opts Sub-command options; `source` and `sources` drive source selection.
 * @returns {Promise<{handled: boolean, outcomes: Array}>} Whether any source
 *   handled the reference, plus the outcome of every source that was tried.
 */
async function tryDirectFulltextFromScholar(program, ref, opts) {
    const route = resolveScholarReference(ref);
    const candidates = resolveScholarFulltextSources(opts.source, opts.sources, route);
    const outcomes = [];
    let handled = false;
    for (const source of candidates) {
        // Sources are tried strictly one at a time.
        const attempt = await executeDirectFulltext(program, source, route, opts);
        outcomes.push(attempt.outcome);
        if (attempt.handled) {
            handled = true;
            break;
        }
    }
    return { handled, outcomes };
}
|
|
2935
|
+
/**
 * Runs one command of the `scholar-artifacts` adapter and prints its result.
 *
 * If the invocation cannot be built, an error envelope is written to stderr
 * and `process.exit(ExitCode.CONFIG_ERROR)` is called. If execution reports an
 * error, the envelope is written to stderr and `process.exit(result.exitCode)`
 * is called.
 *
 * @param {object} program Root command object; supplies output format and permission options.
 * @param {string} command Name of the artifact command to run.
 * @param {object} args Candidate arguments; filtered through `definedEntries` before use.
 * @returns {Promise<void>}
 */
async function executeScholarArtifact(program, command, args) {
    const fmt = detectFormat(program.opts().format);
    const invocationInput = {
        args: Object.fromEntries(definedEntries(args)),
        source: "internal",
    };
    const invocation = buildInvocation("cli", "scholar-artifacts", command, invocationInput, rootPermissionOptions(program));
    if (!invocation) {
        const ctx = makeCtx(`scholar.${command}`, Date.now());
        ctx.surface = "web";
        ctx.error = {
            code: "SCHOLAR_ARTIFACT_ADAPTER_MISSING",
            message: "scholar-artifacts adapter is not registered",
            suggestion: "Run `unicli scholar doctor` and check adapter load diagnostics.",
            retryable: false,
        };
        console.error(format(null, undefined, fmt, ctx));
        process.exit(ExitCode.CONFIG_ERROR);
    }
    const result = await execute(invocation);
    const { columns } = invocation.command;
    if (result.error) {
        process.stderr.write(format([], columns, fmt, result.envelope) + "\n");
        process.exit(result.exitCode);
    }
    console.log(format(result.results, columns, fmt, result.envelope));
}
|
|
2960
|
+
/**
 * Resolves a reference to a readable or downloadable artifact and runs the
 * matching artifact command.
 *
 * For "read-pdf", source-direct full text is tried first: once against the
 * raw reference when sources were explicitly selected, and again against the
 * resolved lookup reference unless that would repeat an attempt that already
 * produced outcomes. If nothing handles the reference, PDF candidates are
 * collected and the first PDF record is passed to the artifact command.
 *
 * When no PDF record is found, an error envelope is written to stderr and
 * `process.exit(ExitCode.EMPTY_RESULT)` is called.
 *
 * @param {object} program Root command object.
 * @param {string} ref Reference string as given by the caller.
 * @param {object} opts Sub-command options.
 * @param {string} command Artifact command; "read-pdf" and "download-pdf" are
 *   the values this function distinguishes.
 * @returns {Promise<void>}
 */
async function runArtifactFromScholar(program, ref, opts, command) {
    const startedAt = Date.now();
    const fmt = detectFormat(program.opts().format);
    const ctx = makeCtx(command === "download-pdf" ? "scholar.download" : "scholar.read", startedAt);
    const isRead = command === "read-pdf";
    const fulltextOutcomes = [];
    if (isRead && (opts.source || opts.sources)) {
        const rawAttempt = await tryDirectFulltextFromScholar(program, ref, opts);
        fulltextOutcomes.push(...rawAttempt.outcomes);
        if (rawAttempt.handled) {
            return;
        }
    }
    const lookupRef = await resolveArtifactLookupRef(ref, opts);
    if (isRead) {
        // Skip the second attempt only when it would hit the same reference
        // that already yielded outcomes above.
        const alreadyTried = lookupRef === ref && fulltextOutcomes.length > 0;
        if (!alreadyTried) {
            const lookupAttempt = await tryDirectFulltextFromScholar(program, lookupRef, opts);
            fulltextOutcomes.push(...lookupAttempt.outcomes);
            if (lookupAttempt.handled) {
                return;
            }
        }
    }
    const { sourceList, outcomes, records } = await collectPdfCandidates(lookupRef, opts);
    const record = firstPdfRecord(records);
    if (!record) {
        const pdfErrors = outcomes.filter((outcome) => outcome.error);
        const fulltextErrors = fulltextOutcomes.filter((outcome) => outcome.error);
        const suggestions = [];
        if (fulltextErrors.length > 0) {
            suggestions.push(`Fulltext errors: ${fulltextErrors.map(formatScholarOutcomeError).join("; ")}`);
        }
        if (pdfErrors.length > 0) {
            suggestions.push(`PDF/source errors: ${pdfErrors.map(formatScholarOutcomeError).join("; ")}`);
        }
        ctx.duration_ms = Date.now() - startedAt;
        ctx.surface = "web";
        const searched = sourceList.join(", ");
        ctx.error = {
            code: isRead ? "SCHOLAR_READ_NOT_FOUND" : "SCHOLAR_PDF_NOT_FOUND",
            message: isRead
                ? `no source-direct scholarly full text or downloadable PDF returned for "${ref}" across [${searched}]`
                : `no downloadable scholarly PDF returned for "${ref}" across [${searched}]`,
            suggestion: suggestions.length > 0
                ? suggestions.join(" ")
                : "Try --source with a site from `unicli scholar doctor`, or pass a more exact DOI/arXiv/OpenReview/PubMed id/title.",
            retryable: pdfErrors.some((outcome) => isRetryableScholarError(outcome.error)) ||
                fulltextOutcomes.some((outcome) => isRetryableScholarError(outcome.error)),
        };
        console.error(format(null, undefined, fmt, ctx));
        process.exit(ExitCode.EMPTY_RESULT);
    }
    // Deliberately not in an `else`: this call stays on the fall-through path.
    await executeScholarArtifact(program, command, artifactArgs(record, opts));
}
|
|
3015
|
+
/**
 * Prints the coverage rows for the selected scholarly adapters.
 *
 * With `opts.gaps` set, only rows whose `missing_closed_loop` is a non-empty
 * array are kept. When no rows remain, an error envelope is written to stderr
 * and `process.exit(ExitCode.EMPTY_RESULT)` is called.
 *
 * @param {object} program Root command object; `program.opts().format` selects the output format.
 * @param {object} opts Sub-command options; `sources`, `gaps` and `detailed` are read here.
 * @returns {Promise<void>}
 */
async function runCoverage(program, opts) {
    const startedAt = Date.now();
    const fmt = detectFormat(program.opts().format);
    const ctx = makeCtx("scholar.coverage", startedAt);
    const registeredNames = listScholarAdapters().map((adapter) => adapter.name);
    const selected = resolveScholarSources(opts.sources, registeredNames);
    const selectedAdapters = listScholarAdapters().filter((adapter) => selected.includes(adapter.name));
    const keepRow = (row) => !opts.gaps ||
        (Array.isArray(row.missing_closed_loop) && row.missing_closed_loop.length > 0);
    const rows = buildScholarCoverageRows(selectedAdapters).filter(keepRow);
    ctx.duration_ms = Date.now() - startedAt;
    ctx.surface = "web";
    if (rows.length === 0) {
        ctx.error = {
            code: "SCHOLAR_COVERAGE_EMPTY",
            message: `no scholarly sources matched [${selected.join(", ")}]`,
            suggestion: "Run `unicli scholar doctor --sources all` to inspect registered scholarly sources.",
            retryable: false,
        };
        console.error(format(null, undefined, fmt, ctx));
        process.exit(ExitCode.EMPTY_RESULT);
    }
    // Deliberately not in an `else`: the print stays on the fall-through path.
    console.log(format(rows, coverageColumns(opts.detailed), fmt, ctx));
}
|
|
3041
|
+
/**
 * Tells whether an error message has the shape `No <things> matched "<query>"`.
 *
 * The match is case-insensitive, ignores surrounding whitespace, accepts
 * straight or curly double quotes around the query, and allows one optional
 * trailing period.
 *
 * @param {*} message Candidate error message.
 * @returns {boolean} True when the message has that shape. Non-string values
 *   return false.
 */
function isScholarNoMatchErrorMessage(message) {
    // Error payloads are not guaranteed to carry a string; calling `.trim()`
    // on anything else would throw a TypeError.
    if (typeof message !== "string") {
        return false;
    }
    return /^No .+ matched ["“].+["”]\.?$/i.test(message.trim());
}
|
|
3044
|
+
/**
 * Maps a live-probe error onto the `live_*` fields of a doctor row.
 *
 * A message recognised by `isScholarNoMatchErrorMessage` is reported as an
 * empty result; anything else is reported as a failure carrying the error's
 * own code.
 *
 * @param {{code?: string, message?: string}} error Error returned by the probe.
 * @returns {{live_health: string, live_error_code: string, live_error_message: string}}
 */
export function classifyScholarLiveProbeError(error) {
    const message = error.message ?? "adapter command failed";
    const isNoMatch = isScholarNoMatchErrorMessage(message);
    return {
        live_health: isNoMatch ? "empty" : "failed",
        live_error_code: isNoMatch
            ? "empty_source_result"
            : error.code ?? "execution_error",
        live_error_message: message,
    };
}
|
|
3059
|
+
/**
 * Builds the static doctor row for one adapter; this function runs no adapter
 * command itself.
 *
 * The row lists the adapter's `scholar.*` capabilities, sorted, and classifies
 * its `health` command: "skipped" when there is none, "blocked" when its
 * strategy is defined and not `Strategy.PUBLIC`, otherwise "available".
 *
 * @param {object} adapter Adapter with `name` and a `commands` map.
 * @returns {{source: string, capabilities: string[], health: string, detail: string}}
 */
function buildStaticDoctorRow(adapter) {
    const scholarCaps = new Set();
    Object.values(adapter.commands).forEach((command) => {
        [...(command.capabilities ?? [])]
            .filter((cap) => cap.startsWith("scholar."))
            .forEach((cap) => scholarCaps.add(cap));
    });
    const healthCommand = resolveCommand(adapter.name, "health");
    const strategy = healthCommand
        ? commandStrategy(adapter, healthCommand.command)
        : undefined;
    const needsAuth = strategy !== undefined && strategy !== Strategy.PUBLIC;
    let health = "available";
    let detail = "health probe command is public";
    if (!healthCommand) {
        health = "skipped";
        detail = "no `health` command — adapter passes by capability introspection";
    }
    else if (needsAuth) {
        health = "blocked";
        detail = `health probe requires ${strategy} auth — skipped`;
    }
    return {
        source: adapter.name,
        capabilities: [...scholarCaps].sort(),
        health,
        detail,
    };
}
|
|
3086
|
+
/**
 * Extends a static doctor row with the result of a live search probe.
 *
 * Possible `live_health` values set here: "not_probeable" (no queryable search
 * command), "blocked" (command strategy is defined and not `Strategy.PUBLIC`),
 * "passed" or "empty" (probe ran), or whatever
 * `classifyScholarLiveProbeError` reports when the probe returned an error.
 *
 * @param {object} adapter Adapter to probe.
 * @param {object} row Static doctor row to extend.
 * @param {{query: string, limit: number}} opts Probe query and result limit.
 * @returns {Promise<object>} A new row: the static fields plus `live_*` fields.
 */
async function probeScholarDoctorRow(adapter, row, opts) {
    const { query } = opts;
    const found = findScholarQueryableSearchCommand(adapter);
    if (!found) {
        return {
            ...row,
            live_health: "not_probeable",
            live_query: query,
            live_count: 0,
            live_error_code: "no_queryable_search",
            live_error_message: "no queryable scholar.search command is registered for live probing",
        };
    }
    // Built on demand so it is evaluated at the same point in each branch.
    const liveCommand = () => sourceCommand(adapter.name, found.name, "<query>");
    const strategy = commandStrategy(adapter, found.command);
    const needsAuth = strategy !== undefined && strategy !== Strategy.PUBLIC;
    if (needsAuth) {
        return {
            ...row,
            live_health: "blocked",
            live_command: liveCommand(),
            live_query: query,
            live_count: 0,
            live_error_code: "auth_required",
            live_error_message: `live probe requires ${strategy} auth`,
        };
    }
    const probeArgs = { query, limit: String(opts.limit) };
    const outcome = await executeScholarAdapterCommand(adapter.name, found, probeArgs, "scholar.search");
    if (outcome.error) {
        return {
            ...row,
            ...classifyScholarLiveProbeError(outcome.error),
            live_command: liveCommand(),
            live_query: query,
            live_count: 0,
        };
    }
    const recordCount = outcome.records.length;
    const gotRecords = recordCount > 0;
    return {
        ...row,
        live_health: gotRecords ? "passed" : "empty",
        live_command: liveCommand(),
        live_query: query,
        live_count: recordCount,
        live_error_code: gotRecords ? undefined : "empty_normalized_result",
        live_error_message: gotRecords
            ? undefined
            : "live probe returned no scholar-normalized records",
    };
}
|
|
3133
|
+
/**
 * Prints one doctor row per selected scholarly adapter.
 *
 * With `opts.live` set, each row is additionally probed with a live search;
 * the probes are started together and awaited with `Promise.all`. The
 * `live_*` columns are shown when either `opts.live` or `opts.detailed` is set.
 *
 * @param {object} program Root command object; `program.opts().format` selects the output format.
 * @param {object} opts Sub-command options; `sources`, `live`, `query`, `limit` and `detailed` are read here.
 * @returns {Promise<void>}
 */
async function runDoctor(program, opts) {
    const startedAt = Date.now();
    const fmt = detectFormat(program.opts().format);
    const ctx = makeCtx("scholar.doctor", startedAt);
    const registeredNames = listScholarAdapters().map((adapter) => adapter.name);
    const selected = resolveScholarSources(opts.sources, registeredNames);
    let rows = listScholarAdapters()
        .filter((adapter) => selected.includes(adapter.name))
        .map(buildStaticDoctorRow);
    if (opts.live) {
        const adaptersByName = new Map(listScholarAdapters().map((adapter) => [adapter.name, adapter]));
        const probeOptions = {
            query: opts.query ?? "Llama 2",
            limit: numberOpt(opts.limit, 1, 5),
        };
        const probes = rows.map((row) => {
            const adapter = adaptersByName.get(row.source);
            if (!adapter) {
                return row;
            }
            return probeScholarDoctorRow(adapter, row, probeOptions);
        });
        rows = await Promise.all(probes);
    }
    ctx.duration_ms = Date.now() - startedAt;
    ctx.surface = "web";
    const columns = ["source", "capabilities", "health", "detail"];
    if (opts.live || opts.detailed) {
        columns.push("live_health", "live_command", "live_query", "live_count", "live_error_code", "live_error_message");
    }
    console.log(format(rows, columns, fmt, ctx));
}
|
|
3172
|
+
export function registerScholarCommand(program) {
|
|
3173
|
+
const scholar = program
|
|
3174
|
+
.command("scholar")
|
|
3175
|
+
.description("Scholarly meta-command — search, retrieve, PDF, resource links, citations, references, and source audit across first-source academic adapters");
|
|
441
3176
|
scholar
|
|
442
3177
|
.command("search <query>")
|
|
443
3178
|
.description("Fan-out scholarly paper search across first-source adapters")
|
|
@@ -447,11 +3182,89 @@ export function registerScholarCommand(program) {
|
|
|
447
3182
|
.action(async (query, opts) => {
|
|
448
3183
|
await runSearch(program, query, opts);
|
|
449
3184
|
});
|
|
3185
|
+
scholar
|
|
3186
|
+
.command("availability <ref>")
|
|
3187
|
+
.alias("audit")
|
|
3188
|
+
.description("Audit source-backed metadata, PDF, full-text, code, dataset/model, citation, reference, and review availability for one paper without downloading artifacts")
|
|
3189
|
+
.option("--source <site>", "force one source")
|
|
3190
|
+
.option("--sources <csv>", "override auto-routed source list")
|
|
3191
|
+
.option("--venue <venue>", "source-local venue scope, e.g. CVPR or ICCV")
|
|
3192
|
+
.option("--year <year>", "source-local proceedings year")
|
|
3193
|
+
.option("--volume <volume>", "source-local proceedings volume, e.g. PMLR v235")
|
|
3194
|
+
.option("--unpaywall-email <email>", "requester email for Unpaywall DOI lookup")
|
|
3195
|
+
.option("-D, --detailed", "include evidence URLs, errors, and next commands")
|
|
3196
|
+
.action(async (ref, opts) => {
|
|
3197
|
+
await runAvailability(program, ref, opts);
|
|
3198
|
+
});
|
|
3199
|
+
scholar
|
|
3200
|
+
.command("sources <ref>")
|
|
3201
|
+
.alias("source-audit")
|
|
3202
|
+
.description("Show a per-source scholarly provenance matrix for one paper, including source status, evidence types, candidate capabilities, next commands, and source errors without downloading artifacts")
|
|
3203
|
+
.option("--source <site>", "force one source")
|
|
3204
|
+
.option("--sources <csv>", "override auto-routed source list")
|
|
3205
|
+
.option("--unpaywall-email <email>", "requester email for Unpaywall DOI lookup")
|
|
3206
|
+
.option("-D, --detailed", "include URLs, source capabilities, errors, and per-source next commands")
|
|
3207
|
+
.action(async (ref, opts) => {
|
|
3208
|
+
await runSourceAudit(program, ref, opts);
|
|
3209
|
+
});
|
|
3210
|
+
scholar
|
|
3211
|
+
.command("workflow <ref>")
|
|
3212
|
+
.alias("runbook")
|
|
3213
|
+
.description("Build a source-backed agent runbook for the full scholarly loop: evidence, reading, download, citation/reference graph, peer review, and reproducibility planning without downloading, cloning, installing, or summarizing claims")
|
|
3214
|
+
.option("--source <site>", "force one source")
|
|
3215
|
+
.option("--sources <csv>", "override auto-routed source list")
|
|
3216
|
+
.option("--unpaywall-email <email>", "requester email for Unpaywall DOI lookup")
|
|
3217
|
+
.option("-D, --detailed", "include runbook steps, source errors, and timestamp")
|
|
3218
|
+
.action(async (ref, opts) => {
|
|
3219
|
+
await runWorkflow(program, ref, opts);
|
|
3220
|
+
});
|
|
3221
|
+
scholar
|
|
3222
|
+
.command("evidence <ref>")
|
|
3223
|
+
.alias("classify")
|
|
3224
|
+
.description("Classify source-backed scholarly evidence for one paper into citation safety, reading readiness, missing evidence, and next commands without downloading artifacts")
|
|
3225
|
+
.option("--source <site>", "force one source")
|
|
3226
|
+
.option("--sources <csv>", "override auto-routed source list")
|
|
3227
|
+
.option("--unpaywall-email <email>", "requester email for Unpaywall DOI lookup")
|
|
3228
|
+
.option("-D, --detailed", "include availability rerun command, source errors, and timestamp")
|
|
3229
|
+
.action(async (ref, opts) => {
|
|
3230
|
+
await runEvidence(program, ref, opts);
|
|
3231
|
+
});
|
|
3232
|
+
scholar
|
|
3233
|
+
.command("reproduce <ref>")
|
|
3234
|
+
.alias("install-plan")
|
|
3235
|
+
.description("Plan source-backed paper code/data reproduction and installation readiness without cloning, installing, or executing remote code")
|
|
3236
|
+
.option("--source <site>", "force one source")
|
|
3237
|
+
.option("--sources <csv>", "override auto-routed source list")
|
|
3238
|
+
.option("--unpaywall-email <email>", "requester email for Unpaywall DOI lookup")
|
|
3239
|
+
.option("-D, --detailed", "include source errors, download command, and timestamp")
|
|
3240
|
+
.action(async (ref, opts) => {
|
|
3241
|
+
await runReproducibility(program, ref, opts);
|
|
3242
|
+
});
|
|
3243
|
+
scholar
|
|
3244
|
+
.command("coverage")
|
|
3245
|
+
.description("Compare registered scholarly sources by discovery, metadata, PDF, full-text, code, dataset/model, citation, reference, and review coverage without network I/O")
|
|
3246
|
+
.option("--sources <csv>", "limit to sources or all")
|
|
3247
|
+
.option("--gaps", "show only sources with missing closed-loop capabilities")
|
|
3248
|
+
.option("-D, --detailed", "include command names and next commands")
|
|
3249
|
+
.action(async (opts) => {
|
|
3250
|
+
await runCoverage(program, opts);
|
|
3251
|
+
});
|
|
3252
|
+
scholar
|
|
3253
|
+
.command("reviews <ref>")
|
|
3254
|
+
.description("Fetch source-backed peer-review, decision, rebuttal, and comment rows for a scholarly paper review thread")
|
|
3255
|
+
.option("--source <site>", "force one review-capable source")
|
|
3256
|
+
.option("--sources <csv>", "override review-capable source list")
|
|
3257
|
+
.option("--max-length <n>", "per-row review text truncation length", "4000")
|
|
3258
|
+
.option("-D, --detailed", "include reviewer/signature and text size fields")
|
|
3259
|
+
.action(async (ref, opts) => {
|
|
3260
|
+
await runReviews(program, ref, opts);
|
|
3261
|
+
});
|
|
450
3262
|
scholar
|
|
451
3263
|
.command("get <ref>")
|
|
452
3264
|
.description("Retrieve one paper/work by DOI, arXiv id, PMID, OpenAlex id, Semantic Scholar id, dblp key, or OpenReview forum")
|
|
453
3265
|
.option("--source <site>", "force one source")
|
|
454
3266
|
.option("--sources <csv>", "override auto-routed source list")
|
|
3267
|
+
.option("--unpaywall-email <email>", "requester email for Unpaywall DOI lookup")
|
|
455
3268
|
.option("-D, --detailed", "include richer metadata columns")
|
|
456
3269
|
.action(async (ref, opts) => {
|
|
457
3270
|
await runSingle(program, "scholar.get", ref, opts);
|
|
@@ -461,10 +3274,55 @@ export function registerScholarCommand(program) {
|
|
|
461
3274
|
.description("Find open-access PDF candidates for a DOI, arXiv id, PMID, or source id")
|
|
462
3275
|
.option("--source <site>", "force one source")
|
|
463
3276
|
.option("--sources <csv>", "override auto-routed source list")
|
|
3277
|
+
.option("--unpaywall-email <email>", "requester email for Unpaywall DOI lookup")
|
|
464
3278
|
.option("-D, --detailed", "include richer metadata columns")
|
|
465
3279
|
.action(async (ref, opts) => {
|
|
466
3280
|
await runSingle(program, "scholar.pdf", ref, opts);
|
|
467
3281
|
});
|
|
3282
|
+
scholar
|
|
3283
|
+
.command("code <ref>")
|
|
3284
|
+
.description("Find code repository and project links for a paper through resource-capable scholarly adapters")
|
|
3285
|
+
.option("--source <site>", "force one source")
|
|
3286
|
+
.option("--sources <csv>", "override auto-routed source list")
|
|
3287
|
+
.option("-D, --detailed", "include richer resource metadata columns")
|
|
3288
|
+
.action(async (ref, opts) => {
|
|
3289
|
+
await runResources(program, "scholar.code", ref, opts);
|
|
3290
|
+
});
|
|
3291
|
+
scholar
|
|
3292
|
+
.command("datasets <ref>")
|
|
3293
|
+
.description("Find linked datasets, models, and Spaces for a paper through resource-capable scholarly adapters")
|
|
3294
|
+
.option("--source <site>", "force one source")
|
|
3295
|
+
.option("--sources <csv>", "override auto-routed source list")
|
|
3296
|
+
.option("-D, --detailed", "include richer resource metadata columns")
|
|
3297
|
+
.action(async (ref, opts) => {
|
|
3298
|
+
await runResources(program, "scholar.datasets", ref, opts);
|
|
3299
|
+
});
|
|
3300
|
+
scholar
|
|
3301
|
+
.command("download <ref>")
|
|
3302
|
+
.description("Resolve a scholarly PDF candidate, download it locally, and return artifact metadata")
|
|
3303
|
+
.option("--source <site>", "force one source")
|
|
3304
|
+
.option("--sources <csv>", "override auto-routed source list")
|
|
3305
|
+
.option("--unpaywall-email <email>", "requester email for Unpaywall DOI lookup")
|
|
3306
|
+
.option("--output <dir>", "output directory", "./scholar-downloads")
|
|
3307
|
+
.action(async (ref, opts) => {
|
|
3308
|
+
await runArtifactFromScholar(program, ref, opts, "download-pdf");
|
|
3309
|
+
});
|
|
3310
|
+
scholar
|
|
3311
|
+
.command("read <ref>")
|
|
3312
|
+
.description("Resolve a scholarly PDF candidate, download it locally, and extract text with pdftotext")
|
|
3313
|
+
.option("--source <site>", "force one source")
|
|
3314
|
+
.option("--sources <csv>", "override auto-routed source list")
|
|
3315
|
+
.option("--venue <venue>", "source-local venue scope, e.g. CVPR or ICCV")
|
|
3316
|
+
.option("--year <year>", "source-local proceedings year")
|
|
3317
|
+
.option("--volume <volume>", "source-local proceedings volume, e.g. PMLR v235")
|
|
3318
|
+
.option("--unpaywall-email <email>", "requester email for Unpaywall DOI lookup")
|
|
3319
|
+
.option("--output <dir>", "output directory", "./scholar-downloads")
|
|
3320
|
+
.option("--first-page <n>", "first page to extract", "1")
|
|
3321
|
+
.option("--last-page <n>", "last page to extract", "20")
|
|
3322
|
+
.option("--max-chars <n>", "maximum extracted/read text characters")
|
|
3323
|
+
.action(async (ref, opts) => {
|
|
3324
|
+
await runArtifactFromScholar(program, ref, opts, "read-pdf");
|
|
3325
|
+
});
|
|
468
3326
|
scholar
|
|
469
3327
|
.command("citations <ref>")
|
|
470
3328
|
.description("List works citing this paper when the source supports it")
|
|
@@ -485,8 +3343,12 @@ export function registerScholarCommand(program) {
|
|
|
485
3343
|
});
|
|
486
3344
|
scholar
|
|
487
3345
|
.command("doctor")
|
|
488
|
-
.description("Inspect registered scholarly adapters and
|
|
3346
|
+
.description("Inspect registered scholarly adapters, capability tags, and optional live search health")
|
|
489
3347
|
.option("--sources <csv>", "limit to a comma-separated source list")
|
|
3348
|
+
.option("--live", "run a queryable search probe for each selected source instead of relying only on capability introspection")
|
|
3349
|
+
.option("--query <query>", "query for --live probes", "Llama 2")
|
|
3350
|
+
.option("--limit <n>", "per-source --live probe limit", "1")
|
|
3351
|
+
.option("-D, --detailed", "include live probe fields in table output")
|
|
490
3352
|
.action(async (opts) => {
|
|
491
3353
|
await runDoctor(program, opts);
|
|
492
3354
|
});
|