@zenalexa/unicli 0.225.2 → 0.225.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184) hide show
  1. package/AGENTS.md +2 -2
  2. package/README.md +3 -3
  3. package/README.zh-CN.md +3 -3
  4. package/dist/adapters/acl-anthology/papers.d.ts +16 -9
  5. package/dist/adapters/acl-anthology/papers.d.ts.map +1 -1
  6. package/dist/adapters/acl-anthology/papers.js +322 -58
  7. package/dist/adapters/acl-anthology/papers.js.map +1 -1
  8. package/dist/adapters/arxiv/papers.d.ts +22 -4
  9. package/dist/adapters/arxiv/papers.d.ts.map +1 -1
  10. package/dist/adapters/arxiv/papers.js +202 -4
  11. package/dist/adapters/arxiv/papers.js.map +1 -1
  12. package/dist/adapters/baidu-scholar/search.d.ts +15 -1
  13. package/dist/adapters/baidu-scholar/search.d.ts.map +1 -1
  14. package/dist/adapters/baidu-scholar/search.js +72 -8
  15. package/dist/adapters/baidu-scholar/search.js.map +1 -1
  16. package/dist/adapters/biorxiv/preprints.d.ts +9 -0
  17. package/dist/adapters/biorxiv/preprints.d.ts.map +1 -0
  18. package/dist/adapters/biorxiv/preprints.js +78 -0
  19. package/dist/adapters/biorxiv/preprints.js.map +1 -0
  20. package/dist/adapters/cnki/search.d.ts +82 -0
  21. package/dist/adapters/cnki/search.d.ts.map +1 -0
  22. package/dist/adapters/cnki/search.js +236 -0
  23. package/dist/adapters/cnki/search.js.map +1 -0
  24. package/dist/adapters/cvf/papers.d.ts +12 -7
  25. package/dist/adapters/cvf/papers.d.ts.map +1 -1
  26. package/dist/adapters/cvf/papers.js +210 -27
  27. package/dist/adapters/cvf/papers.js.map +1 -1
  28. package/dist/adapters/dblp/publications.d.ts +12 -5
  29. package/dist/adapters/dblp/publications.d.ts.map +1 -1
  30. package/dist/adapters/dblp/publications.js +31 -8
  31. package/dist/adapters/dblp/publications.js.map +1 -1
  32. package/dist/adapters/google-scholar/search.d.ts +22 -1
  33. package/dist/adapters/google-scholar/search.d.ts.map +1 -1
  34. package/dist/adapters/google-scholar/search.js +129 -14
  35. package/dist/adapters/google-scholar/search.js.map +1 -1
  36. package/dist/adapters/hf/paper.d.ts +12 -3
  37. package/dist/adapters/hf/paper.d.ts.map +1 -1
  38. package/dist/adapters/hf/paper.js +65 -5
  39. package/dist/adapters/hf/paper.js.map +1 -1
  40. package/dist/adapters/medrxiv/preprints.d.ts +9 -0
  41. package/dist/adapters/medrxiv/preprints.d.ts.map +1 -0
  42. package/dist/adapters/medrxiv/preprints.js +78 -0
  43. package/dist/adapters/medrxiv/preprints.js.map +1 -0
  44. package/dist/adapters/neurips/proceedings.d.ts +8 -7
  45. package/dist/adapters/neurips/proceedings.d.ts.map +1 -1
  46. package/dist/adapters/neurips/proceedings.js +209 -21
  47. package/dist/adapters/neurips/proceedings.js.map +1 -1
  48. package/dist/adapters/openalex/works.d.ts +21 -5
  49. package/dist/adapters/openalex/works.d.ts.map +1 -1
  50. package/dist/adapters/openalex/works.js +108 -8
  51. package/dist/adapters/openalex/works.js.map +1 -1
  52. package/dist/adapters/openreview/papers.d.ts +10 -4
  53. package/dist/adapters/openreview/papers.d.ts.map +1 -1
  54. package/dist/adapters/openreview/papers.js +351 -24
  55. package/dist/adapters/openreview/papers.js.map +1 -1
  56. package/dist/adapters/pmlr/proceedings.d.ts +6 -6
  57. package/dist/adapters/pmlr/proceedings.d.ts.map +1 -1
  58. package/dist/adapters/pmlr/proceedings.js +92 -12
  59. package/dist/adapters/pmlr/proceedings.js.map +1 -1
  60. package/dist/adapters/pubmed/articles.d.ts +8 -4
  61. package/dist/adapters/pubmed/articles.d.ts.map +1 -1
  62. package/dist/adapters/pubmed/articles.js +272 -39
  63. package/dist/adapters/pubmed/articles.js.map +1 -1
  64. package/dist/adapters/rxiv/preprints.d.ts +75 -0
  65. package/dist/adapters/rxiv/preprints.d.ts.map +1 -0
  66. package/dist/adapters/rxiv/preprints.js +651 -0
  67. package/dist/adapters/rxiv/preprints.js.map +1 -0
  68. package/dist/adapters/scholar-artifacts/pdf-read.d.ts +49 -0
  69. package/dist/adapters/scholar-artifacts/pdf-read.d.ts.map +1 -0
  70. package/dist/adapters/scholar-artifacts/pdf-read.js +204 -0
  71. package/dist/adapters/scholar-artifacts/pdf-read.js.map +1 -0
  72. package/dist/adapters/scholar-artifacts/pdf.d.ts +16 -0
  73. package/dist/adapters/scholar-artifacts/pdf.d.ts.map +1 -0
  74. package/dist/adapters/scholar-artifacts/pdf.js +122 -0
  75. package/dist/adapters/scholar-artifacts/pdf.js.map +1 -0
  76. package/dist/adapters/semantic-scholar/papers.d.ts +6 -6
  77. package/dist/adapters/semantic-scholar/papers.d.ts.map +1 -1
  78. package/dist/adapters/semantic-scholar/papers.js +80 -6
  79. package/dist/adapters/semantic-scholar/papers.js.map +1 -1
  80. package/dist/adapters/unpaywall/works.d.ts +7 -7
  81. package/dist/adapters/unpaywall/works.d.ts.map +1 -1
  82. package/dist/adapters/unpaywall/works.js +104 -12
  83. package/dist/adapters/unpaywall/works.js.map +1 -1
  84. package/dist/adapters/wanfang/search.d.ts +14 -0
  85. package/dist/adapters/wanfang/search.d.ts.map +1 -1
  86. package/dist/adapters/wanfang/search.js +56 -7
  87. package/dist/adapters/wanfang/search.js.map +1 -1
  88. package/dist/browser/page.d.ts +2 -0
  89. package/dist/browser/page.d.ts.map +1 -1
  90. package/dist/browser/page.js +12 -0
  91. package/dist/browser/page.js.map +1 -1
  92. package/dist/commands/browser/actions.d.ts.map +1 -1
  93. package/dist/commands/browser/actions.js +59 -3
  94. package/dist/commands/browser/actions.js.map +1 -1
  95. package/dist/commands/scholar.d.ts +77 -5
  96. package/dist/commands/scholar.d.ts.map +1 -1
  97. package/dist/commands/scholar.js +2945 -83
  98. package/dist/commands/scholar.js.map +1 -1
  99. package/dist/core/command-contract.d.ts.map +1 -1
  100. package/dist/core/command-contract.js +5 -0
  101. package/dist/core/command-contract.js.map +1 -1
  102. package/dist/core/schema-v2.d.ts +1 -0
  103. package/dist/core/schema-v2.d.ts.map +1 -1
  104. package/dist/core/schema-v2.js +1 -0
  105. package/dist/core/schema-v2.js.map +1 -1
  106. package/dist/discovery/aliases.d.ts.map +1 -1
  107. package/dist/discovery/aliases.js +208 -0
  108. package/dist/discovery/aliases.js.map +1 -1
  109. package/dist/discovery/core-catalog.d.ts +2 -0
  110. package/dist/discovery/core-catalog.d.ts.map +1 -1
  111. package/dist/discovery/core-catalog.js +487 -0
  112. package/dist/discovery/core-catalog.js.map +1 -1
  113. package/dist/discovery/intents.d.ts.map +1 -1
  114. package/dist/discovery/intents.js +273 -2
  115. package/dist/discovery/intents.js.map +1 -1
  116. package/dist/discovery/loader.d.ts.map +1 -1
  117. package/dist/discovery/loader.js +3 -0
  118. package/dist/discovery/loader.js.map +1 -1
  119. package/dist/engine/capability-policy.d.ts.map +1 -1
  120. package/dist/engine/capability-policy.js +30 -4
  121. package/dist/engine/capability-policy.js.map +1 -1
  122. package/dist/engine/kernel/stages.d.ts.map +1 -1
  123. package/dist/engine/kernel/stages.js +3 -0
  124. package/dist/engine/kernel/stages.js.map +1 -1
  125. package/dist/engine/operation-policy.d.ts +4 -1
  126. package/dist/engine/operation-policy.d.ts.map +1 -1
  127. package/dist/engine/operation-policy.js +23 -0
  128. package/dist/engine/operation-policy.js.map +1 -1
  129. package/dist/fast-path/manifest.d.ts +3 -0
  130. package/dist/fast-path/manifest.d.ts.map +1 -1
  131. package/dist/fast-path/manifest.js.map +1 -1
  132. package/dist/fast-path/policy.d.ts.map +1 -1
  133. package/dist/fast-path/policy.js +3 -0
  134. package/dist/fast-path/policy.js.map +1 -1
  135. package/dist/manifest-compact.txt +1 -1
  136. package/dist/manifest.json +6804 -1002
  137. package/dist/registry.d.ts +2 -0
  138. package/dist/registry.d.ts.map +1 -1
  139. package/dist/registry.js +1 -0
  140. package/dist/registry.js.map +1 -1
  141. package/dist/types/scholarly.d.ts +19 -4
  142. package/dist/types/scholarly.d.ts.map +1 -1
  143. package/dist/types/scholarly.js +4 -4
  144. package/dist/types.d.ts +8 -0
  145. package/dist/types.d.ts.map +1 -1
  146. package/dist/types.js.map +1 -1
  147. package/package.json +1 -1
  148. package/server.json +2 -2
  149. package/skills/unicli/SKILL.md +1 -1
  150. package/skills/unicli-claude-code/SKILL.md +1 -1
  151. package/skills/unicli-hermes/SKILL.md +1 -1
  152. package/src/adapters/acl-anthology/papers.test.ts +111 -0
  153. package/src/adapters/acl-anthology/papers.ts +379 -71
  154. package/src/adapters/arxiv/papers.test.ts +46 -0
  155. package/src/adapters/arxiv/papers.ts +251 -4
  156. package/src/adapters/baidu-scholar/search.ts +74 -11
  157. package/src/adapters/biorxiv/preprints.ts +112 -0
  158. package/src/adapters/cnki/search.ts +357 -0
  159. package/src/adapters/cvf/papers.ts +260 -27
  160. package/src/adapters/dblp/publications.test.ts +9 -0
  161. package/src/adapters/dblp/publications.ts +31 -8
  162. package/src/adapters/google-scholar/search.ts +165 -17
  163. package/src/adapters/hf/paper.test.ts +23 -0
  164. package/src/adapters/hf/paper.ts +89 -5
  165. package/src/adapters/hf/top.yaml +34 -2
  166. package/src/adapters/huggingface-papers/daily.yaml +37 -3
  167. package/src/adapters/huggingface-papers/search.yaml +43 -9
  168. package/src/adapters/medrxiv/preprints.ts +112 -0
  169. package/src/adapters/neurips/proceedings.ts +266 -22
  170. package/src/adapters/openalex/works.test.ts +15 -4
  171. package/src/adapters/openalex/works.ts +136 -8
  172. package/src/adapters/openreview/papers.test.ts +31 -0
  173. package/src/adapters/openreview/papers.ts +407 -29
  174. package/src/adapters/pmlr/proceedings.ts +102 -12
  175. package/src/adapters/pubmed/articles.test.ts +88 -1
  176. package/src/adapters/pubmed/articles.ts +343 -44
  177. package/src/adapters/rxiv/preprints.test.ts +233 -0
  178. package/src/adapters/rxiv/preprints.ts +849 -0
  179. package/src/adapters/scholar-artifacts/pdf-read.ts +277 -0
  180. package/src/adapters/scholar-artifacts/pdf.ts +133 -0
  181. package/src/adapters/semantic-scholar/papers.ts +98 -6
  182. package/src/adapters/unpaywall/works.ts +141 -12
  183. package/src/adapters/wanfang/search.ts +57 -7
  184. package/src/adapters/cnki/search.yaml +0 -49
@@ -1,11 +1,11 @@
1
1
  /**
2
2
  * @owner src::commands::scholar
3
- * @does Top-level `unicli scholar` meta-command for academic source discovery: searches, retrieves, PDF availability, citation/reference traversal, and doctor output across adapters tagged with `scholar.*` capabilities.
3
+ * @does Top-level `unicli scholar` meta-command for academic source discovery: searches, retrieves, non-destructive availability audits, agent workflow/runbook planning, evidence/citation-safety classification, reproducibility/install planning, source coverage comparison, peer-review audit retrieval, PDF availability, source-direct full text, PDF artifact download/read, code/dataset/model resource lookup, citation/reference traversal, and doctor output across adapters tagged with `scholar.*` capabilities.
4
4
  * @needs src/registry.ts, src/types/scholarly.ts, src/engine/kernel/execute.ts, src/output/formatter.ts
5
5
  * @feeds src/cli.ts, MCP/agent command discovery via list/search/do
6
- * @breaks Missing capability tags make scholarly sources invisible to the meta-command; weak reference routing can send DOI/arXiv/PMID lookups to the wrong first source.
7
- * @invariants --sources default is a conservative first-source set; --sources all is registry capability discovery; DOI is the primary dedupe key; no non-scholarly adapter is invoked as fallback.
8
- * @side-effects Executes adapter commands through the engine kernel; writes stdout/stderr only.
6
+ * @breaks Missing capability tags make scholarly sources invisible; unfiltered internal ArgBags violate adapter schemas; weak reference routing can send DOI/arXiv/PMID/OpenReview lookups to the wrong first source; weak canonicalized availability can leave title-based Agent runbooks blocked on explicit source subsets; weak workflow/evidence classification can invite citation from metadata-only rows; weak reproducibility planning can encourage running uninspected remote code; weak review command selection can return search rows instead of review-thread evidence; missing fulltext/PDF artifact adapter blocks read; missing resource output fields hide linked code/data.
7
+ * @invariants --sources default is a conservative first-source set; coverage audits inspect registered capabilities without network I/O; workflow and evidence classification are derived from availability rows and never download, execute, or summarize claims; availability/evidence/source-audit/workflow/reproduce may use canonical lookup sources only to resolve an unknown title before rerunning the caller's requested source scope; reproducibility planning never executes clone/install/run commands and requires repository inspection before install; review retrieval prefers forum/thread commands over search commands; resource commands default to sources exposing the requested resource capability; availability audits fetch metadata/PDF/resource evidence without writing artifacts; source-direct fulltext is tried before PDF artifacts for `scholar read`; unknown artifact refs use every scholar.pdf source; --sources all is registry capability discovery; DOI is the primary dedupe key; internal fan-out passes only args declared by the target command.
8
+ * @side-effects Executes adapter commands through the engine kernel; source-direct fulltext may fetch remote XML; artifact subcommands may write PDFs and execute pdftotext through scholar-artifacts.
9
9
  * @perf Fan-out is sequential today, O(S * R), where S is source count and R is rows per source.
10
10
  * @concurrency safe — Commander handlers run one at a time per process
11
11
  * @test tests/unit/commands/scholar.test.ts
@@ -13,6 +13,7 @@
13
13
  * @since 2026-05-19
14
14
  */
15
15
  import { commandStrategy, getAllAdapters, resolveCommand, } from "../registry.js";
16
+ import { resolveArgs } from "../engine/args.js";
16
17
  import { buildInvocation, execute } from "../engine/kernel/execute.js";
17
18
  import { makeCtx } from "../output/envelope.js";
18
19
  import { detectFormat, format } from "../output/formatter.js";
@@ -25,6 +26,18 @@ export const DEFAULT_SCHOLAR_SOURCES = [
25
26
  "dblp",
26
27
  "pubmed",
27
28
  ];
29
+ const CANONICAL_REFERENCE_SOURCES = [
30
+ "semantic-scholar",
31
+ "openalex",
32
+ "crossref",
33
+ "arxiv",
34
+ "pubmed",
35
+ "openreview",
36
+ "huggingface-papers",
37
+ "hf",
38
+ "biorxiv",
39
+ "medrxiv",
40
+ ];
28
41
  export const SCHOLAR_CAPABILITIES = [
29
42
  "scholar.search",
30
43
  "scholar.get",
@@ -36,10 +49,28 @@ export const SCHOLAR_CAPABILITIES = [
36
49
  "scholar.datasets",
37
50
  "scholar.code",
38
51
  "scholar.review",
52
+ "scholar.fulltext",
39
53
  ];
54
+ const SINGLE_RECORD_ARG_NAMES = new Set([
55
+ "id",
56
+ "ref",
57
+ "doi",
58
+ "arxiv_id",
59
+ "pmid",
60
+ "key",
61
+ "forum",
62
+ ]);
40
63
  function hasAnyScholarCapability(adapter) {
41
64
  return Object.values(adapter.commands).some((command) => (command.capabilities ?? []).some((cap) => cap.startsWith("scholar.")));
42
65
  }
66
+ function isSingleRecordScholarCommand(command) {
67
+ if ((command.capabilities ?? []).includes("scholar.get"))
68
+ return true;
69
+ return (command.adapterArgs ?? []).some((arg) => arg.required === true && SINGLE_RECORD_ARG_NAMES.has(arg.name));
70
+ }
71
+ function declaresAdapterArg(command, name) {
72
+ return (command.adapterArgs ?? []).some((arg) => arg.name === name);
73
+ }
43
74
  export function listScholarAdapters() {
44
75
  return getAllAdapters()
45
76
  .filter(hasAnyScholarCapability)
@@ -56,12 +87,91 @@ export function resolveScholarSources(sourcesArg, fallback = DEFAULT_SCHOLAR_SOU
56
87
  .filter(Boolean);
57
88
  }
58
89
  export function findScholarCommandByCapability(adapter, capability) {
59
- for (const [name, command] of Object.entries(adapter.commands)) {
60
- if ((command.capabilities ?? []).includes(capability))
61
- return { name, command };
90
+ const matches = Object.entries(adapter.commands).filter(([, command]) => (command.capabilities ?? []).includes(capability));
91
+ if (capability === "scholar.pdf" ||
92
+ capability === "scholar.code" ||
93
+ capability === "scholar.datasets") {
94
+ const singleRecord = matches.find(([, command]) => isSingleRecordScholarCommand(command));
95
+ if (singleRecord) {
96
+ return { name: singleRecord[0], command: singleRecord[1] };
97
+ }
98
+ }
99
+ const first = matches[0];
100
+ if (first)
101
+ return { name: first[0], command: first[1] };
102
+ return undefined;
103
+ }
104
+ function findScholarSingleRecordCommandByCapability(adapter, capability) {
105
+ const match = Object.entries(adapter.commands).find(([, command]) => (command.capabilities ?? []).includes(capability) &&
106
+ isSingleRecordScholarCommand(command));
107
+ return match ? { name: match[0], command: match[1] } : undefined;
108
+ }
109
+ export function findScholarResourceSearchCommandByCapability(adapter, capability) {
110
+ const match = Object.entries(adapter.commands).find(([, command]) => {
111
+ const capabilities = command.capabilities ?? [];
112
+ return (capabilities.includes("scholar.search") &&
113
+ capabilities.includes(capability) &&
114
+ declaresAdapterArg(command, "query"));
115
+ });
116
+ if (!match)
117
+ return undefined;
118
+ return { name: match[0], command: match[1] };
119
+ }
120
+ export function findScholarQueryableSearchCommand(adapter) {
121
+ const match = Object.entries(adapter.commands).find(([, command]) => {
122
+ const capabilities = command.capabilities ?? [];
123
+ return (capabilities.includes("scholar.search") &&
124
+ declaresAdapterArg(command, "query"));
125
+ });
126
+ if (!match)
127
+ return undefined;
128
+ return { name: match[0], command: match[1] };
129
+ }
130
+ export function findScholarReviewThreadCommand(adapter) {
131
+ const matches = Object.entries(adapter.commands).filter(([, command]) => (command.capabilities ?? []).includes("scholar.review"));
132
+ const forumCommand = matches.find(([, command]) => (command.adapterArgs ?? []).some((arg) => arg.required === true && arg.name === "forum"));
133
+ if (forumCommand) {
134
+ return { name: forumCommand[0], command: forumCommand[1] };
135
+ }
136
+ const namedReviewCommand = matches.find(([name]) => /reviews?/i.test(name));
137
+ if (namedReviewCommand) {
138
+ return { name: namedReviewCommand[0], command: namedReviewCommand[1] };
62
139
  }
140
+ const first = matches[0];
141
+ if (first)
142
+ return { name: first[0], command: first[1] };
63
143
  return undefined;
64
144
  }
145
+ export function listScholarSourcesByCapability(capability) {
146
+ return listScholarAdapters()
147
+ .filter((adapter) => findScholarCommandByCapability(adapter, capability))
148
+ .map((adapter) => adapter.name);
149
+ }
150
+ function listScholarReviewSources() {
151
+ return listScholarAdapters()
152
+ .filter((adapter) => findScholarReviewThreadCommand(adapter))
153
+ .map((adapter) => adapter.name);
154
+ }
155
+ function listSingleRecordScholarSourcesByCapability(capability) {
156
+ return listScholarAdapters()
157
+ .filter((adapter) => Boolean(findScholarSingleRecordCommandByCapability(adapter, capability)))
158
+ .map((adapter) => adapter.name);
159
+ }
160
+ function listResourceSearchScholarSourcesByCapability(capability) {
161
+ return listScholarAdapters()
162
+ .filter((adapter) => Boolean(findScholarResourceSearchCommandByCapability(adapter, capability)))
163
+ .map((adapter) => adapter.name);
164
+ }
165
+ function listResourceDetailSourcesForSearchFallback(capability, opts) {
166
+ const singleRecordSources = listSingleRecordScholarSourcesByCapability(capability);
167
+ if (opts.source) {
168
+ return singleRecordSources.includes(opts.source) ? [opts.source] : [];
169
+ }
170
+ if (opts.sources) {
171
+ return resolveScholarSources(opts.sources).filter((source) => singleRecordSources.includes(source));
172
+ }
173
+ return singleRecordSources;
174
+ }
65
175
  function bareDoi(value) {
66
176
  return value
67
177
  .trim()
@@ -83,7 +193,14 @@ export function resolveScholarReference(ref) {
83
193
  return {
84
194
  kind: "doi",
85
195
  value: doi,
86
- preferredSources: ["openalex", "crossref", "semantic-scholar"],
196
+ preferredSources: [
197
+ "openalex",
198
+ "crossref",
199
+ "semantic-scholar",
200
+ "unpaywall",
201
+ "biorxiv",
202
+ "medrxiv",
203
+ ],
87
204
  };
88
205
  }
89
206
  if (/^(?:arxiv:|https?:\/\/arxiv\.org\/(?:abs|pdf)\/|\d{4}\.\d{4,5})/i.test(raw)) {
@@ -101,7 +218,7 @@ export function resolveScholarReference(ref) {
101
218
  preferredSources: ["pubmed", "semantic-scholar", "openalex"],
102
219
  };
103
220
  }
104
- const openReview = raw.match(/^openreview:\s*([A-Za-z0-9_-]{6,20})$/i);
221
+ const openReview = raw.match(/^(?:openreview:\s*|https?:\/\/openreview\.net\/forum\?id=)([A-Za-z0-9_-]{6,20})/i);
105
222
  if (openReview) {
106
223
  return {
107
224
  kind: "openreview",
@@ -178,9 +295,12 @@ function numberOpt(raw, fallback, max) {
178
295
  return n;
179
296
  }
180
297
  function coerceStringArray(value) {
181
- if (!Array.isArray(value))
182
- return undefined;
183
- const out = value.map((item) => String(item ?? "").trim()).filter(Boolean);
298
+ const raw = Array.isArray(value)
299
+ ? value
300
+ : typeof value === "string"
301
+ ? value.split(/\s*(?:,|;|\n)\s*/)
302
+ : [];
303
+ const out = raw.map((item) => String(item ?? "").trim()).filter(Boolean);
184
304
  return out.length > 0 ? out : undefined;
185
305
  }
186
306
  function coerceNumber(value) {
@@ -192,7 +312,7 @@ function coerceNumber(value) {
192
312
  }
193
313
  return undefined;
194
314
  }
195
- function coerceToScholarlyRecords(rows, source) {
315
+ export function coerceToScholarlyRecords(rows, source) {
196
316
  if (!Array.isArray(rows))
197
317
  return [];
198
318
  const out = [];
@@ -203,6 +323,26 @@ function coerceToScholarlyRecords(rows, source) {
203
323
  if (typeof record.id !== "string" || typeof record.title !== "string") {
204
324
  continue;
205
325
  }
326
+ const sourceUrl = typeof record.source_url === "string" && record.source_url.length > 0
327
+ ? record.source_url
328
+ : typeof record.url === "string" && record.url.length > 0
329
+ ? record.url
330
+ : undefined;
331
+ const pdfUrl = typeof record.pdf_url === "string" && record.pdf_url.length > 0
332
+ ? record.pdf_url
333
+ : typeof record.pdf === "string" && record.pdf.length > 0
334
+ ? record.pdf
335
+ : undefined;
336
+ const recordDate = typeof record.date === "string" && record.date.length > 0
337
+ ? record.date
338
+ : typeof record.pdate === "string" && record.pdate.length > 0
339
+ ? record.pdate
340
+ : typeof record.published === "string" && record.published.length > 0
341
+ ? record.published
342
+ : typeof record.publishedAt === "string" &&
343
+ record.publishedAt.length > 0
344
+ ? record.publishedAt
345
+ : undefined;
206
346
  const work = {
207
347
  id: record.id,
208
348
  title: record.title,
@@ -218,34 +358,62 @@ function coerceToScholarlyRecords(rows, source) {
218
358
  if (authors)
219
359
  work.authors = authors;
220
360
  const year = coerceNumber(record.year);
221
- if (year !== undefined)
361
+ if (year !== undefined) {
222
362
  work.year = year;
363
+ }
364
+ else if (recordDate) {
365
+ const dateYear = Number(recordDate.slice(0, 4));
366
+ if (Number.isInteger(dateYear))
367
+ work.year = dateYear;
368
+ }
369
+ if (recordDate)
370
+ work.date = recordDate;
371
+ if (sourceUrl)
372
+ work.source_url = sourceUrl;
373
+ if (pdfUrl)
374
+ work.pdf_url = pdfUrl;
223
375
  for (const field of [
224
- "date",
225
376
  "venue",
226
377
  "type",
227
378
  "abstract",
228
379
  "doi",
229
380
  "arxiv_id",
230
381
  "pmid",
382
+ "pmc_id",
231
383
  "openalex_id",
232
384
  "semantic_scholar_id",
233
385
  "dblp_key",
234
386
  "openreview_id",
235
387
  "oa_status",
236
- "pdf_url",
237
388
  "landing_url",
238
389
  "code_url",
390
+ "project_url",
239
391
  "dataset_url",
240
- "source_url",
392
+ "model_urls",
393
+ "dataset_urls",
394
+ "space_urls",
241
395
  ]) {
242
396
  if (typeof record[field] === "string" && record[field].length > 0) {
243
397
  work[field] = record[field];
244
398
  }
245
399
  }
400
+ if (!work.openreview_id && source === "openreview") {
401
+ work.openreview_id = work.id;
402
+ }
403
+ if (!work.arxiv_id) {
404
+ const arxivId = work.id
405
+ .replace(/^https?:\/\/arxiv\.org\/abs\//i, "")
406
+ .replace(/v\d+$/i, "");
407
+ if (/^\d{4}\.\d{4,5}/.test(arxivId))
408
+ work.arxiv_id = arxivId;
409
+ }
246
410
  for (const [sourceField, targetField] of [
247
411
  ["cited_by_count", "cited_by_count"],
248
412
  ["references_count", "references_count"],
413
+ ["github_stars", "github_stars"],
414
+ ["num_models", "num_models"],
415
+ ["num_datasets", "num_datasets"],
416
+ ["num_spaces", "num_spaces"],
249
417
  ]) {
250
418
  const n = coerceNumber(record[sourceField]);
251
419
  if (n !== undefined)
@@ -256,10 +424,184 @@ function coerceToScholarlyRecords(rows, source) {
256
424
  }
257
425
  if (record.raw !== undefined)
258
426
  work.raw = record.raw;
427
+ const matchedFields = coerceStringArray(record.matched_fields);
428
+ if (matchedFields)
429
+ work.matched_fields = matchedFields;
430
+ for (const field of ["search_scope", "search_window"]) {
431
+ if (typeof record[field] === "string" && record[field].length > 0) {
432
+ work[field] = record[field];
433
+ }
434
+ }
435
+ for (const [sourceField, targetField] of [
436
+ ["search_scanned_records", "search_scanned_records"],
437
+ ["search_total_records", "search_total_records"],
438
+ ]) {
439
+ const n = coerceNumber(record[sourceField]);
440
+ if (n !== undefined)
441
+ work[targetField] = n;
442
+ }
443
+ if (typeof record.search_exhaustive === "boolean") {
444
+ work.search_exhaustive = record.search_exhaustive;
445
+ }
259
446
  out.push(work);
260
447
  }
261
448
  return out;
262
449
  }
450
+ function definedEntries(args) {
451
+ return Object.entries(args).filter(([, value]) => value !== undefined);
452
+ }
453
+ export function normalizeScholarCommandArgs(command, args) {
454
+ const adapterArgs = command.adapterArgs ?? [];
455
+ const declared = new Set(adapterArgs.map((arg) => arg.name));
456
+ if (declared.size === 0)
457
+ return Object.fromEntries(definedEntries(args));
458
+ const filtered = Object.fromEntries(definedEntries(args).filter(([name]) => declared.has(name)));
459
+ const internalSchema = adapterArgs.map((arg) => ({
460
+ ...arg,
461
+ positional: false,
462
+ }));
463
+ const resolved = resolveArgs({
464
+ opts: filtered,
465
+ positionals: [],
466
+ schema: internalSchema,
467
+ stdinIsTTY: true,
468
+ });
469
+ return Object.fromEntries(definedEntries(resolved.args).filter(([name]) => declared.has(name)));
470
+ }
471
+ function referenceArgs(route, opts = {}) {
472
+ return {
473
+ ref: route.value,
474
+ id: route.value,
475
+ doi: route.kind === "doi" ? route.value : undefined,
476
+ arxiv_id: route.kind === "arxiv" ? route.value : undefined,
477
+ pmid: route.kind === "pmid" ? route.value : undefined,
478
+ key: route.kind === "dblp" ? route.value : undefined,
479
+ forum: route.kind === "openreview" ? route.value : undefined,
480
+ email: opts.unpaywallEmail,
481
+ venue: opts.venue,
482
+ year: opts.year,
483
+ volume: opts.volume,
484
+ };
485
+ }
486
+ export function resolveScholarArtifactSources(sourceArg, sourcesArg, route) {
487
+ if (sourceArg)
488
+ return [sourceArg];
489
+ if (sourcesArg)
490
+ return resolveScholarSources(sourcesArg);
491
+ if (route.kind === "unknown")
492
+ return listScholarSourcesByCapability("scholar.pdf");
493
+ return resolveScholarSources(undefined, route.preferredSources);
494
+ }
495
+ export function resolveScholarFulltextSources(sourceArg, sourcesArg, route) {
496
+ if (sourceArg)
497
+ return [sourceArg];
498
+ if (sourcesArg)
499
+ return resolveScholarSources(sourcesArg);
500
+ const candidates = route.kind === "unknown"
501
+ ? listScholarSourcesByCapability("scholar.fulltext")
502
+ : resolveScholarSources(undefined, route.preferredSources);
503
+ return candidates.filter((source) => {
504
+ const adapter = getAllAdapters().find((candidate) => candidate.name === source);
505
+ return adapter
506
+ ? findScholarCommandByCapability(adapter, "scholar.fulltext") !==
507
+ undefined
508
+ : false;
509
+ });
510
+ }
511
+ async function executeScholarAdapterCommand(source, found, args, capability) {
512
+ const invocation = buildInvocation("cli", source, found.name, {
513
+ args: normalizeScholarCommandArgs(found.command, args),
514
+ source: "internal",
515
+ }, { approved: true });
516
+ if (!invocation) {
517
+ return {
518
+ source,
519
+ capability,
520
+ records: [],
521
+ error: {
522
+ code: "build_invocation_failed",
523
+ message: `could not build invocation for ${source}.${found.name}`,
524
+ },
525
+ };
526
+ }
527
+ const result = await execute(invocation);
528
+ if (result.error) {
529
+ return {
530
+ source,
531
+ capability,
532
+ records: [],
533
+ error: {
534
+ code: result.error.code ?? "execution_error",
535
+ message: result.error.message ?? "adapter command failed",
536
+ retryable: result.error.retryable,
537
+ },
538
+ };
539
+ }
540
+ return {
541
+ source,
542
+ capability,
543
+ records: coerceToScholarlyRecords(result.results, source),
544
+ };
545
+ }
546
+ async function executeScholarAdapterRows(source, found, args) {
547
+ const invocation = buildInvocation("cli", source, found.name, {
548
+ args: normalizeScholarCommandArgs(found.command, args),
549
+ source: "internal",
550
+ }, { approved: true });
551
+ if (!invocation) {
552
+ return {
553
+ source,
554
+ rows: [],
555
+ error: {
556
+ code: "build_invocation_failed",
557
+ message: `could not build invocation for ${source}.${found.name}`,
558
+ },
559
+ };
560
+ }
561
+ const result = await execute(invocation);
562
+ if (result.error) {
563
+ return {
564
+ source,
565
+ rows: [],
566
+ error: {
567
+ code: result.error.code ?? "execution_error",
568
+ message: result.error.message ?? "adapter command failed",
569
+ retryable: result.error.retryable,
570
+ },
571
+ };
572
+ }
573
+ const rows = Array.isArray(result.results)
574
+ ? result.results
575
+ .filter((row) => typeof row === "object" && row !== null && !Array.isArray(row))
576
+ .map((row) => ({ source_adapter: source, ...row }))
577
+ : [];
578
+ return { source, rows };
579
+ }
580
+ async function runReviewAdapterCommand(source, args) {
581
+ const adapter = getAllAdapters().find((candidate) => candidate.name === source);
582
+ if (!adapter) {
583
+ return {
584
+ source,
585
+ rows: [],
586
+ error: {
587
+ code: "adapter_not_found",
588
+ message: `unknown source: ${source}`,
589
+ },
590
+ };
591
+ }
592
+ const found = findScholarReviewThreadCommand(adapter);
593
+ if (!found) {
594
+ return {
595
+ source,
596
+ rows: [],
597
+ error: {
598
+ code: "capability_unsupported",
599
+ message: `${source} does not expose scholar.review`,
600
+ },
601
+ };
602
+ }
603
+ return executeScholarAdapterRows(source, found, args);
604
+ }
263
605
  async function runAdapterCommand(source, capability, args) {
264
606
  const adapter = getAllAdapters().find((candidate) => candidate.name === source);
265
607
  if (!adapter) {
@@ -283,33 +625,244 @@ async function runAdapterCommand(source, capability, args) {
283
625
  },
284
626
  };
285
627
  }
286
- const invocation = buildInvocation("cli", source, found.name, { args, source: "internal" }, { approved: true });
287
- if (!invocation) {
628
+ return executeScholarAdapterCommand(source, found, args, capability);
629
+ }
630
+ async function runSingleRecordResourceCommand(source, capability, args) {
631
+ const adapter = getAllAdapters().find((candidate) => candidate.name === source);
632
+ if (!adapter) {
288
633
  return {
289
634
  source,
290
635
  records: [],
291
636
  error: {
292
- code: "build_invocation_failed",
293
- message: `could not build invocation for ${source}.${found.name}`,
637
+ code: "adapter_not_found",
638
+ message: `unknown source: ${source}`,
294
639
  },
295
640
  };
296
641
  }
297
- const result = await execute(invocation);
298
- if (result.error) {
642
+ const found = findScholarSingleRecordCommandByCapability(adapter, capability);
643
+ if (!found) {
299
644
  return {
300
645
  source,
646
+ capability,
301
647
  records: [],
302
648
  error: {
303
- code: result.error.code ?? "execution_error",
304
- message: result.error.message ?? "adapter command failed",
649
+ code: "capability_unsupported",
650
+ message: `${source} does not expose single-record ${capability}`,
305
651
  },
306
652
  };
307
653
  }
654
+ return executeScholarAdapterCommand(source, found, args, capability);
655
+ }
656
+ async function collectSingleRecords(capability, ref, opts, sourceFallback) {
657
+ const route = resolveScholarReference(ref);
658
+ const sourceList = opts.source
659
+ ? [opts.source]
660
+ : resolveScholarSources(opts.sources, sourceFallback ?? route.preferredSources);
661
+ const outcomes = [];
662
+ for (const source of sourceList) {
663
+ outcomes.push(await runAdapterCommand(source, capability, {
664
+ ...referenceArgs(route, opts),
665
+ }));
666
+ }
308
667
  return {
309
- source,
310
- records: coerceToScholarlyRecords(result.results, source),
668
+ sourceList,
669
+ outcomes,
670
+ records: reciprocalRankFusion(outcomes.map((outcome) => outcome.records), { topN: capability === "scholar.pdf" ? 10 : 50 }),
671
+ };
672
+ }
673
/**
 * Runs a resource search for `query` against each selected source, one source
 * at a time, and fuses the per-source record lists.
 *
 * @param {string} capability - Capability whose queryable command is used.
 * @param {string} query - Search query passed to every adapter.
 * @param {{source?: string, sources?: string}} opts - Source selection options.
 * @returns {Promise<{sourceList: string[], outcomes: object[], records: object[]}>}
 */
async function collectResourceSearchRecords(capability, query, opts) {
    const sourceList = opts.source
        ? [opts.source]
        : resolveScholarSources(opts.sources, listResourceSearchScholarSourcesByCapability(capability));
    // Produces the outcome for one source, including the "cannot run" cases.
    const searchSource = async (source) => {
        const failure = (code, message) => ({ source, records: [], error: { code, message } });
        const adapter = getAllAdapters().find((candidate) => candidate.name === source);
        if (!adapter) {
            return failure("adapter_not_found", `unknown source: ${source}`);
        }
        const found = findScholarResourceSearchCommandByCapability(adapter, capability);
        if (!found) {
            return failure("capability_unsupported", `${source} does not expose queryable ${capability}`);
        }
        return executeScholarAdapterCommand(source, found, { query, limit: "5" }, capability);
    };
    const outcomes = [];
    for (const source of sourceList) {
        outcomes.push(await searchSource(source));
    }
    const records = reciprocalRankFusion(outcomes.map((outcome) => outcome.records), { topN: 10 });
    return { sourceList, outcomes, records };
}
716
/**
 * Looks up detail records for the first three distinct arXiv-style ids found
 * in `searchRecords`, querying every fallback detail source for each id.
 *
 * @param {string} capability - Capability requested from each detail source.
 * @param {object[]} searchRecords - Records from a previous search step.
 * @param {object} opts - Options forwarded to the detail-source listing.
 * @returns {Promise<{sourceList: string[], outcomes: object[], records: object[]}>}
 */
async function collectResourceDetailRecordsFromSearch(capability, searchRecords, opts) {
    const sourceList = listResourceDetailSourcesForSearchFallback(capability, opts);
    // New-style arXiv id with an optional version suffix, e.g. 2401.12345v2.
    const arxivIdPattern = /^\d{4}\.\d{4,5}(?:v\d+)?$/i;
    const distinctIds = new Set();
    for (const record of searchRecords) {
        const candidate = record.arxiv_id ?? record.id;
        if (arxivIdPattern.test(candidate)) {
            distinctIds.add(candidate);
        }
    }
    const refs = Array.from(distinctIds).slice(0, 3);
    const outcomes = [];
    for (const ref of refs) {
        for (const source of sourceList) {
            const lookupArgs = { ...referenceArgs(resolveScholarReference(ref)) };
            outcomes.push(await runAdapterCommand(source, capability, lookupArgs));
        }
    }
    const records = reciprocalRankFusion(outcomes.map((outcome) => outcome.records), { topN: 10 });
    return { sourceList, outcomes, records };
}
737
/**
 * Collects PDF candidate records for `ref`.
 *
 * Every selected source is first asked for `scholar.pdf` directly. When the
 * reference kind is "unknown", or no source returned any record, each source
 * is additionally queried through its queryable search command using `ref`
 * as the query; for unknown references those search hits are filtered to
 * records relevant to the query.
 *
 * @param {string} ref - Reference or free-text query.
 * @param {object} opts - Options (`source`, `sources`, plus reference args).
 * @returns {Promise<{sourceList: string[], outcomes: object[], records: object[]}>}
 */
async function collectPdfCandidates(ref, opts) {
    const route = resolveScholarReference(ref);
    const sourceList = resolveScholarArtifactSources(opts.source, opts.sources, route);
    const outcomes = [];
    for (const source of sourceList) {
        const direct = await runAdapterCommand(source, "scholar.pdf", { ...referenceArgs(route, opts) });
        outcomes.push(direct);
    }
    const isUnknownRef = route.kind === "unknown";
    const needsSearchFallback = isUnknownRef || outcomes.every((outcome) => outcome.records.length === 0);
    if (needsSearchFallback) {
        for (const source of sourceList) {
            const failure = (code, message) => ({ source, records: [], error: { code, message } });
            const adapter = getAllAdapters().find((candidate) => candidate.name === source);
            if (!adapter) {
                outcomes.push(failure("adapter_not_found", `unknown source: ${source}`));
                continue;
            }
            const found = findScholarQueryableSearchCommand(adapter);
            if (!found) {
                outcomes.push(failure("capability_unsupported", `${source} does not expose queryable scholar.search`));
                continue;
            }
            const searched = await executeScholarAdapterCommand(source, found, { query: ref, limit: "5" }, "scholar.pdf");
            outcomes.push(isUnknownRef ? onlyRelevantUnknownQueryRecords(searched, ref) : searched);
        }
    }
    const records = reciprocalRankFusion(outcomes.map((outcome) => outcome.records), { topN: 10 });
    return { sourceList, outcomes, records };
}
790
/**
 * Returns the first record carrying a non-empty string `pdf_url`.
 *
 * @param {object[]} records - Candidate records.
 * @returns {object|undefined} The first matching record, or undefined.
 */
function firstPdfRecord(records) {
    const hasPdfUrl = ({ pdf_url: pdfUrl }) => typeof pdfUrl === "string" && pdfUrl.length > 0;
    return records.find((record) => hasPdfUrl(record));
}
793
/**
 * Normalizes a title or query into a comparison key: lower-cased, with every
 * run of characters that are not letters, combining marks or digits collapsed
 * to a single space, and trimmed.
 *
 * Letters and digits are matched by Unicode property rather than `[a-z0-9]`,
 * so non-Latin titles (e.g. Chinese) keep their content instead of
 * normalizing to an empty key. NFKC normalization makes composed and
 * decomposed forms of the same text produce the same key. ASCII input yields
 * exactly the same key as the ASCII-only rule.
 *
 * @param {unknown} value - Title or query text.
 * @returns {string} The key; "" for non-string or content-free input.
 */
function normalizedTitleKey(value) {
    // A record may have no title at all; treat that as "no key", not a crash.
    if (typeof value !== "string")
        return "";
    return value
        .normalize("NFKC")
        .toLowerCase()
        .replace(/[^\p{L}\p{M}\p{N}]+/gu, " ")
        .trim();
}
800
/**
 * Decides whether a record's title is a plausible match for a free-text query.
 *
 * Rules, in order, on normalized keys:
 *  - an empty query key accepts every record;
 *  - a missing or empty title key is rejected;
 *  - the title equals the query or starts with the query plus a space;
 *  - a query key of at least 12 characters occurs anywhere in the title;
 *  - for queries of more than three tokens, at least 80% of the query tokens
 *    occur as title tokens.
 *
 * @param {{title?: unknown}} record - Record to test.
 * @param {unknown} query - User query.
 * @returns {boolean} True when the record is considered relevant.
 */
export function isScholarlyRecordRelevantToQuery(record, query) {
    // Non-string inputs are treated as empty text so the guards below handle
    // them instead of the normalizer throwing on `.toLowerCase()`.
    const queryKey = typeof query === "string" ? normalizedTitleKey(query) : "";
    if (!queryKey)
        return true;
    const title = record?.title;
    const titleKey = typeof title === "string" ? normalizedTitleKey(title) : "";
    if (!titleKey)
        return false;
    if (titleKey === queryKey || titleKey.startsWith(`${queryKey} `)) {
        return true;
    }
    // Short queries are too ambiguous for a substring match.
    if (queryKey.length >= 12 && titleKey.includes(queryKey))
        return true;
    const queryTokens = queryKey.split(" ").filter(Boolean);
    if (queryTokens.length <= 3)
        return false;
    const titleTokens = new Set(titleKey.split(" ").filter(Boolean));
    const matched = queryTokens.filter((token) => titleTokens.has(token)).length;
    return matched / queryTokens.length >= 0.8;
}
819
/**
 * Copies an outcome, keeping only the records relevant to a free-text query.
 *
 * @param {{records: object[]}} outcome - Adapter outcome; not mutated.
 * @param {string} query - Query the records are checked against.
 * @returns {object} A shallow copy of `outcome` with filtered `records`.
 */
function onlyRelevantUnknownQueryRecords(outcome, query) {
    const relevant = [];
    for (const record of outcome.records) {
        if (isScholarlyRecordRelevantToQuery(record, query)) {
            relevant.push(record);
        }
    }
    return { ...outcome, records: relevant };
}
825
/**
 * Checks whether any identifier or URL field of a record refers to `ref`.
 *
 * For arXiv and DOI references the fields are canonicalized and compared for
 * equality with the resolved value. For every other reference kind a
 * case-insensitive substring match on the string fields is used.
 *
 * @param {object} record - Scholarly record.
 * @param {string} ref - Reference to compare against.
 * @returns {boolean} True when some field matches the reference.
 */
export function isScholarlyRecordRelevantToRef(record, ref) {
    const { kind, value } = resolveScholarReference(ref);
    const identifierFields = [
        record.id,
        record.arxiv_id,
        record.doi,
        record.pmid,
        record.pmc_id,
        record.openreview_id,
        record.semantic_scholar_id,
        record.source_url,
        record.pdf_url,
    ];
    switch (kind) {
        case "arxiv":
            return identifierFields.some((field) => canonicalArxivId(field) === value);
        case "doi":
            return identifierFields.some((field) => canonicalDoi(field) === value);
        default: {
            const needle = value.toLowerCase();
            return identifierFields.some((field) => {
                if (typeof field !== "string") {
                    return false;
                }
                return field.trim().toLowerCase().includes(needle);
            });
        }
    }
}
848
/**
 * Copies an outcome, keeping only the records that refer to `ref`.
 *
 * @param {{records: object[]}} outcome - Adapter outcome; not mutated.
 * @param {string} ref - Reference the records are checked against.
 * @returns {object} A shallow copy of `outcome` with filtered `records`.
 */
function onlyRelevantRefRecords(outcome, ref) {
    const matching = [];
    for (const record of outcome.records) {
        if (isScholarlyRecordRelevantToRef(record, ref)) {
            matching.push(record);
        }
    }
    return { ...outcome, records: matching };
}
854
/**
 * Tells whether an outcome error is worth retrying: it is flagged
 * `retryable: true`, or its code is one of the rate-limit codes.
 *
 * @param {{retryable?: boolean, code?: string}|null|undefined} error
 * @returns {boolean}
 */
function isRetryableScholarError(error) {
    if (error == null) {
        return false;
    }
    if (error.retryable === true) {
        return true;
    }
    return error.code === "rate_limit" || error.code === "rate_limited";
}
859
/**
 * Renders one outcome's error as `source: code` or `source: code (message)`.
 * A missing code is shown as "unknown_error"; a blank message is omitted.
 *
 * @param {{source: string, error?: {code?: string, message?: string}}} outcome
 * @returns {string}
 */
function formatScholarOutcomeError(outcome) {
    const { source, error } = outcome;
    const summary = `${source}: ${error?.code ?? "unknown_error"}`;
    const detail = error?.message?.trim();
    if (!detail) {
        return summary;
    }
    return `${summary} (${detail})`;
}
313
866
  function columns(detailed = false) {
314
867
  return detailed
315
868
  ? [
@@ -327,11 +880,71 @@ function columns(detailed = false) {
327
880
  "is_open_access",
328
881
  "oa_status",
329
882
  "pdf_url",
883
+ "code_url",
884
+ "project_url",
885
+ "dataset_url",
886
+ "model_urls",
887
+ "dataset_urls",
888
+ "space_urls",
889
+ "github_stars",
890
+ "num_models",
891
+ "num_datasets",
892
+ "num_spaces",
330
893
  "source_adapter",
331
894
  "source_url",
895
+ "search_scope",
896
+ "search_window",
897
+ "search_exhaustive",
332
898
  ]
333
899
  : ["id", "title", "year", "venue", "doi", "pdf_url", "source_adapter"];
334
900
  }
901
/**
 * Column names used when printing resource (code / dataset) records.
 *
 * @param {boolean} [detailed=false] - Append the extended columns.
 * @returns {string[]} A fresh array of column names.
 */
function resourceColumns(detailed = false) {
    const names = [
        "id",
        "title",
        "source_adapter",
        "source_url",
        "pdf_url",
        "code_url",
        "project_url",
        "dataset_url",
        "model_urls",
        "dataset_urls",
        "space_urls",
    ];
    if (detailed) {
        names.push("authors", "year", "github_stars", "num_models", "num_datasets", "num_spaces", "retrieved_at");
    }
    return names;
}
928
/**
 * Picks the output column set for a capability: resource columns for
 * `scholar.code` and `scholar.datasets`, the general columns otherwise.
 *
 * @param {string} capability - Scholar capability name.
 * @param {boolean} [detailed=false] - Request the extended column set.
 * @returns {string[]}
 */
function columnsForCapability(capability, detailed = false) {
    switch (capability) {
        case "scholar.code":
        case "scholar.datasets":
            return resourceColumns(detailed);
        default:
            return columns(detailed);
    }
}
933
/**
 * Column names used when printing review rows.
 *
 * @param {boolean} [detailed=false] - Append author, invitation and text size.
 * @returns {string[]} A fresh array of column names.
 */
function reviewColumns(detailed = false) {
    const names = [
        "source_adapter",
        "forum",
        "note_id",
        "type",
        "created_at",
        "source_url",
        "rating",
        "confidence",
        "text",
        "text_truncated",
    ];
    if (!detailed) {
        return names;
    }
    return names.concat(["author", "invitation", "text_chars"]);
}
335
948
  async function runSearch(program, query, opts) {
336
949
  const startedAt = Date.now();
337
950
  const fmt = detectFormat(program.opts().format);
@@ -351,9 +964,9 @@ async function runSearch(program, query, opts) {
351
964
  code: "SCHOLAR_NOT_FOUND",
352
965
  message: `no scholarly works returned for "${query}" across [${sources.join(", ")}]`,
353
966
  suggestion: errors.length > 0
354
- ? `Per-source errors: ${errors.map((outcome) => `${outcome.source}: ${outcome.error?.code}`).join("; ")}`
967
+ ? `Per-source errors: ${errors.map(formatScholarOutcomeError).join("; ")}`
355
968
  : "Try --sources all or a more specific query.",
356
- retryable: errors.some((outcome) => outcome.error?.code === "rate_limit"),
969
+ retryable: errors.some((outcome) => isRetryableScholarError(outcome.error)),
357
970
  };
358
971
  console.error(format(null, undefined, fmt, ctx));
359
972
  process.exit(ExitCode.EMPTY_RESULT);
@@ -364,80 +977,2202 @@ async function runSingle(program, capability, ref, opts) {
364
977
  const startedAt = Date.now();
365
978
  const fmt = detectFormat(program.opts().format);
366
979
  const ctx = makeCtx(capability, startedAt);
980
+ const lookupRef = await resolveGraphLookupRef(capability, ref, opts);
981
+ const { sourceList, outcomes, records } = await collectSingleRecords(capability, lookupRef, opts);
982
+ ctx.duration_ms = Date.now() - startedAt;
983
+ ctx.surface = "web";
984
+ if (records.length === 0) {
985
+ const errors = outcomes.filter((outcome) => outcome.error);
986
+ ctx.error = {
987
+ code: "SCHOLAR_NOT_FOUND",
988
+ message: `no scholarly records returned for "${ref}" across [${sourceList.join(", ")}]`,
989
+ suggestion: errors.length > 0
990
+ ? `Per-source errors: ${errors.map(formatScholarOutcomeError).join("; ")}`
991
+ : "Run `unicli scholar doctor` to inspect available scholarly sources.",
992
+ retryable: errors.some((outcome) => isRetryableScholarError(outcome.error)),
993
+ };
994
+ console.error(format(null, undefined, fmt, ctx));
995
+ process.exit(ExitCode.EMPTY_RESULT);
996
+ }
997
+ console.log(format(records, columnsForCapability(capability, opts.detailed), fmt, ctx));
998
+ }
999
+ async function runReviews(program, ref, opts) {
1000
+ const startedAt = Date.now();
1001
+ const fmt = detectFormat(program.opts().format);
1002
+ const ctx = makeCtx("scholar.reviews", startedAt);
367
1003
  const route = resolveScholarReference(ref);
368
1004
  const sourceList = opts.source
369
1005
  ? [opts.source]
370
- : resolveScholarSources(opts.sources, route.preferredSources);
1006
+ : resolveScholarSources(opts.sources, route.kind === "openreview"
1007
+ ? route.preferredSources
1008
+ : listScholarReviewSources());
371
1009
  const outcomes = [];
372
1010
  for (const source of sourceList) {
373
- outcomes.push(await runAdapterCommand(source, capability, {
374
- ref: route.value,
375
- id: route.value,
376
- doi: route.kind === "doi" ? route.value : undefined,
377
- arxiv_id: route.kind === "arxiv" ? route.value : undefined,
378
- pmid: route.kind === "pmid" ? route.value : undefined,
1011
+ const maxLength = opts.maxLength === undefined ? undefined : Number(opts.maxLength);
1012
+ outcomes.push(await runReviewAdapterCommand(source, {
1013
+ ...referenceArgs(route),
1014
+ forum: route.value,
1015
+ "max-length": maxLength,
379
1016
  }));
380
1017
  }
381
- const fused = reciprocalRankFusion(outcomes.map((outcome) => outcome.records), { topN: capability === "scholar.pdf" ? 10 : 50 });
1018
+ const rows = outcomes.flatMap((outcome) => outcome.rows);
382
1019
  ctx.duration_ms = Date.now() - startedAt;
383
1020
  ctx.surface = "web";
384
- if (fused.length === 0) {
1021
+ if (rows.length === 0) {
385
1022
  const errors = outcomes.filter((outcome) => outcome.error);
386
1023
  ctx.error = {
387
- code: "SCHOLAR_NOT_FOUND",
388
- message: `no scholarly records returned for "${ref}" across [${sourceList.join(", ")}]`,
1024
+ code: "SCHOLAR_REVIEWS_NOT_FOUND",
1025
+ message: `no scholarly review rows returned for "${ref}" across [${sourceList.join(", ")}]`,
389
1026
  suggestion: errors.length > 0
390
- ? `Per-source errors: ${errors.map((outcome) => `${outcome.source}: ${outcome.error?.code}`).join("; ")}`
391
- : "Run `unicli scholar doctor` to inspect available scholarly sources.",
392
- retryable: errors.some((outcome) => outcome.error?.code === "rate_limit"),
1027
+ ? `Per-source errors: ${errors.map(formatScholarOutcomeError).join("; ")}`
1028
+ : "Use an OpenReview forum id or URL, or run `unicli scholar search <query> --sources openreview` before requesting reviews.",
1029
+ retryable: errors.some((outcome) => isRetryableScholarError(outcome.error)),
393
1030
  };
394
1031
  console.error(format(null, undefined, fmt, ctx));
395
1032
  process.exit(ExitCode.EMPTY_RESULT);
396
1033
  }
397
- console.log(format(fused, columns(opts.detailed), fmt, ctx));
1034
+ console.log(format(rows, reviewColumns(opts.detailed), fmt, ctx));
398
1035
  }
399
- async function runDoctor(program, opts) {
1036
/**
 * Tells whether a resource field carries usable content.
 *
 * A string counts when it is not blank. An array counts when at least one of
 * its elements is a non-blank string, so the plural `*_urls` fields are
 * recognised whether an adapter supplies them as one string or as a list.
 *
 * @param {unknown} value - Field value from a scholarly record.
 * @returns {boolean}
 */
function nonEmptyResourceField(value) {
    const isNonBlankString = (item) => typeof item === "string" && item.trim().length > 0;
    if (Array.isArray(value)) {
        return value.some(isNonBlankString);
    }
    return isNonBlankString(value);
}
1039
/**
 * Tells whether a resource counter is present and greater than zero.
 *
 * @param {number|undefined} value - Counter value from a record.
 * @returns {boolean}
 */
function positiveResourceCount(value) {
    if (value === undefined) {
        return false;
    }
    return value > 0;
}
1042
/**
 * Tells whether a record carries a resource of the kind a capability asks for.
 *
 * `scholar.code` requires a code or project URL. Any other capability is
 * treated as the dataset family: dataset, model or space URLs, or counts.
 *
 * @param {object} record - Scholarly record.
 * @param {string} capability - Scholar capability name.
 * @returns {boolean}
 */
export function hasResourceForCapability(record, capability) {
    if (capability === "scholar.code") {
        return hasCodeResource(record);
    }
    // The per-kind helpers together cover exactly the dataset-family fields.
    return hasDatasetResource(record) || hasModelResource(record) || hasSpaceResource(record);
}
1055
/**
 * Tells whether a record has a code or project URL.
 *
 * @param {object} record - Scholarly record.
 * @returns {boolean}
 */
function hasCodeResource(record) {
    if (nonEmptyResourceField(record.code_url)) {
        return true;
    }
    return nonEmptyResourceField(record.project_url);
}
1059
/**
 * Tells whether a record has dataset URLs or a positive dataset count.
 *
 * @param {object} record - Scholarly record.
 * @returns {boolean}
 */
function hasDatasetResource(record) {
    if (nonEmptyResourceField(record.dataset_url)) {
        return true;
    }
    if (nonEmptyResourceField(record.dataset_urls)) {
        return true;
    }
    return positiveResourceCount(record.num_datasets);
}
1064
/**
 * Tells whether a record has model URLs or a positive model count.
 *
 * @param {object} record - Scholarly record.
 * @returns {boolean}
 */
function hasModelResource(record) {
    if (nonEmptyResourceField(record.model_urls)) {
        return true;
    }
    return positiveResourceCount(record.num_models);
}
1068
/**
 * Tells whether a record has space URLs or a positive space count.
 *
 * @param {object} record - Scholarly record.
 * @returns {boolean}
 */
function hasSpaceResource(record) {
    if (nonEmptyResourceField(record.space_urls)) {
        return true;
    }
    return positiveResourceCount(record.num_spaces);
}
1072
/**
 * Collects records that carry a resource (code, datasets, models, spaces) for
 * a reference or free-text query.
 *
 * Identified references are first looked up directly on each single-record
 * source. If that yields no resource-bearing record, a resource search is run
 * with `ref` as the query and its hits are filtered to records that refer to
 * `ref`. Unknown references skip the direct lookup: the search hits are
 * filtered by title relevance and then enriched through detail lookups.
 *
 * `sourceList` is de-duplicated in both paths, so a source consulted in more
 * than one stage is reported once.
 *
 * @param {string} capability - Capability being resolved.
 * @param {string} ref - Reference or free-text query.
 * @param {{source?: string, sources?: string}} opts - Source selection options.
 * @returns {Promise<{sourceList: string[], outcomes: object[], records: object[]}>}
 */
async function collectResourceRecords(capability, ref, opts) {
    const route = resolveScholarReference(ref);
    const isUnknownRef = route.kind === "unknown";
    const fuse = (lists) => reciprocalRankFusion(lists, { topN: 10 });
    const withResource = (records) => records.filter((record) => hasResourceForCapability(record, capability));
    let sourceList = isUnknownRef
        ? []
        : opts.source
            ? [opts.source]
            : resolveScholarSources(opts.sources, listSingleRecordScholarSourcesByCapability(capability));
    let outcomes = [];
    // For unknown references `sourceList` is empty, so no direct lookup runs.
    for (const source of sourceList) {
        outcomes.push(await runSingleRecordResourceCommand(source, capability, {
            ...referenceArgs(route),
        }));
    }
    let resourceRecords = withResource(fuse(outcomes.map((outcome) => outcome.records)));
    if (resourceRecords.length > 0) {
        return { sourceList, outcomes, records: resourceRecords };
    }
    const searched = await collectResourceSearchRecords(capability, ref, opts);
    if (!isUnknownRef) {
        const searchOutcomes = searched.outcomes.map((outcome) => onlyRelevantRefRecords(outcome, ref));
        const searchedSources = new Set(searchOutcomes
            .filter((outcome) => !outcome.error)
            .map((outcome) => outcome.source));
        sourceList = uniqueStrings([...sourceList, ...searched.sourceList]);
        // Drop a direct-lookup "unsupported" error once the same source
        // answered successfully through search.
        outcomes = [
            ...outcomes.filter((outcome) => outcome.error?.code !== "capability_unsupported" ||
                !searchedSources.has(outcome.source)),
            ...searchOutcomes,
        ];
        resourceRecords = withResource(fuse(outcomes.map((outcome) => outcome.records)));
    }
    else {
        const searchOutcomes = searched.outcomes.map((outcome) => onlyRelevantUnknownQueryRecords(outcome, ref));
        const searchRecords = fuse(searchOutcomes.map((outcome) => outcome.records));
        const enriched = await collectResourceDetailRecordsFromSearch(capability, searchRecords, opts);
        // Search and detail sources can overlap; list each source only once.
        sourceList = uniqueStrings([
            ...sourceList,
            ...searched.sourceList,
            ...enriched.sourceList,
        ]);
        outcomes = [...outcomes, ...searchOutcomes, ...enriched.outcomes];
        resourceRecords = withResource(fuse([enriched.records, searchRecords]));
    }
    return { sourceList, outcomes, records: resourceRecords };
}
1118
+ async function runResources(program, capability, ref, opts) {
400
1119
  const startedAt = Date.now();
401
1120
  const fmt = detectFormat(program.opts().format);
402
- const ctx = makeCtx("scholar.doctor", startedAt);
403
- const selected = resolveScholarSources(opts.sources, listScholarAdapters().map((adapter) => adapter.name));
404
- const rows = listScholarAdapters()
405
- .filter((adapter) => selected.includes(adapter.name))
406
- .map((adapter) => {
407
- const caps = new Set();
408
- for (const command of Object.values(adapter.commands)) {
409
- for (const cap of command.capabilities ?? []) {
410
- if (cap.startsWith("scholar."))
411
- caps.add(cap);
412
- }
413
- }
414
- const health = resolveCommand(adapter.name, "health");
415
- const strategy = health
416
- ? commandStrategy(adapter, health.command)
417
- : undefined;
418
- return {
419
- source: adapter.name,
420
- capabilities: [...caps].sort(),
421
- health: !health
422
- ? "skipped"
423
- : strategy !== undefined && strategy !== Strategy.PUBLIC
424
- ? "blocked"
425
- : "available",
426
- detail: !health
427
- ? "no `health` command — adapter passes by capability introspection"
428
- : strategy !== undefined && strategy !== Strategy.PUBLIC
429
- ? `health probe requires ${strategy} auth — skipped`
430
- : "health probe command is public",
431
- };
432
- });
1121
+ const ctx = makeCtx(capability, startedAt);
1122
+ const { sourceList, outcomes, records: resourceRecords, } = await collectResourceRecords(capability, ref, opts);
433
1123
  ctx.duration_ms = Date.now() - startedAt;
434
1124
  ctx.surface = "web";
435
- console.log(format(rows, ["source", "capabilities", "health", "detail"], fmt, ctx));
436
- }
437
- export function registerScholarCommand(program) {
438
- const scholar = program
439
- .command("scholar")
440
- .description("Scholarly meta-command search, retrieve, PDF, citations, references, and source audit across first-source academic adapters");
1125
+ if (resourceRecords.length === 0) {
1126
+ const errors = outcomes.filter((outcome) => outcome.error);
1127
+ const label = capability === "scholar.code" ? "code" : "dataset/model";
1128
+ ctx.error = {
1129
+ code: "SCHOLAR_RESOURCE_NOT_FOUND",
1130
+ message: `no scholarly ${label} resources returned for "${ref}" across [${sourceList.join(", ")}]`,
1131
+ suggestion: errors.length > 0
1132
+ ? `Per-source errors: ${errors.map(formatScholarOutcomeError).join("; ")}`
1133
+ : "Try --source hf for Hugging Face paper resources, or run `unicli scholar doctor` to inspect resource-capable sources.",
1134
+ retryable: errors.some((outcome) => isRetryableScholarError(outcome.error)),
1135
+ };
1136
+ console.error(format(null, undefined, fmt, ctx));
1137
+ process.exit(ExitCode.EMPTY_RESULT);
1138
+ }
1139
+ console.log(format(resourceRecords, resourceColumns(opts.detailed), fmt, ctx));
1140
+ }
1141
/**
 * Returns the distinct truthy values of an iterable in first-seen order.
 *
 * @param {Iterable<string|undefined|null>} values
 * @returns {string[]}
 */
function uniqueStrings(values) {
    const seen = new Set();
    for (const value of values) {
        if (value) {
            seen.add(value);
        }
    }
    return Array.from(seen);
}
1144
/**
 * Lists the distinct `source_adapter` names of the records accepted by
 * `predicate`, in first-seen order. Records without a name are ignored.
 *
 * @param {object[]} records - Scholarly records.
 * @param {(record: object) => boolean} [predicate] - Record filter; accepts all by default.
 * @returns {string[]}
 */
function sourcesForRecords(records, predicate = () => true) {
    const names = new Set();
    for (const record of records.filter(predicate)) {
        const name = record.source_adapter;
        if (name) {
            names.add(name);
        }
    }
    return [...names];
}
1150
/**
 * Wraps a value in single quotes for display in a shell command, rewriting
 * each embedded single quote as `'\''`.
 *
 * @param {string} value - Argument text.
 * @returns {string} The single-quoted argument.
 */
function quoteCliArg(value) {
    const escaped = value.split("'").join("'\\''");
    return "'" + escaped + "'";
}
1153
/**
 * Builds a `unicli scholar <subcommand>` command line for `ref`, carrying
 * over the source-selection and Unpaywall options that are set.
 *
 * @param {string} subcommand - Scholar subcommand name.
 * @param {string} ref - Reference or query, quoted as one argument.
 * @param {{source?: string, sources?: string, unpaywallEmail?: string}} opts
 * @returns {string}
 */
function scholarCommand(subcommand, ref, opts) {
    const parts = [`unicli scholar ${subcommand}`, quoteCliArg(ref)];
    if (opts.source) {
        parts.push(`--source ${quoteCliArg(opts.source)}`);
    }
    if (opts.sources) {
        parts.push(`--sources ${quoteCliArg(opts.sources)}`);
    }
    if (opts.unpaywallEmail) {
        parts.push(`--unpaywall-email ${quoteCliArg(opts.unpaywallEmail)}`);
    }
    return parts.join(" ");
}
1163
/**
 * Extracts the version-less new-style arXiv id (`YYMM.NNNN` or `YYMM.NNNNN`)
 * from a bare id, an `arxiv:` reference, an arxiv.org abs/pdf URL, or an
 * arXiv DOI (`10.48550/arXiv.<id>`).
 *
 * Surrounding whitespace is ignored. The id must not be followed by another
 * digit, so an over-long number such as `2401.123456` is rejected rather than
 * truncated to a different id.
 *
 * @param {unknown} value - Candidate identifier or URL.
 * @returns {string|undefined} The canonical id, or undefined when none is found.
 */
function canonicalArxivId(value) {
    if (typeof value !== "string")
        return undefined;
    const match = value
        .trim()
        .match(/(?:arxiv(?:\.org\/(?:abs|pdf)\/|:)|10\.48550\/arxiv\.|^)(\d{4}\.\d{4,5})(?:v\d+)?(?!\d)/i);
    return match?.[1];
}
1169
/**
 * Reduces a value to a bare DOI when it has the `10.<registrant>/<suffix>`
 * shape with no whitespace.
 *
 * @param {unknown} value - Candidate DOI or DOI URL.
 * @returns {string|undefined} The bare DOI, or undefined.
 */
function canonicalDoi(value) {
    if (typeof value === "string") {
        const candidate = bareDoi(value);
        if (/^10\.\S+\/\S+$/i.test(candidate)) {
            return candidate;
        }
    }
    return undefined;
}
1175
/**
 * Returns the first truthy value `pick` produces while walking `records` in
 * order; later records are not visited once a value is found.
 *
 * @param {Iterable<object>} records - Records to scan.
 * @param {(record: object) => unknown} pick - Extracts a candidate value.
 * @returns {unknown} The first truthy picked value, or undefined.
 */
function firstRecordValue(records, pick) {
    let found;
    for (const record of records) {
        found = pick(record);
        if (found) {
            break;
        }
        found = undefined;
    }
    return found;
}
1183
/**
 * Chooses the canonical reference for a set of records.
 *
 * Identifier kinds are tried in a fixed priority order — arXiv, DOI, PMID,
 * OpenReview, Semantic Scholar — and each kind is searched across all records
 * before the next kind is considered. If no record supplies an identifier,
 * the original route is used unless its kind is "unknown".
 *
 * @param {{kind: string, value: string}} route - Resolved input reference.
 * @param {object[]} records - Records returned for that reference.
 * @returns {{kind: string, ref: string}|undefined}
 */
function canonicalScholarReference(route, records) {
    // Trimmed text for non-blank strings, undefined for anything else.
    const trimmedText = (value) => typeof value === "string" && value.trim().length > 0 ? value.trim() : undefined;
    const resolvers = [
        ["arxiv", (record) => canonicalArxivId(record.arxiv_id) ??
                canonicalArxivId(record.id) ??
                canonicalArxivId(record.source_url) ??
                canonicalArxivId(record.pdf_url)],
        ["doi", (record) => canonicalDoi(record.doi) ?? canonicalDoi(record.id)],
        ["pmid", (record) => trimmedText(record.pmid)],
        ["openreview", (record) => {
                const id = trimmedText(record.openreview_id);
                return id === undefined ? undefined : `openreview:${id}`;
            }],
        ["semantic-scholar", (record) => trimmedText(record.semantic_scholar_id)],
    ];
    for (const [kind, pick] of resolvers) {
        const ref = firstRecordValue(records, pick);
        if (ref) {
            return { kind, ref };
        }
    }
    if (route.kind === "unknown") {
        return undefined;
    }
    return { kind: route.kind, ref: route.value };
}
1214
/**
 * Builds a `unicli <source> <command>` line, followed by the placeholder
 * when one is given.
 *
 * @param {string} source - Adapter name.
 * @param {string|undefined} command - Adapter command name.
 * @param {string} [placeholder] - Argument placeholder such as "<query>".
 * @returns {string|undefined} The command line, or undefined without a command.
 */
function sourceCommand(source, command, placeholder) {
    if (!command) {
        return undefined;
    }
    const invocation = `unicli ${source} ${command}`;
    return placeholder ? `${invocation} ${placeholder}` : invocation;
}
1219
/**
 * Gathers the scholar capabilities declared by any of an adapter's commands.
 * Capabilities not listed in `SCHOLAR_CAPABILITIES` are ignored.
 *
 * @param {{commands: Record<string, {capabilities?: string[]}>}} adapter
 * @returns {Set<string>}
 */
function capabilitySet(adapter) {
    const declared = Object.values(adapter.commands).flatMap((command) => [
        ...(command.capabilities ?? []),
    ]);
    return new Set(declared.filter((capability) => SCHOLAR_CAPABILITIES.includes(capability)));
}
1230
/**
 * Finds the name of the adapter command serving a capability.
 *
 * For the resource capabilities (`scholar.code`, `scholar.datasets`) the
 * single-record command is preferred, then the resource-search command.
 *
 * @param {object} adapter - Scholar adapter.
 * @param {string} capability - Scholar capability name.
 * @returns {string|undefined} The command name, or undefined when none serves it.
 */
function sourceCommandByCapability(adapter, capability) {
    const isResourceCapability = capability === "scholar.code" || capability === "scholar.datasets";
    if (!isResourceCapability) {
        return findScholarCommandByCapability(adapter, capability)?.name;
    }
    const singleRecordName = findScholarSingleRecordCommandByCapability(adapter, capability)?.name;
    if (singleRecordName !== undefined && singleRecordName !== null) {
        return singleRecordName;
    }
    return findScholarResourceSearchCommandByCapability(adapter, capability)?.name;
}
1237
/**
 * Tells whether an adapter can answer a source-scoped availability check:
 * it declares `scholar.get` or `scholar.pdf`, or a command can be resolved
 * for `scholar.code` or `scholar.datasets`.
 *
 * @param {object} adapter - Scholar adapter.
 * @param {Set<string>} [capabilities] - Declared capabilities; computed from the adapter when omitted.
 * @returns {boolean}
 */
function adapterSupportsSourceScopedAvailability(adapter, capabilities = capabilitySet(adapter)) {
    if (capabilities.has("scholar.get") || capabilities.has("scholar.pdf")) {
        return true;
    }
    if (sourceCommandByCapability(adapter, "scholar.code") !== undefined) {
        return true;
    }
    return sourceCommandByCapability(adapter, "scholar.datasets") !== undefined;
}
1243
/**
 * Name of the adapter's review-thread command, if it has one.
 *
 * @param {object} adapter - Scholar adapter.
 * @returns {string|undefined}
 */
function sourceReviewCommand(adapter) {
    const reviewCommand = findScholarReviewThreadCommand(adapter);
    return reviewCommand?.name;
}
1246
/**
 * Resolves the command name reported in coverage output for a capability.
 * `scholar.review` uses the review-thread lookup; every other capability
 * uses the general capability lookup.
 *
 * @param {object} adapter - Scholar adapter.
 * @param {string} capability - Scholar capability name.
 * @returns {string|undefined}
 */
function coverageCommandByCapability(adapter, capability) {
    if (capability === "scholar.review") {
        return sourceReviewCommand(adapter);
    }
    return sourceCommandByCapability(adapter, capability);
}
1251
/**
 * Builds the search command line for an adapter. A queryable search command
 * is shown with a `<query>` placeholder; otherwise the plain `scholar.search`
 * command is shown without one.
 *
 * @param {object} adapter - Scholar adapter.
 * @returns {string|undefined} The command line, or undefined when the adapter has no search command.
 */
function sourceSearchCommand(adapter) {
    const queryable = findScholarQueryableSearchCommand(adapter);
    if (!queryable) {
        const fallbackName = sourceCommandByCapability(adapter, "scholar.search");
        return sourceCommand(adapter.name, fallbackName);
    }
    return sourceCommand(adapter.name, queryable.name, "<query>");
}
1257
/**
 * Maps a capability set to the list of recommended uses, in a fixed order.
 *
 * @param {Set<string>} capabilities - Declared scholar capabilities.
 * @returns {string[]} Use labels; empty when nothing applies.
 */
function recommendedUses(capabilities) {
    // Each label applies when at least one of its capabilities is present.
    const rules = [
        [["scholar.search"], "discovery"],
        [["scholar.get"], "metadata"],
        [["scholar.pdf"], "pdf-download/read"],
        [["scholar.fulltext"], "source-fulltext"],
        [["scholar.code"], "code/project"],
        [["scholar.datasets"], "datasets/models/spaces"],
        [["scholar.citations", "scholar.references"], "citation-graph"],
        [["scholar.review"], "peer-review-audit"],
        [["scholar.venue", "scholar.author"], "venue/author-browse"],
    ];
    return rules
        .filter(([required]) => required.some((capability) => capabilities.has(capability)))
        .map(([, label]) => label);
}
1281
/**
 * Classifies a source by its capabilities. The first matching rule wins, so
 * the order of the checks below is significant.
 *
 * @param {Set<string>} capabilities - Declared scholar capabilities.
 * @returns {string} Role label; "metadata-source" when no rule matches.
 */
function sourceRole(capabilities) {
    const has = (capability) => capabilities.has(capability);
    const rules = [
        ["review-fulltext-source", () => has("scholar.review") && has("scholar.fulltext")],
        ["resource-source", () => has("scholar.code") || has("scholar.datasets")],
        ["artifact-source", () => has("scholar.pdf") && has("scholar.get")],
        ["fulltext-source", () => has("scholar.fulltext")],
        ["graph-source", () => has("scholar.citations") || has("scholar.references")],
        ["discovery-source", () => has("scholar.search")],
    ];
    const matched = rules.find(([, applies]) => applies());
    return matched ? matched[0] : "metadata-source";
}
1299
/**
 * Describes how a source's content can be read, from richest to poorest:
 * full text then PDF, full text only, PDF only, metadata only, discovery only.
 *
 * @param {Set<string>} capabilities - Declared scholar capabilities.
 * @returns {string} Strategy label.
 */
function readStrategy(capabilities) {
    const canFulltext = capabilities.has("scholar.fulltext");
    const canPdf = capabilities.has("scholar.pdf");
    if (canFulltext) {
        return canPdf ? "source-fulltext-then-pdf" : "source-fulltext";
    }
    if (canPdf) {
        return "pdf-download";
    }
    return capabilities.has("scholar.get") ? "metadata-only" : "discovery-only";
}
1312
/**
 * Tells whether a capability set alone is enough for a source-scoped
 * availability check: get, pdf, code or datasets.
 *
 * @param {Set<string>} capabilities - Declared scholar capabilities.
 * @returns {boolean}
 */
function supportsSourceScopedAvailability(capabilities) {
    const sufficient = ["scholar.get", "scholar.pdf", "scholar.code", "scholar.datasets"];
    return sufficient.some((capability) => capabilities.has(capability));
}
1318
/**
 * Chooses how results from a source are handed to the next workflow step.
 *
 * Source-scoped availability is decided from the adapter when one is given,
 * and from the capability set alone otherwise.
 *
 * @param {Set<string>} capabilities - Declared scholar capabilities.
 * @param {object} [adapter] - Adapter used for the command-level check.
 * @returns {string} Handoff strategy label.
 */
function coverageHandoffStrategy(capabilities, adapter) {
    const sourceScoped = adapter
        ? adapterSupportsSourceScopedAvailability(adapter, capabilities)
        : supportsSourceScopedAvailability(capabilities);
    if (sourceScoped) {
        return "source-scoped-evidence";
    }
    if (capabilities.has("scholar.search")) {
        return "discovery-result-to-canonical-workflow";
    }
    const identifierOnly = [
        "scholar.citations",
        "scholar.references",
        "scholar.review",
        "scholar.fulltext",
    ];
    return identifierOnly.some((capability) => capabilities.has(capability))
        ? "identifier-required"
        : "metadata-only";
}
1335
/**
 * Lists, in a fixed order, the closed-loop workflow steps a source cannot
 * serve given its capabilities.
 *
 * @param {Set<string>} capabilities - Declared scholar capabilities.
 * @returns {string[]} Labels of the missing steps; empty when all are covered.
 */
function missingClosedLoopCapabilities(capabilities) {
    // A step is missing when none of its capabilities is present.
    const steps = [
        ["search", ["scholar.search"]],
        ["metadata-get", ["scholar.get"]],
        ["readable-text", ["scholar.pdf", "scholar.fulltext"]],
        ["pdf-download", ["scholar.pdf"]],
        ["source-fulltext", ["scholar.fulltext"]],
        ["code/project", ["scholar.code"]],
        ["datasets/models/spaces", ["scholar.datasets"]],
        ["citation/reference-graph", ["scholar.citations", "scholar.references"]],
        ["peer-review-audit", ["scholar.review"]],
    ];
    const missing = [];
    for (const [label, anyOf] of steps) {
        const covered = anyOf.some((capability) => capabilities.has(capability));
        if (!covered) {
            missing.push(label);
        }
    }
    return missing;
}
1361
/**
 * Builds one coverage row per scholar adapter, ordered by adapter name.
 *
 * The input is copied before sorting, so the caller's array is not reordered.
 * Each row reports which scholar capabilities the adapter declares and which
 * follow-up commands can be suggested for it.
 *
 * @param {Iterable<object>} [adapters] - Adapters to describe; defaults to
 *   `listScholarAdapters()`.
 * @returns {object[]} Coverage rows, one per adapter.
 */
export function buildScholarCoverageRows(adapters = listScholarAdapters()) {
    const orderByName = (a, b) => a.name.localeCompare(b.name);
    const toCoverageRow = (adapter) => {
        const caps = capabilitySet(adapter);
        const supports = (id) => caps.has(id);
        const suggest = (command, placeholder) => sourceCommand(adapter.name, command, placeholder);
        // Per-capability commands; "review" goes through its dedicated resolver.
        const commandFor = {
            get: sourceCommandByCapability(adapter, "scholar.get"),
            pdf: sourceCommandByCapability(adapter, "scholar.pdf"),
            fulltext: sourceCommandByCapability(adapter, "scholar.fulltext"),
            code: sourceCommandByCapability(adapter, "scholar.code"),
            datasets: sourceCommandByCapability(adapter, "scholar.datasets"),
            citations: sourceCommandByCapability(adapter, "scholar.citations"),
            references: sourceCommandByCapability(adapter, "scholar.references"),
            review: sourceReviewCommand(adapter),
            author: sourceCommandByCapability(adapter, "scholar.author"),
            venue: sourceCommandByCapability(adapter, "scholar.venue"),
        };
        const missingStages = missingClosedLoopCapabilities(caps);
        const scopedAvailability = adapterSupportsSourceScopedAvailability(adapter, caps);
        const searchable = supports("scholar.search");
        const readable = supports("scholar.pdf") || supports("scholar.fulltext");
        // Short capability name -> command, omitting capabilities without one.
        const commandIndex = () => {
            const entries = [];
            for (const capability of SCHOLAR_CAPABILITIES) {
                const command = coverageCommandByCapability(adapter, capability);
                if (command !== undefined) {
                    entries.push([capability.replace(/^scholar\./, ""), command]);
                }
            }
            return Object.fromEntries(entries);
        };
        return {
            source: adapter.name,
            role: sourceRole(caps),
            recommended_for: recommendedUses(caps),
            read_strategy: readStrategy(caps),
            handoff_strategy: coverageHandoffStrategy(caps, adapter),
            coverage_score: caps.size,
            coverage_total: SCHOLAR_CAPABILITIES.length,
            missing_closed_loop: missingStages,
            has_search: searchable,
            has_get: supports("scholar.get"),
            has_pdf: supports("scholar.pdf"),
            has_fulltext: supports("scholar.fulltext"),
            has_code: supports("scholar.code"),
            has_datasets: supports("scholar.datasets"),
            has_citations: supports("scholar.citations"),
            has_references: supports("scholar.references"),
            has_review: supports("scholar.review"),
            has_author: supports("scholar.author"),
            has_venue: supports("scholar.venue"),
            next_availability: scopedAvailability
                ? `unicli scholar availability <ref> --source ${adapter.name}`
                : undefined,
            next_read: readable
                ? `unicli scholar read <ref> --source ${adapter.name}`
                : undefined,
            next_search: sourceSearchCommand(adapter),
            // The "*_from_result" hints are offered only for searchable sources.
            next_workflow_from_result: searchable
                ? "unicli scholar workflow <title-or-id>"
                : undefined,
            next_sources_from_result: searchable
                ? "unicli scholar sources <title-or-id>"
                : undefined,
            next_read_from_result: searchable
                ? "unicli scholar read <title-or-id>"
                : undefined,
            next_get: suggest(commandFor.get, "<id-or-ref>"),
            next_pdf: suggest(commandFor.pdf, "<id-or-ref>"),
            next_fulltext: suggest(commandFor.fulltext, "<id-or-ref>"),
            next_code: suggest(commandFor.code, "<id-or-ref>"),
            next_datasets: suggest(commandFor.datasets, "<id-or-ref>"),
            next_citations: suggest(commandFor.citations, "<id-or-ref>"),
            next_references: suggest(commandFor.references, "<id-or-ref>"),
            next_review: suggest(commandFor.review, "<id-or-ref>"),
            next_author: suggest(commandFor.author, "<author>"),
            next_venue: suggest(commandFor.venue, "<venue>"),
            capabilities: [...caps].sort(),
            commands: commandIndex(),
        };
    };
    return [...adapters].sort(orderByName).map(toCoverageRow);
}
1433
/**
 * Returns the column names used to render coverage rows.
 *
 * @param {boolean} [detailed=false] - When truthy, the author/venue flags,
 *   per-capability commands and raw capability listings are appended.
 * @returns {string[]} A freshly allocated list of column names.
 */
function coverageColumns(detailed = false) {
    const columns = [
        "source",
        "role",
        "recommended_for",
        "read_strategy",
        "handoff_strategy",
        "coverage_score",
        "coverage_total",
        "missing_closed_loop",
        "has_search",
        "has_get",
        "has_pdf",
        "has_fulltext",
        "has_code",
        "has_datasets",
        "has_citations",
        "has_references",
        "has_review",
        "next_availability",
        "next_read",
        "next_search",
        "next_workflow_from_result",
        "next_sources_from_result",
        "next_read_from_result",
    ];
    if (detailed) {
        columns.push(
            "has_author",
            "has_venue",
            "next_get",
            "next_pdf",
            "next_fulltext",
            "next_code",
            "next_datasets",
            "next_citations",
            "next_references",
            "next_review",
            "next_author",
            "next_venue",
            "capabilities",
            "commands",
        );
    }
    return columns;
}
1479
/**
 * Normalizes the source-selection options into an explicit source list.
 *
 * `opts.source` takes precedence over `opts.sources`; the latter is expanded
 * through `resolveScholarSources`.
 *
 * @param {{source?: string, sources?: string}} opts - Caller options.
 * @returns {{explicit: boolean, sources: string[]}} `explicit` is false (and
 *   `sources` empty) when neither option is set.
 */
function sourceFilter(opts) {
    let explicit = true;
    let sources = [];
    if (opts.source) {
        sources = [opts.source];
    }
    else if (opts.sources) {
        sources = resolveScholarSources(opts.sources);
    }
    else {
        explicit = false;
    }
    return { explicit, sources };
}
1486
/**
 * Picks the sources that should be tried for one capability during an
 * availability check.
 *
 * Candidates come from the explicit source options when present; otherwise
 * from every source listed for the capability when the route kind is
 * "unknown", or from the route's preferred sources. Candidates are then
 * narrowed to registered adapters exposing a command for the capability.
 *
 * @param {string} capability - Capability id, e.g. "scholar.code".
 * @param {{kind: string, preferredSources: string[]}} route - Resolved route.
 * @param {object} opts - Options forwarded to `sourceFilter`.
 * @returns {string[]} Source names able to serve the capability.
 */
function resolveAvailabilityCapabilitySources(capability, route, opts) {
    const requested = sourceFilter(opts);
    let candidates;
    if (requested.explicit) {
        candidates = requested.sources;
    }
    else if (route.kind === "unknown") {
        candidates = listScholarSourcesByCapability(capability);
    }
    else {
        candidates = route.preferredSources;
    }
    return candidates.filter((name) => {
        const registered = getAllAdapters().find((entry) => entry.name === name);
        if (!registered) {
            return false;
        }
        return findScholarCommandByCapability(registered, capability) !== undefined;
    });
}
1500
/**
 * Summarizes the per-source errors that are still worth reporting.
 *
 * Errors from a source that returned at least one record are dropped. A
 * "capability_unsupported" error is dropped when the same source also has a
 * more specific unresolved error. The formatted messages are de-duplicated
 * and capped at 16 entries.
 *
 * @param {Array<{source: string, records: unknown[], error?: {code?: string}}>} outcomes
 * @returns {string[]} Formatted error messages.
 */
function collectSourceErrors(outcomes) {
    const UNSUPPORTED = "capability_unsupported";
    const productiveSources = new Set();
    for (const outcome of outcomes) {
        if (outcome.records.length > 0) {
            productiveSources.add(outcome.source);
        }
    }
    const unresolved = outcomes.filter((outcome) => outcome.error && !productiveSources.has(outcome.source));
    const sourcesWithSpecificFailure = new Set();
    for (const outcome of unresolved) {
        if (outcome.error?.code !== UNSUPPORTED) {
            sourcesWithSpecificFailure.add(outcome.source);
        }
    }
    const reportable = unresolved.filter((outcome) => outcome.error?.code !== UNSUPPORTED ||
        !sourcesWithSpecificFailure.has(outcome.source));
    const messages = reportable.map(formatScholarOutcomeError);
    return uniqueStrings(messages).slice(0, 16);
}
1513
/**
 * Assembles the availability row for one scholar reference.
 *
 * Metadata, PDF, code and dataset records are fused into a ranked list (top
 * 10); its first entry supplies the descriptive fields. Resource URLs come
 * from the first record of each kind that actually carries the resource.
 * Follow-up commands use the canonical reference when one can be derived,
 * except the review command, which uses it only when it is an OpenReview
 * reference.
 *
 * NOTE(review): `project_url` uses `??`, which skips only null/undefined. If
 * the code record carries an empty `project_url`, that empty value would be
 * reported even when `has_project` was derived from the lead record. This
 * depends on what `nonEmptyResourceField` accepts — confirm intended.
 *
 * @param {object} input - Collected evidence: `ref`, `route`, `opts`, the
 *   four record lists, the candidate-source lists and `sourceErrors`.
 * @returns {object} Availability row; key order is significant for rendering.
 */
export function buildScholarAvailabilityRow(input) {
    const { ref, route, opts } = input;
    const fused = reciprocalRankFusion([
        input.metadataRecords,
        input.pdfRecords,
        input.codeRecords,
        input.datasetRecords,
    ], { topN: 10 });
    const lead = fused[0];
    const pdfHit = firstPdfRecord(input.pdfRecords);
    const codeHit = input.codeRecords.find(hasCodeResource);
    const datasetHit = input.datasetRecords.find(hasDatasetResource);
    const modelHit = input.datasetRecords.find(hasModelResource);
    const spaceHit = input.datasetRecords.find(hasSpaceResource);
    const carriesProject = (record) => record !== undefined && nonEmptyResourceField(record.project_url);
    const hasPdf = pdfHit !== undefined;
    const hasProject = carriesProject(codeHit) || carriesProject(lead);
    const codeCandidates = resolveAvailabilityCapabilitySources("scholar.code", route, opts);
    const datasetCandidates = resolveAvailabilityCapabilitySources("scholar.datasets", route, opts);
    const canonical = canonicalScholarReference(route, fused);
    const commandRef = canonical?.ref ?? ref;
    const reviewRef = canonical?.kind === "openreview" ? canonical.ref : ref;
    const hasFulltextCandidate = input.fulltextCandidateSources.length > 0;
    // Commands that forward the full option bag.
    const next = (subcommand) => scholarCommand(subcommand, commandRef, opts);
    // Commands that forward only the source selection, emitted when enabled.
    const nextScoped = (enabled, subcommand, target) => enabled
        ? scholarCommand(subcommand, target, {
            source: opts.source,
            sources: opts.sources,
        })
        : undefined;
    const carriesArtifact = (record) => hasDatasetResource(record) ||
        hasModelResource(record) ||
        hasSpaceResource(record);
    return {
        ref,
        route_kind: route.kind,
        route_value: route.value,
        canonical_ref: canonical?.ref,
        canonical_ref_kind: canonical?.kind,
        record_found: lead !== undefined,
        id: lead?.id,
        title: lead?.title,
        year: lead?.year,
        doi: lead?.doi,
        arxiv_id: lead?.arxiv_id,
        pmid: lead?.pmid,
        pmc_id: lead?.pmc_id,
        openreview_id: lead?.openreview_id,
        source_adapter: lead?.source_adapter,
        source_url: lead?.source_url,
        pdf_url: pdfHit?.pdf_url,
        code_url: codeHit?.code_url,
        project_url: codeHit?.project_url ?? lead?.project_url,
        dataset_url: datasetHit?.dataset_url,
        model_urls: modelHit?.model_urls,
        dataset_urls: datasetHit?.dataset_urls,
        space_urls: spaceHit?.space_urls,
        has_pdf: hasPdf,
        has_fulltext_candidate: hasFulltextCandidate,
        has_code: codeHit !== undefined,
        has_project: hasProject,
        has_datasets: datasetHit !== undefined,
        has_models: modelHit !== undefined,
        has_spaces: spaceHit !== undefined,
        metadata_sources: sourcesForRecords(input.metadataRecords),
        pdf_sources: sourcesForRecords(input.pdfRecords, (record) => firstPdfRecord([record]) !== undefined),
        fulltext_candidate_sources: input.fulltextCandidateSources,
        code_sources: sourcesForRecords(input.codeRecords, hasCodeResource),
        dataset_sources: sourcesForRecords(input.datasetRecords, carriesArtifact),
        citation_candidate_sources: input.citationCandidateSources,
        reference_candidate_sources: input.referenceCandidateSources,
        review_candidate_sources: input.reviewCandidateSources,
        next_workflow: next("workflow"),
        next_availability: next("availability"),
        next_evidence: next("evidence"),
        next_reproduce: next("reproduce"),
        next_get: next("get"),
        next_pdf: next("pdf"),
        next_read: hasPdf || hasFulltextCandidate ? next("read") : undefined,
        next_download: hasPdf ? next("download") : undefined,
        next_code: nextScoped(input.codeRecords.length > 0 || codeCandidates.length > 0, "code", commandRef),
        next_datasets: nextScoped(input.datasetRecords.length > 0 || datasetCandidates.length > 0, "datasets", commandRef),
        next_citations: nextScoped(input.citationCandidateSources.length > 0, "citations", commandRef),
        next_references: nextScoped(input.referenceCandidateSources.length > 0, "references", commandRef),
        next_reviews: nextScoped(input.reviewCandidateSources.length > 0, "reviews", reviewRef),
        source_errors: input.sourceErrors,
        retrieved_at: new Date().toISOString(),
    };
}
1628
/**
 * Reads a strict boolean flag from a row.
 *
 * @param {object} row - Row to inspect.
 * @param {string} field - Property name.
 * @returns {boolean} True only when the stored value is exactly `true`.
 */
function rowBoolean(row, field) {
    const value = row[field];
    return value === true;
}
1631
/**
 * Reads a non-blank string from a row.
 *
 * @param {object} row - Row to inspect.
 * @param {string} field - Property name.
 * @returns {string|undefined} The stored string, returned untrimmed, or
 *   `undefined` when it is absent, not a string, or only whitespace.
 */
function rowString(row, field) {
    const value = row[field];
    if (typeof value !== "string") {
        return undefined;
    }
    return value.trim().length > 0 ? value : undefined;
}
1637
/**
 * Reads a list of non-blank strings from a row.
 *
 * @param {object} row - Row to inspect.
 * @param {string} field - Property name.
 * @returns {string[]} The string entries that are not blank, in their
 *   original order; an empty array when the value is not an array.
 */
function rowStringArray(row, field) {
    const value = row[field];
    const isFilledString = (entry) => typeof entry === "string" && entry.trim().length > 0;
    return Array.isArray(value) ? value.filter(isFilledString) : [];
}
1643
/**
 * Derives the options for a lookup across the canonical reference sources.
 *
 * Only `unpaywallEmail` is carried over from the caller; the source selection
 * is replaced by the comma-joined `CANONICAL_REFERENCE_SOURCES` list.
 *
 * @param {{unpaywallEmail?: string}} opts - Caller options.
 * @returns {{sources: string, unpaywallEmail: (string|undefined)}}
 */
function canonicalReferenceLookupOpts(opts) {
    const sources = CANONICAL_REFERENCE_SOURCES.join(",");
    const unpaywallEmail = opts.unpaywallEmail;
    return { sources, unpaywallEmail };
}
1649
/**
 * Tries to turn an unclassified reference into a canonical one.
 *
 * References whose kind is already known are returned untouched. Otherwise an
 * availability lookup runs with the caller's options. If that yields no
 * canonical reference, a second lookup across the canonical reference sources
 * runs, but only when the fallback is enabled and the caller restricted the
 * lookup to specific sources.
 *
 * @param {string} ref - Reference supplied by the caller.
 * @param {object} opts - Lookup options (`source`, `sources`, `unpaywallEmail`).
 * @param {{fallbackToCanonicalSources?: boolean}} input - Fallback switch.
 * @returns {Promise<string>} The canonical reference when found, else `ref`.
 */
async function resolveUnknownRefViaAvailability(ref, opts, input) {
    const isUnclassified = resolveScholarReference(ref).kind === "unknown";
    if (!isUnclassified) {
        return ref;
    }
    const scopedEvidence = await collectAvailabilityEvidence(ref, opts);
    const scopedCanonical = rowString(scopedEvidence.row, "canonical_ref");
    if (scopedCanonical) {
        return scopedCanonical;
    }
    // Widening makes sense only if the first lookup was source-restricted.
    const mayWiden = input.fallbackToCanonicalSources && (opts.source || opts.sources);
    if (!mayWiden) {
        return ref;
    }
    const broadEvidence = await collectAvailabilityEvidence(ref, canonicalReferenceLookupOpts(opts));
    return rowString(broadEvidence.row, "canonical_ref") ?? ref;
}
1662
/**
 * Resolves the reference to use for citation/reference graph lookups.
 *
 * Any capability other than "scholar.citations" or "scholar.references"
 * returns the reference unchanged.
 *
 * @param {string} capability - Capability being executed.
 * @param {string} ref - Reference supplied by the caller.
 * @param {object} opts - Lookup options.
 * @returns {Promise<string>} The reference to query with.
 */
async function resolveGraphLookupRef(capability, ref, opts) {
    const graphCapabilities = ["scholar.citations", "scholar.references"];
    if (!graphCapabilities.includes(capability)) {
        return ref;
    }
    const fallback = { fallbackToCanonicalSources: true };
    return resolveUnknownRefViaAvailability(ref, opts, fallback);
}
1671
/**
 * Resolves the reference to use for artifact lookups.
 *
 * Canonical resolution is attempted only when the caller restricted the
 * lookup to specific sources; otherwise the reference is returned unchanged.
 *
 * @param {string} ref - Reference supplied by the caller.
 * @param {{source?: string, sources?: string}} opts - Lookup options.
 * @returns {Promise<string>} The reference to query with.
 */
async function resolveArtifactLookupRef(ref, opts) {
    const isSourceScoped = opts.source || opts.sources;
    if (!isSourceScoped) {
        return ref;
    }
    const fallback = { fallbackToCanonicalSources: true };
    return resolveUnknownRefViaAvailability(ref, opts, fallback);
}
1678
/**
 * Collects the scheme-prefixed identifiers present on an availability row.
 *
 * @param {object} row - Availability row.
 * @returns {string[]} Entries such as "doi:…" or "arxiv:…", in the order doi,
 *   arxiv, pmid, pmc, openreview; blank or missing fields are skipped.
 */
function availabilityIdentifiers(row) {
    const schemes = [
        ["doi", "doi"],
        ["arxiv", "arxiv_id"],
        ["pmid", "pmid"],
        ["pmc", "pmc_id"],
        ["openreview", "openreview_id"],
    ];
    const identifiers = [];
    for (const [scheme, field] of schemes) {
        const value = rowString(row, field);
        if (value) {
            identifiers.push(`${scheme}:${value}`);
        }
    }
    return identifiers;
}
1691
/**
 * Picks the source that best represents an availability row.
 *
 * `source_adapter` wins when set; otherwise the first entry of the first
 * non-empty list among the metadata, PDF, full-text candidate, code and
 * dataset source lists is used.
 *
 * @param {object} row - Availability row.
 * @returns {string|undefined} Source name, or `undefined` when none is listed.
 */
function availabilityPrimarySource(row) {
    const adapterName = rowString(row, "source_adapter");
    if (adapterName !== undefined) {
        return adapterName;
    }
    const fallbackLists = [
        "metadata_sources",
        "pdf_sources",
        "fulltext_candidate_sources",
        "code_sources",
        "dataset_sources",
    ];
    for (const field of fallbackLists) {
        const first = rowStringArray(row, field)[0];
        if (first !== undefined) {
            return first;
        }
    }
    return undefined;
}
1699
/**
 * Picks the most authoritative URL recorded on an availability row.
 *
 * @param {object} row - Availability row.
 * @returns {string|undefined} The first non-blank value among `source_url`,
 *   `pdf_url`, `project_url`, `code_url` and `dataset_url`.
 */
function availabilityPrimaryEvidenceUrl(row) {
    const preference = ["source_url", "pdf_url", "project_url", "code_url", "dataset_url"];
    for (const field of preference) {
        const url = rowString(row, field);
        if (url !== undefined) {
            return url;
        }
    }
    return undefined;
}
1706
/**
 * Lists the evidence categories an availability row has not established.
 *
 * @param {object} row - Availability row.
 * @returns {string[]} Labels of the missing categories, in a fixed order.
 */
function availabilityMissingEvidence(row) {
    const anyFlag = (...fields) => fields.some((field) => rowBoolean(row, field));
    const anyListed = (...fields) => fields.some((field) => rowStringArray(row, field).length > 0);
    // Each entry pairs a label with a thunk saying whether it is covered.
    const categories = [
        ["metadata", () => anyFlag("record_found")],
        ["readable-text", () => anyFlag("has_pdf", "has_fulltext_candidate")],
        ["code/project", () => anyFlag("has_code", "has_project")],
        ["datasets/models/spaces", () => anyFlag("has_datasets", "has_models", "has_spaces")],
        ["citation/reference-graph", () => anyListed("citation_candidate_sources", "reference_candidate_sources")],
        ["peer-review-audit", () => anyListed("review_candidate_sources")],
    ];
    const missing = [];
    for (const [label, isCovered] of categories) {
        if (!isCovered()) {
            missing.push(label);
        }
    }
    return missing;
}
1731
/**
 * Gathers the records one source returned for one capability.
 *
 * @param {Array<{source: string, capability: string, records: unknown[]}>} outcomes
 * @param {string} source - Source name to match.
 * @param {string} capability - Capability id to match.
 * @returns {unknown[]} Records of all matching outcomes, in outcome order.
 */
function sourceRecordsForCapability(outcomes, source, capability) {
    const isMatch = (outcome) => outcome.source === source && outcome.capability === capability;
    return outcomes.flatMap((outcome) => (isMatch(outcome) ? outcome.records : []));
}
1736
/**
 * Formats every error one source reported, without duplicates.
 *
 * @param {Array<{source: string, error?: object}>} outcomes
 * @param {string} source - Source name to match.
 * @returns {string[]} De-duplicated formatted error messages.
 */
function sourceOutcomeErrors(outcomes, source) {
    const failedForSource = outcomes.filter((outcome) => outcome.source === source && outcome.error);
    const messages = failedForSource.map(formatScholarOutcomeError);
    return uniqueStrings(messages);
}
1741
/**
 * Formats the errors of one source that are more than "not supported".
 *
 * @param {Array<{source: string, error?: {code?: string}}>} outcomes
 * @param {string} source - Source name to match.
 * @returns {string[]} De-duplicated formatted messages, excluding errors
 *   whose code is "capability_unsupported".
 */
function sourceBlockingErrors(outcomes, source) {
    const isBlocking = (outcome) => outcome.source === source &&
        outcome.error &&
        outcome.error.code !== "capability_unsupported";
    const messages = outcomes.filter(isBlocking).map(formatScholarOutcomeError);
    return uniqueStrings(messages);
}
1748
/**
 * Lists the scholar capabilities declared by a registered adapter.
 *
 * @param {string} source - Adapter name.
 * @returns {string[]} Sorted, de-duplicated capability ids starting with
 *   "scholar."; empty when no adapter has that name.
 */
function sourceCapabilities(source) {
    const adapter = getAllAdapters().find((entry) => entry.name === source);
    if (!adapter) {
        return [];
    }
    const scholarIds = new Set();
    Object.values(adapter.commands).forEach((command) => {
        // Commands without a capability list contribute nothing.
        Array.from(command.capabilities ?? [])
            .filter((id) => id.startsWith("scholar."))
            .forEach((id) => scholarIds.add(id));
    });
    return Array.from(scholarIds).sort();
}
1761
/**
 * Builds a set from the strings that are recognized scholar capabilities.
 *
 * @param {string[]} capabilities - Candidate capability ids.
 * @returns {Set<string>} The ids that appear in `SCHOLAR_CAPABILITIES`.
 */
function scholarCapabilitySetFromStrings(capabilities) {
    const recognized = new Set();
    for (const capability of capabilities) {
        if (SCHOLAR_CAPABILITIES.includes(capability)) {
            recognized.add(capability);
        }
    }
    return recognized;
}
1764
/**
 * Looks up the search command suggestion for an adapter by name.
 *
 * @param {string} source - Adapter name.
 * @returns {string|undefined} The result of `sourceSearchCommand` for the
 *   adapter, or `undefined` when no adapter has that name.
 */
function sourceSearchCommandByName(source) {
    const registered = getAllAdapters().find((entry) => entry.name === source);
    if (!registered) {
        return undefined;
    }
    return sourceSearchCommand(registered);
}
1768
/**
 * Builds a scholar command pinned to a single source.
 *
 * Only `unpaywallEmail` is forwarded from the caller's options; any `source`
 * or `sources` there is replaced by the given source.
 *
 * @param {string} subcommand - Scholar subcommand name.
 * @param {string} ref - Reference the command should target.
 * @param {string} source - Source to pin the command to.
 * @param {{unpaywallEmail?: string}} [opts={}] - Caller options.
 * @returns {*} Whatever `scholarCommand` returns for these arguments.
 */
function sourceScopedScholarCommand(subcommand, ref, source, opts = {}) {
    const { unpaywallEmail } = opts;
    const pinned = { source, unpaywallEmail };
    return scholarCommand(subcommand, ref, pinned);
}
1774
/**
 * Classifies a source's contribution to an availability audit.
 *
 * Returned evidence outranks candidate evidence, which outranks errors;
 * "unsupported" is reported only when nothing else applies.
 *
 * @param {{hasReturnedEvidence: *, hasCandidateEvidence: *,
 *   hasBlockingErrors: *, hasOnlyUnsupportedErrors: *}} input - Flags,
 *   evaluated by truthiness.
 * @returns {string} One of "evidence_found", "candidate_with_errors",
 *   "candidate_only", "error", "unsupported" or "no_evidence".
 */
function sourceStatus(input) {
    const { hasReturnedEvidence, hasCandidateEvidence, hasBlockingErrors, hasOnlyUnsupportedErrors } = input;
    if (hasReturnedEvidence) {
        return "evidence_found";
    }
    if (hasCandidateEvidence) {
        return hasBlockingErrors ? "candidate_with_errors" : "candidate_only";
    }
    if (hasBlockingErrors) {
        return "error";
    }
    return hasOnlyUnsupportedErrors ? "unsupported" : "no_evidence";
}
1787
/**
 * Translates a source's evidence flags into display labels.
 *
 * @param {object} input - Flags such as `hasMetadata` or `hasPdf`, evaluated
 *   by truthiness.
 * @returns {string[]} Labels for the flags that are set, in a fixed order.
 */
function sourceAuditEvidenceTypes(input) {
    const labelByFlag = [
        ["hasMetadata", "metadata"],
        ["hasPdf", "pdf"],
        ["hasFulltextCandidate", "fulltext-candidate"],
        ["hasCode", "code"],
        ["hasProject", "project"],
        ["hasDatasets", "datasets"],
        ["hasModels", "models"],
        ["hasSpaces", "spaces"],
        ["hasCitationCandidate", "citation-candidate"],
        ["hasReferenceCandidate", "reference-candidate"],
        ["hasReviewCandidate", "review-candidate"],
    ];
    return labelByFlag
        .filter(([flag]) => input[flag])
        .map(([, label]) => label);
}
1813
/**
 * Breaks an availability result down into one audit row per source.
 *
 * The sources audited are those that produced an outcome plus those named in
 * any of the availability row's source lists. For each source the row
 * reports the evidence it returned, the capabilities for which it is only a
 * candidate, its errors, and source-pinned follow-up commands. Blocking
 * errors are reported as recovered once the source has returned evidence.
 *
 * @param {object} availability - Availability row for the reference.
 * @param {Array<{source: string, capability: string, records: object[],
 *   error?: object}>} outcomes - Per-source, per-capability results.
 * @param {{unpaywallEmail?: string}} [opts={}] - Options forwarded to the
 *   generated commands.
 * @returns {object[]} Audit rows; key order is significant for rendering.
 */
export function buildScholarSourceAuditRows(availability, outcomes, opts = {}) {
    const ref = String(availability.ref ?? "");
    const commandRef = rowString(availability, "canonical_ref") ?? ref;
    const canonicalKind = rowString(availability, "canonical_ref_kind");
    // Reviews are addressed by the canonical ref only for OpenReview refs.
    const reviewRef = canonicalKind === "openreview" ? commandRef : ref;
    const sourceListFields = [
        "metadata_sources",
        "pdf_sources",
        "fulltext_candidate_sources",
        "code_sources",
        "dataset_sources",
        "citation_candidate_sources",
        "reference_candidate_sources",
        "review_candidate_sources",
    ];
    const sources = uniqueStrings([
        ...outcomes.map((outcome) => outcome.source),
        ...sourceListFields.flatMap((field) => rowStringArray(availability, field)),
    ]);
    const linksPdf = (record) => firstPdfRecord([record]) !== undefined;
    const carriesArtifact = (record) => hasDatasetResource(record) ||
        hasModelResource(record) ||
        hasSpaceResource(record);
    const carriesProject = (record) => record !== undefined && nonEmptyResourceField(record.project_url);
    const isListedIn = (field, source) => rowStringArray(availability, field).includes(source);
    return sources.map((source) => {
        const recordsFor = (capability) => sourceRecordsForCapability(outcomes, source, capability);
        const pinned = (subcommand, target) => sourceScopedScholarCommand(subcommand, target, source, opts);
        const metadataRecords = recordsFor("scholar.get");
        const pdfEvidence = recordsFor("scholar.pdf").filter(linksPdf);
        const codeEvidence = recordsFor("scholar.code").filter(hasCodeResource);
        const artifactEvidence = recordsFor("scholar.datasets").filter(carriesArtifact);
        const fused = reciprocalRankFusion([
            metadataRecords,
            pdfEvidence,
            codeEvidence,
            artifactEvidence,
        ], { topN: 10 });
        const lead = fused[0];
        const pdfHit = firstPdfRecord(pdfEvidence);
        const codeHit = codeEvidence.find(hasCodeResource);
        const datasetHit = artifactEvidence.find(hasDatasetResource);
        const modelHit = artifactEvidence.find(hasModelResource);
        const spaceHit = artifactEvidence.find(hasSpaceResource);
        const hasMetadata = metadataRecords.length > 0;
        const hasPdf = pdfHit !== undefined;
        const hasCode = codeHit !== undefined;
        const hasProject = carriesProject(codeHit) || carriesProject(lead);
        const hasDatasets = datasetHit !== undefined;
        const hasModels = modelHit !== undefined;
        const hasSpaces = spaceHit !== undefined;
        const hasFulltextCandidate = isListedIn("fulltext_candidate_sources", source);
        const hasCitationCandidate = isListedIn("citation_candidate_sources", source);
        const hasReferenceCandidate = isListedIn("reference_candidate_sources", source);
        const hasReviewCandidate = isListedIn("review_candidate_sources", source);
        const evidenceTypes = sourceAuditEvidenceTypes({
            hasMetadata,
            hasPdf,
            hasFulltextCandidate,
            hasCode,
            hasProject,
            hasDatasets,
            hasModels,
            hasSpaces,
            hasCitationCandidate,
            hasReferenceCandidate,
            hasReviewCandidate,
        });
        const errors = sourceOutcomeErrors(outcomes, source);
        const rawBlockingErrors = sourceBlockingErrors(outcomes, source);
        const capabilities = sourceCapabilities(source);
        const knownCapabilities = scholarCapabilitySetFromStrings(capabilities);
        const adapter = getAllAdapters().find((entry) => entry.name === source);
        const hasSourceScopedAvailability = adapter !== undefined
            ? adapterSupportsSourceScopedAvailability(adapter, knownCapabilities)
            : supportsSourceScopedAvailability(knownCapabilities);
        const hasSearch = knownCapabilities.has("scholar.search");
        // Capabilities the source may serve but that were not executed here.
        const candidateCapabilities = [
            [hasFulltextCandidate, "scholar.fulltext"],
            [hasCitationCandidate, "scholar.citations"],
            [hasReferenceCandidate, "scholar.references"],
            [hasReviewCandidate, "scholar.review"],
        ]
            .filter(([isCandidate]) => isCandidate)
            .map(([, capability]) => capability);
        const executedCapabilities = uniqueStrings(outcomes
            .filter((outcome) => outcome.source === source)
            .map((outcome) => outcome.capability));
        const hasReturnedEvidence = hasMetadata ||
            hasPdf ||
            hasCode ||
            hasProject ||
            hasDatasets ||
            hasModels ||
            hasSpaces;
        const hasCandidateEvidence = candidateCapabilities.length > 0;
        // Once evidence came back, earlier failures count as recovered.
        const blockingErrors = hasReturnedEvidence ? [] : rawBlockingErrors;
        const recoveredErrors = hasReturnedEvidence ? rawBlockingErrors : [];
        const hasOnlyUnsupportedErrors = errors.length > 0 && blockingErrors.length === 0;
        return {
            ref,
            route_kind: availability.route_kind,
            canonical_ref: availability.canonical_ref,
            canonical_ref_kind: availability.canonical_ref_kind,
            source,
            source_status: sourceStatus({
                hasReturnedEvidence,
                hasCandidateEvidence,
                hasBlockingErrors: blockingErrors.length > 0,
                hasOnlyUnsupportedErrors,
            }),
            evidence_types: evidenceTypes,
            handoff_strategy: coverageHandoffStrategy(knownCapabilities, adapter),
            record_count: fused.length,
            has_metadata: hasMetadata,
            has_pdf: hasPdf,
            has_fulltext_candidate: hasFulltextCandidate,
            has_code: hasCode,
            has_project: hasProject,
            has_datasets: hasDatasets,
            has_models: hasModels,
            has_spaces: hasSpaces,
            has_citation_candidate: hasCitationCandidate,
            has_reference_candidate: hasReferenceCandidate,
            has_review_candidate: hasReviewCandidate,
            title: lead?.title,
            year: lead?.year,
            primary_evidence_url: lead?.source_url ??
                pdfHit?.pdf_url ??
                codeHit?.project_url ??
                codeHit?.code_url ??
                datasetHit?.dataset_url,
            pdf_url: pdfHit?.pdf_url,
            code_url: codeHit?.code_url,
            project_url: codeHit?.project_url ?? lead?.project_url,
            dataset_url: datasetHit?.dataset_url,
            model_urls: modelHit?.model_urls,
            dataset_urls: datasetHit?.dataset_urls,
            space_urls: spaceHit?.space_urls,
            capabilities,
            executed_capabilities: executedCapabilities,
            candidate_capabilities: candidateCapabilities,
            source_errors: errors,
            blocking_errors: blockingErrors,
            recovered_errors: recoveredErrors,
            next_source_availability: hasSourceScopedAvailability
                ? pinned("availability", commandRef)
                : undefined,
            next_search: hasSearch ? sourceSearchCommandByName(source) : undefined,
            next_workflow_from_result: hasSearch
                ? "unicli scholar workflow <title-or-id>"
                : undefined,
            next_sources_from_result: hasSearch
                ? "unicli scholar sources <title-or-id>"
                : undefined,
            next_read_from_result: hasSearch
                ? "unicli scholar read <title-or-id>"
                : undefined,
            next_read: hasPdf || hasFulltextCandidate
                ? pinned("read", commandRef)
                : undefined,
            next_download: hasPdf ? pinned("download", commandRef) : undefined,
            next_code: hasCode || capabilities.includes("scholar.code")
                ? pinned("code", commandRef)
                : undefined,
            next_datasets: hasDatasets ||
                hasModels ||
                hasSpaces ||
                capabilities.includes("scholar.datasets")
                ? pinned("datasets", commandRef)
                : undefined,
            next_citations: hasCitationCandidate
                ? pinned("citations", commandRef)
                : undefined,
            next_references: hasReferenceCandidate
                ? pinned("references", commandRef)
                : undefined,
            next_reviews: hasReviewCandidate
                ? pinned("reviews", reviewRef)
                : undefined,
            next_reproduce: hasReturnedEvidence || hasCandidateEvidence
                ? pinned("reproduce", commandRef)
                : undefined,
            retrieved_at: new Date().toISOString(),
        };
    });
}
1997
/**
 * Returns the column names used to render source audit rows.
 *
 * @param {boolean} [detailed=false] - When truthy, the resource URLs,
 *   capability listings, error lists and remaining follow-up commands are
 *   appended.
 * @returns {string[]} A freshly allocated list of column names.
 */
function sourceAuditColumns(detailed = false) {
    const columns = [
        "ref",
        "route_kind",
        "canonical_ref",
        "canonical_ref_kind",
        "source",
        "source_status",
        "evidence_types",
        "handoff_strategy",
        "record_count",
        "has_metadata",
        "has_pdf",
        "has_fulltext_candidate",
        "has_code",
        "has_datasets",
        "has_citation_candidate",
        "has_reference_candidate",
        "has_review_candidate",
        "title",
        "year",
        "next_source_availability",
        "next_search",
        "next_workflow_from_result",
        "next_sources_from_result",
        "next_read_from_result",
        "next_read",
        "next_reproduce",
    ];
    if (detailed) {
        columns.push(
            "has_project",
            "has_models",
            "has_spaces",
            "primary_evidence_url",
            "pdf_url",
            "code_url",
            "project_url",
            "dataset_url",
            "model_urls",
            "dataset_urls",
            "space_urls",
            "capabilities",
            "executed_capabilities",
            "candidate_capabilities",
            "source_errors",
            "blocking_errors",
            "recovered_errors",
            "next_download",
            "next_code",
            "next_datasets",
            "next_citations",
            "next_references",
            "next_reviews",
            "retrieved_at",
        );
    }
    return columns;
}
2056
/**
 * Tells whether an availability row offers something that can be read.
 *
 * @param {object} row - Availability row.
 * @returns {boolean} True when `has_pdf` or `has_fulltext_candidate` is set.
 */
function availabilityHasReadableSource(row) {
    const readableFlags = ["has_pdf", "has_fulltext_candidate"];
    return readableFlags.some((flag) => rowBoolean(row, flag));
}
2059
/**
 * Tells whether an availability row is anchored to a primary source.
 *
 * @param {object} row - Availability row.
 * @returns {boolean} True when the row has at least one identifier or, failing
 *   that, a primary evidence URL.
 */
function availabilityHasPrimaryAnchor(row) {
    if (availabilityIdentifiers(row).length > 0) {
        return true;
    }
    return availabilityPrimaryEvidenceUrl(row) !== undefined;
}
2063
/**
 * Tells whether an availability row found any reproducibility resource.
 *
 * @param {object} row - Availability row.
 * @returns {boolean} True when any of the code, project, datasets, models or
 *   spaces flags is set.
 */
function availabilityHasResourceEvidence(row) {
    const resourceFlags = [
        "has_code",
        "has_project",
        "has_datasets",
        "has_models",
        "has_spaces",
    ];
    return resourceFlags.some((flag) => rowBoolean(row, flag));
}
2070
/**
 * Builds one workflow step record.
 *
 * The candidate fields are passed through `definedEntries` before the object
 * is assembled; an empty `commands` list is treated as absent.
 *
 * @param {number} order - Position of the step in the workflow.
 * @param {string} step - Step identifier.
 * @param {string} status - Step status.
 * @param {{command?: string, commands?: string[], doneWhen?: string,
 *   guard?: string}} input - Optional step details.
 * @returns {object} The step record.
 */
function workflowStep(order, step, status, input) {
    const { command, commands, doneWhen, guard } = input;
    const hasCommandList = commands && commands.length > 0;
    const fields = {
        order,
        step,
        status,
        command,
        commands: hasCommandList ? commands : undefined,
        done_when: doneWhen,
        guard,
    };
    return Object.fromEntries(definedEntries(fields));
}
2083
/**
 * Summarizes how far a reference has progressed toward being readable.
 *
 * @param {*} recordFound - Whether any source record was found.
 * @param {*} hasPrimaryAnchor - Whether an identifier or primary URL exists.
 * @param {*} hasReadableSource - Whether a PDF or full-text candidate exists.
 * @param {*} hasResourceEvidence - Whether code/data resources were found.
 * @returns {string} The workflow status label. Arguments are evaluated by
 *   truthiness.
 */
function workflowStatus(recordFound, hasPrimaryAnchor, hasReadableSource, hasResourceEvidence) {
    if (!recordFound) {
        return "blocked_no_source_record";
    }
    if (!hasPrimaryAnchor) {
        return "metadata_without_primary_anchor";
    }
    if (hasReadableSource) {
        return "ready_for_agent_reading";
    }
    return hasResourceEvidence
        ? "resources_found_needs_source_text"
        : "metadata_only_needs_source_text";
}
2094
/**
 * Names the next action for a reference, based on the first unmet gate.
 *
 * @param {*} recordFound - Whether any source record was found.
 * @param {*} hasPrimaryAnchor - Whether an identifier or primary URL exists.
 * @param {*} hasReadableSource - Whether a PDF or full-text candidate exists.
 * @returns {string} The next-step label. Arguments are evaluated by
 *   truthiness.
 */
function workflowNextStep(recordFound, hasPrimaryAnchor, hasReadableSource) {
    // Gates are checked in order; the first unmet one decides the action.
    const gates = [
        [recordFound, "resolve_source_record"],
        [hasPrimaryAnchor, "verify_primary_anchor"],
        [hasReadableSource, "find_readable_source"],
    ];
    const firstUnmet = gates.find(([isMet]) => !isMet);
    return firstUnmet ? firstUnmet[1] : "run_next_read_before_quoting_claims";
}
2103
/**
 * Lists the workflow milestones an availability row has already reached.
 *
 * @param {object} availability - Availability row.
 * @param {*} hasPrimaryAnchor - Whether an identifier or primary URL exists.
 * @param {*} hasReadableSource - Whether a PDF or full-text candidate exists.
 * @param {*} hasResourceEvidence - Whether code/data resources were found.
 * @returns {string[]} Milestone labels, in a fixed order.
 */
function completedWorkflowSteps(availability, hasPrimaryAnchor, hasReadableSource, hasResourceEvidence) {
    const hasCommand = (field) => Boolean(rowString(availability, field));
    // Each entry pairs a milestone with a thunk saying whether it is reached.
    const milestones = [
        ["source_record_found", () => rowBoolean(availability, "record_found")],
        ["primary_anchor_found", () => hasPrimaryAnchor],
        ["readable_source_found", () => hasReadableSource],
        ["downloadable_pdf_found", () => hasCommand("next_download")],
        ["citation_reference_candidate_found", () => hasCommand("next_citations") || hasCommand("next_references")],
        ["peer_review_candidate_found", () => hasCommand("next_reviews")],
        ["reproducibility_resource_found", () => hasResourceEvidence],
    ];
    const completed = [];
    for (const [label, isReached] of milestones) {
        if (isReached()) {
            completed.push(label);
        }
    }
    return completed;
}
2123
/**
 * Lists the workflow steps that still have to be carried out.
 *
 * Anchor and readable-source steps are reported only once a record exists.
 * Resource evidence switches the last entry from finding resources to
 * inspecting them.
 *
 * @param {object} availability - Availability row.
 * @param {*} hasPrimaryAnchor - Whether an identifier or primary URL exists.
 * @param {*} hasReadableSource - Whether a PDF or full-text candidate exists.
 * @param {*} hasResourceEvidence - Whether code/data resources were found.
 * @returns {string[]} Pending step labels, in a fixed order.
 */
function pendingWorkflowSteps(availability, hasPrimaryAnchor, hasReadableSource, hasResourceEvidence) {
    const recordFound = rowBoolean(availability, "record_found");
    const lacksCommand = (field) => !rowString(availability, field);
    const steps = [
        ["source_record", () => !recordFound],
        ["primary_anchor", () => recordFound && !hasPrimaryAnchor],
        ["readable_source", () => recordFound && !hasReadableSource],
        ["source_text_reading", () => hasReadableSource],
        ["citation_reference_graph", () => lacksCommand("next_citations") && lacksCommand("next_references")],
        ["peer_review_audit", () => lacksCommand("next_reviews")],
        ["code_data_model_resources", () => !hasResourceEvidence],
        ["resource_inspection", () => hasResourceEvidence],
    ];
    const pending = [];
    for (const [label, isPending] of steps) {
        if (isPending()) {
            pending.push(label);
        }
    }
    return pending;
}
2146
/**
 * List the workflow steps that cannot be carried out with the current evidence.
 *
 * Without a source record every downstream step is blocked; otherwise each
 * group is blocked only when its own prerequisite is missing.
 *
 * @param {object} availability - availability row read through the row helpers.
 * @param {boolean} hasReadableSource - whether a readable source exists.
 * @param {boolean} hasResourceEvidence - whether resource evidence exists.
 * @returns {string[]} blocked step labels, in a fixed order.
 */
function blockedWorkflowSteps(availability, hasReadableSource, hasResourceEvidence) {
    if (!rowBoolean(availability, "record_found")) {
        return [
            "source_reading",
            "claim_quotation",
            "artifact_download",
            "citation_reference_audit",
            "peer_review_audit",
            "reproducibility_audit",
        ];
    }
    const hasGraphCommand = rowString(availability, "next_citations") ||
        rowString(availability, "next_references");
    const hasReviewCommand = rowString(availability, "next_reviews");
    return [
        ...(hasReadableSource
            ? []
            : ["source_reading", "claim_quotation", "artifact_download"]),
        ...(hasGraphCommand ? [] : ["citation_reference_audit"]),
        ...(hasReviewCommand ? [] : ["peer_review_audit"]),
        ...(hasResourceEvidence ? [] : ["reproducibility_installation"]),
    ];
}
2174
/**
 * Build the ordered eight-step agent runbook for a scholarly reference.
 *
 * Every step is produced by `workflowStep(index, name, status, details)`.
 * Steps 1, 2 and 7 depend only on whether a record was found; the others are
 * "ready" when their follow-up command(s) are present on the row.
 *
 * @param {object} availability - availability row carrying the `next_*` commands.
 * @param {boolean} recordFound - whether a source record was found.
 * @returns {Array} the eight workflow step entries, in execution order.
 */
function workflowRunbook(availability, recordFound) {
    const isDefined = (command) => command !== undefined;
    const readiness = (available) => (available ? "ready" : "blocked");
    const graphCommands = [
        rowString(availability, "next_citations"),
        rowString(availability, "next_references"),
    ].filter(isDefined);
    const resourceCommands = [
        rowString(availability, "next_code"),
        rowString(availability, "next_datasets"),
    ].filter(isDefined);
    const readCommand = rowString(availability, "next_read");
    const downloadCommand = rowString(availability, "next_download");
    const reviewCommand = rowString(availability, "next_reviews");
    return [
        // Step 1 is the only one that can report "complete".
        workflowStep(1, "availability_audit", recordFound ? "complete" : "blocked", {
            command: rowString(availability, "next_availability"),
            doneWhen: "record_found is true and source_errors are inspected",
            guard: "do not cite, download, or reproduce without a source-backed record",
        }),
        workflowStep(2, "evidence_classification", readiness(recordFound), {
            command: rowString(availability, "next_evidence"),
            doneWhen: "citation_safety and claim_boundary are explicit",
            guard: "metadata-only evidence cannot support paper claims",
        }),
        workflowStep(3, "read_source_text", readiness(readCommand), {
            command: readCommand,
            doneWhen: "source-direct full text or extracted PDF text is returned",
            guard: "quote claims only from the returned source text",
        }),
        workflowStep(4, "download_artifact", readiness(downloadCommand), {
            command: downloadCommand,
            doneWhen: "local artifact metadata is returned for a source-backed PDF",
            guard: "download only after a PDF candidate exists",
        }),
        workflowStep(5, "citation_reference_audit", readiness(graphCommands.length > 0), {
            commands: graphCommands,
            doneWhen: "citation and reference rows are retrieved from graph-capable sources",
            guard: "graph rows are provenance evidence, not a substitute for reading the paper",
        }),
        workflowStep(6, "peer_review_audit", readiness(reviewCommand), {
            command: reviewCommand,
            doneWhen: "review, decision, rebuttal, or comment rows are retrieved when available",
            guard: "review rows qualify venue context but do not replace source text",
        }),
        workflowStep(7, "reproducibility_plan", readiness(recordFound), {
            command: rowString(availability, "next_reproduce"),
            doneWhen: "install_readiness and execution_boundary are explicit",
            guard: "never clone, install, or run remote code during planning",
        }),
        workflowStep(8, "inspect_code_and_resources", readiness(resourceCommands.length > 0), {
            commands: resourceCommands,
            doneWhen: "code, dataset, model, or Space resource rows are inspected",
            guard: "inspect repository and data provenance before any install command",
        }),
    ];
}
2226
/**
 * Derive the workflow summary row from an availability row.
 *
 * Combines the status/next-step classification, the completed/pending/blocked
 * step lists, the agent runbook and the pass-through `next_*` commands into a
 * single object, stamped with the current time.
 *
 * @param {object} availability - availability row for one reference.
 * @returns {object} the workflow row.
 */
export function buildScholarWorkflowRow(availability) {
    const recordFound = rowBoolean(availability, "record_found");
    const hasReadableSource = availabilityHasReadableSource(availability);
    const hasPrimaryAnchor = availabilityHasPrimaryAnchor(availability);
    const hasResourceEvidence = availabilityHasResourceEvidence(availability);
    // Merge two source lists from the row into one de-duplicated list.
    const mergedSources = (firstKey, secondKey) => uniqueStrings([
        ...rowStringArray(availability, firstKey),
        ...rowStringArray(availability, secondKey),
    ]);
    const claimBoundary = hasReadableSource
        ? "quote_claims_only_after_next_read_output"
        : "metadata_only_no_claim_extraction";
    return {
        ref: availability.ref,
        route_kind: availability.route_kind,
        canonical_ref: availability.canonical_ref,
        canonical_ref_kind: availability.canonical_ref_kind,
        workflow_status: workflowStatus(recordFound, hasPrimaryAnchor, hasReadableSource, hasResourceEvidence),
        next_step: workflowNextStep(recordFound, hasPrimaryAnchor, hasReadableSource),
        claim_boundary: claimBoundary,
        execution_boundary: "no_download_clone_install_or_remote_code_execution",
        record_found: recordFound,
        title: availability.title,
        year: availability.year,
        primary_source: availabilityPrimarySource(availability),
        primary_evidence_url: availabilityPrimaryEvidenceUrl(availability),
        persistent_identifiers: availabilityIdentifiers(availability),
        readable_sources: mergedSources("fulltext_candidate_sources", "pdf_sources"),
        resource_sources: mergedSources("code_sources", "dataset_sources"),
        graph_sources: mergedSources("citation_candidate_sources", "reference_candidate_sources"),
        review_sources: rowStringArray(availability, "review_candidate_sources"),
        completed_steps: completedWorkflowSteps(availability, hasPrimaryAnchor, hasReadableSource, hasResourceEvidence),
        pending_steps: pendingWorkflowSteps(availability, hasPrimaryAnchor, hasReadableSource, hasResourceEvidence),
        blocked_steps: blockedWorkflowSteps(availability, hasReadableSource, hasResourceEvidence),
        agent_runbook: workflowRunbook(availability, recordFound),
        next_workflow: availability.next_workflow,
        next_availability: availability.next_availability,
        next_evidence: availability.next_evidence,
        next_read: availability.next_read,
        next_download: availability.next_download,
        next_code: availability.next_code,
        next_datasets: availability.next_datasets,
        next_citations: availability.next_citations,
        next_references: availability.next_references,
        next_reviews: availability.next_reviews,
        next_reproduce: availability.next_reproduce,
        source_errors: availability.source_errors,
        retrieved_at: new Date().toISOString(),
    };
}
2280
/**
 * Column names used when rendering workflow rows.
 *
 * @param {boolean} [detailed=false] - when truthy, append the detail columns.
 * @returns {string[]} the ordered column names.
 */
function workflowColumns(detailed = false) {
    const summaryColumns = [
        "ref",
        "route_kind",
        "canonical_ref",
        "canonical_ref_kind",
        "workflow_status",
        "next_step",
        "claim_boundary",
        "execution_boundary",
        "record_found",
        "title",
        "year",
        "completed_steps",
        "pending_steps",
        "blocked_steps",
        "next_read",
        "next_evidence",
        "next_reproduce",
    ];
    if (!detailed) {
        return summaryColumns;
    }
    const detailColumns = [
        "primary_source",
        "primary_evidence_url",
        "persistent_identifiers",
        "readable_sources",
        "resource_sources",
        "graph_sources",
        "review_sources",
        "agent_runbook",
        "next_workflow",
        "next_availability",
        "next_download",
        "next_code",
        "next_datasets",
        "next_citations",
        "next_references",
        "next_reviews",
        "source_errors",
        "retrieved_at",
    ];
    return [...summaryColumns, ...detailColumns];
}
2324
/**
 * Derive the evidence summary row from an availability row.
 *
 * Classifies the reference three ways: evidence status, citation safety and
 * reading readiness. Also copies through the source lists and `next_*`
 * commands, stamped with the current time.
 *
 * @param {object} availability - availability row for one reference.
 * @returns {object} the evidence row.
 */
export function buildScholarEvidenceRow(availability) {
    const recordFound = rowBoolean(availability, "record_found");
    const hasPdf = rowBoolean(availability, "has_pdf");
    const hasFulltextCandidate = rowBoolean(availability, "has_fulltext_candidate");
    const hasReadableSource = hasPdf || hasFulltextCandidate;
    const persistentIdentifiers = availabilityIdentifiers(availability);
    const primaryEvidenceUrl = availabilityPrimaryEvidenceUrl(availability);
    // An identifier or a primary evidence URL both count as an anchor.
    const hasPrimaryAnchor = persistentIdentifiers.length > 0 || primaryEvidenceUrl !== undefined;
    const hasResourceEvidence = ["has_code", "has_project", "has_datasets", "has_models", "has_spaces"]
        .some((flag) => rowBoolean(availability, flag));

    const classifyEvidence = () => {
        if (!recordFound) {
            return "unverified";
        }
        if (hasReadableSource) {
            return hasPrimaryAnchor ? "readable_source_verified" : "readable_source_candidate";
        }
        if (hasPrimaryAnchor) {
            return "metadata_verified";
        }
        return hasResourceEvidence ? "resource_only" : "metadata_only";
    };
    const classifyCitationSafety = () => {
        if (!recordFound) {
            return "do_not_cite_unverified";
        }
        if (!hasPrimaryAnchor) {
            return "do_not_cite_without_identifier";
        }
        return hasReadableSource
            ? "cite_after_reading_source"
            : "metadata_only_do_not_quote_claims";
    };
    const classifyReadiness = () => {
        if (rowString(availability, "next_read")) {
            return "read_now";
        }
        if (rowString(availability, "next_download")) {
            return "download_then_read";
        }
        return recordFound ? "metadata_or_resource_only" : "not_ready";
    };

    const evidenceStatus = classifyEvidence();
    const citationSafety = classifyCitationSafety();
    const readiness = classifyReadiness();
    const graphSources = uniqueStrings([
        ...rowStringArray(availability, "citation_candidate_sources"),
        ...rowStringArray(availability, "reference_candidate_sources"),
    ]);
    const claimBoundary = hasReadableSource
        ? "quote_claims_only_after_next_read_output"
        : "metadata_only_no_claim_extraction";
    return {
        ref: availability.ref,
        route_kind: availability.route_kind,
        evidence_status: evidenceStatus,
        citation_safety: citationSafety,
        readiness,
        claim_boundary: claimBoundary,
        record_found: recordFound,
        title: availability.title,
        year: availability.year,
        primary_source: availabilityPrimarySource(availability),
        primary_evidence_url: primaryEvidenceUrl,
        persistent_identifiers: persistentIdentifiers,
        readable_sources: uniqueStrings([
            ...rowStringArray(availability, "fulltext_candidate_sources"),
            ...rowStringArray(availability, "pdf_sources"),
        ]),
        resource_sources: uniqueStrings([
            ...rowStringArray(availability, "code_sources"),
            ...rowStringArray(availability, "dataset_sources"),
        ]),
        graph_sources: graphSources,
        review_sources: rowStringArray(availability, "review_candidate_sources"),
        missing_evidence: availabilityMissingEvidence(availability),
        next_availability: availability.next_availability,
        next_read: availability.next_read,
        next_download: availability.next_download,
        next_code: availability.next_code,
        next_datasets: availability.next_datasets,
        next_citations: availability.next_citations,
        next_references: availability.next_references,
        next_reviews: availability.next_reviews,
        source_errors: availability.source_errors,
        retrieved_at: new Date().toISOString(),
    };
}
2404
/**
 * Column names used when rendering evidence rows.
 *
 * @param {boolean} [detailed=false] - when truthy, append the detail columns.
 * @returns {string[]} the ordered column names.
 */
function evidenceColumns(detailed = false) {
    const summaryColumns = [
        "ref",
        "route_kind",
        "evidence_status",
        "citation_safety",
        "readiness",
        "claim_boundary",
        "record_found",
        "title",
        "year",
        "primary_source",
        "primary_evidence_url",
        "persistent_identifiers",
        "readable_sources",
        "resource_sources",
        "graph_sources",
        "review_sources",
        "missing_evidence",
        "next_read",
        "next_code",
        "next_datasets",
        "next_citations",
        "next_references",
        "next_reviews",
    ];
    if (!detailed) {
        return summaryColumns;
    }
    return [
        ...summaryColumns,
        "next_availability",
        "next_download",
        "source_errors",
        "retrieved_at",
    ];
}
2440
/**
 * Return the row's `code_url` as a clone candidate when it points at a known
 * code-hosting site, with a single trailing slash removed.
 *
 * The host is checked against the parsed URL's hostname (exact match or
 * subdomain). A substring match over the whole URL would also accept
 * look-alike URLs such as `https://evil.example/github.com/x`,
 * `https://github.com.evil.example/x` or `https://github.com@evil.example/x`.
 *
 * @param {object} row - row whose `code_url` is read via `rowString`.
 * @returns {string|undefined} the candidate URL, or `undefined` when the URL is
 *   missing, not http(s), unparsable, or not on an allowed host.
 */
function cloneCandidateUrl(row) {
    const codeUrl = rowString(row, "code_url");
    if (!codeUrl) {
        return undefined;
    }
    if (!/^https?:\/\//i.test(codeUrl)) {
        return undefined;
    }
    let hostname;
    try {
        hostname = new URL(codeUrl).hostname.toLowerCase();
    }
    catch {
        // Looks like http(s) but is not a parsable URL: not a clone candidate.
        return undefined;
    }
    const allowedHosts = ["github.com", "gitlab.com", "bitbucket.org", "huggingface.co"];
    const isAllowedHost = allowedHosts.some((host) => hostname === host || hostname.endsWith(`.${host}`));
    if (!isAllowedHost) {
        return undefined;
    }
    return codeUrl.replace(/\/$/, "");
}
2451
/**
 * List the reproducibility ingredients that the row does not provide.
 *
 * @param {object} row - availability row read through the row helpers.
 * @returns {string[]} labels of missing ingredients, in a fixed order.
 */
function availabilityReproducibilityMissing(row) {
    const hasAnyResource = rowBoolean(row, "has_datasets") ||
        rowBoolean(row, "has_models") ||
        rowBoolean(row, "has_spaces");
    const gaps = [
        [!rowString(row, "code_url"), "code-repository"],
        [!rowBoolean(row, "has_project"), "project-page"],
        [!hasAnyResource, "datasets/models/spaces"],
        [!rowString(row, "next_read"), "readable-paper"],
        [rowStringArray(row, "citation_candidate_sources").length === 0, "citation-graph"],
    ];
    return gaps
        .filter(([absent]) => absent)
        .map(([, label]) => label);
}
2469
/**
 * Derive the reproducibility summary row from an availability row.
 *
 * Classifies what code/project/resource evidence exists and how close the
 * reference is to being installable. This function only reports URLs and
 * labels; it does not fetch or execute anything itself.
 *
 * @param {object} availability - availability row for one reference.
 * @returns {object} the reproducibility row.
 */
export function buildScholarReproducibilityRow(availability) {
    const hasCodeRepository = rowString(availability, "code_url") !== undefined;
    const hasProject = rowBoolean(availability, "has_project");
    const hasResource = rowBoolean(availability, "has_datasets") ||
        rowBoolean(availability, "has_models") ||
        rowBoolean(availability, "has_spaces");
    const cloneUrl = cloneCandidateUrl(availability);

    const classifyStatus = () => {
        if (hasCodeRepository) {
            return hasResource ? "code_and_resources_found" : "code_found";
        }
        if (hasProject) {
            return hasResource ? "project_and_resources_found" : "project_page_found";
        }
        return hasResource ? "resources_without_code" : "no_reproducibility_resources";
    };
    const classifyInstallReadiness = () => {
        if (cloneUrl) {
            return "clone_candidate_requires_inspection";
        }
        if (hasCodeRepository) {
            return "code_url_requires_manual_inspection";
        }
        if (hasProject) {
            return "project_page_requires_manual_inspection";
        }
        return hasResource ? "resource_only_no_install" : "not_ready";
    };

    const reproducibilityStatus = classifyStatus();
    const installReadiness = classifyInstallReadiness();
    return {
        ref: availability.ref,
        route_kind: availability.route_kind,
        reproducibility_status: reproducibilityStatus,
        install_readiness: installReadiness,
        execution_boundary: "no_remote_code_executed",
        install_boundary: "inspect_repository_before_running_install_or_training_commands",
        record_found: availability.record_found,
        title: availability.title,
        year: availability.year,
        primary_source: availabilityPrimarySource(availability),
        primary_evidence_url: availabilityPrimaryEvidenceUrl(availability),
        code_url: availability.code_url,
        project_url: availability.project_url,
        clone_candidate_url: cloneUrl,
        dataset_url: availability.dataset_url,
        dataset_urls: availability.dataset_urls,
        model_urls: availability.model_urls,
        space_urls: availability.space_urls,
        resource_sources: uniqueStrings([
            ...rowStringArray(availability, "code_sources"),
            ...rowStringArray(availability, "dataset_sources"),
        ]),
        missing_reproducibility: availabilityReproducibilityMissing(availability),
        next_evidence: availability.next_evidence,
        next_read: availability.next_read,
        next_download: availability.next_download,
        next_code: availability.next_code,
        next_datasets: availability.next_datasets,
        // The inspect aliases mirror next_code / next_datasets.
        next_inspect_code: availability.next_code,
        next_inspect_resources: availability.next_datasets,
        source_errors: availability.source_errors,
        retrieved_at: new Date().toISOString(),
    };
}
2531
/**
 * Column names used when rendering reproducibility rows.
 *
 * @param {boolean} [detailed=false] - when truthy, append the detail columns.
 * @returns {string[]} the ordered column names.
 */
function reproducibilityColumns(detailed = false) {
    const summaryColumns = [
        "ref",
        "route_kind",
        "reproducibility_status",
        "install_readiness",
        "execution_boundary",
        "install_boundary",
        "record_found",
        "title",
        "year",
        "primary_source",
        "primary_evidence_url",
        "code_url",
        "project_url",
        "clone_candidate_url",
        "dataset_urls",
        "model_urls",
        "space_urls",
        "resource_sources",
        "missing_reproducibility",
        "next_evidence",
        "next_read",
        "next_code",
        "next_datasets",
    ];
    if (!detailed) {
        return summaryColumns;
    }
    return [
        ...summaryColumns,
        "dataset_url",
        "next_download",
        "next_inspect_code",
        "next_inspect_resources",
        "source_errors",
        "retrieved_at",
    ];
}
2569
/**
 * Column names used when rendering availability rows.
 *
 * @param {boolean} [detailed=false] - when truthy, append the detail columns.
 * @returns {string[]} the ordered column names.
 */
function availabilityColumns(detailed = false) {
    const summaryColumns = [
        "ref",
        "route_kind",
        "canonical_ref",
        "canonical_ref_kind",
        "record_found",
        "title",
        "doi",
        "arxiv_id",
        "pmid",
        "openreview_id",
        "has_pdf",
        "has_fulltext_candidate",
        "has_code",
        "has_project",
        "has_datasets",
        "has_models",
        "has_spaces",
        "metadata_sources",
        "pdf_sources",
        "fulltext_candidate_sources",
        "code_sources",
        "dataset_sources",
        "citation_candidate_sources",
        "reference_candidate_sources",
        "review_candidate_sources",
        "next_read",
        "next_download",
        "next_code",
        "next_datasets",
    ];
    if (!detailed) {
        return summaryColumns;
    }
    const detailColumns = [
        "route_value",
        "id",
        "year",
        "pmc_id",
        "source_adapter",
        "source_url",
        "pdf_url",
        "code_url",
        "project_url",
        "dataset_url",
        "model_urls",
        "dataset_urls",
        "space_urls",
        "next_workflow",
        "next_get",
        "next_pdf",
        "next_evidence",
        "next_reproduce",
        "next_citations",
        "next_references",
        "next_reviews",
        "source_errors",
        "retrieved_at",
    ];
    return [...summaryColumns, ...detailColumns];
}
2630
/**
 * Gather metadata, PDF, code and dataset records for a reference and fold
 * them into a single availability row.
 *
 * @param {string} ref - the reference as supplied by the caller.
 * @param {object} opts - command options forwarded to every collector.
 * @returns {Promise<{row: object, outcomes: Array}>} the availability row and
 *   the concatenated per-source outcomes (metadata, pdf, code, datasets).
 */
async function collectAvailabilityEvidence(ref, opts) {
    const route = resolveScholarReference(ref);
    // The four lookups are awaited one after another, in this order.
    // NOTE(review): they look independent — confirm the collectors are safe to
    // run concurrently before switching to Promise.all.
    const metadata = await collectSingleRecords("scholar.get", ref, opts);
    const pdf = await collectPdfCandidates(ref, opts);
    const code = await collectResourceRecords("scholar.code", ref, opts);
    const datasets = await collectResourceRecords("scholar.datasets", ref, opts);
    const outcomes = [metadata, pdf, code, datasets].flatMap((collected) => [...collected.outcomes]);
    const capabilitySources = (capability) => resolveAvailabilityCapabilitySources(capability, route, opts);
    const row = buildScholarAvailabilityRow({
        ref,
        route,
        metadataRecords: metadata.records,
        pdfRecords: pdf.records,
        codeRecords: code.records,
        datasetRecords: datasets.records,
        fulltextCandidateSources: capabilitySources("scholar.fulltext"),
        citationCandidateSources: capabilitySources("scholar.citations"),
        referenceCandidateSources: capabilitySources("scholar.references"),
        reviewCandidateSources: capabilitySources("scholar.review"),
        sourceErrors: collectSourceErrors(outcomes),
        opts,
    });
    return { row, outcomes };
}
2658
/**
 * Re-run the four record lookups against the row's canonical reference and
 * return their outcomes.
 *
 * @param {object} availability - availability row providing `ref` and `canonical_ref`.
 * @param {object} opts - command options forwarded to every collector.
 * @returns {Promise<Array>} concatenated outcomes, or an empty array when the
 *   canonical reference is missing or identical to the original reference.
 */
async function collectCanonicalSourceAuditOutcomes(availability, opts) {
    const originalRef = String(availability.ref ?? "");
    const canonicalRef = rowString(availability, "canonical_ref");
    const needsCanonicalPass = Boolean(canonicalRef) && canonicalRef !== originalRef;
    if (!needsCanonicalPass) {
        return [];
    }
    // Lookups run sequentially, in the same order as the primary pass.
    const metadata = await collectSingleRecords("scholar.get", canonicalRef, opts);
    const pdf = await collectPdfCandidates(canonicalRef, opts);
    const code = await collectResourceRecords("scholar.code", canonicalRef, opts);
    const datasets = await collectResourceRecords("scholar.datasets", canonicalRef, opts);
    return [metadata, pdf, code, datasets].flatMap((collected) => [...collected.outcomes]);
}
2674
/**
 * Combine the availability gathered for the original reference with the one
 * gathered for its canonical reference.
 *
 * The canonical row supplies the evidence; the original row keeps its own
 * `ref`, `route_kind` and `route_value`. Source errors from both rows are
 * merged through `uniqueStrings`, and outcomes are concatenated with the
 * original ones first.
 *
 * @param {{row: object, outcomes: Array}} original - result for the caller's reference.
 * @param {{row: object, outcomes: Array}} canonical - result for the canonical reference.
 * @returns {{row: object, outcomes: Array}} the merged result.
 */
function mergeCanonicalAvailability(original, canonical) {
    const originalRow = original.row;
    const canonicalRow = canonical.row;
    const combinedErrors = uniqueStrings([
        ...rowStringArray(originalRow, "source_errors"),
        ...rowStringArray(canonicalRow, "source_errors"),
    ]);
    const row = {
        ...canonicalRow,
        ref: originalRow.ref,
        route_kind: originalRow.route_kind,
        route_value: originalRow.route_value,
        source_errors: combinedErrors,
    };
    const outcomes = [...original.outcomes, ...canonical.outcomes];
    return { row, outcomes };
}
2689
/**
 * Collect availability for a reference, retrying through its canonical
 * reference when the first pass finds nothing.
 *
 * The retry only happens when all of these hold: no record was found, the
 * reference kind resolves to "unknown", and the caller restricted sources via
 * `opts.source` or `opts.sources`.
 *
 * @param {string} ref - the reference as supplied by the caller.
 * @param {object} opts - command options.
 * @returns {Promise<{row: object, outcomes: Array}>} the first-pass result, or
 *   the merged original and canonical result when a retry succeeded in
 *   finding a different canonical reference.
 */
async function collectCanonicalizedAvailability(ref, opts) {
    const firstPass = await collectAvailabilityEvidence(ref, opts);
    if (firstPass.row.record_found === true) {
        return firstPass;
    }
    if (resolveScholarReference(ref).kind !== "unknown") {
        return firstPass;
    }
    const sourcesRestricted = Boolean(opts.source || opts.sources);
    if (!sourcesRestricted) {
        return firstPass;
    }
    // Look the reference up with the canonical-lookup options to discover a canonical ref.
    const lookup = await collectAvailabilityEvidence(ref, canonicalReferenceLookupOpts(opts));
    const canonicalRef = rowString(lookup.row, "canonical_ref");
    if (!canonicalRef || canonicalRef === ref) {
        return firstPass;
    }
    const canonicalPass = await collectAvailabilityEvidence(canonicalRef, opts);
    return mergeCanonicalAvailability(firstPass, canonicalPass);
}
2703
/**
 * CLI handler: print the availability row for a reference.
 *
 * When no record is found, an error envelope is written to stderr and the
 * process exits with `ExitCode.EMPTY_RESULT`.
 *
 * @param {object} program - CLI program; `program.opts().format` selects the output format.
 * @param {string} ref - the reference to look up.
 * @param {object} opts - command options (`detailed` widens the column set).
 * @returns {Promise<void>}
 */
async function runAvailability(program, ref, opts) {
    const startedAt = Date.now();
    const fmt = detectFormat(program.opts().format);
    const ctx = makeCtx("scholar.availability", startedAt);
    const { row, outcomes } = await collectCanonicalizedAvailability(ref, opts);
    ctx.duration_ms = Date.now() - startedAt;
    ctx.surface = "web";
    if (row.record_found !== true) {
        const sourceErrors = row.source_errors;
        const hasSourceErrors = Array.isArray(sourceErrors) && sourceErrors.length > 0;
        ctx.error = {
            code: "SCHOLAR_AVAILABILITY_NOT_FOUND",
            message: `no scholarly metadata, PDF, or resource evidence returned for "${ref}"`,
            suggestion: hasSourceErrors
                ? `Per-source errors: ${sourceErrors.join("; ")}`
                : "Try a DOI, arXiv id, PMID, OpenReview URL, or run `unicli scholar search` first.",
            retryable: outcomes.some((outcome) => isRetryableScholarError(outcome.error)),
        };
        console.error(format(null, undefined, fmt, ctx));
        process.exit(ExitCode.EMPTY_RESULT);
    }
    console.log(format([row], availabilityColumns(opts.detailed), fmt, ctx));
}
2725
/**
 * CLI handler: print one audit row per scholarly source consulted for a reference.
 *
 * Outcomes from the availability pass and from the canonical-reference pass
 * are combined before the audit rows are built. When no rows result, an error
 * envelope is written to stderr and the process exits with
 * `ExitCode.EMPTY_RESULT`.
 *
 * @param {object} program - CLI program; `program.opts().format` selects the output format.
 * @param {string} ref - the reference to audit.
 * @param {object} opts - command options (`detailed`, `unpaywallEmail`, source selection).
 * @returns {Promise<void>}
 */
async function runSourceAudit(program, ref, opts) {
    const startedAt = Date.now();
    const fmt = detectFormat(program.opts().format);
    const ctx = makeCtx("scholar.sources", startedAt);
    const availability = await collectCanonicalizedAvailability(ref, opts);
    const canonicalOutcomes = await collectCanonicalSourceAuditOutcomes(availability.row, opts);
    const allOutcomes = [...availability.outcomes, ...canonicalOutcomes];
    const auditOptions = { unpaywallEmail: opts.unpaywallEmail };
    const rows = buildScholarSourceAuditRows(availability.row, allOutcomes, auditOptions);
    ctx.duration_ms = Date.now() - startedAt;
    ctx.surface = "web";
    if (rows.length === 0) {
        ctx.error = {
            code: "SCHOLAR_SOURCES_EMPTY",
            message: `no scholarly sources were audited for "${ref}"`,
            suggestion: "Run `unicli scholar doctor --sources all` to inspect registered scholarly sources, or pass --sources all.",
            retryable: false,
        };
        console.error(format(null, undefined, fmt, ctx));
        process.exit(ExitCode.EMPTY_RESULT);
    }
    console.log(format(rows, sourceAuditColumns(opts.detailed), fmt, ctx));
}
2748
/**
 * CLI handler: print the workflow row for a reference.
 *
 * When no record is found, an error envelope is written to stderr and the
 * process exits with `ExitCode.EMPTY_RESULT`.
 *
 * @param {object} program - CLI program; `program.opts().format` selects the output format.
 * @param {string} ref - the reference to look up.
 * @param {object} opts - command options (`detailed` widens the column set).
 * @returns {Promise<void>}
 */
async function runWorkflow(program, ref, opts) {
    const startedAt = Date.now();
    const fmt = detectFormat(program.opts().format);
    const ctx = makeCtx("scholar.workflow", startedAt);
    const { row, outcomes } = await collectCanonicalizedAvailability(ref, opts);
    ctx.duration_ms = Date.now() - startedAt;
    ctx.surface = "web";
    if (row.record_found !== true) {
        const sourceErrors = row.source_errors;
        const hasSourceErrors = Array.isArray(sourceErrors) && sourceErrors.length > 0;
        ctx.error = {
            code: "SCHOLAR_WORKFLOW_NOT_FOUND",
            message: `no source-backed scholarly workflow evidence returned for "${ref}"`,
            suggestion: hasSourceErrors
                ? `Per-source errors: ${sourceErrors.join("; ")}`
                : "Try a DOI, arXiv id, PMID, OpenReview URL, or run `unicli scholar search` first.",
            retryable: outcomes.some((outcome) => isRetryableScholarError(outcome.error)),
        };
        console.error(format(null, undefined, fmt, ctx));
        process.exit(ExitCode.EMPTY_RESULT);
    }
    const workflowRow = buildScholarWorkflowRow(row);
    console.log(format([workflowRow], workflowColumns(opts.detailed), fmt, ctx));
}
2770
/**
 * CLI handler: print the evidence row for a reference.
 *
 * When no record is found, an error envelope is written to stderr and the
 * process exits with `ExitCode.EMPTY_RESULT`.
 *
 * @param {object} program - CLI program; `program.opts().format` selects the output format.
 * @param {string} ref - the reference to look up.
 * @param {object} opts - command options (`detailed` widens the column set).
 * @returns {Promise<void>}
 */
async function runEvidence(program, ref, opts) {
    const startedAt = Date.now();
    const fmt = detectFormat(program.opts().format);
    const ctx = makeCtx("scholar.evidence", startedAt);
    const { row, outcomes } = await collectCanonicalizedAvailability(ref, opts);
    ctx.duration_ms = Date.now() - startedAt;
    ctx.surface = "web";
    if (row.record_found !== true) {
        const sourceErrors = row.source_errors;
        const hasSourceErrors = Array.isArray(sourceErrors) && sourceErrors.length > 0;
        ctx.error = {
            code: "SCHOLAR_EVIDENCE_NOT_FOUND",
            message: `no source-backed scholarly evidence returned for "${ref}"`,
            suggestion: hasSourceErrors
                ? `Per-source errors: ${sourceErrors.join("; ")}`
                : "Try a DOI, arXiv id, PMID, OpenReview URL, or run `unicli scholar search` first.",
            retryable: outcomes.some((outcome) => isRetryableScholarError(outcome.error)),
        };
        console.error(format(null, undefined, fmt, ctx));
        process.exit(ExitCode.EMPTY_RESULT);
    }
    const evidenceRow = buildScholarEvidenceRow(row);
    console.log(format([evidenceRow], evidenceColumns(opts.detailed), fmt, ctx));
}
2792
/**
 * CLI handler: print the reproducibility row for a reference.
 *
 * When no record is found, an error envelope is written to stderr and the
 * process exits with `ExitCode.EMPTY_RESULT`.
 *
 * @param {object} program - CLI program; `program.opts().format` selects the output format.
 * @param {string} ref - the reference to look up.
 * @param {object} opts - command options (`detailed` widens the column set).
 * @returns {Promise<void>}
 */
async function runReproducibility(program, ref, opts) {
    const startedAt = Date.now();
    const fmt = detectFormat(program.opts().format);
    const ctx = makeCtx("scholar.reproduce", startedAt);
    const { row, outcomes } = await collectCanonicalizedAvailability(ref, opts);
    ctx.duration_ms = Date.now() - startedAt;
    ctx.surface = "web";
    if (row.record_found !== true) {
        const sourceErrors = row.source_errors;
        const hasSourceErrors = Array.isArray(sourceErrors) && sourceErrors.length > 0;
        ctx.error = {
            code: "SCHOLAR_REPRODUCIBILITY_NOT_FOUND",
            message: `no source-backed scholarly reproducibility evidence returned for "${ref}"`,
            suggestion: hasSourceErrors
                ? `Per-source errors: ${sourceErrors.join("; ")}`
                : "Try a DOI, arXiv id, OpenReview URL, or run `unicli scholar search` before requesting reproducibility resources.",
            retryable: outcomes.some((outcome) => isRetryableScholarError(outcome.error)),
        };
        console.error(format(null, undefined, fmt, ctx));
        process.exit(ExitCode.EMPTY_RESULT);
    }
    const reproducibilityRow = buildScholarReproducibilityRow(row);
    console.log(format([reproducibilityRow], reproducibilityColumns(opts.detailed), fmt, ctx));
}
2814
/**
 * Extract the permission-related root options from the CLI program.
 *
 * `approved` and `rememberApproval` are true only when the corresponding
 * option is exactly `true`.
 *
 * @param {object} program - CLI program exposing `opts()`.
 * @returns {{permissionProfile: *, approved: boolean, rememberApproval: boolean}}
 */
function rootPermissionOptions(program) {
    const rootOptions = program.opts();
    const approved = rootOptions.yes === true;
    const rememberApproval = rootOptions.rememberApproval === true;
    return {
        permissionProfile: rootOptions.permissionProfile,
        approved,
        rememberApproval,
    };
}
2822
/**
 * Build the argument object for an artifact command from a record and the
 * CLI options.
 *
 * Page and character limits are converted with `Number(...)` when supplied
 * and left `undefined` otherwise; `source_url` falls back to the record's
 * `landing_url` when `source_url` is null or undefined.
 *
 * @param {object} record - scholarly record providing the PDF and source fields.
 * @param {object} opts - options providing `output`, `firstPage`, `lastPage`, `maxChars`.
 * @returns {object} the artifact arguments, with kebab-case limit keys.
 */
function artifactArgs(record, opts) {
    const numericOption = (value) => (value === undefined ? undefined : Number(value));
    const { pdf_url, title, id, source_adapter } = record;
    return {
        pdf_url,
        title,
        id,
        source_adapter,
        source_url: record.source_url ?? record.landing_url,
        output: opts.output,
        "first-page": numericOption(opts.firstPage),
        "last-page": numericOption(opts.lastPage),
        "max-chars": numericOption(opts.maxChars),
    };
}
2838
/**
 * Build the argument object for a source-direct fulltext command.
 *
 * Starts from `referenceArgs(route, opts)` and adds the output path plus the
 * page and character limits, which are converted with `Number(...)` when
 * supplied and left `undefined` otherwise.
 *
 * @param {object} route - resolved scholarly reference route.
 * @param {object} opts - options providing `output`, `firstPage`, `lastPage`, `maxChars`.
 * @returns {object} the fulltext arguments, with kebab-case limit keys.
 */
function fulltextArgs(route, opts) {
    const numericOption = (value) => (value === undefined ? undefined : Number(value));
    const firstPage = numericOption(opts.firstPage);
    const lastPage = numericOption(opts.lastPage);
    const maxChars = numericOption(opts.maxChars);
    const limits = {
        output: opts.output,
        "first-page": firstPage,
        "last-page": lastPage,
        "max-chars": maxChars,
    };
    return { ...referenceArgs(route, opts), ...limits };
}
2850
/**
 * Try to fetch full text straight from one source adapter and print it.
 *
 * Every failure mode (unknown adapter, missing capability, invocation that
 * cannot be built, execution error, empty result) is reported as
 * `{ handled: false, outcome: { source, error } }` rather than thrown. On
 * success the rows are printed to stdout and `{ handled: true }` is returned.
 *
 * @param {object} program - CLI program providing the format and permission options.
 * @param {string} source - adapter name to query.
 * @param {object} route - resolved scholarly reference route.
 * @param {object} opts - command options forwarded into the fulltext arguments.
 * @returns {Promise<{handled: boolean, outcome: object}>}
 */
async function executeDirectFulltext(program, source, route, opts) {
    // Shared shape for every "could not handle" result.
    const unhandled = (error) => ({ handled: false, outcome: { source, error } });

    const adapter = getAllAdapters().find((candidate) => candidate.name === source);
    if (!adapter) {
        return unhandled({
            code: "adapter_not_found",
            message: `unknown source: ${source}`,
        });
    }
    const found = findScholarCommandByCapability(adapter, "scholar.fulltext");
    if (!found) {
        return unhandled({
            code: "capability_unsupported",
            message: `${source} does not expose scholar.fulltext`,
        });
    }
    const fmt = detectFormat(program.opts().format);
    const commandArgs = normalizeScholarCommandArgs(found.command, fulltextArgs(route, opts));
    const invocation = buildInvocation("cli", source, found.name, {
        args: commandArgs,
        source: "internal",
    }, rootPermissionOptions(program));
    if (!invocation) {
        return unhandled({
            code: "build_invocation_failed",
            message: `could not build invocation for ${source}.${found.name}`,
        });
    }
    const result = await execute(invocation);
    if (result.error) {
        const { code, message, retryable } = result.error;
        return unhandled({
            code: code ?? "execution_error",
            message: message ?? "source fulltext command failed",
            retryable,
        });
    }
    const rows = result.results;
    if (!Array.isArray(rows) || rows.length === 0) {
        return unhandled({
            code: "empty_result",
            message: `${source}.${found.name} returned no fulltext rows`,
        });
    }
    console.log(format(rows, invocation.command.columns, fmt, result.envelope));
    return { handled: true, outcome: { source } };
}
2923
/**
 * Try each candidate fulltext source in turn until one handles the request.
 *
 * Sources are attempted sequentially; the outcome of every attempt made so
 * far is recorded, and iteration stops at the first handled one.
 *
 * @param {object} program - CLI program forwarded to the per-source executor.
 * @param {string} ref - the reference to read.
 * @param {object} opts - command options (`source` / `sources` select candidates).
 * @returns {Promise<{handled: boolean, outcomes: Array}>}
 */
async function tryDirectFulltextFromScholar(program, ref, opts) {
    const route = resolveScholarReference(ref);
    const candidates = resolveScholarFulltextSources(opts.source, opts.sources, route);
    const outcomes = [];
    for (const candidate of candidates) {
        const { handled, outcome } = await executeDirectFulltext(program, candidate, route, opts);
        outcomes.push(outcome);
        if (handled) {
            return { handled: true, outcomes };
        }
    }
    return { handled: false, outcomes };
}
2935
/**
 * Build and run a `scholar-artifacts` adapter command, printing its output.
 *
 * When the invocation cannot be built, an error envelope is written to stderr
 * and `process.exit(ExitCode.CONFIG_ERROR)` is called. When execution reports
 * an error, the formatted envelope is written to stderr and the process exits
 * with the result's exit code. Otherwise the formatted results go to stdout.
 *
 * @param {object} program - Root CLI program; its `format` option selects the output format.
 * @param {string} command - Name of the `scholar-artifacts` command to invoke.
 * @param {object} args - Raw arguments; passed through `definedEntries` before use.
 * @returns {Promise<void>}
 */
async function executeScholarArtifact(program, command, args) {
  const fmt = detectFormat(program.opts().format);
  const payload = {
    args: Object.fromEntries(definedEntries(args)),
    source: "internal",
  };
  const invocation = buildInvocation(
    "cli",
    "scholar-artifacts",
    command,
    payload,
    rootPermissionOptions(program),
  );
  if (!invocation) {
    const ctx = makeCtx(`scholar.${command}`, Date.now());
    ctx.surface = "web";
    ctx.error = {
      code: "SCHOLAR_ARTIFACT_ADAPTER_MISSING",
      message: "scholar-artifacts adapter is not registered",
      suggestion: "Run `unicli scholar doctor` and check adapter load diagnostics.",
      retryable: false,
    };
    console.error(format(null, undefined, fmt, ctx));
    process.exit(ExitCode.CONFIG_ERROR);
  }
  const outcome = await execute(invocation);
  // Both the failure and success paths render with the command's own columns
  // and the envelope returned by the execution.
  const render = (rows) => format(rows, invocation.command.columns, fmt, outcome.envelope);
  if (outcome.error) {
    process.stderr.write(render([]) + "\n");
    process.exit(outcome.exitCode);
  }
  console.log(render(outcome.results));
}
2960
/**
 * Drive `scholar download` / `scholar read`: optionally serve the request from
 * a source-direct fulltext command, otherwise resolve a PDF candidate and hand
 * it to the `scholar-artifacts` adapter.
 *
 * Flow, as implemented below:
 *  1. For `read-pdf` with an explicit `--source`/`--sources`, try direct
 *     fulltext against the raw `ref` first.
 *  2. Resolve the lookup reference via `resolveArtifactLookupRef`.
 *  3. For `read-pdf`, try direct fulltext against the lookup reference unless
 *     it equals `ref` and step 1 already produced outcomes.
 *  4. Collect PDF candidates; if none yields a record, emit an error envelope
 *     on stderr and exit with `ExitCode.EMPTY_RESULT`.
 *  5. Otherwise execute the artifact command for the first PDF record.
 *
 * @param {object} program - Root CLI program.
 * @param {string} ref - Paper reference as typed by the user.
 * @param {object} opts - Command options (`source`, `sources`, plus whatever the helpers read).
 * @param {string} command - `"download-pdf"` or `"read-pdf"`.
 * @returns {Promise<void>}
 */
async function runArtifactFromScholar(program, ref, opts, command) {
  const startedAt = Date.now();
  const fmt = detectFormat(program.opts().format);
  const ctx = makeCtx(command === "download-pdf" ? "scholar.download" : "scholar.read", startedAt);
  const fulltextOutcomes = [];

  if (command === "read-pdf" && (opts.source || opts.sources)) {
    const rawFulltext = await tryDirectFulltextFromScholar(program, ref, opts);
    fulltextOutcomes.push(...rawFulltext.outcomes);
    if (rawFulltext.handled) {
      return;
    }
  }

  const lookupRef = await resolveArtifactLookupRef(ref, opts);

  let fulltext;
  if (command !== "read-pdf") {
    fulltext = { handled: false, outcomes: [] };
  } else if (lookupRef === ref && fulltextOutcomes.length > 0) {
    // Same reference was already attempted above; reuse those outcomes
    // instead of querying the sources a second time.
    fulltext = { handled: false, outcomes: fulltextOutcomes };
  } else {
    fulltext = await tryDirectFulltextFromScholar(program, lookupRef, opts);
  }
  // Identity check: skip the merge when `fulltext` merely wraps the
  // accumulator itself, which would otherwise duplicate every entry.
  if (fulltext.outcomes !== fulltextOutcomes) {
    fulltextOutcomes.push(...fulltext.outcomes);
  }
  if (fulltext.handled) {
    return;
  }

  const { sourceList, outcomes, records } = await collectPdfCandidates(lookupRef, opts);
  const record = firstPdfRecord(records);
  if (!record) {
    const errors = outcomes.filter((outcome) => outcome.error);
    const fulltextErrors = fulltextOutcomes.filter((outcome) => outcome.error);
    const suggestions = [];
    if (fulltextErrors.length > 0) {
      suggestions.push(`Fulltext errors: ${fulltextErrors.map(formatScholarOutcomeError).join("; ")}`);
    }
    if (errors.length > 0) {
      suggestions.push(`PDF/source errors: ${errors.map(formatScholarOutcomeError).join("; ")}`);
    }
    ctx.duration_ms = Date.now() - startedAt;
    ctx.surface = "web";
    const isRead = command === "read-pdf";
    ctx.error = {
      code: isRead ? "SCHOLAR_READ_NOT_FOUND" : "SCHOLAR_PDF_NOT_FOUND",
      message: isRead
        ? `no source-direct scholarly full text or downloadable PDF returned for "${ref}" across [${sourceList.join(", ")}]`
        : `no downloadable scholarly PDF returned for "${ref}" across [${sourceList.join(", ")}]`,
      suggestion: suggestions.length > 0
        ? suggestions.join(" ")
        : "Try --source with a site from `unicli scholar doctor`, or pass a more exact DOI/arXiv/OpenReview/PubMed id/title.",
      retryable: errors.some((outcome) => isRetryableScholarError(outcome.error)) ||
        fulltextOutcomes.some((outcome) => isRetryableScholarError(outcome.error)),
    };
    console.error(format(null, undefined, fmt, ctx));
    process.exit(ExitCode.EMPTY_RESULT);
  }
  await executeScholarArtifact(program, command, artifactArgs(record, opts));
}
3015
/**
 * Print the coverage matrix for the selected scholarly adapters.
 *
 * Rows come from `buildScholarCoverageRows`; with `opts.gaps` set, only rows
 * whose `missing_closed_loop` is a non-empty array are kept. If no rows remain
 * an error envelope is written to stderr and the process exits with
 * `ExitCode.EMPTY_RESULT`.
 *
 * @param {object} program - Root CLI program; its `format` option selects the output format.
 * @param {object} opts - Command options: `sources`, `gaps`, `detailed`.
 * @returns {Promise<void>}
 */
async function runCoverage(program, opts) {
  const startedAt = Date.now();
  const fmt = detectFormat(program.opts().format);
  const ctx = makeCtx("scholar.coverage", startedAt);

  const registeredNames = listScholarAdapters().map(({ name }) => name);
  const selected = resolveScholarSources(opts.sources, registeredNames);
  const selectedAdapters = listScholarAdapters().filter(({ name }) => selected.includes(name));

  const hasGaps = (row) => {
    const missing = row.missing_closed_loop;
    return Array.isArray(missing) && missing.length > 0;
  };
  const rows = buildScholarCoverageRows(selectedAdapters)
    .filter((row) => !opts.gaps || hasGaps(row));

  ctx.duration_ms = Date.now() - startedAt;
  ctx.surface = "web";
  if (rows.length === 0) {
    ctx.error = {
      code: "SCHOLAR_COVERAGE_EMPTY",
      message: `no scholarly sources matched [${selected.join(", ")}]`,
      suggestion: "Run `unicli scholar doctor --sources all` to inspect registered scholarly sources.",
      retryable: false,
    };
    console.error(format(null, undefined, fmt, ctx));
    process.exit(ExitCode.EMPTY_RESULT);
  }
  console.log(format(rows, coverageColumns(opts.detailed), fmt, ctx));
}
3041
/**
 * Tell whether an error message has the shape `No <things> matched "<query>"`.
 *
 * The match is case-insensitive, ignores surrounding whitespace, accepts
 * straight or curly double quotes around the query, and tolerates one
 * trailing period. The whole (trimmed) message must fit the pattern.
 *
 * @param {string} message - Error message text.
 * @returns {boolean} `true` when the message is a "nothing matched" report.
 */
function isScholarNoMatchErrorMessage(message) {
  const noMatchPattern = /^No .+ matched ["“].+["”]\.?$/i;
  const trimmed = message.trim();
  return noMatchPattern.test(trimmed);
}
3044
/**
 * Map a live-probe error onto the `live_*` fields of a doctor row.
 *
 * A message recognised by `isScholarNoMatchErrorMessage` is reported as an
 * empty result (`live_health: "empty"`); any other error is reported as a
 * failure carrying the error's own code, or `"execution_error"` when the
 * code is null/undefined.
 *
 * @param {{code?: string, message?: string}} error - Error reported by the probe.
 * @returns {{live_health: string, live_error_code: string, live_error_message: string}}
 */
export function classifyScholarLiveProbeError(error) {
  const message = error.message ?? "adapter command failed";
  const isNoMatch = isScholarNoMatchErrorMessage(message);
  return {
    live_health: isNoMatch ? "empty" : "failed",
    live_error_code: isNoMatch
      ? "empty_source_result"
      : (error.code ?? "execution_error"),
    live_error_message: message,
  };
}
3059
/**
 * Build the doctor row for one adapter using registry introspection only.
 *
 * Collects the distinct `scholar.*` capability tags declared on the adapter's
 * commands, then classifies the adapter's `health` command:
 *  - no `health` command      -> "skipped"
 *  - non-public strategy      -> "blocked"
 *  - otherwise                -> "available"
 *
 * @param {object} adapter - Adapter with `name` and a `commands` map.
 * @returns {{source: string, capabilities: string[], health: string, detail: string}}
 */
function buildStaticDoctorRow(adapter) {
  const scholarCaps = new Set(
    Object.values(adapter.commands)
      .flatMap((command) => [...(command.capabilities ?? [])])
      .filter((cap) => cap.startsWith("scholar.")),
  );

  const health = resolveCommand(adapter.name, "health");
  const strategy = health ? commandStrategy(adapter, health.command) : undefined;
  // An undefined strategy is treated the same as a public one.
  const needsAuth = strategy !== undefined && strategy !== Strategy.PUBLIC;

  let status;
  let detail;
  if (!health) {
    status = "skipped";
    detail = "no `health` command — adapter passes by capability introspection";
  } else if (needsAuth) {
    status = "blocked";
    detail = `health probe requires ${strategy} auth — skipped`;
  } else {
    status = "available";
    detail = "health probe command is public";
  }

  return {
    source: adapter.name,
    capabilities: [...scholarCaps].sort(),
    health: status,
    detail,
  };
}
3086
/**
 * Extend a static doctor row with the result of one live search probe.
 *
 * Outcomes, in the order they are checked:
 *  - no queryable search command    -> `live_health: "not_probeable"`
 *  - command needs a non-public strategy -> `live_health: "blocked"` (no request is made)
 *  - the probe reports an error     -> fields from `classifyScholarLiveProbeError`
 *  - the probe succeeds             -> "passed" when records came back, else "empty"
 *
 * @param {object} adapter - Adapter to probe.
 * @param {object} row - Static row to extend; it is copied, not mutated.
 * @param {{query: string, limit: number}} opts - Probe query and per-source limit.
 * @returns {Promise<object>} The row plus `live_*` fields.
 */
async function probeScholarDoctorRow(adapter, row, opts) {
  const found = findScholarQueryableSearchCommand(adapter);
  if (!found) {
    return {
      ...row,
      live_health: "not_probeable",
      live_query: opts.query,
      live_count: 0,
      live_error_code: "no_queryable_search",
      live_error_message: "no queryable scholar.search command is registered for live probing",
    };
  }

  // Display form of the probed command, with a placeholder for the query.
  const describeCommand = () => sourceCommand(adapter.name, found.name, "<query>");

  const strategy = commandStrategy(adapter, found.command);
  const needsAuth = strategy !== undefined && strategy !== Strategy.PUBLIC;
  if (needsAuth) {
    return {
      ...row,
      live_health: "blocked",
      live_command: describeCommand(),
      live_query: opts.query,
      live_count: 0,
      live_error_code: "auth_required",
      live_error_message: `live probe requires ${strategy} auth`,
    };
  }

  const probeArgs = { query: opts.query, limit: String(opts.limit) };
  const outcome = await executeScholarAdapterCommand(adapter.name, found, probeArgs, "scholar.search");
  if (outcome.error) {
    const liveError = classifyScholarLiveProbeError(outcome.error);
    return {
      ...row,
      ...liveError,
      live_command: describeCommand(),
      live_query: opts.query,
      live_count: 0,
    };
  }

  const count = outcome.records.length;
  const gotRecords = count > 0;
  return {
    ...row,
    live_health: gotRecords ? "passed" : "empty",
    live_command: describeCommand(),
    live_query: opts.query,
    live_count: count,
    live_error_code: gotRecords ? undefined : "empty_normalized_result",
    live_error_message: gotRecords
      ? undefined
      : "live probe returned no scholar-normalized records",
  };
}
3133
/**
 * Print one doctor row per selected scholarly adapter.
 *
 * Rows start from static introspection (`buildStaticDoctorRow`). With
 * `opts.live`, every row is additionally probed through
 * `probeScholarDoctorRow`; the probes are started together and awaited with
 * `Promise.all`. The `live_*` columns are included when either `opts.live`
 * or `opts.detailed` is set.
 *
 * @param {object} program - Root CLI program; its `format` option selects the output format.
 * @param {object} opts - Command options: `sources`, `live`, `query`, `limit`, `detailed`.
 * @returns {Promise<void>}
 */
async function runDoctor(program, opts) {
  const startedAt = Date.now();
  const fmt = detectFormat(program.opts().format);
  const ctx = makeCtx("scholar.doctor", startedAt);

  const registeredNames = listScholarAdapters().map(({ name }) => name);
  const selected = resolveScholarSources(opts.sources, registeredNames);
  let rows = listScholarAdapters()
    .filter(({ name }) => selected.includes(name))
    .map((adapter) => buildStaticDoctorRow(adapter));

  if (opts.live) {
    const adaptersByName = new Map(
      listScholarAdapters().map((adapter) => [adapter.name, adapter]),
    );
    const query = opts.query ?? "Llama 2";
    const limit = numberOpt(opts.limit, 1, 5);
    const pending = rows.map((row) => {
      const adapter = adaptersByName.get(row.source);
      if (!adapter) {
        // No adapter under that name: keep the static row untouched.
        return row;
      }
      return probeScholarDoctorRow(adapter, row, { query, limit });
    });
    rows = await Promise.all(pending);
  }

  ctx.duration_ms = Date.now() - startedAt;
  ctx.surface = "web";

  const staticColumns = ["source", "capabilities", "health", "detail"];
  const liveColumns = [
    "live_health",
    "live_command",
    "live_query",
    "live_count",
    "live_error_code",
    "live_error_message",
  ];
  const columns = opts.live || opts.detailed
    ? [...staticColumns, ...liveColumns]
    : [...staticColumns];

  console.log(format(rows, columns, fmt, ctx));
}
3172
+ export function registerScholarCommand(program) {
3173
+ const scholar = program
3174
+ .command("scholar")
3175
+ .description("Scholarly meta-command — search, retrieve, PDF, resource links, citations, references, and source audit across first-source academic adapters");
441
3176
  scholar
442
3177
  .command("search <query>")
443
3178
  .description("Fan-out scholarly paper search across first-source adapters")
@@ -447,11 +3182,89 @@ export function registerScholarCommand(program) {
447
3182
  .action(async (query, opts) => {
448
3183
  await runSearch(program, query, opts);
449
3184
  });
3185
+ scholar
3186
+ .command("availability <ref>")
3187
+ .alias("audit")
3188
+ .description("Audit source-backed metadata, PDF, full-text, code, dataset/model, citation, reference, and review availability for one paper without downloading artifacts")
3189
+ .option("--source <site>", "force one source")
3190
+ .option("--sources <csv>", "override auto-routed source list")
3191
+ .option("--venue <venue>", "source-local venue scope, e.g. CVPR or ICCV")
3192
+ .option("--year <year>", "source-local proceedings year")
3193
+ .option("--volume <volume>", "source-local proceedings volume, e.g. PMLR v235")
3194
+ .option("--unpaywall-email <email>", "requester email for Unpaywall DOI lookup")
3195
+ .option("-D, --detailed", "include evidence URLs, errors, and next commands")
3196
+ .action(async (ref, opts) => {
3197
+ await runAvailability(program, ref, opts);
3198
+ });
3199
+ scholar
3200
+ .command("sources <ref>")
3201
+ .alias("source-audit")
3202
+ .description("Show a per-source scholarly provenance matrix for one paper, including source status, evidence types, candidate capabilities, next commands, and source errors without downloading artifacts")
3203
+ .option("--source <site>", "force one source")
3204
+ .option("--sources <csv>", "override auto-routed source list")
3205
+ .option("--unpaywall-email <email>", "requester email for Unpaywall DOI lookup")
3206
+ .option("-D, --detailed", "include URLs, source capabilities, errors, and per-source next commands")
3207
+ .action(async (ref, opts) => {
3208
+ await runSourceAudit(program, ref, opts);
3209
+ });
3210
+ scholar
3211
+ .command("workflow <ref>")
3212
+ .alias("runbook")
3213
+ .description("Build a source-backed agent runbook for the full scholarly loop: evidence, reading, download, citation/reference graph, peer review, and reproducibility planning without downloading, cloning, installing, or summarizing claims")
3214
+ .option("--source <site>", "force one source")
3215
+ .option("--sources <csv>", "override auto-routed source list")
3216
+ .option("--unpaywall-email <email>", "requester email for Unpaywall DOI lookup")
3217
+ .option("-D, --detailed", "include runbook steps, source errors, and timestamp")
3218
+ .action(async (ref, opts) => {
3219
+ await runWorkflow(program, ref, opts);
3220
+ });
3221
+ scholar
3222
+ .command("evidence <ref>")
3223
+ .alias("classify")
3224
+ .description("Classify source-backed scholarly evidence for one paper into citation safety, reading readiness, missing evidence, and next commands without downloading artifacts")
3225
+ .option("--source <site>", "force one source")
3226
+ .option("--sources <csv>", "override auto-routed source list")
3227
+ .option("--unpaywall-email <email>", "requester email for Unpaywall DOI lookup")
3228
+ .option("-D, --detailed", "include availability rerun command, source errors, and timestamp")
3229
+ .action(async (ref, opts) => {
3230
+ await runEvidence(program, ref, opts);
3231
+ });
3232
+ scholar
3233
+ .command("reproduce <ref>")
3234
+ .alias("install-plan")
3235
+ .description("Plan source-backed paper code/data reproduction and installation readiness without cloning, installing, or executing remote code")
3236
+ .option("--source <site>", "force one source")
3237
+ .option("--sources <csv>", "override auto-routed source list")
3238
+ .option("--unpaywall-email <email>", "requester email for Unpaywall DOI lookup")
3239
+ .option("-D, --detailed", "include source errors, download command, and timestamp")
3240
+ .action(async (ref, opts) => {
3241
+ await runReproducibility(program, ref, opts);
3242
+ });
3243
+ scholar
3244
+ .command("coverage")
3245
+ .description("Compare registered scholarly sources by discovery, metadata, PDF, full-text, code, dataset/model, citation, reference, and review coverage without network I/O")
3246
+ .option("--sources <csv>", "limit to sources or all")
3247
+ .option("--gaps", "show only sources with missing closed-loop capabilities")
3248
+ .option("-D, --detailed", "include command names and next commands")
3249
+ .action(async (opts) => {
3250
+ await runCoverage(program, opts);
3251
+ });
3252
+ scholar
3253
+ .command("reviews <ref>")
3254
+ .description("Fetch source-backed peer-review, decision, rebuttal, and comment rows for a scholarly paper review thread")
3255
+ .option("--source <site>", "force one review-capable source")
3256
+ .option("--sources <csv>", "override review-capable source list")
3257
+ .option("--max-length <n>", "per-row review text truncation length", "4000")
3258
+ .option("-D, --detailed", "include reviewer/signature and text size fields")
3259
+ .action(async (ref, opts) => {
3260
+ await runReviews(program, ref, opts);
3261
+ });
450
3262
  scholar
451
3263
  .command("get <ref>")
452
3264
  .description("Retrieve one paper/work by DOI, arXiv id, PMID, OpenAlex id, Semantic Scholar id, dblp key, or OpenReview forum")
453
3265
  .option("--source <site>", "force one source")
454
3266
  .option("--sources <csv>", "override auto-routed source list")
3267
+ .option("--unpaywall-email <email>", "requester email for Unpaywall DOI lookup")
455
3268
  .option("-D, --detailed", "include richer metadata columns")
456
3269
  .action(async (ref, opts) => {
457
3270
  await runSingle(program, "scholar.get", ref, opts);
@@ -461,10 +3274,55 @@ export function registerScholarCommand(program) {
461
3274
  .description("Find open-access PDF candidates for a DOI, arXiv id, PMID, or source id")
462
3275
  .option("--source <site>", "force one source")
463
3276
  .option("--sources <csv>", "override auto-routed source list")
3277
+ .option("--unpaywall-email <email>", "requester email for Unpaywall DOI lookup")
464
3278
  .option("-D, --detailed", "include richer metadata columns")
465
3279
  .action(async (ref, opts) => {
466
3280
  await runSingle(program, "scholar.pdf", ref, opts);
467
3281
  });
3282
+ scholar
3283
+ .command("code <ref>")
3284
+ .description("Find code repository and project links for a paper through resource-capable scholarly adapters")
3285
+ .option("--source <site>", "force one source")
3286
+ .option("--sources <csv>", "override auto-routed source list")
3287
+ .option("-D, --detailed", "include richer resource metadata columns")
3288
+ .action(async (ref, opts) => {
3289
+ await runResources(program, "scholar.code", ref, opts);
3290
+ });
3291
+ scholar
3292
+ .command("datasets <ref>")
3293
+ .description("Find linked datasets, models, and Spaces for a paper through resource-capable scholarly adapters")
3294
+ .option("--source <site>", "force one source")
3295
+ .option("--sources <csv>", "override auto-routed source list")
3296
+ .option("-D, --detailed", "include richer resource metadata columns")
3297
+ .action(async (ref, opts) => {
3298
+ await runResources(program, "scholar.datasets", ref, opts);
3299
+ });
3300
+ scholar
3301
+ .command("download <ref>")
3302
+ .description("Resolve a scholarly PDF candidate, download it locally, and return artifact metadata")
3303
+ .option("--source <site>", "force one source")
3304
+ .option("--sources <csv>", "override auto-routed source list")
3305
+ .option("--unpaywall-email <email>", "requester email for Unpaywall DOI lookup")
3306
+ .option("--output <dir>", "output directory", "./scholar-downloads")
3307
+ .action(async (ref, opts) => {
3308
+ await runArtifactFromScholar(program, ref, opts, "download-pdf");
3309
+ });
3310
+ scholar
3311
+ .command("read <ref>")
3312
+ .description("Resolve a scholarly PDF candidate, download it locally, and extract text with pdftotext")
3313
+ .option("--source <site>", "force one source")
3314
+ .option("--sources <csv>", "override auto-routed source list")
3315
+ .option("--venue <venue>", "source-local venue scope, e.g. CVPR or ICCV")
3316
+ .option("--year <year>", "source-local proceedings year")
3317
+ .option("--volume <volume>", "source-local proceedings volume, e.g. PMLR v235")
3318
+ .option("--unpaywall-email <email>", "requester email for Unpaywall DOI lookup")
3319
+ .option("--output <dir>", "output directory", "./scholar-downloads")
3320
+ .option("--first-page <n>", "first page to extract", "1")
3321
+ .option("--last-page <n>", "last page to extract", "20")
3322
+ .option("--max-chars <n>", "maximum extracted/read text characters")
3323
+ .action(async (ref, opts) => {
3324
+ await runArtifactFromScholar(program, ref, opts, "read-pdf");
3325
+ });
468
3326
  scholar
469
3327
  .command("citations <ref>")
470
3328
  .description("List works citing this paper when the source supports it")
@@ -485,8 +3343,12 @@ export function registerScholarCommand(program) {
485
3343
  });
486
3344
  scholar
487
3345
  .command("doctor")
488
- .description("Inspect registered scholarly adapters and capability tags")
3346
+ .description("Inspect registered scholarly adapters, capability tags, and optional live search health")
489
3347
  .option("--sources <csv>", "limit to a comma-separated source list")
3348
+ .option("--live", "run a queryable search probe for each selected source instead of relying only on capability introspection")
3349
+ .option("--query <query>", "query for --live probes", "Llama 2")
3350
+ .option("--limit <n>", "per-source --live probe limit", "1")
3351
+ .option("-D, --detailed", "include live probe fields in table output")
490
3352
  .action(async (opts) => {
491
3353
  await runDoctor(program, opts);
492
3354
  });