@zenalexa/unicli 0.225.2 → 0.225.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184) hide show
  1. package/AGENTS.md +2 -2
  2. package/README.md +3 -3
  3. package/README.zh-CN.md +3 -3
  4. package/dist/adapters/acl-anthology/papers.d.ts +16 -9
  5. package/dist/adapters/acl-anthology/papers.d.ts.map +1 -1
  6. package/dist/adapters/acl-anthology/papers.js +322 -58
  7. package/dist/adapters/acl-anthology/papers.js.map +1 -1
  8. package/dist/adapters/arxiv/papers.d.ts +22 -4
  9. package/dist/adapters/arxiv/papers.d.ts.map +1 -1
  10. package/dist/adapters/arxiv/papers.js +202 -4
  11. package/dist/adapters/arxiv/papers.js.map +1 -1
  12. package/dist/adapters/baidu-scholar/search.d.ts +15 -1
  13. package/dist/adapters/baidu-scholar/search.d.ts.map +1 -1
  14. package/dist/adapters/baidu-scholar/search.js +72 -8
  15. package/dist/adapters/baidu-scholar/search.js.map +1 -1
  16. package/dist/adapters/biorxiv/preprints.d.ts +9 -0
  17. package/dist/adapters/biorxiv/preprints.d.ts.map +1 -0
  18. package/dist/adapters/biorxiv/preprints.js +78 -0
  19. package/dist/adapters/biorxiv/preprints.js.map +1 -0
  20. package/dist/adapters/cnki/search.d.ts +82 -0
  21. package/dist/adapters/cnki/search.d.ts.map +1 -0
  22. package/dist/adapters/cnki/search.js +236 -0
  23. package/dist/adapters/cnki/search.js.map +1 -0
  24. package/dist/adapters/cvf/papers.d.ts +12 -7
  25. package/dist/adapters/cvf/papers.d.ts.map +1 -1
  26. package/dist/adapters/cvf/papers.js +210 -27
  27. package/dist/adapters/cvf/papers.js.map +1 -1
  28. package/dist/adapters/dblp/publications.d.ts +12 -5
  29. package/dist/adapters/dblp/publications.d.ts.map +1 -1
  30. package/dist/adapters/dblp/publications.js +31 -8
  31. package/dist/adapters/dblp/publications.js.map +1 -1
  32. package/dist/adapters/google-scholar/search.d.ts +22 -1
  33. package/dist/adapters/google-scholar/search.d.ts.map +1 -1
  34. package/dist/adapters/google-scholar/search.js +129 -14
  35. package/dist/adapters/google-scholar/search.js.map +1 -1
  36. package/dist/adapters/hf/paper.d.ts +12 -3
  37. package/dist/adapters/hf/paper.d.ts.map +1 -1
  38. package/dist/adapters/hf/paper.js +65 -5
  39. package/dist/adapters/hf/paper.js.map +1 -1
  40. package/dist/adapters/medrxiv/preprints.d.ts +9 -0
  41. package/dist/adapters/medrxiv/preprints.d.ts.map +1 -0
  42. package/dist/adapters/medrxiv/preprints.js +78 -0
  43. package/dist/adapters/medrxiv/preprints.js.map +1 -0
  44. package/dist/adapters/neurips/proceedings.d.ts +8 -7
  45. package/dist/adapters/neurips/proceedings.d.ts.map +1 -1
  46. package/dist/adapters/neurips/proceedings.js +209 -21
  47. package/dist/adapters/neurips/proceedings.js.map +1 -1
  48. package/dist/adapters/openalex/works.d.ts +21 -5
  49. package/dist/adapters/openalex/works.d.ts.map +1 -1
  50. package/dist/adapters/openalex/works.js +108 -8
  51. package/dist/adapters/openalex/works.js.map +1 -1
  52. package/dist/adapters/openreview/papers.d.ts +10 -4
  53. package/dist/adapters/openreview/papers.d.ts.map +1 -1
  54. package/dist/adapters/openreview/papers.js +351 -24
  55. package/dist/adapters/openreview/papers.js.map +1 -1
  56. package/dist/adapters/pmlr/proceedings.d.ts +6 -6
  57. package/dist/adapters/pmlr/proceedings.d.ts.map +1 -1
  58. package/dist/adapters/pmlr/proceedings.js +92 -12
  59. package/dist/adapters/pmlr/proceedings.js.map +1 -1
  60. package/dist/adapters/pubmed/articles.d.ts +8 -4
  61. package/dist/adapters/pubmed/articles.d.ts.map +1 -1
  62. package/dist/adapters/pubmed/articles.js +272 -39
  63. package/dist/adapters/pubmed/articles.js.map +1 -1
  64. package/dist/adapters/rxiv/preprints.d.ts +75 -0
  65. package/dist/adapters/rxiv/preprints.d.ts.map +1 -0
  66. package/dist/adapters/rxiv/preprints.js +651 -0
  67. package/dist/adapters/rxiv/preprints.js.map +1 -0
  68. package/dist/adapters/scholar-artifacts/pdf-read.d.ts +49 -0
  69. package/dist/adapters/scholar-artifacts/pdf-read.d.ts.map +1 -0
  70. package/dist/adapters/scholar-artifacts/pdf-read.js +204 -0
  71. package/dist/adapters/scholar-artifacts/pdf-read.js.map +1 -0
  72. package/dist/adapters/scholar-artifacts/pdf.d.ts +16 -0
  73. package/dist/adapters/scholar-artifacts/pdf.d.ts.map +1 -0
  74. package/dist/adapters/scholar-artifacts/pdf.js +122 -0
  75. package/dist/adapters/scholar-artifacts/pdf.js.map +1 -0
  76. package/dist/adapters/semantic-scholar/papers.d.ts +6 -6
  77. package/dist/adapters/semantic-scholar/papers.d.ts.map +1 -1
  78. package/dist/adapters/semantic-scholar/papers.js +80 -6
  79. package/dist/adapters/semantic-scholar/papers.js.map +1 -1
  80. package/dist/adapters/unpaywall/works.d.ts +7 -7
  81. package/dist/adapters/unpaywall/works.d.ts.map +1 -1
  82. package/dist/adapters/unpaywall/works.js +104 -12
  83. package/dist/adapters/unpaywall/works.js.map +1 -1
  84. package/dist/adapters/wanfang/search.d.ts +14 -0
  85. package/dist/adapters/wanfang/search.d.ts.map +1 -1
  86. package/dist/adapters/wanfang/search.js +56 -7
  87. package/dist/adapters/wanfang/search.js.map +1 -1
  88. package/dist/browser/page.d.ts +2 -0
  89. package/dist/browser/page.d.ts.map +1 -1
  90. package/dist/browser/page.js +12 -0
  91. package/dist/browser/page.js.map +1 -1
  92. package/dist/commands/browser/actions.d.ts.map +1 -1
  93. package/dist/commands/browser/actions.js +59 -3
  94. package/dist/commands/browser/actions.js.map +1 -1
  95. package/dist/commands/scholar.d.ts +77 -5
  96. package/dist/commands/scholar.d.ts.map +1 -1
  97. package/dist/commands/scholar.js +2945 -83
  98. package/dist/commands/scholar.js.map +1 -1
  99. package/dist/core/command-contract.d.ts.map +1 -1
  100. package/dist/core/command-contract.js +5 -0
  101. package/dist/core/command-contract.js.map +1 -1
  102. package/dist/core/schema-v2.d.ts +1 -0
  103. package/dist/core/schema-v2.d.ts.map +1 -1
  104. package/dist/core/schema-v2.js +1 -0
  105. package/dist/core/schema-v2.js.map +1 -1
  106. package/dist/discovery/aliases.d.ts.map +1 -1
  107. package/dist/discovery/aliases.js +208 -0
  108. package/dist/discovery/aliases.js.map +1 -1
  109. package/dist/discovery/core-catalog.d.ts +2 -0
  110. package/dist/discovery/core-catalog.d.ts.map +1 -1
  111. package/dist/discovery/core-catalog.js +487 -0
  112. package/dist/discovery/core-catalog.js.map +1 -1
  113. package/dist/discovery/intents.d.ts.map +1 -1
  114. package/dist/discovery/intents.js +273 -2
  115. package/dist/discovery/intents.js.map +1 -1
  116. package/dist/discovery/loader.d.ts.map +1 -1
  117. package/dist/discovery/loader.js +3 -0
  118. package/dist/discovery/loader.js.map +1 -1
  119. package/dist/engine/capability-policy.d.ts.map +1 -1
  120. package/dist/engine/capability-policy.js +30 -4
  121. package/dist/engine/capability-policy.js.map +1 -1
  122. package/dist/engine/kernel/stages.d.ts.map +1 -1
  123. package/dist/engine/kernel/stages.js +3 -0
  124. package/dist/engine/kernel/stages.js.map +1 -1
  125. package/dist/engine/operation-policy.d.ts +4 -1
  126. package/dist/engine/operation-policy.d.ts.map +1 -1
  127. package/dist/engine/operation-policy.js +23 -0
  128. package/dist/engine/operation-policy.js.map +1 -1
  129. package/dist/fast-path/manifest.d.ts +3 -0
  130. package/dist/fast-path/manifest.d.ts.map +1 -1
  131. package/dist/fast-path/manifest.js.map +1 -1
  132. package/dist/fast-path/policy.d.ts.map +1 -1
  133. package/dist/fast-path/policy.js +3 -0
  134. package/dist/fast-path/policy.js.map +1 -1
  135. package/dist/manifest-compact.txt +1 -1
  136. package/dist/manifest.json +6804 -1002
  137. package/dist/registry.d.ts +2 -0
  138. package/dist/registry.d.ts.map +1 -1
  139. package/dist/registry.js +1 -0
  140. package/dist/registry.js.map +1 -1
  141. package/dist/types/scholarly.d.ts +19 -4
  142. package/dist/types/scholarly.d.ts.map +1 -1
  143. package/dist/types/scholarly.js +4 -4
  144. package/dist/types.d.ts +8 -0
  145. package/dist/types.d.ts.map +1 -1
  146. package/dist/types.js.map +1 -1
  147. package/package.json +1 -1
  148. package/server.json +2 -2
  149. package/skills/unicli/SKILL.md +1 -1
  150. package/skills/unicli-claude-code/SKILL.md +1 -1
  151. package/skills/unicli-hermes/SKILL.md +1 -1
  152. package/src/adapters/acl-anthology/papers.test.ts +111 -0
  153. package/src/adapters/acl-anthology/papers.ts +379 -71
  154. package/src/adapters/arxiv/papers.test.ts +46 -0
  155. package/src/adapters/arxiv/papers.ts +251 -4
  156. package/src/adapters/baidu-scholar/search.ts +74 -11
  157. package/src/adapters/biorxiv/preprints.ts +112 -0
  158. package/src/adapters/cnki/search.ts +357 -0
  159. package/src/adapters/cvf/papers.ts +260 -27
  160. package/src/adapters/dblp/publications.test.ts +9 -0
  161. package/src/adapters/dblp/publications.ts +31 -8
  162. package/src/adapters/google-scholar/search.ts +165 -17
  163. package/src/adapters/hf/paper.test.ts +23 -0
  164. package/src/adapters/hf/paper.ts +89 -5
  165. package/src/adapters/hf/top.yaml +34 -2
  166. package/src/adapters/huggingface-papers/daily.yaml +37 -3
  167. package/src/adapters/huggingface-papers/search.yaml +43 -9
  168. package/src/adapters/medrxiv/preprints.ts +112 -0
  169. package/src/adapters/neurips/proceedings.ts +266 -22
  170. package/src/adapters/openalex/works.test.ts +15 -4
  171. package/src/adapters/openalex/works.ts +136 -8
  172. package/src/adapters/openreview/papers.test.ts +31 -0
  173. package/src/adapters/openreview/papers.ts +407 -29
  174. package/src/adapters/pmlr/proceedings.ts +102 -12
  175. package/src/adapters/pubmed/articles.test.ts +88 -1
  176. package/src/adapters/pubmed/articles.ts +343 -44
  177. package/src/adapters/rxiv/preprints.test.ts +233 -0
  178. package/src/adapters/rxiv/preprints.ts +849 -0
  179. package/src/adapters/scholar-artifacts/pdf-read.ts +277 -0
  180. package/src/adapters/scholar-artifacts/pdf.ts +133 -0
  181. package/src/adapters/semantic-scholar/papers.ts +98 -6
  182. package/src/adapters/unpaywall/works.ts +141 -12
  183. package/src/adapters/wanfang/search.ts +57 -7
  184. package/src/adapters/cnki/search.yaml +0 -49
@@ -17,24 +17,58 @@ args:
17
17
 
18
18
  pipeline:
19
19
  - fetch:
20
- url: https://huggingface.co/api/papers
20
+ url: https://huggingface.co/api/papers/search
21
21
  params:
22
- search: "${{ args.query }}"
22
+ q: "${{ args.query }}"
23
23
  limit: "${{ args.limit }}"
24
24
 
25
25
  - map:
26
- title: "${{ item.title || '' }}"
27
- authors: "${{ item.authors ? item.authors.slice(0,3).map(a => a.name || a).join(', ') : '' }}"
28
- published: "${{ item.publishedAt || '' }}"
29
- upvotes: "${{ item.upvotes || 0 }}"
30
- url: "${{ 'https://huggingface.co/papers/' + (item.id || '') }}"
26
+ id: "${{ (item.paper || item).id || '' }}"
27
+ title: "${{ (item.paper || item).title || item.title || '' }}"
28
+ authors: "${{ (item.paper || item).authors ? (item.paper || item).authors.slice(0,3).map(a => a.name || a).join(', ') : '' }}"
29
+ published: "${{ (item.paper || item).publishedAt || item.publishedAt || '' }}"
30
+ upvotes: "${{ (item.paper || item).upvotes || item.upvotes || 0 }}"
31
+ url: "${{ 'https://huggingface.co/papers/' + ((item.paper || item).id || '') }}"
32
+ source_url: "${{ (item.paper || item).id ? 'https://huggingface.co/papers/' + (item.paper || item).id : '' }}"
33
+ pdf_url: "${{ (item.paper || item).id ? 'https://arxiv.org/pdf/' + (item.paper || item).id : '' }}"
34
+ code_url: "${{ (item.paper || item).githubRepo || '' }}"
35
+ github_stars: "${{ (item.paper || item).githubStars || '' }}"
36
+ project_url: "${{ (item.paper || item).projectPage || '' }}"
37
+ dataset_url: "${{ (item.paper || item).linkedDatasets && (item.paper || item).linkedDatasets[0] ? 'https://huggingface.co/datasets/' + (item.paper || item).linkedDatasets[0].id : '' }}"
38
+ model_urls: "${{ (item.paper || item).linkedModels ? (item.paper || item).linkedModels.map(x => 'https://huggingface.co/' + x.id).join(', ') : '' }}"
39
+ dataset_urls: "${{ (item.paper || item).linkedDatasets ? (item.paper || item).linkedDatasets.map(x => 'https://huggingface.co/datasets/' + x.id).join(', ') : '' }}"
40
+ space_urls: "${{ (item.paper || item).linkedSpaces ? (item.paper || item).linkedSpaces.map(x => 'https://huggingface.co/spaces/' + x.id).join(', ') : '' }}"
41
+ num_models: "${{ (item.paper || item).numTotalModels !== null && (item.paper || item).numTotalModels !== undefined ? (item.paper || item).numTotalModels : '' }}"
42
+ num_datasets: "${{ (item.paper || item).numTotalDatasets !== null && (item.paper || item).numTotalDatasets !== undefined ? (item.paper || item).numTotalDatasets : '' }}"
43
+ num_spaces: "${{ (item.paper || item).numTotalSpaces !== null && (item.paper || item).numTotalSpaces !== undefined ? (item.paper || item).numTotalSpaces : '' }}"
31
44
 
32
45
  - limit: ${{ args.limit }}
33
46
 
34
- columns: [title, authors, upvotes, published, url]
47
+ columns:
48
+ [
49
+ id,
50
+ title,
51
+ authors,
52
+ upvotes,
53
+ published,
54
+ url,
55
+ source_url,
56
+ pdf_url,
57
+ code_url,
58
+ github_stars,
59
+ project_url,
60
+ dataset_url,
61
+ model_urls,
62
+ dataset_urls,
63
+ space_urls,
64
+ num_models,
65
+ num_datasets,
66
+ num_spaces,
67
+ ]
35
68
 
36
69
  # schema-v2 metadata — injected by `unicli migrate schema-v2`
37
- capabilities: ["http.fetch", "scholar.search", "scholar.code"]
70
+ capabilities:
71
+ ["http.fetch", "scholar.search", "scholar.code", "scholar.datasets"]
38
72
  minimum_capability: http.fetch
39
73
  trust: public
40
74
  confidentiality: public
@@ -0,0 +1,112 @@
1
+ /**
2
+ * @owner src::adapters::medrxiv::preprints
3
+ * @does Registers medRxiv recent/search, DOI metadata, PDF download, and read commands backed by the official xRxiv API helpers.
4
+ * @needs src/adapters/rxiv/preprints.ts, api.biorxiv.org medRxiv endpoints, medRxiv PDF/JATS asset URLs.
5
+ * @feeds surface coverage ledger, clinical preprint discovery/search, scholar DOI read/download routing.
6
+ * @breaks medRxiv API drift, date-window search exhaustion, source-asset denial, or missing pdftotext stops read/download rather than fabricating text.
7
+ */
8
+
9
+ import { cli, Strategy } from "../../registry.js";
10
+ import {
11
+ downloadRxivPdf,
12
+ fetchPaperRow,
13
+ fetchRecentRows,
14
+ fetchSearchRows,
15
+ readRxivPaper,
16
+ RXIV_DOWNLOAD_ARGS,
17
+ RXIV_DOWNLOAD_CAPABILITIES,
18
+ RXIV_DOWNLOAD_COLUMNS,
19
+ RXIV_PAPER_ARGS,
20
+ RXIV_PAPER_CAPABILITIES,
21
+ RXIV_PAPER_COLUMNS,
22
+ RXIV_READ_ARGS,
23
+ RXIV_READ_CAPABILITIES,
24
+ RXIV_READ_COLUMNS,
25
+ RXIV_RECENT_ARGS,
26
+ RXIV_RECENT_CAPABILITIES,
27
+ RXIV_RECENT_COLUMNS,
28
+ RXIV_SEARCH_ARGS,
29
+ RXIV_SEARCH_CAPABILITIES,
30
+ RXIV_SEARCH_COLUMNS,
31
+ type RxivConfig,
32
+ } from "../rxiv/preprints.js";
33
+
34
/** API host passed as `domain` to every medRxiv command registered in this file. */
const DOMAIN = "api.biorxiv.org";

/** Site settings handed to the shared rxiv helpers by each medRxiv command handler. */
const CONFIG: RxivConfig = {
  site: "medrxiv",
  label: "medRxiv",
  apiServer: "medrxiv",
  webOrigin: "https://www.medrxiv.org",
};
41
+
42
// Registers `medrxiv recent`; the handler forwards the parsed arguments to
// fetchRecentRows together with the medRxiv CONFIG.
cli({
  site: "medrxiv",
  name: "recent",
  description: "List recent medRxiv preprints from the official API",
  domain: DOMAIN,
  strategy: Strategy.PUBLIC,
  args: RXIV_RECENT_ARGS,
  columns: RXIV_RECENT_COLUMNS,
  capabilities: RXIV_RECENT_CAPABILITIES,
  func: async (_browserPage, options) => {
    const rows = await fetchRecentRows(CONFIG, options);
    return rows;
  },
});
53
+
54
// Registers `medrxiv search`; the handler forwards the parsed arguments to
// fetchSearchRows together with the medRxiv CONFIG.
cli({
  site: "medrxiv",
  name: "search",
  description:
    "Search medRxiv official API metadata within a bounded date window",
  domain: DOMAIN,
  strategy: Strategy.PUBLIC,
  args: RXIV_SEARCH_ARGS,
  columns: RXIV_SEARCH_COLUMNS,
  capabilities: RXIV_SEARCH_CAPABILITIES,
  func: async (_browserPage, options) => {
    const rows = await fetchSearchRows(CONFIG, options);
    return rows;
  },
});
66
+
67
// Registers `medrxiv paper`; returns a one-element array holding the row that
// fetchPaperRow resolves for the given reference.
cli({
  site: "medrxiv",
  name: "paper",
  description: "Fetch medRxiv preprint metadata by DOI",
  domain: DOMAIN,
  strategy: Strategy.PUBLIC,
  args: RXIV_PAPER_ARGS,
  columns: RXIV_PAPER_COLUMNS,
  capabilities: RXIV_PAPER_CAPABILITIES,
  func: async (_browserPage, options) => {
    // The reference may arrive under any of these three argument names.
    const reference = options.doi ?? options.id ?? options.ref;
    const row = await fetchPaperRow(CONFIG, reference);
    return [row];
  },
});
80
+
81
// Registers `medrxiv download`; resolves the paper row first, then hands it to
// downloadRxivPdf along with the requested output location.
cli({
  site: "medrxiv",
  name: "download",
  description: "Download a medRxiv preprint PDF by DOI",
  domain: DOMAIN,
  strategy: Strategy.PUBLIC,
  args: RXIV_DOWNLOAD_ARGS,
  columns: RXIV_DOWNLOAD_COLUMNS,
  capabilities: RXIV_DOWNLOAD_CAPABILITIES,
  minimum_capability: "http.download",
  func: async (_browserPage, options) => {
    // The reference may arrive under any of these three argument names.
    const reference = options.doi ?? options.id ?? options.ref;
    const row = await fetchPaperRow(CONFIG, reference);
    const downloaded = await downloadRxivPdf(CONFIG, row, options.output);
    return [downloaded];
  },
});
99
+
100
// Registers `medrxiv read`; the handler wraps the single result of
// readRxivPaper in an array.
cli({
  site: "medrxiv",
  name: "read",
  description:
    "Read medRxiv preprint text by DOI, preferring JATS XML before PDF extraction",
  domain: DOMAIN,
  strategy: Strategy.PUBLIC,
  args: RXIV_READ_ARGS,
  columns: RXIV_READ_COLUMNS,
  capabilities: RXIV_READ_CAPABILITIES,
  minimum_capability: "subprocess.exec",
  func: async (_browserPage, options) => {
    const result = await readRxivPaper(CONFIG, options);
    return [result];
  },
});
@@ -1,12 +1,12 @@
1
1
  /**
2
2
  * @owner src::adapters::neurips::proceedings
3
- * @does Registers NeurIPS proceedings search over the official yearly paper list.
4
- * @needs proceedings.neurips.cc static HTML, src/registry.ts
5
- * @feeds src/commands/scholar.ts via scholar.search, scholar.pdf, and scholar.venue
6
- * @breaks NeurIPS markup drift surfaces as empty parse output; no unrelated source fallback is used.
7
- * @invariants Year is explicit; paper URLs are absolutized against proceedings.neurips.cc.
8
- * @side-effects HTTPS egress to proceedings.neurips.cc only
9
- * @perf O(N) over one proceedings HTML page
3
+ * @does Registers NeurIPS proceedings search, paper detail retrieval, and PDF text reading over official paper pages.
4
+ * @needs proceedings.neurips.cc static HTML/PDFs, src/adapters/scholar-artifacts/pdf-read.ts, src/registry.ts
5
+ * @feeds src/commands/scholar.ts via scholar.search, scholar.get, scholar.pdf, scholar.fulltext, and scholar.venue
6
+ * @breaks NeurIPS markup/PDF drift, denied downloads, or missing pdftotext surface as explicit adapter errors; no unrelated source fallback is used.
7
+ * @invariants Year is explicit; paper detail prefers citation_* metadata and official /file/ PDF URLs.
8
+ * @side-effects HTTPS egress to proceedings.neurips.cc; read writes one PDF artifact and executes pdftotext.
9
+ * @perf O(N) over one proceedings HTML page; read is O(PDF bytes + selected pages)
10
10
  * @concurrency safe
11
11
  * @test tests/unit/adapters/scholar-sources.test.ts
12
12
  * @stability experimental
@@ -15,8 +15,18 @@
15
15
 
16
16
  import { cli, Strategy } from "../../registry.js";
17
17
  import type { ScholarlyWorkRecord } from "../../types/scholarly.js";
18
+ import { readScholarPdf } from "../scholar-artifacts/pdf-read.js";
18
19
 
19
20
  const ORIGIN = "https://proceedings.neurips.cc";
21
/** User-Agent string sent with the NeurIPS requests made by this adapter. */
const NEURIPS_USER_AGENT =
  "unicli-neurips/1.0 (https://github.com/olo-dot-io/Uni-CLI)";

/**
 * An Error optionally annotated with machine-readable recovery hints.
 * All extra fields are optional so a plain Error can be cast and filled in.
 */
interface NeuripsActionableError extends Error {
  code?: string;
  suggestion?: string;
  retryable?: boolean;
  alternatives?: string[];
}
20
30
 
21
31
  function decode(value: string): string {
22
32
  return value
@@ -35,6 +45,21 @@ function absolute(path: string): string {
35
45
  : `${ORIGIN}${path.startsWith("/") ? "" : "/"}${path}`;
36
46
  }
37
47
 
48
/**
 * Collects the decoded `content` value of every
 * `<meta name="NAME" content="...">` tag in `html` whose name equals `name`
 * (case-insensitive), in document order.
 *
 * Only tags that list `name` immediately before `content` are recognised.
 *
 * @param html Raw HTML to scan.
 * @param name Meta tag name to look for, e.g. "citation_author".
 * @returns Decoded content strings; empty when no tag matches.
 */
function metaContents(html: string, name: string): string[] {
  // Match the name literally even if it contains regex metacharacters.
  const escapedName = name.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  // The content value must be closed by the same quote that opened it (\1).
  // Accepting either quote as the terminator truncated double-quoted values
  // containing an apostrophe, such as "Don't ..." or "O'Brien".
  const re = new RegExp(
    `<meta\\s+name=["']${escapedName}["']\\s+content=(["'])([\\s\\S]*?)\\1[^>]*>`,
    "gi",
  );
  return Array.from(html.matchAll(re), (match) => decode(match[2]));
}
58
+
59
/**
 * Returns the first value found by metaContents for `name`, or an empty
 * string when the page has no such meta tag.
 */
function firstMetaContent(html: string, name: string): string {
  const [first = ""] = metaContents(html, name);
  return first;
}
62
+
38
63
  function requireYear(value: unknown): string {
39
64
  const year = String(value ?? "").trim();
40
65
  if (!/^\d{4}$/.test(year))
@@ -42,6 +67,90 @@ function requireYear(value: unknown): string {
42
67
  return year;
43
68
  }
44
69
 
70
/**
 * Normalises a user-supplied paper reference into the abstract-page id used
 * to build `/hash/<id>.html` URLs.
 *
 * Accepted forms:
 *  - a bare id, with or without a trailing `.html` / `.pdf`;
 *  - a URL containing `/hash/<id>.html`;
 *  - a URL containing `/file/<id>.pdf`. Previously this form could never
 *    match because only `.html` was accepted after `/file/`.
 *
 * PDF references use a `-Paper` file name, so they are mapped back to the
 * `-Abstract` id of the abstract page.
 *
 * @param value Raw argument value; anything is stringified and trimmed.
 * @returns The validated id (letters, digits, `_`, `.`, `-` only).
 * @throws Error when the resulting id is empty or has other characters.
 */
function requireNeuripsPaperId(value: unknown): string {
  const raw = String(value ?? "").trim();
  const fromUrl = raw.match(/\/(?:hash|file)\/([^/?#]+)\.(html|pdf)/);
  const stem = fromUrl ? fromUrl[1] : raw.replace(/\.(?:html|pdf)$/, "");
  const isPdfRef = fromUrl ? fromUrl[2] === "pdf" : raw.endsWith(".pdf");
  const id = isPdfRef ? stem.replace(/-Paper(?=-|$)/, "-Abstract") : stem;
  // Rejecting "/" and other separators keeps the id a single path segment.
  if (!/^[A-Za-z0-9_.-]+$/.test(id)) {
    throw new Error(`NeurIPS paper id "${raw}" is not valid.`);
  }
  return id;
}
81
+
82
/** Builds the abstract-page URL for a paper id within a given proceedings year. */
function abstractUrl(id: string, year: string): string {
  const segments = [ORIGIN, "paper_files", "paper", year, "hash", `${id}.html`];
  return segments.join("/");
}
85
+
86
/**
 * Derives the PDF URL from an abstract-page URL by swapping the `/hash/`
 * directory for `/file/`, the `-Abstract` marker for `-Paper`, and the
 * `.html` extension for `.pdf`.
 *
 * The marker is matched both as `-Abstract-<track>` and as a bare
 * `-Abstract.html`. The earlier literal `"-Abstract-"` replacement left the
 * bare form untouched and produced a `...-Abstract.pdf` URL.
 *
 * @param sourceUrl Abstract-page URL.
 * @returns The corresponding PDF URL.
 */
function pdfUrlFromAbstractUrl(sourceUrl: string): string {
  return sourceUrl
    .replace("/hash/", "/file/")
    .replace(/-Abstract(?=[-.])/, "-Paper")
    .replace(/\.html$/, ".pdf");
}
92
+
93
/**
 * Creates an Error for a failed NeurIPS request, annotated with a stable
 * code, a user-facing suggestion, and a retryable flag derived from `detail`.
 *
 * @param label What was being fetched; becomes the start of the message.
 * @param detail Failure detail (exception message or "HTTP <status>").
 * @returns The annotated error; the caller is expected to throw it.
 */
function neuripsUpstreamError(
  label: string,
  detail: string,
): NeuripsActionableError {
  // Network-level failures plus HTTP 429/5xx are treated as worth retrying.
  const transientFailure =
    /fetch failed|timeout|ECONNRESET|ETIMEDOUT|HTTP (429|5\d\d)/i;
  return Object.assign(new Error(`${label} failed: ${detail}.`), {
    code: "upstream_error",
    suggestion:
      "NeurIPS proceedings did not return the expected public paper page on this network path; retry later or verify the official proceedings.neurips.cc page manually.",
    retryable: transientFailure.test(detail),
    alternatives: [] as string[],
  }) as NeuripsActionableError;
}
108
+
109
/**
 * Builds a ScholarlyWorkRecord from a NeurIPS paper page.
 *
 * `citation_*` meta tags are consulted first; the title and PDF link fall
 * back to page markup, and the year falls back to the `/paper/<year>/`
 * segment of `sourceUrl`.
 *
 * @param html Raw HTML of the paper page.
 * @param sourceUrl URL the HTML was fetched from; its last path segment
 *   (minus `.html`) becomes the record id.
 * @returns The parsed record, stamped with the current time.
 * @throws Error when neither the meta tag nor the heading yields a title.
 */
export function parseNeuripsPaperPage(
  html: string,
  sourceUrl: string,
): ScholarlyWorkRecord {
  // Replaces tags with spaces, then decodes; a missing fragment becomes "".
  const stripTags = (fragment: string | undefined): string =>
    decode(fragment?.replace(/<[^>]+>/g, " ") ?? "");

  const title =
    firstMetaContent(html, "citation_title") ||
    stripTags(html.match(/<h1 class="paper-title">([\s\S]*?)<\/h1>/i)?.[1]);
  if (!title) throw new Error("NeurIPS paper page did not expose a title.");

  const publishedYear = firstMetaContent(
    html,
    "citation_publication_date",
  ).match(/\d{4}/)?.[0];
  const yearText = publishedYear ?? sourceUrl.match(/\/paper\/(\d{4})\//)?.[1];

  const lastSegment = sourceUrl.split("/").pop();
  const id = lastSegment?.replace(/\.html$/, "") ?? title;

  const pdfHref =
    firstMetaContent(html, "citation_pdf_url") ||
    html.match(/href=["']([^"']+-Paper-[^"']+\.pdf)["']/i)?.[1] ||
    "";

  const abstractText = stripTags(
    html.match(/<p class="paper-abstract">([\s\S]*?)<\/p>\s*<\/p>/i)?.[1],
  );

  return {
    id,
    title,
    authors: metaContents(html, "citation_author"),
    year: yearText ? Number(yearText) : undefined,
    venue: "NeurIPS",
    type: firstMetaContent(html, "citation_journal_title") || undefined,
    doi: firstMetaContent(html, "citation_doi") || undefined,
    abstract: abstractText || undefined,
    pdf_url: pdfHref ? absolute(pdfHref) : undefined,
    source_adapter: "neurips",
    source_url: sourceUrl,
    retrieved_at: new Date().toISOString(),
  };
}
153
+
45
154
  export function parseNeuripsRows(
46
155
  html: string,
47
156
  year = "2024",
@@ -65,9 +174,7 @@ export function parseNeuripsRows(
65
174
  .filter(Boolean),
66
175
  year: Number(year),
67
176
  venue: "NeurIPS",
68
- pdf_url: sourceUrl
69
- .replace("-Abstract-", "-Paper-")
70
- .replace(/\.html$/, ".pdf"),
177
+ pdf_url: pdfUrlFromAbstractUrl(sourceUrl),
71
178
  source_adapter: "neurips",
72
179
  source_url: sourceUrl,
73
180
  retrieved_at: new Date().toISOString(),
@@ -76,6 +183,54 @@ export function parseNeuripsRows(
76
183
  return out;
77
184
  }
78
185
 
186
/**
 * Fetches a NeurIPS page and returns its body text.
 *
 * @param url Page URL to request.
 * @param label Human-readable name of the page, used in error messages.
 * @returns The response body as text.
 * @throws Error with message "<label> returned no page." on HTTP 404.
 * @throws NeuripsActionableError (via neuripsUpstreamError) when the request
 *   fails, the status is otherwise not OK, or the body cannot be read.
 */
async function fetchNeuripsHtml(url: string, label: string): Promise<string> {
  const asUpstreamError = (error: unknown): NeuripsActionableError =>
    neuripsUpstreamError(
      label,
      error instanceof Error ? error.message : String(error),
    );

  let response: Response;
  try {
    response = await fetch(url, {
      headers: {
        Accept: "text/html",
        "User-Agent": NEURIPS_USER_AGENT,
      },
    });
  } catch (error) {
    throw asUpstreamError(error);
  }
  if (response.status === 404) throw new Error(`${label} returned no page.`);
  if (!response.ok)
    throw neuripsUpstreamError(label, `HTTP ${response.status}`);
  // Await inside the try so a connection dropped while streaming the body is
  // reported like any other upstream failure instead of escaping unannotated.
  try {
    return await response.text();
  } catch (error) {
    throw asUpstreamError(error);
  }
}
206
+
207
/**
 * Hands a parsed NeurIPS record to readScholarPdf together with the
 * read-related CLI arguments.
 *
 * Page and size limits are accepted under both their kebab-case and
 * camelCase names, with the kebab-case name taking precedence.
 *
 * @param row Parsed paper record; must carry a `pdf_url`.
 * @param kwargs Raw command arguments.
 * @returns Whatever readScholarPdf resolves to.
 * @throws Error when the record has no PDF URL.
 */
async function readNeuripsPaperPdf(
  row: ScholarlyWorkRecord,
  kwargs: Record<string, unknown>,
): Promise<Record<string, unknown>> {
  const { id, title, source_url, pdf_url } = row;
  if (!pdf_url) throw new Error(`NeurIPS paper ${id} has no PDF URL.`);

  const firstPage = kwargs["first-page"] ?? kwargs.firstPage;
  const lastPage = kwargs["last-page"] ?? kwargs.lastPage;
  const maxChars = kwargs["max-chars"] ?? kwargs.maxChars;

  return readScholarPdf(
    {
      id,
      title,
      source_adapter: "neurips",
      source_url,
      pdf_url,
      output: kwargs.output,
      filename: kwargs.filename,
      "first-page": firstPage,
      "last-page": lastPage,
      "max-chars": maxChars,
    },
    {
      site: "neurips",
      command: "read",
      defaultOutput: "./neurips-downloads",
      userAgent: NEURIPS_USER_AGENT,
    },
  );
}
233
+
79
234
  cli({
80
235
  site: "neurips",
81
236
  name: "search",
@@ -100,19 +255,14 @@ cli({
100
255
  .toLowerCase();
101
256
  if (!query) throw new Error("neurips search query cannot be empty.");
102
257
  const year = requireYear(kwargs.year);
103
- const response = await fetch(`${ORIGIN}/paper_files/paper/${year}`, {
104
- headers: {
105
- Accept: "text/html",
106
- "User-Agent":
107
- "unicli-neurips/1.0 (https://github.com/olo-dot-io/Uni-CLI)",
108
- },
109
- });
110
- if (response.status === 404)
111
- throw new Error(`NeurIPS ${year} returned no proceedings page.`);
112
- if (!response.ok)
113
- throw new Error(`NeurIPS ${year} returned HTTP ${response.status}.`);
114
258
  const limit = Math.min(Math.max(Number(kwargs.limit ?? 20), 1), 200);
115
- const rows = parseNeuripsRows(await response.text(), year)
259
+ const rows = parseNeuripsRows(
260
+ await fetchNeuripsHtml(
261
+ `${ORIGIN}/paper_files/paper/${year}`,
262
+ `NeurIPS ${year}`,
263
+ ),
264
+ year,
265
+ )
116
266
  .filter((row) =>
117
267
  `${row.title} ${row.authors?.join(" ") ?? ""}`
118
268
  .toLowerCase()
@@ -124,3 +274,97 @@ cli({
124
274
  return rows;
125
275
  },
126
276
  });
277
+
278
// Registers `neurips paper`: fetches one abstract page and returns its parsed
// metadata as a single-row result.
cli({
  site: "neurips",
  name: "paper",
  description: "Fetch NeurIPS proceedings paper metadata by page id",
  domain: "proceedings.neurips.cc",
  strategy: Strategy.PUBLIC,
  args: [
    { name: "id", type: "str", required: true, positional: true },
    { name: "year", type: "str", default: "2024" },
  ],
  columns: [
    "id",
    "title",
    "authors",
    "year",
    "venue",
    "doi",
    "pdf_url",
    "source_url",
  ],
  capabilities: ["http.fetch", "scholar.get", "scholar.pdf"],
  func: async (_page, kwargs) => {
    const ref = kwargs.id ?? kwargs.ref;
    // A pasted proceedings URL already names its volume. Prefer that year
    // over --year, whose default ("2024") would otherwise build a URL for
    // the wrong volume.
    const yearFromRef = String(ref ?? "").match(
      /\/paper\/(\d{4})\/(?:hash|file)\//,
    )?.[1];
    const year = requireYear(yearFromRef ?? kwargs.year);
    const id = requireNeuripsPaperId(ref);
    const url = abstractUrl(id, year);
    return [
      parseNeuripsPaperPage(
        await fetchNeuripsHtml(url, `NeurIPS paper ${id}`),
        url,
      ),
    ];
  },
});
311
+
312
// Registers `neurips read`: resolves the paper page, then downloads its PDF
// and extracts text through readNeuripsPaperPdf.
cli({
  site: "neurips",
  name: "read",
  description:
    "Download a NeurIPS proceedings paper PDF by page id and extract text",
  domain: "proceedings.neurips.cc",
  strategy: Strategy.PUBLIC,
  args: [
    { name: "id", type: "str", required: true, positional: true },
    { name: "year", type: "str", default: "2024" },
    {
      name: "output",
      type: "str",
      default: "./neurips-downloads",
      description: "Output directory for the downloaded PDF",
      "x-unicli-kind": "path",
    },
    { name: "filename", type: "str", description: "Output PDF filename" },
    { name: "first-page", type: "int", default: 1, description: "First page" },
    { name: "last-page", type: "int", default: 20, description: "Last page" },
    {
      name: "max-chars",
      type: "int",
      default: 40000,
      description: "Maximum extracted text characters",
    },
  ],
  columns: [
    "id",
    "title",
    "source_adapter",
    "source_url",
    "pdf_url",
    "path",
    "text_source",
    "text",
    "text_chars",
    "text_truncated",
  ],
  capabilities: [
    "http.fetch",
    "http.download",
    "subprocess.exec",
    "scholar.fulltext",
    "scholar.pdf",
  ],
  executables: ["pdftotext"],
  minimum_capability: "subprocess.exec",
  func: async (_page, kwargs) => {
    const ref = kwargs.id ?? kwargs.ref;
    // A pasted proceedings URL already names its volume. Prefer that year
    // over --year, whose default ("2024") would otherwise build a URL for
    // the wrong volume.
    const yearFromRef = String(ref ?? "").match(
      /\/paper\/(\d{4})\/(?:hash|file)\//,
    )?.[1];
    const year = requireYear(yearFromRef ?? kwargs.year);
    const id = requireNeuripsPaperId(ref);
    const url = abstractUrl(id, year);
    const row = parseNeuripsPaperPage(
      await fetchNeuripsHtml(url, `NeurIPS paper ${id}`),
      url,
    );
    return [await readNeuripsPaperPdf(row, kwargs)];
  },
});
@@ -40,8 +40,12 @@ describe("openalex agent-facing commands", () => {
40
40
  publication_year: 2026,
41
41
  cited_by_count: 5,
42
42
  authorships: [{ author: { display_name: "Ada" } }],
43
- primary_location: { source: { display_name: "Journal" } },
44
- open_access: { is_oa: true },
43
+ primary_location: {
44
+ landing_page_url: "https://publisher.test/paper",
45
+ pdf_url: "https://publisher.test/paper.pdf",
46
+ source: { display_name: "Journal" },
47
+ },
48
+ open_access: { is_oa: true, oa_url: "https://publisher.test/paper" },
45
49
  type: "article",
46
50
  },
47
51
  ],
@@ -62,7 +66,8 @@ describe("openalex agent-facing commands", () => {
62
66
  is_open_access: true,
63
67
  type: "article",
64
68
  doi: "10.1/example",
65
- pdf_url: "",
69
+ pdf_url: "https://publisher.test/paper.pdf",
70
+ landing_url: "https://publisher.test/paper",
66
71
  openalex_id: "W1234",
67
72
  source_adapter: "openalex",
68
73
  source_url: "https://openalex.org/W1234",
@@ -86,7 +91,11 @@ describe("openalex agent-facing commands", () => {
86
91
  { author: { display_name: "Ada" } },
87
92
  { author: { display_name: "Grace" } },
88
93
  ],
89
- primary_location: { source: { display_name: "Journal" } },
94
+ primary_location: {
95
+ landing_page_url: "https://publisher.test/paper",
96
+ source: { display_name: "Journal" },
97
+ },
98
+ best_oa_location: { pdf_url: "https://repository.test/paper.pdf" },
90
99
  cited_by_count: 5,
91
100
  open_access: { is_oa: true, oa_url: "https://example.test/pdf" },
92
101
  referenced_works: ["W1", "W2"],
@@ -98,6 +107,8 @@ describe("openalex agent-facing commands", () => {
98
107
  authors: "Ada, Grace",
99
108
  referencedCount: 2,
100
109
  doi: "10.1/example",
110
+ pdf_url: "https://repository.test/paper.pdf",
111
+ landing_url: "https://publisher.test/paper",
101
112
  abstract: "hello world",
102
113
  });
103
114
  expect(() => mapOpenAlexWorkRow({})).toThrow(