@zenalexa/unicli 0.225.2 → 0.225.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184) hide show
  1. package/AGENTS.md +2 -2
  2. package/README.md +3 -3
  3. package/README.zh-CN.md +3 -3
  4. package/dist/adapters/acl-anthology/papers.d.ts +16 -9
  5. package/dist/adapters/acl-anthology/papers.d.ts.map +1 -1
  6. package/dist/adapters/acl-anthology/papers.js +322 -58
  7. package/dist/adapters/acl-anthology/papers.js.map +1 -1
  8. package/dist/adapters/arxiv/papers.d.ts +22 -4
  9. package/dist/adapters/arxiv/papers.d.ts.map +1 -1
  10. package/dist/adapters/arxiv/papers.js +202 -4
  11. package/dist/adapters/arxiv/papers.js.map +1 -1
  12. package/dist/adapters/baidu-scholar/search.d.ts +15 -1
  13. package/dist/adapters/baidu-scholar/search.d.ts.map +1 -1
  14. package/dist/adapters/baidu-scholar/search.js +72 -8
  15. package/dist/adapters/baidu-scholar/search.js.map +1 -1
  16. package/dist/adapters/biorxiv/preprints.d.ts +9 -0
  17. package/dist/adapters/biorxiv/preprints.d.ts.map +1 -0
  18. package/dist/adapters/biorxiv/preprints.js +78 -0
  19. package/dist/adapters/biorxiv/preprints.js.map +1 -0
  20. package/dist/adapters/cnki/search.d.ts +82 -0
  21. package/dist/adapters/cnki/search.d.ts.map +1 -0
  22. package/dist/adapters/cnki/search.js +236 -0
  23. package/dist/adapters/cnki/search.js.map +1 -0
  24. package/dist/adapters/cvf/papers.d.ts +12 -7
  25. package/dist/adapters/cvf/papers.d.ts.map +1 -1
  26. package/dist/adapters/cvf/papers.js +210 -27
  27. package/dist/adapters/cvf/papers.js.map +1 -1
  28. package/dist/adapters/dblp/publications.d.ts +12 -5
  29. package/dist/adapters/dblp/publications.d.ts.map +1 -1
  30. package/dist/adapters/dblp/publications.js +31 -8
  31. package/dist/adapters/dblp/publications.js.map +1 -1
  32. package/dist/adapters/google-scholar/search.d.ts +22 -1
  33. package/dist/adapters/google-scholar/search.d.ts.map +1 -1
  34. package/dist/adapters/google-scholar/search.js +129 -14
  35. package/dist/adapters/google-scholar/search.js.map +1 -1
  36. package/dist/adapters/hf/paper.d.ts +12 -3
  37. package/dist/adapters/hf/paper.d.ts.map +1 -1
  38. package/dist/adapters/hf/paper.js +65 -5
  39. package/dist/adapters/hf/paper.js.map +1 -1
  40. package/dist/adapters/medrxiv/preprints.d.ts +9 -0
  41. package/dist/adapters/medrxiv/preprints.d.ts.map +1 -0
  42. package/dist/adapters/medrxiv/preprints.js +78 -0
  43. package/dist/adapters/medrxiv/preprints.js.map +1 -0
  44. package/dist/adapters/neurips/proceedings.d.ts +8 -7
  45. package/dist/adapters/neurips/proceedings.d.ts.map +1 -1
  46. package/dist/adapters/neurips/proceedings.js +209 -21
  47. package/dist/adapters/neurips/proceedings.js.map +1 -1
  48. package/dist/adapters/openalex/works.d.ts +21 -5
  49. package/dist/adapters/openalex/works.d.ts.map +1 -1
  50. package/dist/adapters/openalex/works.js +108 -8
  51. package/dist/adapters/openalex/works.js.map +1 -1
  52. package/dist/adapters/openreview/papers.d.ts +10 -4
  53. package/dist/adapters/openreview/papers.d.ts.map +1 -1
  54. package/dist/adapters/openreview/papers.js +351 -24
  55. package/dist/adapters/openreview/papers.js.map +1 -1
  56. package/dist/adapters/pmlr/proceedings.d.ts +6 -6
  57. package/dist/adapters/pmlr/proceedings.d.ts.map +1 -1
  58. package/dist/adapters/pmlr/proceedings.js +92 -12
  59. package/dist/adapters/pmlr/proceedings.js.map +1 -1
  60. package/dist/adapters/pubmed/articles.d.ts +8 -4
  61. package/dist/adapters/pubmed/articles.d.ts.map +1 -1
  62. package/dist/adapters/pubmed/articles.js +272 -39
  63. package/dist/adapters/pubmed/articles.js.map +1 -1
  64. package/dist/adapters/rxiv/preprints.d.ts +75 -0
  65. package/dist/adapters/rxiv/preprints.d.ts.map +1 -0
  66. package/dist/adapters/rxiv/preprints.js +651 -0
  67. package/dist/adapters/rxiv/preprints.js.map +1 -0
  68. package/dist/adapters/scholar-artifacts/pdf-read.d.ts +49 -0
  69. package/dist/adapters/scholar-artifacts/pdf-read.d.ts.map +1 -0
  70. package/dist/adapters/scholar-artifacts/pdf-read.js +204 -0
  71. package/dist/adapters/scholar-artifacts/pdf-read.js.map +1 -0
  72. package/dist/adapters/scholar-artifacts/pdf.d.ts +16 -0
  73. package/dist/adapters/scholar-artifacts/pdf.d.ts.map +1 -0
  74. package/dist/adapters/scholar-artifacts/pdf.js +122 -0
  75. package/dist/adapters/scholar-artifacts/pdf.js.map +1 -0
  76. package/dist/adapters/semantic-scholar/papers.d.ts +6 -6
  77. package/dist/adapters/semantic-scholar/papers.d.ts.map +1 -1
  78. package/dist/adapters/semantic-scholar/papers.js +80 -6
  79. package/dist/adapters/semantic-scholar/papers.js.map +1 -1
  80. package/dist/adapters/unpaywall/works.d.ts +7 -7
  81. package/dist/adapters/unpaywall/works.d.ts.map +1 -1
  82. package/dist/adapters/unpaywall/works.js +104 -12
  83. package/dist/adapters/unpaywall/works.js.map +1 -1
  84. package/dist/adapters/wanfang/search.d.ts +14 -0
  85. package/dist/adapters/wanfang/search.d.ts.map +1 -1
  86. package/dist/adapters/wanfang/search.js +56 -7
  87. package/dist/adapters/wanfang/search.js.map +1 -1
  88. package/dist/browser/page.d.ts +2 -0
  89. package/dist/browser/page.d.ts.map +1 -1
  90. package/dist/browser/page.js +12 -0
  91. package/dist/browser/page.js.map +1 -1
  92. package/dist/commands/browser/actions.d.ts.map +1 -1
  93. package/dist/commands/browser/actions.js +59 -3
  94. package/dist/commands/browser/actions.js.map +1 -1
  95. package/dist/commands/scholar.d.ts +77 -5
  96. package/dist/commands/scholar.d.ts.map +1 -1
  97. package/dist/commands/scholar.js +2945 -83
  98. package/dist/commands/scholar.js.map +1 -1
  99. package/dist/core/command-contract.d.ts.map +1 -1
  100. package/dist/core/command-contract.js +5 -0
  101. package/dist/core/command-contract.js.map +1 -1
  102. package/dist/core/schema-v2.d.ts +1 -0
  103. package/dist/core/schema-v2.d.ts.map +1 -1
  104. package/dist/core/schema-v2.js +1 -0
  105. package/dist/core/schema-v2.js.map +1 -1
  106. package/dist/discovery/aliases.d.ts.map +1 -1
  107. package/dist/discovery/aliases.js +208 -0
  108. package/dist/discovery/aliases.js.map +1 -1
  109. package/dist/discovery/core-catalog.d.ts +2 -0
  110. package/dist/discovery/core-catalog.d.ts.map +1 -1
  111. package/dist/discovery/core-catalog.js +487 -0
  112. package/dist/discovery/core-catalog.js.map +1 -1
  113. package/dist/discovery/intents.d.ts.map +1 -1
  114. package/dist/discovery/intents.js +273 -2
  115. package/dist/discovery/intents.js.map +1 -1
  116. package/dist/discovery/loader.d.ts.map +1 -1
  117. package/dist/discovery/loader.js +3 -0
  118. package/dist/discovery/loader.js.map +1 -1
  119. package/dist/engine/capability-policy.d.ts.map +1 -1
  120. package/dist/engine/capability-policy.js +30 -4
  121. package/dist/engine/capability-policy.js.map +1 -1
  122. package/dist/engine/kernel/stages.d.ts.map +1 -1
  123. package/dist/engine/kernel/stages.js +3 -0
  124. package/dist/engine/kernel/stages.js.map +1 -1
  125. package/dist/engine/operation-policy.d.ts +4 -1
  126. package/dist/engine/operation-policy.d.ts.map +1 -1
  127. package/dist/engine/operation-policy.js +23 -0
  128. package/dist/engine/operation-policy.js.map +1 -1
  129. package/dist/fast-path/manifest.d.ts +3 -0
  130. package/dist/fast-path/manifest.d.ts.map +1 -1
  131. package/dist/fast-path/manifest.js.map +1 -1
  132. package/dist/fast-path/policy.d.ts.map +1 -1
  133. package/dist/fast-path/policy.js +3 -0
  134. package/dist/fast-path/policy.js.map +1 -1
  135. package/dist/manifest-compact.txt +1 -1
  136. package/dist/manifest.json +6804 -1002
  137. package/dist/registry.d.ts +2 -0
  138. package/dist/registry.d.ts.map +1 -1
  139. package/dist/registry.js +1 -0
  140. package/dist/registry.js.map +1 -1
  141. package/dist/types/scholarly.d.ts +19 -4
  142. package/dist/types/scholarly.d.ts.map +1 -1
  143. package/dist/types/scholarly.js +4 -4
  144. package/dist/types.d.ts +8 -0
  145. package/dist/types.d.ts.map +1 -1
  146. package/dist/types.js.map +1 -1
  147. package/package.json +1 -1
  148. package/server.json +2 -2
  149. package/skills/unicli/SKILL.md +1 -1
  150. package/skills/unicli-claude-code/SKILL.md +1 -1
  151. package/skills/unicli-hermes/SKILL.md +1 -1
  152. package/src/adapters/acl-anthology/papers.test.ts +111 -0
  153. package/src/adapters/acl-anthology/papers.ts +379 -71
  154. package/src/adapters/arxiv/papers.test.ts +46 -0
  155. package/src/adapters/arxiv/papers.ts +251 -4
  156. package/src/adapters/baidu-scholar/search.ts +74 -11
  157. package/src/adapters/biorxiv/preprints.ts +112 -0
  158. package/src/adapters/cnki/search.ts +357 -0
  159. package/src/adapters/cvf/papers.ts +260 -27
  160. package/src/adapters/dblp/publications.test.ts +9 -0
  161. package/src/adapters/dblp/publications.ts +31 -8
  162. package/src/adapters/google-scholar/search.ts +165 -17
  163. package/src/adapters/hf/paper.test.ts +23 -0
  164. package/src/adapters/hf/paper.ts +89 -5
  165. package/src/adapters/hf/top.yaml +34 -2
  166. package/src/adapters/huggingface-papers/daily.yaml +37 -3
  167. package/src/adapters/huggingface-papers/search.yaml +43 -9
  168. package/src/adapters/medrxiv/preprints.ts +112 -0
  169. package/src/adapters/neurips/proceedings.ts +266 -22
  170. package/src/adapters/openalex/works.test.ts +15 -4
  171. package/src/adapters/openalex/works.ts +136 -8
  172. package/src/adapters/openreview/papers.test.ts +31 -0
  173. package/src/adapters/openreview/papers.ts +407 -29
  174. package/src/adapters/pmlr/proceedings.ts +102 -12
  175. package/src/adapters/pubmed/articles.test.ts +88 -1
  176. package/src/adapters/pubmed/articles.ts +343 -44
  177. package/src/adapters/rxiv/preprints.test.ts +233 -0
  178. package/src/adapters/rxiv/preprints.ts +849 -0
  179. package/src/adapters/scholar-artifacts/pdf-read.ts +277 -0
  180. package/src/adapters/scholar-artifacts/pdf.ts +133 -0
  181. package/src/adapters/semantic-scholar/papers.ts +98 -6
  182. package/src/adapters/unpaywall/works.ts +141 -12
  183. package/src/adapters/wanfang/search.ts +57 -7
  184. package/src/adapters/cnki/search.yaml +0 -49
package/AGENTS.md CHANGED
@@ -36,7 +36,7 @@ it starts only the Uni-CLI automation profile.
36
36
 
37
37
  <!-- BEGIN COUNTS -->
38
38
 
39
- > <!-- STATS:site_count -->317<!-- /STATS --> sites, <!-- STATS:command_count -->1772<!-- /STATS --> commands, <!-- STATS:pipeline_step_count -->103<!-- /STATS --> pipeline steps, BM25 bilingual search. `npm install -g @zenalexa/unicli`
39
+ > <!-- STATS:site_count -->320<!-- /STATS --> sites, <!-- STATS:command_count -->1798<!-- /STATS --> commands, <!-- STATS:pipeline_step_count -->103<!-- /STATS --> pipeline steps, BM25 bilingual search. `npm install -g @zenalexa/unicli`
40
40
 
41
41
  <!-- END COUNTS -->
42
42
 
@@ -128,7 +128,7 @@ allowlist entry without a one-line `// REASON:` justification in
128
128
 
129
129
  ## Version
130
130
 
131
- 0.225.2 — Apollo · Gordon
131
+ 0.225.3 — Apollo · Schmitt
132
132
 
133
133
  ## MCP one-liner (Claude Desktop / Cursor / Continue)
134
134
 
package/README.md CHANGED
@@ -41,7 +41,7 @@
41
41
  </p>
42
42
 
43
43
  <p align="center">
44
- <sub>Native CLI · MCP · ACP · JSON/Markdown envelopes · browser CDP · visual fallback · macOS desktop AX · <!-- STATS:site_count -->317<!-- /STATS --> surfaces · <!-- STATS:test_count -->9181<!-- /STATS --> tests</sub>
44
+ <sub>Native CLI · MCP · ACP · JSON/Markdown envelopes · browser CDP · visual fallback · macOS desktop AX · <!-- STATS:site_count -->320<!-- /STATS --> surfaces · <!-- STATS:test_count -->9255<!-- /STATS --> tests</sub>
45
45
  </p>
46
46
 
47
47
  <p align="center">
@@ -340,7 +340,7 @@ The wall below is generated from active manifest sites with real logo support. B
340
340
  <a data-site="replicate" href="https://olo-dot-io.github.io/Uni-CLI/reference/sites" title="replicate: 2 commands"><img alt="replicate" src="https://img.shields.io/static/v1?label=replicate&message=2+cmds&color=7c3aed&style=flat-square&logo=replicate&logoColor=white"></a>
341
341
  </p>
342
342
  <p><strong>scholarly</strong><br>
343
- <a data-site="arxiv" href="https://olo-dot-io.github.io/Uni-CLI/reference/sites" title="arxiv: 5 commands"><img alt="arxiv" src="https://img.shields.io/static/v1?label=arxiv&message=5+cmds&color=0f766e&style=flat-square&logo=arxiv&logoColor=white"></a>
343
+ <a data-site="arxiv" href="https://olo-dot-io.github.io/Uni-CLI/reference/sites" title="arxiv: 6 commands"><img alt="arxiv" src="https://img.shields.io/static/v1?label=arxiv&message=6+cmds&color=0f766e&style=flat-square&logo=arxiv&logoColor=white"></a>
344
344
  <a data-site="google-scholar" href="https://olo-dot-io.github.io/Uni-CLI/reference/sites" title="google-scholar: 3 commands"><img alt="google-scholar" src="https://img.shields.io/static/v1?label=google-scholar&message=3+cmds&color=0f766e&style=flat-square&logo=google&logoColor=white"></a>
345
345
  <a data-site="huggingface-papers" href="https://olo-dot-io.github.io/Uni-CLI/reference/sites" title="huggingface-papers: 2 commands"><img alt="huggingface-papers" src="https://img.shields.io/static/v1?label=huggingface-papers&message=2+cmds&color=0f766e&style=flat-square&logo=huggingface&logoColor=white"></a>
346
346
  <a data-site="zotero" href="https://olo-dot-io.github.io/Uni-CLI/reference/sites" title="zotero: 8 commands"><img alt="zotero" src="https://img.shields.io/static/v1?label=zotero&message=8+cmds&color=0f766e&style=flat-square&logo=zotero&logoColor=white"></a>
@@ -552,5 +552,5 @@ npm run verify
552
552
  [Apache-2.0](./LICENSE)
553
553
 
554
554
  <p align="center">
555
- <sub>v0.225.2 — Apollo · Gordon</sub>
555
+ <sub>v0.225.3 — Apollo · Schmitt</sub>
556
556
  </p>
package/README.zh-CN.md CHANGED
@@ -40,7 +40,7 @@
40
40
  </p>
41
41
 
42
42
  <p align="center">
43
- <sub>Native CLI · MCP · ACP · JSON/Markdown envelope · browser CDP · visual fallback · macOS desktop AX · <!-- STATS:site_count -->317<!-- /STATS --> 个 surface · <!-- STATS:test_count -->9181<!-- /STATS --> 个测试</sub>
43
+ <sub>Native CLI · MCP · ACP · JSON/Markdown envelope · browser CDP · visual fallback · macOS desktop AX · <!-- STATS:site_count -->320<!-- /STATS --> 个 surface · <!-- STATS:test_count -->9255<!-- /STATS --> 个测试</sub>
44
44
  </p>
45
45
 
46
46
  <p align="center">
@@ -299,7 +299,7 @@ ACP 作为编辑器和桥接兼容层保留。真正跑任务时,优先 native
299
299
  <a data-site="replicate" href="https://olo-dot-io.github.io/Uni-CLI/reference/sites" title="replicate: 2 commands"><img alt="replicate" src="https://img.shields.io/static/v1?label=replicate&message=2+cmds&color=7c3aed&style=flat-square&logo=replicate&logoColor=white"></a>
300
300
  </p>
301
301
  <p><strong>学术</strong><br>
302
- <a data-site="arxiv" href="https://olo-dot-io.github.io/Uni-CLI/reference/sites" title="arxiv: 5 commands"><img alt="arxiv" src="https://img.shields.io/static/v1?label=arxiv&message=5+cmds&color=0f766e&style=flat-square&logo=arxiv&logoColor=white"></a>
302
+ <a data-site="arxiv" href="https://olo-dot-io.github.io/Uni-CLI/reference/sites" title="arxiv: 6 commands"><img alt="arxiv" src="https://img.shields.io/static/v1?label=arxiv&message=6+cmds&color=0f766e&style=flat-square&logo=arxiv&logoColor=white"></a>
303
303
  <a data-site="google-scholar" href="https://olo-dot-io.github.io/Uni-CLI/reference/sites" title="google-scholar: 3 commands"><img alt="google-scholar" src="https://img.shields.io/static/v1?label=google-scholar&message=3+cmds&color=0f766e&style=flat-square&logo=google&logoColor=white"></a>
304
304
  <a data-site="huggingface-papers" href="https://olo-dot-io.github.io/Uni-CLI/reference/sites" title="huggingface-papers: 2 commands"><img alt="huggingface-papers" src="https://img.shields.io/static/v1?label=huggingface-papers&message=2+cmds&color=0f766e&style=flat-square&logo=huggingface&logoColor=white"></a>
305
305
  <a data-site="zotero" href="https://olo-dot-io.github.io/Uni-CLI/reference/sites" title="zotero: 8 commands"><img alt="zotero" src="https://img.shields.io/static/v1?label=zotero&message=8+cmds&color=0f766e&style=flat-square&logo=zotero&logoColor=white"></a>
@@ -496,5 +496,5 @@ npm run verify
496
496
  [Apache-2.0](./LICENSE)
497
497
 
498
498
  <p align="center">
499
- <sub>v0.225.2 — Apollo · Gordon</sub>
499
+ <sub>v0.225.3 — Apollo · Schmitt</sub>
500
500
  </p>
@@ -1,16 +1,23 @@
1
1
  /**
2
2
  * @owner src::adapters::acl-anthology::papers
3
- * @does Registers ACL Anthology paper search and event proceedings listing from official Anthology pages.
4
- * @needs aclanthology.org static search/event HTML, src/registry.ts
5
- * @feeds src/commands/scholar.ts via scholar.search, scholar.get, scholar.pdf, and scholar.venue
6
- * @breaks ACL Anthology markup drift surfaces as empty parse output; no browser workaround is used.
7
- * @invariants Paper URLs/PDF URLs are absolutized against aclanthology.org; event keys are explicit.
8
- * @side-effects HTTPS egress to aclanthology.org only
9
- * @perf O(N) over one HTML response
3
+ * @does Registers ACL Anthology paper search, metadata lookup, PDF download, and PDF text-read commands from official Anthology pages.
4
+ * @needs aclanthology.org static search/paper HTML, scholar-artifacts PDF reader, src/registry.ts
5
+ * @feeds src/commands/scholar.ts via scholar.search, scholar.get, scholar.pdf, scholar.fulltext, and scholar.venue
6
+ * @breaks ACL Anthology markup drift, denied PDF downloads, missing pdftotext, or empty PDF text surfaces as source read failure.
7
+ * @invariants Paper URLs/PDF URLs are absolutized against aclanthology.org; read output labels `text_source=pdf`.
8
+ * @side-effects HTTPS egress to aclanthology.org; read writes PDFs under the requested output directory and executes pdftotext.
9
+ * @perf O(N) over one HTML response for search; O(PDF bytes + extracted pages) for read.
10
10
  * @concurrency safe
11
- * @test covered by scholar command discovery and parser style tests for sibling proceedings sources
11
+ * @test src/adapters/acl-anthology/papers.test.ts, tests/unit/adapters/scholar-sources.test.ts
12
12
  * @stability experimental
13
13
  * @since 2026-05-19
14
14
  */
15
- export {};
15
+ import type { ScholarlyWorkRecord } from "../../types/scholarly.js";
16
+ export declare function cleanAclHtml(value: string): string;
17
+ export declare function normalizeAclAnthologyId(value: unknown): string;
18
+ export declare function aclAnthologyPdfUrl(id: string): string;
19
+ export declare function parseAclBibEntries(bib: string): ScholarlyWorkRecord[];
20
+ export declare function searchAclBibRows(rows: readonly ScholarlyWorkRecord[], query: string, limit: number): ScholarlyWorkRecord[];
21
+ export declare function requireAclReadPageArgs(kwargs: Record<string, unknown>): Record<string, unknown>;
22
+ export declare function aclArtifactFilename(record: ScholarlyWorkRecord): string;
16
23
  //# sourceMappingURL=papers.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"papers.d.ts","sourceRoot":"","sources":["../../../src/adapters/acl-anthology/papers.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG"}
1
+ {"version":3,"file":"papers.d.ts","sourceRoot":"","sources":["../../../src/adapters/acl-anthology/papers.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AASH,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,0BAA0B,CAAC;AAkBpE,wBAAgB,YAAY,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAElD;AAUD,wBAAgB,uBAAuB,CAAC,KAAK,EAAE,OAAO,GAAG,MAAM,CAU9D;AAED,wBAAgB,kBAAkB,CAAC,EAAE,EAAE,MAAM,GAAG,MAAM,CAErD;AAwCD,wBAAgB,kBAAkB,CAAC,GAAG,EAAE,MAAM,GAAG,mBAAmB,EAAE,CA8BrE;AA8BD,wBAAgB,gBAAgB,CAC9B,IAAI,EAAE,SAAS,mBAAmB,EAAE,EACpC,KAAK,EAAE,MAAM,EACb,KAAK,EAAE,MAAM,GACZ,mBAAmB,EAAE,CAWvB;AAED,wBAAgB,sBAAsB,CACpC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAC9B,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAMzB;AAED,wBAAgB,mBAAmB,CAAC,MAAM,EAAE,mBAAmB,GAAG,MAAM,CAKvE"}
@@ -1,19 +1,27 @@
1
1
  /**
2
2
  * @owner src::adapters::acl-anthology::papers
3
- * @does Registers ACL Anthology paper search and event proceedings listing from official Anthology pages.
4
- * @needs aclanthology.org static search/event HTML, src/registry.ts
5
- * @feeds src/commands/scholar.ts via scholar.search, scholar.get, scholar.pdf, and scholar.venue
6
- * @breaks ACL Anthology markup drift surfaces as empty parse output; no browser workaround is used.
7
- * @invariants Paper URLs/PDF URLs are absolutized against aclanthology.org; event keys are explicit.
8
- * @side-effects HTTPS egress to aclanthology.org only
9
- * @perf O(N) over one HTML response
3
+ * @does Registers ACL Anthology paper search, metadata lookup, PDF download, and PDF text-read commands from official Anthology pages.
4
+ * @needs aclanthology.org static search/paper HTML, scholar-artifacts PDF reader, src/registry.ts
5
+ * @feeds src/commands/scholar.ts via scholar.search, scholar.get, scholar.pdf, scholar.fulltext, and scholar.venue
6
+ * @breaks ACL Anthology markup drift, denied PDF downloads, missing pdftotext, or empty PDF text surfaces as source read failure.
7
+ * @invariants Paper URLs/PDF URLs are absolutized against aclanthology.org; read output labels `text_source=pdf`.
8
+ * @side-effects HTTPS egress to aclanthology.org; read writes PDFs under the requested output directory and executes pdftotext.
9
+ * @perf O(N) over one HTML response for search; O(PDF bytes + extracted pages) for read.
10
10
  * @concurrency safe
11
- * @test covered by scholar command discovery and parser style tests for sibling proceedings sources
11
+ * @test src/adapters/acl-anthology/papers.test.ts, tests/unit/adapters/scholar-sources.test.ts
12
12
  * @stability experimental
13
13
  * @since 2026-05-19
14
14
  */
15
+ import { execFile } from "node:child_process";
16
+ import { join, resolve } from "node:path";
17
+ import { promisify } from "node:util";
18
+ import { gunzipSync } from "node:zlib";
15
19
  import { cli, Strategy } from "../../registry.js";
20
+ import { httpDownload, sanitizeFilename } from "../../engine/download.js";
16
21
  const ORIGIN = "https://aclanthology.org";
22
+ const ANTHOLOGY_BIB_URL = `${ORIGIN}/anthology.bib.gz`;
23
+ const execFileAsync = promisify(execFile);
24
+ let anthologyBibCache;
17
25
  function decode(value) {
18
26
  return value
19
27
  .replace(/&amp;/g, "&")
@@ -24,48 +32,178 @@ function decode(value) {
24
32
  .replace(/\s+/g, " ")
25
33
  .trim();
26
34
  }
27
- function absolute(path) {
28
- return /^https?:\/\//i.test(path)
29
- ? path
30
- : `${ORIGIN}${path.startsWith("/") ? "" : "/"}${path}`;
35
+ export function cleanAclHtml(value) {
36
+ return decode(value.replace(/<[^>]+>/g, ""));
31
37
  }
32
- function normalizeId(value) {
33
- const raw = String(value ?? "").trim();
38
+ function normalizeSearchText(value) {
39
+ return value
40
+ .toLowerCase()
41
+ .replace(/[^a-z0-9]+/g, " ")
42
+ .replace(/\s+/g, " ")
43
+ .trim();
44
+ }
45
+ export function normalizeAclAnthologyId(value) {
46
+ const raw = String(value ?? "")
47
+ .trim()
48
+ .replace(/^https?:\/\/(?:www\.)?aclanthology\.org\//i, "")
49
+ .replace(/\.pdf$/i, "")
50
+ .replace(/\/$/i, "");
34
51
  if (!/^[A-Za-z0-9.-]+$/.test(raw)) {
35
- throw new Error(`ACL Anthology id "${raw}" is not valid.`);
52
+ throw new Error(`ACL Anthology id "${String(value ?? "")}" is not valid.`);
36
53
  }
37
54
  return raw.replace(/\.$/, "");
38
55
  }
39
- function parseRows(html, source = "acl-anthology") {
40
- const out = [];
41
- const re = /<p class="d-sm-flex[^"]*">[\s\S]*?<a[^>]+href="([^"]+)"[^>]*>([\s\S]*?)<\/a>([\s\S]*?)(?=<p class="d-sm-flex|$)/g;
56
+ export function aclAnthologyPdfUrl(id) {
57
+ return `${ORIGIN}/${id}.pdf`;
58
+ }
59
+ function cleanBibValue(value) {
60
+ return decode(value
61
+ .trim()
62
+ .replace(/,$/, "")
63
+ .replace(/^["{]|["}]$/g, "")
64
+ .replace(/\\"/g, '"')
65
+ .replace(/\\&/g, "&")
66
+ .replace(/\\url\{([^}]+)\}/g, "$1")
67
+ .replace(/\\[a-zA-Z]+\{([^}]*)\}/g, "$1")
68
+ .replace(/[{}]/g, "")
69
+ .replace(/\s+/g, " "));
70
+ }
71
+ function normalizeBibAuthors(value) {
72
+ const authors = cleanBibValue(value)
73
+ .split(/\s+and\s+/)
74
+ .map((author) => {
75
+ const parts = author.split(/\s*,\s*/);
76
+ return parts.length === 2 ? `${parts[1]} ${parts[0]}` : author;
77
+ })
78
+ .map((author) => author.trim())
79
+ .filter(Boolean);
80
+ return authors.length > 0 ? authors.slice(0, 20) : undefined;
81
+ }
82
+ function parseBibFields(entry) {
83
+ const fields = {};
84
+ const re = /^\s*([A-Za-z][A-Za-z0-9_-]*)\s*=\s*([\s\S]*?)(?=,\n\s*[A-Za-z][A-Za-z0-9_-]*\s*=|\n}\s*$)/gm;
42
85
  let match;
43
- while ((match = re.exec(html)) !== null) {
44
- const sourceUrl = absolute(match[1]);
45
- const id = sourceUrl.replace(`${ORIGIN}/`, "").replace(/\/$/, "");
46
- const block = match[3];
47
- const pdf = block.match(/href="([^"]+\.pdf)"/i)?.[1] ?? "";
48
- const authorText = block
49
- .replace(/<span class="d-block">[\s\S]*?<\/span>/g, " ")
50
- .replace(/<[^>]+>/g, " ");
51
- const authors = decode(authorText)
52
- .split(/,\s*/)
53
- .map((author) => author.trim())
54
- .filter(Boolean)
55
- .slice(0, 20);
56
- out.push({
86
+ while ((match = re.exec(entry)) !== null) {
87
+ fields[match[1].toLowerCase()] = match[2];
88
+ }
89
+ return fields;
90
+ }
91
+ export function parseAclBibEntries(bib) {
92
+ const rows = [];
93
+ for (const entry of bib.split(/\n(?=@[A-Za-z]+\{)/)) {
94
+ const header = entry.match(/^@([A-Za-z]+)\{([^,]+),/);
95
+ if (!header)
96
+ continue;
97
+ const fields = parseBibFields(entry);
98
+ const title = fields.title ? cleanBibValue(fields.title) : "";
99
+ const sourceUrl = fields.url ? cleanBibValue(fields.url) : "";
100
+ const id = sourceUrl
101
+ ? normalizeAclAnthologyId(sourceUrl)
102
+ : normalizeAclAnthologyId(header[2]);
103
+ if (!title || !id)
104
+ continue;
105
+ const year = fields.year ? Number(cleanBibValue(fields.year)) : undefined;
106
+ const doi = fields.doi ? cleanBibValue(fields.doi) : undefined;
107
+ rows.push({
57
108
  id,
58
- title: decode(match[2].replace(/<[^>]+>/g, " ")),
59
- authors: authors.length > 0 ? authors : undefined,
60
- year: Number(id.slice(0, 4)) || undefined,
61
- venue: "ACL Anthology",
62
- pdf_url: pdf ? absolute(pdf) : `${sourceUrl}.pdf`,
63
- source_adapter: source,
64
- source_url: sourceUrl,
109
+ title,
110
+ authors: fields.author ? normalizeBibAuthors(fields.author) : undefined,
111
+ year: Number.isInteger(year) ? year : undefined,
112
+ venue: fields.booktitle
113
+ ? cleanBibValue(fields.booktitle)
114
+ : "ACL Anthology",
115
+ doi,
116
+ pdf_url: aclAnthologyPdfUrl(id),
117
+ source_adapter: "acl-anthology",
118
+ source_url: `${ORIGIN}/${id}/`,
65
119
  retrieved_at: new Date().toISOString(),
66
120
  });
67
121
  }
68
- return out;
122
+ return rows;
123
+ }
124
+ function scoreAclBibRow(row, query) {
125
+ const normalizedQuery = normalizeSearchText(query);
126
+ const terms = normalizedQuery.split(" ").filter(Boolean);
127
+ const title = normalizeSearchText(row.title);
128
+ const authors = normalizeSearchText((row.authors ?? []).join(" "));
129
+ const haystack = normalizeSearchText([
130
+ row.id,
131
+ row.title,
132
+ row.venue,
133
+ row.year,
134
+ row.doi,
135
+ (row.authors ?? []).join(" "),
136
+ ]
137
+ .filter(Boolean)
138
+ .join(" "));
139
+ if (!normalizedQuery || !terms.every((term) => haystack.includes(term))) {
140
+ return 0;
141
+ }
142
+ let score = 10;
143
+ if (row.id.toLowerCase() === normalizedQuery)
144
+ score += 100;
145
+ if (title === normalizedQuery)
146
+ score += 80;
147
+ if (title.includes(normalizedQuery))
148
+ score += 40;
149
+ if (authors.includes(normalizedQuery))
150
+ score += 20;
151
+ return score + Math.min(Number(row.year ?? 0) / 10_000, 1);
152
+ }
153
+ export function searchAclBibRows(rows, query, limit) {
154
+ return rows
155
+ .map((row, index) => ({
156
+ row,
157
+ index,
158
+ score: scoreAclBibRow(row, query),
159
+ }))
160
+ .filter((candidate) => candidate.score > 0)
161
+ .sort((a, b) => b.score - a.score || a.index - b.index)
162
+ .slice(0, limit)
163
+ .map((candidate) => candidate.row);
164
+ }
165
+ export function requireAclReadPageArgs(kwargs) {
166
+ return {
167
+ first_page: kwargs["first-page"] ?? kwargs.firstPage,
168
+ last_page: kwargs["last-page"] ?? kwargs.lastPage,
169
+ max_chars: kwargs["max-chars"] ?? kwargs.maxChars,
170
+ };
171
+ }
172
+ export function aclArtifactFilename(record) {
173
+ const title = sanitizeFilename(String(record.title ?? ""))
174
+ .replace(/\s+/g, "_")
175
+ .slice(0, 96);
176
+ return `${sanitizeFilename(record.id)}${title ? `-${title}` : ""}.pdf`;
177
+ }
178
+ function requireAclPageRange(firstPage, lastPage) {
179
+ const first = Number(firstPage ?? 1);
180
+ const last = Number(lastPage ?? 20);
181
+ if (!Number.isInteger(first) || first < 1) {
182
+ throw new Error("acl-anthology first-page must be an integer >= 1.");
183
+ }
184
+ if (!Number.isInteger(last) || last < first) {
185
+ throw new Error("acl-anthology last-page must be an integer >= first-page.");
186
+ }
187
+ return { firstPage: first, lastPage: last };
188
+ }
189
+ function requireAclMaxChars(value, fallback = 40_000) {
190
+ if (value === undefined || value === null || value === "")
191
+ return fallback;
192
+ const n = Number(value);
193
+ if (!Number.isInteger(n) || n < 1_000 || n > 1_000_000) {
194
+ throw new Error(`acl-anthology max-chars must be an integer in [1000, 1000000]. Got: ${String(value)}`);
195
+ }
196
+ return n;
197
+ }
198
+ function truncateAclText(text, maxChars) {
199
+ if (text.length <= maxChars) {
200
+ return { text, truncated: false, originalChars: text.length };
201
+ }
202
+ return {
203
+ text: `${text.slice(0, maxChars).trimEnd()}\n\n[truncated at ${maxChars} characters]`,
204
+ truncated: true,
205
+ originalChars: text.length,
206
+ };
69
207
  }
70
208
  async function fetchHtml(url, label) {
71
209
  const response = await fetch(url, {
@@ -80,6 +218,78 @@ async function fetchHtml(url, label) {
80
218
  throw new Error(`${label} returned HTTP ${response.status}.`);
81
219
  return response.text();
82
220
  }
221
+ async function fetchAnthologyBib() {
222
+ anthologyBibCache ??= (async () => {
223
+ const response = await fetch(ANTHOLOGY_BIB_URL, {
224
+ headers: {
225
+ Accept: "application/x-gzip, application/gzip, */*",
226
+ "User-Agent": "unicli-acl-anthology/1.0 (https://github.com/olo-dot-io/Uni-CLI)",
227
+ },
228
+ });
229
+ if (!response.ok)
230
+ throw new Error(`ACL Anthology BibTeX returned HTTP ${response.status}.`);
231
+ return gunzipSync(Buffer.from(await response.arrayBuffer())).toString("utf8");
232
+ })();
233
+ return anthologyBibCache;
234
+ }
235
+ async function fetchAclPaperRecord(id) {
236
+ const html = await fetchHtml(`${ORIGIN}/${id}/`, `acl-anthology paper ${id}`);
237
+ const title = cleanAclHtml(html.match(/<h2[^>]*id=title[^>]*>([\s\S]*?)<\/h2>/)?.[1] ?? "");
238
+ if (!title)
239
+ throw new Error(`ACL Anthology paper ${id} did not expose a title.`);
240
+ return {
241
+ id,
242
+ title,
243
+ year: Number(id.slice(0, 4)) || undefined,
244
+ venue: "ACL Anthology",
245
+ pdf_url: aclAnthologyPdfUrl(id),
246
+ source_adapter: "acl-anthology",
247
+ source_url: `${ORIGIN}/${id}/`,
248
+ retrieved_at: new Date().toISOString(),
249
+ };
250
+ }
251
+ async function readAclPaperPdf(record, kwargs) {
252
+ if (!record.pdf_url) {
253
+ throw new Error(`ACL Anthology paper ${record.id} did not expose a PDF.`);
254
+ }
255
+ const outputDir = resolve(String(kwargs.output ?? "./acl-anthology-downloads"));
256
+ const path = join(outputDir, aclArtifactFilename(record));
257
+ const download = await httpDownload(record.pdf_url, path, {
258
+ Accept: "application/pdf,*/*",
259
+ Referer: record.source_url ?? `${ORIGIN}/${record.id}/`,
260
+ "User-Agent": "unicli-acl-anthology/1.0 (https://github.com/olo-dot-io/Uni-CLI)",
261
+ });
262
+ if (download.status === "failed" || !download.path) {
263
+ throw new Error(`ACL Anthology PDF download failed for ${record.id}: ${download.error ?? "no path"}.`);
264
+ }
265
+ const pageArgs = requireAclReadPageArgs(kwargs);
266
+ const { firstPage, lastPage } = requireAclPageRange(pageArgs.first_page, pageArgs.last_page);
267
+ const maxChars = requireAclMaxChars(pageArgs.max_chars);
268
+ const { stdout } = await execFileAsync("pdftotext", [
269
+ "-layout",
270
+ "-enc",
271
+ "UTF-8",
272
+ "-f",
273
+ String(firstPage),
274
+ "-l",
275
+ String(lastPage),
276
+ download.path,
277
+ "-",
278
+ ], { timeout: 60_000, maxBuffer: 10 * 1024 * 1024 });
279
+ const extracted = stdout.trim();
280
+ if (!extracted) {
281
+ throw new Error(`pdftotext returned no text for ACL Anthology ${record.id} pages ${firstPage}-${lastPage}.`);
282
+ }
283
+ const truncated = truncateAclText(extracted, maxChars);
284
+ return {
285
+ ...record,
286
+ path: download.path,
287
+ text: truncated.text,
288
+ text_chars: truncated.originalChars,
289
+ text_truncated: truncated.truncated,
290
+ text_source: "pdf",
291
+ };
292
+ }
83
293
  cli({
84
294
  site: "acl-anthology",
85
295
  name: "search",
@@ -97,7 +307,7 @@ cli({
97
307
  if (!query)
98
308
  throw new Error("acl-anthology search query cannot be empty.");
99
309
  const limit = Math.min(Math.max(Number(kwargs.limit ?? 20), 1), 100);
100
- const rows = parseRows(await fetchHtml(`${ORIGIN}/search/?q=${encodeURIComponent(query)}`, "acl-anthology search")).slice(0, limit);
310
+ const rows = searchAclBibRows(parseAclBibEntries(await fetchAnthologyBib()), query, limit);
101
311
  if (rows.length === 0)
102
312
  throw new Error(`No ACL Anthology papers matched "${query}".`);
103
313
  return rows;
@@ -113,23 +323,77 @@ cli({
113
323
  columns: ["id", "title", "authors", "year", "venue", "pdf_url", "source_url"],
114
324
  capabilities: ["http.fetch", "scholar.get", "scholar.pdf"],
115
325
  func: async (_page, kwargs) => {
116
- const id = normalizeId(kwargs.id ?? kwargs.ref);
117
- const html = await fetchHtml(`${ORIGIN}/${id}/`, `acl-anthology paper ${id}`);
118
- const title = decode(html.match(/<h2[^>]*id=title[^>]*>([\s\S]*?)<\/h2>/)?.[1] ?? "");
119
- if (!title)
120
- throw new Error(`ACL Anthology paper ${id} did not expose a title.`);
121
- return [
122
- {
123
- id,
124
- title,
125
- year: Number(id.slice(0, 4)) || undefined,
126
- venue: "ACL Anthology",
127
- pdf_url: `${ORIGIN}/${id}.pdf`,
128
- source_adapter: "acl-anthology",
129
- source_url: `${ORIGIN}/${id}/`,
130
- retrieved_at: new Date().toISOString(),
131
- },
132
- ];
326
+ const id = normalizeAclAnthologyId(kwargs.id ?? kwargs.ref);
327
+ return [await fetchAclPaperRecord(id)];
328
+ },
329
+ });
330
+ cli({
331
+ site: "acl-anthology",
332
+ name: "read",
333
+ description: "Download an ACL Anthology paper PDF by id and extract text",
334
+ domain: "aclanthology.org",
335
+ strategy: Strategy.PUBLIC,
336
+ args: [
337
+ {
338
+ name: "id",
339
+ type: "str",
340
+ required: true,
341
+ positional: true,
342
+ description: "ACL Anthology paper id (e.g. 2020.acl-main.447)",
343
+ "x-unicli-kind": "id",
344
+ "x-unicli-accepts": ["url"],
345
+ },
346
+ {
347
+ name: "output",
348
+ type: "str",
349
+ default: "./acl-anthology-downloads",
350
+ description: "Output directory",
351
+ "x-unicli-kind": "path",
352
+ },
353
+ {
354
+ name: "first-page",
355
+ type: "int",
356
+ default: 1,
357
+ description: "First PDF page to extract",
358
+ },
359
+ {
360
+ name: "last-page",
361
+ type: "int",
362
+ default: 20,
363
+ description: "Last PDF page to extract",
364
+ },
365
+ {
366
+ name: "max-chars",
367
+ type: "int",
368
+ default: 40000,
369
+ description: "Maximum extracted text characters",
370
+ },
371
+ ],
372
+ columns: [
373
+ "id",
374
+ "title",
375
+ "source_adapter",
376
+ "source_url",
377
+ "pdf_url",
378
+ "path",
379
+ "text_source",
380
+ "text",
381
+ "text_chars",
382
+ "text_truncated",
383
+ ],
384
+ capabilities: [
385
+ "http.fetch",
386
+ "http.download",
387
+ "subprocess.exec",
388
+ "scholar.fulltext",
389
+ "scholar.pdf",
390
+ ],
391
+ executables: ["pdftotext"],
392
+ minimum_capability: "subprocess.exec",
393
+ func: async (_page, kwargs) => {
394
+ const id = normalizeAclAnthologyId(kwargs.id ?? kwargs.ref);
395
+ const record = await fetchAclPaperRecord(id);
396
+ return [await readAclPaperPdf(record, kwargs)];
133
397
  },
134
398
  });
135
399
  //# sourceMappingURL=papers.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"papers.js","sourceRoot":"","sources":["../../../src/adapters/acl-anthology/papers.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAEH,OAAO,EAAE,GAAG,EAAE,QAAQ,EAAE,MAAM,mBAAmB,CAAC;AAGlD,MAAM,MAAM,GAAG,0BAA0B,CAAC;AAE1C,SAAS,MAAM,CAAC,KAAa;IAC3B,OAAO,KAAK;SACT,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC;SACtB,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;SACrB,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;SACrB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;SACvB,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC;SACtB,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;SACpB,IAAI,EAAE,CAAC;AACZ,CAAC;AAED,SAAS,QAAQ,CAAC,IAAY;IAC5B,OAAO,eAAe,CAAC,IAAI,CAAC,IAAI,CAAC;QAC/B,CAAC,CAAC,IAAI;QACN,CAAC,CAAC,GAAG,MAAM,GAAG,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,GAAG,IAAI,EAAE,CAAC;AAC3D,CAAC;AAED,SAAS,WAAW,CAAC,KAAc;IACjC,MAAM,GAAG,GAAG,MAAM,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;IACvC,IAAI,CAAC,kBAAkB,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;QAClC,MAAM,IAAI,KAAK,CAAC,qBAAqB,GAAG,iBAAiB,CAAC,CAAC;IAC7D,CAAC;IACD,OAAO,GAAG,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;AAChC,CAAC;AAED,SAAS,SAAS,CAChB,IAAY,EACZ,MAAM,GAAG,eAAe;IAExB,MAAM,GAAG,GAA0B,EAAE,CAAC;IACtC,MAAM,EAAE,GACN,kHAAkH,CAAC;IACrH,IAAI,KAA6B,CAAC;IAClC,OAAO,CAAC,KAAK,GAAG,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACxC,MAAM,SAAS,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;QACrC,MAAM,EAAE,GAAG,SAAS,CAAC,OAAO,CAAC,GAAG,MAAM,GAAG,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;QAClE,MAAM,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QACvB,MAAM,GAAG,GAAG,KAAK,CAAC,KAAK,CAAC,sBAAsB,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAC3D,MAAM,UAAU,GAAG,KAAK;aACrB,OAAO,CAAC,yCAAyC,EAAE,GAAG,CAAC;aACvD,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC,CAAC;QAC5B,MAAM,OAAO,GAAG,MAAM,CAAC,UAAU,CAAC;aAC/B,KAAK,CAAC,MAAM,CAAC;aACb,GAAG,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC;aAC9B,MAAM,CAAC,OAAO,CAAC;aACf,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QAChB,GAAG,CAAC,IAAI,CAAC;YACP,EAAE;YACF,KAAK,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC,CAAC;YAChD,OAAO,EAAE,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS;YACjD,IAAI,EAAE,MAAM,CAAC,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,SAAS;YACzC,KAAK,EAAE,eAAe;YACtB,OAAO,EAAE,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,SAAS,MAAM;YACjD,cAAc,EAAE,MAAM;YACtB,UAAU,EAAE,SAAS;YACrB,YAAY,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;SACvC,CAAC,CAAC;IACL,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,KAAK,UAAU,SAAS,CAAC,GAAW,EAAE,KAAa;IACjD,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;QAChC,OAAO,EAAE;YACP,MAAM,EAAE,WAAW;YACnB,YAAY,EACV,kEAAkE;SACrE;KACF,CAAC,CAAC;IACH,IAAI,QAAQ,CAAC,MAAM,KAAK,GAAG;QAAE,MAAM,IAAI,KAAK,CAAC,GAAG,KAAK,sBAAsB,CAAC,CAAC;IAC7E,IAAI,CAAC,QAAQ,CAAC,EAAE;QACd,MAAM,IAAI,KAAK,CAAC,GAAG,KAAK,kBAAkB,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC;IAChE,OAAO,QAAQ,CAAC,IAAI,EAAE,CAAC;AACzB,CAAC;AAED,GAAG,CAAC;IACF,IAAI,EAAE,eAAe;IACrB,IAAI,EAAE,QAAQ;IACd,WAAW,EAAE,6BAA6B;IAC1C,MAAM,EAAE,kBAAkB;IAC1B,QAAQ,EAAE,QAAQ,CAAC,MAAM;IACzB,IAAI,EAAE;QACJ,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,QAAQ,EAAE,IAAI,EAAE,UAAU,EAAE,IAAI,EAAE;QAChE,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE,EAAE;KAC5C;IACD,OAAO,EAAE,CAAC,IAAI,EAAE,OAAO,EAAE,SAAS,EAAE,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE,YAAY,CAAC;IAC7E,YAAY,EAAE,CAAC,YAAY,EAAE,gBAAgB,EAAE,aAAa,CAAC;IAC7D,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,EAAE;QAC5B,MAAM,KAAK,GAAG,MAAM,CAAC,MAAM,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;QAChD,IAAI,CAAC,KAAK;YAAE,MAAM,IAAI,KAAK,CAAC,6CAA6C,CAAC,CAAC;QAC3E,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,KAAK,IAAI,EAAE,CAAC,EAAE,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;QACrE,MAAM,IAAI,GAAG,SAAS,CACpB,MAAM,SAAS,CACb,GAAG,MAAM,cAAc,kBAAkB,CAAC,KAAK,CAAC,EAAE,EAClD,sBAAsB,CACvB,CACF,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC;QAClB,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;YACnB,MAAM,IAAI,KAAK,CAAC,oCAAoC,KAAK,IAAI,CAAC,CAAC;QACjE,OAAO,IAAI,CAAC;IACd,CAAC;CACF,CAAC,CAAC;AAEH,GAAG,CAAC;IACF,IAAI,EAAE,eAAe;IACrB,IAAI,EAAE,OAAO;IACb,WAAW,EAAE,8CAA8C;IAC3D,MAAM,EAAE,kBAAkB;IAC1B,QAAQ,EAAE,QAAQ,CAAC,MAAM;IACzB,IAAI,EAAE,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,QAAQ,EAAE,IAAI,EAAE,UAAU,EAAE,IAAI,EAAE,CAAC;IACrE,OAAO,EAAE,CAAC,IAAI,EAAE,OAAO,EAAE,SAAS,EAAE,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE,YAAY,CAAC;IAC7E,YAAY,EAAE,CAAC,YAAY,EAAE,aAAa,EAAE,aAAa,CAAC;IAC1D,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,EAAE;QAC5B,MAAM,EAAE,GAAG,WAAW,CAAC,MAAM,CAAC,EAAE,IAAI,MAAM,CAAC,GAAG,CAAC,CAAC;QAChD,MAAM,IAAI,GAAG,MAAM,SAAS,CAC1B,GAAG,MAAM,IAAI,EAAE,GAAG,EAClB,uBAAuB,EAAE,EAAE,CAC5B,CAAC;QACF,MAAM,KAAK,GAAG,MAAM,CAClB,IAAI,CAAC,KAAK,CAAC,wCAAwC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAChE,CAAC;QACF,IAAI,CAAC,KAAK;YACR,MAAM,IAAI,KAAK,CAAC,uBAAuB,EAAE,0BAA0B,CAAC,CAAC;QACvE,OAAO;YACL;gBACE,EAAE;gBACF,KAAK;gBACL,IAAI,EAAE,MAAM,CAAC,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,SAAS;gBACzC,KAAK,EAAE,eAAe;gBACtB,OAAO,EAAE,GAAG,MAAM,IAAI,EAAE,MAAM;gBAC9B,cAAc,EAAE,eAAe;gBAC/B,UAAU,EAAE,GAAG,MAAM,IAAI,EAAE,GAAG;gBAC9B,YAAY,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;aACT;SAChC,CAAC;IACJ,CAAC;CACF,CAAC,CAAC"}
1
+ {"version":3,"file":"papers.js","sourceRoot":"","sources":["../../../src/adapters/acl-anthology/papers.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAEH,OAAO,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AAC9C,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAC1C,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AACtC,OAAO,EAAE,UAAU,EAAE,MAAM,WAAW,CAAC;AAEvC,OAAO,EAAE,GAAG,EAAE,QAAQ,EAAE,MAAM,mBAAmB,CAAC;AAClD,OAAO,EAAE,YAAY,EAAE,gBAAgB,EAAE,MAAM,0BAA0B,CAAC;AAG1E,MAAM,MAAM,GAAG,0BAA0B,CAAC;AAC1C,MAAM,iBAAiB,GAAG,GAAG,MAAM,mBAAmB,CAAC;AACvD,MAAM,aAAa,GAAG,SAAS,CAAC,QAAQ,CAAC,CAAC;AAC1C,IAAI,iBAA8C,CAAC;AAEnD,SAAS,MAAM,CAAC,KAAa;IAC3B,OAAO,KAAK;SACT,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC;SACtB,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;SACrB,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;SACrB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;SACvB,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC;SACtB,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;SACpB,IAAI,EAAE,CAAC;AACZ,CAAC;AAED,MAAM,UAAU,YAAY,CAAC,KAAa;IACxC,OAAO,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC,CAAC;AAC/C,CAAC;AAED,SAAS,mBAAmB,CAAC,KAAa;IACxC,OAAO,KAAK;SACT,WAAW,EAAE;SACb,OAAO,CAAC,aAAa,EAAE,GAAG,CAAC;SAC3B,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;SACpB,IAAI,EAAE,CAAC;AACZ,CAAC;AAED,MAAM,UAAU,uBAAuB,CAAC,KAAc;IACpD,MAAM,GAAG,GAAG,MAAM,CAAC,KAAK,IAAI,EAAE,CAAC;SAC5B,IAAI,EAAE;SACN,OAAO,CAAC,4CAA4C,EAAE,EAAE,CAAC;SACzD,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC;SACtB,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;IACvB,IAAI,CAAC,kBAAkB,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;QAClC,MAAM,IAAI,KAAK,CAAC,qBAAqB,MAAM,CAAC,KAAK,IAAI,EAAE,CAAC,iBAAiB,CAAC,CAAC;IAC7E,CAAC;IACD,OAAO,GAAG,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;AAChC,CAAC;AAED,MAAM,UAAU,kBAAkB,CAAC,EAAU;IAC3C,OAAO,GAAG,MAAM,IAAI,EAAE,MAAM,CAAC;AAC/B,CAAC;AAED,SAAS,aAAa,CAAC,KAAa;IAClC,OAAO,MAAM,CACX,KAAK;SACF,IAAI,EAAE;SACN,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC;SACjB,OAAO,CAAC,cAAc,EAAE,EAAE,CAAC;SAC3B,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;SACpB,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;SACpB,OAAO,CAAC,mBAAmB,EAAE,IAAI,CAAC;SAClC,OAAO,CAAC,yBAAyB,EAAE,IAAI,CAAC;SACxC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC;SACpB,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CACxB,CAAC;AACJ,CAAC;AAED,SAAS,mBAAmB,CAAC,KAAa;IACxC,MAAM,OAAO,GAAG,aAAa,CAAC,KAAK,CAAC;SACjC,KAAK,CAAC,WAAW,CAAC;SAClB,GAAG,CAAC,CAAC,MAAM,EAAE,EAAE;QACd,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;QACtC,OAAO,KAAK,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC;IACjE,CAAC,CAAC;SACD,GAAG,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC;SAC9B,MAAM,CAAC,OAAO,CAAC,CAAC;IACnB,OAAO,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;AAC/D,CAAC;AAED,SAAS,cAAc,CAAC,KAAa;IACnC,MAAM,MAAM,GAA2B,EAAE,CAAC;IAC1C,MAAM,EAAE,GACN,6FAA6F,CAAC;IAChG,IAAI,KAA6B,CAAC;IAClC,OAAO,CAAC,KAAK,GAAG,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACzC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;IAC5C,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,MAAM,UAAU,kBAAkB,CAAC,GAAW;IAC5C,MAAM,IAAI,GAA0B,EAAE,CAAC;IACvC,KAAK,MAAM,KAAK,IAAI,GAAG,CAAC,KAAK,CAAC,oBAAoB,CAAC,EAAE,CAAC;QACpD,MAAM,MAAM,GAAG,KAAK,CAAC,KAAK,CAAC,yBAAyB,CAAC,CAAC;QACtD,IAAI,CAAC,MAAM;YAAE,SAAS;QACtB,MAAM,MAAM,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC;QACrC,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,aAAa,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QAC9D,MAAM,SAAS,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,aAAa,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QAC9D,MAAM,EAAE,GAAG,SAAS;YAClB,CAAC,CAAC,uBAAuB,CAAC,SAAS,CAAC;YACpC,CAAC,CAAC,uBAAuB,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;QACvC,IAAI,CAAC,KAAK,IAAI,CAAC,EAAE;YAAE,SAAS;QAC5B,MAAM,IAAI,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;QAC1E,MAAM,GAAG,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,aAAa,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;QAC/D,IAAI,CAAC,IAAI,CAAC;YACR,EAAE;YACF,KAAK;YACL,OAAO,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,mBAAmB,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,SAAS;YACvE,IAAI,EAAE,MAAM,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,SAAS;YAC/C,KAAK,EAAE,MAAM,CAAC,SAAS;gBACrB,CAAC,CAAC,aAAa,CAAC,MAAM,CAAC,SAAS,CAAC;gBACjC,CAAC,CAAC,eAAe;YACnB,GAAG;YACH,OAAO,EAAE,kBAAkB,CAAC,EAAE,CAAC;YAC/B,cAAc,EAAE,eAAe;YAC/B,UAAU,EAAE,GAAG,MAAM,IAAI,EAAE,GAAG;YAC9B,YAAY,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;SACvC,CAAC,CAAC;IACL,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,cAAc,CAAC,GAAwB,EAAE,KAAa;IAC7D,MAAM,eAAe,GAAG,mBAAmB,CAAC,KAAK,CAAC,CAAC;IACnD,MAAM,KAAK,GAAG,eAAe,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IACzD,MAAM,KAAK,GAAG,mBAAmB,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;IAC7C,MAAM,OAAO,GAAG,mBAAmB,CAAC,CAAC,GAAG,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;IACnE,MAAM,QAAQ,GAAG,mBAAmB,CAClC;QACE,GAAG,CAAC,EAAE;QACN,GAAG,CAAC,KAAK;QACT,GAAG,CAAC,KAAK;QACT,GAAG,CAAC,IAAI;QACR,GAAG,CAAC,GAAG;QACP,CAAC,GAAG,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC;KAC9B;SACE,MAAM,CAAC,OAAO,CAAC;SACf,IAAI,CAAC,GAAG,CAAC,CACb,CAAC;IACF,IAAI,CAAC,eAAe,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,EAAE,CAAC;QACxE,OAAO,CAAC,CAAC;IACX,CAAC;IACD,IAAI,KAAK,GAAG,EAAE,CAAC;IACf,IAAI,GAAG,CAAC,EAAE,CAAC,WAAW,EAAE,KAAK,eAAe;QAAE,KAAK,IAAI,GAAG,CAAC;IAC3D,IAAI,KAAK,KAAK,eAAe;QAAE,KAAK,IAAI,EAAE,CAAC;IAC3C,IAAI,KAAK,CAAC,QAAQ,CAAC,eAAe,CAAC;QAAE,KAAK,IAAI,EAAE,CAAC;IACjD,IAAI,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAC;QAAE,KAAK,IAAI,EAAE,CAAC;IACnD,OAAO,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,CAAC,GAAG,MAAM,EAAE,CAAC,CAAC,CAAC;AAC7D,CAAC;AAED,MAAM,UAAU,gBAAgB,CAC9B,IAAoC,EACpC,KAAa,EACb,KAAa;IAEb,OAAO,IAAI;SACR,GAAG,CAAC,CAAC,GAAG,EAAE,KAAK,EAAE,EAAE,CAAC,CAAC;QACpB,GAAG;QACH,KAAK;QACL,KAAK,EAAE,cAAc,CAAC,GAAG,EAAE,KAAK,CAAC;KAClC,CAAC,CAAC;SACF,MAAM,CAAC,CAAC,SAAS,EAAE,EAAE,CAAC,SAAS,CAAC,KAAK,GAAG,CAAC,CAAC;SAC1C,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC;SACtD,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC;SACf,GAAG,CAAC,CAAC,SAAS,EAAE,EAAE,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;AACvC,CAAC;AAED,MAAM,UAAU,sBAAsB,CACpC,MAA+B;IAE/B,OAAO;QACL,UAAU,EAAE,MAAM,CAAC,YAAY,CAAC,IAAI,MAAM,CAAC,SAAS;QACpD,SAAS,EAAE,MAAM,CAAC,WAAW,CAAC,IAAI,MAAM,CAAC,QAAQ;QACjD,SAAS,EAAE,MAAM,CAAC,WAAW,CAAC,IAAI,MAAM,CAAC,QAAQ;KAClD,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,mBAAmB,CAAC,MAA2B;IAC7D,MAAM,KAAK,GAAG,gBAAgB,CAAC,MAAM,CAAC,MAAM,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC;SACvD,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;SACpB,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;IAChB,OAAO,GAAG,gBAAgB,CAAC,MAAM,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,EAAE,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC;AACzE,CAAC;AAED,SAAS,mBAAmB,CAC1B,SAAkB,EAClB,QAAiB;IAEjB,MAAM,KAAK,GAAG,MAAM,CAAC,SAAS,IAAI,CAAC,CAAC,CAAC;IACrC,MAAM,IAAI,GAAG,MAAM,CAAC,QAAQ,IAAI,EAAE,CAAC,CAAC;IACpC,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,KAAK,CAAC,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;QAC1C,MAAM,IAAI,KAAK,CAAC,mDAAmD,CAAC,CAAC;IACvE,CAAC;IACD,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,IAAI,CAAC,IAAI,IAAI,GAAG,KAAK,EAAE,CAAC;QAC5C,MAAM,IAAI,KAAK,CACb,2DAA2D,CAC5D,CAAC;IACJ,CAAC;IACD,OAAO,EAAE,SAAS,EAAE,KAAK,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC;AAC9C,CAAC;AAED,SAAS,kBAAkB,CAAC,KAAc,EAAE,QAAQ,GAAG,MAAM;IAC3D,IAAI,KAAK,KAAK,SAAS,IAAI,KAAK,KAAK,IAAI,IAAI,KAAK,KAAK,EAAE;QAAE,OAAO,QAAQ,CAAC;IAC3E,MAAM,CAAC,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC;IACxB,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,KAAK,IAAI,CAAC,GAAG,SAAS,EAAE,CAAC;QACvD,MAAM,IAAI,KAAK,CACb,uEAAuE,MAAM,CAAC,KAAK,CAAC,EAAE,CACvF,CAAC;IACJ,CAAC;IACD,OAAO,CAAC,CAAC;AACX,CAAC;AAED,SAAS,eAAe,CACtB,IAAY,EACZ,QAAgB;IAEhB,IAAI,IAAI,CAAC,MAAM,IAAI,QAAQ,EAAE,CAAC;QAC5B,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,KAAK,EAAE,aAAa,EAAE,IAAI,CAAC,MAAM,EAAE,CAAC;IAChE,CAAC;IACD,OAAO;QACL,IAAI,EAAE,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC,OAAO,EAAE,qBAAqB,QAAQ,cAAc;QACrF,SAAS,EAAE,IAAI;QACf,aAAa,EAAE,IAAI,CAAC,MAAM;KAC3B,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,SAAS,CAAC,GAAW,EAAE,KAAa;IACjD,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;QAChC,OAAO,EAAE;YACP,MAAM,EAAE,WAAW;YACnB,YAAY,EACV,kEAAkE;SACrE;KACF,CAAC,CAAC;IACH,IAAI,QAAQ,CAAC,MAAM,KAAK,GAAG;QAAE,MAAM,IAAI,KAAK,CAAC,GAAG,KAAK,sBAAsB,CAAC,CAAC;IAC7E,IAAI,CAAC,QAAQ,CAAC,EAAE;QACd,MAAM,IAAI,KAAK,CAAC,GAAG,KAAK,kBAAkB,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC;IAChE,OAAO,QAAQ,CAAC,IAAI,EAAE,CAAC;AACzB,CAAC;AAED,KAAK,UAAU,iBAAiB;IAC9B,iBAAiB,KAAK,CAAC,KAAK,IAAI,EAAE;QAChC,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,iBAAiB,EAAE;YAC9C,OAAO,EAAE;gBACP,MAAM,EAAE,2CAA2C;gBACnD,YAAY,EACV,kEAAkE;aACrE;SACF,CAAC,CAAC;QACH,IAAI,CAAC,QAAQ,CAAC,EAAE;YACd,MAAM,IAAI,KAAK,CAAC,sCAAsC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC;QAC5E,OAAO,UAAU,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,QAAQ,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,QAAQ,CACnE,MAAM,CACP,CAAC;IACJ,CAAC,CAAC,EAAE,CAAC;IACL,OAAO,iBAAiB,CAAC;AAC3B,CAAC;AAED,KAAK,UAAU,mBAAmB,CAAC,EAAU;IAC3C,MAAM,IAAI,GAAG,MAAM,SAAS,CAAC,GAAG,MAAM,IAAI,EAAE,GAAG,EAAE,uBAAuB,EAAE,EAAE,CAAC,CAAC;IAC9E,MAAM,KAAK,GAAG,YAAY,CACxB,IAAI,CAAC,KAAK,CAAC,wCAAwC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAChE,CAAC;IACF,IAAI,CAAC,KAAK;QACR,MAAM,IAAI,KAAK,CAAC,uBAAuB,EAAE,0BAA0B,CAAC,CAAC;IACvE,OAAO;QACL,EAAE;QACF,KAAK;QACL,IAAI,EAAE,MAAM,CAAC,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,SAAS;QACzC,KAAK,EAAE,eAAe;QACtB,OAAO,EAAE,kBAAkB,CAAC,EAAE,CAAC;QAC/B,cAAc,EAAE,eAAe;QAC/B,UAAU,EAAE,GAAG,MAAM,IAAI,EAAE,GAAG;QAC9B,YAAY,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;KACvC,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,eAAe,CAC5B,MAA2B,EAC3B,MAA+B;IAE/B,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;QACpB,MAAM,IAAI,KAAK,CAAC,uBAAuB,MAAM,CAAC,EAAE,wBAAwB,CAAC,CAAC;IAC5E,CAAC;IACD,MAAM,SAAS,GAAG,OAAO,CACvB,MAAM,CAAC,MAAM,CAAC,MAAM,IAAI,2BAA2B,CAAC,CACrD,CAAC;IACF,MAAM,IAAI,GAAG,IAAI,CAAC,SAAS,EAAE,mBAAmB,CAAC,MAAM,CAAC,CAAC,CAAC;IAC1D,MAAM,QAAQ,GAAG,MAAM,YAAY,CAAC,MAAM,CAAC,OAAO,EAAE,IAAI,EAAE;QACxD,MAAM,EAAE,qBAAqB;QAC7B,OAAO,EAAE,MAAM,CAAC,UAAU,IAAI,GAAG,MAAM,IAAI,MAAM,CAAC,EAAE,GAAG;QACvD,YAAY,EACV,kEAAkE;KACrE,CAAC,CAAC;IACH,IAAI,QAAQ,CAAC,MAAM,KAAK,QAAQ,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;QACnD,MAAM,IAAI,KAAK,CACb,yCAAyC,MAAM,CAAC,EAAE,KAAK,QAAQ,CAAC,KAAK,IAAI,SAAS,GAAG,CACtF,CAAC;IACJ,CAAC;IAED,MAAM,QAAQ,GAAG,sBAAsB,CAAC,MAAM,CAAC,CAAC;IAChD,MAAM,EAAE,SAAS,EAAE,QAAQ,EAAE,GAAG,mBAAmB,CACjD,QAAQ,CAAC,UAAU,EACnB,QAAQ,CAAC,SAAS,CACnB,CAAC;IACF,MAAM,QAAQ,GAAG,kBAAkB,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC;IACxD,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,aAAa,CACpC,WAAW,EACX;QACE,SAAS;QACT,MAAM;QACN,OAAO;QACP,IAAI;QACJ,MAAM,CAAC,SAAS,CAAC;QACjB,IAAI;QACJ,MAAM,CAAC,QAAQ,CAAC;QAChB,QAAQ,CAAC,IAAI;QACb,GAAG;KACJ,EACD,EAAE,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,EAAE,GAAG,IAAI,GAAG,IAAI,EAAE,CACjD,CAAC;IACF,MAAM,SAAS,GAAG,MAAM,CAAC,IAAI,EAAE,CAAC;IAChC,IAAI,CAAC,SAAS,EAAE,CAAC;QACf,MAAM,IAAI,KAAK,CACb,gDAAgD,MAAM,CAAC,EAAE,UAAU,SAAS,IAAI,QAAQ,GAAG,CAC5F,CAAC;IACJ,CAAC;IACD,MAAM,SAAS,GAAG,eAAe,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;IACvD,OAAO;QACL,GAAG,MAAM;QACT,IAAI,EAAE,QAAQ,CAAC,IAAI;QACnB,IAAI,EAAE,SAAS,CAAC,IAAI;QACpB,UAAU,EAAE,SAAS,CAAC,aAAa;QACnC,cAAc,EAAE,SAAS,CAAC,SAAS;QACnC,WAAW,EAAE,KAAK;KACnB,CAAC;AACJ,CAAC;AAED,GAAG,CAAC;IACF,IAAI,EAAE,eAAe;IACrB,IAAI,EAAE,QAAQ;IACd,WAAW,EAAE,6BAA6B;IAC1C,MAAM,EAAE,kBAAkB;IAC1B,QAAQ,EAAE,QAAQ,CAAC,MAAM;IACzB,IAAI,EAAE;QACJ,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,QAAQ,EAAE,IAAI,EAAE,UAAU,EAAE,IAAI,EAAE;QAChE,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE,EAAE;KAC5C;IACD,OAAO,EAAE,CAAC,IAAI,EAAE,OAAO,EAAE,SAAS,EAAE,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE,YAAY,CAAC;IAC7E,YAAY,EAAE,CAAC,YAAY,EAAE,gBAAgB,EAAE,aAAa,CAAC;IAC7D,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,EAAE;QAC5B,MAAM,KAAK,GAAG,MAAM,CAAC,MAAM,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;QAChD,IAAI,CAAC,KAAK;YAAE,MAAM,IAAI,KAAK,CAAC,6CAA6C,CAAC,CAAC;QAC3E,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,KAAK,IAAI,EAAE,CAAC,EAAE,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;QACrE,MAAM,IAAI,GAAG,gBAAgB,CAC3B,kBAAkB,CAAC,MAAM,iBAAiB,EAAE,CAAC,EAC7C,KAAK,EACL,KAAK,CACN,CAAC;QACF,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;YACnB,MAAM,IAAI,KAAK,CAAC,oCAAoC,KAAK,IAAI,CAAC,CAAC;QACjE,OAAO,IAAI,CAAC;IACd,CAAC;CACF,CAAC,CAAC;AAEH,GAAG,CAAC;IACF,IAAI,EAAE,eAAe;IACrB,IAAI,EAAE,OAAO;IACb,WAAW,EAAE,8CAA8C;IAC3D,MAAM,EAAE,kBAAkB;IAC1B,QAAQ,EAAE,QAAQ,CAAC,MAAM;IACzB,IAAI,EAAE,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,QAAQ,EAAE,IAAI,EAAE,UAAU,EAAE,IAAI,EAAE,CAAC;IACrE,OAAO,EAAE,CAAC,IAAI,EAAE,OAAO,EAAE,SAAS,EAAE,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE,YAAY,CAAC;IAC7E,YAAY,EAAE,CAAC,YAAY,EAAE,aAAa,EAAE,aAAa,CAAC;IAC1D,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,EAAE;QAC5B,MAAM,EAAE,GAAG,uBAAuB,CAAC,MAAM,CAAC,EAAE,IAAI,MAAM,CAAC,GAAG,CAAC,CAAC;QAC5D,OAAO,CAAC,MAAM,mBAAmB,CAAC,EAAE,CAAC,CAAC,CAAC;IACzC,CAAC;CACF,CAAC,CAAC;AAEH,GAAG,CAAC;IACF,IAAI,EAAE,eAAe;IACrB,IAAI,EAAE,MAAM;IACZ,WAAW,EAAE,4DAA4D;IACzE,MAAM,EAAE,kBAAkB;IAC1B,QAAQ,EAAE,QAAQ,CAAC,MAAM;IACzB,IAAI,EAAE;QACJ;YACE,IAAI,EAAE,IAAI;YACV,IAAI,EAAE,KAAK;YACX,QAAQ,EAAE,IAAI;YACd,UAAU,EAAE,IAAI;YAChB,WAAW,EAAE,iDAAiD;YAC9D,eAAe,EAAE,IAAI;YACrB,kBAAkB,EAAE,CAAC,KAAK,CAAC;SAC5B;QACD;YACE,IAAI,EAAE,QAAQ;YACd,IAAI,EAAE,KAAK;YACX,OAAO,EAAE,2BAA2B;YACpC,WAAW,EAAE,kBAAkB;YAC/B,eAAe,EAAE,MAAM;SACxB;QACD;YACE,IAAI,EAAE,YAAY;YAClB,IAAI,EAAE,KAAK;YACX,OAAO,EAAE,CAAC;YACV,WAAW,EAAE,2BAA2B;SACzC;QACD;YACE,IAAI,EAAE,WAAW;YACjB,IAAI,EAAE,KAAK;YACX,OAAO,EAAE,EAAE;YACX,WAAW,EAAE,0BAA0B;SACxC;QACD;YACE,IAAI,EAAE,WAAW;YACjB,IAAI,EAAE,KAAK;YACX,OAAO,EAAE,KAAK;YACd,WAAW,EAAE,mCAAmC;SACjD;KACF;IACD,OAAO,EAAE;QACP,IAAI;QACJ,OAAO;QACP,gBAAgB;QAChB,YAAY;QACZ,SAAS;QACT,MAAM;QACN,aAAa;QACb,MAAM;QACN,YAAY;QACZ,gBAAgB;KACjB;IACD,YAAY,EAAE;QACZ,YAAY;QACZ,eAAe;QACf,iBAAiB;QACjB,kBAAkB;QAClB,aAAa;KACd;IACD,WAAW,EAAE,CAAC,WAAW,CAAC;IAC1B,kBAAkB,EAAE,iBAAiB;IACrC,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,EAAE;QAC5B,MAAM,EAAE,GAAG,uBAAuB,CAAC,MAAM,CAAC,EAAE,IAAI,MAAM,CAAC,GAAG,CAAC,CAAC;QAC5D,MAAM,MAAM,GAAG,MAAM,mBAAmB,CAAC,EAAE,CAAC,CAAC;QAC7C,OAAO,CAAC,MAAM,eAAe,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;IACjD,CAAC;CACF,CAAC,CAAC"}
@@ -1,9 +1,16 @@
1
1
  /**
2
2
  * @owner src/adapters/arxiv/papers.ts
3
- * @does Register agent-facing arXiv author and recent category commands.
4
- * @needs export.arxiv.org Atom API, category validation, conservative XML parsing.
5
- * @feeds surface coverage ledger, scholarly search workflow, arXiv category monitoring.
6
- * @breaks arXiv Atom shape drift, weak category parsing, or silent empty feeds hide paper discovery failures.
3
+ * @does Register agent-facing arXiv author, recent category, and PDF text-read commands.
4
+ * @needs export.arxiv.org Atom API, arxiv.org PDF URLs, category/id validation, conservative XML parsing, pdftotext.
5
+ * @feeds surface coverage ledger, scholarly search/read workflow, arXiv category monitoring.
6
+ * @breaks arXiv Atom/PDF shape drift, weak category/id parsing, denied PDF downloads, missing pdftotext, or silent empty feeds hide paper discovery/read failures.
7
+ * @invariants arXiv ids are normalized before URL construction; read returns PDF-derived text only and labels `text_source=pdf`.
8
+ * @side-effects HTTPS egress to export.arxiv.org and arxiv.org; read writes PDFs under the requested output directory and executes pdftotext.
9
+ * @perf O(limit) for Atom discovery; O(PDF bytes + extracted pages) for read.
10
+ * @concurrency safe - per-command local state only
11
+ * @test src/adapters/arxiv/papers.test.ts, tests/unit/commands/scholar.test.ts
12
+ * @stability experimental
13
+ * @since 0.225.2
7
14
  */
8
15
  interface ArxivEntry {
9
16
  id: string;
@@ -20,8 +27,19 @@ interface ArxivEntry {
20
27
  }
21
28
  export declare function requireArxivLimit(value: unknown, fallback: number, max?: number): number;
22
29
  export declare function requireArxivAuthor(value: unknown): string;
30
+ export declare function normalizeArxivId(value: unknown): string;
23
31
  export declare function requireArxivCategory(value: unknown): string;
24
32
  export declare function decodeArxivEntities(value: string): string;
25
33
  export declare function parseArxivEntries(xml: string): ArxivEntry[];
34
+ export declare function arxivArtifactFilename(input: {
35
+ id: string;
36
+ title?: unknown;
37
+ }): string;
38
+ export declare function requireArxivPageRange(firstPage: unknown, lastPage: unknown): {
39
+ firstPage: number;
40
+ lastPage: number;
41
+ };
42
+ export declare function requireArxivMaxChars(value: unknown, fallback?: number): number;
43
+ export declare function readArxivPaper(kwargs: Record<string, unknown>): Promise<Record<string, unknown>>;
26
44
  export {};
27
45
  //# sourceMappingURL=papers.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"papers.d.ts","sourceRoot":"","sources":["../../../src/adapters/arxiv/papers.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAOH,UAAU,UAAU;IAClB,EAAE,EAAE,MAAM,CAAC;IACX,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,gBAAgB,EAAE,MAAM,CAAC;IACzB,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;IAChB,GAAG,EAAE,MAAM,CAAC;IACZ,GAAG,EAAE,MAAM,CAAC;CACb;AAED,wBAAgB,iBAAiB,CAC/B,KAAK,EAAE,OAAO,EACd,QAAQ,EAAE,MAAM,EAChB,GAAG,SAAK,GACP,MAAM,CAOR;AAED,wBAAgB,kBAAkB,CAAC,KAAK,EAAE,OAAO,GAAG,MAAM,CAIzD;AAED,wBAAgB,oBAAoB,CAAC,KAAK,EAAE,OAAO,GAAG,MAAM,CAM3D;AAED,wBAAgB,mBAAmB,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAQzD;AAwCD,wBAAgB,iBAAiB,CAAC,GAAG,EAAE,MAAM,GAAG,UAAU,EAAE,CAgC3D"}
1
+ {"version":3,"file":"papers.d.ts","sourceRoot":"","sources":["../../../src/adapters/arxiv/papers.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAaH,UAAU,UAAU;IAClB,EAAE,EAAE,MAAM,CAAC;IACX,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,gBAAgB,EAAE,MAAM,CAAC;IACzB,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;IAChB,GAAG,EAAE,MAAM,CAAC;IACZ,GAAG,EAAE,MAAM,CAAC;CACb;AAED,wBAAgB,iBAAiB,CAC/B,KAAK,EAAE,OAAO,EACd,QAAQ,EAAE,MAAM,EAChB,GAAG,SAAK,GACP,MAAM,CAOR;AAED,wBAAgB,kBAAkB,CAAC,KAAK,EAAE,OAAO,GAAG,MAAM,CAIzD;AAED,wBAAgB,gBAAgB,CAAC,KAAK,EAAE,OAAO,GAAG,MAAM,CAYvD;AAED,wBAAgB,oBAAoB,CAAC,KAAK,EAAE,OAAO,GAAG,MAAM,CAM3D;AAED,wBAAgB,mBAAmB,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAQzD;AAwCD,wBAAgB,iBAAiB,CAAC,GAAG,EAAE,MAAM,GAAG,UAAU,EAAE,CAgC3D;AAiCD,wBAAgB,qBAAqB,CAAC,KAAK,EAAE;IAC3C,EAAE,EAAE,MAAM,CAAC;IACX,KAAK,CAAC,EAAE,OAAO,CAAC;CACjB,GAAG,MAAM,CAQT;AAED,wBAAgB,qBAAqB,CACnC,SAAS,EAAE,OAAO,EAClB,QAAQ,EAAE,OAAO,GAChB;IAAE,SAAS,EAAE,MAAM,CAAC;IAAC,QAAQ,EAAE,MAAM,CAAA;CAAE,CAUzC;AAED,wBAAgB,oBAAoB,CAClC,KAAK,EAAE,OAAO,EACd,QAAQ,SAAS,GAChB,MAAM,CASR;AA4BD,wBAAsB,cAAc,CAClC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAC9B,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CA0ElC"}