@zenalexa/unicli 0.225.2 → 0.225.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184)
  1. package/AGENTS.md +2 -2
  2. package/README.md +3 -3
  3. package/README.zh-CN.md +3 -3
  4. package/dist/adapters/acl-anthology/papers.d.ts +16 -9
  5. package/dist/adapters/acl-anthology/papers.d.ts.map +1 -1
  6. package/dist/adapters/acl-anthology/papers.js +322 -58
  7. package/dist/adapters/acl-anthology/papers.js.map +1 -1
  8. package/dist/adapters/arxiv/papers.d.ts +22 -4
  9. package/dist/adapters/arxiv/papers.d.ts.map +1 -1
  10. package/dist/adapters/arxiv/papers.js +202 -4
  11. package/dist/adapters/arxiv/papers.js.map +1 -1
  12. package/dist/adapters/baidu-scholar/search.d.ts +15 -1
  13. package/dist/adapters/baidu-scholar/search.d.ts.map +1 -1
  14. package/dist/adapters/baidu-scholar/search.js +72 -8
  15. package/dist/adapters/baidu-scholar/search.js.map +1 -1
  16. package/dist/adapters/biorxiv/preprints.d.ts +9 -0
  17. package/dist/adapters/biorxiv/preprints.d.ts.map +1 -0
  18. package/dist/adapters/biorxiv/preprints.js +78 -0
  19. package/dist/adapters/biorxiv/preprints.js.map +1 -0
  20. package/dist/adapters/cnki/search.d.ts +82 -0
  21. package/dist/adapters/cnki/search.d.ts.map +1 -0
  22. package/dist/adapters/cnki/search.js +236 -0
  23. package/dist/adapters/cnki/search.js.map +1 -0
  24. package/dist/adapters/cvf/papers.d.ts +12 -7
  25. package/dist/adapters/cvf/papers.d.ts.map +1 -1
  26. package/dist/adapters/cvf/papers.js +210 -27
  27. package/dist/adapters/cvf/papers.js.map +1 -1
  28. package/dist/adapters/dblp/publications.d.ts +12 -5
  29. package/dist/adapters/dblp/publications.d.ts.map +1 -1
  30. package/dist/adapters/dblp/publications.js +31 -8
  31. package/dist/adapters/dblp/publications.js.map +1 -1
  32. package/dist/adapters/google-scholar/search.d.ts +22 -1
  33. package/dist/adapters/google-scholar/search.d.ts.map +1 -1
  34. package/dist/adapters/google-scholar/search.js +129 -14
  35. package/dist/adapters/google-scholar/search.js.map +1 -1
  36. package/dist/adapters/hf/paper.d.ts +12 -3
  37. package/dist/adapters/hf/paper.d.ts.map +1 -1
  38. package/dist/adapters/hf/paper.js +65 -5
  39. package/dist/adapters/hf/paper.js.map +1 -1
  40. package/dist/adapters/medrxiv/preprints.d.ts +9 -0
  41. package/dist/adapters/medrxiv/preprints.d.ts.map +1 -0
  42. package/dist/adapters/medrxiv/preprints.js +78 -0
  43. package/dist/adapters/medrxiv/preprints.js.map +1 -0
  44. package/dist/adapters/neurips/proceedings.d.ts +8 -7
  45. package/dist/adapters/neurips/proceedings.d.ts.map +1 -1
  46. package/dist/adapters/neurips/proceedings.js +209 -21
  47. package/dist/adapters/neurips/proceedings.js.map +1 -1
  48. package/dist/adapters/openalex/works.d.ts +21 -5
  49. package/dist/adapters/openalex/works.d.ts.map +1 -1
  50. package/dist/adapters/openalex/works.js +108 -8
  51. package/dist/adapters/openalex/works.js.map +1 -1
  52. package/dist/adapters/openreview/papers.d.ts +10 -4
  53. package/dist/adapters/openreview/papers.d.ts.map +1 -1
  54. package/dist/adapters/openreview/papers.js +351 -24
  55. package/dist/adapters/openreview/papers.js.map +1 -1
  56. package/dist/adapters/pmlr/proceedings.d.ts +6 -6
  57. package/dist/adapters/pmlr/proceedings.d.ts.map +1 -1
  58. package/dist/adapters/pmlr/proceedings.js +92 -12
  59. package/dist/adapters/pmlr/proceedings.js.map +1 -1
  60. package/dist/adapters/pubmed/articles.d.ts +8 -4
  61. package/dist/adapters/pubmed/articles.d.ts.map +1 -1
  62. package/dist/adapters/pubmed/articles.js +272 -39
  63. package/dist/adapters/pubmed/articles.js.map +1 -1
  64. package/dist/adapters/rxiv/preprints.d.ts +75 -0
  65. package/dist/adapters/rxiv/preprints.d.ts.map +1 -0
  66. package/dist/adapters/rxiv/preprints.js +651 -0
  67. package/dist/adapters/rxiv/preprints.js.map +1 -0
  68. package/dist/adapters/scholar-artifacts/pdf-read.d.ts +49 -0
  69. package/dist/adapters/scholar-artifacts/pdf-read.d.ts.map +1 -0
  70. package/dist/adapters/scholar-artifacts/pdf-read.js +204 -0
  71. package/dist/adapters/scholar-artifacts/pdf-read.js.map +1 -0
  72. package/dist/adapters/scholar-artifacts/pdf.d.ts +16 -0
  73. package/dist/adapters/scholar-artifacts/pdf.d.ts.map +1 -0
  74. package/dist/adapters/scholar-artifacts/pdf.js +122 -0
  75. package/dist/adapters/scholar-artifacts/pdf.js.map +1 -0
  76. package/dist/adapters/semantic-scholar/papers.d.ts +6 -6
  77. package/dist/adapters/semantic-scholar/papers.d.ts.map +1 -1
  78. package/dist/adapters/semantic-scholar/papers.js +80 -6
  79. package/dist/adapters/semantic-scholar/papers.js.map +1 -1
  80. package/dist/adapters/unpaywall/works.d.ts +7 -7
  81. package/dist/adapters/unpaywall/works.d.ts.map +1 -1
  82. package/dist/adapters/unpaywall/works.js +104 -12
  83. package/dist/adapters/unpaywall/works.js.map +1 -1
  84. package/dist/adapters/wanfang/search.d.ts +14 -0
  85. package/dist/adapters/wanfang/search.d.ts.map +1 -1
  86. package/dist/adapters/wanfang/search.js +56 -7
  87. package/dist/adapters/wanfang/search.js.map +1 -1
  88. package/dist/browser/page.d.ts +2 -0
  89. package/dist/browser/page.d.ts.map +1 -1
  90. package/dist/browser/page.js +12 -0
  91. package/dist/browser/page.js.map +1 -1
  92. package/dist/commands/browser/actions.d.ts.map +1 -1
  93. package/dist/commands/browser/actions.js +59 -3
  94. package/dist/commands/browser/actions.js.map +1 -1
  95. package/dist/commands/scholar.d.ts +77 -5
  96. package/dist/commands/scholar.d.ts.map +1 -1
  97. package/dist/commands/scholar.js +2945 -83
  98. package/dist/commands/scholar.js.map +1 -1
  99. package/dist/core/command-contract.d.ts.map +1 -1
  100. package/dist/core/command-contract.js +5 -0
  101. package/dist/core/command-contract.js.map +1 -1
  102. package/dist/core/schema-v2.d.ts +1 -0
  103. package/dist/core/schema-v2.d.ts.map +1 -1
  104. package/dist/core/schema-v2.js +1 -0
  105. package/dist/core/schema-v2.js.map +1 -1
  106. package/dist/discovery/aliases.d.ts.map +1 -1
  107. package/dist/discovery/aliases.js +208 -0
  108. package/dist/discovery/aliases.js.map +1 -1
  109. package/dist/discovery/core-catalog.d.ts +2 -0
  110. package/dist/discovery/core-catalog.d.ts.map +1 -1
  111. package/dist/discovery/core-catalog.js +487 -0
  112. package/dist/discovery/core-catalog.js.map +1 -1
  113. package/dist/discovery/intents.d.ts.map +1 -1
  114. package/dist/discovery/intents.js +273 -2
  115. package/dist/discovery/intents.js.map +1 -1
  116. package/dist/discovery/loader.d.ts.map +1 -1
  117. package/dist/discovery/loader.js +3 -0
  118. package/dist/discovery/loader.js.map +1 -1
  119. package/dist/engine/capability-policy.d.ts.map +1 -1
  120. package/dist/engine/capability-policy.js +30 -4
  121. package/dist/engine/capability-policy.js.map +1 -1
  122. package/dist/engine/kernel/stages.d.ts.map +1 -1
  123. package/dist/engine/kernel/stages.js +3 -0
  124. package/dist/engine/kernel/stages.js.map +1 -1
  125. package/dist/engine/operation-policy.d.ts +4 -1
  126. package/dist/engine/operation-policy.d.ts.map +1 -1
  127. package/dist/engine/operation-policy.js +23 -0
  128. package/dist/engine/operation-policy.js.map +1 -1
  129. package/dist/fast-path/manifest.d.ts +3 -0
  130. package/dist/fast-path/manifest.d.ts.map +1 -1
  131. package/dist/fast-path/manifest.js.map +1 -1
  132. package/dist/fast-path/policy.d.ts.map +1 -1
  133. package/dist/fast-path/policy.js +3 -0
  134. package/dist/fast-path/policy.js.map +1 -1
  135. package/dist/manifest-compact.txt +1 -1
  136. package/dist/manifest.json +6804 -1002
  137. package/dist/registry.d.ts +2 -0
  138. package/dist/registry.d.ts.map +1 -1
  139. package/dist/registry.js +1 -0
  140. package/dist/registry.js.map +1 -1
  141. package/dist/types/scholarly.d.ts +19 -4
  142. package/dist/types/scholarly.d.ts.map +1 -1
  143. package/dist/types/scholarly.js +4 -4
  144. package/dist/types.d.ts +8 -0
  145. package/dist/types.d.ts.map +1 -1
  146. package/dist/types.js.map +1 -1
  147. package/package.json +1 -1
  148. package/server.json +2 -2
  149. package/skills/unicli/SKILL.md +1 -1
  150. package/skills/unicli-claude-code/SKILL.md +1 -1
  151. package/skills/unicli-hermes/SKILL.md +1 -1
  152. package/src/adapters/acl-anthology/papers.test.ts +111 -0
  153. package/src/adapters/acl-anthology/papers.ts +379 -71
  154. package/src/adapters/arxiv/papers.test.ts +46 -0
  155. package/src/adapters/arxiv/papers.ts +251 -4
  156. package/src/adapters/baidu-scholar/search.ts +74 -11
  157. package/src/adapters/biorxiv/preprints.ts +112 -0
  158. package/src/adapters/cnki/search.ts +357 -0
  159. package/src/adapters/cvf/papers.ts +260 -27
  160. package/src/adapters/dblp/publications.test.ts +9 -0
  161. package/src/adapters/dblp/publications.ts +31 -8
  162. package/src/adapters/google-scholar/search.ts +165 -17
  163. package/src/adapters/hf/paper.test.ts +23 -0
  164. package/src/adapters/hf/paper.ts +89 -5
  165. package/src/adapters/hf/top.yaml +34 -2
  166. package/src/adapters/huggingface-papers/daily.yaml +37 -3
  167. package/src/adapters/huggingface-papers/search.yaml +43 -9
  168. package/src/adapters/medrxiv/preprints.ts +112 -0
  169. package/src/adapters/neurips/proceedings.ts +266 -22
  170. package/src/adapters/openalex/works.test.ts +15 -4
  171. package/src/adapters/openalex/works.ts +136 -8
  172. package/src/adapters/openreview/papers.test.ts +31 -0
  173. package/src/adapters/openreview/papers.ts +407 -29
  174. package/src/adapters/pmlr/proceedings.ts +102 -12
  175. package/src/adapters/pubmed/articles.test.ts +88 -1
  176. package/src/adapters/pubmed/articles.ts +343 -44
  177. package/src/adapters/rxiv/preprints.test.ts +233 -0
  178. package/src/adapters/rxiv/preprints.ts +849 -0
  179. package/src/adapters/scholar-artifacts/pdf-read.ts +277 -0
  180. package/src/adapters/scholar-artifacts/pdf.ts +133 -0
  181. package/src/adapters/semantic-scholar/papers.ts +98 -6
  182. package/src/adapters/unpaywall/works.ts +141 -12
  183. package/src/adapters/wanfang/search.ts +57 -7
  184. package/src/adapters/cnki/search.yaml +0 -49
@@ -1,12 +1,12 @@
1
1
  /**
2
2
  * @owner src::adapters::unpaywall::works
3
- * @does Registers Unpaywall DOI open-access lookup for PDF availability.
4
- * @needs api.unpaywall.org v2, UNPAYWALL_EMAIL or --email, src/registry.ts
5
- * @feeds src/commands/scholar.ts via scholar.pdf and scholar.get
6
- * @breaks Missing email is an explicit invalid-input error; Unpaywall drift surfaces as adapter error, never as a fabricated PDF.
7
- * @invariants Only DOI-shaped references are accepted; best_oa_location is preferred for PDF and landing URLs.
8
- * @side-effects HTTPS egress to api.unpaywall.org only
9
- * @perf O(1) per DOI
3
+ * @does Registers Unpaywall DOI open-access lookup and source PDF read commands.
4
+ * @needs api.unpaywall.org v2, UNPAYWALL_EMAIL or --email, src/adapters/scholar-artifacts/pdf-read.ts, pdftotext
5
+ * @feeds src/commands/scholar.ts via scholar.pdf, scholar.get, and scholar.fulltext
6
+ * @breaks Missing email is an explicit invalid-input error; Unpaywall drift, missing best OA PDF URLs, or pdftotext failures surface as adapter errors.
7
+ * @invariants Only DOI-shaped references are accepted; best_oa_location is preferred for PDF and landing URLs; read requires a source-provided PDF URL.
8
+ * @side-effects HTTPS egress to api.unpaywall.org and source PDF hosts; read writes one PDF and executes pdftotext.
9
+ * @perf O(1) DOI lookup plus O(PDF bytes + extracted page range) for read
10
10
  * @concurrency safe
11
11
  * @test tests/unit/adapters/scholar-sources.test.ts
12
12
  * @stability experimental
@@ -15,9 +15,17 @@
15
15
 
16
16
  import { cli, Strategy } from "../../registry.js";
17
17
  import type { ScholarlyWorkRecord } from "../../types/scholarly.js";
18
+ import { readScholarPdf } from "../scholar-artifacts/pdf-read.js";
18
19
 
19
20
  const API = "https://api.unpaywall.org/v2";
20
21
 
22
+ type UnpaywallActionableError = Error & {
23
+ code?: string;
24
+ suggestion?: string;
25
+ retryable?: boolean;
26
+ alternatives?: string[];
27
+ };
28
+
21
29
  interface OaLocation {
22
30
  url_for_pdf?: unknown;
23
31
  url_for_landing_page?: unknown;
@@ -44,11 +52,37 @@ function bareDoi(value: unknown): string {
44
52
  .replace(/^https?:\/\/(?:dx\.)?doi\.org\//i, "");
45
53
  }
46
54
 
55
+ function unpaywallInputError(
56
+ message: string,
57
+ suggestion: string,
58
+ ): UnpaywallActionableError {
59
+ const error = new Error(message) as UnpaywallActionableError;
60
+ error.code = "invalid_input";
61
+ error.suggestion = suggestion;
62
+ error.retryable = false;
63
+ error.alternatives = [];
64
+ return error;
65
+ }
66
+
67
+ function unpaywallUpstreamError(
68
+ message: string,
69
+ retryable: boolean,
70
+ ): UnpaywallActionableError {
71
+ const error = new Error(message) as UnpaywallActionableError;
72
+ error.code = "upstream_error";
73
+ error.suggestion =
74
+ "Unpaywall did not return a usable open-access response on this request; retry later, provide a valid requester email, or fall back to OpenAlex/Semantic Scholar.";
75
+ error.retryable = retryable;
76
+ error.alternatives = [];
77
+ return error;
78
+ }
79
+
47
80
  export function requireUnpaywallDoi(value: unknown): string {
48
81
  const doi = bareDoi(value);
49
82
  if (!/^10\.\S+\/\S+/.test(doi)) {
50
- throw new Error(
83
+ throw unpaywallInputError(
51
84
  `unpaywall DOI "${String(value ?? "")}" is not recognised.`,
85
+ "Pass a DOI such as 10.1038/nature12373 or https://doi.org/10.1038/nature12373.",
52
86
  );
53
87
  }
54
88
  return doi;
@@ -57,7 +91,10 @@ export function requireUnpaywallDoi(value: unknown): string {
57
91
  function requireEmail(value: unknown): string {
58
92
  const email = str(value) || process.env.UNPAYWALL_EMAIL?.trim() || "";
59
93
  if (!/^[^\s@]+@[^\s@]+\.[^\s@]+$/.test(email)) {
60
- throw new Error("unpaywall lookup requires --email or UNPAYWALL_EMAIL.");
94
+ throw unpaywallInputError(
95
+ "unpaywall lookup requires --email or UNPAYWALL_EMAIL.",
96
+ "Pass --email <requester-email> to `unicli unpaywall oa`, or --unpaywall-email <requester-email> through `unicli scholar pdf/read/download`.",
97
+ );
61
98
  }
62
99
  return email;
63
100
  }
@@ -86,6 +123,32 @@ export function mapUnpaywallWork(
86
123
  };
87
124
  }
88
125
 
126
+ async function readUnpaywallWorkPdf(
127
+ row: ScholarlyWorkRecord,
128
+ kwargs: Record<string, unknown>,
129
+ ): Promise<Record<string, unknown>> {
130
+ const pdfUrl = str(row.pdf_url);
131
+ if (!pdfUrl) {
132
+ throw new Error(`Unpaywall work ${row.id} has no source PDF URL.`);
133
+ }
134
+ return readScholarPdf(
135
+ {
136
+ ...kwargs,
137
+ id: row.id,
138
+ title: row.title,
139
+ source_adapter: row.source_adapter,
140
+ source_url: row.source_url,
141
+ pdf_url: pdfUrl,
142
+ },
143
+ {
144
+ site: "unpaywall",
145
+ command: "read",
146
+ defaultOutput: "./unpaywall-downloads",
147
+ userAgent: "unicli-unpaywall/1.0 (https://github.com/olo-dot-io/Uni-CLI)",
148
+ },
149
+ );
150
+ }
151
+
89
152
  async function fetchUnpaywall(
90
153
  doi: string,
91
154
  email: string,
@@ -103,10 +166,17 @@ async function fetchUnpaywall(
103
166
  if (response.status === 404)
104
167
  throw new Error(`Unpaywall returned no result for ${doi}.`);
105
168
  if (response.status === 422)
106
- throw new Error("Unpaywall rejected the email parameter.");
107
- if (response.status === 429) throw new Error("Unpaywall returned HTTP 429.");
169
+ throw unpaywallInputError(
170
+ "Unpaywall rejected the email parameter.",
171
+ "Provide a valid requester email address; Unpaywall requires a real contact email for API use.",
172
+ );
173
+ if (response.status === 429)
174
+ throw unpaywallUpstreamError("Unpaywall returned HTTP 429.", true);
108
175
  if (!response.ok)
109
- throw new Error(`Unpaywall returned HTTP ${response.status}.`);
176
+ throw unpaywallUpstreamError(
177
+ `Unpaywall returned HTTP ${response.status}.`,
178
+ response.status >= 500,
179
+ );
110
180
  return response.json() as Promise<UnpaywallWork>;
111
181
  }
112
182
 
@@ -136,3 +206,62 @@ cli({
136
206
  return [mapUnpaywallWork(await fetchUnpaywall(doi, email), "unpaywall")];
137
207
  },
138
208
  });
209
+
210
+ cli({
211
+ site: "unpaywall",
212
+ name: "read",
213
+ description: "Download an Unpaywall open-access PDF by DOI and extract text",
214
+ domain: "api.unpaywall.org",
215
+ strategy: Strategy.PUBLIC,
216
+ args: [
217
+ { name: "doi", type: "str", required: true, positional: true },
218
+ { name: "email", type: "str", description: "Unpaywall requester email" },
219
+ {
220
+ name: "output",
221
+ type: "str",
222
+ default: "./unpaywall-downloads",
223
+ description: "Output directory for the downloaded PDF",
224
+ "x-unicli-kind": "path",
225
+ },
226
+ { name: "filename", type: "str", description: "Output PDF filename" },
227
+ { name: "first-page", type: "int", default: 1, description: "First page" },
228
+ { name: "last-page", type: "int", default: 20, description: "Last page" },
229
+ {
230
+ name: "max-chars",
231
+ type: "int",
232
+ default: 40000,
233
+ description: "Maximum extracted text characters",
234
+ },
235
+ ],
236
+ columns: [
237
+ "id",
238
+ "title",
239
+ "source_adapter",
240
+ "source_url",
241
+ "pdf_url",
242
+ "path",
243
+ "text_source",
244
+ "text",
245
+ "text_chars",
246
+ "text_truncated",
247
+ ],
248
+ capabilities: [
249
+ "http.fetch",
250
+ "http.download",
251
+ "subprocess.exec",
252
+ "scholar.fulltext",
253
+ "scholar.pdf",
254
+ ],
255
+ executables: ["pdftotext"],
256
+ minimum_capability: "subprocess.exec",
257
+ func: async (_page, kwargs) => {
258
+ const doi = requireUnpaywallDoi(kwargs.doi ?? kwargs.id ?? kwargs.ref);
259
+ const email = requireEmail(kwargs.email);
260
+ return [
261
+ await readUnpaywallWorkPdf(
262
+ mapUnpaywallWork(await fetchUnpaywall(doi, email), "unpaywall"),
263
+ kwargs,
264
+ ),
265
+ ];
266
+ },
267
+ });
@@ -1,3 +1,18 @@
1
+ /**
2
+ * @owner src::adapters::wanfang::search
3
+ * @does Registers the Wanfang browser search adapter as a discovery-only scholarly source with normalized paper rows.
4
+ * @needs src/registry.ts, src/types.ts, src/adapters/_shared/browser-tools.ts, Wanfang public search pages
5
+ * @feeds src/commands/scholar.ts capability discovery, `unicli wanfang search`, `unicli scholar coverage/doctor`
6
+ * @breaks Upstream DOM changes can return empty search rows; missing id/source_url prevents scholar-layer normalization.
7
+ * @invariants Search is discovery-only and never claims metadata-get, PDF, full-text, citation, review, code, or dataset evidence.
8
+ * @side-effects Navigates a Uni-CLI managed browser page to Wanfang public search.
9
+ * @perf O(limit) DOM extraction after one page navigation.
10
+ * @concurrency safe — command state is page-local
11
+ * @test tests/unit/commands/scholar.test.ts
12
+ * @stability experimental
13
+ * @since 2026-06-27
14
+ */
15
+
1
16
  import { cli, Strategy } from "../../registry.js";
2
17
  import type { IPage } from "../../types.js";
3
18
  import { intArg, js, str } from "../_shared/browser-tools.js";
@@ -13,7 +28,12 @@ cli({
13
28
  { name: "query", type: "str", required: true, positional: true },
14
29
  { name: "limit", type: "int", default: 10 },
15
30
  ],
16
- columns: ["title", "authors", "source", "url"],
31
+ columns: ["id", "title", "authors", "source", "year", "source_url"],
32
+ capabilities: [
33
+ "mcp-browser.navigate",
34
+ "mcp-browser.evaluate",
35
+ "scholar.search",
36
+ ],
17
37
  func: async (page, kwargs) => {
18
38
  const p = page as IPage;
19
39
  const limit = intArg(kwargs.limit, 10, 50);
@@ -22,14 +42,44 @@ cli({
22
42
  { settleMs: 2500 },
23
43
  );
24
44
  const rows = await p.evaluate(`(() => {
25
- const cards = [...document.querySelectorAll('.normal-list .item, .result-list .item, .paper-item, .record-item')];
45
+ const normalize = (value) => (value || '').replace(/\\s+/g, ' ').trim();
46
+ const detailUrl = (id) => {
47
+ const parts = String(id || '').split('_');
48
+ if (parts.length < 2) return '';
49
+ const type = parts[0];
50
+ const key = parts.slice(1).join('_');
51
+ return key ? 'https://d.wanfangdata.com.cn/' + type + '/' + key : '';
52
+ };
53
+ const cards = [...document.querySelectorAll('.normal-list')].filter((card) =>
54
+ card.querySelector('.title-area .title, .title-id-hidden')
55
+ );
26
56
  return cards.map((card) => {
27
- const link = card.querySelector('a[href], span.title a');
57
+ const id = normalize(card.querySelector('.title-id-hidden')?.textContent);
58
+ const title = normalize(card.querySelector('.title-area .title')?.textContent);
59
+ const authorArea = card.querySelector('.author-area');
60
+ const authorTexts = [...(authorArea?.querySelectorAll('.authors') || [])]
61
+ .map((node) => normalize(node.textContent))
62
+ .filter((text) => text && !/(19|20)\\d{2}年/.test(text) && text !== '等');
63
+ const source = normalize(authorArea?.querySelector('.periodical-title')?.textContent).replace(/^《|》$/g, '');
64
+ const type = normalize(authorArea?.querySelector('.essay-type')?.textContent);
65
+ const authorText = normalize(authorArea?.textContent);
66
+ const year = (authorText.match(/(19|20)\\d{2}/) || [])[0] || '';
67
+ const abstract = normalize(card.querySelector('.abstract-area')?.textContent).replace(/^摘要:?/, '');
68
+ const metrics = normalize(card.querySelector('.button-area')?.textContent);
69
+ const cited = (metrics.match(/被引[::]?\\s*(\\d+)/) || [])[1] || '';
70
+ const url = detailUrl(id);
28
71
  return {
29
- title: (card.querySelector('span.title, .title, a[href]')?.textContent || '').replace(/\\s+/g, ' ').trim(),
30
- authors: (card.querySelector('span.authors, .authors, .author')?.textContent || '').replace(/\\s+/g, ' ').trim(),
31
- source: (card.querySelector('.source, .journal, .info')?.textContent || '').replace(/\\s+/g, ' ').trim(),
32
- url: link ? new URL(link.getAttribute('href') || '', location.href).href : ''
72
+ id: url || title,
73
+ title,
74
+ authors: authorTexts.join(', '),
75
+ source,
76
+ venue: source,
77
+ type,
78
+ year,
79
+ abstract,
80
+ cited_by_count: cited,
81
+ source_url: url,
82
+ url
33
83
  };
34
84
  }).filter((row) => row.title).slice(0, ${js(limit)});
35
85
  })()`);
@@ -1,49 +0,0 @@
1
- site: cnki
2
- name: search
3
- description: Search CNKI academic papers
4
- domain: scholar.cnki.net
5
- type: web-api
6
- strategy: public
7
- lint_listing_detail: skip
8
-
9
- args:
10
- query:
11
- type: str
12
- required: true
13
- positional: true
14
- description: Search query
15
- limit:
16
- type: int
17
- default: 20
18
- description: Maximum results
19
-
20
- pipeline:
21
- - fetch:
22
- url: https://scholar.cnki.net/api/search
23
- params:
24
- q: "${{ args.query }}"
25
- page: "1"
26
- size: "${{ args.limit }}"
27
- sort: relevant
28
-
29
- - select: data.items
30
-
31
- - map:
32
- rank: ${{ index + 1 }}
33
- title: "${{ (item.title || '') | strip_html }}"
34
- authors: "${{ (item.authors || []).map(a => a.name).join(', ') }}"
35
- source: "${{ item.publicationName || '' }}"
36
- year: "${{ item.year || '' }}"
37
- citations: ${{ item.citedCount || 0 }}
38
-
39
- - limit: ${{ args.limit }}
40
-
41
- columns: [rank, title, authors, source, year, citations]
42
-
43
- # schema-v2 metadata — injected by `unicli migrate schema-v2`
44
- capabilities: ["http.fetch"]
45
- minimum_capability: http.fetch
46
- trust: public
47
- confidentiality: public
48
- quarantine: false
49
- schema_version: v2