@zenalexa/unicli 0.225.2 → 0.225.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +2 -2
- package/README.md +3 -3
- package/README.zh-CN.md +3 -3
- package/dist/adapters/acl-anthology/papers.d.ts +16 -9
- package/dist/adapters/acl-anthology/papers.d.ts.map +1 -1
- package/dist/adapters/acl-anthology/papers.js +322 -58
- package/dist/adapters/acl-anthology/papers.js.map +1 -1
- package/dist/adapters/arxiv/papers.d.ts +22 -4
- package/dist/adapters/arxiv/papers.d.ts.map +1 -1
- package/dist/adapters/arxiv/papers.js +202 -4
- package/dist/adapters/arxiv/papers.js.map +1 -1
- package/dist/adapters/baidu-scholar/search.d.ts +15 -1
- package/dist/adapters/baidu-scholar/search.d.ts.map +1 -1
- package/dist/adapters/baidu-scholar/search.js +72 -8
- package/dist/adapters/baidu-scholar/search.js.map +1 -1
- package/dist/adapters/biorxiv/preprints.d.ts +9 -0
- package/dist/adapters/biorxiv/preprints.d.ts.map +1 -0
- package/dist/adapters/biorxiv/preprints.js +78 -0
- package/dist/adapters/biorxiv/preprints.js.map +1 -0
- package/dist/adapters/cnki/search.d.ts +82 -0
- package/dist/adapters/cnki/search.d.ts.map +1 -0
- package/dist/adapters/cnki/search.js +236 -0
- package/dist/adapters/cnki/search.js.map +1 -0
- package/dist/adapters/cvf/papers.d.ts +12 -7
- package/dist/adapters/cvf/papers.d.ts.map +1 -1
- package/dist/adapters/cvf/papers.js +210 -27
- package/dist/adapters/cvf/papers.js.map +1 -1
- package/dist/adapters/dblp/publications.d.ts +12 -5
- package/dist/adapters/dblp/publications.d.ts.map +1 -1
- package/dist/adapters/dblp/publications.js +31 -8
- package/dist/adapters/dblp/publications.js.map +1 -1
- package/dist/adapters/google-scholar/search.d.ts +22 -1
- package/dist/adapters/google-scholar/search.d.ts.map +1 -1
- package/dist/adapters/google-scholar/search.js +129 -14
- package/dist/adapters/google-scholar/search.js.map +1 -1
- package/dist/adapters/hf/paper.d.ts +12 -3
- package/dist/adapters/hf/paper.d.ts.map +1 -1
- package/dist/adapters/hf/paper.js +65 -5
- package/dist/adapters/hf/paper.js.map +1 -1
- package/dist/adapters/medrxiv/preprints.d.ts +9 -0
- package/dist/adapters/medrxiv/preprints.d.ts.map +1 -0
- package/dist/adapters/medrxiv/preprints.js +78 -0
- package/dist/adapters/medrxiv/preprints.js.map +1 -0
- package/dist/adapters/neurips/proceedings.d.ts +8 -7
- package/dist/adapters/neurips/proceedings.d.ts.map +1 -1
- package/dist/adapters/neurips/proceedings.js +209 -21
- package/dist/adapters/neurips/proceedings.js.map +1 -1
- package/dist/adapters/openalex/works.d.ts +21 -5
- package/dist/adapters/openalex/works.d.ts.map +1 -1
- package/dist/adapters/openalex/works.js +108 -8
- package/dist/adapters/openalex/works.js.map +1 -1
- package/dist/adapters/openreview/papers.d.ts +10 -4
- package/dist/adapters/openreview/papers.d.ts.map +1 -1
- package/dist/adapters/openreview/papers.js +351 -24
- package/dist/adapters/openreview/papers.js.map +1 -1
- package/dist/adapters/pmlr/proceedings.d.ts +6 -6
- package/dist/adapters/pmlr/proceedings.d.ts.map +1 -1
- package/dist/adapters/pmlr/proceedings.js +92 -12
- package/dist/adapters/pmlr/proceedings.js.map +1 -1
- package/dist/adapters/pubmed/articles.d.ts +8 -4
- package/dist/adapters/pubmed/articles.d.ts.map +1 -1
- package/dist/adapters/pubmed/articles.js +272 -39
- package/dist/adapters/pubmed/articles.js.map +1 -1
- package/dist/adapters/rxiv/preprints.d.ts +75 -0
- package/dist/adapters/rxiv/preprints.d.ts.map +1 -0
- package/dist/adapters/rxiv/preprints.js +651 -0
- package/dist/adapters/rxiv/preprints.js.map +1 -0
- package/dist/adapters/scholar-artifacts/pdf-read.d.ts +49 -0
- package/dist/adapters/scholar-artifacts/pdf-read.d.ts.map +1 -0
- package/dist/adapters/scholar-artifacts/pdf-read.js +204 -0
- package/dist/adapters/scholar-artifacts/pdf-read.js.map +1 -0
- package/dist/adapters/scholar-artifacts/pdf.d.ts +16 -0
- package/dist/adapters/scholar-artifacts/pdf.d.ts.map +1 -0
- package/dist/adapters/scholar-artifacts/pdf.js +122 -0
- package/dist/adapters/scholar-artifacts/pdf.js.map +1 -0
- package/dist/adapters/semantic-scholar/papers.d.ts +6 -6
- package/dist/adapters/semantic-scholar/papers.d.ts.map +1 -1
- package/dist/adapters/semantic-scholar/papers.js +80 -6
- package/dist/adapters/semantic-scholar/papers.js.map +1 -1
- package/dist/adapters/unpaywall/works.d.ts +7 -7
- package/dist/adapters/unpaywall/works.d.ts.map +1 -1
- package/dist/adapters/unpaywall/works.js +104 -12
- package/dist/adapters/unpaywall/works.js.map +1 -1
- package/dist/adapters/wanfang/search.d.ts +14 -0
- package/dist/adapters/wanfang/search.d.ts.map +1 -1
- package/dist/adapters/wanfang/search.js +56 -7
- package/dist/adapters/wanfang/search.js.map +1 -1
- package/dist/browser/page.d.ts +2 -0
- package/dist/browser/page.d.ts.map +1 -1
- package/dist/browser/page.js +12 -0
- package/dist/browser/page.js.map +1 -1
- package/dist/commands/browser/actions.d.ts.map +1 -1
- package/dist/commands/browser/actions.js +59 -3
- package/dist/commands/browser/actions.js.map +1 -1
- package/dist/commands/scholar.d.ts +77 -5
- package/dist/commands/scholar.d.ts.map +1 -1
- package/dist/commands/scholar.js +2945 -83
- package/dist/commands/scholar.js.map +1 -1
- package/dist/core/command-contract.d.ts.map +1 -1
- package/dist/core/command-contract.js +5 -0
- package/dist/core/command-contract.js.map +1 -1
- package/dist/core/schema-v2.d.ts +1 -0
- package/dist/core/schema-v2.d.ts.map +1 -1
- package/dist/core/schema-v2.js +1 -0
- package/dist/core/schema-v2.js.map +1 -1
- package/dist/discovery/aliases.d.ts.map +1 -1
- package/dist/discovery/aliases.js +208 -0
- package/dist/discovery/aliases.js.map +1 -1
- package/dist/discovery/core-catalog.d.ts +2 -0
- package/dist/discovery/core-catalog.d.ts.map +1 -1
- package/dist/discovery/core-catalog.js +487 -0
- package/dist/discovery/core-catalog.js.map +1 -1
- package/dist/discovery/intents.d.ts.map +1 -1
- package/dist/discovery/intents.js +273 -2
- package/dist/discovery/intents.js.map +1 -1
- package/dist/discovery/loader.d.ts.map +1 -1
- package/dist/discovery/loader.js +3 -0
- package/dist/discovery/loader.js.map +1 -1
- package/dist/engine/capability-policy.d.ts.map +1 -1
- package/dist/engine/capability-policy.js +30 -4
- package/dist/engine/capability-policy.js.map +1 -1
- package/dist/engine/kernel/stages.d.ts.map +1 -1
- package/dist/engine/kernel/stages.js +3 -0
- package/dist/engine/kernel/stages.js.map +1 -1
- package/dist/engine/operation-policy.d.ts +4 -1
- package/dist/engine/operation-policy.d.ts.map +1 -1
- package/dist/engine/operation-policy.js +23 -0
- package/dist/engine/operation-policy.js.map +1 -1
- package/dist/fast-path/manifest.d.ts +3 -0
- package/dist/fast-path/manifest.d.ts.map +1 -1
- package/dist/fast-path/manifest.js.map +1 -1
- package/dist/fast-path/policy.d.ts.map +1 -1
- package/dist/fast-path/policy.js +3 -0
- package/dist/fast-path/policy.js.map +1 -1
- package/dist/manifest-compact.txt +1 -1
- package/dist/manifest.json +6804 -1002
- package/dist/registry.d.ts +2 -0
- package/dist/registry.d.ts.map +1 -1
- package/dist/registry.js +1 -0
- package/dist/registry.js.map +1 -1
- package/dist/types/scholarly.d.ts +19 -4
- package/dist/types/scholarly.d.ts.map +1 -1
- package/dist/types/scholarly.js +4 -4
- package/dist/types.d.ts +8 -0
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/package.json +1 -1
- package/server.json +2 -2
- package/skills/unicli/SKILL.md +1 -1
- package/skills/unicli-claude-code/SKILL.md +1 -1
- package/skills/unicli-hermes/SKILL.md +1 -1
- package/src/adapters/acl-anthology/papers.test.ts +111 -0
- package/src/adapters/acl-anthology/papers.ts +379 -71
- package/src/adapters/arxiv/papers.test.ts +46 -0
- package/src/adapters/arxiv/papers.ts +251 -4
- package/src/adapters/baidu-scholar/search.ts +74 -11
- package/src/adapters/biorxiv/preprints.ts +112 -0
- package/src/adapters/cnki/search.ts +357 -0
- package/src/adapters/cvf/papers.ts +260 -27
- package/src/adapters/dblp/publications.test.ts +9 -0
- package/src/adapters/dblp/publications.ts +31 -8
- package/src/adapters/google-scholar/search.ts +165 -17
- package/src/adapters/hf/paper.test.ts +23 -0
- package/src/adapters/hf/paper.ts +89 -5
- package/src/adapters/hf/top.yaml +34 -2
- package/src/adapters/huggingface-papers/daily.yaml +37 -3
- package/src/adapters/huggingface-papers/search.yaml +43 -9
- package/src/adapters/medrxiv/preprints.ts +112 -0
- package/src/adapters/neurips/proceedings.ts +266 -22
- package/src/adapters/openalex/works.test.ts +15 -4
- package/src/adapters/openalex/works.ts +136 -8
- package/src/adapters/openreview/papers.test.ts +31 -0
- package/src/adapters/openreview/papers.ts +407 -29
- package/src/adapters/pmlr/proceedings.ts +102 -12
- package/src/adapters/pubmed/articles.test.ts +88 -1
- package/src/adapters/pubmed/articles.ts +343 -44
- package/src/adapters/rxiv/preprints.test.ts +233 -0
- package/src/adapters/rxiv/preprints.ts +849 -0
- package/src/adapters/scholar-artifacts/pdf-read.ts +277 -0
- package/src/adapters/scholar-artifacts/pdf.ts +133 -0
- package/src/adapters/semantic-scholar/papers.ts +98 -6
- package/src/adapters/unpaywall/works.ts +141 -12
- package/src/adapters/wanfang/search.ts +57 -7
- package/src/adapters/cnki/search.yaml +0 -49
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* @owner src::adapters::unpaywall::works
|
|
3
|
-
* @does Registers Unpaywall DOI open-access lookup
|
|
4
|
-
* @needs api.unpaywall.org v2, UNPAYWALL_EMAIL or --email, src/
|
|
5
|
-
* @feeds src/commands/scholar.ts via scholar.pdf and scholar.
|
|
6
|
-
* @breaks Missing email is an explicit invalid-input error; Unpaywall drift
|
|
7
|
-
* @invariants Only DOI-shaped references are accepted; best_oa_location is preferred for PDF and landing URLs.
|
|
8
|
-
* @side-effects HTTPS egress to api.unpaywall.org
|
|
9
|
-
* @perf O(1)
|
|
3
|
+
* @does Registers Unpaywall DOI open-access lookup and source PDF read commands.
|
|
4
|
+
* @needs api.unpaywall.org v2, UNPAYWALL_EMAIL or --email, src/adapters/scholar-artifacts/pdf-read.ts, pdftotext
|
|
5
|
+
* @feeds src/commands/scholar.ts via scholar.pdf, scholar.get, and scholar.fulltext
|
|
6
|
+
* @breaks Missing email is an explicit invalid-input error; Unpaywall drift, missing best OA PDF URLs, or pdftotext failures surface as adapter errors.
|
|
7
|
+
* @invariants Only DOI-shaped references are accepted; best_oa_location is preferred for PDF and landing URLs; read requires a source-provided PDF URL.
|
|
8
|
+
* @side-effects HTTPS egress to api.unpaywall.org and source PDF hosts; read writes one PDF and executes pdftotext.
|
|
9
|
+
* @perf O(1) DOI lookup plus O(PDF bytes + extracted page range) for read
|
|
10
10
|
* @concurrency safe
|
|
11
11
|
* @test tests/unit/adapters/scholar-sources.test.ts
|
|
12
12
|
* @stability experimental
|
|
@@ -15,9 +15,17 @@
|
|
|
15
15
|
|
|
16
16
|
import { cli, Strategy } from "../../registry.js";
|
|
17
17
|
import type { ScholarlyWorkRecord } from "../../types/scholarly.js";
|
|
18
|
+
import { readScholarPdf } from "../scholar-artifacts/pdf-read.js";
|
|
18
19
|
|
|
19
20
|
const API = "https://api.unpaywall.org/v2";
|
|
20
21
|
|
|
22
|
+
type UnpaywallActionableError = Error & {
|
|
23
|
+
code?: string;
|
|
24
|
+
suggestion?: string;
|
|
25
|
+
retryable?: boolean;
|
|
26
|
+
alternatives?: string[];
|
|
27
|
+
};
|
|
28
|
+
|
|
21
29
|
interface OaLocation {
|
|
22
30
|
url_for_pdf?: unknown;
|
|
23
31
|
url_for_landing_page?: unknown;
|
|
@@ -44,11 +52,37 @@ function bareDoi(value: unknown): string {
|
|
|
44
52
|
.replace(/^https?:\/\/(?:dx\.)?doi\.org\//i, "");
|
|
45
53
|
}
|
|
46
54
|
|
|
55
|
+
function unpaywallInputError(
|
|
56
|
+
message: string,
|
|
57
|
+
suggestion: string,
|
|
58
|
+
): UnpaywallActionableError {
|
|
59
|
+
const error = new Error(message) as UnpaywallActionableError;
|
|
60
|
+
error.code = "invalid_input";
|
|
61
|
+
error.suggestion = suggestion;
|
|
62
|
+
error.retryable = false;
|
|
63
|
+
error.alternatives = [];
|
|
64
|
+
return error;
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
function unpaywallUpstreamError(
|
|
68
|
+
message: string,
|
|
69
|
+
retryable: boolean,
|
|
70
|
+
): UnpaywallActionableError {
|
|
71
|
+
const error = new Error(message) as UnpaywallActionableError;
|
|
72
|
+
error.code = "upstream_error";
|
|
73
|
+
error.suggestion =
|
|
74
|
+
"Unpaywall did not return a usable open-access response on this request; retry later, provide a valid requester email, or fall back to OpenAlex/Semantic Scholar.";
|
|
75
|
+
error.retryable = retryable;
|
|
76
|
+
error.alternatives = [];
|
|
77
|
+
return error;
|
|
78
|
+
}
|
|
79
|
+
|
|
47
80
|
export function requireUnpaywallDoi(value: unknown): string {
|
|
48
81
|
const doi = bareDoi(value);
|
|
49
82
|
if (!/^10\.\S+\/\S+/.test(doi)) {
|
|
50
|
-
throw
|
|
83
|
+
throw unpaywallInputError(
|
|
51
84
|
`unpaywall DOI "${String(value ?? "")}" is not recognised.`,
|
|
85
|
+
"Pass a DOI such as 10.1038/nature12373 or https://doi.org/10.1038/nature12373.",
|
|
52
86
|
);
|
|
53
87
|
}
|
|
54
88
|
return doi;
|
|
@@ -57,7 +91,10 @@ export function requireUnpaywallDoi(value: unknown): string {
|
|
|
57
91
|
function requireEmail(value: unknown): string {
|
|
58
92
|
const email = str(value) || process.env.UNPAYWALL_EMAIL?.trim() || "";
|
|
59
93
|
if (!/^[^\s@]+@[^\s@]+\.[^\s@]+$/.test(email)) {
|
|
60
|
-
throw
|
|
94
|
+
throw unpaywallInputError(
|
|
95
|
+
"unpaywall lookup requires --email or UNPAYWALL_EMAIL.",
|
|
96
|
+
"Pass --email <requester-email> to `unicli unpaywall oa`, or --unpaywall-email <requester-email> through `unicli scholar pdf/read/download`.",
|
|
97
|
+
);
|
|
61
98
|
}
|
|
62
99
|
return email;
|
|
63
100
|
}
|
|
@@ -86,6 +123,32 @@ export function mapUnpaywallWork(
|
|
|
86
123
|
};
|
|
87
124
|
}
|
|
88
125
|
|
|
126
|
+
async function readUnpaywallWorkPdf(
|
|
127
|
+
row: ScholarlyWorkRecord,
|
|
128
|
+
kwargs: Record<string, unknown>,
|
|
129
|
+
): Promise<Record<string, unknown>> {
|
|
130
|
+
const pdfUrl = str(row.pdf_url);
|
|
131
|
+
if (!pdfUrl) {
|
|
132
|
+
throw new Error(`Unpaywall work ${row.id} has no source PDF URL.`);
|
|
133
|
+
}
|
|
134
|
+
return readScholarPdf(
|
|
135
|
+
{
|
|
136
|
+
...kwargs,
|
|
137
|
+
id: row.id,
|
|
138
|
+
title: row.title,
|
|
139
|
+
source_adapter: row.source_adapter,
|
|
140
|
+
source_url: row.source_url,
|
|
141
|
+
pdf_url: pdfUrl,
|
|
142
|
+
},
|
|
143
|
+
{
|
|
144
|
+
site: "unpaywall",
|
|
145
|
+
command: "read",
|
|
146
|
+
defaultOutput: "./unpaywall-downloads",
|
|
147
|
+
userAgent: "unicli-unpaywall/1.0 (https://github.com/olo-dot-io/Uni-CLI)",
|
|
148
|
+
},
|
|
149
|
+
);
|
|
150
|
+
}
|
|
151
|
+
|
|
89
152
|
async function fetchUnpaywall(
|
|
90
153
|
doi: string,
|
|
91
154
|
email: string,
|
|
@@ -103,10 +166,17 @@ async function fetchUnpaywall(
|
|
|
103
166
|
if (response.status === 404)
|
|
104
167
|
throw new Error(`Unpaywall returned no result for ${doi}.`);
|
|
105
168
|
if (response.status === 422)
|
|
106
|
-
throw
|
|
107
|
-
|
|
169
|
+
throw unpaywallInputError(
|
|
170
|
+
"Unpaywall rejected the email parameter.",
|
|
171
|
+
"Provide a valid requester email address; Unpaywall requires a real contact email for API use.",
|
|
172
|
+
);
|
|
173
|
+
if (response.status === 429)
|
|
174
|
+
throw unpaywallUpstreamError("Unpaywall returned HTTP 429.", true);
|
|
108
175
|
if (!response.ok)
|
|
109
|
-
throw
|
|
176
|
+
throw unpaywallUpstreamError(
|
|
177
|
+
`Unpaywall returned HTTP ${response.status}.`,
|
|
178
|
+
response.status >= 500,
|
|
179
|
+
);
|
|
110
180
|
return response.json() as Promise<UnpaywallWork>;
|
|
111
181
|
}
|
|
112
182
|
|
|
@@ -136,3 +206,62 @@ cli({
|
|
|
136
206
|
return [mapUnpaywallWork(await fetchUnpaywall(doi, email), "unpaywall")];
|
|
137
207
|
},
|
|
138
208
|
});
|
|
209
|
+
|
|
210
|
+
cli({
|
|
211
|
+
site: "unpaywall",
|
|
212
|
+
name: "read",
|
|
213
|
+
description: "Download an Unpaywall open-access PDF by DOI and extract text",
|
|
214
|
+
domain: "api.unpaywall.org",
|
|
215
|
+
strategy: Strategy.PUBLIC,
|
|
216
|
+
args: [
|
|
217
|
+
{ name: "doi", type: "str", required: true, positional: true },
|
|
218
|
+
{ name: "email", type: "str", description: "Unpaywall requester email" },
|
|
219
|
+
{
|
|
220
|
+
name: "output",
|
|
221
|
+
type: "str",
|
|
222
|
+
default: "./unpaywall-downloads",
|
|
223
|
+
description: "Output directory for the downloaded PDF",
|
|
224
|
+
"x-unicli-kind": "path",
|
|
225
|
+
},
|
|
226
|
+
{ name: "filename", type: "str", description: "Output PDF filename" },
|
|
227
|
+
{ name: "first-page", type: "int", default: 1, description: "First page" },
|
|
228
|
+
{ name: "last-page", type: "int", default: 20, description: "Last page" },
|
|
229
|
+
{
|
|
230
|
+
name: "max-chars",
|
|
231
|
+
type: "int",
|
|
232
|
+
default: 40000,
|
|
233
|
+
description: "Maximum extracted text characters",
|
|
234
|
+
},
|
|
235
|
+
],
|
|
236
|
+
columns: [
|
|
237
|
+
"id",
|
|
238
|
+
"title",
|
|
239
|
+
"source_adapter",
|
|
240
|
+
"source_url",
|
|
241
|
+
"pdf_url",
|
|
242
|
+
"path",
|
|
243
|
+
"text_source",
|
|
244
|
+
"text",
|
|
245
|
+
"text_chars",
|
|
246
|
+
"text_truncated",
|
|
247
|
+
],
|
|
248
|
+
capabilities: [
|
|
249
|
+
"http.fetch",
|
|
250
|
+
"http.download",
|
|
251
|
+
"subprocess.exec",
|
|
252
|
+
"scholar.fulltext",
|
|
253
|
+
"scholar.pdf",
|
|
254
|
+
],
|
|
255
|
+
executables: ["pdftotext"],
|
|
256
|
+
minimum_capability: "subprocess.exec",
|
|
257
|
+
func: async (_page, kwargs) => {
|
|
258
|
+
const doi = requireUnpaywallDoi(kwargs.doi ?? kwargs.id ?? kwargs.ref);
|
|
259
|
+
const email = requireEmail(kwargs.email);
|
|
260
|
+
return [
|
|
261
|
+
await readUnpaywallWorkPdf(
|
|
262
|
+
mapUnpaywallWork(await fetchUnpaywall(doi, email), "unpaywall"),
|
|
263
|
+
kwargs,
|
|
264
|
+
),
|
|
265
|
+
];
|
|
266
|
+
},
|
|
267
|
+
});
|
|
@@ -1,3 +1,18 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @owner src::adapters::wanfang::search
|
|
3
|
+
* @does Registers the Wanfang browser search adapter as a discovery-only scholarly source with normalized paper rows.
|
|
4
|
+
* @needs src/registry.ts, src/types.ts, src/adapters/_shared/browser-tools.ts, Wanfang public search pages
|
|
5
|
+
* @feeds src/commands/scholar.ts capability discovery, `unicli wanfang search`, `unicli scholar coverage/doctor`
|
|
6
|
+
* @breaks Upstream DOM changes can return empty search rows; missing id/source_url prevents scholar-layer normalization.
|
|
7
|
+
* @invariants Search is discovery-only and never claims metadata-get, PDF, full-text, citation, review, code, or dataset evidence.
|
|
8
|
+
* @side-effects Navigates a Uni-CLI managed browser page to Wanfang public search.
|
|
9
|
+
* @perf O(limit) DOM extraction after one page navigation.
|
|
10
|
+
* @concurrency safe — command state is page-local
|
|
11
|
+
* @test tests/unit/commands/scholar.test.ts
|
|
12
|
+
* @stability experimental
|
|
13
|
+
* @since 2026-06-27
|
|
14
|
+
*/
|
|
15
|
+
|
|
1
16
|
import { cli, Strategy } from "../../registry.js";
|
|
2
17
|
import type { IPage } from "../../types.js";
|
|
3
18
|
import { intArg, js, str } from "../_shared/browser-tools.js";
|
|
@@ -13,7 +28,12 @@ cli({
|
|
|
13
28
|
{ name: "query", type: "str", required: true, positional: true },
|
|
14
29
|
{ name: "limit", type: "int", default: 10 },
|
|
15
30
|
],
|
|
16
|
-
columns: ["title", "authors", "source", "
|
|
31
|
+
columns: ["id", "title", "authors", "source", "year", "source_url"],
|
|
32
|
+
capabilities: [
|
|
33
|
+
"mcp-browser.navigate",
|
|
34
|
+
"mcp-browser.evaluate",
|
|
35
|
+
"scholar.search",
|
|
36
|
+
],
|
|
17
37
|
func: async (page, kwargs) => {
|
|
18
38
|
const p = page as IPage;
|
|
19
39
|
const limit = intArg(kwargs.limit, 10, 50);
|
|
@@ -22,14 +42,44 @@ cli({
|
|
|
22
42
|
{ settleMs: 2500 },
|
|
23
43
|
);
|
|
24
44
|
const rows = await p.evaluate(`(() => {
|
|
25
|
-
const
|
|
45
|
+
const normalize = (value) => (value || '').replace(/\\s+/g, ' ').trim();
|
|
46
|
+
const detailUrl = (id) => {
|
|
47
|
+
const parts = String(id || '').split('_');
|
|
48
|
+
if (parts.length < 2) return '';
|
|
49
|
+
const type = parts[0];
|
|
50
|
+
const key = parts.slice(1).join('_');
|
|
51
|
+
return key ? 'https://d.wanfangdata.com.cn/' + type + '/' + key : '';
|
|
52
|
+
};
|
|
53
|
+
const cards = [...document.querySelectorAll('.normal-list')].filter((card) =>
|
|
54
|
+
card.querySelector('.title-area .title, .title-id-hidden')
|
|
55
|
+
);
|
|
26
56
|
return cards.map((card) => {
|
|
27
|
-
const
|
|
57
|
+
const id = normalize(card.querySelector('.title-id-hidden')?.textContent);
|
|
58
|
+
const title = normalize(card.querySelector('.title-area .title')?.textContent);
|
|
59
|
+
const authorArea = card.querySelector('.author-area');
|
|
60
|
+
const authorTexts = [...(authorArea?.querySelectorAll('.authors') || [])]
|
|
61
|
+
.map((node) => normalize(node.textContent))
|
|
62
|
+
.filter((text) => text && !/(19|20)\\d{2}年/.test(text) && text !== '等');
|
|
63
|
+
const source = normalize(authorArea?.querySelector('.periodical-title')?.textContent).replace(/^《|》$/g, '');
|
|
64
|
+
const type = normalize(authorArea?.querySelector('.essay-type')?.textContent);
|
|
65
|
+
const authorText = normalize(authorArea?.textContent);
|
|
66
|
+
const year = (authorText.match(/(19|20)\\d{2}/) || [])[0] || '';
|
|
67
|
+
const abstract = normalize(card.querySelector('.abstract-area')?.textContent).replace(/^摘要:?/, '');
|
|
68
|
+
const metrics = normalize(card.querySelector('.button-area')?.textContent);
|
|
69
|
+
const cited = (metrics.match(/被引[::]?\\s*(\\d+)/) || [])[1] || '';
|
|
70
|
+
const url = detailUrl(id);
|
|
28
71
|
return {
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
72
|
+
id: url || title,
|
|
73
|
+
title,
|
|
74
|
+
authors: authorTexts.join(', '),
|
|
75
|
+
source,
|
|
76
|
+
venue: source,
|
|
77
|
+
type,
|
|
78
|
+
year,
|
|
79
|
+
abstract,
|
|
80
|
+
cited_by_count: cited,
|
|
81
|
+
source_url: url,
|
|
82
|
+
url
|
|
33
83
|
};
|
|
34
84
|
}).filter((row) => row.title).slice(0, ${js(limit)});
|
|
35
85
|
})()`);
|
|
@@ -1,49 +0,0 @@
|
|
|
1
|
-
site: cnki
|
|
2
|
-
name: search
|
|
3
|
-
description: Search CNKI academic papers
|
|
4
|
-
domain: scholar.cnki.net
|
|
5
|
-
type: web-api
|
|
6
|
-
strategy: public
|
|
7
|
-
lint_listing_detail: skip
|
|
8
|
-
|
|
9
|
-
args:
|
|
10
|
-
query:
|
|
11
|
-
type: str
|
|
12
|
-
required: true
|
|
13
|
-
positional: true
|
|
14
|
-
description: Search query
|
|
15
|
-
limit:
|
|
16
|
-
type: int
|
|
17
|
-
default: 20
|
|
18
|
-
description: Maximum results
|
|
19
|
-
|
|
20
|
-
pipeline:
|
|
21
|
-
- fetch:
|
|
22
|
-
url: https://scholar.cnki.net/api/search
|
|
23
|
-
params:
|
|
24
|
-
q: "${{ args.query }}"
|
|
25
|
-
page: "1"
|
|
26
|
-
size: "${{ args.limit }}"
|
|
27
|
-
sort: relevant
|
|
28
|
-
|
|
29
|
-
- select: data.items
|
|
30
|
-
|
|
31
|
-
- map:
|
|
32
|
-
rank: ${{ index + 1 }}
|
|
33
|
-
title: "${{ (item.title || '') | strip_html }}"
|
|
34
|
-
authors: "${{ (item.authors || []).map(a => a.name).join(', ') }}"
|
|
35
|
-
source: "${{ item.publicationName || '' }}"
|
|
36
|
-
year: "${{ item.year || '' }}"
|
|
37
|
-
citations: ${{ item.citedCount || 0 }}
|
|
38
|
-
|
|
39
|
-
- limit: ${{ args.limit }}
|
|
40
|
-
|
|
41
|
-
columns: [rank, title, authors, source, year, citations]
|
|
42
|
-
|
|
43
|
-
# schema-v2 metadata — injected by `unicli migrate schema-v2`
|
|
44
|
-
capabilities: ["http.fetch"]
|
|
45
|
-
minimum_capability: http.fetch
|
|
46
|
-
trust: public
|
|
47
|
-
confidentiality: public
|
|
48
|
-
quarantine: false
|
|
49
|
-
schema_version: v2
|