@zenalexa/unicli 0.225.1 → 0.225.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +3 -3
- package/README.md +3 -3
- package/README.zh-CN.md +3 -3
- package/dist/adapters/acl-anthology/papers.d.ts +16 -9
- package/dist/adapters/acl-anthology/papers.d.ts.map +1 -1
- package/dist/adapters/acl-anthology/papers.js +322 -58
- package/dist/adapters/acl-anthology/papers.js.map +1 -1
- package/dist/adapters/arxiv/papers.d.ts +22 -4
- package/dist/adapters/arxiv/papers.d.ts.map +1 -1
- package/dist/adapters/arxiv/papers.js +202 -4
- package/dist/adapters/arxiv/papers.js.map +1 -1
- package/dist/adapters/baidu-scholar/search.d.ts +15 -1
- package/dist/adapters/baidu-scholar/search.d.ts.map +1 -1
- package/dist/adapters/baidu-scholar/search.js +72 -8
- package/dist/adapters/baidu-scholar/search.js.map +1 -1
- package/dist/adapters/biorxiv/preprints.d.ts +9 -0
- package/dist/adapters/biorxiv/preprints.d.ts.map +1 -0
- package/dist/adapters/biorxiv/preprints.js +78 -0
- package/dist/adapters/biorxiv/preprints.js.map +1 -0
- package/dist/adapters/cnki/search.d.ts +82 -0
- package/dist/adapters/cnki/search.d.ts.map +1 -0
- package/dist/adapters/cnki/search.js +236 -0
- package/dist/adapters/cnki/search.js.map +1 -0
- package/dist/adapters/cvf/papers.d.ts +12 -7
- package/dist/adapters/cvf/papers.d.ts.map +1 -1
- package/dist/adapters/cvf/papers.js +210 -27
- package/dist/adapters/cvf/papers.js.map +1 -1
- package/dist/adapters/dblp/publications.d.ts +12 -5
- package/dist/adapters/dblp/publications.d.ts.map +1 -1
- package/dist/adapters/dblp/publications.js +31 -8
- package/dist/adapters/dblp/publications.js.map +1 -1
- package/dist/adapters/google-scholar/search.d.ts +22 -1
- package/dist/adapters/google-scholar/search.d.ts.map +1 -1
- package/dist/adapters/google-scholar/search.js +129 -14
- package/dist/adapters/google-scholar/search.js.map +1 -1
- package/dist/adapters/hf/paper.d.ts +12 -3
- package/dist/adapters/hf/paper.d.ts.map +1 -1
- package/dist/adapters/hf/paper.js +65 -5
- package/dist/adapters/hf/paper.js.map +1 -1
- package/dist/adapters/medrxiv/preprints.d.ts +9 -0
- package/dist/adapters/medrxiv/preprints.d.ts.map +1 -0
- package/dist/adapters/medrxiv/preprints.js +78 -0
- package/dist/adapters/medrxiv/preprints.js.map +1 -0
- package/dist/adapters/neurips/proceedings.d.ts +8 -7
- package/dist/adapters/neurips/proceedings.d.ts.map +1 -1
- package/dist/adapters/neurips/proceedings.js +209 -21
- package/dist/adapters/neurips/proceedings.js.map +1 -1
- package/dist/adapters/openalex/works.d.ts +21 -5
- package/dist/adapters/openalex/works.d.ts.map +1 -1
- package/dist/adapters/openalex/works.js +108 -8
- package/dist/adapters/openalex/works.js.map +1 -1
- package/dist/adapters/openreview/papers.d.ts +10 -4
- package/dist/adapters/openreview/papers.d.ts.map +1 -1
- package/dist/adapters/openreview/papers.js +351 -24
- package/dist/adapters/openreview/papers.js.map +1 -1
- package/dist/adapters/pmlr/proceedings.d.ts +6 -6
- package/dist/adapters/pmlr/proceedings.d.ts.map +1 -1
- package/dist/adapters/pmlr/proceedings.js +92 -12
- package/dist/adapters/pmlr/proceedings.js.map +1 -1
- package/dist/adapters/pubmed/articles.d.ts +8 -4
- package/dist/adapters/pubmed/articles.d.ts.map +1 -1
- package/dist/adapters/pubmed/articles.js +272 -39
- package/dist/adapters/pubmed/articles.js.map +1 -1
- package/dist/adapters/rxiv/preprints.d.ts +75 -0
- package/dist/adapters/rxiv/preprints.d.ts.map +1 -0
- package/dist/adapters/rxiv/preprints.js +651 -0
- package/dist/adapters/rxiv/preprints.js.map +1 -0
- package/dist/adapters/scholar-artifacts/pdf-read.d.ts +49 -0
- package/dist/adapters/scholar-artifacts/pdf-read.d.ts.map +1 -0
- package/dist/adapters/scholar-artifacts/pdf-read.js +204 -0
- package/dist/adapters/scholar-artifacts/pdf-read.js.map +1 -0
- package/dist/adapters/scholar-artifacts/pdf.d.ts +16 -0
- package/dist/adapters/scholar-artifacts/pdf.d.ts.map +1 -0
- package/dist/adapters/scholar-artifacts/pdf.js +122 -0
- package/dist/adapters/scholar-artifacts/pdf.js.map +1 -0
- package/dist/adapters/semantic-scholar/papers.d.ts +6 -6
- package/dist/adapters/semantic-scholar/papers.d.ts.map +1 -1
- package/dist/adapters/semantic-scholar/papers.js +80 -6
- package/dist/adapters/semantic-scholar/papers.js.map +1 -1
- package/dist/adapters/unpaywall/works.d.ts +7 -7
- package/dist/adapters/unpaywall/works.d.ts.map +1 -1
- package/dist/adapters/unpaywall/works.js +104 -12
- package/dist/adapters/unpaywall/works.js.map +1 -1
- package/dist/adapters/wanfang/search.d.ts +14 -0
- package/dist/adapters/wanfang/search.d.ts.map +1 -1
- package/dist/adapters/wanfang/search.js +56 -7
- package/dist/adapters/wanfang/search.js.map +1 -1
- package/dist/browser/page.d.ts +2 -0
- package/dist/browser/page.d.ts.map +1 -1
- package/dist/browser/page.js +12 -0
- package/dist/browser/page.js.map +1 -1
- package/dist/browser/protocol.d.ts +6 -1
- package/dist/browser/protocol.d.ts.map +1 -1
- package/dist/browser/protocol.js.map +1 -1
- package/dist/commands/browser/actions.d.ts.map +1 -1
- package/dist/commands/browser/actions.js +487 -8
- package/dist/commands/browser/actions.js.map +1 -1
- package/dist/commands/compute.js +12 -1
- package/dist/commands/compute.js.map +1 -1
- package/dist/commands/schema.d.ts.map +1 -1
- package/dist/commands/schema.js +22 -0
- package/dist/commands/schema.js.map +1 -1
- package/dist/commands/scholar.d.ts +77 -5
- package/dist/commands/scholar.d.ts.map +1 -1
- package/dist/commands/scholar.js +2945 -83
- package/dist/commands/scholar.js.map +1 -1
- package/dist/commands/search.d.ts.map +1 -1
- package/dist/commands/search.js +14 -3
- package/dist/commands/search.js.map +1 -1
- package/dist/compute/contracts.d.ts +55 -0
- package/dist/compute/contracts.d.ts.map +1 -0
- package/dist/compute/contracts.js +487 -0
- package/dist/compute/contracts.js.map +1 -0
- package/dist/core/command-contract.d.ts.map +1 -1
- package/dist/core/command-contract.js +5 -0
- package/dist/core/command-contract.js.map +1 -1
- package/dist/core/schema-v2.d.ts +1 -0
- package/dist/core/schema-v2.d.ts.map +1 -1
- package/dist/core/schema-v2.js +1 -0
- package/dist/core/schema-v2.js.map +1 -1
- package/dist/discovery/aliases.d.ts +8 -1
- package/dist/discovery/aliases.d.ts.map +1 -1
- package/dist/discovery/aliases.js +333 -20
- package/dist/discovery/aliases.js.map +1 -1
- package/dist/discovery/core-catalog.d.ts +2 -0
- package/dist/discovery/core-catalog.d.ts.map +1 -1
- package/dist/discovery/core-catalog.js +525 -66
- package/dist/discovery/core-catalog.js.map +1 -1
- package/dist/discovery/intents.d.ts +1 -0
- package/dist/discovery/intents.d.ts.map +1 -1
- package/dist/discovery/intents.js +299 -2
- package/dist/discovery/intents.js.map +1 -1
- package/dist/discovery/loader.d.ts.map +1 -1
- package/dist/discovery/loader.js +3 -0
- package/dist/discovery/loader.js.map +1 -1
- package/dist/discovery/macos-dynamic.d.ts +1 -0
- package/dist/discovery/macos-dynamic.d.ts.map +1 -1
- package/dist/discovery/macos-dynamic.js +20 -1
- package/dist/discovery/macos-dynamic.js.map +1 -1
- package/dist/discovery/search.d.ts.map +1 -1
- package/dist/discovery/search.js +12 -5
- package/dist/discovery/search.js.map +1 -1
- package/dist/engine/browser/evidence.d.ts +34 -1
- package/dist/engine/browser/evidence.d.ts.map +1 -1
- package/dist/engine/browser/evidence.js +141 -6
- package/dist/engine/browser/evidence.js.map +1 -1
- package/dist/engine/capability-policy.d.ts.map +1 -1
- package/dist/engine/capability-policy.js +30 -4
- package/dist/engine/capability-policy.js.map +1 -1
- package/dist/engine/kernel/stages.d.ts.map +1 -1
- package/dist/engine/kernel/stages.js +3 -0
- package/dist/engine/kernel/stages.js.map +1 -1
- package/dist/engine/operation-policy.d.ts +4 -1
- package/dist/engine/operation-policy.d.ts.map +1 -1
- package/dist/engine/operation-policy.js +23 -0
- package/dist/engine/operation-policy.js.map +1 -1
- package/dist/engine/steps/fetch-text.d.ts.map +1 -1
- package/dist/engine/steps/fetch-text.js +2 -2
- package/dist/engine/steps/fetch-text.js.map +1 -1
- package/dist/engine/steps/fetch.d.ts +1 -0
- package/dist/engine/steps/fetch.d.ts.map +1 -1
- package/dist/engine/steps/fetch.js +24 -4
- package/dist/engine/steps/fetch.js.map +1 -1
- package/dist/fast-path/handlers/discovery.d.ts +5 -5
- package/dist/fast-path/handlers/discovery.d.ts.map +1 -1
- package/dist/fast-path/handlers/discovery.js +61 -8
- package/dist/fast-path/handlers/discovery.js.map +1 -1
- package/dist/fast-path/manifest.d.ts +3 -0
- package/dist/fast-path/manifest.d.ts.map +1 -1
- package/dist/fast-path/manifest.js.map +1 -1
- package/dist/fast-path/policy.d.ts.map +1 -1
- package/dist/fast-path/policy.js +3 -0
- package/dist/fast-path/policy.js.map +1 -1
- package/dist/fast-path/render.d.ts +2 -0
- package/dist/fast-path/render.d.ts.map +1 -1
- package/dist/fast-path/render.js +9 -0
- package/dist/fast-path/render.js.map +1 -1
- package/dist/manifest-compact.txt +2 -2
- package/dist/manifest.json +6977 -1002
- package/dist/mcp/handler.d.ts +2 -16
- package/dist/mcp/handler.d.ts.map +1 -1
- package/dist/mcp/handler.js.map +1 -1
- package/dist/mcp/http-transport.d.ts +7 -1
- package/dist/mcp/http-transport.d.ts.map +1 -1
- package/dist/mcp/http-transport.js +20 -1
- package/dist/mcp/http-transport.js.map +1 -1
- package/dist/mcp/jsonrpc.d.ts +27 -0
- package/dist/mcp/jsonrpc.d.ts.map +1 -0
- package/dist/mcp/jsonrpc.js +12 -0
- package/dist/mcp/jsonrpc.js.map +1 -0
- package/dist/mcp/origin-guard.d.ts +26 -0
- package/dist/mcp/origin-guard.d.ts.map +1 -0
- package/dist/mcp/origin-guard.js +42 -0
- package/dist/mcp/origin-guard.js.map +1 -0
- package/dist/mcp/profiles/computer-use.d.ts.map +1 -1
- package/dist/mcp/profiles/computer-use.js +30 -270
- package/dist/mcp/profiles/computer-use.js.map +1 -1
- package/dist/mcp/streamable-http/session.d.ts +4 -22
- package/dist/mcp/streamable-http/session.d.ts.map +1 -1
- package/dist/mcp/streamable-http/session.js +4 -24
- package/dist/mcp/streamable-http/session.js.map +1 -1
- package/dist/mcp/tools.d.ts.map +1 -1
- package/dist/mcp/tools.js +74 -54
- package/dist/mcp/tools.js.map +1 -1
- package/dist/output/envelope.d.ts +2 -0
- package/dist/output/envelope.d.ts.map +1 -1
- package/dist/output/envelope.js.map +1 -1
- package/dist/output/error-map.d.ts +14 -0
- package/dist/output/error-map.d.ts.map +1 -1
- package/dist/output/error-map.js +20 -0
- package/dist/output/error-map.js.map +1 -1
- package/dist/registry.d.ts +2 -0
- package/dist/registry.d.ts.map +1 -1
- package/dist/registry.js +1 -0
- package/dist/registry.js.map +1 -1
- package/dist/transport/cascade.d.ts.map +1 -1
- package/dist/transport/cascade.js +77 -5
- package/dist/transport/cascade.js.map +1 -1
- package/dist/transport/refs.d.ts +33 -1
- package/dist/transport/refs.d.ts.map +1 -1
- package/dist/transport/refs.js +40 -1
- package/dist/transport/refs.js.map +1 -1
- package/dist/types/scholarly.d.ts +19 -4
- package/dist/types/scholarly.d.ts.map +1 -1
- package/dist/types/scholarly.js +4 -4
- package/dist/types.d.ts +8 -0
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/package.json +1 -1
- package/server.json +2 -2
- package/skills/unicli/SKILL.md +1 -1
- package/skills/unicli-claude-code/SKILL.md +1 -1
- package/skills/unicli-hermes/SKILL.md +1 -1
- package/src/adapters/acl-anthology/papers.test.ts +111 -0
- package/src/adapters/acl-anthology/papers.ts +379 -71
- package/src/adapters/arxiv/papers.test.ts +46 -0
- package/src/adapters/arxiv/papers.ts +251 -4
- package/src/adapters/baidu-scholar/search.ts +74 -11
- package/src/adapters/biorxiv/preprints.ts +112 -0
- package/src/adapters/cnki/search.ts +357 -0
- package/src/adapters/cvf/papers.ts +260 -27
- package/src/adapters/dblp/publications.test.ts +9 -0
- package/src/adapters/dblp/publications.ts +31 -8
- package/src/adapters/defuddle/read.yaml +30 -0
- package/src/adapters/google-scholar/search.ts +165 -17
- package/src/adapters/hf/paper.test.ts +23 -0
- package/src/adapters/hf/paper.ts +89 -5
- package/src/adapters/hf/top.yaml +34 -2
- package/src/adapters/huggingface-papers/daily.yaml +37 -3
- package/src/adapters/huggingface-papers/search.yaml +43 -9
- package/src/adapters/jina/read.yaml +30 -0
- package/src/adapters/markdown-new/read.yaml +50 -0
- package/src/adapters/medrxiv/preprints.ts +112 -0
- package/src/adapters/neurips/proceedings.ts +266 -22
- package/src/adapters/ollama-cloud/fetch.yaml +39 -0
- package/src/adapters/ollama-cloud/search.yaml +43 -0
- package/src/adapters/openalex/works.test.ts +15 -4
- package/src/adapters/openalex/works.ts +136 -8
- package/src/adapters/openreview/papers.test.ts +31 -0
- package/src/adapters/openreview/papers.ts +407 -29
- package/src/adapters/pmlr/proceedings.ts +102 -12
- package/src/adapters/pubmed/articles.test.ts +88 -1
- package/src/adapters/pubmed/articles.ts +343 -44
- package/src/adapters/rxiv/preprints.test.ts +233 -0
- package/src/adapters/rxiv/preprints.ts +849 -0
- package/src/adapters/scholar-artifacts/pdf-read.ts +277 -0
- package/src/adapters/scholar-artifacts/pdf.ts +133 -0
- package/src/adapters/semantic-scholar/papers.ts +98 -6
- package/src/adapters/unpaywall/works.ts +141 -12
- package/src/adapters/wanfang/search.ts +57 -7
- package/src/adapters/cnki/search.yaml +0 -49
|
@@ -53,14 +53,18 @@ describe("dblp agent-facing publication commands", () => {
|
|
|
53
53
|
1,
|
|
54
54
|
),
|
|
55
55
|
).toEqual({
|
|
56
|
+
id: "conf/nips/Paper24",
|
|
56
57
|
rank: 1,
|
|
57
58
|
key: "conf/nips/Paper24",
|
|
59
|
+
dblp_key: "conf/nips/Paper24",
|
|
58
60
|
title: "A Paper",
|
|
59
61
|
authors: "Jane Doe",
|
|
60
62
|
venue: "NeurIPS",
|
|
61
63
|
year: "2024",
|
|
62
64
|
type: "conf",
|
|
63
65
|
doi: "10.0000/test",
|
|
66
|
+
source_url: "https://doi.org/10.0000/test",
|
|
67
|
+
landing_url: "https://dblp.org/rec/conf/nips/Paper24.html",
|
|
64
68
|
url: "https://doi.org/10.0000/test",
|
|
65
69
|
});
|
|
66
70
|
});
|
|
@@ -81,7 +85,9 @@ describe("dblp agent-facing publication commands", () => {
|
|
|
81
85
|
</dblp>
|
|
82
86
|
`;
|
|
83
87
|
expect(mapRecordXml(xml)).toEqual({
|
|
88
|
+
id: "conf/nips/Paper24",
|
|
84
89
|
key: "conf/nips/Paper24",
|
|
90
|
+
dblp_key: "conf/nips/Paper24",
|
|
85
91
|
type: "conf",
|
|
86
92
|
title: "A Paper",
|
|
87
93
|
authors: "Jane Doe, Max & Co",
|
|
@@ -90,7 +96,10 @@ describe("dblp agent-facing publication commands", () => {
|
|
|
90
96
|
pages: "1-10",
|
|
91
97
|
doi: "10.0000/test",
|
|
92
98
|
open_access_url: "https://openaccess.example/paper",
|
|
99
|
+
source_url: "https://openaccess.example/paper",
|
|
100
|
+
landing_url: "https://dblp.org/rec/conf/nips/Paper24.html",
|
|
93
101
|
dblp_url: "https://dblp.org/rec/conf/nips/Paper24.html",
|
|
102
|
+
url: "https://openaccess.example/paper",
|
|
94
103
|
});
|
|
95
104
|
expect(
|
|
96
105
|
splitAuthorRecords(
|
|
@@ -1,9 +1,16 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* @owner
|
|
3
|
-
* @does
|
|
4
|
-
* @needs
|
|
5
|
-
* @feeds
|
|
6
|
-
* @breaks
|
|
2
|
+
* @owner src::adapters::dblp::publications
|
|
3
|
+
* @does Registers DBLP publication, paper, author, and venue commands for computer-science bibliography discovery.
|
|
4
|
+
* @needs dblp public JSON/XML APIs, conservative XML field extraction, bounded result limits, src/registry.ts
|
|
5
|
+
* @feeds src/commands/scholar.ts via scholar.search, scholar.get, scholar.pdf, scholar.author, and scholar.venue
|
|
6
|
+
* @breaks DBLP API envelope drift, record-key parsing drift, or missing normalized ids makes CS bibliography rows invisible to the scholar meta-command.
|
|
7
|
+
* @invariants DBLP `key` is the source-local scholarly id and is mirrored as `dblp_key`; DOI remains a separate cross-source dedupe key.
|
|
8
|
+
* @side-effects HTTPS egress to dblp.org only
|
|
9
|
+
* @perf O(limit) JSON/XML mapping after one DBLP API request per command
|
|
10
|
+
* @concurrency safe
|
|
11
|
+
* @test src/adapters/dblp/publications.test.ts; live smoke via `unicli dblp search <query>` and `unicli scholar doctor --sources dblp --live`
|
|
12
|
+
* @stability experimental
|
|
13
|
+
* @since 2026-05-19
|
|
7
14
|
*/
|
|
8
15
|
|
|
9
16
|
import { cli, Strategy } from "../../registry.js";
|
|
@@ -149,16 +156,21 @@ export function mapPublicationHit(
|
|
|
149
156
|
): Record<string, unknown> {
|
|
150
157
|
const info = hit.info ?? {};
|
|
151
158
|
const key = stringField(info.key);
|
|
159
|
+
const url = stringField(info.ee) || stringField(info.url);
|
|
152
160
|
return {
|
|
161
|
+
id: key,
|
|
153
162
|
rank,
|
|
154
163
|
key,
|
|
164
|
+
dblp_key: key,
|
|
155
165
|
title: stripTrailingDot(decodeXmlEntities(info.title)).trim(),
|
|
156
166
|
authors: normalizeDblpAuthors(info.authors).join(", "),
|
|
157
167
|
venue: decodeXmlEntities(info.venue),
|
|
158
168
|
year: stringField(info.year),
|
|
159
169
|
type: compactType(info.type),
|
|
160
170
|
doi: stringField(info.doi),
|
|
161
|
-
|
|
171
|
+
source_url: url || (key ? `${DBLP_ORIGIN}/rec/${key}.html` : ""),
|
|
172
|
+
landing_url: key ? `${DBLP_ORIGIN}/rec/${key}.html` : "",
|
|
173
|
+
url,
|
|
162
174
|
};
|
|
163
175
|
}
|
|
164
176
|
|
|
@@ -240,8 +252,12 @@ export function mapRecordXml(xml: string): Record<string, unknown> {
|
|
|
240
252
|
.map(decodeXmlEntities)
|
|
241
253
|
.find((value) => /(?:doi\.org\/|^10\.)/i.test(value));
|
|
242
254
|
const doi = ee ? ee.replace(/^https?:\/\/(?:dx\.)?doi\.org\//i, "") : "";
|
|
255
|
+
const openAccessUrl = extractOpenAccessUrl(xml);
|
|
256
|
+
const dblpUrl = key ? `${DBLP_ORIGIN}/rec/${key}.html` : "";
|
|
243
257
|
return {
|
|
258
|
+
id: key,
|
|
244
259
|
key,
|
|
260
|
+
dblp_key: key,
|
|
245
261
|
type,
|
|
246
262
|
title: stripTrailingDot(decodeXmlEntities(extractFirst(xml, "title"))),
|
|
247
263
|
authors: extractAll(xml, "author")
|
|
@@ -252,8 +268,11 @@ export function mapRecordXml(xml: string): Record<string, unknown> {
|
|
|
252
268
|
year: decodeXmlEntities(extractFirst(xml, "year")),
|
|
253
269
|
pages: decodeXmlEntities(extractFirst(xml, "pages")),
|
|
254
270
|
doi: doi.startsWith("10.") ? doi : "",
|
|
255
|
-
open_access_url:
|
|
256
|
-
|
|
271
|
+
open_access_url: openAccessUrl,
|
|
272
|
+
source_url: openAccessUrl || dblpUrl,
|
|
273
|
+
landing_url: dblpUrl,
|
|
274
|
+
dblp_url: dblpUrl,
|
|
275
|
+
url: openAccessUrl || dblpUrl,
|
|
257
276
|
};
|
|
258
277
|
}
|
|
259
278
|
|
|
@@ -482,8 +501,10 @@ cli({
|
|
|
482
501
|
return records.slice(0, limit).map((record, index) => {
|
|
483
502
|
const row = mapRecordXml(`<root>${record}</root>`);
|
|
484
503
|
return {
|
|
504
|
+
id: row.id || row.key || extractRecordKey(record),
|
|
485
505
|
rank: index + 1,
|
|
486
506
|
key: row.key || extractRecordKey(record),
|
|
507
|
+
dblp_key: row.dblp_key || row.key || extractRecordKey(record),
|
|
487
508
|
title: row.title,
|
|
488
509
|
authors: row.authors,
|
|
489
510
|
venue: row.venue,
|
|
@@ -491,6 +512,8 @@ cli({
|
|
|
491
512
|
type: row.type,
|
|
492
513
|
doi: row.doi,
|
|
493
514
|
pid,
|
|
515
|
+
source_url: row.source_url,
|
|
516
|
+
landing_url: row.landing_url,
|
|
494
517
|
url: row.open_access_url || row.dblp_url,
|
|
495
518
|
};
|
|
496
519
|
});
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
site: defuddle
|
|
2
|
+
name: read
|
|
3
|
+
description: Read a public URL through defuddle.md and return Markdown with frontmatter
|
|
4
|
+
type: web-api
|
|
5
|
+
domain: defuddle.md
|
|
6
|
+
strategy: public
|
|
7
|
+
|
|
8
|
+
args:
|
|
9
|
+
url:
|
|
10
|
+
type: str
|
|
11
|
+
required: true
|
|
12
|
+
positional: true
|
|
13
|
+
description: Public URL to convert to Markdown
|
|
14
|
+
format: uri
|
|
15
|
+
|
|
16
|
+
pipeline:
|
|
17
|
+
- fetch_text:
|
|
18
|
+
url: "https://defuddle.md/${{ args.url }}"
|
|
19
|
+
headers:
|
|
20
|
+
Accept: "text/markdown"
|
|
21
|
+
|
|
22
|
+
columns: []
|
|
23
|
+
|
|
24
|
+
# schema-v2 metadata — injected by `unicli migrate schema-v2`
|
|
25
|
+
capabilities: ["http.fetch"]
|
|
26
|
+
minimum_capability: http.fetch
|
|
27
|
+
trust: public
|
|
28
|
+
confidentiality: public
|
|
29
|
+
quarantine: false
|
|
30
|
+
schema_version: v2
|
|
@@ -1,7 +1,143 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @owner src::adapters::google-scholar::search
|
|
3
|
+
* @does Registers Google Scholar public browser search as a broad discovery source for scholarly metadata.
|
|
4
|
+
* @needs scholar.google.com result DOM, src/registry.ts, src/types.ts, browser tools
|
|
5
|
+
* @feeds src/commands/scholar.ts capability discovery, `unicli google-scholar search`, `unicli scholar search/doctor`
|
|
6
|
+
* @breaks Google Scholar CAPTCHA/traffic blocks surface as upstream_blocked; result DOM drift can return empty rows instead of normalized scholarly records.
|
|
7
|
+
* @invariants Search is metadata discovery only; result links are source hints, not full-text proof; publication years must be standalone years, never substrings of arXiv IDs.
|
|
8
|
+
* @side-effects Navigates a Uni-CLI managed browser page to Google Scholar public search.
|
|
9
|
+
* @perf O(limit) DOM extraction after one page navigation.
|
|
10
|
+
* @concurrency safe — command state is page-local
|
|
11
|
+
* @test tests/unit/adapters/scholar-sources.test.ts; live smoke via `unicli google-scholar search <query>` and `unicli scholar doctor --sources google-scholar --live`
|
|
12
|
+
* @stability experimental
|
|
13
|
+
* @since 2026-06-27
|
|
14
|
+
*/
|
|
15
|
+
|
|
1
16
|
import { cli, Strategy } from "../../registry.js";
|
|
2
17
|
import type { IPage } from "../../types.js";
|
|
3
18
|
import { intArg, js, str } from "../_shared/browser-tools.js";
|
|
4
19
|
|
|
20
|
+
interface GoogleScholarRawRow {
|
|
21
|
+
rank?: unknown;
|
|
22
|
+
title?: unknown;
|
|
23
|
+
infoLine?: unknown;
|
|
24
|
+
citedText?: unknown;
|
|
25
|
+
url?: unknown;
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
interface GoogleScholarBrowserPayload {
|
|
29
|
+
blocked?: boolean;
|
|
30
|
+
block_reason?: unknown;
|
|
31
|
+
rows?: unknown;
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
type GoogleScholarActionableError = Error & {
|
|
35
|
+
code?: string;
|
|
36
|
+
suggestion?: string;
|
|
37
|
+
retryable?: boolean;
|
|
38
|
+
};
|
|
39
|
+
|
|
40
|
+
export function buildGoogleScholarSearchUrl(query: string): string {
|
|
41
|
+
return `https://scholar.google.com/scholar?q=${encodeURIComponent(query)}&hl=en`;
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
export function parseGoogleScholarYear(infoLine: string): number | undefined {
|
|
45
|
+
for (const match of infoLine.matchAll(/(^|[^\d])((?:19|20)\d{2})(?!\d)/g)) {
|
|
46
|
+
const year = Number(match[2]);
|
|
47
|
+
if (Number.isInteger(year)) return year;
|
|
48
|
+
}
|
|
49
|
+
return undefined;
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
export function parseGoogleScholarInfoLine(infoLine: string): {
|
|
53
|
+
authors?: string;
|
|
54
|
+
venue?: string;
|
|
55
|
+
year?: number;
|
|
56
|
+
} {
|
|
57
|
+
const parts = infoLine.split(" - ");
|
|
58
|
+
const authors = parts[0]?.trim() || undefined;
|
|
59
|
+
const detail = parts[1]?.trim() || "";
|
|
60
|
+
const detailParts = detail.split(",");
|
|
61
|
+
const venue =
|
|
62
|
+
detailParts.slice(0, -1).join(",").trim() ||
|
|
63
|
+
detailParts[0]?.trim() ||
|
|
64
|
+
undefined;
|
|
65
|
+
return {
|
|
66
|
+
authors,
|
|
67
|
+
venue,
|
|
68
|
+
year: parseGoogleScholarYear(infoLine),
|
|
69
|
+
};
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
export function googleScholarRecordId(
|
|
73
|
+
title: string,
|
|
74
|
+
sourceUrl: string,
|
|
75
|
+
): string {
|
|
76
|
+
if (sourceUrl) {
|
|
77
|
+
try {
|
|
78
|
+
const parsed = new URL(sourceUrl);
|
|
79
|
+
if (parsed.hostname === "arxiv.org") {
|
|
80
|
+
const arxivId = parsed.pathname
|
|
81
|
+
.replace(/^\/(?:abs|pdf)\//, "")
|
|
82
|
+
.replace(/\.pdf$/i, "")
|
|
83
|
+
.replace(/v\d+$/i, "");
|
|
84
|
+
if (/^\d{4}\.\d{4,5}$/.test(arxivId)) return arxivId;
|
|
85
|
+
}
|
|
86
|
+
if (parsed.hostname === "doi.org") {
|
|
87
|
+
const doi = decodeURIComponent(parsed.pathname.replace(/^\//, ""));
|
|
88
|
+
if (doi) return doi;
|
|
89
|
+
}
|
|
90
|
+
} catch {
|
|
91
|
+
return sourceUrl;
|
|
92
|
+
}
|
|
93
|
+
return sourceUrl;
|
|
94
|
+
}
|
|
95
|
+
return title;
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
function normalize(value: unknown): string {
|
|
99
|
+
return String(value ?? "")
|
|
100
|
+
.replace(/\s+/g, " ")
|
|
101
|
+
.trim();
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
function googleScholarBlockedError(
|
|
105
|
+
reason: string,
|
|
106
|
+
): GoogleScholarActionableError {
|
|
107
|
+
const error = new Error(
|
|
108
|
+
`Google Scholar blocked the public browser search: ${reason}.`,
|
|
109
|
+
) as GoogleScholarActionableError;
|
|
110
|
+
error.code = "upstream_blocked";
|
|
111
|
+
error.suggestion =
|
|
112
|
+
"Retry later, import a usable browser session, or use first-source alternatives such as semantic-scholar/openalex/crossref/arxiv for non-hallucinated scholarly lookup.";
|
|
113
|
+
error.retryable = true;
|
|
114
|
+
return error;
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
function mapGoogleScholarRow(
|
|
118
|
+
raw: GoogleScholarRawRow,
|
|
119
|
+
): Record<string, unknown> | undefined {
|
|
120
|
+
const title = normalize(raw.title);
|
|
121
|
+
if (!title) return undefined;
|
|
122
|
+
const sourceUrl = normalize(raw.url);
|
|
123
|
+
const info = parseGoogleScholarInfoLine(normalize(raw.infoLine));
|
|
124
|
+
const citedText = normalize(raw.citedText);
|
|
125
|
+
const citedByCount = Number(citedText.match(/\d+/)?.[0] ?? 0);
|
|
126
|
+
return {
|
|
127
|
+
id: googleScholarRecordId(title, sourceUrl),
|
|
128
|
+
rank: Number(raw.rank) || undefined,
|
|
129
|
+
title,
|
|
130
|
+
authors: info.authors,
|
|
131
|
+
source: info.venue,
|
|
132
|
+
venue: info.venue,
|
|
133
|
+
year: info.year,
|
|
134
|
+
cited: String(citedByCount),
|
|
135
|
+
cited_by_count: citedByCount,
|
|
136
|
+
source_url: sourceUrl || undefined,
|
|
137
|
+
url: sourceUrl || undefined,
|
|
138
|
+
};
|
|
139
|
+
}
|
|
140
|
+
|
|
5
141
|
cli({
|
|
6
142
|
site: "google-scholar",
|
|
7
143
|
name: "search",
|
|
@@ -22,14 +158,25 @@ cli({
|
|
|
22
158
|
func: async (page, kwargs) => {
|
|
23
159
|
const p = page as IPage;
|
|
24
160
|
const limit = intArg(kwargs.limit, 10, 20);
|
|
25
|
-
await p.goto(
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
)
|
|
29
|
-
const rows = await p.evaluate(`(() => {
|
|
161
|
+
await p.goto(buildGoogleScholarSearchUrl(str(kwargs.query)), {
|
|
162
|
+
settleMs: 2500,
|
|
163
|
+
});
|
|
164
|
+
const payload = (await p.evaluate(`(() => {
|
|
30
165
|
const normalize = (value) => (value || '').replace(/\\s+/g, ' ').trim();
|
|
31
|
-
const
|
|
166
|
+
const bodyText = normalize(document.body?.innerText || document.body?.textContent || '');
|
|
167
|
+
const blocked =
|
|
168
|
+
location.pathname.includes('/sorry/') ||
|
|
169
|
+
document.querySelector('form[action*="/sorry/"], input[name="captcha"], #gs_captcha_ccl, iframe[src*="recaptcha"]') ||
|
|
170
|
+
/unusual traffic|not a robot|captcha/i.test(bodyText);
|
|
171
|
+
if (blocked) {
|
|
172
|
+
return {
|
|
173
|
+
blocked: true,
|
|
174
|
+
block_reason: document.title || bodyText.slice(0, 160) || location.href,
|
|
175
|
+
rows: []
|
|
176
|
+
};
|
|
177
|
+
}
|
|
32
178
|
const seen = new Set();
|
|
179
|
+
const rows = [];
|
|
33
180
|
const cards = [...document.querySelectorAll('.gs_r.gs_or.gs_scl, .gs_r.gs_or')];
|
|
34
181
|
for (const card of cards) {
|
|
35
182
|
const body = card.querySelector('.gs_ri') || card;
|
|
@@ -41,23 +188,24 @@ cli({
|
|
|
41
188
|
if (seen.has(dedupeKey)) continue;
|
|
42
189
|
seen.add(dedupeKey);
|
|
43
190
|
|
|
44
|
-
const infoLine = normalize(body.querySelector('.gs_a')?.textContent);
|
|
45
|
-
const parts = infoLine.split(' - ');
|
|
46
|
-
const sourceParts = (parts[1] || '').split(',');
|
|
47
191
|
const citedText = normalize(body.querySelector('.gs_fl a[href*="cites"]')?.textContent);
|
|
48
192
|
rows.push({
|
|
49
193
|
rank: rows.length + 1,
|
|
50
194
|
title,
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
cited: citedText.match(/(\\d+)/)?.[1] || '0',
|
|
55
|
-
url,
|
|
195
|
+
infoLine: normalize(body.querySelector('.gs_a')?.textContent),
|
|
196
|
+
citedText,
|
|
197
|
+
url
|
|
56
198
|
});
|
|
57
199
|
if (rows.length >= ${js(limit)}) break;
|
|
58
200
|
}
|
|
59
|
-
return rows;
|
|
60
|
-
})()`);
|
|
61
|
-
|
|
201
|
+
return { blocked: false, rows };
|
|
202
|
+
})()`)) as GoogleScholarBrowserPayload;
|
|
203
|
+
if (payload.blocked) {
|
|
204
|
+
throw googleScholarBlockedError(normalize(payload.block_reason));
|
|
205
|
+
}
|
|
206
|
+
if (!Array.isArray(payload.rows)) return [];
|
|
207
|
+
return payload.rows
|
|
208
|
+
.map((row) => mapGoogleScholarRow(row as GoogleScholarRawRow))
|
|
209
|
+
.filter((row): row is Record<string, unknown> => row !== undefined);
|
|
62
210
|
},
|
|
63
211
|
});
|
|
@@ -27,6 +27,15 @@ describe("hf agent-facing paper command", () => {
|
|
|
27
27
|
ai_keywords: ["transformer", "attention"],
|
|
28
28
|
summary: "Paper summary",
|
|
29
29
|
ai_summary: "AI summary",
|
|
30
|
+
githubRepo: "https://github.com/example/transformer",
|
|
31
|
+
githubStars: 42,
|
|
32
|
+
projectPage: "https://example.org/transformer",
|
|
33
|
+
linkedModels: [{ id: "org/model-a" }],
|
|
34
|
+
linkedDatasets: [{ id: "org/dataset-a" }],
|
|
35
|
+
linkedSpaces: [{ id: "org/space-a" }],
|
|
36
|
+
numTotalModels: 1,
|
|
37
|
+
numTotalDatasets: 1,
|
|
38
|
+
numTotalSpaces: 1,
|
|
30
39
|
},
|
|
31
40
|
"https://hf.example/",
|
|
32
41
|
),
|
|
@@ -39,6 +48,19 @@ describe("hf agent-facing paper command", () => {
|
|
|
39
48
|
aiKeywords: "transformer, attention",
|
|
40
49
|
summary: "Paper summary",
|
|
41
50
|
aiSummary: "AI summary",
|
|
51
|
+
arxiv_id: "1706.03762",
|
|
52
|
+
pdf_url: "https://arxiv.org/pdf/1706.03762",
|
|
53
|
+
source_url: "https://hf.example/papers/1706.03762",
|
|
54
|
+
code_url: "https://github.com/example/transformer",
|
|
55
|
+
github_stars: 42,
|
|
56
|
+
project_url: "https://example.org/transformer",
|
|
57
|
+
dataset_url: "https://hf.example/datasets/org/dataset-a",
|
|
58
|
+
model_urls: "https://hf.example/org/model-a",
|
|
59
|
+
dataset_urls: "https://hf.example/datasets/org/dataset-a",
|
|
60
|
+
space_urls: "https://hf.example/spaces/org/space-a",
|
|
61
|
+
num_models: 1,
|
|
62
|
+
num_datasets: 1,
|
|
63
|
+
num_spaces: 1,
|
|
42
64
|
url: "https://hf.example/papers/1706.03762",
|
|
43
65
|
});
|
|
44
66
|
});
|
|
@@ -53,6 +75,7 @@ describe("hf agent-facing paper command", () => {
|
|
|
53
75
|
"scholar.get",
|
|
54
76
|
"scholar.pdf",
|
|
55
77
|
"scholar.code",
|
|
78
|
+
"scholar.datasets",
|
|
56
79
|
]);
|
|
57
80
|
});
|
|
58
81
|
});
|
package/src/adapters/hf/paper.ts
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* @owner src/adapters/hf/paper.ts
|
|
3
|
-
* @does Register agent-facing Hugging Face paper detail command.
|
|
3
|
+
* @does Register agent-facing Hugging Face paper detail and resource-link command.
|
|
4
4
|
* @needs Hugging Face public papers API, modern arXiv ids, paper metadata normalization.
|
|
5
|
-
* @feeds surface coverage ledger, HF daily paper detail workflows, scholarly metadata readers.
|
|
6
|
-
* @breaks HF papers API shape drift or invalid arXiv id handling can hide paper details.
|
|
5
|
+
* @feeds surface coverage ledger, HF daily paper detail workflows, scholarly metadata and resource readers.
|
|
6
|
+
* @breaks HF papers API shape drift, resource-link schema drift, or invalid arXiv id handling can hide paper details.
|
|
7
7
|
*/
|
|
8
8
|
|
|
9
9
|
import { cli, Strategy } from "../../registry.js";
|
|
@@ -25,6 +25,19 @@ interface HfPaper {
|
|
|
25
25
|
ai_keywords?: unknown;
|
|
26
26
|
summary?: unknown;
|
|
27
27
|
ai_summary?: unknown;
|
|
28
|
+
githubRepo?: unknown;
|
|
29
|
+
githubStars?: unknown;
|
|
30
|
+
projectPage?: unknown;
|
|
31
|
+
linkedDatasets?: unknown;
|
|
32
|
+
linkedModels?: unknown;
|
|
33
|
+
linkedSpaces?: unknown;
|
|
34
|
+
numTotalDatasets?: unknown;
|
|
35
|
+
numTotalModels?: unknown;
|
|
36
|
+
numTotalSpaces?: unknown;
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
interface HfLinkedResource {
|
|
40
|
+
id?: unknown;
|
|
28
41
|
}
|
|
29
42
|
|
|
30
43
|
function stringField(value: unknown): string {
|
|
@@ -61,12 +74,51 @@ function hfAuthorNames(value: unknown): string {
|
|
|
61
74
|
.join(", ");
|
|
62
75
|
}
|
|
63
76
|
|
|
77
|
+
/**
 * Remove a trailing version marker ("v" followed by digits, case-insensitive)
 * from an identifier, e.g. "1706.03762v5" -> "1706.03762".
 * Identifiers without such a suffix are returned unchanged.
 */
function bareArxivId(value: string): string {
  const versionSuffix = /v\d+$/i;
  const unversioned = value.replace(versionSuffix, "");
  return unversioned;
}
|
|
80
|
+
|
|
81
|
+
/**
 * Collect the non-empty, trimmed ids from a linked-resource list.
 *
 * Each entry may be a plain string id or an object carrying an `id` field,
 * which is passed through `stringField`. Anything that is not an array yields
 * an empty list, and entries that resolve to an empty id are dropped.
 *
 * @param value Raw linked-resource value from the paper payload.
 * @returns Trimmed ids in their original order.
 */
function linkedResourceIds(value: unknown): string[] {
  if (!Array.isArray(value)) return [];
  return value
    .map((item: HfLinkedResource | string | null | undefined) => {
      if (typeof item === "string") return item;
      // A null/undefined entry would throw on `.id`; treat it as an empty id
      // so it is filtered out below instead of failing the whole row.
      if (item === null || item === undefined) return "";
      return stringField(item.id);
    })
    .map((id) => id.trim())
    .filter(Boolean);
}
|
|
90
|
+
|
|
91
|
+
/**
 * Turn a linked-resource list into absolute URLs under the given endpoint.
 *
 * Datasets are placed under `/datasets/`, spaces under `/spaces/`, and models
 * directly under the endpoint root.
 *
 * @param value Raw linked-resource value; ids are extracted by `linkedResourceIds`.
 * @param kind Which resource family the ids belong to.
 * @param endpoint Endpoint passed through `hfEndpoint` to obtain the URL root.
 * @returns One URL per extracted id, in the same order.
 */
function linkedResourceUrls(
  value: unknown,
  kind: "dataset" | "model" | "space",
  endpoint: string,
): string[] {
  const root = hfEndpoint(endpoint);
  const toUrl = (id: string): string => {
    switch (kind) {
      case "dataset":
        return `${root}/datasets/${id}`;
      case "space":
        return `${root}/spaces/${id}`;
      default:
        return `${root}/${id}`;
    }
  };
  const ids = linkedResourceIds(value);
  return ids.map(toUrl);
}
|
|
103
|
+
|
|
104
|
+
/**
 * Join values into a single comma-and-space separated string.
 * An empty list produces an empty string.
 */
function csv(values: string[]): string {
  const separator = ", ";
  return values.join(separator);
}
|
|
107
|
+
|
|
64
108
|
export function mapHfPaperRow(
|
|
65
109
|
paper: HfPaper,
|
|
66
110
|
endpoint = HF_DEFAULT_ENDPOINT,
|
|
67
111
|
): Record<string, unknown> {
|
|
68
112
|
const id = stringField(paper.id);
|
|
69
113
|
if (!id) throw new Error("Hugging Face returned no paper data.");
|
|
114
|
+
const sourceUrl = `${hfEndpoint(endpoint)}/papers/${id}`;
|
|
115
|
+
const datasetUrls = linkedResourceUrls(
|
|
116
|
+
paper.linkedDatasets,
|
|
117
|
+
"dataset",
|
|
118
|
+
endpoint,
|
|
119
|
+
);
|
|
120
|
+
const modelUrls = linkedResourceUrls(paper.linkedModels, "model", endpoint);
|
|
121
|
+
const spaceUrls = linkedResourceUrls(paper.linkedSpaces, "space", endpoint);
|
|
70
122
|
return {
|
|
71
123
|
id,
|
|
72
124
|
title: stringField(paper.title),
|
|
@@ -78,7 +130,20 @@ export function mapHfPaperRow(
|
|
|
78
130
|
: "",
|
|
79
131
|
summary: stringField(paper.summary),
|
|
80
132
|
aiSummary: stringField(paper.ai_summary),
|
|
81
|
-
|
|
133
|
+
arxiv_id: bareArxivId(id),
|
|
134
|
+
pdf_url: `https://arxiv.org/pdf/${id}`,
|
|
135
|
+
source_url: sourceUrl,
|
|
136
|
+
code_url: stringField(paper.githubRepo),
|
|
137
|
+
github_stars: numberOrNull(paper.githubStars),
|
|
138
|
+
project_url: stringField(paper.projectPage),
|
|
139
|
+
dataset_url: datasetUrls[0] ?? "",
|
|
140
|
+
model_urls: csv(modelUrls),
|
|
141
|
+
dataset_urls: csv(datasetUrls),
|
|
142
|
+
space_urls: csv(spaceUrls),
|
|
143
|
+
num_models: numberOrNull(paper.numTotalModels),
|
|
144
|
+
num_datasets: numberOrNull(paper.numTotalDatasets),
|
|
145
|
+
num_spaces: numberOrNull(paper.numTotalSpaces),
|
|
146
|
+
url: sourceUrl,
|
|
82
147
|
};
|
|
83
148
|
}
|
|
84
149
|
|
|
@@ -129,9 +194,28 @@ cli({
|
|
|
129
194
|
"aiKeywords",
|
|
130
195
|
"summary",
|
|
131
196
|
"aiSummary",
|
|
197
|
+
"arxiv_id",
|
|
198
|
+
"pdf_url",
|
|
199
|
+
"source_url",
|
|
200
|
+
"code_url",
|
|
201
|
+
"github_stars",
|
|
202
|
+
"project_url",
|
|
203
|
+
"dataset_url",
|
|
204
|
+
"model_urls",
|
|
205
|
+
"dataset_urls",
|
|
206
|
+
"space_urls",
|
|
207
|
+
"num_models",
|
|
208
|
+
"num_datasets",
|
|
209
|
+
"num_spaces",
|
|
132
210
|
"url",
|
|
133
211
|
],
|
|
134
|
-
capabilities: [
|
|
212
|
+
capabilities: [
|
|
213
|
+
"http.fetch",
|
|
214
|
+
"scholar.get",
|
|
215
|
+
"scholar.pdf",
|
|
216
|
+
"scholar.code",
|
|
217
|
+
"scholar.datasets",
|
|
218
|
+
],
|
|
135
219
|
func: async (_page, kwargs) => {
|
|
136
220
|
const id = requireHfPaperId(kwargs.id);
|
|
137
221
|
return [mapHfPaperRow(await fetchHfPaper(id), hfEndpoint())];
|
package/src/adapters/hf/top.yaml
CHANGED
|
@@ -26,13 +26,45 @@ pipeline:
|
|
|
26
26
|
title: "${{ (item.title || '').length > 60 ? (item.title || '').slice(0, 57) + '...' : (item.title || '') }}"
|
|
27
27
|
upvotes: "${{ item.paper ? item.paper.upvotes : 0 }}"
|
|
28
28
|
authors: "${{ (() => { const a = item.paper && item.paper.authors ? item.paper.authors : []; const n = a.map(x => x.name); return n.length <= 3 ? n.join(', ') : n.slice(0, 3).join(', ') + ' et al.'; })() }}"
|
|
29
|
+
source_url: "${{ item.paper && item.paper.id ? 'https://huggingface.co/papers/' + item.paper.id : '' }}"
|
|
30
|
+
pdf_url: "${{ item.paper && item.paper.id ? 'https://arxiv.org/pdf/' + item.paper.id : '' }}"
|
|
31
|
+
code_url: "${{ item.paper && item.paper.githubRepo ? item.paper.githubRepo : '' }}"
|
|
32
|
+
# Use an explicit null/undefined check (like the num_* fields) so a real 0-star count is kept.
github_stars: "${{ item.paper && item.paper.githubStars !== null && item.paper.githubStars !== undefined ? item.paper.githubStars : '' }}"
|
|
33
|
+
project_url: "${{ item.paper && item.paper.projectPage ? item.paper.projectPage : '' }}"
|
|
34
|
+
dataset_url: "${{ item.paper && item.paper.linkedDatasets && item.paper.linkedDatasets[0] ? 'https://huggingface.co/datasets/' + item.paper.linkedDatasets[0].id : '' }}"
|
|
35
|
+
model_urls: "${{ item.paper && item.paper.linkedModels ? item.paper.linkedModels.map(x => 'https://huggingface.co/' + x.id).join(', ') : '' }}"
|
|
36
|
+
dataset_urls: "${{ item.paper && item.paper.linkedDatasets ? item.paper.linkedDatasets.map(x => 'https://huggingface.co/datasets/' + x.id).join(', ') : '' }}"
|
|
37
|
+
space_urls: "${{ item.paper && item.paper.linkedSpaces ? item.paper.linkedSpaces.map(x => 'https://huggingface.co/spaces/' + x.id).join(', ') : '' }}"
|
|
38
|
+
num_models: "${{ item.paper && item.paper.numTotalModels !== null && item.paper.numTotalModels !== undefined ? item.paper.numTotalModels : '' }}"
|
|
39
|
+
num_datasets: "${{ item.paper && item.paper.numTotalDatasets !== null && item.paper.numTotalDatasets !== undefined ? item.paper.numTotalDatasets : '' }}"
|
|
40
|
+
num_spaces: "${{ item.paper && item.paper.numTotalSpaces !== null && item.paper.numTotalSpaces !== undefined ? item.paper.numTotalSpaces : '' }}"
|
|
29
41
|
|
|
30
42
|
- limit: ${{ args.limit }}
|
|
31
43
|
|
|
32
|
-
columns:
|
|
44
|
+
columns:
|
|
45
|
+
[
|
|
46
|
+
rank,
|
|
47
|
+
id,
|
|
48
|
+
title,
|
|
49
|
+
upvotes,
|
|
50
|
+
authors,
|
|
51
|
+
source_url,
|
|
52
|
+
pdf_url,
|
|
53
|
+
code_url,
|
|
54
|
+
github_stars,
|
|
55
|
+
project_url,
|
|
56
|
+
dataset_url,
|
|
57
|
+
model_urls,
|
|
58
|
+
dataset_urls,
|
|
59
|
+
space_urls,
|
|
60
|
+
num_models,
|
|
61
|
+
num_datasets,
|
|
62
|
+
num_spaces,
|
|
63
|
+
]
|
|
33
64
|
|
|
34
65
|
# schema-v2 metadata — injected by `unicli migrate schema-v2`
|
|
35
|
-
capabilities:
|
|
66
|
+
capabilities:
|
|
67
|
+
["http.fetch", "scholar.search", "scholar.code", "scholar.datasets"]
|
|
36
68
|
minimum_capability: http.fetch
|
|
37
69
|
trust: public
|
|
38
70
|
confidentiality: public
|
|
@@ -6,22 +6,56 @@ domain: huggingface.co
|
|
|
6
6
|
strategy: public
|
|
7
7
|
args:
|
|
8
8
|
limit:
|
|
9
|
+
type: int
|
|
9
10
|
required: false
|
|
10
11
|
default: 20
|
|
11
12
|
pipeline:
|
|
12
13
|
- fetch:
|
|
13
14
|
url: "https://huggingface.co/api/daily_papers"
|
|
14
15
|
- map:
|
|
16
|
+
id: "${{ item.paper && item.paper.id ? item.paper.id : '' }}"
|
|
15
17
|
title: "${{ item.paper.title }}"
|
|
16
|
-
authors: "${{ item.paper.authors
|
|
18
|
+
authors: "${{ item.paper && item.paper.authors ? item.paper.authors.map(x => x.name).filter(Boolean).join(', ') : '' }}"
|
|
17
19
|
upvotes: "${{ item.paper.upvotes }}"
|
|
18
20
|
url: "${{ 'https://huggingface.co/papers/' + item.paper.id }}"
|
|
21
|
+
source_url: "${{ item.paper && item.paper.id ? 'https://huggingface.co/papers/' + item.paper.id : '' }}"
|
|
22
|
+
pdf_url: "${{ item.paper && item.paper.id ? 'https://arxiv.org/pdf/' + item.paper.id : '' }}"
|
|
23
|
+
code_url: "${{ item.paper && item.paper.githubRepo ? item.paper.githubRepo : '' }}"
|
|
24
|
+
# Use an explicit null/undefined check (like the num_* fields) so a real 0-star count is kept.
github_stars: "${{ item.paper && item.paper.githubStars !== null && item.paper.githubStars !== undefined ? item.paper.githubStars : '' }}"
|
|
25
|
+
project_url: "${{ item.paper && item.paper.projectPage ? item.paper.projectPage : '' }}"
|
|
26
|
+
dataset_url: "${{ item.paper && item.paper.linkedDatasets && item.paper.linkedDatasets[0] ? 'https://huggingface.co/datasets/' + item.paper.linkedDatasets[0].id : '' }}"
|
|
27
|
+
model_urls: "${{ item.paper && item.paper.linkedModels ? item.paper.linkedModels.map(x => 'https://huggingface.co/' + x.id).join(', ') : '' }}"
|
|
28
|
+
dataset_urls: "${{ item.paper && item.paper.linkedDatasets ? item.paper.linkedDatasets.map(x => 'https://huggingface.co/datasets/' + x.id).join(', ') : '' }}"
|
|
29
|
+
space_urls: "${{ item.paper && item.paper.linkedSpaces ? item.paper.linkedSpaces.map(x => 'https://huggingface.co/spaces/' + x.id).join(', ') : '' }}"
|
|
30
|
+
num_models: "${{ item.paper && item.paper.numTotalModels !== null && item.paper.numTotalModels !== undefined ? item.paper.numTotalModels : '' }}"
|
|
31
|
+
num_datasets: "${{ item.paper && item.paper.numTotalDatasets !== null && item.paper.numTotalDatasets !== undefined ? item.paper.numTotalDatasets : '' }}"
|
|
32
|
+
num_spaces: "${{ item.paper && item.paper.numTotalSpaces !== null && item.paper.numTotalSpaces !== undefined ? item.paper.numTotalSpaces : '' }}"
|
|
19
33
|
date: "${{ item.publishedAt }}"
|
|
20
34
|
- limit: "${{ args.limit | default(20) }}"
|
|
21
|
-
columns:
|
|
35
|
+
columns:
|
|
36
|
+
[
|
|
37
|
+
id,
|
|
38
|
+
title,
|
|
39
|
+
authors,
|
|
40
|
+
upvotes,
|
|
41
|
+
url,
|
|
42
|
+
source_url,
|
|
43
|
+
pdf_url,
|
|
44
|
+
code_url,
|
|
45
|
+
github_stars,
|
|
46
|
+
project_url,
|
|
47
|
+
dataset_url,
|
|
48
|
+
model_urls,
|
|
49
|
+
dataset_urls,
|
|
50
|
+
space_urls,
|
|
51
|
+
num_models,
|
|
52
|
+
num_datasets,
|
|
53
|
+
num_spaces,
|
|
54
|
+
]
|
|
22
55
|
|
|
23
56
|
# schema-v2 metadata — injected by `unicli migrate schema-v2`
|
|
24
|
-
capabilities:
|
|
57
|
+
capabilities:
|
|
58
|
+
["http.fetch", "scholar.search", "scholar.code", "scholar.datasets"]
|
|
25
59
|
minimum_capability: http.fetch
|
|
26
60
|
trust: public
|
|
27
61
|
confidentiality: public
|