smart-web-mcp 0.5.0 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +13 -0
- package/dist/browser-session.d.ts +5 -0
- package/dist/browser-session.js +19 -10
- package/dist/browser-session.js.map +1 -1
- package/dist/index.js +1 -1
- package/dist/index.js.map +1 -1
- package/dist/shared.d.ts +3 -1
- package/dist/shared.js +38 -2
- package/dist/shared.js.map +1 -1
- package/dist/smartfetch/provider-policy.js +22 -4
- package/dist/smartfetch/provider-policy.js.map +1 -1
- package/dist/smartfetch/provider-types.d.ts +1 -0
- package/dist/smartfetch/providers/hackernews.d.ts +2 -0
- package/dist/smartfetch/providers/hackernews.js +154 -0
- package/dist/smartfetch/providers/hackernews.js.map +1 -0
- package/dist/smartfetch/providers/index.js +3 -1
- package/dist/smartfetch/providers/index.js.map +1 -1
- package/dist/smartfetch/providers/naver-cafe.d.ts +2 -0
- package/dist/smartfetch/providers/naver-cafe.js +163 -0
- package/dist/smartfetch/providers/naver-cafe.js.map +1 -0
- package/dist/smartfetch/providers/youtube.d.ts +14 -0
- package/dist/smartfetch/providers/youtube.js +17 -13
- package/dist/smartfetch/providers/youtube.js.map +1 -1
- package/dist/smartfetch-internals.d.ts +13 -0
- package/dist/smartfetch-internals.js +19 -0
- package/dist/smartfetch-internals.js.map +1 -0
- package/dist/smartfetch.js +29 -10
- package/dist/smartfetch.js.map +1 -1
- package/dist/smartsearch.js +54 -0
- package/dist/smartsearch.js.map +1 -1
- package/package.json +1 -1
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
import { asNumber, asString, dedupeUrls, extractAnchorHrefs, extractUrls, stripTags } from "../../shared.js";
|
|
2
|
+
import { fetchProviderJson } from "../provider-policy.js";
|
|
3
|
+
const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36";
|
|
4
|
+
/**
 * Reads the `id` query parameter from a URL string.
 *
 * @param {string} url - URL to inspect.
 * @returns {string} The trimmed `id` value, or `""` when the URL cannot be
 *   parsed or carries no non-empty `id` parameter.
 */
function extractItemId(url) {
    let rawId;
    try {
        rawId = new URL(url).searchParams.get("id");
    }
    catch {
        // An unparseable URL is reported the same way as a URL without an id.
        return "";
    }
    if (!rawId)
        return "";
    return String(rawId).trim();
}
|
|
14
|
+
/**
 * Converts one node of an item tree into a normalized comment record,
 * recursing into its children.
 *
 * @param {*} node - Candidate node; anything that is not an object whose
 *   `type` is `"comment"` is rejected.
 * @param {Set<string>} outbound - Collector that receives every link found in
 *   the comment HTML (mutated via `.add`).
 * @returns {{id: *, author: *, body: *, created_at: *, children: Array}|null}
 *   The mapped comment, or `null` when the node is not a comment.
 */
function mapComment(node, outbound) {
    const isComment = Boolean(node)
        && typeof node === "object"
        && String(node.type || "") === "comment";
    if (!isComment)
        return null;
    const html = asString(node.text);
    // Plain-text URLs first, then anchor hrefs, all into the shared collector.
    for (const link of [...extractUrls(html), ...extractAnchorHrefs(html)])
        outbound.add(link);
    const replies = [];
    if (Array.isArray(node.children)) {
        for (const child of node.children) {
            const mapped = mapComment(child, outbound);
            // Non-comment children come back as null and are dropped.
            if (mapped)
                replies.push(mapped);
        }
    }
    return {
        id: asString(node.id),
        author: asString(node.author),
        body: stripTags(html),
        created_at: asString(node.created_at),
        children: replies,
    };
}
|
|
35
|
+
/**
 * Counts how many nodes in a tree have `type === "comment"`, including the
 * root node itself.
 *
 * @param {*} node - Root of the tree; non-object values count as zero.
 * @returns {number} Number of comment nodes reachable through `children` arrays.
 */
function countComments(node) {
    let total = 0;
    const pending = [node];
    while (pending.length > 0) {
        const current = pending.pop();
        if (!current || typeof current !== "object")
            continue;
        if (String(current.type || "") === "comment")
            total += 1;
        // Only real arrays are descended into; any other `children` value is ignored.
        if (Array.isArray(current.children)) {
            for (const child of current.children)
                pending.push(child);
        }
    }
    return total;
}
|
|
42
|
+
/**
 * Smartfetch provider that normalizes a Hacker News item by querying the
 * Algolia items endpoint (`https://hn.algolia.com/api/v1/items/<id>`).
 */
export const hackernewsProvider = {
    id: "hackernews",
    // Selection depends only on the classified target; the URL is not inspected here.
    matches: (_url, target) => target === "hackernews_post",
    /**
     * Builds the normalized result for the item referenced by `context.url`.
     *
     * Three outcomes are possible:
     *  - no `id` in the URL: `post` is `null` and a `parse_error` is appended;
     *  - the item request fails or yields no object: a text-only partial result
     *    derived from `context.active` is returned with the provider error;
     *  - otherwise: the full post (comment or story shape) plus mapped comments.
     *
     * @param {object} context - Reads `url`, `errors`, `timeoutMs` and
     *   `active.{method, content, links}`.
     * @returns {Promise<object>} Result with `post`, `thread`, `comments`,
     *   `outbound_links`, `partial`, `errors` and `method`.
     */
    async normalize(context) {
        const { url, errors, active, timeoutMs } = context;
        const itemId = extractItemId(url);
        if (!itemId) {
            const invalidUrlError = { category: "parse_error", code: "invalid_hackernews_url", message: "URL is not a Hacker News item URL" };
            return {
                post: null,
                thread: [],
                comments: [],
                outbound_links: [],
                partial: true,
                errors: [...errors, invalidUrlError],
                method: active.method,
            };
        }
        const endpoint = `https://hn.algolia.com/api/v1/items/${encodeURIComponent(itemId)}`;
        const requestInit = {
            headers: {
                accept: "application/json,text/plain,*/*",
                "user-agent": USER_AGENT,
            },
        };
        const policy = {
            mode: "relay",
            sourceUrl: url,
        };
        const result = await fetchProviderJson(endpoint, timeoutMs, requestInit, policy);
        const hasPayload = result.ok && Boolean(result.data) && typeof result.data === "object";
        if (!hasPayload) {
            // Only a failed request carries a provider error; an ok-but-empty body does not.
            const providerError = result.ok ? null : result.error;
            return {
                post: {
                    url,
                    text: stripTags(active.content),
                    status: "partial_text_only",
                },
                thread: [],
                comments: [],
                outbound_links: dedupeUrls([...active.links, ...extractUrls(active.content)]),
                partial: true,
                errors: [
                    ...errors,
                    {
                        category: providerError?.category || "unavailable",
                        code: providerError?.code || "hackernews_item_failed",
                        message: providerError?.message || `status=${result.status}`,
                    },
                ],
                method: active.method,
            };
        }
        const item = result.data;
        const isComment = asString(item.type) === "comment";
        const selftext = asString(item.text);
        // Links from the item's own text come first, then its target URLs,
        // then (via mapComment) links found in the replies.
        const outbound = new Set([...extractUrls(selftext), ...extractAnchorHrefs(selftext)]);
        if (item.url)
            outbound.add(String(item.url));
        if (item.story_url)
            outbound.add(String(item.story_url));
        const replies = Array.isArray(item.children) ? item.children : null;
        const comments = replies
            ? replies.map((node) => mapComment(node, outbound)).filter(Boolean)
            : [];
        let title;
        let targetUrl;
        if (isComment) {
            title = item.story_title ? `Comment on: ${asString(item.story_title)}` : "Hacker News comment";
            targetUrl = "";
        }
        else {
            title = asString(item.title);
            targetUrl = item.url ? asString(item.url) : "";
        }
        const post = {
            id: asString(item.id),
            title,
            author: asString(item.author),
            url: targetUrl,
            selftext: stripTags(selftext),
            score: asNumber(item.points),
            // Prefer counting the fetched tree; fall back to the reported count.
            num_comments: replies
                ? replies.reduce((sum, node) => sum + countComments(node), 0)
                : asNumber(item.children_count),
            created_at: asString(item.created_at),
            discussion_url: url,
        };
        if (isComment) {
            // Comment items additionally carry a pointer back to their story.
            post.story_title = asString(item.story_title);
            post.story_url = asString(item.story_url);
        }
        return {
            post,
            thread: [],
            comments,
            outbound_links: dedupeUrls([...outbound]),
            partial: false,
            errors,
            method: "hackernews_algolia_public",
        };
    },
};
|
|
154
|
+
//# sourceMappingURL=hackernews.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"hackernews.js","sourceRoot":"","sources":["../../../src/smartfetch/providers/hackernews.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,UAAU,EAAE,kBAAkB,EAAE,WAAW,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAA;AAC5G,OAAO,EAAE,iBAAiB,EAAE,MAAM,uBAAuB,CAAA;AAGzD,MAAM,UAAU,GAAG,iHAAiH,CAAA;AAEpI,SAAS,aAAa,CAAC,GAAW;IAChC,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAA;QAC3B,MAAM,EAAE,GAAG,MAAM,CAAC,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;QACxC,OAAO,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAA;IACpC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,EAAE,CAAA;IACX,CAAC;AACH,CAAC;AAED,SAAS,UAAU,CAAC,IAAS,EAAE,QAAqB;IAClD,IAAI,CAAC,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ;QAAE,OAAO,IAAI,CAAA;IAClD,IAAI,MAAM,CAAC,IAAI,CAAC,IAAI,IAAI,EAAE,CAAC,KAAK,SAAS;QAAE,OAAO,IAAI,CAAA;IACtD,MAAM,QAAQ,GAAG,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;IACpC,KAAK,MAAM,IAAI,IAAI,WAAW,CAAC,QAAQ,CAAC;QAAE,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;IAC5D,KAAK,MAAM,IAAI,IAAI,kBAAkB,CAAC,QAAQ,CAAC;QAAE,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;IACnE,MAAM,QAAQ,GAAG,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC;QAC3C,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,IAAS,EAAE,EAAE,CAAC,UAAU,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC;QAC9E,CAAC,CAAC,EAAE,CAAA;IACN,OAAO;QACL,EAAE,EAAE,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;QACrB,MAAM,EAAE,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC;QAC7B,IAAI,EAAE,SAAS,CAAC,QAAQ,CAAC;QACzB,UAAU,EAAE,QAAQ,CAAC,IAAI,CAAC,UAAU,CAAC;QACrC,QAAQ;KACT,CAAA;AACH,CAAC;AAED,SAAS,aAAa,CAAC,IAAS;IAC9B,IAAI,CAAC,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ;QAAE,OAAO,CAAC,CAAA;IAC/C,MAAM,QAAQ,GAAG,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAA;IAClE,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,IAAI,EAAE,CAAC,KAAK,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;UAClD,QAAQ,CAAC,MAAM,CAAC,CAAC,GAAW,EAAE,IAAS,EAAE,EAAE,CAAC,GAAG,GAAG,aAAa,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAA;AAC/E,CAAC;AAED,MAAM,CAAC,MAAM,kBAAkB,GAAuB;IACpD,EAAE,EAAE,YAAY;IAChB,OAAO,EAAE,CAAC,IAAI,EAAE,MAAM,E
AAE,EAAE,CAAC,MAAM,KAAK,iBAAiB;IACvD,KAAK,CAAC,SAAS,CAAC,OAA0B;QACxC,MAAM,MAAM,GAAG,aAAa,CAAC,OAAO,CAAC,GAAG,CAAC,CAAA;QACzC,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,OAAO;gBACL,IAAI,EAAE,IAAI;gBACV,MAAM,EAAE,EAAE;gBACV,QAAQ,EAAE,EAAE;gBACZ,cAAc,EAAE,EAAE;gBAClB,OAAO,EAAE,IAAI;gBACb,MAAM,EAAE;oBACN,GAAG,OAAO,CAAC,MAAM;oBACjB,EAAE,QAAQ,EAAE,aAAa,EAAE,IAAI,EAAE,wBAAwB,EAAE,OAAO,EAAE,mCAAmC,EAAE;iBAC1G;gBACD,MAAM,EAAE,OAAO,CAAC,MAAM,CAAC,MAAM;aAC9B,CAAA;QACH,CAAC;QAED,MAAM,MAAM,GAAG,MAAM,iBAAiB,CACpC,uCAAuC,kBAAkB,CAAC,MAAM,CAAC,EAAE,EACnE,OAAO,CAAC,SAAS,EACjB;YACE,OAAO,EAAE;gBACP,MAAM,EAAE,iCAAiC;gBACzC,YAAY,EAAE,UAAU;aACzB;SACF,EACD;YACE,IAAI,EAAE,OAAO;YACb,SAAS,EAAE,OAAO,CAAC,GAAG;SACvB,CACF,CAAA;QAED,IAAI,CAAC,MAAM,CAAC,EAAE,IAAI,CAAC,MAAM,CAAC,IAAI,IAAI,OAAO,MAAM,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;YAClE,MAAM,aAAa,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAA;YACtD,OAAO;gBACL,IAAI,EAAE;oBACJ,GAAG,EAAE,OAAO,CAAC,GAAG;oBAChB,IAAI,EAAE,SAAS,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC;oBACvC,MAAM,EAAE,mBAAmB;iBAC5B;gBACD,MAAM,EAAE,EAAE;gBACV,QAAQ,EAAE,EAAE;gBACZ,cAAc,EAAE,UAAU,CAAC,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC,KAAK,EAAE,GAAG,WAAW,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC;gBAC7F,OAAO,EAAE,IAAI;gBACb,MAAM,EAAE;oBACN,GAAG,OAAO,CAAC,MAAM;oBACjB;wBACE,QAAQ,EAAE,aAAa,EAAE,QAAQ,IAAI,aAAa;wBAClD,IAAI,EAAE,aAAa,EAAE,IAAI,IAAI,wBAAwB;wBACrD,OAAO,EAAE,aAAa,EAAE,OAAO,IAAI,UAAU,MAAM,CAAC,MAAM,EAAE;qBAC7D;iBACF;gBACD,MAAM,EAAE,OAAO,CAAC,MAAM,CAAC,MAAM;aAC9B,CAAA;QACH,CAAC;QAED,MAAM,IAAI,GAAG,MAAM,CAAC,IAAW,CAAA;QAC/B,MAAM,QAAQ,GAAG,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QACpC,MAAM,QAAQ,GAAG,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QACpC,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAU,CAAA;QAClC,KAAK,MAAM,IAAI,IAAI,WAAW,CAAC,QAAQ,CAAC;YAAE,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;QAC5D,KAAK,MAAM,IAAI,IAAI,kBAAkB,CAAC,QAAQ,CAAC;YAAE,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;QACnE,IAAI,IAAI,CAAC,GAAG;YAAE,QAAQ,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAA;QAC5C,IAAI,IAAI,CAAC,SAAS;YAAE,QAAQ,CAAC,GAAG,CAAC
,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAA;QAExD,MAAM,QAAQ,GAAG,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC;YAC3C,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,IAAS,EAAE,EAAE,CAAC,UAAU,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,OAAO,CAA8B;YAC3G,CAAC,CAAC,EAAE,CAAA;QAEN,IAAI,QAAQ,KAAK,SAAS,EAAE,CAAC;YAC3B,OAAO;gBACL,IAAI,EAAE;oBACJ,EAAE,EAAE,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;oBACrB,KAAK,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,eAAe,QAAQ,CAAC,IAAI,CAAC,WAAW,CAAC,EAAE,CAAC,CAAC,CAAC,qBAAqB;oBAC7F,MAAM,EAAE,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC;oBAC7B,GAAG,EAAE,EAAE;oBACP,QAAQ,EAAE,SAAS,CAAC,QAAQ,CAAC;oBAC7B,KAAK,EAAE,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC;oBAC5B,YAAY,EAAE,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC;wBACxC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,GAAW,EAAE,IAAS,EAAE,EAAE,CAAC,GAAG,GAAG,aAAa,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;wBAChF,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,cAAc,CAAC;oBACjC,UAAU,EAAE,QAAQ,CAAC,IAAI,CAAC,UAAU,CAAC;oBACrC,cAAc,EAAE,OAAO,CAAC,GAAG;oBAC3B,WAAW,EAAE,QAAQ,CAAC,IAAI,CAAC,WAAW,CAAC;oBACvC,SAAS,EAAE,QAAQ,CAAC,IAAI,CAAC,SAAS,CAAC;iBACpC;gBACD,MAAM,EAAE,EAAE;gBACV,QAAQ;gBACR,cAAc,EAAE,UAAU,CAAC,CAAC,GAAG,QAAQ,CAAC,CAAC;gBACzC,OAAO,EAAE,KAAK;gBACd,MAAM,EAAE,OAAO,CAAC,MAAM;gBACtB,MAAM,EAAE,2BAA2B;aACpC,CAAA;QACH,CAAC;QAED,OAAO;YACL,IAAI,EAAE;gBACJ,EAAE,EAAE,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;gBACrB,KAAK,EAAE,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC;gBAC3B,MAAM,EAAE,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC;gBAC7B,GAAG,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE;gBACvC,QAAQ,EAAE,SAAS,CAAC,QAAQ,CAAC;gBAC7B,KAAK,EAAE,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC;gBAC5B,YAAY,EAAE,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,GAAW,EAAE,IAAS,EAAE,EAAE,CAAC,GAAG,GAAG,aAAa,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,cAAc,CAAC;gBAC3J,UAAU,EAAE,QAAQ,CAAC,IAAI,CAAC,UAAU,CAAC;gBACrC,cAAc,EAAE,OAAO,CAAC,GAAG;aAC5B;YACD,MAAM,EAAE,EAAE;YACV,QAAQ;YACR,cAAc,EAAE,UAAU,CAAC,CAAC,GAAG,QAAQ,CAAC,CAAC;YACzC,OAAO,EAAE,KAAK;YACd,MAAM,EAAE,OAAO,
CAAC,MAAM;YACtB,MAAM,EAAE,2BAA2B;SACpC,CAAA;IACH,CAAC;CACF,CAAA"}
|
|
@@ -1,12 +1,14 @@
|
|
|
1
1
|
import { dcinsideProvider } from "./dcinside.js";
|
|
2
2
|
import { genericProvider } from "./generic.js";
|
|
3
|
+
import { hackernewsProvider } from "./hackernews.js";
|
|
3
4
|
import { naverBlogProvider } from "./naver-blog.js";
|
|
5
|
+
import { naverCafeProvider } from "./naver-cafe.js";
|
|
4
6
|
import { redditProvider } from "./reddit.js";
|
|
5
7
|
import { tistoryProvider } from "./tistory.js";
|
|
6
8
|
import { velogProvider } from "./velog.js";
|
|
7
9
|
import { xProvider } from "./x.js";
|
|
8
10
|
import { youtubeProvider } from "./youtube.js";
|
|
9
|
-
const providers = [redditProvider, xProvider, dcinsideProvider, youtubeProvider, naverBlogProvider, tistoryProvider, velogProvider, genericProvider];
|
|
11
|
+
const providers = [hackernewsProvider, redditProvider, xProvider, dcinsideProvider, youtubeProvider, naverBlogProvider, naverCafeProvider, tistoryProvider, velogProvider, genericProvider];
|
|
10
12
|
/**
 * Picks the provider responsible for a URL/target pair.
 *
 * Providers are consulted in the order of the `providers` array; the first
 * one whose `matches` returns a truthy value wins.
 *
 * @param {string} url - URL being fetched.
 * @param {string} target - Target classification passed through to `matches`.
 * @returns {object} The first matching provider, or `genericProvider` when none match.
 */
export function resolveSmartfetchProvider(url, target) {
    for (const candidate of providers) {
        if (candidate.matches(url, target))
            return candidate;
    }
    return genericProvider;
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/smartfetch/providers/index.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,gBAAgB,EAAE,MAAM,eAAe,CAAA;AAChD,OAAO,EAAE,eAAe,EAAE,MAAM,cAAc,CAAA;AAC9C,OAAO,EAAE,iBAAiB,EAAE,MAAM,iBAAiB,CAAA;AACnD,OAAO,EAAE,cAAc,EAAE,MAAM,aAAa,CAAA;AAC5C,OAAO,EAAE,eAAe,EAAE,MAAM,cAAc,CAAA;AAC9C,OAAO,EAAE,aAAa,EAAE,MAAM,YAAY,CAAA;AAC1C,OAAO,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAA;AAClC,OAAO,EAAE,eAAe,EAAE,MAAM,cAAc,CAAA;AAE9C,MAAM,SAAS,GAAyB,CAAC,cAAc,EAAE,SAAS,EAAE,gBAAgB,EAAE,eAAe,EAAE,iBAAiB,EAAE,eAAe,EAAE,aAAa,EAAE,eAAe,CAAC,CAAA;
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/smartfetch/providers/index.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,gBAAgB,EAAE,MAAM,eAAe,CAAA;AAChD,OAAO,EAAE,eAAe,EAAE,MAAM,cAAc,CAAA;AAC9C,OAAO,EAAE,kBAAkB,EAAE,MAAM,iBAAiB,CAAA;AACpD,OAAO,EAAE,iBAAiB,EAAE,MAAM,iBAAiB,CAAA;AACnD,OAAO,EAAE,iBAAiB,EAAE,MAAM,iBAAiB,CAAA;AACnD,OAAO,EAAE,cAAc,EAAE,MAAM,aAAa,CAAA;AAC5C,OAAO,EAAE,eAAe,EAAE,MAAM,cAAc,CAAA;AAC9C,OAAO,EAAE,aAAa,EAAE,MAAM,YAAY,CAAA;AAC1C,OAAO,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAA;AAClC,OAAO,EAAE,eAAe,EAAE,MAAM,cAAc,CAAA;AAE9C,MAAM,SAAS,GAAyB,CAAC,kBAAkB,EAAE,cAAc,EAAE,SAAS,EAAE,gBAAgB,EAAE,eAAe,EAAE,iBAAiB,EAAE,iBAAiB,EAAE,eAAe,EAAE,aAAa,EAAE,eAAe,CAAC,CAAA;AAEjN,MAAM,UAAU,yBAAyB,CAAC,GAAW,EAAE,MAAsB;IAC3E,OAAO,SAAS,CAAC,IAAI,CAAC,CAAC,QAAQ,EAAE,EAAE,CAAC,QAAQ,CAAC,OAAO,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC,IAAI,eAAe,CAAA;AACvF,CAAC"}
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
import { asNumber, asString, dedupeUrls, extractMetaDescription, extractMetaProperty, extractTitleFromHtml, extractUrls, fetchJson, isNaverCafeUrl, stripTags } from "../../shared.js";
|
|
2
|
+
const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36";
|
|
3
|
+
/**
 * Decodes the second dot-separated segment of a token as base64url JSON.
 * No signature verification is performed.
 *
 * @param {*} value - Token string; falsy values are treated as `""`.
 * @returns {*} The parsed JSON value, or `null` when the token has fewer than
 *   two segments or the segment is not decodable JSON.
 */
function decodeJwtPayload(value) {
    const segments = String(value || "").split(".");
    if (segments.length < 2)
        return null;
    const payloadSegment = segments[1] || "";
    try {
        // Map the URL-safe alphabet back to standard base64 and restore padding.
        const standard = payloadSegment.replaceAll("-", "+").replaceAll("_", "/");
        const missing = (4 - (standard.length % 4)) % 4;
        const json = Buffer.from(standard + "=".repeat(missing), "base64").toString("utf-8");
        return JSON.parse(json);
    }
    catch {
        return null;
    }
}
|
|
16
|
+
/**
 * Resolves the `art` token, cafe id and article id for a Naver Cafe article.
 *
 * Sources are tried in this order, each filling only values still missing:
 *  1. the URL query (`art`, `clubid`, `articleid`);
 *  2. a `/cafes/<id>/articles/<id>` path, then a purely numeric last path segment;
 *  3. regex matches against the page HTML;
 *  4. the decoded payload of the `art` token (`cafeId`, `articleId`).
 *
 * @param {string} url - Article URL; parse failures are tolerated.
 * @param {string} html - Page HTML used for the regex fallbacks.
 * @returns {{art: string, cafeId: *, articleId: *}} Resolved identifiers;
 *   unresolved ids keep a falsy value.
 */
function parseNaverCafeIdentifiers(url, html) {
    let art = "";
    let cafeId = 0;
    let articleId = 0;
    try {
        const target = new URL(url);
        const query = target.searchParams;
        art = query.get("art") || "";
        cafeId = asNumber(query.get("clubid"));
        articleId = asNumber(query.get("articleid"));
        if (!articleId) {
            const pathIds = /\/cafes\/(\d+)\/articles\/(\d+)/i.exec(target.pathname);
            if (pathIds) {
                if (!cafeId)
                    cafeId = asNumber(pathIds[1]);
                articleId = asNumber(pathIds[2]);
            }
        }
        if (!articleId) {
            const segments = target.pathname.split("/").filter(Boolean);
            const tail = segments[segments.length - 1] || "";
            if (/^\d+$/.test(tail))
                articleId = asNumber(tail);
        }
    }
    catch {
        // Keep the regex fallbacks below.
    }
    art ||= html.match(/[?&]art=([^"'&\s]+)/i)?.[1] || "";
    cafeId ||= asNumber(html.match(/g_sClubId\s*=\s*"(\d+)"/i)?.[1] || html.match(/[?&]clubid=(\d+)/i)?.[1]);
    articleId ||= asNumber(html.match(/[?&]articleid=(\d+)/i)?.[1]);
    // Last resort: ids embedded in the token payload itself.
    const tokenPayload = art ? decodeJwtPayload(art) : null;
    if (!cafeId)
        cafeId = asNumber(tokenPayload?.cafeId);
    if (!articleId)
        articleId = asNumber(tokenPayload?.articleId);
    return { art, cafeId, articleId };
}
|
|
57
|
+
/**
 * Builds the article JSON endpoint URL on `article.cafe.naver.com`.
 *
 * @param {number|string} cafeId - Cafe identifier placed in the path.
 * @param {number|string} articleId - Article identifier placed in the path.
 * @param {string} art - Token added as the `art` query parameter when truthy.
 * @returns {string} The serialized endpoint URL.
 */
function buildArticleApiUrl(cafeId, articleId, art) {
    const endpoint = new URL(`https://article.cafe.naver.com/gw/v4/cafes/${cafeId}/articles/${articleId}`);
    if (!art)
        return endpoint.toString();
    endpoint.searchParams.set("art", art);
    return endpoint.toString();
}
|
|
63
|
+
/**
 * Checks whether a URL's path has one of the shapes this module treats as a
 * cafe article: `.../ArticleRead.nhn`, `/ca-fe/web/cafes/<id>/articles/<id>`,
 * or a path ending in `/<segment>/<digits>`.
 *
 * @param {string} url - URL to test; only its pathname is examined.
 * @returns {boolean} `true` when the path matches, `false` otherwise or when
 *   the URL cannot be parsed.
 */
function looksLikeNaverCafeArticle(url) {
    const articlePathShapes = [
        /\/ArticleRead\.nhn$/i,
        /\/ca-fe\/web\/cafes\/\d+\/articles\/\d+/i,
        /\/[^/]+\/\d+$/i,
    ];
    let pathname;
    try {
        pathname = new URL(url).pathname;
    }
    catch {
        return false;
    }
    return articlePathShapes.some((shape) => shape.test(pathname));
}
|
|
73
|
+
/**
 * Smartfetch provider for Naver Cafe articles. It resolves the article
 * identifiers and `art` token, then reads the article JSON endpoint; when
 * anything is missing it falls back to text extracted from the fetched page.
 */
export const naverCafeProvider = {
    id: "naver-cafe",
    // An explicit target always matches; a generic target matches only for
    // cafe URLs whose path looks like an article.
    matches: (url, target) => {
        if (target === "naver_cafe_post")
            return true;
        return target === "generic" && isNaverCafeUrl(url) && looksLikeNaverCafeArticle(url);
    },
    /**
     * Produces the normalized result for the page described by `context`.
     *
     * @param {object} context - Reads `url`, `resolvedUrl`, `errors`,
     *   `timeoutMs` and `active.{content, links, method}`.
     * @returns {Promise<object>} Result with `post`, `thread`, `comments`,
     *   `outbound_links`, `partial`, `errors` and `method`. `partial` is
     *   `false` only when the article JSON yielded both a subject and a body.
     */
    async normalize(context) {
        const pageHtml = context.active.content;
        const pageUrl = context.resolvedUrl || context.url;
        const fallbackTitle = extractMetaProperty(pageHtml, "og:title") || extractTitleFromHtml(pageHtml);
        const fallbackDescription = extractMetaProperty(pageHtml, "og:description") || extractMetaDescription(pageHtml);
        const fallbackText = stripTags(pageHtml);
        const { art, cafeId, articleId } = parseNaverCafeIdentifiers(pageUrl, pageHtml);
        if (!(cafeId && articleId && art)) {
            // Without all three values the article endpoint cannot be queried.
            return {
                post: {
                    url: context.url,
                    title: fallbackTitle,
                    description: fallbackDescription,
                    text: fallbackText.slice(0, 30000),
                    extractor: "fallback",
                    status: fallbackText ? "partial_text_only" : "blocked_or_unavailable",
                },
                thread: [],
                comments: [],
                outbound_links: dedupeUrls([...context.active.links, ...extractUrls(pageHtml)]),
                partial: true,
                errors: [
                    ...context.errors,
                    { category: "parse_error", code: "naver_cafe_public_token_missing", message: "Could not resolve the public Naver Cafe article token and identifiers" },
                ],
                method: context.active.method,
            };
        }
        const apiUrl = buildArticleApiUrl(cafeId, articleId, art);
        const requestHeaders = {
            accept: "application/json,text/plain,*/*",
            "user-agent": USER_AGENT,
            referer: pageUrl,
        };
        const result = await fetchJson(apiUrl, context.timeoutMs, { headers: requestHeaders });
        // A failed request is treated as an empty payload so the lookups below stay uniform.
        const payload = result.ok ? result.data?.result : undefined;
        const article = payload?.article || {};
        const cafe = payload?.cafe || {};
        const writer = article?.writer || {};
        const menu = article?.menu || {};
        const contentHtml = asString(article?.contentHtml);
        const body = stripTags(contentHtml);
        const articleUsable = result.ok && Boolean(article?.subject) && Boolean(body);
        if (!articleUsable) {
            const failure = result.ok
                ? { category: "parse_error", code: "naver_cafe_article_missing_body", message: "Naver Cafe article JSON did not include readable body content" }
                : { category: "unavailable", code: "naver_cafe_article_fetch_failed", message: result.error || `status=${result.status}` };
            return {
                post: {
                    url: context.url,
                    canonical_url: apiUrl,
                    title: asString(article?.subject) || fallbackTitle,
                    description: fallbackDescription,
                    text: (body || fallbackText).slice(0, 30000),
                    author: asString(writer?.nick),
                    extractor: body ? "naver_cafe_article_json" : "fallback",
                    status: (body || fallbackText) ? "partial_text_only" : "blocked_or_unavailable",
                },
                thread: [],
                comments: [],
                outbound_links: dedupeUrls([...context.active.links, ...extractUrls(contentHtml || pageHtml)]),
                partial: true,
                errors: [...context.errors, failure],
                method: result.ok ? "naver_cafe_article_json" : context.active.method,
            };
        }
        return {
            post: {
                url: context.url,
                canonical_url: apiUrl,
                title: asString(article?.subject),
                description: fallbackDescription,
                text: body.slice(0, 50000),
                author: asString(writer?.nick),
                menu_name: asString(menu?.name),
                cafe_name: asString(cafe?.name || cafe?.cafeName),
                write_date: asString(article?.writeDate),
                read_count: asNumber(article?.readCount),
                comment_count: asNumber(article?.commentCount),
                extractor: "naver_cafe_article_json",
                status: "ok",
            },
            thread: [],
            comments: [],
            outbound_links: dedupeUrls([...context.active.links, ...extractUrls(contentHtml)]),
            partial: false,
            errors: context.errors,
            method: "naver_cafe_article_json",
        };
    },
};
|
|
163
|
+
//# sourceMappingURL=naver-cafe.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"naver-cafe.js","sourceRoot":"","sources":["../../../src/smartfetch/providers/naver-cafe.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,UAAU,EAAE,sBAAsB,EAAE,mBAAmB,EAAE,oBAAoB,EAAE,WAAW,EAAE,SAAS,EAAE,cAAc,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAA;AAGtL,MAAM,UAAU,GAAG,iHAAiH,CAAA;AAEpI,SAAS,gBAAgB,CAAC,KAAa;IACrC,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAA;IAC5C,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,IAAI,CAAA;IACjC,IAAI,CAAC;QACH,MAAM,UAAU,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,GAAG,CAAC,CAAA;QACzE,MAAM,MAAM,GAAG,UAAU,GAAG,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,GAAG,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAA;QACvE,OAAO,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,CAA4B,CAAA;IAC/F,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAA;IACb,CAAC;AACH,CAAC;AAED,SAAS,yBAAyB,CAAC,GAAW,EAAE,IAAY;IAC1D,IAAI,GAAG,GAAG,EAAE,CAAA;IACZ,IAAI,MAAM,GAAG,CAAC,CAAA;IACd,IAAI,SAAS,GAAG,CAAC,CAAA;IAEjB,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAA;QAC3B,GAAG,GAAG,MAAM,CAAC,YAAY,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,EAAE,CAAA;QAC1C,MAAM,GAAG,QAAQ,CAAC,MAAM,CAAC,YAAY,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAA;QACpD,SAAS,GAAG,QAAQ,CAAC,MAAM,CAAC,YAAY,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC,CAAA;QAE1D,IAAI,CAAC,SAAS,EAAE,CAAC;YACf,MAAM,UAAU,GAAG,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,kCAAkC,CAAC,CAAA;YAC5E,IAAI,UAAU,EAAE,CAAC;gBACf,MAAM,KAAK,QAAQ,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAA;gBAClC,SAAS,KAAK,QAAQ,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAA;YACvC,CAAC;QACH,CAAC;QAED,IAAI,CAAC,SAAS,EAAE,CAAC;YACf,MAAM,KAAK,GAAG,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAA;YACxD,MAAM,IAAI,GAAG,KAAK,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAA;YAC/B,IAAI,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC;gBAAE,SAAS,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAA;QACpD,CAAC;IACH,CAAC;IAAC,MAAM,CAAC;QACP,kCAAkC;IACpC,CAAC;IAED,IAAI,CAAC,GAAG;QAAE,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,sBAAsB,CAAC,EAAE,CAAC,CAA
C,CAAC,IAAI,EAAE,CAAA;IAC7D,IAAI,CAAC,MAAM;QAAE,MAAM,GAAG,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,0BAA0B,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,KAAK,CAAC,mBAAmB,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAA;IACnH,IAAI,CAAC,SAAS;QAAE,SAAS,GAAG,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,sBAAsB,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAA;IAE7E,MAAM,YAAY,GAAG,GAAG,CAAC,CAAC,CAAC,gBAAgB,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAA;IACvD,MAAM,KAAK,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC,CAAA;IACzC,SAAS,KAAK,QAAQ,CAAC,YAAY,EAAE,SAAS,CAAC,CAAA;IAE/C,OAAO;QACL,GAAG;QACH,MAAM;QACN,SAAS;KACV,CAAA;AACH,CAAC;AAED,SAAS,kBAAkB,CAAC,MAAc,EAAE,SAAiB,EAAE,GAAW;IACxE,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,8CAA8C,MAAM,aAAa,SAAS,EAAE,CAAC,CAAA;IACpG,IAAI,GAAG;QAAE,MAAM,CAAC,YAAY,CAAC,GAAG,CAAC,KAAK,EAAE,GAAG,CAAC,CAAA;IAC5C,OAAO,MAAM,CAAC,QAAQ,EAAE,CAAA;AAC1B,CAAC;AAED,SAAS,yBAAyB,CAAC,GAAW;IAC5C,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAA;QAC3B,MAAM,IAAI,GAAG,MAAM,CAAC,QAAQ,CAAA;QAC5B,OAAO,sBAAsB,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,0CAA0C,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;IAClI,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAA;IACd,CAAC;AACH,CAAC;AAED,MAAM,CAAC,MAAM,iBAAiB,GAAuB;IACnD,EAAE,EAAE,YAAY;IAChB,OAAO,EAAE,CAAC,GAAG,EAAE,MAAM,EAAE,EAAE,CAAC,MAAM,KAAK,iBAAiB,IAAI,CAAC,MAAM,KAAK,SAAS,IAAI,cAAc,CAAC,GAAG,CAAC,IAAI,yBAAyB,CAAC,GAAG,CAAC,CAAC;IACzI,KAAK,CAAC,SAAS,CAAC,OAA0B;QACxC,MAAM,aAAa,GAAG,mBAAmB,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,EAAE,UAAU,CAAC,IAAI,oBAAoB,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,CAAA;QAC7H,MAAM,mBAAmB,GAAG,mBAAmB,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,EAAE,gBAAgB,CAAC,IAAI,sBAAsB,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,CAAA;QAC3I,MAAM,YAAY,GAAG,SAAS,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,CAAA;QACtD,MAAM,WAAW,GAAG,yBAAyB,CAAC,OAAO,CAAC,WAAW,IAAI,OAAO,CAAC,GAAG,EAAE,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,CAAA;QAEzG,IAAI,CAAC,WAAW,CAAC,MAAM,IAAI,CAAC,WAAW,CAAC,SAAS,IAAI,CAAC,WAAW,CAAC,GAAG,EAAE,CAAC;YACtE,OAAO;gBACL,IAAI,EAAE;oBACJ,GAAG,EAAE,OAAO,CAAC,GAAG;oBAChB,KAAK,EAAE,aAAa;oBACpB,WAAW,EAAE,mBAAmB;oBAChC,IAAI,EAAE,YAAY,CAAC,KA
AK,CAAC,CAAC,EAAE,KAAK,CAAC;oBAClC,SAAS,EAAE,UAAU;oBACrB,MAAM,EAAE,YAAY,CAAC,CAAC,CAAC,mBAAmB,CAAC,CAAC,CAAC,wBAAwB;iBACtE;gBACD,MAAM,EAAE,EAAE;gBACV,QAAQ,EAAE,EAAE;gBACZ,cAAc,EAAE,UAAU,CAAC,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC,KAAK,EAAE,GAAG,WAAW,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC;gBAC7F,OAAO,EAAE,IAAI;gBACb,MAAM,EAAE;oBACN,GAAG,OAAO,CAAC,MAAM;oBACjB,EAAE,QAAQ,EAAE,aAAa,EAAE,IAAI,EAAE,iCAAiC,EAAE,OAAO,EAAE,uEAAuE,EAAE;iBACvJ;gBACD,MAAM,EAAE,OAAO,CAAC,MAAM,CAAC,MAAM;aAC9B,CAAA;QACH,CAAC;QAED,MAAM,MAAM,GAAG,kBAAkB,CAAC,WAAW,CAAC,MAAM,EAAE,WAAW,CAAC,SAAS,EAAE,WAAW,CAAC,GAAG,CAAC,CAAA;QAC7F,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,MAAM,EAAE,OAAO,CAAC,SAAS,EAAE;YACxD,OAAO,EAAE;gBACP,MAAM,EAAE,iCAAiC;gBACzC,YAAY,EAAE,UAAU;gBACxB,OAAO,EAAE,OAAO,CAAC,WAAW,IAAI,OAAO,CAAC,GAAG;aAC5C;SACF,CAAC,CAAA;QAEF,MAAM,OAAO,GAAG,MAAM,CAAC,EAAE,CAAC,CAAC,CAAE,MAAM,CAAC,IAAY,EAAE,MAAM,EAAE,OAAO,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAA;QAC5E,MAAM,MAAM,GAAG,OAAO,EAAE,MAAM,IAAI,EAAE,CAAA;QACpC,MAAM,IAAI,GAAG,OAAO,EAAE,IAAI,IAAI,EAAE,CAAA;QAChC,MAAM,IAAI,GAAG,MAAM,CAAC,EAAE,CAAC,CAAC,CAAE,MAAM,CAAC,IAAY,EAAE,MAAM,EAAE,IAAI,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAA;QACtE,MAAM,WAAW,GAAG,QAAQ,CAAC,OAAO,EAAE,WAAW,CAAC,CAAA;QAClD,MAAM,IAAI,GAAG,SAAS,CAAC,WAAW,CAAC,CAAA;QAEnC,IAAI,CAAC,MAAM,CAAC,EAAE,IAAI,CAAC,OAAO,EAAE,OAAO,IAAI,CAAC,IAAI,EAAE,CAAC;YAC7C,OAAO;gBACL,IAAI,EAAE;oBACJ,GAAG,EAAE,OAAO,CAAC,GAAG;oBAChB,aAAa,EAAE,MAAM;oBACrB,KAAK,EAAE,QAAQ,CAAC,OAAO,EAAE,OAAO,CAAC,IAAI,aAAa;oBAClD,WAAW,EAAE,mBAAmB;oBAChC,IAAI,EAAE,CAAC,IAAI,IAAI,YAAY,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC;oBAC5C,MAAM,EAAE,QAAQ,CAAC,MAAM,EAAE,IAAI,CAAC;oBAC9B,SAAS,EAAE,IAAI,CAAC,CAAC,CAAC,yBAAyB,CAAC,CAAC,CAAC,UAAU;oBACxD,MAAM,EAAE,IAAI,IAAI,YAAY,CAAC,CAAC,CAAC,mBAAmB,CAAC,CAAC,CAAC,wBAAwB;iBAC9E;gBACD,MAAM,EAAE,EAAE;gBACV,QAAQ,EAAE,EAAE;gBACZ,cAAc,EAAE,UAAU,CAAC,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC,KAAK,EAAE,GAAG,WAAW,CAAC,WAAW,IAAI,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC;gBAC5G,OAAO,EAAE,IAAI;gBACb,MAAM,EAAE,MAAM,CAAC,EAAE;oBACf,CAAC,CAAC,CAAC,GAA
G,OAAO,CAAC,MAAM,EAAE,EAAE,QAAQ,EAAE,aAAa,EAAE,IAAI,EAAE,iCAAiC,EAAE,OAAO,EAAE,+DAA+D,EAAE,CAAC;oBACrK,CAAC,CAAC,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,QAAQ,EAAE,aAAa,EAAE,IAAI,EAAE,iCAAiC,EAAE,OAAO,EAAE,MAAM,CAAC,KAAK,IAAI,UAAU,MAAM,CAAC,MAAM,EAAE,EAAE,CAAC;gBACjJ,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC,yBAAyB,CAAC,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC,MAAM;aACtE,CAAA;QACH,CAAC;QAED,OAAO;YACL,IAAI,EAAE;gBACJ,GAAG,EAAE,OAAO,CAAC,GAAG;gBAChB,aAAa,EAAE,MAAM;gBACrB,KAAK,EAAE,QAAQ,CAAC,OAAO,EAAE,OAAO,CAAC;gBACjC,WAAW,EAAE,mBAAmB;gBAChC,IAAI,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC;gBAC1B,MAAM,EAAE,QAAQ,CAAC,MAAM,EAAE,IAAI,CAAC;gBAC9B,SAAS,EAAE,QAAQ,CAAC,IAAI,EAAE,IAAI,CAAC;gBAC/B,SAAS,EAAE,QAAQ,CAAC,IAAI,EAAE,IAAI,IAAI,IAAI,EAAE,QAAQ,CAAC;gBACjD,UAAU,EAAE,QAAQ,CAAC,OAAO,EAAE,SAAS,CAAC;gBACxC,UAAU,EAAE,QAAQ,CAAC,OAAO,EAAE,SAAS,CAAC;gBACxC,aAAa,EAAE,QAAQ,CAAC,OAAO,EAAE,YAAY,CAAC;gBAC9C,SAAS,EAAE,yBAAyB;gBACpC,MAAM,EAAE,IAAI;aACb;YACD,MAAM,EAAE,EAAE;YACV,QAAQ,EAAE,EAAE;YACZ,cAAc,EAAE,UAAU,CAAC,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC,KAAK,EAAE,GAAG,WAAW,CAAC,WAAW,CAAC,CAAC,CAAC;YAClF,OAAO,EAAE,KAAK;YACd,MAAM,EAAE,OAAO,CAAC,MAAM;YACtB,MAAM,EAAE,yBAAyB;SAClC,CAAA;IACH,CAAC;CACF,CAAA"}
|
|
@@ -1,2 +1,16 @@
|
|
|
1
1
|
import type { SmartfetchProvider } from "../provider-types.js";
|
|
2
|
+
/** One transcript entry as returned by `fetchTranscript`. */
type TranscriptChunk = {
    text: string;
    duration: number;
    offset: number;
    lang?: string;
};
/** Optional settings accepted by `fetchTranscript`. */
type TranscriptFetchOptions = {
    lang?: string;
    fetch?: typeof fetch;
};
/** Shape of the module object a transcript loader must resolve to. */
type YoutubeTranscriptModule = {
    fetchTranscript: (url: string, options?: TranscriptFetchOptions) => Promise<TranscriptChunk[]>;
};
|
|
14
|
+
export declare function TEST_setYoutubeTranscriptLoader(loader?: () => Promise<YoutubeTranscriptModule>): void;
|
|
2
15
|
export declare const youtubeProvider: SmartfetchProvider;
|
|
16
|
+
export {};
|
|
@@ -1,6 +1,10 @@
|
|
|
1
|
-
import { execFileSync } from "node:child_process";
|
|
2
1
|
import { dedupeUrls, envEnabled, extractMetaDescription, extractMetaProperty, extractTitleFromHtml, extractUrls, stripTags } from "../../shared.js";
|
|
3
2
|
import { fetchProviderJson } from "../provider-policy.js";
|
|
3
|
+
const defaultYoutubeTranscriptLoader = () => import("youtube-transcript/dist/youtube-transcript.esm.js");
|
|
4
|
+
let youtubeTranscriptLoader = defaultYoutubeTranscriptLoader;
|
|
5
|
+
export function TEST_setYoutubeTranscriptLoader(loader) {
|
|
6
|
+
youtubeTranscriptLoader = loader || defaultYoutubeTranscriptLoader;
|
|
7
|
+
}
|
|
4
8
|
function allowYoutubeTranscript() {
|
|
5
9
|
return envEnabled("SMARTFETCH_ENABLE_YOUTUBE_TRANSCRIPT", true);
|
|
6
10
|
}
|
|
@@ -18,22 +22,22 @@ async function fetchYoutubeMetadata(url, timeoutMs) {
|
|
|
18
22
|
return result.data;
|
|
19
23
|
}
|
|
20
24
|
async function fetchYoutubeTranscript(url, timeoutMs) {
|
|
25
|
+
const waitMs = Math.max(1000, timeoutMs);
|
|
26
|
+
const controller = new AbortController();
|
|
27
|
+
const signal = controller.signal;
|
|
28
|
+
const timer = setTimeout(() => controller.abort(new Error(`Transcript fetch timed out after ${waitMs}ms`)), waitMs);
|
|
21
29
|
try {
|
|
22
|
-
const
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
`;
|
|
27
|
-
const output = execFileSync(process.execPath, ["--input-type=module", "-e", script], {
|
|
28
|
-
timeout: Math.max(1000, timeoutMs),
|
|
29
|
-
encoding: "utf-8",
|
|
30
|
-
maxBuffer: 5 * 1024 * 1024,
|
|
31
|
-
}).trim();
|
|
32
|
-
return (output ? JSON.parse(output) : []);
|
|
30
|
+
const mod = await youtubeTranscriptLoader();
|
|
31
|
+
return await mod.fetchTranscript(url, {
|
|
32
|
+
fetch: (input, init) => fetch(input, { ...init, signal }),
|
|
33
|
+
});
|
|
33
34
|
}
|
|
34
35
|
catch (error) {
|
|
35
36
|
const message = error instanceof Error ? error.message : String(error);
|
|
36
|
-
throw new Error(message.toLowerCase().includes("timed out") ? `Transcript fetch timed out after ${
|
|
37
|
+
throw new Error(signal.aborted || message.toLowerCase().includes("abort") || message.toLowerCase().includes("timed out") ? `Transcript fetch timed out after ${waitMs}ms` : message);
|
|
38
|
+
}
|
|
39
|
+
finally {
|
|
40
|
+
clearTimeout(timer);
|
|
37
41
|
}
|
|
38
42
|
}
|
|
39
43
|
export const youtubeProvider = {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"youtube.js","sourceRoot":"","sources":["../../../src/smartfetch/providers/youtube.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,
|
|
1
|
+
{"version":3,"file":"youtube.js","sourceRoot":"","sources":["../../../src/smartfetch/providers/youtube.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,UAAU,EAAE,sBAAsB,EAAE,mBAAmB,EAAE,oBAAoB,EAAE,WAAW,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAA;AACnJ,OAAO,EAAE,iBAAiB,EAAE,MAAM,uBAAuB,CAAA;AAczD,MAAM,8BAA8B,GAAG,GAAG,EAAE,CAAC,MAAM,CAAC,mDAAmD,CAAqC,CAAA;AAE5I,IAAI,uBAAuB,GAAG,8BAA8B,CAAA;AAE5D,MAAM,UAAU,+BAA+B,CAAC,MAA+C;IAC7F,uBAAuB,GAAG,MAAM,IAAI,8BAA8B,CAAA;AACpE,CAAC;AAED,SAAS,sBAAsB;IAC7B,OAAO,UAAU,CAAC,sCAAsC,EAAE,IAAI,CAAC,CAAA;AACjE,CAAC;AAED,KAAK,UAAU,oBAAoB,CAAC,GAAW,EAAE,SAAiB;IAChE,MAAM,MAAM,GAAG,MAAM,iBAAiB,CACpC,sCAAsC,kBAAkB,CAAC,GAAG,CAAC,cAAc,EAC3E,SAAS,EACT;QACE,OAAO,EAAE;YACP,MAAM,EAAE,iCAAiC;SAC1C;KACF,EACD;QACE,IAAI,EAAE,OAAO;QACb,SAAS,EAAE,GAAG;KACf,CACF,CAAA;IACD,IAAI,CAAC,MAAM,CAAC,EAAE,IAAI,CAAC,MAAM,CAAC,IAAI,IAAI,OAAO,MAAM,CAAC,IAAI,KAAK,QAAQ;QAAE,OAAO,IAAI,CAAA;IAC9E,OAAO,MAAM,CAAC,IAA+B,CAAA;AAC/C,CAAC;AAED,KAAK,UAAU,sBAAsB,CAAC,GAAW,EAAE,SAAiB;IAClE,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,SAAS,CAAC,CAAA;IACxC,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAA;IACxC,MAAM,MAAM,GAAG,UAAU,CAAC,MAAM,CAAA;IAChC,MAAM,KAAK,GAAG,UAAU,CAAC,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC,oCAAoC,MAAM,IAAI,CAAC,CAAC,EAAE,MAAM,CAAC,CAAA;IACnH,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,MAAM,uBAAuB,EAAE,CAAA;QAC3C,OAAO,MAAM,GAAG,CAAC,eAAe,CAAC,GAAG,EAAE;YACpC,KAAK,EAAE,CAAC,KAA6B,EAAE,IAAkB,EAAE,EAAE,CAAC,KAAK,CAAC,KAAK,EAAE,EAAE,GAAG,IAAI,EAAE,MAAM,EAAE,CAAC;SACzF,CAAsB,CAAA;IAChC,CAAC;IAAC,OAAO,KAAc,EAAE,CAAC;QACxB,MAAM,OAAO,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAA;QACtE,MAAM,IAAI,KAAK,CAAC,MAAM,CAAC,OAAO,IAAI,OAAO,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,OAAO,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,oCAAoC,MAAM,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,CAAA;IACtL,CAAC;YAAS,CAAC;QACT,YAAY,CAAC,KAAK,CAAC,CAAA;IACrB,CAAC;AACH,CAAC;AAED,MAAM,CAAC,MAAM,eAAe,GAAuB;IACjD,EAAE,EAAE,SAAS;IACb,OAAO,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE
,CAAC,MAAM,KAAK,eAAe;IACrD,KAAK,CAAC,SAAS,CAAC,OAA0B;QACxC,MAAM,MAAM,GAAG,MAAM,oBAAoB,CAAC,OAAO,CAAC,GAAG,EAAE,OAAO,CAAC,SAAS,CAAC,CAAA;QACzE,MAAM,KAAK,GAAG,mBAAmB,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,EAAE,UAAU,CAAC,IAAI,MAAM,CAAC,MAAM,EAAE,KAAK,IAAI,EAAE,CAAC,IAAI,oBAAoB,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,CAAA;QACpJ,MAAM,WAAW,GAAG,mBAAmB,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,EAAE,gBAAgB,CAAC,IAAI,sBAAsB,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,CAAA;QACnI,MAAM,YAAY,GAAG,SAAS,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,CAAA;QACtD,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,EAAE,WAAW,IAAI,EAAE,CAAC,CAAA;QAChD,MAAM,SAAS,GAAG,MAAM,CAAC,MAAM,EAAE,aAAa,IAAI,EAAE,CAAC,CAAA;QAErD,IAAI,CAAC,sBAAsB,EAAE,EAAE,CAAC;YAC9B,OAAO;gBACL,IAAI,EAAE;oBACJ,GAAG,EAAE,OAAO,CAAC,GAAG;oBAChB,KAAK;oBACL,WAAW;oBACX,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;oBAC7B,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,SAAS,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;oBACnC,IAAI,EAAE,YAAY,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC;oBAClC,oBAAoB,EAAE,KAAK;oBAC3B,MAAM,EAAE,YAAY,CAAC,CAAC,CAAC,mBAAmB,CAAC,CAAC,CAAC,wBAAwB;iBACtE;gBACD,MAAM,EAAE,EAAE;gBACV,QAAQ,EAAE,EAAE;gBACZ,cAAc,EAAE,UAAU,CAAC,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC,KAAK,EAAE,GAAG,WAAW,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC;gBAC7F,OAAO,EAAE,IAAI;gBACb,MAAM,EAAE;oBACN,GAAG,OAAO,CAAC,MAAM;oBACjB,EAAE,QAAQ,EAAE,aAAa,EAAE,IAAI,EAAE,6BAA6B,EAAE,OAAO,EAAE,yDAAyD,EAAE;iBACrI;gBACD,MAAM,EAAE,OAAO,CAAC,MAAM,CAAC,MAAM;aAC9B,CAAA;QACH,CAAC;QAED,IAAI,CAAC;YACH,MAAM,UAAU,GAAG,MAAM,sBAAsB,CAAC,OAAO,CAAC,GAAG,EAAE,OAAO,CAAC,SAAS,CAAC,CAAA;YAC/E,MAAM,cAAc,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC,IAAqB,EAAE,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;YAC/I,OAAO;gBACL,IAAI,EAAE;oBACJ,GAAG,EAAE,OAAO,CAAC,GAAG;oBAChB,KAAK;oBACL,WAAW;oBACX,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;oBAC7B,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,SAAS,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;oBACnC,IAAI,EAAE,cAAc,C
AAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC;oBACpC,oBAAoB,EAAE,OAAO,CAAC,cAAc,CAAC;oBAC7C,wBAAwB,EAAE,UAAU,CAAC,MAAM;oBAC3C,MAAM,EAAE,cAAc,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,wBAAwB;iBACzD;gBACD,MAAM,EAAE,EAAE;gBACV,QAAQ,EAAE,EAAE;gBACZ,cAAc,EAAE,UAAU,CAAC,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC,KAAK,EAAE,GAAG,WAAW,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC;gBAC7F,OAAO,EAAE,CAAC,cAAc;gBACxB,MAAM,EAAE,OAAO,CAAC,MAAM;gBACtB,MAAM,EAAE,oBAAoB;aAC7B,CAAA;QACH,CAAC;QAAC,OAAO,KAAc,EAAE,CAAC;YACxB,MAAM,OAAO,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAA;YACtE,OAAO;gBACL,IAAI,EAAE;oBACJ,GAAG,EAAE,OAAO,CAAC,GAAG;oBAChB,KAAK;oBACL,WAAW;oBACX,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;oBAC7B,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,SAAS,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;oBACnC,IAAI,EAAE,YAAY,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC;oBAClC,oBAAoB,EAAE,KAAK;oBAC3B,MAAM,EAAE,YAAY,CAAC,CAAC,CAAC,mBAAmB,CAAC,CAAC,CAAC,wBAAwB;iBACtE;gBACD,MAAM,EAAE,EAAE;gBACV,QAAQ,EAAE,EAAE;gBACZ,cAAc,EAAE,UAAU,CAAC,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC,KAAK,EAAE,GAAG,WAAW,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC;gBAC7F,OAAO,EAAE,IAAI;gBACb,MAAM,EAAE;oBACN,GAAG,OAAO,CAAC,MAAM;oBACjB,EAAE,QAAQ,EAAE,aAAa,EAAE,IAAI,EAAE,gCAAgC,EAAE,OAAO,EAAE;iBAC7E;gBACD,MAAM,EAAE,OAAO,CAAC,MAAM,CAAC,MAAM;aAC9B,CAAA;QACH,CAAC;IACH,CAAC;CACF,CAAA"}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
export type TimedCache = {
|
|
2
|
+
value: unknown;
|
|
3
|
+
expiresAt: number;
|
|
4
|
+
};
|
|
5
|
+
export declare function getImpitCache(): TimedCache | undefined;
|
|
6
|
+
export declare function setImpitCache(value: TimedCache | undefined): void;
|
|
7
|
+
export declare function getImpitLoader(): () => Promise<{
|
|
8
|
+
Impit: unknown;
|
|
9
|
+
}>;
|
|
10
|
+
export declare function TEST_setImpitLoader(loader: () => Promise<{
|
|
11
|
+
Impit: unknown;
|
|
12
|
+
}>): void;
|
|
13
|
+
export declare function TEST_resetImpitInternals(): void;
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
let impitCache = undefined;
|
|
2
|
+
let impitLoader = () => import("impit");
|
|
3
|
+
export function getImpitCache() {
|
|
4
|
+
return impitCache;
|
|
5
|
+
}
|
|
6
|
+
export function setImpitCache(value) {
|
|
7
|
+
impitCache = value;
|
|
8
|
+
}
|
|
9
|
+
export function getImpitLoader() {
|
|
10
|
+
return impitLoader;
|
|
11
|
+
}
|
|
12
|
+
export function TEST_setImpitLoader(loader) {
|
|
13
|
+
impitLoader = loader;
|
|
14
|
+
}
|
|
15
|
+
export function TEST_resetImpitInternals() {
|
|
16
|
+
impitCache = undefined;
|
|
17
|
+
impitLoader = () => import("impit");
|
|
18
|
+
}
|
|
19
|
+
//# sourceMappingURL=smartfetch-internals.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"smartfetch-internals.js","sourceRoot":"","sources":["../src/smartfetch-internals.ts"],"names":[],"mappings":"AAKA,IAAI,UAAU,GAA2B,SAAS,CAAA;AAClD,IAAI,WAAW,GAAsC,GAAG,EAAE,CAAC,MAAM,CAAC,OAAO,CAAC,CAAA;AAE1E,MAAM,UAAU,aAAa;IAC3B,OAAO,UAAU,CAAA;AACnB,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,KAA6B;IACzD,UAAU,GAAG,KAAK,CAAA;AACpB,CAAC;AAED,MAAM,UAAU,cAAc;IAC5B,OAAO,WAAW,CAAA;AACpB,CAAC;AAED,MAAM,UAAU,mBAAmB,CAAC,MAAyC;IAC3E,WAAW,GAAG,MAAM,CAAA;AACtB,CAAC;AAED,MAAM,UAAU,wBAAwB;IACtC,UAAU,GAAG,SAAS,CAAA;IACtB,WAAW,GAAG,GAAG,EAAE,CAAC,MAAM,CAAC,OAAO,CAAC,CAAA;AACrC,CAAC"}
|
package/dist/smartfetch.js
CHANGED
|
@@ -1,23 +1,25 @@
|
|
|
1
1
|
import { asString, cleanLinks, dedupeUrls, envFlag, extractAnchorHrefs, extractUrls, inferTarget, isDcinsideUrl, needsDynamicCrawl, resolveValidatedUrl, validateOutboundUrl, } from "./shared.js";
|
|
2
2
|
import { runBrowserSession } from "./browser-session.js";
|
|
3
|
+
import { getImpitCache, getImpitLoader, setImpitCache } from "./smartfetch-internals.js";
|
|
3
4
|
import { discoverAssets } from "./smartfetch/assets.js";
|
|
4
5
|
import { maybeUseArchiveFallback } from "./smartfetch/archive-fallback.js";
|
|
5
6
|
import { resolveSmartfetchProvider } from "./smartfetch/providers/index.js";
|
|
6
7
|
const DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36";
|
|
7
|
-
|
|
8
|
+
const FAILURE_CACHE_TTL_MS = 30_000;
|
|
8
9
|
function allowPrivateHosts() {
|
|
9
10
|
return envFlag("SMART_WEB_ALLOW_PRIVATE_HOSTS");
|
|
10
11
|
}
|
|
11
12
|
async function getImpit() {
|
|
12
|
-
|
|
13
|
-
|
|
13
|
+
const impitCache = getImpitCache();
|
|
14
|
+
if (impitCache !== undefined && Date.now() < impitCache.expiresAt)
|
|
15
|
+
return impitCache.value;
|
|
14
16
|
try {
|
|
15
|
-
|
|
17
|
+
setImpitCache({ value: (await getImpitLoader()()).Impit, expiresAt: Number.POSITIVE_INFINITY });
|
|
16
18
|
}
|
|
17
19
|
catch {
|
|
18
|
-
|
|
20
|
+
setImpitCache({ value: false, expiresAt: Date.now() + FAILURE_CACHE_TTL_MS });
|
|
19
21
|
}
|
|
20
|
-
return
|
|
22
|
+
return getImpitCache()?.value;
|
|
21
23
|
}
|
|
22
24
|
async function runNativeFetch(url, timeoutMs) {
|
|
23
25
|
const headers = {
|
|
@@ -65,7 +67,7 @@ async function runNativeFetch(url, timeoutMs) {
|
|
|
65
67
|
};
|
|
66
68
|
}
|
|
67
69
|
const links = [...extractAnchorHrefs(body), ...extractUrls(body)];
|
|
68
|
-
return { ok: true, method: "plain_fetch", content: body, links: dedupeUrls(links) };
|
|
70
|
+
return { ok: true, method: "plain_fetch", content: body, links: dedupeUrls(links), final_url: resolved.url };
|
|
69
71
|
}
|
|
70
72
|
catch (error) {
|
|
71
73
|
const message = error instanceof Error ? error.message : String(error);
|
|
@@ -130,7 +132,7 @@ async function runImpitFetch(url, timeoutMs) {
|
|
|
130
132
|
};
|
|
131
133
|
}
|
|
132
134
|
const links = [...extractAnchorHrefs(body), ...extractUrls(body)];
|
|
133
|
-
return { ok: true, method: "impit_fetch", content: body, links: dedupeUrls(links) };
|
|
135
|
+
return { ok: true, method: "impit_fetch", content: body, links: dedupeUrls(links), final_url: resolved.url };
|
|
134
136
|
}
|
|
135
137
|
catch (error) {
|
|
136
138
|
const message = error instanceof Error ? error.message : String(error);
|
|
@@ -184,8 +186,12 @@ async function runPlaywrightFetch(url, timeoutMs, target) {
|
|
|
184
186
|
method: result.method,
|
|
185
187
|
content: choosePlaywrightContent(result),
|
|
186
188
|
links: dedupeUrls(result.links.map((item) => asString(item))),
|
|
189
|
+
final_url: result.final_url,
|
|
187
190
|
};
|
|
188
191
|
}
|
|
192
|
+
function riskLevelForTarget(target) {
|
|
193
|
+
return target === "x_post" ? "high" : target === "generic" ? "low" : "medium";
|
|
194
|
+
}
|
|
189
195
|
function baseOutput(url, target) {
|
|
190
196
|
return {
|
|
191
197
|
source: "smartfetch",
|
|
@@ -264,6 +270,7 @@ export async function runSmartfetch(options, runtime = {}) {
|
|
|
264
270
|
method: target === "youtube_video" ? "youtube_metadata_seed" : "x_public_seed",
|
|
265
271
|
content: "",
|
|
266
272
|
links: [],
|
|
273
|
+
final_url: options.url,
|
|
267
274
|
};
|
|
268
275
|
output.retrieval_method.push(active.method);
|
|
269
276
|
}
|
|
@@ -294,10 +301,21 @@ export async function runSmartfetch(options, runtime = {}) {
|
|
|
294
301
|
output.retrieval_method.push(active.method);
|
|
295
302
|
}
|
|
296
303
|
}
|
|
297
|
-
|
|
304
|
+
let effectiveTarget = target;
|
|
305
|
+
if ((options.target || "auto") === "auto") {
|
|
306
|
+
const inferred = inferTarget(active.final_url || options.url, "auto");
|
|
307
|
+
if (inferred !== effectiveTarget) {
|
|
308
|
+
effectiveTarget = inferred;
|
|
309
|
+
output.target = inferred;
|
|
310
|
+
output.risk_level = riskLevelForTarget(inferred);
|
|
311
|
+
}
|
|
312
|
+
}
|
|
313
|
+
const providerUrl = active.final_url || options.url;
|
|
314
|
+
const provider = resolveSmartfetchProvider(providerUrl, effectiveTarget);
|
|
298
315
|
const normalized = await provider.normalize({
|
|
299
316
|
url: options.url,
|
|
300
|
-
|
|
317
|
+
resolvedUrl: providerUrl,
|
|
318
|
+
target: effectiveTarget,
|
|
301
319
|
timeoutMs,
|
|
302
320
|
active,
|
|
303
321
|
errors: output.errors,
|
|
@@ -428,6 +446,7 @@ async function runPlaywrightFetchWithRuntime(url, timeoutMs, target, runtime) {
|
|
|
428
446
|
method: result.method,
|
|
429
447
|
content: choosePlaywrightContent(result),
|
|
430
448
|
links: dedupeUrls(result.links.map((item) => asString(item))),
|
|
449
|
+
final_url: result.final_url,
|
|
431
450
|
};
|
|
432
451
|
}
|
|
433
452
|
//# sourceMappingURL=smartfetch.js.map
|