smart-web-mcp 0.5.0 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,154 @@
1
+ import { asNumber, asString, dedupeUrls, extractAnchorHrefs, extractUrls, stripTags } from "../../shared.js";
2
+ import { fetchProviderJson } from "../provider-policy.js";
3
+ const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36";
4
/**
 * Read the `id` query parameter from an item URL.
 *
 * @param {string} url - Absolute URL to inspect.
 * @returns {string} The trimmed `id` value, or "" when the URL cannot be
 *   parsed or carries no non-empty `id` parameter.
 */
function extractItemId(url) {
    let rawId;
    try {
        rawId = new URL(url).searchParams.get("id");
    }
    catch {
        // Unparseable input is reported as "no id" rather than thrown.
        return "";
    }
    if (!rawId)
        return "";
    return String(rawId).trim();
}
14
/**
 * Convert one node of the fetched item tree into a comment record,
 * recursing into its children.
 *
 * Links found in the comment HTML are added to `outbound` as a side effect
 * (plain URLs first, then anchor hrefs, then whatever the children add).
 *
 * @param {object} node - Raw tree node; anything that is not an object with
 *   `type === "comment"` is rejected.
 * @param {Set<string>} outbound - Accumulator that receives discovered links.
 * @returns {object|null} `{ id, author, body, created_at, children }`, or
 *   null when the node is not a comment.
 */
function mapComment(node, outbound) {
    const isCommentNode = Boolean(node)
        && typeof node === "object"
        && String(node.type || "") === "comment";
    if (!isCommentNode)
        return null;
    const html = asString(node.text);
    for (const extract of [extractUrls, extractAnchorHrefs]) {
        for (const link of extract(html))
            outbound.add(link);
    }
    const replies = [];
    if (Array.isArray(node.children)) {
        for (const child of node.children) {
            const mapped = mapComment(child, outbound);
            if (mapped)
                replies.push(mapped);
        }
    }
    return {
        id: asString(node.id),
        author: asString(node.author),
        body: stripTags(html),
        created_at: asString(node.created_at),
        children: replies,
    };
}
35
/**
 * Count the nodes of type "comment" in a subtree, including the root.
 *
 * @param {object} node - Root of the subtree; non-objects count as zero.
 * @returns {number} Number of comment nodes reachable through `children`.
 */
function countComments(node) {
    if (node === null || typeof node !== "object")
        return 0;
    let total = String(node.type || "") === "comment" ? 1 : 0;
    if (Array.isArray(node.children)) {
        for (const child of node.children)
            total += countComments(child);
    }
    return total;
}
42
/**
 * Smartfetch provider for Hacker News items.
 *
 * `normalize` looks the item up through the Algolia items endpoint and
 * returns a post/comments structure. When the URL has no item id, or the
 * lookup fails, it returns a partial result built from the already-fetched
 * page (`context.active`) and appends an error entry to `context.errors`.
 */
export const hackernewsProvider = {
    id: "hackernews",
    matches: (_url, target) => target === "hackernews_post",
    async normalize(context) {
        const itemId = extractItemId(context.url);
        if (!itemId) {
            const invalidUrlError = {
                category: "parse_error",
                code: "invalid_hackernews_url",
                message: "URL is not a Hacker News item URL",
            };
            return {
                post: null,
                thread: [],
                comments: [],
                outbound_links: [],
                partial: true,
                errors: [...context.errors, invalidUrlError],
                method: context.active.method,
            };
        }

        const endpoint = `https://hn.algolia.com/api/v1/items/${encodeURIComponent(itemId)}`;
        const requestInit = {
            headers: {
                accept: "application/json,text/plain,*/*",
                "user-agent": USER_AGENT,
            },
        };
        const policy = { mode: "relay", sourceUrl: context.url };
        const result = await fetchProviderJson(endpoint, context.timeoutMs, requestInit, policy);

        const payloadUsable = result.ok && result.data && typeof result.data === "object";
        if (!payloadUsable) {
            // Only a failed request carries a provider error; an ok-but-empty
            // payload falls through to the generic defaults below.
            const providerError = result.ok ? null : result.error;
            return {
                post: {
                    url: context.url,
                    text: stripTags(context.active.content),
                    status: "partial_text_only",
                },
                thread: [],
                comments: [],
                outbound_links: dedupeUrls([...context.active.links, ...extractUrls(context.active.content)]),
                partial: true,
                errors: [
                    ...context.errors,
                    {
                        category: providerError?.category || "unavailable",
                        code: providerError?.code || "hackernews_item_failed",
                        message: providerError?.message || `status=${result.status}`,
                    },
                ],
                method: context.active.method,
            };
        }

        const item = result.data;
        const isComment = asString(item.type) === "comment";
        const selftext = asString(item.text);

        // Collect links in a fixed order: item text, item url, story url, then comments.
        const outbound = new Set();
        for (const extract of [extractUrls, extractAnchorHrefs]) {
            for (const link of extract(selftext))
                outbound.add(link);
        }
        for (const target of [item.url, item.story_url]) {
            if (target)
                outbound.add(String(target));
        }

        const hasChildren = Array.isArray(item.children);
        const comments = hasChildren
            ? item.children.map((node) => mapComment(node, outbound)).filter(Boolean)
            : [];
        const numComments = hasChildren
            ? item.children.reduce((sum, node) => sum + countComments(node), 0)
            : asNumber(item.children_count);

        let title;
        let linkUrl;
        if (isComment) {
            title = item.story_title ? `Comment on: ${asString(item.story_title)}` : "Hacker News comment";
            linkUrl = "";
        }
        else {
            title = asString(item.title);
            linkUrl = item.url ? asString(item.url) : "";
        }

        const post = {
            id: asString(item.id),
            title,
            author: asString(item.author),
            url: linkUrl,
            selftext: stripTags(selftext),
            score: asNumber(item.points),
            num_comments: numComments,
            created_at: asString(item.created_at),
            discussion_url: context.url,
        };
        if (isComment) {
            // Comment items additionally expose the story they belong to.
            post.story_title = asString(item.story_title);
            post.story_url = asString(item.story_url);
        }

        return {
            post,
            thread: [],
            comments,
            outbound_links: dedupeUrls([...outbound]),
            partial: false,
            errors: context.errors,
            method: "hackernews_algolia_public",
        };
    },
};
154
+ //# sourceMappingURL=hackernews.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"hackernews.js","sourceRoot":"","sources":["../../../src/smartfetch/providers/hackernews.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,UAAU,EAAE,kBAAkB,EAAE,WAAW,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAA;AAC5G,OAAO,EAAE,iBAAiB,EAAE,MAAM,uBAAuB,CAAA;AAGzD,MAAM,UAAU,GAAG,iHAAiH,CAAA;AAEpI,SAAS,aAAa,CAAC,GAAW;IAChC,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAA;QAC3B,MAAM,EAAE,GAAG,MAAM,CAAC,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;QACxC,OAAO,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAA;IACpC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,EAAE,CAAA;IACX,CAAC;AACH,CAAC;AAED,SAAS,UAAU,CAAC,IAAS,EAAE,QAAqB;IAClD,IAAI,CAAC,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ;QAAE,OAAO,IAAI,CAAA;IAClD,IAAI,MAAM,CAAC,IAAI,CAAC,IAAI,IAAI,EAAE,CAAC,KAAK,SAAS;QAAE,OAAO,IAAI,CAAA;IACtD,MAAM,QAAQ,GAAG,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;IACpC,KAAK,MAAM,IAAI,IAAI,WAAW,CAAC,QAAQ,CAAC;QAAE,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;IAC5D,KAAK,MAAM,IAAI,IAAI,kBAAkB,CAAC,QAAQ,CAAC;QAAE,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;IACnE,MAAM,QAAQ,GAAG,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC;QAC3C,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,IAAS,EAAE,EAAE,CAAC,UAAU,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC;QAC9E,CAAC,CAAC,EAAE,CAAA;IACN,OAAO;QACL,EAAE,EAAE,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;QACrB,MAAM,EAAE,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC;QAC7B,IAAI,EAAE,SAAS,CAAC,QAAQ,CAAC;QACzB,UAAU,EAAE,QAAQ,CAAC,IAAI,CAAC,UAAU,CAAC;QACrC,QAAQ;KACT,CAAA;AACH,CAAC;AAED,SAAS,aAAa,CAAC,IAAS;IAC9B,IAAI,CAAC,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ;QAAE,OAAO,CAAC,CAAA;IAC/C,MAAM,QAAQ,GAAG,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAA;IAClE,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,IAAI,EAAE,CAAC,KAAK,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;UAClD,QAAQ,CAAC,MAAM,CAAC,CAAC,GAAW,EAAE,IAAS,EAAE,EAAE,CAAC,GAAG,GAAG,aAAa,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAA;AAC/E,CAAC;AAED,MAAM,CAAC,MAAM,kBAAkB,GAAuB;IACpD,EAAE,EAAE,YAAY;IAChB,OAAO,EAAE,CAAC,IAAI,EAAE,MAAM
,EAAE,EAAE,CAAC,MAAM,KAAK,iBAAiB;IACvD,KAAK,CAAC,SAAS,CAAC,OAA0B;QACxC,MAAM,MAAM,GAAG,aAAa,CAAC,OAAO,CAAC,GAAG,CAAC,CAAA;QACzC,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,OAAO;gBACL,IAAI,EAAE,IAAI;gBACV,MAAM,EAAE,EAAE;gBACV,QAAQ,EAAE,EAAE;gBACZ,cAAc,EAAE,EAAE;gBAClB,OAAO,EAAE,IAAI;gBACb,MAAM,EAAE;oBACN,GAAG,OAAO,CAAC,MAAM;oBACjB,EAAE,QAAQ,EAAE,aAAa,EAAE,IAAI,EAAE,wBAAwB,EAAE,OAAO,EAAE,mCAAmC,EAAE;iBAC1G;gBACD,MAAM,EAAE,OAAO,CAAC,MAAM,CAAC,MAAM;aAC9B,CAAA;QACH,CAAC;QAED,MAAM,MAAM,GAAG,MAAM,iBAAiB,CACpC,uCAAuC,kBAAkB,CAAC,MAAM,CAAC,EAAE,EACnE,OAAO,CAAC,SAAS,EACjB;YACE,OAAO,EAAE;gBACP,MAAM,EAAE,iCAAiC;gBACzC,YAAY,EAAE,UAAU;aACzB;SACF,EACD;YACE,IAAI,EAAE,OAAO;YACb,SAAS,EAAE,OAAO,CAAC,GAAG;SACvB,CACF,CAAA;QAED,IAAI,CAAC,MAAM,CAAC,EAAE,IAAI,CAAC,MAAM,CAAC,IAAI,IAAI,OAAO,MAAM,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;YAClE,MAAM,aAAa,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAA;YACtD,OAAO;gBACL,IAAI,EAAE;oBACJ,GAAG,EAAE,OAAO,CAAC,GAAG;oBAChB,IAAI,EAAE,SAAS,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC;oBACvC,MAAM,EAAE,mBAAmB;iBAC5B;gBACD,MAAM,EAAE,EAAE;gBACV,QAAQ,EAAE,EAAE;gBACZ,cAAc,EAAE,UAAU,CAAC,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC,KAAK,EAAE,GAAG,WAAW,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC;gBAC7F,OAAO,EAAE,IAAI;gBACb,MAAM,EAAE;oBACN,GAAG,OAAO,CAAC,MAAM;oBACjB;wBACE,QAAQ,EAAE,aAAa,EAAE,QAAQ,IAAI,aAAa;wBAClD,IAAI,EAAE,aAAa,EAAE,IAAI,IAAI,wBAAwB;wBACrD,OAAO,EAAE,aAAa,EAAE,OAAO,IAAI,UAAU,MAAM,CAAC,MAAM,EAAE;qBAC7D;iBACF;gBACD,MAAM,EAAE,OAAO,CAAC,MAAM,CAAC,MAAM;aAC9B,CAAA;QACH,CAAC;QAED,MAAM,IAAI,GAAG,MAAM,CAAC,IAAW,CAAA;QAC/B,MAAM,QAAQ,GAAG,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QACpC,MAAM,QAAQ,GAAG,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QACpC,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAU,CAAA;QAClC,KAAK,MAAM,IAAI,IAAI,WAAW,CAAC,QAAQ,CAAC;YAAE,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;QAC5D,KAAK,MAAM,IAAI,IAAI,kBAAkB,CAAC,QAAQ,CAAC;YAAE,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;QACnE,IAAI,IAAI,CAAC,GAAG;YAAE,QAAQ,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAA;QAC5C,IAAI,IAAI,CAAC,SAAS;YAAE,QAAQ,CAAC,GAAG,CA
AC,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAA;QAExD,MAAM,QAAQ,GAAG,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC;YAC3C,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,IAAS,EAAE,EAAE,CAAC,UAAU,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,OAAO,CAA8B;YAC3G,CAAC,CAAC,EAAE,CAAA;QAEN,IAAI,QAAQ,KAAK,SAAS,EAAE,CAAC;YAC3B,OAAO;gBACL,IAAI,EAAE;oBACJ,EAAE,EAAE,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;oBACrB,KAAK,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,eAAe,QAAQ,CAAC,IAAI,CAAC,WAAW,CAAC,EAAE,CAAC,CAAC,CAAC,qBAAqB;oBAC7F,MAAM,EAAE,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC;oBAC7B,GAAG,EAAE,EAAE;oBACP,QAAQ,EAAE,SAAS,CAAC,QAAQ,CAAC;oBAC7B,KAAK,EAAE,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC;oBAC5B,YAAY,EAAE,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC;wBACxC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,GAAW,EAAE,IAAS,EAAE,EAAE,CAAC,GAAG,GAAG,aAAa,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;wBAChF,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,cAAc,CAAC;oBACjC,UAAU,EAAE,QAAQ,CAAC,IAAI,CAAC,UAAU,CAAC;oBACrC,cAAc,EAAE,OAAO,CAAC,GAAG;oBAC3B,WAAW,EAAE,QAAQ,CAAC,IAAI,CAAC,WAAW,CAAC;oBACvC,SAAS,EAAE,QAAQ,CAAC,IAAI,CAAC,SAAS,CAAC;iBACpC;gBACD,MAAM,EAAE,EAAE;gBACV,QAAQ;gBACR,cAAc,EAAE,UAAU,CAAC,CAAC,GAAG,QAAQ,CAAC,CAAC;gBACzC,OAAO,EAAE,KAAK;gBACd,MAAM,EAAE,OAAO,CAAC,MAAM;gBACtB,MAAM,EAAE,2BAA2B;aACpC,CAAA;QACH,CAAC;QAED,OAAO;YACL,IAAI,EAAE;gBACJ,EAAE,EAAE,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;gBACrB,KAAK,EAAE,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC;gBAC3B,MAAM,EAAE,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC;gBAC7B,GAAG,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE;gBACvC,QAAQ,EAAE,SAAS,CAAC,QAAQ,CAAC;gBAC7B,KAAK,EAAE,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC;gBAC5B,YAAY,EAAE,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,GAAW,EAAE,IAAS,EAAE,EAAE,CAAC,GAAG,GAAG,aAAa,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,cAAc,CAAC;gBAC3J,UAAU,EAAE,QAAQ,CAAC,IAAI,CAAC,UAAU,CAAC;gBACrC,cAAc,EAAE,OAAO,CAAC,GAAG;aAC5B;YACD,MAAM,EAAE,EAAE;YACV,QAAQ;YACR,cAAc,EAAE,UAAU,CAAC,CAAC,GAAG,QAAQ,CAAC,CAAC;YACzC,OAAO,EAAE,KAAK;YACd,MAAM,EAAE,OAA
O,CAAC,MAAM;YACtB,MAAM,EAAE,2BAA2B;SACpC,CAAA;IACH,CAAC;CACF,CAAA"}
@@ -1,12 +1,14 @@
1
1
  import { dcinsideProvider } from "./dcinside.js";
2
2
  import { genericProvider } from "./generic.js";
3
+ import { hackernewsProvider } from "./hackernews.js";
3
4
  import { naverBlogProvider } from "./naver-blog.js";
5
+ import { naverCafeProvider } from "./naver-cafe.js";
4
6
  import { redditProvider } from "./reddit.js";
5
7
  import { tistoryProvider } from "./tistory.js";
6
8
  import { velogProvider } from "./velog.js";
7
9
  import { xProvider } from "./x.js";
8
10
  import { youtubeProvider } from "./youtube.js";
9
- const providers = [redditProvider, xProvider, dcinsideProvider, youtubeProvider, naverBlogProvider, tistoryProvider, velogProvider, genericProvider];
11
+ const providers = [hackernewsProvider, redditProvider, xProvider, dcinsideProvider, youtubeProvider, naverBlogProvider, naverCafeProvider, tistoryProvider, velogProvider, genericProvider];
10
12
  export function resolveSmartfetchProvider(url, target) {
11
13
  return providers.find((provider) => provider.matches(url, target)) || genericProvider;
12
14
  }
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/smartfetch/providers/index.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,gBAAgB,EAAE,MAAM,eAAe,CAAA;AAChD,OAAO,EAAE,eAAe,EAAE,MAAM,cAAc,CAAA;AAC9C,OAAO,EAAE,iBAAiB,EAAE,MAAM,iBAAiB,CAAA;AACnD,OAAO,EAAE,cAAc,EAAE,MAAM,aAAa,CAAA;AAC5C,OAAO,EAAE,eAAe,EAAE,MAAM,cAAc,CAAA;AAC9C,OAAO,EAAE,aAAa,EAAE,MAAM,YAAY,CAAA;AAC1C,OAAO,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAA;AAClC,OAAO,EAAE,eAAe,EAAE,MAAM,cAAc,CAAA;AAE9C,MAAM,SAAS,GAAyB,CAAC,cAAc,EAAE,SAAS,EAAE,gBAAgB,EAAE,eAAe,EAAE,iBAAiB,EAAE,eAAe,EAAE,aAAa,EAAE,eAAe,CAAC,CAAA;AAE1K,MAAM,UAAU,yBAAyB,CAAC,GAAW,EAAE,MAAsB;IAC3E,OAAO,SAAS,CAAC,IAAI,CAAC,CAAC,QAAQ,EAAE,EAAE,CAAC,QAAQ,CAAC,OAAO,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC,IAAI,eAAe,CAAA;AACvF,CAAC"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/smartfetch/providers/index.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,gBAAgB,EAAE,MAAM,eAAe,CAAA;AAChD,OAAO,EAAE,eAAe,EAAE,MAAM,cAAc,CAAA;AAC9C,OAAO,EAAE,kBAAkB,EAAE,MAAM,iBAAiB,CAAA;AACpD,OAAO,EAAE,iBAAiB,EAAE,MAAM,iBAAiB,CAAA;AACnD,OAAO,EAAE,iBAAiB,EAAE,MAAM,iBAAiB,CAAA;AACnD,OAAO,EAAE,cAAc,EAAE,MAAM,aAAa,CAAA;AAC5C,OAAO,EAAE,eAAe,EAAE,MAAM,cAAc,CAAA;AAC9C,OAAO,EAAE,aAAa,EAAE,MAAM,YAAY,CAAA;AAC1C,OAAO,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAA;AAClC,OAAO,EAAE,eAAe,EAAE,MAAM,cAAc,CAAA;AAE9C,MAAM,SAAS,GAAyB,CAAC,kBAAkB,EAAE,cAAc,EAAE,SAAS,EAAE,gBAAgB,EAAE,eAAe,EAAE,iBAAiB,EAAE,iBAAiB,EAAE,eAAe,EAAE,aAAa,EAAE,eAAe,CAAC,CAAA;AAEjN,MAAM,UAAU,yBAAyB,CAAC,GAAW,EAAE,MAAsB;IAC3E,OAAO,SAAS,CAAC,IAAI,CAAC,CAAC,QAAQ,EAAE,EAAE,CAAC,QAAQ,CAAC,OAAO,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC,IAAI,eAAe,CAAA;AACvF,CAAC"}
@@ -0,0 +1,2 @@
1
+ import type { SmartfetchProvider } from "../provider-types.js";
2
+ export declare const naverCafeProvider: SmartfetchProvider;
@@ -0,0 +1,163 @@
1
+ import { asNumber, asString, dedupeUrls, extractMetaDescription, extractMetaProperty, extractTitleFromHtml, extractUrls, fetchJson, isNaverCafeUrl, stripTags } from "../../shared.js";
2
+ const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36";
3
/**
 * Decode the payload (second dot-separated segment) of a JWT-shaped token.
 *
 * The token is NOT verified; this only base64url-decodes and JSON-parses the
 * payload so its fields can be read.
 *
 * @param {string} value - Token of the form `header.payload[.signature]`.
 * @returns {object|null} The decoded payload object, or null when the value
 *   has fewer than two segments, the payload is not valid base64url/JSON, or
 *   the JSON is not a plain object (array, primitive, or null).
 */
function decodeJwtPayload(value) {
    const segments = String(value || "").split(".");
    if (segments.length < 2)
        return null;
    try {
        // "base64url" accepts both the URL-safe and the standard alphabet and
        // does not require padding, so no manual normalization is needed.
        const json = Buffer.from(segments[1], "base64url").toString("utf-8");
        const payload = JSON.parse(json);
        // Callers read named fields, so only a plain object is a usable payload.
        const isPlainObject = payload !== null && typeof payload === "object" && !Array.isArray(payload);
        return isPlainObject ? payload : null;
    }
    catch {
        // Malformed tokens are an expected input here; report "no payload".
        return null;
    }
}
16
/**
 * Resolve the article token and numeric identifiers for a Naver Cafe article.
 *
 * Sources are tried in order, each only filling values still missing:
 *   1. the URL query (`art`, `clubid`, `articleid`),
 *   2. a `/cafes/<id>/articles/<id>` path, then a trailing numeric path segment,
 *   3. patterns found in the page HTML,
 *   4. the `cafeId` / `articleId` fields of the decoded `art` token payload.
 *
 * @param {string} url - Article URL; an unparseable URL just skips steps 1-2.
 * @param {string} html - Page HTML used for the regex fallbacks; a non-string
 *   value is treated as an empty page.
 * @returns {{art: string, cafeId: number, articleId: number}} Missing values
 *   are "" / 0.
 */
function parseNaverCafeIdentifiers(url, html) {
    // Guard the regex fallbacks: calling .match on a non-string would throw.
    const page = typeof html === "string" ? html : "";
    let art = "";
    let cafeId = 0;
    let articleId = 0;
    try {
        const parsed = new URL(url);
        art = parsed.searchParams.get("art") || "";
        cafeId = asNumber(parsed.searchParams.get("clubid"));
        articleId = asNumber(parsed.searchParams.get("articleid"));
        if (!articleId) {
            const mobilePath = parsed.pathname.match(/\/cafes\/(\d+)\/articles\/(\d+)/i);
            if (mobilePath) {
                cafeId ||= asNumber(mobilePath[1]);
                articleId ||= asNumber(mobilePath[2]);
            }
        }
        if (!articleId) {
            const last = parsed.pathname.split("/").filter(Boolean).at(-1) || "";
            if (/^\d+$/.test(last))
                articleId = asNumber(last);
        }
    }
    catch {
        // Keep the regex fallbacks below.
    }
    // Query separators inside HTML attributes are usually escaped as "&amp;",
    // so accept that form alongside the raw "?" / "&".
    if (!art)
        art = page.match(/(?:[?&]|&amp;)art=([^"'&\s]+)/i)?.[1] || "";
    if (!cafeId) {
        cafeId = asNumber(page.match(/g_sClubId\s*=\s*"(\d+)"/i)?.[1]
            || page.match(/(?:[?&]|&amp;)clubid=(\d+)/i)?.[1]);
    }
    if (!articleId)
        articleId = asNumber(page.match(/(?:[?&]|&amp;)articleid=(\d+)/i)?.[1]);
    // Last resort: the token payload may itself carry the identifiers.
    const tokenPayload = art ? decodeJwtPayload(art) : null;
    cafeId ||= asNumber(tokenPayload?.cafeId);
    articleId ||= asNumber(tokenPayload?.articleId);
    return {
        art,
        cafeId,
        articleId,
    };
}
57
/**
 * Build the article JSON endpoint URL for a cafe/article pair.
 *
 * @param {number} cafeId - Cafe identifier placed in the path.
 * @param {number} articleId - Article identifier placed in the path.
 * @param {string} art - Article token; added as the `art` query parameter
 *   only when non-empty.
 * @returns {string} The serialized endpoint URL.
 */
function buildArticleApiUrl(cafeId, articleId, art) {
    const endpoint = new URL(`https://article.cafe.naver.com/gw/v4/cafes/${cafeId}/articles/${articleId}`);
    if (!art)
        return endpoint.href;
    endpoint.searchParams.set("art", art);
    return endpoint.href;
}
63
/**
 * Heuristically decide from the URL path whether it points at a single article.
 *
 * A path qualifies when it ends in `/ArticleRead.nhn`, contains
 * `/ca-fe/web/cafes/<digits>/articles/<digits>`, or ends in
 * `/<segment>/<digits>`.
 *
 * @param {string} url - Absolute URL to test.
 * @returns {boolean} True when one of the path shapes matches; false
 *   otherwise, including when the URL cannot be parsed.
 */
function looksLikeNaverCafeArticle(url) {
    let pathname;
    try {
        pathname = new URL(url).pathname;
    }
    catch {
        return false;
    }
    const articlePathShapes = [
        /\/ArticleRead\.nhn$/i,
        /\/ca-fe\/web\/cafes\/\d+\/articles\/\d+/i,
        /\/[^/]+\/\d+$/i,
    ];
    return articlePathShapes.some((shape) => shape.test(pathname));
}
73
/**
 * Smartfetch provider for Naver Cafe articles.
 *
 * `normalize` resolves the cafe id, article id and `art` token, then requests
 * the article JSON. If the identifiers cannot be resolved, the request fails,
 * or the JSON lacks a subject or readable body, it returns a partial result
 * built from the already-fetched page (`context.active`) plus an error entry.
 */
export const naverCafeProvider = {
    id: "naver-cafe",
    matches: (url, target) => {
        if (target === "naver_cafe_post")
            return true;
        if (target !== "generic")
            return false;
        // Generic requests are claimed only for cafe URLs that look like an article.
        return isNaverCafeUrl(url) && looksLikeNaverCafeArticle(url);
    },
    async normalize(context) {
        const pageHtml = context.active.content;
        const pageUrl = context.resolvedUrl || context.url;
        const fallbackTitle = extractMetaProperty(pageHtml, "og:title") || extractTitleFromHtml(pageHtml);
        const fallbackDescription = extractMetaProperty(pageHtml, "og:description") || extractMetaDescription(pageHtml);
        const fallbackText = stripTags(pageHtml);

        const { cafeId, articleId, art } = parseNaverCafeIdentifiers(pageUrl, pageHtml);
        if (!(cafeId && articleId && art)) {
            const tokenMissingError = {
                category: "parse_error",
                code: "naver_cafe_public_token_missing",
                message: "Could not resolve the public Naver Cafe article token and identifiers",
            };
            return {
                post: {
                    url: context.url,
                    title: fallbackTitle,
                    description: fallbackDescription,
                    text: fallbackText.slice(0, 30000),
                    extractor: "fallback",
                    status: fallbackText ? "partial_text_only" : "blocked_or_unavailable",
                },
                thread: [],
                comments: [],
                outbound_links: dedupeUrls([...context.active.links, ...extractUrls(pageHtml)]),
                partial: true,
                errors: [...context.errors, tokenMissingError],
                method: context.active.method,
            };
        }

        const apiUrl = buildArticleApiUrl(cafeId, articleId, art);
        const result = await fetchJson(apiUrl, context.timeoutMs, {
            headers: {
                accept: "application/json,text/plain,*/*",
                "user-agent": USER_AGENT,
                referer: pageUrl,
            },
        });

        // A failed request contributes nothing; every lookup below then sees {}.
        const payload = result.ok ? result.data?.result : undefined;
        const article = payload?.article || {};
        const cafe = payload?.cafe || {};
        const writer = article.writer || {};
        const menu = article.menu || {};
        const contentHtml = asString(article.contentHtml);
        const body = stripTags(contentHtml);

        const articleComplete = result.ok && article.subject && body;
        if (!articleComplete) {
            const failure = result.ok
                ? { category: "parse_error", code: "naver_cafe_article_missing_body", message: "Naver Cafe article JSON did not include readable body content" }
                : { category: "unavailable", code: "naver_cafe_article_fetch_failed", message: result.error || `status=${result.status}` };
            return {
                post: {
                    url: context.url,
                    canonical_url: apiUrl,
                    title: asString(article.subject) || fallbackTitle,
                    description: fallbackDescription,
                    text: (body || fallbackText).slice(0, 30000),
                    author: asString(writer.nick),
                    extractor: body ? "naver_cafe_article_json" : "fallback",
                    status: body || fallbackText ? "partial_text_only" : "blocked_or_unavailable",
                },
                thread: [],
                comments: [],
                outbound_links: dedupeUrls([...context.active.links, ...extractUrls(contentHtml || pageHtml)]),
                partial: true,
                errors: [...context.errors, failure],
                method: result.ok ? "naver_cafe_article_json" : context.active.method,
            };
        }

        return {
            post: {
                url: context.url,
                canonical_url: apiUrl,
                title: asString(article.subject),
                description: fallbackDescription,
                text: body.slice(0, 50000),
                author: asString(writer.nick),
                menu_name: asString(menu.name),
                cafe_name: asString(cafe.name || cafe.cafeName),
                write_date: asString(article.writeDate),
                read_count: asNumber(article.readCount),
                comment_count: asNumber(article.commentCount),
                extractor: "naver_cafe_article_json",
                status: "ok",
            },
            thread: [],
            comments: [],
            outbound_links: dedupeUrls([...context.active.links, ...extractUrls(contentHtml)]),
            partial: false,
            errors: context.errors,
            method: "naver_cafe_article_json",
        };
    },
};
163
+ //# sourceMappingURL=naver-cafe.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"naver-cafe.js","sourceRoot":"","sources":["../../../src/smartfetch/providers/naver-cafe.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,UAAU,EAAE,sBAAsB,EAAE,mBAAmB,EAAE,oBAAoB,EAAE,WAAW,EAAE,SAAS,EAAE,cAAc,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAA;AAGtL,MAAM,UAAU,GAAG,iHAAiH,CAAA;AAEpI,SAAS,gBAAgB,CAAC,KAAa;IACrC,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAA;IAC5C,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,IAAI,CAAA;IACjC,IAAI,CAAC;QACH,MAAM,UAAU,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,GAAG,CAAC,CAAA;QACzE,MAAM,MAAM,GAAG,UAAU,GAAG,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,GAAG,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAA;QACvE,OAAO,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,CAA4B,CAAA;IAC/F,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAA;IACb,CAAC;AACH,CAAC;AAED,SAAS,yBAAyB,CAAC,GAAW,EAAE,IAAY;IAC1D,IAAI,GAAG,GAAG,EAAE,CAAA;IACZ,IAAI,MAAM,GAAG,CAAC,CAAA;IACd,IAAI,SAAS,GAAG,CAAC,CAAA;IAEjB,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAA;QAC3B,GAAG,GAAG,MAAM,CAAC,YAAY,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,EAAE,CAAA;QAC1C,MAAM,GAAG,QAAQ,CAAC,MAAM,CAAC,YAAY,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAA;QACpD,SAAS,GAAG,QAAQ,CAAC,MAAM,CAAC,YAAY,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC,CAAA;QAE1D,IAAI,CAAC,SAAS,EAAE,CAAC;YACf,MAAM,UAAU,GAAG,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,kCAAkC,CAAC,CAAA;YAC5E,IAAI,UAAU,EAAE,CAAC;gBACf,MAAM,KAAK,QAAQ,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAA;gBAClC,SAAS,KAAK,QAAQ,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAA;YACvC,CAAC;QACH,CAAC;QAED,IAAI,CAAC,SAAS,EAAE,CAAC;YACf,MAAM,KAAK,GAAG,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAA;YACxD,MAAM,IAAI,GAAG,KAAK,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAA;YAC/B,IAAI,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC;gBAAE,SAAS,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAA;QACpD,CAAC;IACH,CAAC;IAAC,MAAM,CAAC;QACP,kCAAkC;IACpC,CAAC;IAED,IAAI,CAAC,GAAG;QAAE,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,sBAAsB,CAAC,EAAE,CAAC,C
AAC,CAAC,IAAI,EAAE,CAAA;IAC7D,IAAI,CAAC,MAAM;QAAE,MAAM,GAAG,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,0BAA0B,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,KAAK,CAAC,mBAAmB,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAA;IACnH,IAAI,CAAC,SAAS;QAAE,SAAS,GAAG,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,sBAAsB,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAA;IAE7E,MAAM,YAAY,GAAG,GAAG,CAAC,CAAC,CAAC,gBAAgB,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAA;IACvD,MAAM,KAAK,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC,CAAA;IACzC,SAAS,KAAK,QAAQ,CAAC,YAAY,EAAE,SAAS,CAAC,CAAA;IAE/C,OAAO;QACL,GAAG;QACH,MAAM;QACN,SAAS;KACV,CAAA;AACH,CAAC;AAED,SAAS,kBAAkB,CAAC,MAAc,EAAE,SAAiB,EAAE,GAAW;IACxE,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,8CAA8C,MAAM,aAAa,SAAS,EAAE,CAAC,CAAA;IACpG,IAAI,GAAG;QAAE,MAAM,CAAC,YAAY,CAAC,GAAG,CAAC,KAAK,EAAE,GAAG,CAAC,CAAA;IAC5C,OAAO,MAAM,CAAC,QAAQ,EAAE,CAAA;AAC1B,CAAC;AAED,SAAS,yBAAyB,CAAC,GAAW;IAC5C,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAA;QAC3B,MAAM,IAAI,GAAG,MAAM,CAAC,QAAQ,CAAA;QAC5B,OAAO,sBAAsB,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,0CAA0C,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;IAClI,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAA;IACd,CAAC;AACH,CAAC;AAED,MAAM,CAAC,MAAM,iBAAiB,GAAuB;IACnD,EAAE,EAAE,YAAY;IAChB,OAAO,EAAE,CAAC,GAAG,EAAE,MAAM,EAAE,EAAE,CAAC,MAAM,KAAK,iBAAiB,IAAI,CAAC,MAAM,KAAK,SAAS,IAAI,cAAc,CAAC,GAAG,CAAC,IAAI,yBAAyB,CAAC,GAAG,CAAC,CAAC;IACzI,KAAK,CAAC,SAAS,CAAC,OAA0B;QACxC,MAAM,aAAa,GAAG,mBAAmB,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,EAAE,UAAU,CAAC,IAAI,oBAAoB,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,CAAA;QAC7H,MAAM,mBAAmB,GAAG,mBAAmB,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,EAAE,gBAAgB,CAAC,IAAI,sBAAsB,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,CAAA;QAC3I,MAAM,YAAY,GAAG,SAAS,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,CAAA;QACtD,MAAM,WAAW,GAAG,yBAAyB,CAAC,OAAO,CAAC,WAAW,IAAI,OAAO,CAAC,GAAG,EAAE,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,CAAA;QAEzG,IAAI,CAAC,WAAW,CAAC,MAAM,IAAI,CAAC,WAAW,CAAC,SAAS,IAAI,CAAC,WAAW,CAAC,GAAG,EAAE,CAAC;YACtE,OAAO;gBACL,IAAI,EAAE;oBACJ,GAAG,EAAE,OAAO,CAAC,GAAG;oBAChB,KAAK,EAAE,aAAa;oBACpB,WAAW,EAAE,mBAAmB;oBAChC,IAAI,EAAE,YAAY,CAAC,
KAAK,CAAC,CAAC,EAAE,KAAK,CAAC;oBAClC,SAAS,EAAE,UAAU;oBACrB,MAAM,EAAE,YAAY,CAAC,CAAC,CAAC,mBAAmB,CAAC,CAAC,CAAC,wBAAwB;iBACtE;gBACD,MAAM,EAAE,EAAE;gBACV,QAAQ,EAAE,EAAE;gBACZ,cAAc,EAAE,UAAU,CAAC,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC,KAAK,EAAE,GAAG,WAAW,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC;gBAC7F,OAAO,EAAE,IAAI;gBACb,MAAM,EAAE;oBACN,GAAG,OAAO,CAAC,MAAM;oBACjB,EAAE,QAAQ,EAAE,aAAa,EAAE,IAAI,EAAE,iCAAiC,EAAE,OAAO,EAAE,uEAAuE,EAAE;iBACvJ;gBACD,MAAM,EAAE,OAAO,CAAC,MAAM,CAAC,MAAM;aAC9B,CAAA;QACH,CAAC;QAED,MAAM,MAAM,GAAG,kBAAkB,CAAC,WAAW,CAAC,MAAM,EAAE,WAAW,CAAC,SAAS,EAAE,WAAW,CAAC,GAAG,CAAC,CAAA;QAC7F,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,MAAM,EAAE,OAAO,CAAC,SAAS,EAAE;YACxD,OAAO,EAAE;gBACP,MAAM,EAAE,iCAAiC;gBACzC,YAAY,EAAE,UAAU;gBACxB,OAAO,EAAE,OAAO,CAAC,WAAW,IAAI,OAAO,CAAC,GAAG;aAC5C;SACF,CAAC,CAAA;QAEF,MAAM,OAAO,GAAG,MAAM,CAAC,EAAE,CAAC,CAAC,CAAE,MAAM,CAAC,IAAY,EAAE,MAAM,EAAE,OAAO,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAA;QAC5E,MAAM,MAAM,GAAG,OAAO,EAAE,MAAM,IAAI,EAAE,CAAA;QACpC,MAAM,IAAI,GAAG,OAAO,EAAE,IAAI,IAAI,EAAE,CAAA;QAChC,MAAM,IAAI,GAAG,MAAM,CAAC,EAAE,CAAC,CAAC,CAAE,MAAM,CAAC,IAAY,EAAE,MAAM,EAAE,IAAI,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAA;QACtE,MAAM,WAAW,GAAG,QAAQ,CAAC,OAAO,EAAE,WAAW,CAAC,CAAA;QAClD,MAAM,IAAI,GAAG,SAAS,CAAC,WAAW,CAAC,CAAA;QAEnC,IAAI,CAAC,MAAM,CAAC,EAAE,IAAI,CAAC,OAAO,EAAE,OAAO,IAAI,CAAC,IAAI,EAAE,CAAC;YAC7C,OAAO;gBACL,IAAI,EAAE;oBACJ,GAAG,EAAE,OAAO,CAAC,GAAG;oBAChB,aAAa,EAAE,MAAM;oBACrB,KAAK,EAAE,QAAQ,CAAC,OAAO,EAAE,OAAO,CAAC,IAAI,aAAa;oBAClD,WAAW,EAAE,mBAAmB;oBAChC,IAAI,EAAE,CAAC,IAAI,IAAI,YAAY,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC;oBAC5C,MAAM,EAAE,QAAQ,CAAC,MAAM,EAAE,IAAI,CAAC;oBAC9B,SAAS,EAAE,IAAI,CAAC,CAAC,CAAC,yBAAyB,CAAC,CAAC,CAAC,UAAU;oBACxD,MAAM,EAAE,IAAI,IAAI,YAAY,CAAC,CAAC,CAAC,mBAAmB,CAAC,CAAC,CAAC,wBAAwB;iBAC9E;gBACD,MAAM,EAAE,EAAE;gBACV,QAAQ,EAAE,EAAE;gBACZ,cAAc,EAAE,UAAU,CAAC,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC,KAAK,EAAE,GAAG,WAAW,CAAC,WAAW,IAAI,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC;gBAC5G,OAAO,EAAE,IAAI;gBACb,MAAM,EAAE,MAAM,CAAC,EAAE;oBACf,CAAC,CAAC,CAAC,G
AAG,OAAO,CAAC,MAAM,EAAE,EAAE,QAAQ,EAAE,aAAa,EAAE,IAAI,EAAE,iCAAiC,EAAE,OAAO,EAAE,+DAA+D,EAAE,CAAC;oBACrK,CAAC,CAAC,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,QAAQ,EAAE,aAAa,EAAE,IAAI,EAAE,iCAAiC,EAAE,OAAO,EAAE,MAAM,CAAC,KAAK,IAAI,UAAU,MAAM,CAAC,MAAM,EAAE,EAAE,CAAC;gBACjJ,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC,yBAAyB,CAAC,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC,MAAM;aACtE,CAAA;QACH,CAAC;QAED,OAAO;YACL,IAAI,EAAE;gBACJ,GAAG,EAAE,OAAO,CAAC,GAAG;gBAChB,aAAa,EAAE,MAAM;gBACrB,KAAK,EAAE,QAAQ,CAAC,OAAO,EAAE,OAAO,CAAC;gBACjC,WAAW,EAAE,mBAAmB;gBAChC,IAAI,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC;gBAC1B,MAAM,EAAE,QAAQ,CAAC,MAAM,EAAE,IAAI,CAAC;gBAC9B,SAAS,EAAE,QAAQ,CAAC,IAAI,EAAE,IAAI,CAAC;gBAC/B,SAAS,EAAE,QAAQ,CAAC,IAAI,EAAE,IAAI,IAAI,IAAI,EAAE,QAAQ,CAAC;gBACjD,UAAU,EAAE,QAAQ,CAAC,OAAO,EAAE,SAAS,CAAC;gBACxC,UAAU,EAAE,QAAQ,CAAC,OAAO,EAAE,SAAS,CAAC;gBACxC,aAAa,EAAE,QAAQ,CAAC,OAAO,EAAE,YAAY,CAAC;gBAC9C,SAAS,EAAE,yBAAyB;gBACpC,MAAM,EAAE,IAAI;aACb;YACD,MAAM,EAAE,EAAE;YACV,QAAQ,EAAE,EAAE;YACZ,cAAc,EAAE,UAAU,CAAC,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC,KAAK,EAAE,GAAG,WAAW,CAAC,WAAW,CAAC,CAAC,CAAC;YAClF,OAAO,EAAE,KAAK;YACd,MAAM,EAAE,OAAO,CAAC,MAAM;YACtB,MAAM,EAAE,yBAAyB;SAClC,CAAA;IACH,CAAC;CACF,CAAA"}
@@ -1,2 +1,16 @@
1
1
  import type { SmartfetchProvider } from "../provider-types.js";
2
+ type TranscriptChunk = {
3
+ text: string;
4
+ duration: number;
5
+ offset: number;
6
+ lang?: string;
7
+ };
8
+ type YoutubeTranscriptModule = {
9
+ fetchTranscript: (url: string, options?: {
10
+ lang?: string;
11
+ fetch?: typeof fetch;
12
+ }) => Promise<TranscriptChunk[]>;
13
+ };
14
+ export declare function TEST_setYoutubeTranscriptLoader(loader?: () => Promise<YoutubeTranscriptModule>): void;
2
15
  export declare const youtubeProvider: SmartfetchProvider;
16
+ export {};
@@ -1,6 +1,10 @@
1
- import { execFileSync } from "node:child_process";
2
1
  import { dedupeUrls, envEnabled, extractMetaDescription, extractMetaProperty, extractTitleFromHtml, extractUrls, stripTags } from "../../shared.js";
3
2
  import { fetchProviderJson } from "../provider-policy.js";
3
+ const defaultYoutubeTranscriptLoader = () => import("youtube-transcript/dist/youtube-transcript.esm.js");
4
+ let youtubeTranscriptLoader = defaultYoutubeTranscriptLoader;
5
/**
 * Test hook: replace the loader used to obtain the transcript module.
 *
 * @param {Function} [loader] - Replacement loader; omit (or pass a falsy
 *   value) to restore the default loader.
 * @returns {void}
 */
export function TEST_setYoutubeTranscriptLoader(loader) {
    youtubeTranscriptLoader = loader ? loader : defaultYoutubeTranscriptLoader;
}
4
8
  function allowYoutubeTranscript() {
5
9
  return envEnabled("SMARTFETCH_ENABLE_YOUTUBE_TRANSCRIPT", true);
6
10
  }
@@ -18,22 +22,22 @@ async function fetchYoutubeMetadata(url, timeoutMs) {
18
22
  return result.data;
19
23
  }
20
24
  async function fetchYoutubeTranscript(url, timeoutMs) {
25
+ const waitMs = Math.max(1000, timeoutMs);
26
+ const controller = new AbortController();
27
+ const signal = controller.signal;
28
+ const timer = setTimeout(() => controller.abort(new Error(`Transcript fetch timed out after ${waitMs}ms`)), waitMs);
21
29
  try {
22
- const script = `
23
- const mod = await import("youtube-transcript/dist/youtube-transcript.esm.js")
24
- const transcript = await mod.fetchTranscript(${JSON.stringify(url)})
25
- console.log(JSON.stringify(transcript))
26
- `;
27
- const output = execFileSync(process.execPath, ["--input-type=module", "-e", script], {
28
- timeout: Math.max(1000, timeoutMs),
29
- encoding: "utf-8",
30
- maxBuffer: 5 * 1024 * 1024,
31
- }).trim();
32
- return (output ? JSON.parse(output) : []);
30
+ const mod = await youtubeTranscriptLoader();
31
+ return await mod.fetchTranscript(url, {
32
+ fetch: (input, init) => fetch(input, { ...init, signal }),
33
+ });
33
34
  }
34
35
  catch (error) {
35
36
  const message = error instanceof Error ? error.message : String(error);
36
- throw new Error(message.toLowerCase().includes("timed out") ? `Transcript fetch timed out after ${timeoutMs}ms` : message);
37
+ throw new Error(signal.aborted || message.toLowerCase().includes("abort") || message.toLowerCase().includes("timed out") ? `Transcript fetch timed out after ${waitMs}ms` : message);
38
+ }
39
+ finally {
40
+ clearTimeout(timer);
37
41
  }
38
42
  }
39
43
  export const youtubeProvider = {
@@ -1 +1 @@
1
- {"version":3,"file":"youtube.js","sourceRoot":"","sources":["../../../src/smartfetch/providers/youtube.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAA;AAEjD,OAAO,EAAE,UAAU,EAAE,UAAU,EAAE,sBAAsB,EAAE,mBAAmB,EAAE,oBAAoB,EAAE,WAAW,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAA;AACnJ,OAAO,EAAE,iBAAiB,EAAE,MAAM,uBAAuB,CAAA;AAGzD,SAAS,sBAAsB;IAC7B,OAAO,UAAU,CAAC,sCAAsC,EAAE,IAAI,CAAC,CAAA;AACjE,CAAC;AAED,KAAK,UAAU,oBAAoB,CAAC,GAAW,EAAE,SAAiB;IAChE,MAAM,MAAM,GAAG,MAAM,iBAAiB,CACpC,sCAAsC,kBAAkB,CAAC,GAAG,CAAC,cAAc,EAC3E,SAAS,EACT;QACE,OAAO,EAAE;YACP,MAAM,EAAE,iCAAiC;SAC1C;KACF,EACD;QACE,IAAI,EAAE,OAAO;QACb,SAAS,EAAE,GAAG;KACf,CACF,CAAA;IACD,IAAI,CAAC,MAAM,CAAC,EAAE,IAAI,CAAC,MAAM,CAAC,IAAI,IAAI,OAAO,MAAM,CAAC,IAAI,KAAK,QAAQ;QAAE,OAAO,IAAI,CAAA;IAC9E,OAAO,MAAM,CAAC,IAA+B,CAAA;AAC/C,CAAC;AAED,KAAK,UAAU,sBAAsB,CAAC,GAAW,EAAE,SAAiB;IAClE,IAAI,CAAC;QACH,MAAM,MAAM,GAAG;;qDAEkC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC;;KAEnE,CAAA;QACD,MAAM,MAAM,GAAG,YAAY,CAAC,OAAO,CAAC,QAAQ,EAAE,CAAC,qBAAqB,EAAE,IAAI,EAAE,MAAM,CAAC,EAAE;YACnF,OAAO,EAAE,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,SAAS,CAAC;YAClC,QAAQ,EAAE,OAAO;YACjB,SAAS,EAAE,CAAC,GAAG,IAAI,GAAG,IAAI;SAC3B,CAAC,CAAC,IAAI,EAAE,CAAA;QACT,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,CAAsB,CAAA;IAChE,CAAC;IAAC,OAAO,KAAc,EAAE,CAAC;QACxB,MAAM,OAAO,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAA;QACtE,MAAM,IAAI,KAAK,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,oCAAoC,SAAS,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,CAAA;IAC5H,CAAC;AACH,CAAC;AAED,MAAM,CAAC,MAAM,eAAe,GAAuB;IACjD,EAAE,EAAE,SAAS;IACb,OAAO,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,CAAC,MAAM,KAAK,eAAe;IACrD,KAAK,CAAC,SAAS,CAAC,OAA0B;QACxC,MAAM,MAAM,GAAG,MAAM,oBAAoB,CAAC,OAAO,CAAC,GAAG,EAAE,OAAO,CAAC,SAAS,CAAC,CAAA;QACzE,MAAM,KAAK,GAAG,mBAAmB,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,EAAE,UAAU,CAAC,IAAI,MAAM,CAAC,MAAM,EAAE,KAAK,IAAI,EAAE,CAAC,IAAI,oBAAoB,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,CAAA;QACpJ,MAAM,WAAW,GAAG,mBAAmB,CAAC,OAAO
,CAAC,MAAM,CAAC,OAAO,EAAE,gBAAgB,CAAC,IAAI,sBAAsB,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,CAAA;QACnI,MAAM,YAAY,GAAG,SAAS,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,CAAA;QACtD,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,EAAE,WAAW,IAAI,EAAE,CAAC,CAAA;QAChD,MAAM,SAAS,GAAG,MAAM,CAAC,MAAM,EAAE,aAAa,IAAI,EAAE,CAAC,CAAA;QAErD,IAAI,CAAC,sBAAsB,EAAE,EAAE,CAAC;YAC9B,OAAO;gBACL,IAAI,EAAE;oBACJ,GAAG,EAAE,OAAO,CAAC,GAAG;oBAChB,KAAK;oBACL,WAAW;oBACX,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;oBAC7B,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,SAAS,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;oBACnC,IAAI,EAAE,YAAY,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC;oBAClC,oBAAoB,EAAE,KAAK;oBAC3B,MAAM,EAAE,YAAY,CAAC,CAAC,CAAC,mBAAmB,CAAC,CAAC,CAAC,wBAAwB;iBACtE;gBACD,MAAM,EAAE,EAAE;gBACV,QAAQ,EAAE,EAAE;gBACZ,cAAc,EAAE,UAAU,CAAC,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC,KAAK,EAAE,GAAG,WAAW,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC;gBAC7F,OAAO,EAAE,IAAI;gBACb,MAAM,EAAE;oBACN,GAAG,OAAO,CAAC,MAAM;oBACjB,EAAE,QAAQ,EAAE,aAAa,EAAE,IAAI,EAAE,6BAA6B,EAAE,OAAO,EAAE,yDAAyD,EAAE;iBACrI;gBACD,MAAM,EAAE,OAAO,CAAC,MAAM,CAAC,MAAM;aAC9B,CAAA;QACH,CAAC;QAED,IAAI,CAAC;YACH,MAAM,UAAU,GAAG,MAAM,sBAAsB,CAAC,OAAO,CAAC,GAAG,EAAE,OAAO,CAAC,SAAS,CAAC,CAAA;YAC/E,MAAM,cAAc,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC,IAAqB,EAAE,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;YAC/I,OAAO;gBACL,IAAI,EAAE;oBACJ,GAAG,EAAE,OAAO,CAAC,GAAG;oBAChB,KAAK;oBACL,WAAW;oBACX,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;oBAC7B,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,SAAS,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;oBACnC,IAAI,EAAE,cAAc,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC;oBACpC,oBAAoB,EAAE,OAAO,CAAC,cAAc,CAAC;oBAC7C,wBAAwB,EAAE,UAAU,CAAC,MAAM;oBAC3C,MAAM,EAAE,cAAc,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,wBAAwB;iBACzD;gBACD,MAAM,EAAE,EAAE;gBACV,QAAQ,EAAE,EAAE;gBACZ,cAAc,EAAE,UAAU,CAAC,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC,KAAK,EAAE,GAAG,WAAW,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC;gB
AC7F,OAAO,EAAE,CAAC,cAAc;gBACxB,MAAM,EAAE,OAAO,CAAC,MAAM;gBACtB,MAAM,EAAE,oBAAoB;aAC7B,CAAA;QACH,CAAC;QAAC,OAAO,KAAc,EAAE,CAAC;YACxB,MAAM,OAAO,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAA;YACtE,OAAO;gBACL,IAAI,EAAE;oBACJ,GAAG,EAAE,OAAO,CAAC,GAAG;oBAChB,KAAK;oBACL,WAAW;oBACX,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;oBAC7B,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,SAAS,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;oBACnC,IAAI,EAAE,YAAY,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC;oBAClC,oBAAoB,EAAE,KAAK;oBAC3B,MAAM,EAAE,YAAY,CAAC,CAAC,CAAC,mBAAmB,CAAC,CAAC,CAAC,wBAAwB;iBACtE;gBACD,MAAM,EAAE,EAAE;gBACV,QAAQ,EAAE,EAAE;gBACZ,cAAc,EAAE,UAAU,CAAC,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC,KAAK,EAAE,GAAG,WAAW,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC;gBAC7F,OAAO,EAAE,IAAI;gBACb,MAAM,EAAE;oBACN,GAAG,OAAO,CAAC,MAAM;oBACjB,EAAE,QAAQ,EAAE,aAAa,EAAE,IAAI,EAAE,gCAAgC,EAAE,OAAO,EAAE;iBAC7E;gBACD,MAAM,EAAE,OAAO,CAAC,MAAM,CAAC,MAAM;aAC9B,CAAA;QACH,CAAC;IACH,CAAC;CACF,CAAA"}
1
+ {"version":3,"file":"youtube.js","sourceRoot":"","sources":["../../../src/smartfetch/providers/youtube.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,UAAU,EAAE,sBAAsB,EAAE,mBAAmB,EAAE,oBAAoB,EAAE,WAAW,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAA;AACnJ,OAAO,EAAE,iBAAiB,EAAE,MAAM,uBAAuB,CAAA;AAczD,MAAM,8BAA8B,GAAG,GAAG,EAAE,CAAC,MAAM,CAAC,mDAAmD,CAAqC,CAAA;AAE5I,IAAI,uBAAuB,GAAG,8BAA8B,CAAA;AAE5D,MAAM,UAAU,+BAA+B,CAAC,MAA+C;IAC7F,uBAAuB,GAAG,MAAM,IAAI,8BAA8B,CAAA;AACpE,CAAC;AAED,SAAS,sBAAsB;IAC7B,OAAO,UAAU,CAAC,sCAAsC,EAAE,IAAI,CAAC,CAAA;AACjE,CAAC;AAED,KAAK,UAAU,oBAAoB,CAAC,GAAW,EAAE,SAAiB;IAChE,MAAM,MAAM,GAAG,MAAM,iBAAiB,CACpC,sCAAsC,kBAAkB,CAAC,GAAG,CAAC,cAAc,EAC3E,SAAS,EACT;QACE,OAAO,EAAE;YACP,MAAM,EAAE,iCAAiC;SAC1C;KACF,EACD;QACE,IAAI,EAAE,OAAO;QACb,SAAS,EAAE,GAAG;KACf,CACF,CAAA;IACD,IAAI,CAAC,MAAM,CAAC,EAAE,IAAI,CAAC,MAAM,CAAC,IAAI,IAAI,OAAO,MAAM,CAAC,IAAI,KAAK,QAAQ;QAAE,OAAO,IAAI,CAAA;IAC9E,OAAO,MAAM,CAAC,IAA+B,CAAA;AAC/C,CAAC;AAED,KAAK,UAAU,sBAAsB,CAAC,GAAW,EAAE,SAAiB;IAClE,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,SAAS,CAAC,CAAA;IACxC,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAA;IACxC,MAAM,MAAM,GAAG,UAAU,CAAC,MAAM,CAAA;IAChC,MAAM,KAAK,GAAG,UAAU,CAAC,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC,oCAAoC,MAAM,IAAI,CAAC,CAAC,EAAE,MAAM,CAAC,CAAA;IACnH,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,MAAM,uBAAuB,EAAE,CAAA;QAC3C,OAAO,MAAM,GAAG,CAAC,eAAe,CAAC,GAAG,EAAE;YACpC,KAAK,EAAE,CAAC,KAA6B,EAAE,IAAkB,EAAE,EAAE,CAAC,KAAK,CAAC,KAAK,EAAE,EAAE,GAAG,IAAI,EAAE,MAAM,EAAE,CAAC;SACzF,CAAsB,CAAA;IAChC,CAAC;IAAC,OAAO,KAAc,EAAE,CAAC;QACxB,MAAM,OAAO,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAA;QACtE,MAAM,IAAI,KAAK,CAAC,MAAM,CAAC,OAAO,IAAI,OAAO,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,OAAO,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,oCAAoC,MAAM,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,CAAA;IACtL,CAAC;YAAS,CAAC;QACT,YAAY,CAAC,KAAK,CAAC,CAAA;IACrB,CAAC;AACH,CAAC;AAED,MAAM,CAAC,MAAM,eAAe,GAAuB;IACjD,EAAE,EAAE,SAAS;IACb,OAAO,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,EA
AE,CAAC,MAAM,KAAK,eAAe;IACrD,KAAK,CAAC,SAAS,CAAC,OAA0B;QACxC,MAAM,MAAM,GAAG,MAAM,oBAAoB,CAAC,OAAO,CAAC,GAAG,EAAE,OAAO,CAAC,SAAS,CAAC,CAAA;QACzE,MAAM,KAAK,GAAG,mBAAmB,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,EAAE,UAAU,CAAC,IAAI,MAAM,CAAC,MAAM,EAAE,KAAK,IAAI,EAAE,CAAC,IAAI,oBAAoB,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,CAAA;QACpJ,MAAM,WAAW,GAAG,mBAAmB,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,EAAE,gBAAgB,CAAC,IAAI,sBAAsB,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,CAAA;QACnI,MAAM,YAAY,GAAG,SAAS,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,CAAA;QACtD,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,EAAE,WAAW,IAAI,EAAE,CAAC,CAAA;QAChD,MAAM,SAAS,GAAG,MAAM,CAAC,MAAM,EAAE,aAAa,IAAI,EAAE,CAAC,CAAA;QAErD,IAAI,CAAC,sBAAsB,EAAE,EAAE,CAAC;YAC9B,OAAO;gBACL,IAAI,EAAE;oBACJ,GAAG,EAAE,OAAO,CAAC,GAAG;oBAChB,KAAK;oBACL,WAAW;oBACX,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;oBAC7B,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,SAAS,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;oBACnC,IAAI,EAAE,YAAY,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC;oBAClC,oBAAoB,EAAE,KAAK;oBAC3B,MAAM,EAAE,YAAY,CAAC,CAAC,CAAC,mBAAmB,CAAC,CAAC,CAAC,wBAAwB;iBACtE;gBACD,MAAM,EAAE,EAAE;gBACV,QAAQ,EAAE,EAAE;gBACZ,cAAc,EAAE,UAAU,CAAC,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC,KAAK,EAAE,GAAG,WAAW,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC;gBAC7F,OAAO,EAAE,IAAI;gBACb,MAAM,EAAE;oBACN,GAAG,OAAO,CAAC,MAAM;oBACjB,EAAE,QAAQ,EAAE,aAAa,EAAE,IAAI,EAAE,6BAA6B,EAAE,OAAO,EAAE,yDAAyD,EAAE;iBACrI;gBACD,MAAM,EAAE,OAAO,CAAC,MAAM,CAAC,MAAM;aAC9B,CAAA;QACH,CAAC;QAED,IAAI,CAAC;YACH,MAAM,UAAU,GAAG,MAAM,sBAAsB,CAAC,OAAO,CAAC,GAAG,EAAE,OAAO,CAAC,SAAS,CAAC,CAAA;YAC/E,MAAM,cAAc,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC,IAAqB,EAAE,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;YAC/I,OAAO;gBACL,IAAI,EAAE;oBACJ,GAAG,EAAE,OAAO,CAAC,GAAG;oBAChB,KAAK;oBACL,WAAW;oBACX,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;oBAC7B,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,SAAS,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;oBACnC,IAAI,EAAE,cAAc
,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC;oBACpC,oBAAoB,EAAE,OAAO,CAAC,cAAc,CAAC;oBAC7C,wBAAwB,EAAE,UAAU,CAAC,MAAM;oBAC3C,MAAM,EAAE,cAAc,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,wBAAwB;iBACzD;gBACD,MAAM,EAAE,EAAE;gBACV,QAAQ,EAAE,EAAE;gBACZ,cAAc,EAAE,UAAU,CAAC,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC,KAAK,EAAE,GAAG,WAAW,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC;gBAC7F,OAAO,EAAE,CAAC,cAAc;gBACxB,MAAM,EAAE,OAAO,CAAC,MAAM;gBACtB,MAAM,EAAE,oBAAoB;aAC7B,CAAA;QACH,CAAC;QAAC,OAAO,KAAc,EAAE,CAAC;YACxB,MAAM,OAAO,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAA;YACtE,OAAO;gBACL,IAAI,EAAE;oBACJ,GAAG,EAAE,OAAO,CAAC,GAAG;oBAChB,KAAK;oBACL,WAAW;oBACX,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;oBAC7B,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,SAAS,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;oBACnC,IAAI,EAAE,YAAY,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC;oBAClC,oBAAoB,EAAE,KAAK;oBAC3B,MAAM,EAAE,YAAY,CAAC,CAAC,CAAC,mBAAmB,CAAC,CAAC,CAAC,wBAAwB;iBACtE;gBACD,MAAM,EAAE,EAAE;gBACV,QAAQ,EAAE,EAAE;gBACZ,cAAc,EAAE,UAAU,CAAC,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC,KAAK,EAAE,GAAG,WAAW,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC;gBAC7F,OAAO,EAAE,IAAI;gBACb,MAAM,EAAE;oBACN,GAAG,OAAO,CAAC,MAAM;oBACjB,EAAE,QAAQ,EAAE,aAAa,EAAE,IAAI,EAAE,gCAAgC,EAAE,OAAO,EAAE;iBAC7E;gBACD,MAAM,EAAE,OAAO,CAAC,MAAM,CAAC,MAAM;aAC9B,CAAA;QACH,CAAC;IACH,CAAC;CACF,CAAA"}
@@ -0,0 +1,13 @@
/** Cache entry pairing a value with the time after which it is stale. */
export type TimedCache = {
    /** Cached payload. */
    value: unknown;
    /** Expiry timestamp; smartfetch compares it against `Date.now()`. */
    expiresAt: number;
};
/** Returns the current cache entry, or `undefined` when none is stored. */
export declare function getImpitCache(): TimedCache | undefined;
/** Stores a cache entry; passing `undefined` clears it. */
export declare function setImpitCache(value: TimedCache | undefined): void;
/** Returns the loader currently used to obtain the `impit` module. */
export declare function getImpitLoader(): () => Promise<{
    Impit: unknown;
}>;
/** Test hook: replaces the loader returned by `getImpitLoader`. */
export declare function TEST_setImpitLoader(loader: () => Promise<{
    Impit: unknown;
}>): void;
/** Test hook: clears the cache and restores the default loader. */
export declare function TEST_resetImpitInternals(): void;
@@ -0,0 +1,19 @@
/** Production loader: lazily imports the `impit` package when invoked. */
const defaultImpitLoader = () => import("impit");

/** Current cache entry (`{ value, expiresAt }`), or `undefined` when empty. */
let impitCache = undefined;

/** Loader currently in effect; only the test hooks below reassign it. */
let impitLoader = defaultImpitLoader;

/**
 * @returns {{ value: unknown, expiresAt: number } | undefined} The stored
 *   cache entry, or `undefined` when nothing has been cached.
 */
export function getImpitCache() {
    return impitCache;
}

/**
 * Stores a cache entry as-is; no validation or copying is performed.
 *
 * @param {{ value: unknown, expiresAt: number } | undefined} value - Entry to
 *   store, or `undefined` to clear the cache.
 */
export function setImpitCache(value) {
    impitCache = value;
}

/** @returns {Function} The loader currently used to obtain the `impit` module. */
export function getImpitLoader() {
    return impitLoader;
}

/**
 * Test hook that swaps the `impit` loader. The cache is left untouched, so an
 * entry that is already cached keeps being served until it is cleared.
 *
 * @param {Function} [loader] - Replacement loader. Any falsy value restores
 *   the default loader.
 * @throws {TypeError} If `loader` is truthy but not a function.
 */
export function TEST_setImpitLoader(loader) {
    if (!loader) {
        impitLoader = defaultImpitLoader;
        return;
    }
    if (typeof loader !== "function") {
        throw new TypeError(`Impit loader must be a function, received ${typeof loader}`);
    }
    impitLoader = loader;
}

/** Test hook: clears the cache and restores the default loader. */
export function TEST_resetImpitInternals() {
    impitCache = undefined;
    impitLoader = defaultImpitLoader;
}
19
+ //# sourceMappingURL=smartfetch-internals.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"smartfetch-internals.js","sourceRoot":"","sources":["../src/smartfetch-internals.ts"],"names":[],"mappings":"AAKA,IAAI,UAAU,GAA2B,SAAS,CAAA;AAClD,IAAI,WAAW,GAAsC,GAAG,EAAE,CAAC,MAAM,CAAC,OAAO,CAAC,CAAA;AAE1E,MAAM,UAAU,aAAa;IAC3B,OAAO,UAAU,CAAA;AACnB,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,KAA6B;IACzD,UAAU,GAAG,KAAK,CAAA;AACpB,CAAC;AAED,MAAM,UAAU,cAAc;IAC5B,OAAO,WAAW,CAAA;AACpB,CAAC;AAED,MAAM,UAAU,mBAAmB,CAAC,MAAyC;IAC3E,WAAW,GAAG,MAAM,CAAA;AACtB,CAAC;AAED,MAAM,UAAU,wBAAwB;IACtC,UAAU,GAAG,SAAS,CAAA;IACtB,WAAW,GAAG,GAAG,EAAE,CAAC,MAAM,CAAC,OAAO,CAAC,CAAA;AACrC,CAAC"}
@@ -1,23 +1,25 @@
1
1
  import { asString, cleanLinks, dedupeUrls, envFlag, extractAnchorHrefs, extractUrls, inferTarget, isDcinsideUrl, needsDynamicCrawl, resolveValidatedUrl, validateOutboundUrl, } from "./shared.js";
2
2
  import { runBrowserSession } from "./browser-session.js";
3
+ import { getImpitCache, getImpitLoader, setImpitCache } from "./smartfetch-internals.js";
3
4
  import { discoverAssets } from "./smartfetch/assets.js";
4
5
  import { maybeUseArchiveFallback } from "./smartfetch/archive-fallback.js";
5
6
  import { resolveSmartfetchProvider } from "./smartfetch/providers/index.js";
6
7
  const DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36";
7
- let impitCache = undefined;
8
+ const FAILURE_CACHE_TTL_MS = 30_000;
/**
 * Reports the state of the `SMART_WEB_ALLOW_PRIVATE_HOSTS` flag as read by
 * `envFlag`.
 */
function allowPrivateHosts() {
    const privateHostsAllowed = envFlag("SMART_WEB_ALLOW_PRIVATE_HOSTS");
    return privateHostsAllowed;
}
/**
 * Resolves the `Impit` export through the configured loader, using the shared
 * timed cache.
 *
 * A successful load is cached with no expiry. A failed load, or a module that
 * loads but exposes no usable `Impit` export, is cached as `false` for
 * `FAILURE_CACHE_TTL_MS` so the load is retried later instead of the failure
 * being remembered permanently.
 *
 * @returns {Promise<*>} The `Impit` export, or `false` when it is unavailable.
 */
async function getImpit() {
    const cached = getImpitCache();
    if (cached !== undefined && Date.now() < cached.expiresAt) {
        return cached.value;
    }
    let entry;
    try {
        const { Impit } = await getImpitLoader()();
        entry = Impit
            ? { value: Impit, expiresAt: Number.POSITIVE_INFINITY }
            : { value: false, expiresAt: Date.now() + FAILURE_CACHE_TTL_MS };
    }
    catch {
        // Best effort: a load failure is reported to callers as `false`
        // rather than thrown.
        entry = { value: false, expiresAt: Date.now() + FAILURE_CACHE_TTL_MS };
    }
    setImpitCache(entry);
    return entry.value;
}
22
24
  async function runNativeFetch(url, timeoutMs) {
23
25
  const headers = {
@@ -65,7 +67,7 @@ async function runNativeFetch(url, timeoutMs) {
65
67
  };
66
68
  }
67
69
  const links = [...extractAnchorHrefs(body), ...extractUrls(body)];
68
- return { ok: true, method: "plain_fetch", content: body, links: dedupeUrls(links) };
70
+ return { ok: true, method: "plain_fetch", content: body, links: dedupeUrls(links), final_url: resolved.url };
69
71
  }
70
72
  catch (error) {
71
73
  const message = error instanceof Error ? error.message : String(error);
@@ -130,7 +132,7 @@ async function runImpitFetch(url, timeoutMs) {
130
132
  };
131
133
  }
132
134
  const links = [...extractAnchorHrefs(body), ...extractUrls(body)];
133
- return { ok: true, method: "impit_fetch", content: body, links: dedupeUrls(links) };
135
+ return { ok: true, method: "impit_fetch", content: body, links: dedupeUrls(links), final_url: resolved.url };
134
136
  }
135
137
  catch (error) {
136
138
  const message = error instanceof Error ? error.message : String(error);
@@ -184,8 +186,12 @@ async function runPlaywrightFetch(url, timeoutMs, target) {
184
186
  method: result.method,
185
187
  content: choosePlaywrightContent(result),
186
188
  links: dedupeUrls(result.links.map((item) => asString(item))),
189
+ final_url: result.final_url,
187
190
  };
188
191
  }
/**
 * Maps a smartfetch target to its risk level.
 *
 * @param {string} target - Target identifier (for example `"x_post"` or `"generic"`).
 * @returns {"high" | "low" | "medium"} `"high"` for `"x_post"`, `"low"` for
 *   `"generic"`, and `"medium"` for every other target.
 */
function riskLevelForTarget(target) {
    switch (target) {
        case "x_post":
            return "high";
        case "generic":
            return "low";
        default:
            return "medium";
    }
}
189
195
  function baseOutput(url, target) {
190
196
  return {
191
197
  source: "smartfetch",
@@ -264,6 +270,7 @@ export async function runSmartfetch(options, runtime = {}) {
264
270
  method: target === "youtube_video" ? "youtube_metadata_seed" : "x_public_seed",
265
271
  content: "",
266
272
  links: [],
273
+ final_url: options.url,
267
274
  };
268
275
  output.retrieval_method.push(active.method);
269
276
  }
@@ -294,10 +301,21 @@ export async function runSmartfetch(options, runtime = {}) {
294
301
  output.retrieval_method.push(active.method);
295
302
  }
296
303
  }
297
- const provider = resolveSmartfetchProvider(options.url, target);
304
+ let effectiveTarget = target;
305
+ if ((options.target || "auto") === "auto") {
306
+ const inferred = inferTarget(active.final_url || options.url, "auto");
307
+ if (inferred !== effectiveTarget) {
308
+ effectiveTarget = inferred;
309
+ output.target = inferred;
310
+ output.risk_level = riskLevelForTarget(inferred);
311
+ }
312
+ }
313
+ const providerUrl = active.final_url || options.url;
314
+ const provider = resolveSmartfetchProvider(providerUrl, effectiveTarget);
298
315
  const normalized = await provider.normalize({
299
316
  url: options.url,
300
- target,
317
+ resolvedUrl: providerUrl,
318
+ target: effectiveTarget,
301
319
  timeoutMs,
302
320
  active,
303
321
  errors: output.errors,
@@ -428,6 +446,7 @@ async function runPlaywrightFetchWithRuntime(url, timeoutMs, target, runtime) {
428
446
  method: result.method,
429
447
  content: choosePlaywrightContent(result),
430
448
  links: dedupeUrls(result.links.map((item) => asString(item))),
449
+ final_url: result.final_url,
431
450
  };
432
451
  }
433
452
  //# sourceMappingURL=smartfetch.js.map