@j0hanz/fetch-url-mcp 1.12.7 → 1.12.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/http/auth.d.ts +2 -2
- package/dist/http/auth.d.ts.map +1 -1
- package/dist/http/auth.js +4 -5
- package/dist/http/index.d.ts +6 -0
- package/dist/http/index.d.ts.map +1 -0
- package/dist/http/index.js +5 -0
- package/dist/http/native.d.ts +73 -0
- package/dist/http/native.d.ts.map +1 -1
- package/dist/http/native.js +554 -10
- package/dist/http/rate-limit.d.ts +1 -1
- package/dist/http/rate-limit.d.ts.map +1 -1
- package/dist/http/rate-limit.js +3 -4
- package/dist/index.d.ts +17 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +67 -6
- package/dist/lib/config.js +2 -2
- package/dist/lib/core.d.ts +56 -4
- package/dist/lib/core.d.ts.map +1 -1
- package/dist/lib/core.js +155 -4
- package/dist/lib/error/classes.d.ts +19 -0
- package/dist/lib/error/classes.d.ts.map +1 -0
- package/dist/lib/error/classes.js +107 -0
- package/dist/lib/error/classify.d.ts +4 -0
- package/dist/lib/error/classify.d.ts.map +1 -0
- package/dist/lib/error/classify.js +154 -0
- package/dist/lib/error/codes.d.ts +23 -0
- package/dist/lib/error/codes.d.ts.map +1 -0
- package/dist/lib/error/codes.js +22 -0
- package/dist/lib/error/index.d.ts +6 -0
- package/dist/lib/error/index.d.ts.map +1 -0
- package/dist/lib/error/index.js +5 -0
- package/dist/lib/{error-messages.d.ts → error/messages.d.ts} +2 -2
- package/dist/lib/error/messages.d.ts.map +1 -0
- package/dist/lib/{error-messages.js → error/messages.js} +2 -2
- package/dist/lib/{tool-errors.d.ts → error/payload.d.ts} +7 -13
- package/dist/lib/error/payload.d.ts.map +1 -0
- package/dist/lib/error/payload.js +108 -0
- package/dist/lib/mcp-interop.d.ts.map +1 -1
- package/dist/lib/mcp-interop.js +4 -6
- package/dist/lib/net/http.d.ts.map +1 -0
- package/dist/lib/{http.js → net/http.js} +4 -7
- package/dist/lib/net/index.d.ts +4 -0
- package/dist/lib/net/index.d.ts.map +1 -0
- package/dist/lib/net/index.js +3 -0
- package/dist/lib/{fetch-pipeline.d.ts → net/pipeline.d.ts} +3 -3
- package/dist/lib/net/pipeline.d.ts.map +1 -0
- package/dist/lib/{fetch-pipeline.js → net/pipeline.js} +3 -5
- package/dist/lib/{url.d.ts → net/url.d.ts} +1 -1
- package/dist/lib/net/url.d.ts.map +1 -0
- package/dist/lib/{url.js → net/url.js} +3 -5
- package/dist/lib/utils.d.ts +2 -18
- package/dist/lib/utils.d.ts.map +1 -1
- package/dist/lib/utils.js +29 -104
- package/dist/resources/index.d.ts.map +1 -1
- package/dist/resources/index.js +8 -5
- package/dist/schemas.d.ts +1 -1
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +7 -9
- package/dist/tasks/index.d.ts +2 -0
- package/dist/tasks/index.d.ts.map +1 -0
- package/dist/tasks/index.js +1 -0
- package/dist/tasks/manager.d.ts +123 -1
- package/dist/tasks/manager.d.ts.map +1 -1
- package/dist/tasks/manager.js +745 -10
- package/dist/tools/{fetch-url.d.ts → index.d.ts} +4 -5
- package/dist/tools/index.d.ts.map +1 -0
- package/dist/tools/{fetch-url.js → index.js} +6 -8
- package/dist/transform/index.d.ts +279 -0
- package/dist/transform/index.d.ts.map +1 -0
- package/dist/transform/index.js +5234 -0
- package/package.json +2 -2
- package/dist/cli.d.ts +0 -19
- package/dist/cli.d.ts.map +0 -1
- package/dist/cli.js +0 -65
- package/dist/http/health.d.ts +0 -8
- package/dist/http/health.d.ts.map +0 -1
- package/dist/http/health.js +0 -152
- package/dist/http/helpers.d.ts +0 -68
- package/dist/http/helpers.d.ts.map +0 -1
- package/dist/http/helpers.js +0 -402
- package/dist/lib/error-codes.d.ts +0 -13
- package/dist/lib/error-codes.d.ts.map +0 -1
- package/dist/lib/error-codes.js +0 -12
- package/dist/lib/error-messages.d.ts.map +0 -1
- package/dist/lib/fetch-pipeline.d.ts.map +0 -1
- package/dist/lib/http.d.ts.map +0 -1
- package/dist/lib/logger-names.d.ts +0 -16
- package/dist/lib/logger-names.d.ts.map +0 -1
- package/dist/lib/logger-names.js +0 -15
- package/dist/lib/session.d.ts +0 -44
- package/dist/lib/session.d.ts.map +0 -1
- package/dist/lib/session.js +0 -137
- package/dist/lib/tool-errors.d.ts.map +0 -1
- package/dist/lib/tool-errors.js +0 -253
- package/dist/lib/url.d.ts.map +0 -1
- package/dist/lib/zod.d.ts +0 -3
- package/dist/lib/zod.d.ts.map +0 -1
- package/dist/lib/zod.js +0 -27
- package/dist/tasks/call-contract.d.ts +0 -25
- package/dist/tasks/call-contract.d.ts.map +0 -1
- package/dist/tasks/call-contract.js +0 -59
- package/dist/tasks/execution.d.ts +0 -16
- package/dist/tasks/execution.d.ts.map +0 -1
- package/dist/tasks/execution.js +0 -241
- package/dist/tasks/handlers.d.ts +0 -11
- package/dist/tasks/handlers.d.ts.map +0 -1
- package/dist/tasks/handlers.js +0 -157
- package/dist/tasks/owner.d.ts +0 -43
- package/dist/tasks/owner.d.ts.map +0 -1
- package/dist/tasks/owner.js +0 -144
- package/dist/tasks/registry.d.ts +0 -20
- package/dist/tasks/registry.d.ts.map +0 -1
- package/dist/tasks/registry.js +0 -40
- package/dist/tasks/waiters.d.ts +0 -27
- package/dist/tasks/waiters.d.ts.map +0 -1
- package/dist/tasks/waiters.js +0 -114
- package/dist/tools/fetch-url.d.ts.map +0 -1
- package/dist/transform/dom-prep.d.ts +0 -16
- package/dist/transform/dom-prep.d.ts.map +0 -1
- package/dist/transform/dom-prep.js +0 -1287
- package/dist/transform/html-translators.d.ts +0 -5
- package/dist/transform/html-translators.d.ts.map +0 -1
- package/dist/transform/html-translators.js +0 -697
- package/dist/transform/markdown-cleanup.d.ts +0 -10
- package/dist/transform/markdown-cleanup.d.ts.map +0 -1
- package/dist/transform/markdown-cleanup.js +0 -542
- package/dist/transform/metadata.d.ts +0 -18
- package/dist/transform/metadata.d.ts.map +0 -1
- package/dist/transform/metadata.js +0 -462
- package/dist/transform/next-flight.d.ts +0 -2
- package/dist/transform/next-flight.d.ts.map +0 -1
- package/dist/transform/next-flight.js +0 -374
- package/dist/transform/shared.d.ts +0 -8
- package/dist/transform/shared.d.ts.map +0 -1
- package/dist/transform/shared.js +0 -137
- package/dist/transform/transform.d.ts +0 -38
- package/dist/transform/transform.d.ts.map +0 -1
- package/dist/transform/transform.js +0 -1042
- package/dist/transform/types.d.ts +0 -124
- package/dist/transform/types.d.ts.map +0 -1
- package/dist/transform/types.js +0 -5
- package/dist/transform/worker-pool.d.ts +0 -76
- package/dist/transform/worker-pool.d.ts.map +0 -1
- package/dist/transform/worker-pool.js +0 -725
- /package/dist/lib/{http.d.ts → net/http.d.ts} +0 -0
|
@@ -1,462 +0,0 @@
|
|
|
1
|
-
import { parseHTML } from 'linkedom';
|
|
2
|
-
import { config } from '../lib/core.js';
|
|
3
|
-
import { parseUrlOrNull } from '../lib/utils.js';
|
|
4
|
-
// ---------------------------------------------------------------------------
|
|
5
|
-
// Head-section parsing
|
|
6
|
-
// ---------------------------------------------------------------------------
|
|
7
|
-
// Marks where the document head stops: a closing </head> tag or an opening <body> tag.
const HEAD_END_PATTERN = /<\/head\s*>|<body\b/i;
// Only this many leading characters are searched for the end of the head.
const MAX_HEAD_SCAN_LENGTH = 50_000;
/**
 * Returns the markup that precedes the end of the head section.
 *
 * @param {string} html - Raw HTML document text.
 * @returns {string | null} Everything before the first `</head>` / `<body`
 *   found inside the scan window, or `null` when neither appears there.
 */
function extractHeadSection(html) {
    // slice() clamps to the string length, so short documents are scanned whole.
    const scanWindow = html.slice(0, MAX_HEAD_SCAN_LENGTH);
    const headEnd = HEAD_END_PATTERN.exec(scanWindow);
    if (headEnd === null) {
        return null;
    }
    return html.slice(0, headEnd.index);
}
|
|
16
|
-
/**
 * Shortens GitHub's verbose repository page title to `owner/repo`.
 *
 * The title is rewritten only when it starts with `GitHub - ` and `baseUrl`
 * is a github.com (or www.github.com) URL whose path has exactly two
 * segments; in every other case the title is returned unchanged.
 *
 * @param {string} title - Title extracted from the document.
 * @param {string | undefined} baseUrl - URL the document was fetched from.
 * @returns {string} The normalized title.
 */
export function normalizeDocumentTitle(title, baseUrl) {
    const looksLikeGithubTitle = Boolean(baseUrl) && title.startsWith('GitHub - ');
    if (!looksLikeGithubTitle) {
        return title;
    }
    const target = parseUrlOrNull(baseUrl);
    if (!target) {
        return title;
    }
    const host = target.hostname.toLowerCase();
    const isGithubHost = host === 'github.com' || host === 'www.github.com';
    if (!isGithubHost) {
        return title;
    }
    const pathParts = target.pathname.split('/').filter(Boolean);
    const [owner, repo] = pathParts;
    if (pathParts.length !== 2 || !owner || !repo) {
        return title;
    }
    return `${owner}/${repo}`;
}
|
|
34
|
-
/** Handlers for `<meta property="…">` tags, keyed by lower-case property name. */
const META_PROPERTY_HANDLERS = new Map([
    [
        'og:title',
        (ctx, c) => {
            ctx.title.og = c;
        },
    ],
    [
        'og:description',
        (ctx, c) => {
            ctx.description.og = c;
        },
    ],
    [
        'og:image',
        (ctx, c) => {
            ctx.image = c;
        },
    ],
    [
        'article:published_time',
        (ctx, c) => {
            ctx.publishedAt = c;
        },
    ],
    [
        'article:modified_time',
        (ctx, c) => {
            ctx.modifiedAt = c;
        },
    ],
]);
/** Handlers for `<meta name="…">` tags, keyed by lower-case name. */
const META_NAME_HANDLERS = new Map([
    [
        'twitter:title',
        (ctx, c) => {
            ctx.title.twitter = c;
        },
    ],
    [
        'twitter:description',
        (ctx, c) => {
            ctx.description.twitter = c;
        },
    ],
    [
        'description',
        (ctx, c) => {
            ctx.description.standard = c;
        },
    ],
    [
        'author',
        (ctx, c) => {
            ctx.author = c;
        },
    ],
]);
/**
 * Turns a raw attribute value into a handler-map key.
 *
 * @param {string | null | undefined} value - Raw attribute value.
 * @returns {string} Trimmed, lower-cased key; empty when the attribute is absent.
 */
function normalizeMetaKey(value) {
    return value ? value.trim().toLowerCase() : '';
}
/**
 * Applies a single `<meta>` element to the metadata context.
 *
 * Tags without non-blank `content` are ignored. The `property` and `name`
 * attributes are matched after trimming and case folding, so markup such as
 * `<meta name="Description">` is recognised; the handler keys themselves are
 * all lower-case.
 *
 * @param {object} ctx - Mutable context with `title` and `description` sub-objects.
 * @param {{ getAttribute(name: string): string | null }} tag - The meta element.
 * @returns {void}
 */
function processMetaTag(ctx, tag) {
    const content = tag.getAttribute('content')?.trim();
    if (!content) {
        return;
    }
    const property = normalizeMetaKey(tag.getAttribute('property'));
    if (property) {
        META_PROPERTY_HANDLERS.get(property)?.(ctx, content);
    }
    const name = normalizeMetaKey(tag.getAttribute('name'));
    if (name) {
        META_NAME_HANDLERS.get(name)?.(ctx, content);
    }
}
|
|
103
|
-
/**
 * Collects raw metadata candidates from a parsed document.
 *
 * Every `<meta>` element is passed through `processMetaTag`; afterwards the
 * trimmed text of `<title>` is stored as `title.standard` when that slot is
 * still empty.
 *
 * @param {Document} document - Parsed DOM document.
 * @returns {object} Context holding `title` and `description` candidate maps
 *   plus any scalar fields the meta handlers set.
 */
function buildMetaContext(document) {
    const context = { title: {}, description: {} };
    for (const metaTag of document.querySelectorAll('meta')) {
        processMetaTag(context, metaTag);
    }
    const pageTitle = document.querySelector('title')?.textContent;
    if (pageTitle && !context.title.standard) {
        context.title.standard = pageTitle.trim();
    }
    return context;
}
|
|
114
|
-
/**
 * Picks the final metadata values out of the collected candidates.
 *
 * Title and description prefer the Open Graph value, then the Twitter value,
 * then the standard one. Fields whose resolved value is falsy are left out of
 * the result.
 *
 * @param {object} ctx - Context produced while scanning the document.
 * @returns {object} Metadata containing only the fields that resolved.
 */
function resolveMetadataFromContext(ctx) {
    const { title, description } = ctx;
    const candidates = [
        ['title', title.og ?? title.twitter ?? title.standard],
        ['description', description.og ?? description.twitter ?? description.standard],
        ['author', ctx.author],
        ['image', ctx.image],
        ['publishedAt', ctx.publishedAt],
        ['modifiedAt', ctx.modifiedAt],
    ];
    return Object.fromEntries(candidates.filter(([, value]) => Boolean(value)));
}
|
|
132
|
-
// ---------------------------------------------------------------------------
|
|
133
|
-
// Favicon resolution
|
|
134
|
-
// ---------------------------------------------------------------------------
|
|
135
|
-
/** Favicon `<link>` selectors, most preferred first: 32×32 icon, SVG icon, any icon, legacy shortcut icon. */
const FAVICON_SELECTORS = [
    'link[rel="icon"][sizes="32x32"]',
    'link[rel="icon"][type="image/svg+xml"]',
    'link[rel="icon"]',
    'link[rel="shortcut icon"]',
];
/**
 * Resolves a favicon `href` against the page URL.
 *
 * @param {string} href - Raw `href` attribute value.
 * @param {string} baseUrl - URL of the page the link came from.
 * @returns {string | undefined} Absolute http(s) URL, or `undefined` for blank
 *   values, `data:` URIs, unparsable values and non-http(s) schemes.
 */
function resolveFaviconUrl(href, baseUrl) {
    const candidate = href.trim();
    if (candidate === '' || candidate.toLowerCase().startsWith('data:')) {
        return undefined;
    }
    const absolute = parseUrlOrNull(candidate, baseUrl);
    const isWebUrl = absolute?.protocol === 'http:' || absolute?.protocol === 'https:';
    return isWebUrl ? absolute.toString() : undefined;
}
/**
 * Finds the first usable favicon URL, trying the selectors in preference order.
 *
 * @param {Document} document - Parsed DOM document.
 * @param {string} baseUrl - URL used to resolve relative hrefs.
 * @returns {string | undefined} Absolute favicon URL, if any link resolves.
 */
function extractFavicon(document, baseUrl) {
    for (const selector of FAVICON_SELECTORS) {
        for (const link of document.querySelectorAll(selector)) {
            const href = link.getAttribute('href');
            const favicon = href ? resolveFaviconUrl(href, baseUrl) : undefined;
            if (favicon) {
                return favicon;
            }
        }
    }
    return undefined;
}
|
|
165
|
-
// ---------------------------------------------------------------------------
|
|
166
|
-
// Public interface
|
|
167
|
-
// ---------------------------------------------------------------------------
|
|
168
|
-
/**
 * Extracts page metadata (title, description, author, image, dates, favicon)
 * from a parsed document.
 *
 * @param {Document} document - Parsed DOM document.
 * @param {string | undefined} baseUrl - Page URL; needed for title
 *   normalization and favicon resolution. No favicon is looked up without it.
 * @returns {object} Metadata containing only the fields that were found.
 */
export function extractMetadata(document, baseUrl) {
    const metadata = resolveMetadataFromContext(buildMetaContext(document));
    if (metadata.title) {
        metadata.title = normalizeDocumentTitle(metadata.title, baseUrl);
    }
    const favicon = baseUrl ? extractFavicon(document, baseUrl) : undefined;
    if (favicon) {
        metadata.favicon = favicon;
    }
    return metadata;
}
|
|
181
|
-
/**
 * Extracts metadata by parsing only the head portion of an HTML string.
 *
 * @param {string} html - Raw HTML document text.
 * @param {string | undefined} baseUrl - Page URL forwarded to `extractMetadata`.
 * @returns {object | null} Extracted metadata, or `null` when no head section
 *   is found or parsing/extraction throws.
 */
export function extractMetadataFromHead(html, baseUrl) {
    const head = extractHeadSection(html);
    if (!head) {
        return null;
    }
    // The head fragment is wrapped in a minimal document so the parser sees a full page.
    const wrapped = `<!DOCTYPE html><html>${head}</head><body></body></html>`;
    try {
        return extractMetadata(parseHTML(wrapped).document, baseUrl);
    }
    catch {
        // Best effort: any failure here is reported to the caller as "no metadata".
        return null;
    }
}
|
|
193
|
-
/**
 * Merges two metadata objects, letting `late` values win over `early` ones.
 *
 * Only the known metadata keys are copied, and a key is included only when one
 * of the inputs defines it. When either input is missing the other one is
 * returned as-is, so a missing `late` no longer throws a TypeError.
 *
 * @param {object | null | undefined} early - Metadata from the first pass.
 * @param {object | null | undefined} late - Metadata from the later pass.
 * @returns {object | null | undefined} The merged metadata, or the only input
 *   that was provided.
 */
export function mergeMetadata(early, late) {
    if (!early) {
        return late;
    }
    if (!late) {
        return early;
    }
    const keys = [
        'title',
        'description',
        'author',
        'image',
        'favicon',
        'publishedAt',
        'modifiedAt',
    ];
    const merged = {};
    for (const key of keys) {
        const value = late[key] ?? early[key];
        if (value !== undefined) {
            merged[key] = value;
        }
    }
    return merged;
}
|
|
213
|
-
// Character budget used when scanning a markdown body for its first heading.
const BODY_SCAN_LIMIT = 500;
// More than this many common HTML tags means the content is treated as HTML.
const HTML_TAG_DENSITY_LIMIT = 5;
// An ATX heading marker (one to six '#' followed by whitespace) at the start of any line.
const HEADING_MARKER = /^#{1,6}\s/m;
const HEADING_STRICT = /^#{1,6}\s+/m;
// A `source:` key at the start of any line (case-insensitive).
const SOURCE_KEY = /^source:\s/im;
// Content that begins like a full HTML document.
const HTML_DOC_START = /^(<!doctype|<html)/i;
// A bullet list marker at the start of any line.
const LIST_MARKER = /^(?:[-*+])\s/m;
/**
 * Detects which line ending a text uses.
 *
 * @param {string} content - Text to inspect.
 * @returns {'\r\n' | '\n'} CRLF when the text contains at least one CRLF, else LF.
 */
function getLineEnding(content) {
    if (content.includes('\r\n')) {
        return '\r\n';
    }
    return '\n';
}
|
|
223
|
-
/**
 * Parses a leading `---` frontmatter block into simple key/value entries.
 *
 * The content must begin with a `---` line. The block ends at the next line
 * that consists of exactly `---`, terminated either by the same line ending
 * as the opening fence or by the end of the input. A `---` that merely ends a
 * value line does not close the block.
 *
 * @param {string} content - Text that may start with frontmatter.
 * @returns {{ range: { start: number, end: number, linesStart: number,
 *   linesEnd: number, lineEnding: string }, entries: Map<string, string> } | null}
 *   `range.linesStart`/`range.linesEnd` delimit the lines between the fences,
 *   `range.end` is the offset just past the closing fence, and `entries` maps
 *   lower-cased keys to trimmed, unquoted values. `null` when the content has
 *   no complete frontmatter block.
 */
function parseFrontmatter(content) {
    let lineEnding;
    if (content.startsWith('---\n')) {
        lineEnding = '\n';
    }
    else if (content.startsWith('---\r\n')) {
        lineEnding = '\r\n';
    }
    else {
        return null;
    }
    const fenceLen = 3 + lineEnding.length;
    // Locate the closing fence: `---` at the start of a line, followed by the
    // line ending or by the end of the input.
    let closeIndex = -1;
    let closeLen = 0;
    for (let at = content.indexOf('---', fenceLen); at !== -1; at = content.indexOf('---', at + 1)) {
        if (content[at - 1] !== '\n') {
            continue;
        }
        const after = at + 3;
        if (after === content.length) {
            closeIndex = at;
            closeLen = 3;
            break;
        }
        if (content.startsWith(lineEnding, after)) {
            closeIndex = at;
            closeLen = fenceLen;
            break;
        }
    }
    if (closeIndex === -1) {
        return null;
    }
    const range = {
        start: 0,
        end: closeIndex + closeLen,
        linesStart: fenceLen,
        linesEnd: closeIndex,
        lineEnding,
    };
    const entries = new Map();
    for (const rawLine of content.slice(range.linesStart, range.linesEnd).split(lineEnding)) {
        const line = rawLine.trim();
        const colonIdx = line.indexOf(':');
        // Lines without a key before the first colon are not key/value entries.
        if (colonIdx <= 0) {
            continue;
        }
        const key = line.slice(0, colonIdx).trim().toLowerCase();
        let value = line.slice(colonIdx + 1).trim();
        // Strip one pair of matching surrounding quotes.
        const first = value.charAt(0);
        const last = value.charAt(value.length - 1);
        if ((first === '"' && last === '"') || (first === "'" && last === "'")) {
            value = value.slice(1, -1).trim();
        }
        if (value) {
            entries.set(key, value);
        }
    }
    return { range, entries };
}
|
|
276
|
-
/**
 * Looks for the first markdown heading near the top of a body.
 *
 * Lines are examined as long as they start within the first
 * `BODY_SCAN_LIMIT` characters.
 *
 * @param {string} content - Markdown body (without frontmatter).
 * @returns {string | undefined} Heading text without its `#` marker, or
 *   `undefined` when no heading is found or the heading text is empty.
 */
function scanBodyForTitle(content) {
    const scanEnd = Math.min(content.length, BODY_SCAN_LIMIT);
    for (let lineStart = 0; lineStart < scanEnd;) {
        const newlineAt = content.indexOf('\n', lineStart);
        const lineStop = newlineAt === -1 ? content.length : newlineAt;
        // trim() also removes a trailing '\r' left over from CRLF line endings.
        const candidate = content.slice(lineStart, lineStop).trim();
        if (candidate !== '' && HEADING_STRICT.test(candidate)) {
            return candidate.replace(HEADING_MARKER, '').trim() || undefined;
        }
        lineStart = lineStop + 1;
    }
    return undefined;
}
|
|
297
|
-
/**
 * Derives a title from raw markdown.
 *
 * A frontmatter `title` (or, failing that, `name`) entry wins; otherwise the
 * first heading found near the top of the body after any frontmatter is used.
 *
 * @param {string} content - Raw markdown text.
 * @returns {string | undefined} The title, if one can be determined.
 */
export function extractTitleFromRawMarkdown(content) {
    const frontmatter = parseFrontmatter(content);
    if (!frontmatter) {
        return scanBodyForTitle(content);
    }
    const declared = frontmatter.entries.get('title') ?? frontmatter.entries.get('name');
    return declared || scanBodyForTitle(content.slice(frontmatter.range.end));
}
|
|
306
|
-
/**
 * Renders a string as a YAML double-quoted scalar.
 *
 * Backslashes are escaped as well as double quotes; escaping only the quotes
 * would let a backslash in the value combine with the following character.
 *
 * @param {string} value - Raw text.
 * @returns {string} The text wrapped in double quotes with `\` and `"` escaped.
 */
function toYamlDoubleQuoted(value) {
    return `"${value.replace(/[\\"]/g, '\\$&')}"`;
}
/**
 * Records the source URL in a markdown document unless it is already present.
 *
 * - Existing frontmatter: a `source:` entry is appended just before the
 *   closing fence, unless the frontmatter already has a `source:` line.
 * - No frontmatter, `config.transform.metadataFormat === 'markdown'`: a
 *   `Source: <url>` line is inserted after the first heading line, or placed
 *   at the very top when there is no heading. Content that already has a
 *   line starting with `source:` is returned unchanged.
 * - No frontmatter, any other format: a new frontmatter block holding only
 *   `source` is prepended.
 *
 * @param {string} content - Markdown text.
 * @param {string} url - Source URL to record.
 * @returns {string} The markdown with the source recorded.
 */
export function addSourceToMarkdown(content, url) {
    const fm = parseFrontmatter(content);
    const useMarkdownFormat = config.transform.metadataFormat === 'markdown';
    if (fm) {
        const fmBody = content.slice(fm.range.linesStart, fm.range.linesEnd);
        if (SOURCE_KEY.test(fmBody)) {
            return content;
        }
        const injection = `source: ${toYamlDoubleQuoted(url)}${fm.range.lineEnding}`;
        return (content.slice(0, fm.range.linesEnd) +
            injection +
            content.slice(fm.range.linesEnd));
    }
    const lineEnding = getLineEnding(content);
    if (!useMarkdownFormat) {
        return `---${lineEnding}source: ${toYamlDoubleQuoted(url)}${lineEnding}---${lineEnding}${lineEnding}${content}`;
    }
    if (SOURCE_KEY.test(content)) {
        return content;
    }
    // HEADING_MARKER matches a heading of any level, not just H1.
    const headingMatch = HEADING_MARKER.exec(content);
    if (!headingMatch) {
        return `Source: ${url}${lineEnding}${lineEnding}${content}`;
    }
    const lineEndIndex = content.indexOf(lineEnding, headingMatch.index);
    const insertPos = lineEndIndex === -1 ? content.length : lineEndIndex + lineEnding.length;
    const injection = `${lineEnding}Source: ${url}${lineEnding}`;
    return content.slice(0, insertPos) + injection + content.slice(insertPos);
}
|
|
337
|
-
// endregion
|
|
338
|
-
// region Content Detection & Metadata Footer
|
|
339
|
-
/**
 * Counts occurrences of common HTML tag openers, stopping early once the
 * count exceeds `limit`.
 *
 * @param {string} content - Text to scan.
 * @param {number} limit - Threshold after which counting stops.
 * @returns {number} `0` for a non-positive limit; otherwise the number of
 *   tags seen, capped at the first count greater than `limit`.
 */
function countCommonTags(content, limit) {
    if (limit <= 0) {
        return 0;
    }
    // A fresh global regex per call keeps lastIndex state local to this scan.
    const tagPattern = /<(html|head|body|div|span|script|style|meta|link)\b/gi;
    let seen = 0;
    while (seen <= limit && tagPattern.exec(content) !== null) {
        seen += 1;
    }
    return seen;
}
|
|
351
|
-
/**
 * Heuristically decides whether content is raw text/markdown rather than HTML.
 *
 * @param {string} content - Fetched body text.
 * @returns {boolean} `false` when the content starts like an HTML document or
 *   contains more than `HTML_TAG_DENSITY_LIMIT` common tags; `true` when it has
 *   frontmatter, a heading marker, a list marker or a code fence.
 */
export function isRawTextContent(content) {
    const body = content.trim();
    if (HTML_DOC_START.test(body)) {
        return false;
    }
    if (parseFrontmatter(body) !== null) {
        return true;
    }
    const tooManyTags = countCommonTags(content, HTML_TAG_DENSITY_LIMIT) > HTML_TAG_DENSITY_LIMIT;
    if (tooManyTags) {
        return false;
    }
    const markdownSignals = [HEADING_MARKER, LIST_MARKER];
    return markdownSignals.some((pattern) => pattern.test(content)) || content.includes('```');
}
|
|
364
|
-
/**
 * Formats a fetch timestamp as a short numeric date in the configured locale.
 *
 * @param {string} value - Date string to format.
 * @returns {string} The formatted date (two-digit day and month, numeric
 *   year), or `value` unchanged when it cannot be parsed as a date.
 */
function formatFetchedAt(value) {
    const parsed = new Date(value);
    if (Number.isNaN(parsed.getTime())) {
        return value;
    }
    const dateOptions = { day: '2-digit', month: '2-digit', year: 'numeric' };
    return new Intl.DateTimeFormat(config.i18n.locale, dateOptions).format(parsed);
}
|
|
375
|
-
/**
 * Builds the markdown footer that summarizes a page's metadata.
 *
 * The footer starts with a `---` rule and a blank line, followed by one line
 * of italic segments (title, author, source link, fetch date) joined with
 * ` | `, and an optional `<sub>` description line.
 *
 * @param {object | null | undefined} metadata - Page metadata.
 * @param {string | undefined} fallbackUrl - URL used when `metadata.url` is empty.
 * @returns {string} The footer, or an empty string when `metadata` is missing.
 */
export function buildMetadataFooter(metadata, fallbackUrl) {
    if (!metadata) {
        return '';
    }
    const sourceUrl = metadata.url || fallbackUrl;
    const segments = [
        metadata.title ? `_${metadata.title}_` : null,
        metadata.author ? `_${metadata.author}_` : null,
        sourceUrl ? `[_Original Source_](${sourceUrl})` : null,
        metadata.fetchedAt ? `_${formatFetchedAt(metadata.fetchedAt)}_` : null,
    ].filter((segment) => segment !== null);
    const footer = ['---', ''];
    if (segments.length > 0) {
        footer.push(` ${segments.join(' | ')}`);
    }
    if (metadata.description) {
        footer.push(` <sub>${metadata.description}</sub>`);
    }
    return footer.join('\n');
}
|
|
396
|
-
// Separators commonly found between the parts of a page title ("Page | Site").
const TITLE_PART_SEPARATOR = /\s*(?:[-|:•·]|–|—)\s*/u;
// A markdown heading line: group 1 is the '#' run, group 2 the heading text.
const LEADING_HEADING_PATTERN = /^(#{1,6})\s+(.+?)\s*$/;
// Number of non-empty lines inspected when looking for a leading heading.
const HEADING_SCAN_LIMIT = 12;
/**
 * Canonicalizes a title for comparison: whitespace runs collapse to a single
 * space, the ends are trimmed and the text is lower-cased.
 *
 * @param {string | null | undefined} value - Title text.
 * @returns {string} Normalized token; empty for nullish input.
 */
export function normalizeSyntheticTitleToken(value) {
    const text = value ?? '';
    return text.trim().split(/\s+/).join(' ').toLowerCase();
}
/**
 * Decides whether the page's primary heading should be used as the title.
 *
 * @param {string | null | undefined} primaryHeading - First heading of the page.
 * @param {string | null | undefined} title - Title taken from metadata.
 * @returns {boolean} `false` when there is no heading; `true` when there is no
 *   title, when both normalize to the same text, or when the heading equals
 *   one separator-delimited part of the title.
 */
export function shouldPreferPrimaryHeadingTitle(primaryHeading, title) {
    const heading = normalizeSyntheticTitleToken(primaryHeading);
    if (heading === '') {
        return false;
    }
    const normalizedTitle = normalizeSyntheticTitleToken(title);
    if (normalizedTitle === '' || normalizedTitle === heading) {
        return true;
    }
    return normalizedTitle.split(TITLE_PART_SEPARATOR).includes(heading);
}
|
|
415
|
-
/**
 * Tells whether a URL points at the root page of a GitHub repository.
 *
 * @param {string} url - URL to inspect.
 * @returns {boolean} `true` for a github.com / www.github.com URL whose path
 *   has exactly two non-empty segments; `false` otherwise, including when the
 *   URL cannot be parsed.
 */
export function isGithubRepositoryRootUrl(url) {
    const target = parseUrlOrNull(url);
    if (!target) {
        return false;
    }
    const host = target.hostname.toLowerCase();
    const isGithubHost = host === 'github.com' || host === 'www.github.com';
    if (!isGithubHost) {
        return false;
    }
    const pathParts = target.pathname.split('/').filter(Boolean);
    return pathParts.length === 2;
}
|
|
425
|
-
/**
 * Removes the first heading of a markdown document when its text matches
 * `headingText`, together with one blank line directly after it.
 *
 * Only the first `HEADING_SCAN_LIMIT` non-empty lines are inspected, and only
 * the first heading among them is considered: if it does not match, the
 * markdown is returned unchanged.
 *
 * @param {string} markdown - Markdown text.
 * @param {string} headingText - Heading text to remove.
 * @returns {string} Markdown without the matching leading heading.
 */
function stripLeadingHeading(markdown, headingText) {
    if (!markdown) {
        return markdown;
    }
    const lines = markdown.split('\n');
    const wanted = normalizeSyntheticTitleToken(headingText);
    let inspected = 0;
    for (const [position, rawLine] of lines.entries()) {
        if (inspected >= HEADING_SCAN_LIMIT) {
            break;
        }
        const text = rawLine.trim();
        if (text === '') {
            continue;
        }
        inspected += 1;
        const heading = LEADING_HEADING_PATTERN.exec(text);
        if (heading === null) {
            continue;
        }
        if (normalizeSyntheticTitleToken(heading[2] ?? '') !== wanted) {
            return markdown;
        }
        // Drop the heading and, when present, the blank line that follows it.
        const followedByBlank = (lines[position + 1] ?? '').trim() === '';
        lines.splice(position, followedByBlank ? 2 : 1);
        return lines.join('\n');
    }
    return markdown;
}
|
|
450
|
-
/**
 * Strips the primary heading from markdown fetched from a GitHub repository
 * root page.
 *
 * @param {string} markdown - Markdown text.
 * @param {string | undefined} primaryHeading - Heading text to remove.
 * @param {string} url - URL the markdown was produced from.
 * @returns {string} The markdown, unchanged unless a heading is given and the
 *   URL is a GitHub repository root.
 */
export function maybeStripGithubPrimaryHeading(markdown, primaryHeading, url) {
    const applies = primaryHeading !== undefined && isGithubRepositoryRootUrl(url);
    return applies ? stripLeadingHeading(markdown, primaryHeading) : markdown;
}
|
|
456
|
-
/**
 * Prepends an H1 built from `context.title` unless the markdown already
 * begins with one.
 *
 * @param {string} markdown - Markdown text.
 * @param {{ title?: string }} context - Holder of the title to prepend.
 * @returns {string} The markdown, with `# <title>` and a blank line in front
 *   when a title is available and the text does not start with an H1.
 */
export function maybePrependSyntheticTitle(markdown, context) {
    const { title } = context;
    const needsTitle = Boolean(title) && !/^#\s/.test(markdown.trimStart());
    return needsTitle ? `# ${title}\n\n${markdown}` : markdown;
}
|
|
462
|
-
// endregion
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"next-flight.d.ts","sourceRoot":"","sources":["../../src/transform/next-flight.ts"],"names":[],"mappings":"AA2bA,wBAAgB,gCAAgC,CAC9C,QAAQ,EAAE,MAAM,EAChB,YAAY,EAAE,MAAM,GACnB,MAAM,CAuCR"}
|