webpeel 0.14.2 → 0.14.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cache.d.ts.map +1 -1
- package/dist/cache.js +11 -4
- package/dist/cache.js.map +1 -1
- package/dist/cli.bundle.cjs +159248 -0
- package/dist/cli.js +1 -1
- package/dist/cli.js.map +1 -1
- package/dist/core/agent.js +12 -8
- package/dist/core/agent.js.map +1 -1
- package/dist/core/application-tracker.js +3 -2
- package/dist/core/application-tracker.js.map +1 -1
- package/dist/core/auto-extract.js +6 -4
- package/dist/core/auto-extract.js.map +1 -1
- package/dist/core/browser-fetch.d.ts +90 -0
- package/dist/core/browser-fetch.d.ts.map +1 -0
- package/dist/core/browser-fetch.js +599 -0
- package/dist/core/browser-fetch.js.map +1 -0
- package/dist/core/browser-pool.d.ts +70 -0
- package/dist/core/browser-pool.d.ts.map +1 -0
- package/dist/core/browser-pool.js +378 -0
- package/dist/core/browser-pool.js.map +1 -0
- package/dist/core/change-tracking.js +3 -2
- package/dist/core/change-tracking.js.map +1 -1
- package/dist/core/diff.js +3 -2
- package/dist/core/diff.js.map +1 -1
- package/dist/core/domain-extractors.js +3 -2
- package/dist/core/domain-extractors.js.map +1 -1
- package/dist/core/extract-inline.js +6 -4
- package/dist/core/extract-inline.js.map +1 -1
- package/dist/core/fetcher.d.ts +9 -116
- package/dist/core/fetcher.d.ts.map +1 -1
- package/dist/core/fetcher.js +10 -1484
- package/dist/core/fetcher.js.map +1 -1
- package/dist/core/http-fetch.d.ts +37 -0
- package/dist/core/http-fetch.d.ts.map +1 -0
- package/dist/core/http-fetch.js +618 -0
- package/dist/core/http-fetch.js.map +1 -0
- package/dist/core/metadata.js +18 -12
- package/dist/core/metadata.js.map +1 -1
- package/dist/core/pipeline.d.ts +104 -0
- package/dist/core/pipeline.d.ts.map +1 -0
- package/dist/core/pipeline.js +623 -0
- package/dist/core/pipeline.js.map +1 -0
- package/dist/core/profiles.js +15 -10
- package/dist/core/profiles.js.map +1 -1
- package/dist/core/quick-answer.d.ts.map +1 -1
- package/dist/core/quick-answer.js +120 -9
- package/dist/core/quick-answer.js.map +1 -1
- package/dist/core/rate-governor.js +3 -2
- package/dist/core/rate-governor.js.map +1 -1
- package/dist/core/readability.d.ts.map +1 -1
- package/dist/core/readability.js +19 -6
- package/dist/core/readability.js.map +1 -1
- package/dist/core/research.js +9 -6
- package/dist/core/research.js.map +1 -1
- package/dist/core/search-provider.js +12 -8
- package/dist/core/search-provider.js.map +1 -1
- package/dist/core/strategies.d.ts.map +1 -1
- package/dist/core/strategies.js +14 -5
- package/dist/core/strategies.js.map +1 -1
- package/dist/core/timing.d.ts +22 -0
- package/dist/core/timing.d.ts.map +1 -0
- package/dist/core/timing.js +34 -0
- package/dist/core/timing.js.map +1 -0
- package/dist/core/youtube.d.ts.map +1 -1
- package/dist/core/youtube.js +19 -6
- package/dist/core/youtube.js.map +1 -1
- package/dist/index.d.ts +1 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +13 -444
- package/dist/index.js.map +1 -1
- package/dist/mcp/server.js +1 -1
- package/dist/mcp/server.js.map +1 -1
- package/dist/server/middleware/auth.js +3 -2
- package/dist/server/middleware/auth.js.map +1 -1
- package/dist/server/routes/answer.d.ts.map +1 -1
- package/dist/server/routes/answer.js +5 -0
- package/dist/server/routes/answer.js.map +1 -1
- package/dist/server/routes/compat.js +3 -2
- package/dist/server/routes/compat.js.map +1 -1
- package/dist/server/routes/deep-fetch.d.ts.map +1 -1
- package/dist/server/routes/deep-fetch.js +5 -0
- package/dist/server/routes/deep-fetch.js.map +1 -1
- package/dist/server/routes/fetch.d.ts.map +1 -1
- package/dist/server/routes/fetch.js +44 -4
- package/dist/server/routes/fetch.js.map +1 -1
- package/dist/server/routes/health.js +3 -2
- package/dist/server/routes/health.js.map +1 -1
- package/dist/server/routes/mcp.js +1 -1
- package/dist/server/routes/mcp.js.map +1 -1
- package/dist/server/routes/quick-answer.d.ts.map +1 -1
- package/dist/server/routes/quick-answer.js +5 -0
- package/dist/server/routes/quick-answer.js.map +1 -1
- package/dist/server/routes/search.js +6 -4
- package/dist/server/routes/search.js.map +1 -1
- package/dist/server/routes/users.js +3 -2
- package/dist/server/routes/users.js.map +1 -1
- package/dist/server/routes/webhooks.d.ts +1 -0
- package/dist/server/routes/webhooks.d.ts.map +1 -1
- package/dist/server/routes/webhooks.js +1 -0
- package/dist/server/routes/webhooks.js.map +1 -1
- package/dist/server/routes/youtube.d.ts.map +1 -1
- package/dist/server/routes/youtube.js +5 -0
- package/dist/server/routes/youtube.js.map +1 -1
- package/dist/types.d.ts +2 -0
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/package.json +5 -2
package/dist/core/metadata.js
CHANGED
|
@@ -69,8 +69,9 @@ function extractPublished($) {
|
|
|
69
69
|
try {
|
|
70
70
|
return new Date(published).toISOString();
|
|
71
71
|
}
|
|
72
|
-
catch {
|
|
73
|
-
|
|
72
|
+
catch (e) {
|
|
73
|
+
if (process.env.DEBUG)
|
|
74
|
+
console.debug('[webpeel]', 'date parse failed:', e instanceof Error ? e.message : e);
|
|
74
75
|
}
|
|
75
76
|
}
|
|
76
77
|
// Try datePublished schema.org
|
|
@@ -79,8 +80,9 @@ function extractPublished($) {
|
|
|
79
80
|
try {
|
|
80
81
|
return new Date(published).toISOString();
|
|
81
82
|
}
|
|
82
|
-
catch {
|
|
83
|
-
|
|
83
|
+
catch (e) {
|
|
84
|
+
if (process.env.DEBUG)
|
|
85
|
+
console.debug('[webpeel]', 'date parse failed:', e instanceof Error ? e.message : e);
|
|
84
86
|
}
|
|
85
87
|
}
|
|
86
88
|
return undefined;
|
|
@@ -139,8 +141,9 @@ export function extractLinks(html, baseUrl) {
|
|
|
139
141
|
}
|
|
140
142
|
links.add(absoluteUrl.href);
|
|
141
143
|
}
|
|
142
|
-
catch {
|
|
143
|
-
|
|
144
|
+
catch (e) {
|
|
145
|
+
if (process.env.DEBUG)
|
|
146
|
+
console.debug('[webpeel]', 'url parse failed:', e instanceof Error ? e.message : e);
|
|
144
147
|
}
|
|
145
148
|
});
|
|
146
149
|
return Array.from(links).sort();
|
|
@@ -184,8 +187,9 @@ export function extractImages(html, baseUrl) {
|
|
|
184
187
|
// Deduplicate by src
|
|
185
188
|
images.set(absoluteUrl.href, imageInfo);
|
|
186
189
|
}
|
|
187
|
-
catch {
|
|
188
|
-
|
|
190
|
+
catch (e) {
|
|
191
|
+
if (process.env.DEBUG)
|
|
192
|
+
console.debug('[webpeel]', 'url parse failed:', e instanceof Error ? e.message : e);
|
|
189
193
|
}
|
|
190
194
|
});
|
|
191
195
|
// Extract <picture><source> tags
|
|
@@ -214,8 +218,9 @@ export function extractImages(html, baseUrl) {
|
|
|
214
218
|
};
|
|
215
219
|
images.set(absoluteUrl.href, imageInfo);
|
|
216
220
|
}
|
|
217
|
-
catch {
|
|
218
|
-
|
|
221
|
+
catch (e) {
|
|
222
|
+
if (process.env.DEBUG)
|
|
223
|
+
console.debug('[webpeel]', 'url parse failed:', e instanceof Error ? e.message : e);
|
|
219
224
|
}
|
|
220
225
|
});
|
|
221
226
|
});
|
|
@@ -244,8 +249,9 @@ export function extractImages(html, baseUrl) {
|
|
|
244
249
|
};
|
|
245
250
|
images.set(absoluteUrl.href, imageInfo);
|
|
246
251
|
}
|
|
247
|
-
catch {
|
|
248
|
-
|
|
252
|
+
catch (e) {
|
|
253
|
+
if (process.env.DEBUG)
|
|
254
|
+
console.debug('[webpeel]', 'url parse failed:', e instanceof Error ? e.message : e);
|
|
249
255
|
}
|
|
250
256
|
});
|
|
251
257
|
});
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"metadata.js","sourceRoot":"","sources":["../../src/core/metadata.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAGnC;;;GAGG;AACH,SAAS,YAAY,CAAC,CAAqB;IACzC,uBAAuB;IACvB,IAAI,KAAK,GAAG,CAAC,CAAC,2BAA2B,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAC3D,IAAI,KAAK;QAAE,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC;IAE/B,oBAAoB;IACpB,KAAK,GAAG,CAAC,CAAC,4BAA4B,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IACxD,IAAI,KAAK;QAAE,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC;IAE/B,gBAAgB;IAChB,KAAK,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC;IAC1B,IAAI,KAAK;QAAE,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC;IAE/B,uBAAuB;IACvB,KAAK,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,EAAE,CAAC;IAC/B,IAAI,KAAK;QAAE,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC;IAE/B,OAAO,EAAE,CAAC;AACZ,CAAC;AAED;;;GAGG;AACH,SAAS,kBAAkB,CAAC,CAAqB;IAC/C,6BAA6B;IAC7B,IAAI,IAAI,GAAG,CAAC,CAAC,iCAAiC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAChE,IAAI,IAAI;QAAE,OAAO,IAAI,CAAC,IAAI,EAAE,CAAC;IAE7B,0BAA0B;IAC1B,IAAI,GAAG,CAAC,CAAC,kCAAkC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAC7D,IAAI,IAAI;QAAE,OAAO,IAAI,CAAC,IAAI,EAAE,CAAC;IAE7B,gCAAgC;IAChC,IAAI,GAAG,CAAC,CAAC,0BAA0B,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IACrD,IAAI,IAAI;QAAE,OAAO,IAAI,CAAC,IAAI,EAAE,CAAC;IAE7B,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;GAEG;AACH,SAAS,aAAa,CAAC,CAAqB;IAC1C,qBAAqB;IACrB,IAAI,MAAM,GAAG,CAAC,CAAC,iCAAiC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAClE,IAAI,MAAM;QAAE,OAAO,MAAM,CAAC,IAAI,EAAE,CAAC;IAEjC,sBAAsB;IACtB,MAAM,GAAG,CAAC,CAAC,qBAAqB,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAClD,IAAI,MAAM;QAAE,OAAO,MAAM,CAAC,IAAI,EAAE,CAAC;IAEjC,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;;GAGG;AACH,SAAS,gBAAgB,CAAC,CAAqB;IAC7C,6BAA6B;IAC7B,IAAI,SAAS,GAAG,CAAC,CAAC,yCAAyC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAC7E,IAAI,SAAS,EAAE,CAAC;QACd,IAAI,CAAC;YACH,OAAO,IAAI,IAAI,CAAC,SAAS,CAAC,CAAC,WAAW,EAAE,CAAC;QAC3C,CAAC;QAAC,
|
|
1
|
+
{"version":3,"file":"metadata.js","sourceRoot":"","sources":["../../src/core/metadata.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAGnC;;;GAGG;AACH,SAAS,YAAY,CAAC,CAAqB;IACzC,uBAAuB;IACvB,IAAI,KAAK,GAAG,CAAC,CAAC,2BAA2B,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAC3D,IAAI,KAAK;QAAE,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC;IAE/B,oBAAoB;IACpB,KAAK,GAAG,CAAC,CAAC,4BAA4B,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IACxD,IAAI,KAAK;QAAE,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC;IAE/B,gBAAgB;IAChB,KAAK,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC;IAC1B,IAAI,KAAK;QAAE,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC;IAE/B,uBAAuB;IACvB,KAAK,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,EAAE,CAAC;IAC/B,IAAI,KAAK;QAAE,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC;IAE/B,OAAO,EAAE,CAAC;AACZ,CAAC;AAED;;;GAGG;AACH,SAAS,kBAAkB,CAAC,CAAqB;IAC/C,6BAA6B;IAC7B,IAAI,IAAI,GAAG,CAAC,CAAC,iCAAiC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAChE,IAAI,IAAI;QAAE,OAAO,IAAI,CAAC,IAAI,EAAE,CAAC;IAE7B,0BAA0B;IAC1B,IAAI,GAAG,CAAC,CAAC,kCAAkC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAC7D,IAAI,IAAI;QAAE,OAAO,IAAI,CAAC,IAAI,EAAE,CAAC;IAE7B,gCAAgC;IAChC,IAAI,GAAG,CAAC,CAAC,0BAA0B,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IACrD,IAAI,IAAI;QAAE,OAAO,IAAI,CAAC,IAAI,EAAE,CAAC;IAE7B,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;GAEG;AACH,SAAS,aAAa,CAAC,CAAqB;IAC1C,qBAAqB;IACrB,IAAI,MAAM,GAAG,CAAC,CAAC,iCAAiC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAClE,IAAI,MAAM;QAAE,OAAO,MAAM,CAAC,IAAI,EAAE,CAAC;IAEjC,sBAAsB;IACtB,MAAM,GAAG,CAAC,CAAC,qBAAqB,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAClD,IAAI,MAAM;QAAE,OAAO,MAAM,CAAC,IAAI,EAAE,CAAC;IAEjC,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;;GAGG;AACH,SAAS,gBAAgB,CAAC,CAAqB;IAC7C,6BAA6B;IAC7B,IAAI,SAAS,GAAG,CAAC,CAAC,yCAAyC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAC7E,IAAI,SAAS,EAAE,CAAC;QACd,IAAI,CAAC;YACH,OAAO,IAAI,IAAI,CAAC,SAAS,CAAC,CAAC,WAAW,EAAE,CAAC;QAC3C,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,IAAI,OAAO,CAAC,GAAG,CAAC,KAAK;gBAAE,OAAO,CAAC,KAAK,CAAC,WAAW,EAAE,oBAAoB,EAAE,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAC9G,CAAC;IACH,CAA
C;IAED,+BAA+B;IAC/B,SAAS,GAAG,CAAC,CAAC,gCAAgC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAChE,IAAI,SAAS,EAAE,CAAC;QACd,IAAI,CAAC;YACH,OAAO,IAAI,IAAI,CAAC,SAAS,CAAC,CAAC,WAAW,EAAE,CAAC;QAC3C,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,IAAI,OAAO,CAAC,GAAG,CAAC,KAAK;gBAAE,OAAO,CAAC,KAAK,CAAC,WAAW,EAAE,oBAAoB,EAAE,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAC9G,CAAC;IACH,CAAC;IAED,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;GAEG;AACH,SAAS,YAAY,CAAC,CAAqB;IACzC,eAAe;IACf,IAAI,KAAK,GAAG,CAAC,CAAC,2BAA2B,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAC3D,IAAI,KAAK;QAAE,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC;IAE/B,oBAAoB;IACpB,KAAK,GAAG,CAAC,CAAC,4BAA4B,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IACxD,IAAI,KAAK;QAAE,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC;IAE/B,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;GAEG;AACH,SAAS,gBAAgB,CAAC,CAAqB;IAC7C,MAAM,SAAS,GAAG,CAAC,CAAC,uBAAuB,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAC1D,IAAI,SAAS;QAAE,OAAO,SAAS,CAAC,IAAI,EAAE,CAAC;IAEvC,qBAAqB;IACrB,MAAM,KAAK,GAAG,CAAC,CAAC,yBAAyB,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAC3D,IAAI,KAAK;QAAE,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC;IAE/B,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,YAAY,CAAC,IAAY,EAAE,OAAe;IACxD,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC7B,MAAM,KAAK,GAAG,IAAI,GAAG,EAAU,CAAC;IAEhC,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE;QAC5B,MAAM,IAAI,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAClC,IAAI,CAAC,IAAI;YAAE,OAAO;QAElB,IAAI,CAAC;YACH,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;YAE3C,gDAAgD;YAChD,IAAI,CAAC,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC,QAAQ,CAAC,WAAW,CAAC,QAAQ,CAAC,EAAE,CAAC;gBACxD,OAAO;YACT,CAAC;YAED,iDAAiD;YACjD,MAAM,cAAc,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC;YACxC,IAAI,WAAW,CAAC,IAAI;gBAChB,WAAW,CAAC,MAAM,KAAK,cAAc,CAAC,MAAM;gBAC5C,WAAW,CAAC,QAAQ,KAAK,cAAc,CAAC,QAAQ;gBAChD,WAAW,CAAC,MAAM,KAAK,cAAc,CAAC,MAAM,EAAE,CAAC;gBACjD,OAAO;YACT,CAAC;YAED,KAAK,CAAC,GAAG,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;QAC9B,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,IAAI,OAAO,CAAC,GAAG,CAAC,KAAK;gBAAE,OAAO,CAAC,KAAK,C
AAC,WAAW,EAAE,mBAAmB,EAAE,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAC7G,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,OAAO,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,EAAE,CAAC;AAClC,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,aAAa,CAAC,IAAY,EAAE,OAAe;IACzD,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC7B,MAAM,MAAM,GAAG,IAAI,GAAG,EAA2C,CAAC;IAElE,qBAAqB;IACrB,CAAC,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE;QAC7B,MAAM,IAAI,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC;QACrB,MAAM,GAAG,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAC7B,IAAI,CAAC,GAAG;YAAE,OAAO;QAEjB,IAAI,CAAC;YACH,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;YAE1C,gDAAgD;YAChD,IAAI,CAAC,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC,QAAQ,CAAC,WAAW,CAAC,QAAQ,CAAC,EAAE,CAAC;gBACxD,OAAO;YACT,CAAC;YAED,MAAM,GAAG,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;YACnC,MAAM,KAAK,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACjC,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACpC,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YAEtC,MAAM,KAAK,GAAG,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;YAC5D,MAAM,MAAM,GAAG,SAAS,CAAC,CAAC,CAAC,QAAQ,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;YAE/D,MAAM,SAAS,GAAoC;gBACjD,GAAG,EAAE,WAAW,CAAC,IAAI;gBACrB,GAAG;gBACH,KAAK;gBACL,KAAK,EAAE,KAAK,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS;gBACjD,MAAM,EAAE,MAAM,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,SAAS;aACtD,CAAC;YAEF,qBAAqB;YACrB,MAAM,CAAC,GAAG,CAAC,WAAW,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;QAC1C,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,IAAI,OAAO,CAAC,GAAG,CAAC,KAAK;gBAAE,OAAO,CAAC,KAAK,CAAC,WAAW,EAAE,mBAAmB,EAAE,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAC7G,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,iCAAiC;IACjC,CAAC,CAAC,wBAAwB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE;QAC3C,MAAM,OAAO,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC;QACxB,MAAM,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QACtC,IAAI,CAAC,MAAM;YAAE,OAAO;QAEpB,kEAAkE;QACl
E,MAAM,WAAW,GAAG,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;QACzD,WAAW,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE;YACzB,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;YACjC,IAAI,CAAC,GAAG;gBAAE,OAAO;YAEjB,IAAI,CAAC;gBACH,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;gBAE1C,gDAAgD;gBAChD,IAAI,CAAC,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC,QAAQ,CAAC,WAAW,CAAC,QAAQ,CAAC,EAAE,CAAC;oBACxD,OAAO;gBACT,CAAC;gBAED,2CAA2C;gBAC3C,MAAM,GAAG,GAAG,OAAO,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;gBAErE,MAAM,SAAS,GAAoC;oBACjD,GAAG,EAAE,WAAW,CAAC,IAAI;oBACrB,GAAG;iBACJ,CAAC;gBAEF,MAAM,CAAC,GAAG,CAAC,WAAW,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;YAC1C,CAAC;YAAC,OAAO,CAAC,EAAE,CAAC;gBACX,IAAI,OAAO,CAAC,GAAG,CAAC,KAAK;oBAAE,OAAO,CAAC,KAAK,CAAC,WAAW,EAAE,mBAAmB,EAAE,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YAC7G,CAAC;QACH,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,gCAAgC;IAChC,CAAC,CAAC,uBAAuB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE;QAC1C,MAAM,KAAK,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACpC,IAAI,CAAC,KAAK;YAAE,OAAO;QAEnB,qBAAqB;QACrB,MAAM,UAAU,GAAG,KAAK,CAAC,KAAK,CAAC,+BAA+B,CAAC,CAAC;QAChE,IAAI,CAAC,UAAU;YAAE,OAAO;QAExB,UAAU,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE;YACzB,MAAM,GAAG,GAAG,KAAK,CAAC,OAAO,CAAC,8BAA8B,EAAE,IAAI,CAAC,CAAC;YAChE,IAAI,CAAC,GAAG;gBAAE,OAAO;YAEjB,IAAI,CAAC;gBACH,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;gBAE1C,gDAAgD;gBAChD,IAAI,CAAC,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC,QAAQ,CAAC,WAAW,CAAC,QAAQ,CAAC,EAAE,CAAC;oBACxD,OAAO;gBACT,CAAC;gBAED,MAAM,SAAS,GAAoC;oBACjD,GAAG,EAAE,WAAW,CAAC,IAAI;oBACrB,GAAG,EAAE,EAAE,EAAE,wCAAwC;iBAClD,CAAC;gBAEF,MAAM,CAAC,GAAG,CAAC,WAAW,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;YAC1C,CAAC;YAAC,OAAO,CAAC,EAAE,CAAC;gBACX,IAAI,OAAO,CAAC,GAAG,CAAC,KAAK;oBAAE,OAAO,CAAC,KAAK,CAAC,WAAW,EAAE,mBAAmB,EAAE,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YAC7G,CAAC;QACH,CAAC,CAAC,CAAC;IACL,CAAC,
CAAC,CAAC;IAEH,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;AACrC,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,eAAe,CAAC,IAAY,EAAE,IAAY;IACxD,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,MAAM,KAAK,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC;IAC9B,MAAM,QAAQ,GAAiB;QAC7B,WAAW,EAAE,kBAAkB,CAAC,CAAC,CAAC;QAClC,MAAM,EAAE,aAAa,CAAC,CAAC,CAAC;QACxB,SAAS,EAAE,gBAAgB,CAAC,CAAC,CAAC;QAC9B,KAAK,EAAE,YAAY,CAAC,CAAC,CAAC;QACtB,SAAS,EAAE,gBAAgB,CAAC,CAAC,CAAC;KAC/B,CAAC;IAEF,OAAO,EAAE,KAAK,EAAE,QAAQ,EAAE,CAAC;AAC7B,CAAC"}
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* WebPeel pipeline stages
|
|
3
|
+
*
|
|
4
|
+
* Each stage is an exported function (some async, some synchronous) that reads from / writes to the
|
|
5
|
+
* mutable PipelineContext. The stages are called in order by peel().
|
|
6
|
+
*/
|
|
7
|
+
import { type AutoScrollOptions } from './actions.js';
|
|
8
|
+
import { type DomainExtractResult } from './domain-extractors.js';
|
|
9
|
+
import { type ReadabilityResult } from './readability.js';
|
|
10
|
+
import { type QuickAnswerResult } from './quick-answer.js';
|
|
11
|
+
import { Timer } from './timing.js';
|
|
12
|
+
import type { PeelOptions, PeelResult, ImageInfo } from '../types.js';
|
|
13
|
+
import type { BrandingProfile } from './branding.js';
|
|
14
|
+
import type { ChangeResult } from './change-tracking.js';
|
|
15
|
+
/** Mutable context threaded through pipeline stages */
|
|
16
|
+
export interface PipelineContext {
|
|
17
|
+
url: string;
|
|
18
|
+
options: PeelOptions;
|
|
19
|
+
timer: Timer;
|
|
20
|
+
startTime: number;
|
|
21
|
+
render: boolean;
|
|
22
|
+
stealth: boolean;
|
|
23
|
+
wait: number;
|
|
24
|
+
format: 'markdown' | 'text' | 'html';
|
|
25
|
+
timeout: number;
|
|
26
|
+
userAgent?: string;
|
|
27
|
+
screenshot: boolean;
|
|
28
|
+
screenshotFullPage: boolean;
|
|
29
|
+
selector?: string;
|
|
30
|
+
exclude?: string[];
|
|
31
|
+
includeTags?: string[];
|
|
32
|
+
excludeTags?: string[];
|
|
33
|
+
headers?: Record<string, string>;
|
|
34
|
+
cookies?: string[];
|
|
35
|
+
raw: boolean;
|
|
36
|
+
actions?: any[];
|
|
37
|
+
extract?: any;
|
|
38
|
+
maxTokens?: number;
|
|
39
|
+
extractImagesFlag: boolean;
|
|
40
|
+
profileDir?: string;
|
|
41
|
+
headed: boolean;
|
|
42
|
+
storageState?: any;
|
|
43
|
+
proxy?: string;
|
|
44
|
+
fullPage: boolean;
|
|
45
|
+
autoScrollOpts?: AutoScrollOptions;
|
|
46
|
+
fetchResult?: any;
|
|
47
|
+
contentType: 'document' | 'html' | 'json' | 'xml' | 'text';
|
|
48
|
+
content: string;
|
|
49
|
+
title: string;
|
|
50
|
+
metadata: any;
|
|
51
|
+
links: string[];
|
|
52
|
+
quality: number;
|
|
53
|
+
prunedPercent?: number;
|
|
54
|
+
readabilityResult?: ReadabilityResult;
|
|
55
|
+
imagesList?: ImageInfo[];
|
|
56
|
+
extracted?: Record<string, any>;
|
|
57
|
+
domainData?: DomainExtractResult;
|
|
58
|
+
quickAnswerResult?: QuickAnswerResult;
|
|
59
|
+
brandingProfile?: BrandingProfile;
|
|
60
|
+
changeResult?: ChangeResult;
|
|
61
|
+
summaryText?: string;
|
|
62
|
+
screenshotBase64?: string;
|
|
63
|
+
}
|
|
64
|
+
/** Create the initial PipelineContext with defaults */
|
|
65
|
+
export declare function createContext(url: string, options: PeelOptions): PipelineContext;
|
|
66
|
+
/**
|
|
67
|
+
* Resolve all PeelOptions values into flat context fields with defaults applied.
|
|
68
|
+
* Force render=true when screenshot/stealth/actions/branding/autoScroll requested.
|
|
69
|
+
* Parse the autoScroll option.
|
|
70
|
+
*/
|
|
71
|
+
export declare function normalizeOptions(ctx: PipelineContext): void;
|
|
72
|
+
/**
|
|
73
|
+
* If the URL is a YouTube URL, attempt transcript extraction.
|
|
74
|
+
* Returns a PeelResult on success, or null to fall through to normal pipeline.
|
|
75
|
+
*/
|
|
76
|
+
export declare function handleYouTube(ctx: PipelineContext): Promise<PeelResult | null>;
|
|
77
|
+
/**
|
|
78
|
+
* Fetch the URL via smartFetch, handle autoScroll, and store result in ctx.fetchResult.
|
|
79
|
+
*/
|
|
80
|
+
export declare function fetchContent(ctx: PipelineContext): Promise<void>;
|
|
81
|
+
/**
|
|
82
|
+
* Detect and set ctx.contentType based on response headers and content.
|
|
83
|
+
*/
|
|
84
|
+
export declare function detectContentType(ctx: PipelineContext): void;
|
|
85
|
+
/**
|
|
86
|
+
* Parse content from fetchResult based on the detected contentType.
|
|
87
|
+
* Sets ctx.content, ctx.title, ctx.metadata, ctx.links, ctx.quality, ctx.prunedPercent.
|
|
88
|
+
*/
|
|
89
|
+
export declare function parseContent(ctx: PipelineContext): Promise<void>;
|
|
90
|
+
/**
|
|
91
|
+
* Run all post-processing in sequence:
|
|
92
|
+
* readability, image extraction, structured extraction,
|
|
93
|
+
* maxTokens truncation, budget distillation, domain extractors, quick answer.
|
|
94
|
+
*/
|
|
95
|
+
export declare function postProcess(ctx: PipelineContext): Promise<void>;
|
|
96
|
+
/**
|
|
97
|
+
* Screenshot base64 conversion, branding extraction (needs page), change tracking, AI summary.
|
|
98
|
+
*/
|
|
99
|
+
export declare function finalize(ctx: PipelineContext): Promise<void>;
|
|
100
|
+
/**
|
|
101
|
+
* Assemble the final PeelResult from the pipeline context.
|
|
102
|
+
*/
|
|
103
|
+
export declare function buildResult(ctx: PipelineContext): PeelResult;
|
|
104
|
+
//# sourceMappingURL=pipeline.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../../src/core/pipeline.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAiBH,OAAO,EAA+B,KAAK,iBAAiB,EAAE,MAAM,cAAc,CAAC;AAInF,OAAO,EAAyC,KAAK,mBAAmB,EAAE,MAAM,wBAAwB,CAAC;AACzG,OAAO,EAA0B,KAAK,iBAAiB,EAAE,MAAM,kBAAkB,CAAC;AAClF,OAAO,EAAiC,KAAK,iBAAiB,EAAE,MAAM,mBAAmB,CAAC;AAC1F,OAAO,EAAE,KAAK,EAAE,MAAM,aAAa,CAAC;AACpC,OAAO,KAAK,EAAE,WAAW,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AACtE,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AACrD,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AAEzD,uDAAuD;AACvD,MAAM,WAAW,eAAe;IAC9B,GAAG,EAAE,MAAM,CAAC;IACZ,OAAO,EAAE,WAAW,CAAC;IACrB,KAAK,EAAE,KAAK,CAAC;IACb,SAAS,EAAE,MAAM,CAAC;IAGlB,MAAM,EAAE,OAAO,CAAC;IAChB,OAAO,EAAE,OAAO,CAAC;IACjB,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,UAAU,GAAG,MAAM,GAAG,MAAM,CAAC;IACrC,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,OAAO,CAAC;IACpB,kBAAkB,EAAE,OAAO,CAAC;IAC5B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,EAAE,CAAC;IACvB,WAAW,CAAC,EAAE,MAAM,EAAE,CAAC;IACvB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,GAAG,EAAE,OAAO,CAAC;IACb,OAAO,CAAC,EAAE,GAAG,EAAE,CAAC;IAChB,OAAO,CAAC,EAAE,GAAG,CAAC;IACd,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,iBAAiB,EAAE,OAAO,CAAC;IAC3B,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,MAAM,EAAE,OAAO,CAAC;IAChB,YAAY,CAAC,EAAE,GAAG,CAAC;IACnB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,OAAO,CAAC;IAClB,cAAc,CAAC,EAAE,iBAAiB,CAAC;IAGnC,WAAW,CAAC,EAAE,GAAG,CAAC;IAGlB,WAAW,EAAE,UAAU,GAAG,MAAM,GAAG,MAAM,GAAG,KAAK,GAAG,MAAM,CAAC;IAG3D,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,GAAG,CAAC;IACd,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,OAAO,EAAE,MAAM,CAAC;IAChB,aAAa,CAAC,EAAE,MAAM,CAAC;IAGvB,iBAAiB,CAAC,EAAE,iBAAiB,CAAC;IACtC,UAAU,CAAC,EAAE,SAAS,EAAE,CAAC;IACzB,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAChC,UAAU,CAAC,EAAE,mBAAmB,CAAC;IACjC,iBAAiB,CAAC,EAAE,iBAAiB,CAAC;IACtC,eAAe,CAAC,EAAE,eAAe,CAAC;IAClC,YAAY,CAAC,EAAE,YAAY,CAAC;IAC5B,WAAW,CAAC,EAAE,
MAAM,CAAC;IACrB,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAED,uDAAuD;AACvD,wBAAgB,aAAa,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,EAAE,WAAW,GAAG,eAAe,CA4ChF;AAMD;;;;GAIG;AACH,wBAAgB,gBAAgB,CAAC,GAAG,EAAE,eAAe,GAAG,IAAI,CA+F3D;AAMD;;;GAGG;AACH,wBAAsB,aAAa,CAAC,GAAG,EAAE,eAAe,GAAG,OAAO,CAAC,UAAU,GAAG,IAAI,CAAC,CAsDpF;AAMD;;GAEG;AACH,wBAAsB,YAAY,CAAC,GAAG,EAAE,eAAe,GAAG,OAAO,CAAC,IAAI,CAAC,CAmDtE;AAMD;;GAEG;AACH,wBAAgB,iBAAiB,CAAC,GAAG,EAAE,eAAe,GAAG,IAAI,CAe5D;AAMD;;;GAGG;AACH,wBAAsB,YAAY,CAAC,GAAG,EAAE,eAAe,GAAG,OAAO,CAAC,IAAI,CAAC,CA8JtE;AAMD;;;;GAIG;AACH,wBAAsB,WAAW,CAAC,GAAG,EAAE,eAAe,GAAG,OAAO,CAAC,IAAI,CAAC,CAoFrE;AAMD;;GAEG;AACH,wBAAsB,QAAQ,CAAC,GAAG,EAAE,eAAe,GAAG,OAAO,CAAC,IAAI,CAAC,CAyDlE;AAMD;;GAEG;AACH,wBAAgB,WAAW,CAAC,GAAG,EAAE,eAAe,GAAG,UAAU,CA8B5D"}
|