webpeel 0.14.3 → 0.14.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cache.d.ts.map +1 -1
- package/dist/cache.js +11 -4
- package/dist/cache.js.map +1 -1
- package/dist/cli.bundle.cjs +159248 -0
- package/dist/core/agent.js +12 -8
- package/dist/core/agent.js.map +1 -1
- package/dist/core/application-tracker.js +3 -2
- package/dist/core/application-tracker.js.map +1 -1
- package/dist/core/auto-extract.js +6 -4
- package/dist/core/auto-extract.js.map +1 -1
- package/dist/core/browser-fetch.d.ts +90 -0
- package/dist/core/browser-fetch.d.ts.map +1 -0
- package/dist/core/browser-fetch.js +599 -0
- package/dist/core/browser-fetch.js.map +1 -0
- package/dist/core/browser-pool.d.ts +70 -0
- package/dist/core/browser-pool.d.ts.map +1 -0
- package/dist/core/browser-pool.js +378 -0
- package/dist/core/browser-pool.js.map +1 -0
- package/dist/core/change-tracking.js +3 -2
- package/dist/core/change-tracking.js.map +1 -1
- package/dist/core/diff.js +3 -2
- package/dist/core/diff.js.map +1 -1
- package/dist/core/domain-extractors.js +3 -2
- package/dist/core/domain-extractors.js.map +1 -1
- package/dist/core/extract-inline.js +6 -4
- package/dist/core/extract-inline.js.map +1 -1
- package/dist/core/fetcher.d.ts +9 -118
- package/dist/core/fetcher.d.ts.map +1 -1
- package/dist/core/fetcher.js +10 -1525
- package/dist/core/fetcher.js.map +1 -1
- package/dist/core/http-fetch.d.ts +37 -0
- package/dist/core/http-fetch.d.ts.map +1 -0
- package/dist/core/http-fetch.js +618 -0
- package/dist/core/http-fetch.js.map +1 -0
- package/dist/core/metadata.js +18 -12
- package/dist/core/metadata.js.map +1 -1
- package/dist/core/pipeline.d.ts +104 -0
- package/dist/core/pipeline.d.ts.map +1 -0
- package/dist/core/pipeline.js +623 -0
- package/dist/core/pipeline.js.map +1 -0
- package/dist/core/profiles.js +15 -10
- package/dist/core/profiles.js.map +1 -1
- package/dist/core/quick-answer.d.ts.map +1 -1
- package/dist/core/quick-answer.js +120 -9
- package/dist/core/quick-answer.js.map +1 -1
- package/dist/core/rate-governor.js +3 -2
- package/dist/core/rate-governor.js.map +1 -1
- package/dist/core/research.js +9 -6
- package/dist/core/research.js.map +1 -1
- package/dist/core/search-provider.js +12 -8
- package/dist/core/search-provider.js.map +1 -1
- package/dist/core/youtube.js +3 -2
- package/dist/core/youtube.js.map +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +12 -487
- package/dist/index.js.map +1 -1
- package/dist/mcp/server.js +1 -1
- package/dist/mcp/server.js.map +1 -1
- package/dist/server/middleware/auth.js +3 -2
- package/dist/server/middleware/auth.js.map +1 -1
- package/dist/server/routes/compat.js +3 -2
- package/dist/server/routes/compat.js.map +1 -1
- package/dist/server/routes/fetch.d.ts.map +1 -1
- package/dist/server/routes/fetch.js +44 -4
- package/dist/server/routes/fetch.js.map +1 -1
- package/dist/server/routes/health.js +3 -2
- package/dist/server/routes/health.js.map +1 -1
- package/dist/server/routes/mcp.js +1 -1
- package/dist/server/routes/mcp.js.map +1 -1
- package/dist/server/routes/search.js +6 -4
- package/dist/server/routes/search.js.map +1 -1
- package/dist/server/routes/users.js +3 -2
- package/dist/server/routes/users.js.map +1 -1
- package/package.json +1 -1
package/dist/core/metadata.js
CHANGED
|
@@ -69,8 +69,9 @@ function extractPublished($) {
|
|
|
69
69
|
try {
|
|
70
70
|
return new Date(published).toISOString();
|
|
71
71
|
}
|
|
72
|
-
catch {
|
|
73
|
-
|
|
72
|
+
catch (e) {
|
|
73
|
+
if (process.env.DEBUG)
|
|
74
|
+
console.debug('[webpeel]', 'date parse failed:', e instanceof Error ? e.message : e);
|
|
74
75
|
}
|
|
75
76
|
}
|
|
76
77
|
// Try datePublished schema.org
|
|
@@ -79,8 +80,9 @@ function extractPublished($) {
|
|
|
79
80
|
try {
|
|
80
81
|
return new Date(published).toISOString();
|
|
81
82
|
}
|
|
82
|
-
catch {
|
|
83
|
-
|
|
83
|
+
catch (e) {
|
|
84
|
+
if (process.env.DEBUG)
|
|
85
|
+
console.debug('[webpeel]', 'date parse failed:', e instanceof Error ? e.message : e);
|
|
84
86
|
}
|
|
85
87
|
}
|
|
86
88
|
return undefined;
|
|
@@ -139,8 +141,9 @@ export function extractLinks(html, baseUrl) {
|
|
|
139
141
|
}
|
|
140
142
|
links.add(absoluteUrl.href);
|
|
141
143
|
}
|
|
142
|
-
catch {
|
|
143
|
-
|
|
144
|
+
catch (e) {
|
|
145
|
+
if (process.env.DEBUG)
|
|
146
|
+
console.debug('[webpeel]', 'url parse failed:', e instanceof Error ? e.message : e);
|
|
144
147
|
}
|
|
145
148
|
});
|
|
146
149
|
return Array.from(links).sort();
|
|
@@ -184,8 +187,9 @@ export function extractImages(html, baseUrl) {
|
|
|
184
187
|
// Deduplicate by src
|
|
185
188
|
images.set(absoluteUrl.href, imageInfo);
|
|
186
189
|
}
|
|
187
|
-
catch {
|
|
188
|
-
|
|
190
|
+
catch (e) {
|
|
191
|
+
if (process.env.DEBUG)
|
|
192
|
+
console.debug('[webpeel]', 'url parse failed:', e instanceof Error ? e.message : e);
|
|
189
193
|
}
|
|
190
194
|
});
|
|
191
195
|
// Extract <picture><source> tags
|
|
@@ -214,8 +218,9 @@ export function extractImages(html, baseUrl) {
|
|
|
214
218
|
};
|
|
215
219
|
images.set(absoluteUrl.href, imageInfo);
|
|
216
220
|
}
|
|
217
|
-
catch {
|
|
218
|
-
|
|
221
|
+
catch (e) {
|
|
222
|
+
if (process.env.DEBUG)
|
|
223
|
+
console.debug('[webpeel]', 'url parse failed:', e instanceof Error ? e.message : e);
|
|
219
224
|
}
|
|
220
225
|
});
|
|
221
226
|
});
|
|
@@ -244,8 +249,9 @@ export function extractImages(html, baseUrl) {
|
|
|
244
249
|
};
|
|
245
250
|
images.set(absoluteUrl.href, imageInfo);
|
|
246
251
|
}
|
|
247
|
-
catch {
|
|
248
|
-
|
|
252
|
+
catch (e) {
|
|
253
|
+
if (process.env.DEBUG)
|
|
254
|
+
console.debug('[webpeel]', 'url parse failed:', e instanceof Error ? e.message : e);
|
|
249
255
|
}
|
|
250
256
|
});
|
|
251
257
|
});
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"metadata.js","sourceRoot":"","sources":["../../src/core/metadata.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAGnC;;;GAGG;AACH,SAAS,YAAY,CAAC,CAAqB;IACzC,uBAAuB;IACvB,IAAI,KAAK,GAAG,CAAC,CAAC,2BAA2B,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAC3D,IAAI,KAAK;QAAE,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC;IAE/B,oBAAoB;IACpB,KAAK,GAAG,CAAC,CAAC,4BAA4B,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IACxD,IAAI,KAAK;QAAE,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC;IAE/B,gBAAgB;IAChB,KAAK,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC;IAC1B,IAAI,KAAK;QAAE,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC;IAE/B,uBAAuB;IACvB,KAAK,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,EAAE,CAAC;IAC/B,IAAI,KAAK;QAAE,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC;IAE/B,OAAO,EAAE,CAAC;AACZ,CAAC;AAED;;;GAGG;AACH,SAAS,kBAAkB,CAAC,CAAqB;IAC/C,6BAA6B;IAC7B,IAAI,IAAI,GAAG,CAAC,CAAC,iCAAiC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAChE,IAAI,IAAI;QAAE,OAAO,IAAI,CAAC,IAAI,EAAE,CAAC;IAE7B,0BAA0B;IAC1B,IAAI,GAAG,CAAC,CAAC,kCAAkC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAC7D,IAAI,IAAI;QAAE,OAAO,IAAI,CAAC,IAAI,EAAE,CAAC;IAE7B,gCAAgC;IAChC,IAAI,GAAG,CAAC,CAAC,0BAA0B,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IACrD,IAAI,IAAI;QAAE,OAAO,IAAI,CAAC,IAAI,EAAE,CAAC;IAE7B,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;GAEG;AACH,SAAS,aAAa,CAAC,CAAqB;IAC1C,qBAAqB;IACrB,IAAI,MAAM,GAAG,CAAC,CAAC,iCAAiC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAClE,IAAI,MAAM;QAAE,OAAO,MAAM,CAAC,IAAI,EAAE,CAAC;IAEjC,sBAAsB;IACtB,MAAM,GAAG,CAAC,CAAC,qBAAqB,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAClD,IAAI,MAAM;QAAE,OAAO,MAAM,CAAC,IAAI,EAAE,CAAC;IAEjC,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;;GAGG;AACH,SAAS,gBAAgB,CAAC,CAAqB;IAC7C,6BAA6B;IAC7B,IAAI,SAAS,GAAG,CAAC,CAAC,yCAAyC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAC7E,IAAI,SAAS,EAAE,CAAC;QACd,IAAI,CAAC;YACH,OAAO,IAAI,IAAI,CAAC,SAAS,CAAC,CAAC,WAAW,EAAE,CAAC;QAC3C,CAAC;QAAC,
|
|
1
|
+
{"version":3,"file":"metadata.js","sourceRoot":"","sources":["../../src/core/metadata.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAGnC;;;GAGG;AACH,SAAS,YAAY,CAAC,CAAqB;IACzC,uBAAuB;IACvB,IAAI,KAAK,GAAG,CAAC,CAAC,2BAA2B,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAC3D,IAAI,KAAK;QAAE,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC;IAE/B,oBAAoB;IACpB,KAAK,GAAG,CAAC,CAAC,4BAA4B,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IACxD,IAAI,KAAK;QAAE,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC;IAE/B,gBAAgB;IAChB,KAAK,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC;IAC1B,IAAI,KAAK;QAAE,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC;IAE/B,uBAAuB;IACvB,KAAK,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,EAAE,CAAC;IAC/B,IAAI,KAAK;QAAE,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC;IAE/B,OAAO,EAAE,CAAC;AACZ,CAAC;AAED;;;GAGG;AACH,SAAS,kBAAkB,CAAC,CAAqB;IAC/C,6BAA6B;IAC7B,IAAI,IAAI,GAAG,CAAC,CAAC,iCAAiC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAChE,IAAI,IAAI;QAAE,OAAO,IAAI,CAAC,IAAI,EAAE,CAAC;IAE7B,0BAA0B;IAC1B,IAAI,GAAG,CAAC,CAAC,kCAAkC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAC7D,IAAI,IAAI;QAAE,OAAO,IAAI,CAAC,IAAI,EAAE,CAAC;IAE7B,gCAAgC;IAChC,IAAI,GAAG,CAAC,CAAC,0BAA0B,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IACrD,IAAI,IAAI;QAAE,OAAO,IAAI,CAAC,IAAI,EAAE,CAAC;IAE7B,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;GAEG;AACH,SAAS,aAAa,CAAC,CAAqB;IAC1C,qBAAqB;IACrB,IAAI,MAAM,GAAG,CAAC,CAAC,iCAAiC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAClE,IAAI,MAAM;QAAE,OAAO,MAAM,CAAC,IAAI,EAAE,CAAC;IAEjC,sBAAsB;IACtB,MAAM,GAAG,CAAC,CAAC,qBAAqB,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAClD,IAAI,MAAM;QAAE,OAAO,MAAM,CAAC,IAAI,EAAE,CAAC;IAEjC,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;;GAGG;AACH,SAAS,gBAAgB,CAAC,CAAqB;IAC7C,6BAA6B;IAC7B,IAAI,SAAS,GAAG,CAAC,CAAC,yCAAyC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAC7E,IAAI,SAAS,EAAE,CAAC;QACd,IAAI,CAAC;YACH,OAAO,IAAI,IAAI,CAAC,SAAS,CAAC,CAAC,WAAW,EAAE,CAAC;QAC3C,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,IAAI,OAAO,CAAC,GAAG,CAAC,KAAK;gBAAE,OAAO,CAAC,KAAK,CAAC,WAAW,EAAE,oBAAoB,EAAE,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAC9G,CAAC;IACH,CAAC;IAED,+BAA+B;IAC/B,SAAS,GAAG,CAAC,CAAC,gCAAgC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAChE,IAAI,SAAS,EAAE,CAAC;QACd,IAAI,CAAC;YACH,OAAO,IAAI,IAAI,CAAC,SAAS,CAAC,CAAC,WAAW,EAAE,CAAC;QAC3C,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,IAAI,OAAO,CAAC,GAAG,CAAC,KAAK;gBAAE,OAAO,CAAC,KAAK,CAAC,WAAW,EAAE,oBAAoB,EAAE,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAC9G,CAAC;IACH,CAAC;IAED,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;GAEG;AACH,SAAS,YAAY,CAAC,CAAqB;IACzC,eAAe;IACf,IAAI,KAAK,GAAG,CAAC,CAAC,2BAA2B,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAC3D,IAAI,KAAK;QAAE,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC;IAE/B,oBAAoB;IACpB,KAAK,GAAG,CAAC,CAAC,4BAA4B,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IACxD,IAAI,KAAK;QAAE,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC;IAE/B,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;GAEG;AACH,SAAS,gBAAgB,CAAC,CAAqB;IAC7C,MAAM,SAAS,GAAG,CAAC,CAAC,uBAAuB,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAC1D,IAAI,SAAS;QAAE,OAAO,SAAS,CAAC,IAAI,EAAE,CAAC;IAEvC,qBAAqB;IACrB,MAAM,KAAK,GAAG,CAAC,CAAC,yBAAyB,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAC3D,IAAI,KAAK;QAAE,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC;IAE/B,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,YAAY,CAAC,IAAY,EAAE,OAAe;IACxD,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC7B,MAAM,KAAK,GAAG,IAAI,GAAG,EAAU,CAAC;IAEhC,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE;QAC5B,MAAM,IAAI,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAClC,IAAI,CAAC,IAAI;YAAE,OAAO;QAElB,IAAI,CAAC;YACH,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;YAE3C,gDAAgD;YAChD,IAAI,CAAC,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC,QAAQ,CAAC,WAAW,CAAC,QAAQ,CAAC,EAAE,CAAC;gBACxD,OAAO;YACT,CAAC;YAED,iDAAiD;YACjD,MAAM,cAAc,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC;YACxC,IAAI,WAAW,CAAC,IAAI;gBAChB,WAAW,CAAC,MAAM,KAAK,cAAc,CAAC,MAAM;gBAC5C,WAAW,CAAC,QAAQ,KAAK,cAAc,CAAC,QAAQ;gBAChD,WAAW,CAAC,MAAM,KAAK,cAAc,CAAC,MAAM,EAAE,CAAC;gBACjD,OAAO;YACT,CAAC;YAED,KAAK,CAAC,GAAG,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;QAC9B,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,IAAI,OAAO,CAAC,GAAG,CAAC,KAAK;gBAAE,OAAO,CAAC,KAAK,CAAC,WAAW,EAAE,mBAAmB,EAAE,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAC7G,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,OAAO,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,EAAE,CAAC;AAClC,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,aAAa,CAAC,IAAY,EAAE,OAAe;IACzD,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC7B,MAAM,MAAM,GAAG,IAAI,GAAG,EAA2C,CAAC;IAElE,qBAAqB;IACrB,CAAC,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE;QAC7B,MAAM,IAAI,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC;QACrB,MAAM,GAAG,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAC7B,IAAI,CAAC,GAAG;YAAE,OAAO;QAEjB,IAAI,CAAC;YACH,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;YAE1C,gDAAgD;YAChD,IAAI,CAAC,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC,QAAQ,CAAC,WAAW,CAAC,QAAQ,CAAC,EAAE,CAAC;gBACxD,OAAO;YACT,CAAC;YAED,MAAM,GAAG,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;YACnC,MAAM,KAAK,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACjC,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACpC,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YAEtC,MAAM,KAAK,GAAG,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;YAC5D,MAAM,MAAM,GAAG,SAAS,CAAC,CAAC,CAAC,QAAQ,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;YAE/D,MAAM,SAAS,GAAoC;gBACjD,GAAG,EAAE,WAAW,CAAC,IAAI;gBACrB,GAAG;gBACH,KAAK;gBACL,KAAK,EAAE,KAAK,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS;gBACjD,MAAM,EAAE,MAAM,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,SAAS;aACtD,CAAC;YAEF,qBAAqB;YACrB,MAAM,CAAC,GAAG,CAAC,WAAW,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;QAC1C,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,IAAI,OAAO,CAAC,GAAG,CAAC,KAAK;gBAAE,OAAO,CAAC,KAAK,CAAC,WAAW,EAAE,mBAAmB,EAAE,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAC7G,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,iCAAiC;IACjC,CAAC,CAAC,wBAAwB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE;QAC3C,MAAM,OAAO,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC;QACxB,MAAM,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QACtC,IAAI,CAAC,MAAM;YAAE,OAAO;QAEpB,kEAAkE;QAClE,MAAM,WAAW,GAAG,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;QACzD,WAAW,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE;YACzB,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;YACjC,IAAI,CAAC,GAAG;gBAAE,OAAO;YAEjB,IAAI,CAAC;gBACH,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;gBAE1C,gDAAgD;gBAChD,IAAI,CAAC,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC,QAAQ,CAAC,WAAW,CAAC,QAAQ,CAAC,EAAE,CAAC;oBACxD,OAAO;gBACT,CAAC;gBAED,2CAA2C;gBAC3C,MAAM,GAAG,GAAG,OAAO,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;gBAErE,MAAM,SAAS,GAAoC;oBACjD,GAAG,EAAE,WAAW,CAAC,IAAI;oBACrB,GAAG;iBACJ,CAAC;gBAEF,MAAM,CAAC,GAAG,CAAC,WAAW,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;YAC1C,CAAC;YAAC,OAAO,CAAC,EAAE,CAAC;gBACX,IAAI,OAAO,CAAC,GAAG,CAAC,KAAK;oBAAE,OAAO,CAAC,KAAK,CAAC,WAAW,EAAE,mBAAmB,EAAE,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YAC7G,CAAC;QACH,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,gCAAgC;IAChC,CAAC,CAAC,uBAAuB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE;QAC1C,MAAM,KAAK,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACpC,IAAI,CAAC,KAAK;YAAE,OAAO;QAEnB,qBAAqB;QACrB,MAAM,UAAU,GAAG,KAAK,CAAC,KAAK,CAAC,+BAA+B,CAAC,CAAC;QAChE,IAAI,CAAC,UAAU;YAAE,OAAO;QAExB,UAAU,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE;YACzB,MAAM,GAAG,GAAG,KAAK,CAAC,OAAO,CAAC,8BAA8B,EAAE,IAAI,CAAC,CAAC;YAChE,IAAI,CAAC,GAAG;gBAAE,OAAO;YAEjB,IAAI,CAAC;gBACH,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;gBAE1C,gDAAgD;gBAChD,IAAI,CAAC,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC,QAAQ,CAAC,WAAW,CAAC,QAAQ,CAAC,EAAE,CAAC;oBACxD,OAAO;gBACT,CAAC;gBAED,MAAM,SAAS,GAAoC;oBACjD,GAAG,EAAE,WAAW,CAAC,IAAI;oBACrB,GAAG,EAAE,EAAE,EAAE,wCAAwC;iBAClD,CAAC;gBAEF,MAAM,CAAC,GAAG,CAAC,WAAW,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;YAC1C,CAAC;YAAC,OAAO,CAAC,EAAE,CAAC;gBACX,IAAI,OAAO,CAAC,GAAG,CAAC,KAAK;oBAAE,OAAO,CAAC,KAAK,CAAC,WAAW,EAAE,mBAAmB,EAAE,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YAC7G,CAAC;QACH,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;AACrC,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,eAAe,CAAC,IAAY,EAAE,IAAY;IACxD,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,MAAM,KAAK,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC;IAC9B,MAAM,QAAQ,GAAiB;QAC7B,WAAW,EAAE,kBAAkB,CAAC,CAAC,CAAC;QAClC,MAAM,EAAE,aAAa,CAAC,CAAC,CAAC;QACxB,SAAS,EAAE,gBAAgB,CAAC,CAAC,CAAC;QAC9B,KAAK,EAAE,YAAY,CAAC,CAAC,CAAC;QACtB,SAAS,EAAE,gBAAgB,CAAC,CAAC,CAAC;KAC/B,CAAC;IAEF,OAAO,EAAE,KAAK,EAAE,QAAQ,EAAE,CAAC;AAC7B,CAAC"}
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* WebPeel pipeline stages
|
|
3
|
+
*
|
|
4
|
+
* Each stage is an exported async function that reads from / writes to the
|
|
5
|
+
* mutable PipelineContext. The stages are called in order by peel().
|
|
6
|
+
*/
|
|
7
|
+
import { type AutoScrollOptions } from './actions.js';
|
|
8
|
+
import { type DomainExtractResult } from './domain-extractors.js';
|
|
9
|
+
import { type ReadabilityResult } from './readability.js';
|
|
10
|
+
import { type QuickAnswerResult } from './quick-answer.js';
|
|
11
|
+
import { Timer } from './timing.js';
|
|
12
|
+
import type { PeelOptions, PeelResult, ImageInfo } from '../types.js';
|
|
13
|
+
import type { BrandingProfile } from './branding.js';
|
|
14
|
+
import type { ChangeResult } from './change-tracking.js';
|
|
15
|
+
/** Mutable context threaded through pipeline stages */
|
|
16
|
+
export interface PipelineContext {
|
|
17
|
+
url: string;
|
|
18
|
+
options: PeelOptions;
|
|
19
|
+
timer: Timer;
|
|
20
|
+
startTime: number;
|
|
21
|
+
render: boolean;
|
|
22
|
+
stealth: boolean;
|
|
23
|
+
wait: number;
|
|
24
|
+
format: 'markdown' | 'text' | 'html';
|
|
25
|
+
timeout: number;
|
|
26
|
+
userAgent?: string;
|
|
27
|
+
screenshot: boolean;
|
|
28
|
+
screenshotFullPage: boolean;
|
|
29
|
+
selector?: string;
|
|
30
|
+
exclude?: string[];
|
|
31
|
+
includeTags?: string[];
|
|
32
|
+
excludeTags?: string[];
|
|
33
|
+
headers?: Record<string, string>;
|
|
34
|
+
cookies?: string[];
|
|
35
|
+
raw: boolean;
|
|
36
|
+
actions?: any[];
|
|
37
|
+
extract?: any;
|
|
38
|
+
maxTokens?: number;
|
|
39
|
+
extractImagesFlag: boolean;
|
|
40
|
+
profileDir?: string;
|
|
41
|
+
headed: boolean;
|
|
42
|
+
storageState?: any;
|
|
43
|
+
proxy?: string;
|
|
44
|
+
fullPage: boolean;
|
|
45
|
+
autoScrollOpts?: AutoScrollOptions;
|
|
46
|
+
fetchResult?: any;
|
|
47
|
+
contentType: 'document' | 'html' | 'json' | 'xml' | 'text';
|
|
48
|
+
content: string;
|
|
49
|
+
title: string;
|
|
50
|
+
metadata: any;
|
|
51
|
+
links: string[];
|
|
52
|
+
quality: number;
|
|
53
|
+
prunedPercent?: number;
|
|
54
|
+
readabilityResult?: ReadabilityResult;
|
|
55
|
+
imagesList?: ImageInfo[];
|
|
56
|
+
extracted?: Record<string, any>;
|
|
57
|
+
domainData?: DomainExtractResult;
|
|
58
|
+
quickAnswerResult?: QuickAnswerResult;
|
|
59
|
+
brandingProfile?: BrandingProfile;
|
|
60
|
+
changeResult?: ChangeResult;
|
|
61
|
+
summaryText?: string;
|
|
62
|
+
screenshotBase64?: string;
|
|
63
|
+
}
|
|
64
|
+
/** Create the initial PipelineContext with defaults */
|
|
65
|
+
export declare function createContext(url: string, options: PeelOptions): PipelineContext;
|
|
66
|
+
/**
|
|
67
|
+
* Resolve all PeelOptions values into flat context fields with defaults applied.
|
|
68
|
+
* Force render=true when screenshot/stealth/actions/branding/autoScroll requested.
|
|
69
|
+
* Parse the autoScroll option.
|
|
70
|
+
*/
|
|
71
|
+
export declare function normalizeOptions(ctx: PipelineContext): void;
|
|
72
|
+
/**
|
|
73
|
+
* If the URL is a YouTube URL, attempt transcript extraction.
|
|
74
|
+
* Returns a PeelResult on success, or null to fall through to normal pipeline.
|
|
75
|
+
*/
|
|
76
|
+
export declare function handleYouTube(ctx: PipelineContext): Promise<PeelResult | null>;
|
|
77
|
+
/**
|
|
78
|
+
* Fetch the URL via smartFetch, handle autoScroll, and store result in ctx.fetchResult.
|
|
79
|
+
*/
|
|
80
|
+
export declare function fetchContent(ctx: PipelineContext): Promise<void>;
|
|
81
|
+
/**
|
|
82
|
+
* Detect and set ctx.contentType based on response headers and content.
|
|
83
|
+
*/
|
|
84
|
+
export declare function detectContentType(ctx: PipelineContext): void;
|
|
85
|
+
/**
|
|
86
|
+
* Parse content from fetchResult based on the detected contentType.
|
|
87
|
+
* Sets ctx.content, ctx.title, ctx.metadata, ctx.links, ctx.quality, ctx.prunedPercent.
|
|
88
|
+
*/
|
|
89
|
+
export declare function parseContent(ctx: PipelineContext): Promise<void>;
|
|
90
|
+
/**
|
|
91
|
+
* Run all post-processing in sequence:
|
|
92
|
+
* readability, image extraction, structured extraction,
|
|
93
|
+
* maxTokens truncation, budget distillation, domain extractors, quick answer.
|
|
94
|
+
*/
|
|
95
|
+
export declare function postProcess(ctx: PipelineContext): Promise<void>;
|
|
96
|
+
/**
|
|
97
|
+
* Screenshot base64 conversion, branding extraction (needs page), change tracking, AI summary.
|
|
98
|
+
*/
|
|
99
|
+
export declare function finalize(ctx: PipelineContext): Promise<void>;
|
|
100
|
+
/**
|
|
101
|
+
* Assemble the final PeelResult from the pipeline context.
|
|
102
|
+
*/
|
|
103
|
+
export declare function buildResult(ctx: PipelineContext): PeelResult;
|
|
104
|
+
//# sourceMappingURL=pipeline.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../../src/core/pipeline.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAiBH,OAAO,EAA+B,KAAK,iBAAiB,EAAE,MAAM,cAAc,CAAC;AAInF,OAAO,EAAyC,KAAK,mBAAmB,EAAE,MAAM,wBAAwB,CAAC;AACzG,OAAO,EAA0B,KAAK,iBAAiB,EAAE,MAAM,kBAAkB,CAAC;AAClF,OAAO,EAAiC,KAAK,iBAAiB,EAAE,MAAM,mBAAmB,CAAC;AAC1F,OAAO,EAAE,KAAK,EAAE,MAAM,aAAa,CAAC;AACpC,OAAO,KAAK,EAAE,WAAW,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AACtE,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AACrD,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AAEzD,uDAAuD;AACvD,MAAM,WAAW,eAAe;IAC9B,GAAG,EAAE,MAAM,CAAC;IACZ,OAAO,EAAE,WAAW,CAAC;IACrB,KAAK,EAAE,KAAK,CAAC;IACb,SAAS,EAAE,MAAM,CAAC;IAGlB,MAAM,EAAE,OAAO,CAAC;IAChB,OAAO,EAAE,OAAO,CAAC;IACjB,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,UAAU,GAAG,MAAM,GAAG,MAAM,CAAC;IACrC,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,OAAO,CAAC;IACpB,kBAAkB,EAAE,OAAO,CAAC;IAC5B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,EAAE,CAAC;IACvB,WAAW,CAAC,EAAE,MAAM,EAAE,CAAC;IACvB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,GAAG,EAAE,OAAO,CAAC;IACb,OAAO,CAAC,EAAE,GAAG,EAAE,CAAC;IAChB,OAAO,CAAC,EAAE,GAAG,CAAC;IACd,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,iBAAiB,EAAE,OAAO,CAAC;IAC3B,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,MAAM,EAAE,OAAO,CAAC;IAChB,YAAY,CAAC,EAAE,GAAG,CAAC;IACnB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,OAAO,CAAC;IAClB,cAAc,CAAC,EAAE,iBAAiB,CAAC;IAGnC,WAAW,CAAC,EAAE,GAAG,CAAC;IAGlB,WAAW,EAAE,UAAU,GAAG,MAAM,GAAG,MAAM,GAAG,KAAK,GAAG,MAAM,CAAC;IAG3D,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,GAAG,CAAC;IACd,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,OAAO,EAAE,MAAM,CAAC;IAChB,aAAa,CAAC,EAAE,MAAM,CAAC;IAGvB,iBAAiB,CAAC,EAAE,iBAAiB,CAAC;IACtC,UAAU,CAAC,EAAE,SAAS,EAAE,CAAC;IACzB,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAChC,UAAU,CAAC,EAAE,mBAAmB,CAAC;IACjC,iBAAiB,CAAC,EAAE,iBAAiB,CAAC;IACtC,eAAe,CAAC,EAAE,eAAe,CAAC;IAClC,YAAY,CAAC,EAAE,YAAY,CAAC;IAC5B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAED,uDAAuD;AACvD,wBAAgB,aAAa,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,EAAE,WAAW,GAAG,eAAe,CA4ChF;AAMD;;;;GAIG;AACH,wBAAgB,gBAAgB,CAAC,GAAG,EAAE,eAAe,GAAG,IAAI,CA+F3D;AAMD;;;GAGG;AACH,wBAAsB,aAAa,CAAC,GAAG,EAAE,eAAe,GAAG,OAAO,CAAC,UAAU,GAAG,IAAI,CAAC,CAsDpF;AAMD;;GAEG;AACH,wBAAsB,YAAY,CAAC,GAAG,EAAE,eAAe,GAAG,OAAO,CAAC,IAAI,CAAC,CAmDtE;AAMD;;GAEG;AACH,wBAAgB,iBAAiB,CAAC,GAAG,EAAE,eAAe,GAAG,IAAI,CAe5D;AAMD;;;GAGG;AACH,wBAAsB,YAAY,CAAC,GAAG,EAAE,eAAe,GAAG,OAAO,CAAC,IAAI,CAAC,CA8JtE;AAMD;;;;GAIG;AACH,wBAAsB,WAAW,CAAC,GAAG,EAAE,eAAe,GAAG,OAAO,CAAC,IAAI,CAAC,CAoFrE;AAMD;;GAEG;AACH,wBAAsB,QAAQ,CAAC,GAAG,EAAE,eAAe,GAAG,OAAO,CAAC,IAAI,CAAC,CAyDlE;AAMD;;GAEG;AACH,wBAAgB,WAAW,CAAC,GAAG,EAAE,eAAe,GAAG,UAAU,CA8B5D"}
|