website-scrap-engine 0.8.0 → 0.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/life-cycle/download-resource.d.ts.map +1 -1
- package/lib/life-cycle/download-resource.js +26 -0
- package/lib/life-cycle/download-resource.js.map +1 -1
- package/lib/life-cycle/process-html.d.ts.map +1 -1
- package/lib/life-cycle/process-html.js +37 -15
- package/lib/life-cycle/process-html.js.map +1 -1
- package/lib/options.d.ts +1 -0
- package/lib/options.d.ts.map +1 -1
- package/lib/options.js.map +1 -1
- package/package.json +5 -5
- package/src/life-cycle/download-resource.ts +27 -0
- package/src/life-cycle/process-html.ts +53 -19
- package/src/options.ts +1 -0
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"download-resource.d.ts","sourceRoot":"","sources":["../../src/life-cycle/download-resource.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAC,eAAe,EAAE,WAAW,EAAgB,QAAQ,EAAC,MAAM,KAAK,CAAC;AAE9E,OAAO,KAAK,EAAC,gBAAgB,EAAE,cAAc,EAAC,MAAM,YAAY,CAAC;AACjE,OAAO,KAAK,EAAC,QAAQ,EAAC,MAAM,gBAAgB,CAAC;AAE7C,OAAO,KAAK,EAAC,qBAAqB,EAAC,MAAM,eAAe,CAAC;AAKzD,6BAA6B;AAC7B,eAAO,MAAM,eAAe,EAAE,eAqB7B,CAAC;AAEF,MAAM,WAAW,aAAc,SAAQ,OAAO,CAAC,KAAK,CAAC;IACnD,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED;;;;;;GAMG;AACH,wBAAsB,QAAQ,CAC5B,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,WAAW,GACnB,OAAO,CAAC,QAAQ,CAAC,MAAM,GAAG,MAAM,CAAC,GAAG,IAAI,CAAC,CA4C3C;AAED,wBAAsB,kBAAkB,CACtC,GAAG,EAAE,QAAQ,GAAG;IAAE,sBAAsB,EAAE,MAAM,CAAA;CAAE,EAClD,cAAc,EAAE,cAAc,EAC9B,OAAO,CAAC,EAAE,qBAAqB,GAC9B,OAAO,CAAC,gBAAgB,GAAG,QAAQ,GAAG,IAAI,CAAC,CAyC7C;AAED,wBAAsB,gBAAgB,CACpC,GAAG,EAAE,QAAQ,EACb,cAAc,EAAE,cAAc,EAC9B,OAAO,EAAE,qBAAqB,GAC7B,OAAO,CAAC,gBAAgB,GAAG,QAAQ,GAAG,IAAI,CAAC,
|
|
1
|
+
{"version":3,"file":"download-resource.d.ts","sourceRoot":"","sources":["../../src/life-cycle/download-resource.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAC,eAAe,EAAE,WAAW,EAAgB,QAAQ,EAAC,MAAM,KAAK,CAAC;AAE9E,OAAO,KAAK,EAAC,gBAAgB,EAAE,cAAc,EAAC,MAAM,YAAY,CAAC;AACjE,OAAO,KAAK,EAAC,QAAQ,EAAC,MAAM,gBAAgB,CAAC;AAE7C,OAAO,KAAK,EAAC,qBAAqB,EAAC,MAAM,eAAe,CAAC;AAKzD,6BAA6B;AAC7B,eAAO,MAAM,eAAe,EAAE,eAqB7B,CAAC;AAEF,MAAM,WAAW,aAAc,SAAQ,OAAO,CAAC,KAAK,CAAC;IACnD,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED;;;;;;GAMG;AACH,wBAAsB,QAAQ,CAC5B,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,WAAW,GACnB,OAAO,CAAC,QAAQ,CAAC,MAAM,GAAG,MAAM,CAAC,GAAG,IAAI,CAAC,CA4C3C;AAED,wBAAsB,kBAAkB,CACtC,GAAG,EAAE,QAAQ,GAAG;IAAE,sBAAsB,EAAE,MAAM,CAAA;CAAE,EAClD,cAAc,EAAE,cAAc,EAC9B,OAAO,CAAC,EAAE,qBAAqB,GAC9B,OAAO,CAAC,gBAAgB,GAAG,QAAQ,GAAG,IAAI,CAAC,CAyC7C;AAED,wBAAsB,gBAAgB,CACpC,GAAG,EAAE,QAAQ,EACb,cAAc,EAAE,cAAc,EAC9B,OAAO,EAAE,qBAAqB,GAC7B,OAAO,CAAC,gBAAgB,GAAG,QAAQ,GAAG,IAAI,CAAC,CAoE7C"}
|
|
@@ -125,6 +125,32 @@ export async function downloadResource(res, requestOptions, options) {
|
|
|
125
125
|
return downloadedResource;
|
|
126
126
|
}
|
|
127
127
|
if (downloadedResource.type === ResourceType.Html) {
|
|
128
|
+
if (options.meta.warnForNonHtml) {
|
|
129
|
+
const headers = downloadedResource.meta.headers;
|
|
130
|
+
if (headers) {
|
|
131
|
+
const contentType = headers['content-type'] || headers['Content-Type'];
|
|
132
|
+
let nonHtml = false;
|
|
133
|
+
if (typeof contentType === 'string') {
|
|
134
|
+
nonHtml = !contentType.includes('/html') &&
|
|
135
|
+
!contentType.includes('/xml') &&
|
|
136
|
+
!contentType.includes('application/xhtml+xml');
|
|
137
|
+
}
|
|
138
|
+
else if (Array.isArray(contentType)) {
|
|
139
|
+
nonHtml = true;
|
|
140
|
+
for (const header of contentType) {
|
|
141
|
+
if (!header.includes('/html') &&
|
|
142
|
+
!header.includes('/xml') &&
|
|
143
|
+
!header.includes('application/xhtml+xml')) {
|
|
144
|
+
nonHtml = false;
|
|
145
|
+
break;
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
}
|
|
149
|
+
if (nonHtml) {
|
|
150
|
+
logger.error.warn('Detected non-html content type', downloadedResource.downloadLink, downloadedResource.rawUrl, contentType);
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
}
|
|
128
154
|
if (options.meta.detectIncompleteHtml &&
|
|
129
155
|
(typeof downloadedResource.body === 'string' ||
|
|
130
156
|
Buffer.isBuffer(downloadedResource.body))) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"download-resource.js","sourceRoot":"","sources":["../../src/life-cycle/download-resource.ts"],"names":[],"mappings":"AACA,OAAO,GAAG,EAAE,EAAC,YAAY,EAAC,MAAM,KAAK,CAAC;AAGtC,OAAO,EAAC,gBAAgB,EAAE,YAAY,EAAC,MAAM,gBAAgB,CAAC;AAE9D,OAAO,KAAK,MAAM,MAAM,qBAAqB,CAAC;AAC9C,OAAO,EAAC,SAAS,EAAE,KAAK,EAAC,MAAM,YAAY,CAAC;AAC5C,OAAO,GAAG,MAAM,OAAO,CAAC;AAExB,6BAA6B;AAC7B,MAAM,CAAC,MAAM,eAAe,GAAoB,CAC9C,KAAmB,EACnB,UAA8B,EAC9B,EAAE;IACF,MAAM,OAAO,GAAG,KAAK,CAAC,OAAO,CAAC;IAC9B,IAAI,CAAC,OAAO,EAAE,CAAC;QACb,OAAO;IACT,CAAC;IACD,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,EAAE,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC;QACnD,OAAO;IACT,CAAC;IACD,MAAM,GAAG,GAAG,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;IACtC,IAAI,KAAK,YAAY,YAAY,IAAI,KAAK,CAAC,IAAI,KAAK,cAAc,EAAE,CAAC;QACnE,CAAC,UAAU,IAAI,UAAU,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC;aACnE,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,EAAE,KAAK,CAAC,IAAI,EAAE,KAAK,CAAC,IAAI,EACzD,KAAK,CAAC,OAAO,EAAG,KAAsB,CAAC,KAAK,CAAC,CAAC;IACpD,CAAC;SAAM,CAAC;QACN,CAAC,UAAU,IAAI,UAAU,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC;aACnE,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,EAAE,KAAK,CAAC,IAAI,EAAE,KAAK,CAAC,IAAI,EAAE,KAAK,CAAC,OAAO,CAAC,CAAC;IAChF,CAAC;AACH,CAAC,CAAC;AAQF;;;;;;GAMG;AACH,MAAM,CAAC,KAAK,UAAU,QAAQ,CAC5B,GAAW,EACX,OAAoB;IAEpB,IAAI,GAAG,GAAqC,KAAK,CAAC,CAAC;IACnD,IAAI,GAAG,GAAyB,KAAK,CAAC,EAAE,YAAyB,CAAC;IAClE,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC;QAC5B,GAAG,GAAG,KAAK,CAAC,CAAC;QACb,IAAI,CAAC;YACH,YAAY,GAAG,MAAM,CAAC,MAAM,CAAC,EAAE,EAAE,OAAO,CAAC,CAAC;YAC1C,GAAG,GAAG,CAAC,MAAM,GAAG,CAAC,GAAG,EAAE,YAAY,CAAC,CAA8B,CAAC;YAClE,IAAI,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;gBAC1C,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,EAAE,GAAG,EAAE,0CAA0C,EAClE,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,CAAC;gBACnB,SAAS;YACX,CAAC;YACD,MAAM;QACR,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,gCAAgC;YAChC,GAAG,GAAG,CAAyB,CAAC;YAChC,IAAI,GAAG,IAAI,GAAG,CAAC,OAAO,KAAK,iBAAiB,EAAE,CAAC;gBAC7C,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,EAAE,GAAG,EAAE,mCAAmC,EAC3D,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,KAAK,EAAE,GAAG,CAAC,OAAO,CAAC,CAAC;gBAC9C,MAAM,KAAK,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC;gBACrB,SAAS;YACX,CAAC;YACD,+CAA+C;YAC/C,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,kBAAkB;gBAChC,CAAC,GAAG,CAAC,IAAI,KAAK,cAAc,IAAI,GAAG,CAAC,IAAI,KAAK,cAAc,CAAC;gBAC5D,4DAA4D;gBAC5D,4EAA4E;gBAC5E,2BAA2B;gBAC3B,CAAC,GAAG,CAAC,IAAI,KAAK,WAAW,IAAI,GAAG,CAAC,IAAI,KAAK,SAAS,CAAC,EAAE,CAAC;gBACvD,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,EAAE,GAAG,EAAE,qBAAqB,GAAG,CAAC,KAAK,UAAU,EAChE,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,OAAO,CAAC,CAAC;gBACnC,MAAM,KAAK,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC;gBACrB,SAAS;YACX,CAAC;YACD,MAAM,CAAC,CAAC;QACV,CAAC;IACH,CAAC;IACD,IAAI,GAAG,EAAE,CAAC;QACR,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,EAAE,+CAA+C,EACrE,GAAG,CAAC,OAAO,EAAE,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;QAC9B,MAAM,GAAG,CAAC;IACZ,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,kBAAkB,CACtC,GAAkD,EAClD,cAA8B,EAC9B,OAA+B;IAE/B,MAAM,YAAY,GAAW,SAAS,CAAC,SAAS,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC,CAAC;IACpE,MAAM,UAAU,GAAgB,MAAM,CAAC,MAAM,CAAC,EAAE,EAAE,cAAc,CAAC,CAAC;IAClE,UAAU,CAAC,YAAY,GAAG,QAAQ,CAAC;IACnC,IAAI,GAAG,CAAC,MAAM,IAAI,GAAG,CAAC,MAAM,KAAK,YAAY,EAAE,CAAC;QAC9C,MAAM,OAAO,GAAG,MAAM,CAAC,MAAM,CAAC,EAAE,EAAE,UAAU,CAAC,OAAO,CAAC,CAAC;QACtD,OAAO,CAAC,OAAO,GAAG,GAAG,CAAC,MAAM,CAAC;QAC7B,UAAU,CAAC,OAAO,GAAG,OAAO,CAAC;IAC/B,CAAC;IACD,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,YAAY,EAAE,GAAG,CAAC,MAAM,EACnD,GAAG,CAAC,QAAQ,EAAE,GAAG,CAAC,IAAI,CAAC,CAAC;IAC1B,MAAM,QAAQ,GACZ,MAAM,QAAQ,CAAC,YAAY,EAAE,UAAU,CAAC,CAAC;IAC3C,IAAI,CAAC,QAAQ,EAAE,CAAC;QACd,MAAM,QAAQ,GAAG,GAAe,CAAC;QACjC,OAAO,QAAQ,CAAC,sBAAsB,CAAC;QACvC,OAAO,QAAQ,CAAC,QAAQ,CAAC;QACzB,OAAO,QAAQ,CAAC;IAClB,CAAC;IACD,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;QACnB,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,sBAAsB,EAAE,YAAY,EAAE,QAAQ,CAAC,CAAC;QAClE,OAAO,GAAe,CAAC;IACzB,CAAC;IACD,GAAG,CAAC,IAAI,CAAC,OAAO,GAAG,QAAQ,CAAC,OAAO,CAAC;IAEpC,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,CAAC,UAAU,EAAE,QAAQ,CAAC,UAAU,EAAE,GAAG,CAAC,GAAG,EACpE,YAAY,EAAE,GAAG,CAAC,MAAM,EAAE,GAAG,CAAC,QAAQ,EAAE,GAAG,CAAC,IAAI,CAAC,CAAC;IACpD,GAAG,CAAC,eAAe,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IACjC,GAAG,CAAC,YAAY,GAAG,GAAG,CAAC,eAAe,GAAG,GAAG,CAAC,sBAAsB,CAAC;IACpE,GAAG,CAAC,aAAa,GAAG,QAAQ,CAAC,GAAG,CAAC;IACjC,mEAAmE;IACnE,aAAa;IACb,IAAI,GAAG,CAAC,aAAa,KAAK,GAAG,CAAC,GAAG,EAAE,CAAC;QAClC,GAAG,CAAC,kBAAkB,GAAG,gBAAgB,CACvC,GAAG,CAAC,GAAG,CAAC,aAAa,CAAC,EACtB,GAAG,CAAC,IAAI,KAAK,YAAY,CAAC,IAAI,EAC9B,CAAC,CAAA,OAAO,aAAP,OAAO,uBAAP,OAAO,CAAE,sBAAsB,CAAA,EAChC,OAAO,aAAP,OAAO,uBAAP,OAAO,CAAE,YAAY,CAAC,CAAC;IAC3B,CAAC;IACD,GAAG,CAAC,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC;IACzB,OAAO,GAAG,CAAC;AACb,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,gBAAgB,CACpC,GAAa,EACb,cAA8B,EAC9B,OAA8B;IAE9B,IAAI,GAAG,CAAC,IAAI,EAAE,CAAC;QACb,OAAO,GAAuB,CAAC;IACjC,CAAC;IACD,IAAI,GAAG,CAAC,IAAI,KAAK,YAAY,CAAC,eAAe,EAAE,CAAC;QAC9C,OAAO,GAAG,CAAC;IACb,CAAC;IACD,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,YAAY,CAAC,EAAE,CAAC;QACjC,OAAO,GAAG,CAAC;IACb,CAAC;IACD,IAAI,CAAC,GAAG,CAAC,sBAAsB,EAAE,CAAC;QAChC,GAAG,CAAC,sBAAsB,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QACxC,GAAG,CAAC,QAAQ,GAAG,GAAG,CAAC,sBAAsB,GAAG,GAAG,CAAC,eAAe,CAAC;IAClE,CAAC;IACD,IAAI,kBAAkB,GAAuC,MAAM,kBAAkB,CACnF,GAAsD,EAAE,cAAc,EAAE,OAAO,CAAC,CAAC;IACnF,IAAI,CAAC,kBAAkB,IAAI,CAAC,kBAAkB,CAAC,IAAI,EAAE,CAAC;QACpD,OAAO,kBAAkB,CAAC;IAC5B,CAAC;IACD,IAAI,kBAAkB,CAAC,IAAI,KAAK,YAAY,CAAC,IAAI,EAAE,CAAC;QAClD,IAAI,OAAO,CAAC,IAAI,CAAC,oBAAoB;YACnC,CAAC,OAAO,kBAAkB,CAAC,IAAI,KAAK,QAAQ;gBAC1C,MAAM,CAAC,QAAQ,CAAC,kBAAkB,CAAC,IAAI,CAAC,CAAC,EAAE,CAAC;YAC9C,IAAI,CAAC,kBAAkB,CAAC,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,CAAC,oBAAoB,CAAC,EAAE,CAAC;gBACzE,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,qCAAqC,EACrD,kBAAkB,CAAC,YAAY,CAAC,CAAC;gBACnC,kBAAkB,GAAG,MAAM,kBAAkB,CAC3C,GAAsD,EAAE,cAAc,CAAC,CAAC;YAC5E,CAAC;YACD,8BAA8B;YAC9B,IAAI,CAAC,kBAAkB,IAAI,OAAO,kBAAkB,CAAC,IAAI,KAAK,QAAQ;gBACpE,CAAC,kBAAkB,CAAC,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,CAAC,oBAAoB,CAAC,EAAE,CAAC;gBACvE,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,gCAAgC,EAAE,GAAG,CAAC,YAAY,CAAC,CAAC;gBACtE,OAAO,kBAAkB,CAAC;YAC5B,CAAC;QACH,CAAC;QACD,kBAAkB,CAAC,eAAe,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAChD,kBAAkB,CAAC,YAAY;YAC7B,kBAAkB,CAAC,eAAe,GAAG,GAAG,CAAC,sBAAsB,CAAC;IACpE,CAAC;IACD,OAAO,kBAAkB,CAAC;AAC5B,CAAC"}
|
|
1
|
+
{"version":3,"file":"download-resource.js","sourceRoot":"","sources":["../../src/life-cycle/download-resource.ts"],"names":[],"mappings":"AACA,OAAO,GAAG,EAAE,EAAC,YAAY,EAAC,MAAM,KAAK,CAAC;AAGtC,OAAO,EAAC,gBAAgB,EAAE,YAAY,EAAC,MAAM,gBAAgB,CAAC;AAE9D,OAAO,KAAK,MAAM,MAAM,qBAAqB,CAAC;AAC9C,OAAO,EAAC,SAAS,EAAE,KAAK,EAAC,MAAM,YAAY,CAAC;AAC5C,OAAO,GAAG,MAAM,OAAO,CAAC;AAExB,6BAA6B;AAC7B,MAAM,CAAC,MAAM,eAAe,GAAoB,CAC9C,KAAmB,EACnB,UAA8B,EAC9B,EAAE;IACF,MAAM,OAAO,GAAG,KAAK,CAAC,OAAO,CAAC;IAC9B,IAAI,CAAC,OAAO,EAAE,CAAC;QACb,OAAO;IACT,CAAC;IACD,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,EAAE,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC;QACnD,OAAO;IACT,CAAC;IACD,MAAM,GAAG,GAAG,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;IACtC,IAAI,KAAK,YAAY,YAAY,IAAI,KAAK,CAAC,IAAI,KAAK,cAAc,EAAE,CAAC;QACnE,CAAC,UAAU,IAAI,UAAU,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC;aACnE,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,EAAE,KAAK,CAAC,IAAI,EAAE,KAAK,CAAC,IAAI,EACzD,KAAK,CAAC,OAAO,EAAG,KAAsB,CAAC,KAAK,CAAC,CAAC;IACpD,CAAC;SAAM,CAAC;QACN,CAAC,UAAU,IAAI,UAAU,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC;aACnE,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,EAAE,KAAK,CAAC,IAAI,EAAE,KAAK,CAAC,IAAI,EAAE,KAAK,CAAC,OAAO,CAAC,CAAC;IAChF,CAAC;AACH,CAAC,CAAC;AAQF;;;;;;GAMG;AACH,MAAM,CAAC,KAAK,UAAU,QAAQ,CAC5B,GAAW,EACX,OAAoB;IAEpB,IAAI,GAAG,GAAqC,KAAK,CAAC,CAAC;IACnD,IAAI,GAAG,GAAyB,KAAK,CAAC,EAAE,YAAyB,CAAC;IAClE,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC;QAC5B,GAAG,GAAG,KAAK,CAAC,CAAC;QACb,IAAI,CAAC;YACH,YAAY,GAAG,MAAM,CAAC,MAAM,CAAC,EAAE,EAAE,OAAO,CAAC,CAAC;YAC1C,GAAG,GAAG,CAAC,MAAM,GAAG,CAAC,GAAG,EAAE,YAAY,CAAC,CAA8B,CAAC;YAClE,IAAI,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;gBAC1C,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,EAAE,GAAG,EAAE,0CAA0C,EAClE,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,CAAC;gBACnB,SAAS;YACX,CAAC;YACD,MAAM;QACR,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,gCAAgC;YAChC,GAAG,GAAG,CAAyB,CAAC;YAChC,IAAI,GAAG,IAAI,GAAG,CAAC,OAAO,KAAK,iBAAiB,EAAE,CAAC;gBAC7C,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,EAAE,GAAG,EAAE,mCAAmC,EAC3D,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,KAAK,EAAE,GAAG,CAAC,OAAO,CAAC,CAAC;gBAC9C,MAAM,KAAK,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC;gBACrB,SAAS;YACX,CAAC;YACD,+CAA+C;YAC/C,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,kBAAkB;gBAChC,CAAC,GAAG,CAAC,IAAI,KAAK,cAAc,IAAI,GAAG,CAAC,IAAI,KAAK,cAAc,CAAC;gBAC5D,4DAA4D;gBAC5D,4EAA4E;gBAC5E,2BAA2B;gBAC3B,CAAC,GAAG,CAAC,IAAI,KAAK,WAAW,IAAI,GAAG,CAAC,IAAI,KAAK,SAAS,CAAC,EAAE,CAAC;gBACvD,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,EAAE,GAAG,EAAE,qBAAqB,GAAG,CAAC,KAAK,UAAU,EAChE,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,OAAO,CAAC,CAAC;gBACnC,MAAM,KAAK,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC;gBACrB,SAAS;YACX,CAAC;YACD,MAAM,CAAC,CAAC;QACV,CAAC;IACH,CAAC;IACD,IAAI,GAAG,EAAE,CAAC;QACR,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,EAAE,+CAA+C,EACrE,GAAG,CAAC,OAAO,EAAE,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;QAC9B,MAAM,GAAG,CAAC;IACZ,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,kBAAkB,CACtC,GAAkD,EAClD,cAA8B,EAC9B,OAA+B;IAE/B,MAAM,YAAY,GAAW,SAAS,CAAC,SAAS,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC,CAAC;IACpE,MAAM,UAAU,GAAgB,MAAM,CAAC,MAAM,CAAC,EAAE,EAAE,cAAc,CAAC,CAAC;IAClE,UAAU,CAAC,YAAY,GAAG,QAAQ,CAAC;IACnC,IAAI,GAAG,CAAC,MAAM,IAAI,GAAG,CAAC,MAAM,KAAK,YAAY,EAAE,CAAC;QAC9C,MAAM,OAAO,GAAG,MAAM,CAAC,MAAM,CAAC,EAAE,EAAE,UAAU,CAAC,OAAO,CAAC,CAAC;QACtD,OAAO,CAAC,OAAO,GAAG,GAAG,CAAC,MAAM,CAAC;QAC7B,UAAU,CAAC,OAAO,GAAG,OAAO,CAAC;IAC/B,CAAC;IACD,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,YAAY,EAAE,GAAG,CAAC,MAAM,EACnD,GAAG,CAAC,QAAQ,EAAE,GAAG,CAAC,IAAI,CAAC,CAAC;IAC1B,MAAM,QAAQ,GACZ,MAAM,QAAQ,CAAC,YAAY,EAAE,UAAU,CAAC,CAAC;IAC3C,IAAI,CAAC,QAAQ,EAAE,CAAC;QACd,MAAM,QAAQ,GAAG,GAAe,CAAC;QACjC,OAAO,QAAQ,CAAC,sBAAsB,CAAC;QACvC,OAAO,QAAQ,CAAC,QAAQ,CAAC;QACzB,OAAO,QAAQ,CAAC;IAClB,CAAC;IACD,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;QACnB,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,sBAAsB,EAAE,YAAY,EAAE,QAAQ,CAAC,CAAC;QAClE,OAAO,GAAe,CAAC;IACzB,CAAC;IACD,GAAG,CAAC,IAAI,CAAC,OAAO,GAAG,QAAQ,CAAC,OAAO,CAAC;IAEpC,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,CAAC,UAAU,EAAE,QAAQ,CAAC,UAAU,EAAE,GAAG,CAAC,GAAG,EACpE,YAAY,EAAE,GAAG,CAAC,MAAM,EAAE,GAAG,CAAC,QAAQ,EAAE,GAAG,CAAC,IAAI,CAAC,CAAC;IACpD,GAAG,CAAC,eAAe,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IACjC,GAAG,CAAC,YAAY,GAAG,GAAG,CAAC,eAAe,GAAG,GAAG,CAAC,sBAAsB,CAAC;IACpE,GAAG,CAAC,aAAa,GAAG,QAAQ,CAAC,GAAG,CAAC;IACjC,mEAAmE;IACnE,aAAa;IACb,IAAI,GAAG,CAAC,aAAa,KAAK,GAAG,CAAC,GAAG,EAAE,CAAC;QAClC,GAAG,CAAC,kBAAkB,GAAG,gBAAgB,CACvC,GAAG,CAAC,GAAG,CAAC,aAAa,CAAC,EACtB,GAAG,CAAC,IAAI,KAAK,YAAY,CAAC,IAAI,EAC9B,CAAC,CAAA,OAAO,aAAP,OAAO,uBAAP,OAAO,CAAE,sBAAsB,CAAA,EAChC,OAAO,aAAP,OAAO,uBAAP,OAAO,CAAE,YAAY,CAAC,CAAC;IAC3B,CAAC;IACD,GAAG,CAAC,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC;IACzB,OAAO,GAAG,CAAC;AACb,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,gBAAgB,CACpC,GAAa,EACb,cAA8B,EAC9B,OAA8B;IAE9B,IAAI,GAAG,CAAC,IAAI,EAAE,CAAC;QACb,OAAO,GAAuB,CAAC;IACjC,CAAC;IACD,IAAI,GAAG,CAAC,IAAI,KAAK,YAAY,CAAC,eAAe,EAAE,CAAC;QAC9C,OAAO,GAAG,CAAC;IACb,CAAC;IACD,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,YAAY,CAAC,EAAE,CAAC;QACjC,OAAO,GAAG,CAAC;IACb,CAAC;IACD,IAAI,CAAC,GAAG,CAAC,sBAAsB,EAAE,CAAC;QAChC,GAAG,CAAC,sBAAsB,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QACxC,GAAG,CAAC,QAAQ,GAAG,GAAG,CAAC,sBAAsB,GAAG,GAAG,CAAC,eAAe,CAAC;IAClE,CAAC;IACD,IAAI,kBAAkB,GAAuC,MAAM,kBAAkB,CACnF,GAAsD,EAAE,cAAc,EAAE,OAAO,CAAC,CAAC;IACnF,IAAI,CAAC,kBAAkB,IAAI,CAAC,kBAAkB,CAAC,IAAI,EAAE,CAAC;QACpD,OAAO,kBAAkB,CAAC;IAC5B,CAAC;IACD,IAAI,kBAAkB,CAAC,IAAI,KAAK,YAAY,CAAC,IAAI,EAAE,CAAC;QAClD,IAAI,OAAO,CAAC,IAAI,CAAC,cAAc,EAAE,CAAC;YAChC,MAAM,OAAO,GAAG,kBAAkB,CAAC,IAAI,CAAC,OAAO,CAAC;YAChD,IAAI,OAAO,EAAE,CAAC;gBACZ,MAAM,WAAW,GACf,OAAO,CAAC,cAAc,CAAC,IAAI,OAAO,CAAC,cAAc,CAAC,CAAC;gBACrD,IAAI,OAAO,GAAG,KAAK,CAAC;gBACpB,IAAI,OAAO,WAAW,KAAK,QAAQ,EAAE,CAAC;oBACpC,OAAO,GAAG,CAAC,WAAW,CAAC,QAAQ,CAAC,OAAO,CAAC;wBACtC,CAAC,WAAW,CAAC,QAAQ,CAAC,MAAM,CAAC;wBAC7B,CAAC,WAAW,CAAC,QAAQ,CAAC,uBAAuB,CAAC,CAAC;gBACnD,CAAC;qBAAM,IAAI,KAAK,CAAC,OAAO,CAAC,WAAW,CAAC,EAAE,CAAC;oBACtC,OAAO,GAAG,IAAI,CAAC;oBACf,KAAK,MAAM,MAAM,IAAI,WAAW,EAAE,CAAC;wBACjC,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC;4BAC3B,CAAC,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC;4BACxB,CAAC,MAAM,CAAC,QAAQ,CAAC,uBAAuB,CAAC,EAAE,CAAC;4BAC5C,OAAO,GAAG,KAAK,CAAC;4BAChB,MAAM;wBACR,CAAC;oBACH,CAAC;gBACH,CAAC;gBACD,IAAI,OAAO,EAAE,CAAC;oBACZ,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,gCAAgC,EAChD,kBAAkB,CAAC,YAAY,EAAE,kBAAkB,CAAC,MAAM,EAAE,WAAW,CAAC,CAAC;gBAC7E,CAAC;YACH,CAAC;QACH,CAAC;QACD,IAAI,OAAO,CAAC,IAAI,CAAC,oBAAoB;YACnC,CAAC,OAAO,kBAAkB,CAAC,IAAI,KAAK,QAAQ;gBAC1C,MAAM,CAAC,QAAQ,CAAC,kBAAkB,CAAC,IAAI,CAAC,CAAC,EAAE,CAAC;YAC9C,IAAI,CAAC,kBAAkB,CAAC,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,CAAC,oBAAoB,CAAC,EAAE,CAAC;gBACzE,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,qCAAqC,EACrD,kBAAkB,CAAC,YAAY,CAAC,CAAC;gBACnC,kBAAkB,GAAG,MAAM,kBAAkB,CAC3C,GAAsD,EAAE,cAAc,CAAC,CAAC;YAC5E,CAAC;YACD,8BAA8B;YAC9B,IAAI,CAAC,kBAAkB,IAAI,OAAO,kBAAkB,CAAC,IAAI,KAAK,QAAQ;gBACpE,CAAC,kBAAkB,CAAC,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,CAAC,oBAAoB,CAAC,EAAE,CAAC;gBACvE,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,gCAAgC,EAAE,GAAG,CAAC,YAAY,CAAC,CAAC;gBACtE,OAAO,kBAAkB,CAAC;YAC5B,CAAC;QACH,CAAC;QACD,kBAAkB,CAAC,eAAe,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAChD,kBAAkB,CAAC,YAAY;YAC7B,kBAAkB,CAAC,eAAe,GAAG,GAAG,CAAC,sBAAsB,CAAC;IACpE,CAAC;IACD,OAAO,kBAAkB,CAAC;AAC5B,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"process-html.d.ts","sourceRoot":"","sources":["../../src/life-cycle/process-html.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"process-html.d.ts","sourceRoot":"","sources":["../../src/life-cycle/process-html.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAC,gBAAgB,EAAE,kBAAkB,EAAC,MAAM,YAAY,CAAC;AACrE,OAAO,KAAK,EAAC,qBAAqB,EAAC,MAAM,eAAe,CAAC;AAKzD,OAAO,KAAK,EAAC,gBAAgB,EAAC,MAAM,wBAAwB,CAAC;AAsI7D,wBAAsB,WAAW,CAC/B,GAAG,EAAE,gBAAgB,EACrB,MAAM,EAAE,kBAAkB,EAC1B,OAAO,EAAE,qBAAqB,EAC9B,QAAQ,EAAE,gBAAgB,GACzB,OAAO,CAAC,gBAAgB,GAAG,IAAI,CAAC,CAqBlC"}
|
|
@@ -1,24 +1,11 @@
|
|
|
1
1
|
import { parseSrcset, stringifySrcset } from 'srcset';
|
|
2
|
+
import { load } from 'cheerio';
|
|
2
3
|
import { sources as defaultSources } from '../sources.js';
|
|
3
4
|
import { ResourceType } from '../resource.js';
|
|
4
5
|
import { processCssText } from './process-css.js';
|
|
5
6
|
import { error, skip } from '../logger/logger.js';
|
|
6
7
|
import { parseHtml } from './adapters.js';
|
|
7
|
-
|
|
8
|
-
if (res.type !== ResourceType.Html) {
|
|
9
|
-
return res;
|
|
10
|
-
}
|
|
11
|
-
const refUrl = res.redirectedUrl || res.url;
|
|
12
|
-
const savePath = refUrl === res.url ? res.savePath : undefined;
|
|
13
|
-
// useless since processRedirectedUrl enabled by default
|
|
14
|
-
// refUrl = await pipeline.linkRedirect(refUrl, null, res) || refUrl;
|
|
15
|
-
const depth = res.depth + 1;
|
|
16
|
-
// resources from inline css
|
|
17
|
-
const resources = [];
|
|
18
|
-
let doc = res.meta.doc;
|
|
19
|
-
if (!doc) {
|
|
20
|
-
res.meta.doc = doc = parseHtml(res, options);
|
|
21
|
-
}
|
|
8
|
+
async function processHtmlDoc(options, doc, res, pipeline, depth, resources, refUrl, savePath, submit) {
|
|
22
9
|
const sources = options.sources || defaultSources;
|
|
23
10
|
for (const { selector, attr, type } of sources) {
|
|
24
11
|
const elements = doc(selector);
|
|
@@ -113,6 +100,41 @@ export async function processHtml(res, submit, options, pipeline) {
|
|
|
113
100
|
}
|
|
114
101
|
}
|
|
115
102
|
}
|
|
103
|
+
const iframeSrcDocs = doc('iframe[srcdoc]');
|
|
104
|
+
for (let index = 0; index < iframeSrcDocs.length; index++) {
|
|
105
|
+
const elem = iframeSrcDocs.eq(index);
|
|
106
|
+
const attrValue = elem.attr('srcdoc');
|
|
107
|
+
if (!attrValue) {
|
|
108
|
+
continue;
|
|
109
|
+
}
|
|
110
|
+
try {
|
|
111
|
+
const iframeDoc = load(attrValue);
|
|
112
|
+
await processHtmlDoc(options, iframeDoc, res, pipeline, depth, resources, refUrl, savePath, submit);
|
|
113
|
+
const html = options.cheerioSerialize ?
|
|
114
|
+
iframeDoc.html(options.cheerioSerialize) : iframeDoc.html();
|
|
115
|
+
elem.attr('srcdoc', html);
|
|
116
|
+
}
|
|
117
|
+
catch (e) {
|
|
118
|
+
error.info('can not parse iframe srcdoc', res.url, res.rawUrl, e);
|
|
119
|
+
}
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
export async function processHtml(res, submit, options, pipeline) {
|
|
123
|
+
if (res.type !== ResourceType.Html) {
|
|
124
|
+
return res;
|
|
125
|
+
}
|
|
126
|
+
const refUrl = res.redirectedUrl || res.url;
|
|
127
|
+
const savePath = refUrl === res.url ? res.savePath : undefined;
|
|
128
|
+
// useless since processRedirectedUrl enabled by default
|
|
129
|
+
// refUrl = await pipeline.linkRedirect(refUrl, null, res) || refUrl;
|
|
130
|
+
const depth = res.depth + 1;
|
|
131
|
+
let doc = res.meta.doc;
|
|
132
|
+
if (!doc) {
|
|
133
|
+
res.meta.doc = doc = parseHtml(res, options);
|
|
134
|
+
}
|
|
135
|
+
// resources from inline css
|
|
136
|
+
const resources = [];
|
|
137
|
+
await processHtmlDoc(options, doc, res, pipeline, depth, resources, refUrl, savePath, submit);
|
|
116
138
|
if (resources.length) {
|
|
117
139
|
submit(resources);
|
|
118
140
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"process-html.js","sourceRoot":"","sources":["../../src/life-cycle/process-html.ts"],"names":[],"mappings":"AACA,OAAO,EAAC,WAAW,EAAE,eAAe,EAAC,MAAM,QAAQ,CAAC;AACpD,OAAO,EAAC,OAAO,IAAI,cAAc,EAAC,MAAM,eAAe,CAAC;AAIxD,OAAO,EAAC,YAAY,EAAC,MAAM,gBAAgB,CAAC;AAC5C,OAAO,EAAC,cAAc,EAAC,MAAM,kBAAkB,CAAC;AAChD,OAAO,EAAC,KAAK,EAAE,IAAI,EAAC,MAAM,qBAAqB,CAAC;AAEhD,OAAO,EAAC,SAAS,EAAC,MAAM,eAAe,CAAC;AAMxC,
|
|
1
|
+
{"version":3,"file":"process-html.js","sourceRoot":"","sources":["../../src/life-cycle/process-html.ts"],"names":[],"mappings":"AACA,OAAO,EAAC,WAAW,EAAE,eAAe,EAAC,MAAM,QAAQ,CAAC;AACpD,OAAO,EAAC,IAAI,EAAC,MAAM,SAAS,CAAC;AAC7B,OAAO,EAAC,OAAO,IAAI,cAAc,EAAC,MAAM,eAAe,CAAC;AAIxD,OAAO,EAAC,YAAY,EAAC,MAAM,gBAAgB,CAAC;AAC5C,OAAO,EAAC,cAAc,EAAC,MAAM,kBAAkB,CAAC;AAChD,OAAO,EAAC,KAAK,EAAE,IAAI,EAAC,MAAM,qBAAqB,CAAC;AAEhD,OAAO,EAAC,SAAS,EAAC,MAAM,eAAe,CAAC;AAMxC,KAAK,UAAU,cAAc,CAC3B,OAA8B,EAC9B,GAAkB,EAClB,GAAqB,EACrB,QAA0B,EAC1B,KAAa,EACb,SAAqB,EACrB,MAAc,EACd,QAA4B,EAC5B,MAA0B;IAE1B,MAAM,OAAO,GAA0B,OAAO,CAAC,OAAO,IAAI,cAAc,CAAC;IACzE,KAAK,MAAM,EAAC,QAAQ,EAAE,IAAI,EAAE,IAAI,EAAC,IAAI,OAAO,EAAE,CAAC;QAC7C,MAAM,QAAQ,GAAY,GAAG,CAAC,QAAQ,CAAC,CAAC;QACxC,KAAK,IAAI,KAAK,GAAG,CAAC,EAAE,KAAK,GAAG,QAAQ,CAAC,MAAM,EAAE,KAAK,EAAE,EAAE,CAAC;YACrD,MAAM,IAAI,GAAG,QAAQ,CAAC,EAAE,CAAC,KAAK,CAAC,CAAC;YAChC,MAAM,SAAS,GAAkB,IAAI,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACzD,IAAI,CAAC,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;gBACxB,cAAc;gBACd,IAAI,IAAI,KAAK,YAAY,CAAC,SAAS,EAAE,CAAC;oBACpC,IAAI,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;oBAC1B,IAAI,CAAC,OAAO;wBAAE,SAAS;oBACvB,OAAO,GAAG,MAAM,cAAc,CAAC,OAAO,EAAE,GAAG,EAAE,OAAO,EAClD,QAAQ,EAAE,KAAK,EAAE,SAAS,CAAC,CAAC;oBAC9B,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;gBACrB,CAAC;gBACD,SAAS;YACX,CAAC;iBAAM,IAAI,IAAI,KAAK,YAAY,CAAC,SAAS,EAAE,CAAC;gBAC3C,MAAM,OAAO,GAAW,MAAM,cAAc,CAAC,SAAS,EAAE,GAAG,EAAE,OAAO,EAClE,QAAQ,EAAE,KAAK,EAAE,SAAS,CAAC,CAAC;gBAC9B,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;gBACzB,SAAS;YACX,CAAC;YACD,IAAI,KAAe,EAAE,YAAyC,CAAC;YAC/D,IAAI,IAAI,KAAK,QAAQ,EAAE,CAAC;gBACtB,IAAI,CAAC;oBACH,YAAY,GAAG,WAAW,CAAC,SAAS,CAAC,CAAC;gBACxC,CAAC;gBAAC,OAAO,CAAC,EAAE,CAAC;oBACX,KAAK,CAAC,IAAI,CAAC,yBAAyB,EAAE,SAAS,EAAE,CAAC,CAAC,CAAC;oBACpD,uCAAuC;oBACvC,SAAS;gBACX,CAAC;gBACD,KAAK,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;YACvC,CAAC;iBAAM,CAAC;gBACN,KAAK,GAAG,CAAC,SAAS,CAAC,CAAC;gBACpB,YAAY,GAAG,SAAS,CAAC;YAC3B,CAAC;YACD,KAAK,IAAI,SAAS,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,SAAS,GAAG,CAAC,EAAE,SAAS,EAAE,EAAE,CAAC;gBACrE,MAAM,YAAY,GAAW,KAAK,CAAC,SAAS,CAAC,CAAC;gBAC9C,mBAAmB;gBACnB,IAAI,CAAC,YAAY,EAAE,CAAC;oBAClB,SAAS;gBACX,CAAC;gBACD,MAAM,IAAI,GACR,MAAM,QAAQ,CAAC,YAAY,CAAC,YAAY,EAAE,IAAI,EAAE,GAAG,CAAC,CAAC;gBACvD,IAAI,CAAC,IAAI,EAAE,CAAC;oBACV,IAAI,IAAI,CAAC,cAAc,EAAE,EAAE,CAAC;wBAC1B,IAAI,CAAC,KAAK,CAAC,mBAAmB,EAAE,YAAY,EAAE,MAAM,CAAC,CAAC;oBACxD,CAAC;oBACD,SAAS;gBACX,CAAC;gBACD,MAAM,QAAQ,GACZ,MAAM,QAAQ,CAAC,kBAAkB,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,GAAG,CAAC,CAAC;gBAC3D,IAAI,CAAC,QAAQ,EAAE,CAAC;oBACd,IAAI,IAAI,CAAC,cAAc,EAAE,EAAE,CAAC;wBAC1B,IAAI,CAAC,KAAK,CAAC,yBAAyB,EAClC,YAAY,EAAE,IAAI,EAAE,MAAM,CAAC,CAAC;oBAChC,CAAC;oBACD,SAAS;gBACX,CAAC;gBACD,IAAI,QAAQ,GAAoB,MAAM,QAAQ,CAAC,cAAc,CAC3D,QAAQ,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,EAC7B,GAAG,CAAC,SAAS,EAAE,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,EACzC,QAAQ,EAAE,GAAG,CAAC,IAAI,CAAC,CAAC;gBACtB,QAAQ,GAAG,MAAM,QAAQ,CAAC,qBAAqB,CAAC,QAAQ,EAAE,IAAI,EAAE,GAAG,EAAE,OAAO,CAAC,CAAC;gBAC9E,IAAI,CAAC,QAAQ,EAAE,CAAC;oBACd,IAAI,IAAI,CAAC,cAAc,EAAE,EAAE,CAAC;wBAC1B,IAAI,CAAC,KAAK,CAAC,4BAA4B,EACrC,YAAY,EAAE,IAAI,EAAE,QAAQ,EAAE,MAAM,CAAC,CAAC;oBAC1C,CAAC;oBACD,SAAS;gBACX,CAAC;gBACD,IAAI,CAAC,QAAQ,CAAC,6BAA6B,EAAE,CAAC;oBAC5C,MAAM,CAAC,QAAQ,CAAC,CAAC;gBACnB,CAAC;gBACD,IAAI,IAAI,KAAK,QAAQ,EAAE,CAAC;oBACtB,+BAA+B;oBAC/B,mDAAmD;oBACnD,6DAA6D;oBAC5D,YAAkC,CAAC,SAAS,CAAC,CAAC,GAAG,GAAG,QAAQ,CAAC,WAAW,CAAC;gBAC5E,CAAC;qBAAM,CAAC;oBACN,YAAY,GAAG,QAAQ,CAAC,WAAW,CAAC;oBACpC,6BAA6B;oBAC7B,IAAI,YAAY,KAAK,OAAO,IAAI,YAAY,KAAK,QAAQ,EAAE,CAAC;wBAC1D,YAAY,GAAG,EAAE,CAAC;oBACpB,CAAC;gBACH,CAAC;YACH,CAAC;YACD,IAAI,IAAI,KAAK,QAAQ,EAAE,CAAC;gBACtB,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,eAAe,CAAC,YAAkC,CAAC,CAAC,CAAC;YACvE,CAAC;iBAAM,IAAI,IAAI,EAAE,CAAC;gBAChB,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,YAAsB,CAAC,CAAC;YAC1C,CAAC;iBAAM,CAAC;gBACN,KAAK,CAAC,IAAI,CAAC,mBAAmB,EAAE,KAAK,EAAE,YAAY,EAAE,MAAM,CAAC,CAAC;YAC/D,CAAC;QACH,CAAC;IACH,CAAC;IACD,MAAM,aAAa,GAAG,GAAG,CAAC,gBAAgB,CAAC,CAAC;IAE5C,KAAK,IAAI,KAAK,GAAG,CAAC,EAAE,KAAK,GAAG,aAAa,CAAC,MAAM,EAAE,KAAK,EAAE,EAAE,CAAC;QAC1D,MAAM,IAAI,GAAG,aAAa,CAAC,EAAE,CAAC,KAAK,CAAC,CAAC;QACrC,MAAM,SAAS,GAAkB,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QACrD,IAAI,CAAC,SAAS,EAAE,CAAC;YACf,SAAS;QACX,CAAC;QACD,IAAI,CAAC;YACH,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC;YAClC,MAAM,cAAc,CAAC,OAAO,EAAE,SAAS,EAAE,GAAG,EAAE,QAAQ,EAAE,KAAK,EAAE,SAAS,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,CAAC,CAAC;YACpG,MAAM,IAAI,GAAG,OAAO,CAAC,gBAAgB,CAAC,CAAC;gBACrC,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,gBAAgB,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC;YAC9D,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC;QAC5B,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,KAAK,CAAC,IAAI,CAAC,6BAA6B,EAAE,GAAG,CAAC,GAAG,EAAE,GAAG,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;QACpE,CAAC;IACH,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,GAAqB,EACrB,MAA0B,EAC1B,OAA8B,EAC9B,QAA0B;IAE1B,IAAI,GAAG,CAAC,IAAI,KAAK,YAAY,CAAC,IAAI,EAAE,CAAC;QACnC,OAAO,GAAG,CAAC;IACb,CAAC;IACD,MAAM,MAAM,GAAW,GAAG,CAAC,aAAa,IAAI,GAAG,CAAC,GAAG,CAAC;IACpD,MAAM,QAAQ,GAAG,MAAM,KAAK,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS,CAAC;IAC/D,wDAAwD;IACxD,qEAAqE;IAErE,MAAM,KAAK,GAAW,GAAG,CAAC,KAAK,GAAG,CAAC,CAAC;IACpC,IAAI,GAAG,GAAyB,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC;IAC7C,IAAI,CAAC,GAAG,EAAE,CAAC;QACT,GAAG,CAAC,IAAI,CAAC,GAAG,GAAG,GAAG,GAAG,SAAS,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;IAC/C,CAAC;IACD,4BAA4B;IAC5B,MAAM,SAAS,GAAe,EAAE,CAAC;IACjC,MAAM,cAAc,CAAC,OAAO,EAAE,GAAG,EAAE,GAAG,EAAE,QAAQ,EAAE,KAAK,EAAE,SAAS,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,CAAC,CAAC;IAC9F,IAAI,SAAS,CAAC,MAAM,EAAE,CAAC;QACrB,MAAM,CAAC,SAAS,CAAC,CAAC;IACpB,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC"}
|
package/lib/options.d.ts
CHANGED
|
@@ -10,6 +10,7 @@ import type { CheerioOptionsInterface } from './types.js';
|
|
|
10
10
|
*/
|
|
11
11
|
export interface StaticDownloadMeta extends Record<string, string | number | boolean | void> {
|
|
12
12
|
detectIncompleteHtml?: '</html>' | '</body>' | string;
|
|
13
|
+
warnForNonHtml?: boolean;
|
|
13
14
|
}
|
|
14
15
|
/**
|
|
15
16
|
* Options which should not be changed at runtime, and safe for cloning
|
package/lib/options.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"options.d.ts","sourceRoot":"","sources":["../src/options.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAC,YAAY,EAAE,aAAa,EAAe,YAAY,EAAC,MAAM,KAAK,CAAC;AAEhF,OAAO,KAAK,EAAC,gBAAgB,EAAE,YAAY,EAAC,MAAM,eAAe,CAAC;AAElE,OAAO,KAAK,EAAC,mBAAmB,EAAE,cAAc,EAAC,MAAM,uBAAuB,CAAC;AAM/E,OAAO,EAAC,eAAe,EAAC,MAAM,2BAA2B,CAAC;AAC1D,OAAO,KAAK,EAAC,kBAAkB,EAAC,MAAM,uBAAuB,CAAC;AAE9D,OAAO,KAAK,EAAC,gBAAgB,EAAC,MAAM,cAAc,CAAC;AACnD,OAAO,KAAK,EAAC,uBAAuB,EAAC,MAAM,YAAY,CAAC;AAExD;;GAEG;AACH,MAAM,WAAW,kBACf,SAAQ,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,GAAG,OAAO,GAAE,IAAI,CAAC;IACvD,oBAAoB,CAAC,EAAE,SAAS,GAAG,SAAS,GAAG,MAAM,CAAC;
|
|
1
|
+
{"version":3,"file":"options.d.ts","sourceRoot":"","sources":["../src/options.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAC,YAAY,EAAE,aAAa,EAAe,YAAY,EAAC,MAAM,KAAK,CAAC;AAEhF,OAAO,KAAK,EAAC,gBAAgB,EAAE,YAAY,EAAC,MAAM,eAAe,CAAC;AAElE,OAAO,KAAK,EAAC,mBAAmB,EAAE,cAAc,EAAC,MAAM,uBAAuB,CAAC;AAM/E,OAAO,EAAC,eAAe,EAAC,MAAM,2BAA2B,CAAC;AAC1D,OAAO,KAAK,EAAC,kBAAkB,EAAC,MAAM,uBAAuB,CAAC;AAE9D,OAAO,KAAK,EAAC,gBAAgB,EAAC,MAAM,cAAc,CAAC;AACnD,OAAO,KAAK,EAAC,uBAAuB,EAAC,MAAM,YAAY,CAAC;AAExD;;GAEG;AACH,MAAM,WAAW,kBACf,SAAQ,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,GAAG,OAAO,GAAE,IAAI,CAAC;IACvD,oBAAoB,CAAC,EAAE,SAAS,GAAG,SAAS,GAAG,MAAM,CAAC;IACtD,cAAc,CAAC,EAAE,OAAO,CAAC;CAC1B;AAED;;GAEG;AACH,MAAM,WAAW,qBAAqB;IACpC;;OAEG;IACH,SAAS,EAAE,MAAM,CAAC;IAElB;;;;;;OAMG;IACH,YAAY,CAAC,EAAE,MAAM,CAAC;IAEtB;;;OAGG;IACH,QAAQ,EAAE,MAAM,CAAC;IAEjB;;OAEG;IACH,WAAW,EAAE,MAAM,CAAC;IAEpB;;;;;OAKG;IACH,QAAQ,EAAE,MAAM,CAAC,YAAY,EAAE,gBAAgB,CAAC,CAAC;IAEjD;;;;;OAKG;IACH,WAAW,CAAC,EAAE,MAAM,CAAC;IAErB;;OAEG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;IAExB;;;OAGG;IACH,sBAAsB,CAAC,EAAE,OAAO,CAAC;IAEjC;;OAEG;IACH,YAAY,CAAC,EAAE,uBAAuB,CAAC;IAEvC;;OAEG;IACH,gBAAgB,CAAC,EAAE,uBAAuB,CAAC;IAE3C;;OAEG;IACH,OAAO,CAAC,EAAE,gBAAgB,EAAE,CAAC;IAE7B;;;;;;;;;OASG;IACH,GAAG,CAAC,EAAE,cAAc,CAAC;IAErB;;OAEG;IACH,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;IAEtB;;OAEG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IAEnB;;OAEG;IACH,IAAI,EAAE,kBAAkB,CAAC;IAEzB;;;;OAIG;IACH,oBAAoB,CAAC,EAAE,OAAO,CAAC;IAE/B;;;OAGG;IACH,qBAAqB,CAAC,EAAE,OAAO,CAAC;IAEhC;;;;;;OAMG;IACH,4BAA4B,CAAC,EAAE,OAAO,CAAC;CACxC;AAED,MAAM,WAAW,eAAgB,SAAQ,qBAAqB,EAAE,mBAAmB;IACjF;;;;OAIG;IACH,GAAG,EAAE,cAAc,CAAC;IAEpB;;;;OAIG;IACH,uBAAuB,CAAC,EAAE,MAAM,CAAC;IACjC,qBAAqB,CAAC,EAAE,CAAC,UAAU,EAAE,kBAAkB,KAAK,IAAI,CAAC;IAEjE;;OAEG;IACH,eAAe,EAAE,OAAO,eAAe,CAAC;CACzC;AAED,MAAM,MAAM,aAAa,GAAG,CAAC,YAAY,GAAG,YAAY,CAAC,GAAG;IAC1D,kBAAkB,EAAE,OAAO,CAAC;CAC7B,CAAC;AAyBF;;;;;GAKG;AACH,eAAO,MAAM,kBAAkB,EAAE,aAqEhC,CAAC;AAyBF,wBAAgB,sBAAsB,CACpC,OAAO,EAAE,mBAAmB,GAAG,OAAO,CAAC,eAAe,CAAC,GAAG,eAAe,CAkD1E;AAED,wBAAgB,oBAAoB,CAAC,OAAO,EAAE,eAAe,GAAG,eAAe,CAiB9E;AAED,wBAAgB,oBAAoB,CAClC,OAAO,EAAE,eAAe,GAAG,CAAC,MAAM,eAAe,CAAC,EAClD,eAAe,CAAC,EAAE,OAAO,CAAC,qBAAqB,CAAC,GAAG,eAAe,CAanE"}
|
package/lib/options.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"options.js","sourceRoot":"","sources":["../src/options.ts"],"names":[],"mappings":"AACA,OAAO,GAAG,EAAE,EAAC,OAAO,EAAC,MAAM,KAAK,CAAC;AAEjC,OAAO,EAAC,cAAc,EAAC,MAAM,eAAe,CAAC;AAE7C,oCAAoC;AACpC,OAAO,EAAC,eAAe,EAAC,MAAM,mCAAmC,CAAC;AAClE,OAAO,EAAC,KAAK,EAAC,MAAM,oBAAoB,CAAC;AACzC,oCAAoC;AACpC,OAAO,EAAC,MAAM,EAAC,MAAM,oCAAoC,CAAC;AAC1D,OAAO,EAAC,eAAe,EAAC,MAAM,2BAA2B,CAAC;AAE1D,OAAO,EAAC,UAAU,EAAC,MAAM,WAAW,CAAC;
|
|
1
|
+
{"version":3,"file":"options.js","sourceRoot":"","sources":["../src/options.ts"],"names":[],"mappings":"AACA,OAAO,GAAG,EAAE,EAAC,OAAO,EAAC,MAAM,KAAK,CAAC;AAEjC,OAAO,EAAC,cAAc,EAAC,MAAM,eAAe,CAAC;AAE7C,oCAAoC;AACpC,OAAO,EAAC,eAAe,EAAC,MAAM,mCAAmC,CAAC;AAClE,OAAO,EAAC,KAAK,EAAC,MAAM,oBAAoB,CAAC;AACzC,oCAAoC;AACpC,OAAO,EAAC,MAAM,EAAC,MAAM,oCAAoC,CAAC;AAC1D,OAAO,EAAC,eAAe,EAAC,MAAM,2BAA2B,CAAC;AAE1D,OAAO,EAAC,UAAU,EAAC,MAAM,WAAW,CAAC;AAgKrC,MAAM,eAAe,GAAG,IAAI,CAAC;AAE7B,MAAM,eAAe,GAAgB,IAAI,GAAG,CAAC;IAC3C,0CAA0C;IAC1C,WAAW;IACX,6CAA6C;IAC7C,YAAY;IACZ,mCAAmC;IACnC,YAAY;IACZ,wCAAwC;IACxC,cAAc;IACd,+DAA+D;IAC/D,OAAO;IACP,kDAAkD;IAClD,WAAW;IACX,0BAA0B;IAC1B,aAAa;IACb,wBAAwB;IACxB,WAAW;IACX,4BAA4B;IAC5B,WAAW;CACZ,CAAC,CAAC;AAEH;;;;;GAKG;AACH,MAAM,CAAC,MAAM,kBAAkB,GAAkB,CAAC,WAAwB,EAAU,EAAE;IACpF,MAAM,EAAC,YAAY,EAAE,YAAY,EAAE,KAAK,EAAE,GAAG,EAAC,GAAG,WAAW,CAAC;IAE7D,IAAI,YAAY,GAAG,YAAY,CAAC,KAAK,EAAE,CAAC;QACrC,GAAqB,CAAC,kBAAkB,GAAG,IAAI,CAAC;QACjD,OAAO,CAAC,CAAC;IACX,CAAC;SAAM,CAAC;QACL,GAAqB,CAAC,kBAAkB,GAAG,KAAK,CAAC;IACpD,CAAC;IAED,MAAM,SAAS,GAAY,GAAG,CAAC,OAAO;QACpC,CAAC,YAAY,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;YAC5B,YAAY,CAAC,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC;YACnD,GAAG,CAAC,OAAO,CAAC,MAAM,KAAK,KAAK,CAAC,CAAC;IAClC,MAAM,YAAY,GAAG,GAAG,CAAC,IAAI;QAC3B,CAAC,YAAY,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC;YAC/B,YAAY,CAAC,UAAU,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC;YAC5C,eAAe,CAAC,GAAG,CAAC,GAAG,CAAC,IAAc,CAAC,CAAC,CAAC;IAC7C,MAAM,aAAa,GAAwB,YAAY,CAAC,WAAW;QACjE,GAAG,CAAC,QAAQ;QACZ,YAAY,CAAC,WAAW,CAAC,QAAQ,CAAC,GAAG,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC;IAC7D,IAAI,CAAC,SAAS,IAAI,CAAC,CAAC,YAAY,IAAI,CAAC,aAAa;QAChD,GAAG,CAAC,IAAI,KAAK,WAAW,IAAI,GAAG,CAAC,IAAI,KAAK,cAAc,CAAC,EAAE,CAAC;QAE3D,IAAI,GAAG,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,IAAI,KAAK,WAAW;YACpC,GAAG,CAAC,QAAQ,IAAI,GAAG,CAAC,QAAQ,CAAC,UAAU,KAAK,GAAG,CAAC,CAAC,EAAE,CAAC;YACpD,KAAK,CAAC,KAAK,CAAC,wBAAwB,EAClC,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,IAAI,EAAG,GAAoB,CAAC,KAAK,EAAE,GAAG,CAAC,OAAO,EAC5D,GAAG,CAAC,QAAQ,IAAI,GAAG,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC;QAC7C,CAAC;QACD,OAAO,CAAC,CAAC;IACX,CAAC;IACD,IAAI,KAAK,GAAW,CAAC,CAAC,CAAC,GAAG,CAAC,YAAY,GAAG,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,GAAG,GAAG,CAAC;IAC5E,IAAI,YAAY,GAAG,CAAC,EAAE,CAAC;QACrB,KAAK,IAAI,IAAI,CAAC;IAChB,CAAC;IACD,IAAI,KAAK,GAAG,eAAe,EAAE,CAAC;QAC5B,KAAK,GAAG,eAAe,GAAG,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,GAAG,CAAC,GAAG,IAAI,CAAC;IACzD,CAAC;IACD,wBAAwB;IACxB,IAAI,GAAG,CAAC,IAAI,KAAK,WAAW;QAC1B,GAAG,CAAC,QAAQ,IAAI,GAAG,CAAC,QAAQ,CAAC,UAAU,KAAK,GAAG,EAAE,CAAC;QAClD,mBAAmB;QACnB,KAAK,IAAI,IAAI,GAAG,IAAI,CAAC,MAAM,EAAE,GAAG,IAAI,CAAC;QACrC,IAAI,GAAG,CAAC,QAAQ,CAAC,OAAO;YACtB,GAAG,CAAC,QAAQ,CAAC,OAAO,CAAC,aAAa,CAAC,EAAE,CAAC;YACtC,IAAI,UAAU,GAAG,QAAQ,CAAC,GAAG,CAAC,QAAQ,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC,CAAC;YAC/D,IAAI,MAAM,CAAC,KAAK,CAAC,UAAU,CAAC,EAAE,CAAC;gBAC7B,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,QAAQ,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YAC5E,CAAC;iBAAM,CAAC;gBACN,UAAU,IAAI,IAAI,CAAC;YACrB,CAAC;YACD,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,EAAE,CAAC;gBACvB,UAAU,IAAI,CAAC,CAAC;gBAChB,IAAI,UAAU,GAAG,CAAC,EAAE,CAAC;oBACnB,UAAU,GAAG,CAAC,CAAC;gBACjB,CAAC;gBACD,IAAI,YAAY,CAAC,aAAa,EAAE,CAAC;oBAC/B,IAAI,UAAU,IAAI,YAAY,CAAC,aAAa,EAAE,CAAC;wBAC7C,KAAK,GAAG,UAAU,CAAC;oBACrB,CAAC;gBACH,CAAC;qBAAM,CAAC;oBACN,KAAK,GAAG,UAAU,CAAC;gBACrB,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IACD,KAAK,IAAI,CAAC,CAAC;IACX,OAAO,KAAK,CAAC;AACf,CAAC,CAAC;AAEF,MAAM,cAAc,GAAoB;IACtC,IAAI,EAAE,EAAE;IACR,OAAO,EAAE,EAAE;IACX,WAAW,EAAE,EAAE;IACf,eAAe;IACf,cAAc;IACd,kBAAkB,EAAE,EAAE;IACtB,QAAQ,EAAE,EAAE;IACZ,mBAAmB;IACnB,QAAQ,EAAE,EAAiC;IAC3C,YAAY,EAAE,EAAE;IAChB,SAAS,EAAE,EAAE;IACb,QAAQ,EAAE,CAAC;IACX,IAAI,EAAE;QACJ,oBAAoB,EAAE,SAAS;KAChC;IACD,oBAAoB,EAAE,EAAE;IACxB,qBAAqB,EAAE,EAAE;IACzB,GAAG,EAAE,EAAE;IACP,UAAU,EAAE,EAAE;IACd,sBAAsB,EAAE,IAAI;CAC7B,CAAC;AAEF,MAAM,UAAU,sBAAsB,CACpC,OAAuD;IACvD,MAAM,MAAM,GAAoB,UAAU,CAAC,OAAO,EAAE,cAAc,CAAC,CAAC;IACpE,+CAA+C;IAC/C,IAAI,CAAC,MAAM,CAAC,WAAW,IAAI,MAAM,CAAC,WAAW,GAAG,CAAC,EAAE,CAAC;QAClD,MAAM,CAAC,WAAW,GAAG,EAAE,CAAC;IAC1B,CAAC;IACD,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC;QACtB,MAAM,CAAC,GAAG,CAAC,KAAK,GAAG,EAAE,CAAC;IACxB,CAAC;IACD,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC;QAClC,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,WAAW,GAAG,CAAC,eAAe,CAAC,CAAC;IACnD,CAAC;IACD,IAAI,CAAC,CAAC,cAAc,IAAI,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC;QACpC,MAAM,CAAC,GAAG,CAAC,YAAY,GAAG,EAAE,CAAC;IAC/B,CAAC;IACD,IAAI,CAAC,CAAC,sBAAsB,IAAI,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC;QAC5C,MAAM,CAAC,GAAG,CAAC,oBAAoB,GAAG,IAAI,CAAC;IACzC,CAAC;IACD,IAAI,CAAC,CAAC,SAAS,IAAI,MAAM,CAAC,GAAG,CAAC,IAAI,MAAM,CAAC,GAAG,CAAC,OAAO,KAAK,SAAS,EAAE,CAAC;QACnE,MAAM,CAAC,GAAG,CAAC,OAAO,GAAG;YACnB,MAAM,EAAE,IAAI;YACZ,OAAO,EAAE,IAAI;YACb,aAAa,EAAE,IAAI;YACnB,MAAM,EAAE,IAAI;YACZ,IAAI,EAAE,IAAI;YACV,QAAQ,EAAE,MAAM;YAChB,OAAO,EAAE,MAAM;SAChB,CAAC;IACJ,CAAC;IACD,IAAI,CAAC,CAAC,OAAO,IAAI,MAAM,CAAC,GAAG,CAAC,IAAI,MAAM,CAAC,GAAG,CAAC,KAAK,KAAK,SAAS,EAAE,CAAC;QAC/D,MAAM,CAAC,GAAG,CAAC,KAAK,GAAG;YACjB,KAAK,EAAE,EAAE;YACT,aAAa,EAAE,KAAK;YACpB,cAAc,EAAE,kBAAkB;SACnC,CAAC;IACJ,CAAC;SAAM,IAAI,OAAO,MAAM,CAAC,GAAG,CAAC,KAAK,KAAK,QAAQ,EAAE,CAAC;QAChD,MAAM,CAAC,GAAG,CAAC,KAAK,GAAG;YACjB,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,KAAK;YACvB,aAAa,EAAE,KAAK;YACpB,cAAc,EAAE,kBAAkB;SACnC,CAAC;IACJ,CAAC;SAAM,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,cAAc,EAAE,CAAC;QAC5C,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,cAAc,GAAG,kBAAkB,CAAC;IACvD,CAAC;IACD,IAAI,OAAO,CAAC,uBAAuB;QACjC,OAAO,CAAC,uBAAuB,GAAG,CAAC;QACnC,CAAC,OAAO,CAAC,qBAAqB,EAAE,CAAC;QACjC,OAAO,CAAC,qBAAqB,GAAG,MAAM,CAAC;IACzC,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,MAAM,UAAU,oBAAoB,CAAC,OAAwB;;IAC3D,IAAI,CAAC,OAAO,CAAC,WAAW,IAAI,OAAO,CAAC,WAAW,GAAG,CAAC,EAAE,CAAC;QACpD,MAAM,IAAI,SAAS,CAAC,mBAAmB,GAAG,OAAO,CAAC,WAAW,CAAC,CAAC;IACjE,CAAC;IACD,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,CAAC;QACvB,MAAM,IAAI,SAAS,CAAC,uBAAuB,CAAC,CAAC;IAC/C,CAAC;IACD,IAAI,MAAA,OAAO,CAAC,YAAY,0CAAE,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;QACzC,OAAO,CAAC,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;IAClE,CAAC;IACD,IAAI,CAAC,OAAO,CAAC,QAAQ,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC;QAClD,MAAM,IAAI,SAAS,CAAC,iCAAiC,CAAC,CAAC;IACzD,CAAC;IACD,IAAI,CAAC,OAAO,CAAC,UAAU,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,MAAM,EAAE,CAAC;QACtD,MAAM,IAAI,SAAS,CAAC,mCAAmC,CAAC,CAAC;IAC3D,CAAC;IACD,OAAO,sBAAsB,CAAC,OAAO,CAAC,CAAC;AACzC,CAAC;AAED,MAAM,UAAU,oBAAoB,CAClC,OAAkD,EAClD,eAAgD;IAChD,MAAM,GAAG,GAAoB,OAAO,OAAO,KAAK,UAAU,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC;IACjF,IAAI,CAAC,eAAe,EAAE,CAAC;QACrB,OAAO,GAAG,CAAC;IACb,CAAC;IACD,IAAI,GAAG,CAAC,IAAI,IAAI,eAAe,CAAC,IAAI,EAAE,CAAC;QACrC,eAAe,CAAC,IAAI,GAAG,MAAM,CAAC,MAAM,CAAC,GAAG,CAAC,IAAI,EAAE,eAAe,CAAC,IAAI,CAAC,CAAC;IACvE,CAAC;IACD,IAAI,GAAG,CAAC,GAAG,IAAI,eAAe,CAAC,GAAG,EAAE,CAAC;QACnC,MAAM,OAAO,GAAG,GAAG,CAAC,QAAQ,CAAC,OAAO,CAAC;QACrC,eAAe,CAAC,GAAG,GAAG,IAAI,OAAO,CAAC,GAAG,CAAC,GAAG,EAAE,eAAe,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;IAC3E,CAAC;IACD,OAAO,oBAAoB,CAAC,MAAM,CAAC,MAAM,CAAC,GAAG,EAAE,eAAe,CAAC,CAAC,CAAC;AACnE,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "website-scrap-engine",
|
|
3
|
-
"version": "0.8.
|
|
3
|
+
"version": "0.8.1",
|
|
4
4
|
"description": "Configurable website scraper in typescript",
|
|
5
5
|
"main": "lib",
|
|
6
6
|
"types": "lib",
|
|
@@ -30,11 +30,11 @@
|
|
|
30
30
|
},
|
|
31
31
|
"devDependencies": {
|
|
32
32
|
"@jest/globals": "^29.7.0",
|
|
33
|
-
"@types/node": "^22.
|
|
33
|
+
"@types/node": "^22.13.4",
|
|
34
34
|
"@types/urijs": "^1.19.25",
|
|
35
|
-
"@typescript-eslint/eslint-plugin": "^8.
|
|
36
|
-
"@typescript-eslint/parser": "^8.
|
|
37
|
-
"eslint": "^9.
|
|
35
|
+
"@typescript-eslint/eslint-plugin": "^8.24.1",
|
|
36
|
+
"@typescript-eslint/parser": "^8.24.1",
|
|
37
|
+
"eslint": "^9.20.1",
|
|
38
38
|
"jest": "^29.7.0",
|
|
39
39
|
"ts-jest": "^29.2.5",
|
|
40
40
|
"typescript": "^5.7.3"
|
|
@@ -165,6 +165,33 @@ export async function downloadResource(
|
|
|
165
165
|
return downloadedResource;
|
|
166
166
|
}
|
|
167
167
|
if (downloadedResource.type === ResourceType.Html) {
|
|
168
|
+
if (options.meta.warnForNonHtml) {
|
|
169
|
+
const headers = downloadedResource.meta.headers;
|
|
170
|
+
if (headers) {
|
|
171
|
+
const contentType =
|
|
172
|
+
headers['content-type'] || headers['Content-Type'];
|
|
173
|
+
let nonHtml = false;
|
|
174
|
+
if (typeof contentType === 'string') {
|
|
175
|
+
nonHtml = !contentType.includes('/html') &&
|
|
176
|
+
!contentType.includes('/xml') &&
|
|
177
|
+
!contentType.includes('application/xhtml+xml');
|
|
178
|
+
} else if (Array.isArray(contentType)) {
|
|
179
|
+
nonHtml = true;
|
|
180
|
+
for (const header of contentType) {
|
|
181
|
+
if (!header.includes('/html') &&
|
|
182
|
+
!header.includes('/xml') &&
|
|
183
|
+
!header.includes('application/xhtml+xml')) {
|
|
184
|
+
nonHtml = false;
|
|
185
|
+
break;
|
|
186
|
+
}
|
|
187
|
+
}
|
|
188
|
+
}
|
|
189
|
+
if (nonHtml) {
|
|
190
|
+
logger.error.warn('Detected non-html content type',
|
|
191
|
+
downloadedResource.downloadLink, downloadedResource.rawUrl, contentType);
|
|
192
|
+
}
|
|
193
|
+
}
|
|
194
|
+
}
|
|
168
195
|
if (options.meta.detectIncompleteHtml &&
|
|
169
196
|
(typeof downloadedResource.body === 'string' ||
|
|
170
197
|
Buffer.isBuffer(downloadedResource.body))) {
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import type {SrcSetDefinition} from 'srcset';
|
|
2
2
|
import {parseSrcset, stringifySrcset} from 'srcset';
|
|
3
|
+
import {load} from 'cheerio';
|
|
3
4
|
import {sources as defaultSources} from '../sources.js';
|
|
4
5
|
import type {DownloadResource, SubmitResourceFunc} from './types.js';
|
|
5
6
|
import type {StaticDownloadOptions} from '../options.js';
|
|
@@ -14,26 +15,17 @@ import type {Cheerio, CheerioStatic} from '../types.js';
|
|
|
14
15
|
type Writeable<T> = { -readonly [P in keyof T]: T[P] };
|
|
15
16
|
type WriteableSrcSet = Writeable<SrcSetDefinition>;
|
|
16
17
|
|
|
17
|
-
|
|
18
|
-
res: DownloadResource,
|
|
19
|
-
submit: SubmitResourceFunc,
|
|
18
|
+
async function processHtmlDoc(
|
|
20
19
|
options: StaticDownloadOptions,
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
const depth: number = res.depth + 1;
|
|
31
|
-
// resources from inline css
|
|
32
|
-
const resources: Resource[] = [];
|
|
33
|
-
let doc: CheerioStatic | void = res.meta.doc;
|
|
34
|
-
if (!doc) {
|
|
35
|
-
res.meta.doc = doc = parseHtml(res, options);
|
|
36
|
-
}
|
|
20
|
+
doc: CheerioStatic,
|
|
21
|
+
res: DownloadResource,
|
|
22
|
+
pipeline: PipelineExecutor,
|
|
23
|
+
depth: number,
|
|
24
|
+
resources: Resource[],
|
|
25
|
+
refUrl: string,
|
|
26
|
+
savePath: string | undefined,
|
|
27
|
+
submit: SubmitResourceFunc
|
|
28
|
+
) {
|
|
37
29
|
const sources: typeof defaultSources = options.sources || defaultSources;
|
|
38
30
|
for (const {selector, attr, type} of sources) {
|
|
39
31
|
const elements: Cheerio = doc(selector);
|
|
@@ -130,6 +122,48 @@ export async function processHtml(
|
|
|
130
122
|
}
|
|
131
123
|
}
|
|
132
124
|
}
|
|
125
|
+
const iframeSrcDocs = doc('iframe[srcdoc]');
|
|
126
|
+
|
|
127
|
+
for (let index = 0; index < iframeSrcDocs.length; index++) {
|
|
128
|
+
const elem = iframeSrcDocs.eq(index);
|
|
129
|
+
const attrValue: string | void = elem.attr('srcdoc');
|
|
130
|
+
if (!attrValue) {
|
|
131
|
+
continue;
|
|
132
|
+
}
|
|
133
|
+
try {
|
|
134
|
+
const iframeDoc = load(attrValue);
|
|
135
|
+
await processHtmlDoc(options, iframeDoc, res, pipeline, depth, resources, refUrl, savePath, submit);
|
|
136
|
+
const html = options.cheerioSerialize ?
|
|
137
|
+
iframeDoc.html(options.cheerioSerialize) : iframeDoc.html();
|
|
138
|
+
elem.attr('srcdoc', html);
|
|
139
|
+
} catch (e) {
|
|
140
|
+
error.info('can not parse iframe srcdoc', res.url, res.rawUrl, e);
|
|
141
|
+
}
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
export async function processHtml(
|
|
146
|
+
res: DownloadResource,
|
|
147
|
+
submit: SubmitResourceFunc,
|
|
148
|
+
options: StaticDownloadOptions,
|
|
149
|
+
pipeline: PipelineExecutor
|
|
150
|
+
): Promise<DownloadResource | void> {
|
|
151
|
+
if (res.type !== ResourceType.Html) {
|
|
152
|
+
return res;
|
|
153
|
+
}
|
|
154
|
+
const refUrl: string = res.redirectedUrl || res.url;
|
|
155
|
+
const savePath = refUrl === res.url ? res.savePath : undefined;
|
|
156
|
+
// useless since processRedirectedUrl enabled by default
|
|
157
|
+
// refUrl = await pipeline.linkRedirect(refUrl, null, res) || refUrl;
|
|
158
|
+
|
|
159
|
+
const depth: number = res.depth + 1;
|
|
160
|
+
let doc: CheerioStatic | void = res.meta.doc;
|
|
161
|
+
if (!doc) {
|
|
162
|
+
res.meta.doc = doc = parseHtml(res, options);
|
|
163
|
+
}
|
|
164
|
+
// resources from inline css
|
|
165
|
+
const resources: Resource[] = [];
|
|
166
|
+
await processHtmlDoc(options, doc, res, pipeline, depth, resources, refUrl, savePath, submit);
|
|
133
167
|
if (resources.length) {
|
|
134
168
|
submit(resources);
|
|
135
169
|
}
|
package/src/options.ts
CHANGED