webpeel 0.5.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -36,6 +36,45 @@ const JUNK_SELECTORS = [
36
36
  // SVG decorations (icons, decorative elements)
37
37
  'svg:not(img svg)',
38
38
  ];
39
/**
 * Filter HTML by including or excluding specific tags/selectors.
 * Applied BEFORE markdown conversion for precise content control.
 *
 * Exclusions run first, so an excluded element nested inside an included
 * element is removed from the result.
 *
 * When include selectors overlap, each element is emitted only once: an element
 * matched by several selectors is not repeated, and a match that is nested
 * inside another match is dropped because its markup is already part of the
 * ancestor's output.
 *
 * @param html - HTML to filter
 * @param includeTags - Only keep content from these elements (e.g., ['article', 'main', '.content'])
 * @param excludeTags - Remove these elements (e.g., ['nav', 'footer', 'header', '.sidebar'])
 * @returns Filtered HTML. If includeTags is given but nothing matches, an empty
 *   string; if includeTags is empty/omitted, the whole document minus exclusions.
 */
export function filterByTags(html, includeTags, excludeTags) {
  const $ = cheerio.load(html);

  // Apply exclude tags first (remove unwanted elements)
  if (excludeTags?.length) {
    for (const selector of excludeTags) {
      $(selector).remove();
    }
  }

  // No include list: return the document with exclusions applied
  if (!includeTags?.length) {
    return $.html();
  }

  // Collect matches by node identity. A Set keeps first-seen order
  // (selector order, then document order) while discarding repeats.
  const matched = new Set();
  for (const selector of includeTags) {
    $(selector).each((_, el) => {
      matched.add(el);
    });
  }

  // Drop matches that live inside another match; serializing the ancestor
  // already includes them, so keeping both would duplicate content.
  const kept = [...matched].filter(
    (el) => !$(el).parents().toArray().some((ancestor) => matched.has(ancestor)),
  );

  // Joining an empty list yields '', which is the "nothing matched" result
  return kept.map((el) => $.html($(el))).join('\n');
}
39
78
  /**
40
79
  * Extract content matching a CSS selector
41
80
  * Returns filtered HTML or full HTML if selector matches nothing
@@ -1 +1 @@
1
- {"version":3,"file":"markdown.js","sourceRoot":"","sources":["../../src/core/markdown.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,eAAe,MAAM,UAAU,CAAC;AACvC,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAEnC,MAAM,cAAc,GAAG;IACrB,4BAA4B;IAC5B,QAAQ,EAAE,OAAO,EAAE,UAAU,EAAE,QAAQ,EAAE,wBAAwB;IACjE,yBAAyB;IACzB,KAAK,EAAE,QAAQ,EAAE,QAAQ,EAAE,OAAO;IAClC,qBAAqB,EAAE,iBAAiB,EAAE,sBAAsB;IAChE,wBAAwB,EAAE,iBAAiB;IAC3C,UAAU,EAAE,SAAS,EAAE,UAAU,EAAE,WAAW,EAAE,WAAW;IAC3D,aAAa,EAAE,cAAc,EAAE,uBAAuB;IACtD,aAAa,EAAE,uBAAuB;IACtC,iBAAiB;IACjB,gBAAgB,EAAE,KAAK,EAAE,gBAAgB,EAAE,aAAa;IACxD,mBAAmB,EAAE,oBAAoB,EAAE,kBAAkB;IAC7D,mBAAmB;IACnB,gBAAgB,EAAE,gBAAgB,EAAE,iBAAiB;IACrD,mBAAmB,EAAE,gBAAgB;IACrC,oBAAoB,EAAE,iBAAiB;IACvC,0BAA0B;IAC1B,mBAAmB,EAAE,kBAAkB,EAAE,kBAAkB;IAC3D,oBAAoB,EAAE,6BAA6B;IACnD,mBAAmB;IACnB,eAAe,EAAE,kBAAkB,EAAE,mBAAmB;IACxD,mBAAmB;IACnB,oBAAoB,EAAE,uBAAuB,EAAE,sBAAsB;IACrE,gBAAgB,EAAE,2BAA2B,EAAE,mBAAmB;IAClE,kBAAkB;IAClB,gBAAgB,EAAE,oBAAoB,EAAE,wBAAwB;IAChE,yBAAyB,EAAE,yBAAyB;IACpD,WAAW;IACX,WAAW,EAAE,WAAW,EAAE,oBAAoB;IAC9C,+CAA+C;IAC/C,kBAAkB;CACnB,CAAC;AAEF;;;GAGG;AACH,MAAM,UAAU,aAAa,CAAC,IAAY,EAAE,QAAgB,EAAE,OAAkB;IAC9E,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,uBAAuB;IACvB,IAAI,OAAO,EAAE,MAAM,EAAE,CAAC;QACpB,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC;IAC1C,CAAC;IAED,2BAA2B;IAC3B,MAAM,QAAQ,GAAG,CAAC,CAAC,QAAQ,CAAC,CAAC;IAC7B,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC1B,oDAAoD;QACpD,OAAO,IAAI,CAAC;IACd,CAAC;IAED,0CAA0C;IAC1C,OAAO,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC9D,CAAC;AAED;;;GAGG;AACH,SAAS,SAAS,CAAC,IAAY;IAC7B,2CAA2C;IAC3C,IAAI,IAAI,CAAC,MAAM,GAAG,EAAE,GAAG,IAAI,GAAG,IAAI,EAAE,CAAC,CAAC,OAAO;QAC3C,MAAM,IAAI,KAAK,CAAC,sCAAsC,CAAC,CAAC;IAC1D,CAAC;IAED,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,uBAAuB;IACvB,cAAc,CAAC,OAAO,CAAC,CAAC,QAAQ,EAAE,EAAE;QAClC,CAAC,CAAC,QAAQ,CAAC,CAAC,MAAM,EAAE,CAAC;IACvB,CAA
C,CAAC,CAAC;IAEH,mCAAmC;IACnC,CAAC,CAAC,oBAAoB,CAAC,CAAC,MAAM,EAAE,CAAC;IAEjC,uCAAuC;IACvC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE;QACtB,MAAM,KAAK,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC;QACtB,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;QACjC,IAAI,CAAC,IAAI,IAAI,KAAK,CAAC,QAAQ,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC3C,KAAK,CAAC,MAAM,EAAE,CAAC;QACjB,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC;AAClB,CAAC;AAED;;;GAGG;AACH,MAAM,sBAAsB,GAAG;IAC7B,sBAAsB;IACtB,cAAc;IACd,uBAAuB;IACvB,SAAS;IACT,eAAe;IACf,MAAM;IACN,eAAe,EAAE,kBAAkB,EAAE,eAAe,EAAE,gBAAgB;IACtE,YAAY,EAAE,aAAa,EAAE,eAAe;IAC5C,UAAU,EAAE,eAAe,EAAE,UAAU,EAAE,OAAO;IAChD,UAAU,EAAE,eAAe;CAC5B,CAAC;AAEF;;;GAGG;AACH,MAAM,UAAU,iBAAiB,CAAC,IAAY;IAC5C,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,KAAK,MAAM,QAAQ,IAAI,sBAAsB,EAAE,CAAC;QAC9C,MAAM,EAAE,GAAG,CAAC,CAAC,QAAQ,CAAC,CAAC;QACvB,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAClB,kEAAkE;YAClE,MAAM,IAAI,GAAG,EAAE,CAAC,KAAK,EAAE,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;YACtC,IAAI,IAAI,CAAC,MAAM,IAAI,GAAG,EAAE,CAAC;gBACvB,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,KAAK,EAAE,CAAC,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC;YACtD,CAAC;QACH,CAAC;IACH,CAAC;IAED,wEAAwE;IACxE,IAAI,MAAM,GAAgC,IAAI,CAAC;IAC/C,IAAI,OAAO,GAAG,CAAC,CAAC;IAEhB,CAAC,CAAC,cAAc,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE;QACjC,MAAM,KAAK,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC;QACtB,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;QACjC,sEAAsE;QACtE,IAAI,IAAI,CAAC,MAAM,GAAG,OAAO,IAAI,IAAI,CAAC,MAAM,IAAI,GAAG,EAAE,CAAC;YAChD,6CAA6C;YAC7C,MAAM,MAAM,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC;YAC9B,IAAI,MAAM,CAAC,MAAM,IAAI,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;gBAC9E,MAAM,GAAG,KAAK,CAAC;gBACf,OAAO,GAAG,IAAI,CAAC,MAAM,CAAC;YACxB,CAAC;QACH,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,IAAI,MAAM,IAAI,OAAO,GAAG,GAAG,EAAE,CAAC;QAC5B,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC
;IAClD,CAAC;IAED,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC;AACnC,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,gBAAgB,CAAC,OAAe,EAAE,YAAoB;IACpE,IAAI,CAAC,OAAO,IAAI,OAAO,CAAC,MAAM,GAAG,EAAE;QAAE,OAAO,CAAC,CAAC;IAE9C,MAAM,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC;IAClC,MAAM,OAAO,GAAG,YAAY,CAAC,MAAM,CAAC;IAEpC,uFAAuF;IACvF,MAAM,gBAAgB,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;IACxE,iEAAiE;IACjE,MAAM,gBAAgB,GAAG,gBAAgB,GAAG,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QACtD,gBAAgB,GAAG,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;YAC/B,gBAAgB,GAAG,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;gBAC/B,gBAAgB,GAAG,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;IAEtC,wEAAwE;IACxE,MAAM,QAAQ,GAAG,OAAO,CAAC,OAAO,CAAC,qBAAqB,EAAE,EAAE,CAAC,CAAC;IAC5D,MAAM,WAAW,GAAG,QAAQ,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC;IACrE,MAAM,YAAY,GAAG,IAAI,CAAC,GAAG,CAAC,WAAW,GAAG,GAAG,EAAE,CAAC,CAAC,CAAC;IAEpD,4DAA4D;IAC5D,MAAM,WAAW,GAAG,YAAY,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;IACzD,MAAM,aAAa,GAAG,OAAO,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;IAEjE,wCAAwC;IACxC,MAAM,WAAW,GAAG,UAAU,GAAG,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QACzC,UAAU,GAAG,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;YACxB,UAAU,GAAG,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;IAEjC,mBAAmB;IACnB,MAAM,OAAO,GAAG,CACd,gBAAgB,GAAG,GAAG;QACtB,YAAY,GAAG,GAAG;QAClB,CAAC,WAAW,GAAG,aAAa,CAAC,GAAG,GAAG;QACnC,WAAW,GAAG,GAAG,CAClB,CAAC;IAEF,OAAO,IAAI,CAAC,KAAK,CAAC,OAAO,GAAG,GAAG,CAAC,GAAG,GAAG,CAAC;AACzC,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,cAAc,CAAC,IAAY,EAAE,QAA4B;IACvE,MAAM,WAAW,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAEpC,MAAM,QAAQ,GAAG,IAAI,eAAe,CAAC;QACnC,YAAY,EAAE,KAAK;QACnB,cAAc,EAAE,QAAQ;QACxB,gBAAgB,EAAE,GAAG;QACrB,WAAW,EAAE,GAAG;QAChB,eAAe,EAAE,IAAI;KACtB,CAAC,CAAC;IAEH,kBAAkB;IAClB,QAAQ,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC;IAE7D,kDAAkD;IAClD,QAAQ,CAAC,OAAO,CAAC,QAAQ,EAAE;QACzB,MAAM
,EAAE,KAAK;QACb,WAAW,EAAE,CAAC,QAAQ,EAAE,IAAI,EAAE,EAAE;YAC9B,MAAM,GAAG,GAAI,IAAY,CAAC,GAAG,CAAC;YAC9B,MAAM,GAAG,GAAI,IAAY,CAAC,GAAG,CAAC;YAC9B,IAAI,GAAG,EAAE,CAAC;gBACR,OAAO,KAAK,GAAG,KAAK,GAAG,GAAG,CAAC;YAC7B,CAAC;YACD,OAAO,EAAE,CAAC;QACZ,CAAC;KACF,CAAC,CAAC;IAEH,oCAAoC;IACpC,QAAQ,CAAC,OAAO,CAAC,YAAY,EAAE;QAC7B,MAAM,EAAE,CAAC,IAAI,EAAE,EAAE;YACf,OAAO,IAAI,CAAC,QAAQ,KAAK,KAAK,IAAI,IAAI,CAAC,UAAU,EAAE,QAAQ,KAAK,MAAM,CAAC;QACzE,CAAC;QACD,WAAW,EAAE,CAAC,QAAQ,EAAE,IAAI,EAAE,EAAE;YAC9B,MAAM,QAAQ,GAAG,IAAI,CAAC,UAAiB,CAAC;YACxC,MAAM,SAAS,GAAG,QAAQ,CAAC,YAAY,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;YACvD,MAAM,QAAQ,GAAG,SAAS,CAAC,KAAK,CAAC,gBAAgB,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;YAC9D,OAAO,SAAS,GAAG,QAAQ,GAAG,IAAI,GAAG,QAAQ,CAAC,WAAW,GAAG,WAAW,CAAC;QAC1E,CAAC;KACF,CAAC,CAAC;IAEH,IAAI,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC;IAE9C,kEAAkE;IAClE,IAAI,QAAQ,CAAC,MAAM,GAAG,IAAI,GAAG,IAAI,EAAE,CAAC,CAAC,yBAAyB;QAC5D,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,GAAG,IAAI,CAAC,CAAC;IAC5C,CAAC;IAED,8DAA8D;IAC9D,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,IAAI,EAAE,CAAC,EAAE,GAAG,EAAE,EAAE;QAC3D,IAAI,CAAC,KAAK,CAAC;YAAE,OAAO,IAAI,CAAC;QACzB,MAAM,SAAS,GAAG,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,CAAC;QAC3C,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,EAAE,KAAK,EAAE,CAAC;QACrC,IAAI,SAAS,IAAI,SAAS;YAAE,OAAO,GAAG,CAAC;QACvC,OAAO,GAAG,GAAG,IAAI,GAAG,IAAI,CAAC;IAC3B,CAAC,EAAE,EAAE,CAAC,CAAC;IAEP,qCAAqC;IACrC,QAAQ,GAAG,QAAQ,CAAC,IAAI,EAAE,CAAC;IAE3B,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,UAAU,CAAC,IAAY;IACrC,MAAM,WAAW,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IACpC,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IAEpC,8CAA8C;IAC9C,IAAI,IAAI,GAAG,EAAE,CAAC;IACd,CAAC,CAAC,+BAA+B,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE;QAClD,MAAM,OAAO,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;QACtC,IAAI,OAAO,EAAE,CAAC;YACZ,IAAI,IAAI,OAAO,GAAG,MAAM,CAAC;QAC3B,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,yDAAyD;IACzD,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC;QACjB,IAAI,GAA
G,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC;IAC1B,CAAC;IAED,gCAAgC;IAChC,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;IACvC,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC;IAEpC,OAAO,IAAI,CAAC,IAAI,EAAE,CAAC;AACrB,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;AACpC,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,qBAAqB,CAAC,OAAe,EAAE,SAAiB;IACtE,MAAM,aAAa,GAAG,cAAc,CAAC,OAAO,CAAC,CAAC;IAE9C,gCAAgC;IAChC,IAAI,aAAa,IAAI,SAAS,EAAE,CAAC;QAC/B,OAAO,OAAO,CAAC;IACjB,CAAC;IAED,mBAAmB;IACnB,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAElC,0BAA0B;IAC1B,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,IAAI,iBAAiB,GAAG,CAAC,CAAC;IAC1B,IAAI,iBAAiB,GAAG,KAAK,CAAC;IAE9B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,UAAU,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC;QACxC,MAAM,SAAS,GAAG,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAEzC,mCAAmC;QACnC,IAAI,CAAC,iBAAiB,IAAI,SAAS,EAAE,CAAC;YACpC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAClB,iBAAiB,IAAI,UAAU,CAAC;YAChC,iBAAiB,GAAG,IAAI,CAAC;YACzB,SAAS;QACX,CAAC;QAED,gDAAgD;QAChD,IAAI,iBAAiB,GAAG,UAAU,GAAG,SAAS,EAAE,CAAC;YAC/C,YAAY;YACZ,MAAM;QACR,CAAC;QAED,eAAe;QACf,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAClB,iBAAiB,IAAI,UAAU,CAAC;IAClC,CAAC;IAED,wBAAwB;IACxB,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAChB,MAAM,CAAC,IAAI,CAAC,0BAA0B,SAAS,UAAU,CAAC,CAAC;IAE3D,OAAO,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC3B,CAAC"}
1
+ {"version":3,"file":"markdown.js","sourceRoot":"","sources":["../../src/core/markdown.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,eAAe,MAAM,UAAU,CAAC;AACvC,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAEnC,MAAM,cAAc,GAAG;IACrB,4BAA4B;IAC5B,QAAQ,EAAE,OAAO,EAAE,UAAU,EAAE,QAAQ,EAAE,wBAAwB;IACjE,yBAAyB;IACzB,KAAK,EAAE,QAAQ,EAAE,QAAQ,EAAE,OAAO;IAClC,qBAAqB,EAAE,iBAAiB,EAAE,sBAAsB;IAChE,wBAAwB,EAAE,iBAAiB;IAC3C,UAAU,EAAE,SAAS,EAAE,UAAU,EAAE,WAAW,EAAE,WAAW;IAC3D,aAAa,EAAE,cAAc,EAAE,uBAAuB;IACtD,aAAa,EAAE,uBAAuB;IACtC,iBAAiB;IACjB,gBAAgB,EAAE,KAAK,EAAE,gBAAgB,EAAE,aAAa;IACxD,mBAAmB,EAAE,oBAAoB,EAAE,kBAAkB;IAC7D,mBAAmB;IACnB,gBAAgB,EAAE,gBAAgB,EAAE,iBAAiB;IACrD,mBAAmB,EAAE,gBAAgB;IACrC,oBAAoB,EAAE,iBAAiB;IACvC,0BAA0B;IAC1B,mBAAmB,EAAE,kBAAkB,EAAE,kBAAkB;IAC3D,oBAAoB,EAAE,6BAA6B;IACnD,mBAAmB;IACnB,eAAe,EAAE,kBAAkB,EAAE,mBAAmB;IACxD,mBAAmB;IACnB,oBAAoB,EAAE,uBAAuB,EAAE,sBAAsB;IACrE,gBAAgB,EAAE,2BAA2B,EAAE,mBAAmB;IAClE,kBAAkB;IAClB,gBAAgB,EAAE,oBAAoB,EAAE,wBAAwB;IAChE,yBAAyB,EAAE,yBAAyB;IACpD,WAAW;IACX,WAAW,EAAE,WAAW,EAAE,oBAAoB;IAC9C,+CAA+C;IAC/C,kBAAkB;CACnB,CAAC;AAEF;;;;;;;;GAQG;AACH,MAAM,UAAU,YAAY,CAAC,IAAY,EAAE,WAAsB,EAAE,WAAsB;IACvF,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,sDAAsD;IACtD,IAAI,WAAW,EAAE,MAAM,EAAE,CAAC;QACxB,WAAW,CAAC,OAAO,CAAC,QAAQ,CAAC,EAAE;YAC7B,CAAC,CAAC,QAAQ,CAAC,CAAC,MAAM,EAAE,CAAC;QACvB,CAAC,CAAC,CAAC;IACL,CAAC;IAED,oDAAoD;IACpD,IAAI,WAAW,EAAE,MAAM,EAAE,CAAC;QACxB,gCAAgC;QAChC,MAAM,QAAQ,GAA2B,EAAE,CAAC;QAC5C,WAAW,CAAC,OAAO,CAAC,QAAQ,CAAC,EAAE;YAC7B,MAAM,OAAO,GAAG,CAAC,CAAC,QAAQ,CAAC,CAAC;YAC5B,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACvB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE;oBACrB,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;gBACvB,CAAC,CAAC,CAAC;YACL,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,mDAAmD;QACnD,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACxB,OAAO,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACnD,CAAC;QAED,6DAA6D;QAC7D,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,uBAAuB;IACvB,OAAO,CA
AC,CAAC,IAAI,EAAE,CAAC;AAClB,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,aAAa,CAAC,IAAY,EAAE,QAAgB,EAAE,OAAkB;IAC9E,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,uBAAuB;IACvB,IAAI,OAAO,EAAE,MAAM,EAAE,CAAC;QACpB,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC;IAC1C,CAAC;IAED,2BAA2B;IAC3B,MAAM,QAAQ,GAAG,CAAC,CAAC,QAAQ,CAAC,CAAC;IAC7B,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC1B,oDAAoD;QACpD,OAAO,IAAI,CAAC;IACd,CAAC;IAED,0CAA0C;IAC1C,OAAO,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC9D,CAAC;AAED;;;GAGG;AACH,SAAS,SAAS,CAAC,IAAY;IAC7B,2CAA2C;IAC3C,IAAI,IAAI,CAAC,MAAM,GAAG,EAAE,GAAG,IAAI,GAAG,IAAI,EAAE,CAAC,CAAC,OAAO;QAC3C,MAAM,IAAI,KAAK,CAAC,sCAAsC,CAAC,CAAC;IAC1D,CAAC;IAED,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,uBAAuB;IACvB,cAAc,CAAC,OAAO,CAAC,CAAC,QAAQ,EAAE,EAAE;QAClC,CAAC,CAAC,QAAQ,CAAC,CAAC,MAAM,EAAE,CAAC;IACvB,CAAC,CAAC,CAAC;IAEH,mCAAmC;IACnC,CAAC,CAAC,oBAAoB,CAAC,CAAC,MAAM,EAAE,CAAC;IAEjC,uCAAuC;IACvC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE;QACtB,MAAM,KAAK,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC;QACtB,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;QACjC,IAAI,CAAC,IAAI,IAAI,KAAK,CAAC,QAAQ,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC3C,KAAK,CAAC,MAAM,EAAE,CAAC;QACjB,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC;AAClB,CAAC;AAED;;;GAGG;AACH,MAAM,sBAAsB,GAAG;IAC7B,sBAAsB;IACtB,cAAc;IACd,uBAAuB;IACvB,SAAS;IACT,eAAe;IACf,MAAM;IACN,eAAe,EAAE,kBAAkB,EAAE,eAAe,EAAE,gBAAgB;IACtE,YAAY,EAAE,aAAa,EAAE,eAAe;IAC5C,UAAU,EAAE,eAAe,EAAE,UAAU,EAAE,OAAO;IAChD,UAAU,EAAE,eAAe;CAC5B,CAAC;AAEF;;;GAGG;AACH,MAAM,UAAU,iBAAiB,CAAC,IAAY;IAC5C,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,KAAK,MAAM,QAAQ,IAAI,sBAAsB,EAAE,CAAC;QAC9C,MAAM,EAAE,GAAG,CAAC,CAAC,QAAQ,CAAC,CAAC;QACvB,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAClB,kEAAkE;YAClE,MAAM,IAAI,GAAG,EAAE,CAAC,KAAK,EAAE,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;YACtC,IAA
I,IAAI,CAAC,MAAM,IAAI,GAAG,EAAE,CAAC;gBACvB,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,KAAK,EAAE,CAAC,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC;YACtD,CAAC;QACH,CAAC;IACH,CAAC;IAED,wEAAwE;IACxE,IAAI,MAAM,GAAgC,IAAI,CAAC;IAC/C,IAAI,OAAO,GAAG,CAAC,CAAC;IAEhB,CAAC,CAAC,cAAc,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE;QACjC,MAAM,KAAK,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC;QACtB,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;QACjC,sEAAsE;QACtE,IAAI,IAAI,CAAC,MAAM,GAAG,OAAO,IAAI,IAAI,CAAC,MAAM,IAAI,GAAG,EAAE,CAAC;YAChD,6CAA6C;YAC7C,MAAM,MAAM,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC;YAC9B,IAAI,MAAM,CAAC,MAAM,IAAI,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;gBAC9E,MAAM,GAAG,KAAK,CAAC;gBACf,OAAO,GAAG,IAAI,CAAC,MAAM,CAAC;YACxB,CAAC;QACH,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,IAAI,MAAM,IAAI,OAAO,GAAG,GAAG,EAAE,CAAC;QAC5B,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC;IAClD,CAAC;IAED,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC;AACnC,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,gBAAgB,CAAC,OAAe,EAAE,YAAoB;IACpE,IAAI,CAAC,OAAO,IAAI,OAAO,CAAC,MAAM,GAAG,EAAE;QAAE,OAAO,CAAC,CAAC;IAE9C,MAAM,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC;IAClC,MAAM,OAAO,GAAG,YAAY,CAAC,MAAM,CAAC;IAEpC,uFAAuF;IACvF,MAAM,gBAAgB,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;IACxE,iEAAiE;IACjE,MAAM,gBAAgB,GAAG,gBAAgB,GAAG,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QACtD,gBAAgB,GAAG,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;YAC/B,gBAAgB,GAAG,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;gBAC/B,gBAAgB,GAAG,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;IAEtC,wEAAwE;IACxE,MAAM,QAAQ,GAAG,OAAO,CAAC,OAAO,CAAC,qBAAqB,EAAE,EAAE,CAAC,CAAC;IAC5D,MAAM,WAAW,GAAG,QAAQ,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC;IACrE,MAAM,YAAY,GAAG,IAAI,CAAC,GAAG,CAAC,WAAW,GAAG,GAAG,EAAE,CAAC,CAAC,CAAC;IAEpD,4DAA4D;IAC5D,MAAM,WAAW,GAAG,YAAY,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CA
AC;IACzD,MAAM,aAAa,GAAG,OAAO,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;IAEjE,wCAAwC;IACxC,MAAM,WAAW,GAAG,UAAU,GAAG,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QACzC,UAAU,GAAG,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;YACxB,UAAU,GAAG,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;IAEjC,mBAAmB;IACnB,MAAM,OAAO,GAAG,CACd,gBAAgB,GAAG,GAAG;QACtB,YAAY,GAAG,GAAG;QAClB,CAAC,WAAW,GAAG,aAAa,CAAC,GAAG,GAAG;QACnC,WAAW,GAAG,GAAG,CAClB,CAAC;IAEF,OAAO,IAAI,CAAC,KAAK,CAAC,OAAO,GAAG,GAAG,CAAC,GAAG,GAAG,CAAC;AACzC,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,cAAc,CAAC,IAAY,EAAE,QAA4B;IACvE,MAAM,WAAW,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAEpC,MAAM,QAAQ,GAAG,IAAI,eAAe,CAAC;QACnC,YAAY,EAAE,KAAK;QACnB,cAAc,EAAE,QAAQ;QACxB,gBAAgB,EAAE,GAAG;QACrB,WAAW,EAAE,GAAG;QAChB,eAAe,EAAE,IAAI;KACtB,CAAC,CAAC;IAEH,kBAAkB;IAClB,QAAQ,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC;IAE7D,kDAAkD;IAClD,QAAQ,CAAC,OAAO,CAAC,QAAQ,EAAE;QACzB,MAAM,EAAE,KAAK;QACb,WAAW,EAAE,CAAC,QAAQ,EAAE,IAAI,EAAE,EAAE;YAC9B,MAAM,GAAG,GAAI,IAAY,CAAC,GAAG,CAAC;YAC9B,MAAM,GAAG,GAAI,IAAY,CAAC,GAAG,CAAC;YAC9B,IAAI,GAAG,EAAE,CAAC;gBACR,OAAO,KAAK,GAAG,KAAK,GAAG,GAAG,CAAC;YAC7B,CAAC;YACD,OAAO,EAAE,CAAC;QACZ,CAAC;KACF,CAAC,CAAC;IAEH,oCAAoC;IACpC,QAAQ,CAAC,OAAO,CAAC,YAAY,EAAE;QAC7B,MAAM,EAAE,CAAC,IAAI,EAAE,EAAE;YACf,OAAO,IAAI,CAAC,QAAQ,KAAK,KAAK,IAAI,IAAI,CAAC,UAAU,EAAE,QAAQ,KAAK,MAAM,CAAC;QACzE,CAAC;QACD,WAAW,EAAE,CAAC,QAAQ,EAAE,IAAI,EAAE,EAAE;YAC9B,MAAM,QAAQ,GAAG,IAAI,CAAC,UAAiB,CAAC;YACxC,MAAM,SAAS,GAAG,QAAQ,CAAC,YAAY,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;YACvD,MAAM,QAAQ,GAAG,SAAS,CAAC,KAAK,CAAC,gBAAgB,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;YAC9D,OAAO,SAAS,GAAG,QAAQ,GAAG,IAAI,GAAG,QAAQ,CAAC,WAAW,GAAG,WAAW,CAAC;QAC1E,CAAC;KACF,CAAC,CAAC;IAEH,IAAI,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC;IAE9C,kEAAkE;IAClE,IAAI,QAAQ,CAAC,MAAM,GAAG,IAAI,GAAG,IAAI,EAAE,CAAC,CAAC,yBAAyB;QAC5D,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,GAAG,IAAI,CAAC,CAAC;IAC5C,CAAC;IAED,8DAA8D;IAC9D,QAAQ,GAAG,QAAQ,CAAC
,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,IAAI,EAAE,CAAC,EAAE,GAAG,EAAE,EAAE;QAC3D,IAAI,CAAC,KAAK,CAAC;YAAE,OAAO,IAAI,CAAC;QACzB,MAAM,SAAS,GAAG,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,CAAC;QAC3C,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,EAAE,KAAK,EAAE,CAAC;QACrC,IAAI,SAAS,IAAI,SAAS;YAAE,OAAO,GAAG,CAAC;QACvC,OAAO,GAAG,GAAG,IAAI,GAAG,IAAI,CAAC;IAC3B,CAAC,EAAE,EAAE,CAAC,CAAC;IAEP,qCAAqC;IACrC,QAAQ,GAAG,QAAQ,CAAC,IAAI,EAAE,CAAC;IAE3B,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,UAAU,CAAC,IAAY;IACrC,MAAM,WAAW,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IACpC,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IAEpC,8CAA8C;IAC9C,IAAI,IAAI,GAAG,EAAE,CAAC;IACd,CAAC,CAAC,+BAA+B,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE;QAClD,MAAM,OAAO,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;QACtC,IAAI,OAAO,EAAE,CAAC;YACZ,IAAI,IAAI,OAAO,GAAG,MAAM,CAAC;QAC3B,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,yDAAyD;IACzD,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC;QACjB,IAAI,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC;IAC1B,CAAC;IAED,gCAAgC;IAChC,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;IACvC,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC;IAEpC,OAAO,IAAI,CAAC,IAAI,EAAE,CAAC;AACrB,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;AACpC,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,qBAAqB,CAAC,OAAe,EAAE,SAAiB;IACtE,MAAM,aAAa,GAAG,cAAc,CAAC,OAAO,CAAC,CAAC;IAE9C,gCAAgC;IAChC,IAAI,aAAa,IAAI,SAAS,EAAE,CAAC;QAC/B,OAAO,OAAO,CAAC;IACjB,CAAC;IAED,mBAAmB;IACnB,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAElC,0BAA0B;IAC1B,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,IAAI,iBAAiB,GAAG,CAAC,CAAC;IAC1B,IAAI,iBAAiB,GAAG,KAAK,CAAC;IAE9B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,UAAU,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC;QACxC,MAAM,SAAS,GAAG,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAEzC,mCAAmC;QACnC,IAAI,CAAC,iBAAiB,IAAI,SAAS,EAAE,CAAC;YACpC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAClB,iBAAiB,IAAI,UAAU,CAAC;YAChC,iBAAiB,GAAG,IAAI,CAAC;YACzB,SAAS;QACX,CAAC
;QAED,gDAAgD;QAChD,IAAI,iBAAiB,GAAG,UAAU,GAAG,SAAS,EAAE,CAAC;YAC/C,YAAY;YACZ,MAAM;QACR,CAAC;QAED,eAAe;QACf,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAClB,iBAAiB,IAAI,UAAU,CAAC;IAClC,CAAC;IAED,wBAAwB;IACxB,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAChB,MAAM,CAAC,IAAI,CAAC,0BAA0B,SAAS,UAAU,CAAC,CAAC;IAE3D,OAAO,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC3B,CAAC"}
@@ -7,6 +7,15 @@ import type { PageMetadata } from '../types.js';
7
7
  * Returns absolute URLs, deduplicated
8
8
  */
9
9
  export declare function extractLinks(html: string, baseUrl: string): string[];
10
+ /**
11
+ * Extract all images referenced by an HTML document
12
+ * Resolves relative URLs against baseUrl and extracts metadata; URLs that are invalid or not http(s) are skipped
13
+ *
14
+ * @param html - HTML to extract images from
15
+ * @param baseUrl - Base URL used to resolve relative image paths
16
+ * @returns Array of image information, deduplicated by resolved absolute URL
17
+ */
18
+ export declare function extractImages(html: string, baseUrl: string): import('../types.js').ImageInfo[];
10
19
  /**
11
20
  * Extract all metadata from HTML
12
21
  */
@@ -1 +1 @@
1
- {"version":3,"file":"metadata.d.ts","sourceRoot":"","sources":["../../src/core/metadata.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAsHhD;;;GAGG;AACH,wBAAgB,YAAY,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,MAAM,EAAE,CAgCpE;AAED;;GAEG;AACH,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,QAAQ,EAAE,YAAY,CAAA;CAAE,CAarG"}
1
+ {"version":3,"file":"metadata.d.ts","sourceRoot":"","sources":["../../src/core/metadata.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAsHhD;;;GAGG;AACH,wBAAgB,YAAY,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,MAAM,EAAE,CAgCpE;AAED;;;;;;;GAOG;AACH,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,OAAO,aAAa,EAAE,SAAS,EAAE,CA8G9F;AAED;;GAEG;AACH,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,QAAQ,EAAE,YAAY,CAAA;CAAE,CAarG"}
@@ -145,6 +145,112 @@ export function extractLinks(html, baseUrl) {
145
145
  });
146
146
  return Array.from(links).sort();
147
147
  }
148
/**
 * Parse an HTML width/height attribute into a positive integer.
 * Returns undefined when the attribute is missing, non-numeric, zero or negative.
 */
function parseImageDimension(value) {
  if (!value) {
    return undefined;
  }
  const parsed = Number.parseInt(value, 10);
  return Number.isFinite(parsed) && parsed > 0 ? parsed : undefined;
}

/**
 * Resolve a possibly-relative image URL against baseUrl.
 * Returns the absolute href, or undefined when the URL is empty, cannot be
 * parsed, or does not use the http/https protocol.
 */
function resolveHttpImageUrl(rawUrl, baseUrl) {
  // An empty reference would resolve to baseUrl itself, which is not an image
  if (!rawUrl) {
    return undefined;
  }
  let absoluteUrl;
  try {
    absoluteUrl = new URL(rawUrl, baseUrl);
  } catch {
    // Invalid URL: extraction is best-effort, so the entry is simply skipped
    return undefined;
  }
  // SECURITY: Only allow HTTP and HTTPS protocols
  return ['http:', 'https:'].includes(absoluteUrl.protocol) ? absoluteUrl.href : undefined;
}

/**
 * Extract the URL of every candidate in a srcset attribute value
 * (format: "url 1x, url 2x" or "url 100w, url 200w").
 *
 * A URL is a run of non-whitespace characters, so commas INSIDE a URL
 * (CDN transform paths, data URIs) do not split it. Commas only separate
 * candidates when they trail a URL or follow its descriptors.
 */
function parseSrcsetUrls(srcset) {
  const isSpace = (ch) => /\s/.test(ch);
  const urls = [];
  const len = srcset.length;
  let pos = 0;

  while (pos < len) {
    // Skip whitespace and separator commas before the next candidate
    while (pos < len && (isSpace(srcset[pos]) || srcset[pos] === ',')) {
      pos += 1;
    }
    if (pos >= len) {
      break;
    }

    // The URL is everything up to the next whitespace
    let end = pos;
    while (end < len && !isSpace(srcset[end])) {
      end += 1;
    }
    const token = srcset.slice(pos, end);
    const url = token.replace(/,+$/, '');
    pos = end;

    if (url === token) {
      // No trailing comma on the URL: descriptors (e.g. "2x", "200w") follow
      // and run up to the next comma, which ends this candidate.
      const nextComma = srcset.indexOf(',', pos);
      pos = nextComma === -1 ? len : nextComma + 1;
    }

    if (url) {
      urls.push(url);
    }
  }

  return urls;
}

/**
 * Extract all images from HTML
 * Resolves relative URLs to absolute and extracts metadata
 *
 * Sources, in order: <img src>, <picture><source srcset>, and url(...) values
 * in inline style attributes that mention "background".
 *
 * Images are deduplicated by absolute URL. The first occurrence keeps its
 * position and its metadata; later occurrences only fill in fields the first
 * one lacks (they never overwrite existing alt/title/width/height).
 *
 * @param html - HTML to extract images from
 * @param baseUrl - Base URL for resolving relative paths
 * @returns Array of image information, deduplicated by src
 */
export function extractImages(html, baseUrl) {
  const $ = cheerio.load(html);
  // Map preserves insertion order, so results come back in first-seen order
  const images = new Map();

  // Register an image, merging with an earlier entry for the same URL
  const addImage = (info) => {
    const known = images.get(info.src);
    if (!known) {
      images.set(info.src, info);
      return;
    }
    for (const [key, value] of Object.entries(info)) {
      const isMissing = known[key] === undefined || known[key] === '';
      if (isMissing && value !== undefined && value !== '') {
        known[key] = value;
      }
    }
  };

  // Extract <img> tags
  $('img[src]').each((_, elem) => {
    const $img = $(elem);
    const src = resolveHttpImageUrl($img.attr('src'), baseUrl);
    if (!src) {
      return;
    }
    addImage({
      src,
      alt: $img.attr('alt') || '',
      title: $img.attr('title'),
      width: parseImageDimension($img.attr('width')),
      height: parseImageDimension($img.attr('height')),
    });
  });

  // Extract <picture><source> tags
  $('picture source[srcset]').each((_, elem) => {
    const $source = $(elem);
    const srcset = $source.attr('srcset');
    if (!srcset) {
      return;
    }
    // <source> has no alt of its own; borrow it from the picture's <img>
    const alt = $source.closest('picture').find('img').attr('alt') || '';
    for (const candidateUrl of parseSrcsetUrls(srcset)) {
      const src = resolveHttpImageUrl(candidateUrl, baseUrl);
      if (src) {
        addImage({ src, alt });
      }
    }
  });

  // Extract CSS background images
  $('[style*="background"]').each((_, elem) => {
    const style = $(elem).attr('style');
    if (!style) {
      return;
    }
    // Match every url(...) in the inline CSS; group 1 is the unquoted URL
    for (const match of style.matchAll(/url\(['"]?([^'")\s]+)['"]?\)/g)) {
      const src = resolveHttpImageUrl(match[1], baseUrl);
      if (src) {
        // Background images don't have alt text
        addImage({ src, alt: '' });
      }
    }
  });

  return Array.from(images.values());
}
148
254
  /**
149
255
  * Extract all metadata from HTML
150
256
  */
@@ -1 +1 @@
1
- {"version":3,"file":"metadata.js","sourceRoot":"","sources":["../../src/core/metadata.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAGnC;;;GAGG;AACH,SAAS,YAAY,CAAC,CAAqB;IACzC,uBAAuB;IACvB,IAAI,KAAK,GAAG,CAAC,CAAC,2BAA2B,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAC3D,IAAI,KAAK;QAAE,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC;IAE/B,oBAAoB;IACpB,KAAK,GAAG,CAAC,CAAC,4BAA4B,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IACxD,IAAI,KAAK;QAAE,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC;IAE/B,gBAAgB;IAChB,KAAK,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC;IAC1B,IAAI,KAAK;QAAE,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC;IAE/B,uBAAuB;IACvB,KAAK,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,EAAE,CAAC;IAC/B,IAAI,KAAK;QAAE,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC;IAE/B,OAAO,EAAE,CAAC;AACZ,CAAC;AAED;;;GAGG;AACH,SAAS,kBAAkB,CAAC,CAAqB;IAC/C,6BAA6B;IAC7B,IAAI,IAAI,GAAG,CAAC,CAAC,iCAAiC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAChE,IAAI,IAAI;QAAE,OAAO,IAAI,CAAC,IAAI,EAAE,CAAC;IAE7B,0BAA0B;IAC1B,IAAI,GAAG,CAAC,CAAC,kCAAkC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAC7D,IAAI,IAAI;QAAE,OAAO,IAAI,CAAC,IAAI,EAAE,CAAC;IAE7B,gCAAgC;IAChC,IAAI,GAAG,CAAC,CAAC,0BAA0B,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IACrD,IAAI,IAAI;QAAE,OAAO,IAAI,CAAC,IAAI,EAAE,CAAC;IAE7B,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;GAEG;AACH,SAAS,aAAa,CAAC,CAAqB;IAC1C,qBAAqB;IACrB,IAAI,MAAM,GAAG,CAAC,CAAC,iCAAiC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAClE,IAAI,MAAM;QAAE,OAAO,MAAM,CAAC,IAAI,EAAE,CAAC;IAEjC,sBAAsB;IACtB,MAAM,GAAG,CAAC,CAAC,qBAAqB,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAClD,IAAI,MAAM;QAAE,OAAO,MAAM,CAAC,IAAI,EAAE,CAAC;IAEjC,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;;GAGG;AACH,SAAS,gBAAgB,CAAC,CAAqB;IAC7C,6BAA6B;IAC7B,IAAI,SAAS,GAAG,CAAC,CAAC,yCAAyC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAC7E,IAAI,SAAS,EAAE,CAAC;QACd,IAAI,CAAC;YACH,OAAO,IAAI,IAAI,CAAC,SAAS,CAAC,CAAC,WAAW,EAAE,CAAC;QAC3C,CAAC;QAAC,MAAM,CAAC;YACP,yBAAyB;QAC3B,CAAC;IACH,CAAC;IAED,+BAA+B;IAC/B,SAAS,GAAG,CAAC,CAAC,gCAAgC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAChE,IAAI,SAAS,EAAE,CAAC;QACd,IAAI,CAAC;YACH,OAAO,IAAI,IAAI,CAAC,SAAS,C
AAC,CAAC,WAAW,EAAE,CAAC;QAC3C,CAAC;QAAC,MAAM,CAAC;YACP,yBAAyB;QAC3B,CAAC;IACH,CAAC;IAED,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;GAEG;AACH,SAAS,YAAY,CAAC,CAAqB;IACzC,eAAe;IACf,IAAI,KAAK,GAAG,CAAC,CAAC,2BAA2B,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAC3D,IAAI,KAAK;QAAE,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC;IAE/B,oBAAoB;IACpB,KAAK,GAAG,CAAC,CAAC,4BAA4B,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IACxD,IAAI,KAAK;QAAE,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC;IAE/B,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;GAEG;AACH,SAAS,gBAAgB,CAAC,CAAqB;IAC7C,MAAM,SAAS,GAAG,CAAC,CAAC,uBAAuB,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAC1D,IAAI,SAAS;QAAE,OAAO,SAAS,CAAC,IAAI,EAAE,CAAC;IAEvC,qBAAqB;IACrB,MAAM,KAAK,GAAG,CAAC,CAAC,yBAAyB,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAC3D,IAAI,KAAK;QAAE,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC;IAE/B,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,YAAY,CAAC,IAAY,EAAE,OAAe;IACxD,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC7B,MAAM,KAAK,GAAG,IAAI,GAAG,EAAU,CAAC;IAEhC,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE;QAC5B,MAAM,IAAI,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAClC,IAAI,CAAC,IAAI;YAAE,OAAO;QAElB,IAAI,CAAC;YACH,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;YAE3C,gDAAgD;YAChD,IAAI,CAAC,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC,QAAQ,CAAC,WAAW,CAAC,QAAQ,CAAC,EAAE,CAAC;gBACxD,OAAO;YACT,CAAC;YAED,iDAAiD;YACjD,MAAM,cAAc,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC;YACxC,IAAI,WAAW,CAAC,IAAI;gBAChB,WAAW,CAAC,MAAM,KAAK,cAAc,CAAC,MAAM;gBAC5C,WAAW,CAAC,QAAQ,KAAK,cAAc,CAAC,QAAQ;gBAChD,WAAW,CAAC,MAAM,KAAK,cAAc,CAAC,MAAM,EAAE,CAAC;gBACjD,OAAO;YACT,CAAC;YAED,KAAK,CAAC,GAAG,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;QAC9B,CAAC;QAAC,MAAM,CAAC;YACP,oBAAoB;QACtB,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,OAAO,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,EAAE,CAAC;AAClC,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,eAAe,CAAC,IAAY,EAAE,IAAY;IACxD,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,MAAM,KAAK,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC;IAC9B,MAAM,QAAQ,GAAiB;QAC7B,WAAW,EAAE,kBAAkB,CAAC,CAAC,CAAC;QAClC,MAAM,EAAE,aAAa,CAAC,CAAC,CAAC;QACxB,SAAS,EAA
E,gBAAgB,CAAC,CAAC,CAAC;QAC9B,KAAK,EAAE,YAAY,CAAC,CAAC,CAAC;QACtB,SAAS,EAAE,gBAAgB,CAAC,CAAC,CAAC;KAC/B,CAAC;IAEF,OAAO,EAAE,KAAK,EAAE,QAAQ,EAAE,CAAC;AAC7B,CAAC"}
1
+ {"version":3,"file":"metadata.js","sourceRoot":"","sources":["../../src/core/metadata.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAGnC;;;GAGG;AACH,SAAS,YAAY,CAAC,CAAqB;IACzC,uBAAuB;IACvB,IAAI,KAAK,GAAG,CAAC,CAAC,2BAA2B,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAC3D,IAAI,KAAK;QAAE,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC;IAE/B,oBAAoB;IACpB,KAAK,GAAG,CAAC,CAAC,4BAA4B,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IACxD,IAAI,KAAK;QAAE,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC;IAE/B,gBAAgB;IAChB,KAAK,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC;IAC1B,IAAI,KAAK;QAAE,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC;IAE/B,uBAAuB;IACvB,KAAK,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,EAAE,CAAC;IAC/B,IAAI,KAAK;QAAE,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC;IAE/B,OAAO,EAAE,CAAC;AACZ,CAAC;AAED;;;GAGG;AACH,SAAS,kBAAkB,CAAC,CAAqB;IAC/C,6BAA6B;IAC7B,IAAI,IAAI,GAAG,CAAC,CAAC,iCAAiC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAChE,IAAI,IAAI;QAAE,OAAO,IAAI,CAAC,IAAI,EAAE,CAAC;IAE7B,0BAA0B;IAC1B,IAAI,GAAG,CAAC,CAAC,kCAAkC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAC7D,IAAI,IAAI;QAAE,OAAO,IAAI,CAAC,IAAI,EAAE,CAAC;IAE7B,gCAAgC;IAChC,IAAI,GAAG,CAAC,CAAC,0BAA0B,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IACrD,IAAI,IAAI;QAAE,OAAO,IAAI,CAAC,IAAI,EAAE,CAAC;IAE7B,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;GAEG;AACH,SAAS,aAAa,CAAC,CAAqB;IAC1C,qBAAqB;IACrB,IAAI,MAAM,GAAG,CAAC,CAAC,iCAAiC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAClE,IAAI,MAAM;QAAE,OAAO,MAAM,CAAC,IAAI,EAAE,CAAC;IAEjC,sBAAsB;IACtB,MAAM,GAAG,CAAC,CAAC,qBAAqB,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAClD,IAAI,MAAM;QAAE,OAAO,MAAM,CAAC,IAAI,EAAE,CAAC;IAEjC,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;;GAGG;AACH,SAAS,gBAAgB,CAAC,CAAqB;IAC7C,6BAA6B;IAC7B,IAAI,SAAS,GAAG,CAAC,CAAC,yCAAyC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAC7E,IAAI,SAAS,EAAE,CAAC;QACd,IAAI,CAAC;YACH,OAAO,IAAI,IAAI,CAAC,SAAS,CAAC,CAAC,WAAW,EAAE,CAAC;QAC3C,CAAC;QAAC,MAAM,CAAC;YACP,yBAAyB;QAC3B,CAAC;IACH,CAAC;IAED,+BAA+B;IAC/B,SAAS,GAAG,CAAC,CAAC,gCAAgC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAChE,IAAI,SAAS,EAAE,CAAC;QACd,IAAI,CAAC;YACH,OAAO,IAAI,IAAI,CAAC,SAAS,C
AAC,CAAC,WAAW,EAAE,CAAC;QAC3C,CAAC;QAAC,MAAM,CAAC;YACP,yBAAyB;QAC3B,CAAC;IACH,CAAC;IAED,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;GAEG;AACH,SAAS,YAAY,CAAC,CAAqB;IACzC,eAAe;IACf,IAAI,KAAK,GAAG,CAAC,CAAC,2BAA2B,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAC3D,IAAI,KAAK;QAAE,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC;IAE/B,oBAAoB;IACpB,KAAK,GAAG,CAAC,CAAC,4BAA4B,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IACxD,IAAI,KAAK;QAAE,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC;IAE/B,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;GAEG;AACH,SAAS,gBAAgB,CAAC,CAAqB;IAC7C,MAAM,SAAS,GAAG,CAAC,CAAC,uBAAuB,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAC1D,IAAI,SAAS;QAAE,OAAO,SAAS,CAAC,IAAI,EAAE,CAAC;IAEvC,qBAAqB;IACrB,MAAM,KAAK,GAAG,CAAC,CAAC,yBAAyB,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAC3D,IAAI,KAAK;QAAE,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC;IAE/B,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,YAAY,CAAC,IAAY,EAAE,OAAe;IACxD,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC7B,MAAM,KAAK,GAAG,IAAI,GAAG,EAAU,CAAC;IAEhC,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE;QAC5B,MAAM,IAAI,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAClC,IAAI,CAAC,IAAI;YAAE,OAAO;QAElB,IAAI,CAAC;YACH,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;YAE3C,gDAAgD;YAChD,IAAI,CAAC,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC,QAAQ,CAAC,WAAW,CAAC,QAAQ,CAAC,EAAE,CAAC;gBACxD,OAAO;YACT,CAAC;YAED,iDAAiD;YACjD,MAAM,cAAc,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC;YACxC,IAAI,WAAW,CAAC,IAAI;gBAChB,WAAW,CAAC,MAAM,KAAK,cAAc,CAAC,MAAM;gBAC5C,WAAW,CAAC,QAAQ,KAAK,cAAc,CAAC,QAAQ;gBAChD,WAAW,CAAC,MAAM,KAAK,cAAc,CAAC,MAAM,EAAE,CAAC;gBACjD,OAAO;YACT,CAAC;YAED,KAAK,CAAC,GAAG,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;QAC9B,CAAC;QAAC,MAAM,CAAC;YACP,oBAAoB;QACtB,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,OAAO,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,EAAE,CAAC;AAClC,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,aAAa,CAAC,IAAY,EAAE,OAAe;IACzD,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC7B,MAAM,MAAM,GAAG,IAAI,GAAG,EAA2C,CAAC;IAElE,qBAAqB;IACrB,CAAC,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE;QAC7B,MAAM,IAAI,GAAG,CAAC
,CAAC,IAAI,CAAC,CAAC;QACrB,MAAM,GAAG,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAC7B,IAAI,CAAC,GAAG;YAAE,OAAO;QAEjB,IAAI,CAAC;YACH,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;YAE1C,gDAAgD;YAChD,IAAI,CAAC,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC,QAAQ,CAAC,WAAW,CAAC,QAAQ,CAAC,EAAE,CAAC;gBACxD,OAAO;YACT,CAAC;YAED,MAAM,GAAG,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;YACnC,MAAM,KAAK,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACjC,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACpC,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YAEtC,MAAM,KAAK,GAAG,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;YAC5D,MAAM,MAAM,GAAG,SAAS,CAAC,CAAC,CAAC,QAAQ,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;YAE/D,MAAM,SAAS,GAAoC;gBACjD,GAAG,EAAE,WAAW,CAAC,IAAI;gBACrB,GAAG;gBACH,KAAK;gBACL,KAAK,EAAE,KAAK,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS;gBACjD,MAAM,EAAE,MAAM,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,SAAS;aACtD,CAAC;YAEF,qBAAqB;YACrB,MAAM,CAAC,GAAG,CAAC,WAAW,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;QAC1C,CAAC;QAAC,MAAM,CAAC;YACP,oBAAoB;QACtB,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,iCAAiC;IACjC,CAAC,CAAC,wBAAwB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE;QAC3C,MAAM,OAAO,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC;QACxB,MAAM,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QACtC,IAAI,CAAC,MAAM;YAAE,OAAO;QAEpB,kEAAkE;QAClE,MAAM,WAAW,GAAG,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;QACzD,WAAW,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE;YACzB,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;YACjC,IAAI,CAAC,GAAG;gBAAE,OAAO;YAEjB,IAAI,CAAC;gBACH,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;gBAE1C,gDAAgD;gBAChD,IAAI,CAAC,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC,QAAQ,CAAC,WAAW,CAAC,QAAQ,CAAC,EAAE,CAAC;oBACxD,OAAO;gBACT,CAAC;gBAED,2CAA2C;gBAC3C,MAAM,GAAG,GAAG,OAAO,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;gBAErE,MAAM,SAAS,GAAoC;oBACjD,GAAG,
EAAE,WAAW,CAAC,IAAI;oBACrB,GAAG;iBACJ,CAAC;gBAEF,MAAM,CAAC,GAAG,CAAC,WAAW,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;YAC1C,CAAC;YAAC,MAAM,CAAC;gBACP,oBAAoB;YACtB,CAAC;QACH,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,gCAAgC;IAChC,CAAC,CAAC,uBAAuB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE;QAC1C,MAAM,KAAK,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACpC,IAAI,CAAC,KAAK;YAAE,OAAO;QAEnB,qBAAqB;QACrB,MAAM,UAAU,GAAG,KAAK,CAAC,KAAK,CAAC,+BAA+B,CAAC,CAAC;QAChE,IAAI,CAAC,UAAU;YAAE,OAAO;QAExB,UAAU,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE;YACzB,MAAM,GAAG,GAAG,KAAK,CAAC,OAAO,CAAC,8BAA8B,EAAE,IAAI,CAAC,CAAC;YAChE,IAAI,CAAC,GAAG;gBAAE,OAAO;YAEjB,IAAI,CAAC;gBACH,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;gBAE1C,gDAAgD;gBAChD,IAAI,CAAC,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC,QAAQ,CAAC,WAAW,CAAC,QAAQ,CAAC,EAAE,CAAC;oBACxD,OAAO;gBACT,CAAC;gBAED,MAAM,SAAS,GAAoC;oBACjD,GAAG,EAAE,WAAW,CAAC,IAAI;oBACrB,GAAG,EAAE,EAAE,EAAE,wCAAwC;iBAClD,CAAC;gBAEF,MAAM,CAAC,GAAG,CAAC,WAAW,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;YAC1C,CAAC;YAAC,MAAM,CAAC;gBACP,oBAAoB;YACtB,CAAC;QACH,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;AACrC,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,eAAe,CAAC,IAAY,EAAE,IAAY;IACxD,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,MAAM,KAAK,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC;IAC9B,MAAM,QAAQ,GAAiB;QAC7B,WAAW,EAAE,kBAAkB,CAAC,CAAC,CAAC;QAClC,MAAM,EAAE,aAAa,CAAC,CAAC,CAAC;QACxB,SAAS,EAAE,gBAAgB,CAAC,CAAC,CAAC;QAC9B,KAAK,EAAE,YAAY,CAAC,CAAC,CAAC;QACtB,SAAS,EAAE,gBAAgB,CAAC,CAAC,CAAC;KAC/B,CAAC;IAEF,OAAO,EAAE,KAAK,EAAE,QAAQ,EAAE,CAAC;AAC7B,CAAC"}
@@ -31,6 +31,13 @@ export interface StrategyOptions {
31
31
  to?: 'top' | 'bottom' | number;
32
32
  timeout?: number;
33
33
  }>;
34
+ /** Keep browser page open for reuse (caller must close) */
35
+ keepPageOpen?: boolean;
36
+ /** Location/language for geo-targeted scraping */
37
+ location?: {
38
+ country?: string;
39
+ languages?: string[];
40
+ };
34
41
  }
35
42
  export interface StrategyResult extends FetchResult {
36
43
  /** Which strategy succeeded: 'simple' | 'browser' | 'stealth' */
@@ -1 +1 @@
1
- {"version":3,"file":"strategies.d.ts","sourceRoot":"","sources":["../../src/core/strategies.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAyC,KAAK,WAAW,EAAE,MAAM,cAAc,CAAC;AAGvF,MAAM,WAAW,eAAe;IAC9B,6CAA6C;IAC7C,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,+CAA+C;IAC/C,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,qDAAqD;IACrD,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,wBAAwB;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,2BAA2B;IAC3B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,uCAAuC;IACvC,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,oDAAoD;IACpD,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,kCAAkC;IAClC,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,uCAAuC;IACvC,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,gDAAgD;IAChD,OAAO,CAAC,EAAE,KAAK,CAAC;QACd,IAAI,EAAE,MAAM,GAAG,OAAO,GAAG,QAAQ,GAAG,MAAM,GAAG,MAAM,GAAG,QAAQ,GAAG,OAAO,GAAG,OAAO,GAAG,iBAAiB,GAAG,YAAY,CAAC;QACtH,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,GAAG,CAAC,EAAE,MAAM,CAAC;QACb,EAAE,CAAC,EAAE,MAAM,CAAC;QACZ,EAAE,CAAC,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;QAC/B,OAAO,CAAC,EAAE,MAAM,CAAC;KAClB,CAAC,CAAC;CACJ;AAED,MAAM,WAAW,cAAe,SAAQ,WAAW;IACjD,iEAAiE;IACjE,MAAM,EAAE,QAAQ,GAAG,SAAS,GAAG,SAAS,CAAC;CAC1C;AAED;;;;;;;;;;GAUG;AACH,wBAAsB,UAAU,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,eAAoB,GAAG,OAAO,CAAC,cAAc,CAAC,CAyGpG"}
1
+ {"version":3,"file":"strategies.d.ts","sourceRoot":"","sources":["../../src/core/strategies.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAyC,KAAK,WAAW,EAAE,MAAM,cAAc,CAAC;AAGvF,MAAM,WAAW,eAAe;IAC9B,6CAA6C;IAC7C,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,+CAA+C;IAC/C,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,qDAAqD;IACrD,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,wBAAwB;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,2BAA2B;IAC3B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,uCAAuC;IACvC,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,oDAAoD;IACpD,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,kCAAkC;IAClC,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,uCAAuC;IACvC,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,gDAAgD;IAChD,OAAO,CAAC,EAAE,KAAK,CAAC;QACd,IAAI,EAAE,MAAM,GAAG,OAAO,GAAG,QAAQ,GAAG,MAAM,GAAG,MAAM,GAAG,QAAQ,GAAG,OAAO,GAAG,OAAO,GAAG,iBAAiB,GAAG,YAAY,CAAC;QACtH,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,GAAG,CAAC,EAAE,MAAM,CAAC;QACb,EAAE,CAAC,EAAE,MAAM,CAAC;QACZ,EAAE,CAAC,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;QAC/B,OAAO,CAAC,EAAE,MAAM,CAAC;KAClB,CAAC,CAAC;IACH,2DAA2D;IAC3D,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,kDAAkD;IAClD,QAAQ,CAAC,EAAE;QACT,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;KACtB,CAAC;CACH;AAED,MAAM,WAAW,cAAe,SAAQ,WAAW;IACjD,iEAAiE;IACjE,MAAM,EAAE,QAAQ,GAAG,SAAS,GAAG,SAAS,CAAC;CAC1C;AAED;;;;;;;;;;GAUG;AACH,wBAAsB,UAAU,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,eAAoB,GAAG,OAAO,CAAC,cAAc,CAAC,CA6GpG"}
@@ -15,7 +15,7 @@ import { BlockedError, NetworkError } from '../types.js';
15
15
  * Returns the result along with which method worked
16
16
  */
17
17
  export async function smartFetch(url, options = {}) {
18
- const { forceBrowser = false, stealth = false, waitMs = 0, userAgent, timeoutMs = 30000, screenshot = false, screenshotFullPage = false, headers, cookies, actions } = options;
18
+ const { forceBrowser = false, stealth = false, waitMs = 0, userAgent, timeoutMs = 30000, screenshot = false, screenshotFullPage = false, headers, cookies, actions, keepPageOpen = false, } = options;
19
19
  // If stealth is requested, force browser mode (stealth requires browser)
20
20
  const shouldUseBrowser = forceBrowser || screenshot || stealth;
21
21
  // Strategy 1: Simple fetch (unless browser is forced or screenshot is requested)
@@ -50,6 +50,7 @@ export async function smartFetch(url, options = {}) {
50
50
  cookies,
51
51
  stealth,
52
52
  actions,
53
+ keepPageOpen,
53
54
  });
54
55
  return {
55
56
  ...result,
@@ -70,6 +71,7 @@ export async function smartFetch(url, options = {}) {
70
71
  cookies,
71
72
  stealth: true, // Escalate to stealth mode
72
73
  actions,
74
+ keepPageOpen,
73
75
  });
74
76
  return {
75
77
  ...result,
@@ -94,6 +96,7 @@ export async function smartFetch(url, options = {}) {
94
96
  cookies,
95
97
  stealth, // Keep stealth setting
96
98
  actions,
99
+ keepPageOpen,
97
100
  });
98
101
  return {
99
102
  ...result,
@@ -1 +1 @@
1
- {"version":3,"file":"strategies.js","sourceRoot":"","sources":["../../src/core/strategies.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,WAAW,EAAE,YAAY,EAAE,UAAU,EAAoB,MAAM,cAAc,CAAC;AACvF,OAAO,EAAE,YAAY,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAsCzD;;;;;;;;;;GAUG;AACH,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,GAAW,EAAE,UAA2B,EAAE;IACzE,MAAM,EACJ,YAAY,GAAG,KAAK,EACpB,OAAO,GAAG,KAAK,EACf,MAAM,GAAG,CAAC,EACV,SAAS,EACT,SAAS,GAAG,KAAK,EACjB,UAAU,GAAG,KAAK,EAClB,kBAAkB,GAAG,KAAK,EAC1B,OAAO,EACP,OAAO,EACP,OAAO,EACR,GAAG,OAAO,CAAC;IAEZ,yEAAyE;IACzE,MAAM,gBAAgB,GAAG,YAAY,IAAI,UAAU,IAAI,OAAO,CAAC;IAE/D,iFAAiF;IACjF,IAAI,CAAC,gBAAgB,EAAE,CAAC;QACtB,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,UAAU,CAC7B,GAAG,EAAE,CAAC,WAAW,CAAC,GAAG,EAAE,SAAS,EAAE,SAAS,EAAE,OAAO,CAAC,EACrD,CAAC,CACF,CAAC;YACF,OAAO;gBACL,GAAG,MAAM;gBACT,MAAM,EAAE,QAAQ;aACjB,CAAC;QACJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,8CAA8C;YAC9C,IAAI,KAAK,YAAY,YAAY,EAAE,CAAC;gBAClC,mCAAmC;YACrC,CAAC;iBAAM,CAAC;gBACN,kDAAkD;gBAClD,MAAM,KAAK,CAAC;YACd,CAAC;QACH,CAAC;IACH,CAAC;IAED,sDAAsD;IACtD,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,YAAY,CAAC,GAAG,EAAE;YACrC,SAAS;YACT,MAAM;YACN,SAAS;YACT,UAAU;YACV,kBAAkB;YAClB,OAAO;YACP,OAAO;YACP,OAAO;YACP,OAAO;SACR,CAAC,CAAC;QACH,OAAO;YACL,GAAG,MAAM;YACT,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS;SACxC,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,mGAAmG;QACnG,IAAI,CAAC,OAAO,IAAI,KAAK,YAAY,YAAY,EAAE,CAAC;YAC9C,IAAI,CAAC;gBACH,MAAM,MAAM,GAAG,MAAM,YAAY,CAAC,GAAG,EAAE;oBACrC,SAAS;oBACT,MAAM;oBACN,SAAS;oBACT,UAAU;oBACV,kBAAkB;oBAClB,OAAO;oBACP,OAAO;oBACP,OAAO,EAAE,IAAI,EAAE,2BAA2B;oBAC1C,OAAO;iBACR,CAAC,CAAC;gBACH,OAAO;oBACL,GAAG,MAAM;oBACT,MAAM,EAAE,SAAS;iBAClB,CAAC;YACJ,CAAC;YAAC,OAAO,YAAY,EAAE,CAAC;gBACtB,kDAAkD;gBAClD,MAAM,YAAY,CAAC;YACrB,CAAC;QACH,CAAC;QAED,+DAA+D;QAC/D,IACE,KAAK,YAAY,YAAY;YAC7B,KAAK,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,YAAY,CAAC,EAClD,CAAC;YACD,MAAM,MAAM,GAAG,MAAM,YAAY,CAAC,GAAG,EAAE;gBACrC,SAAS;gBACT,MAAM,EAAE,IAAI,EAAE,mCAAmC;gBACjD,SAAS;gBACT,UAAU;gBACV,kBAAkB;gBAClB,OAAO;gBACP,OAAO;g
BACP,OAAO,EAAE,uBAAuB;gBAChC,OAAO;aACR,CAAC,CAAC;YACH,OAAO;gBACL,GAAG,MAAM;gBACT,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS;aACxC,CAAC;QACJ,CAAC;QAED,MAAM,KAAK,CAAC;IACd,CAAC;AACH,CAAC"}
1
+ {"version":3,"file":"strategies.js","sourceRoot":"","sources":["../../src/core/strategies.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,WAAW,EAAE,YAAY,EAAE,UAAU,EAAoB,MAAM,cAAc,CAAC;AACvF,OAAO,EAAE,YAAY,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AA6CzD;;;;;;;;;;GAUG;AACH,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,GAAW,EAAE,UAA2B,EAAE;IACzE,MAAM,EACJ,YAAY,GAAG,KAAK,EACpB,OAAO,GAAG,KAAK,EACf,MAAM,GAAG,CAAC,EACV,SAAS,EACT,SAAS,GAAG,KAAK,EACjB,UAAU,GAAG,KAAK,EAClB,kBAAkB,GAAG,KAAK,EAC1B,OAAO,EACP,OAAO,EACP,OAAO,EACP,YAAY,GAAG,KAAK,GACrB,GAAG,OAAO,CAAC;IAEZ,yEAAyE;IACzE,MAAM,gBAAgB,GAAG,YAAY,IAAI,UAAU,IAAI,OAAO,CAAC;IAE/D,iFAAiF;IACjF,IAAI,CAAC,gBAAgB,EAAE,CAAC;QACtB,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,UAAU,CAC7B,GAAG,EAAE,CAAC,WAAW,CAAC,GAAG,EAAE,SAAS,EAAE,SAAS,EAAE,OAAO,CAAC,EACrD,CAAC,CACF,CAAC;YACF,OAAO;gBACL,GAAG,MAAM;gBACT,MAAM,EAAE,QAAQ;aACjB,CAAC;QACJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,8CAA8C;YAC9C,IAAI,KAAK,YAAY,YAAY,EAAE,CAAC;gBAClC,mCAAmC;YACrC,CAAC;iBAAM,CAAC;gBACN,kDAAkD;gBAClD,MAAM,KAAK,CAAC;YACd,CAAC;QACH,CAAC;IACH,CAAC;IAED,sDAAsD;IACtD,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,YAAY,CAAC,GAAG,EAAE;YACrC,SAAS;YACT,MAAM;YACN,SAAS;YACT,UAAU;YACV,kBAAkB;YAClB,OAAO;YACP,OAAO;YACP,OAAO;YACP,OAAO;YACP,YAAY;SACb,CAAC,CAAC;QACH,OAAO;YACL,GAAG,MAAM;YACT,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS;SACxC,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,mGAAmG;QACnG,IAAI,CAAC,OAAO,IAAI,KAAK,YAAY,YAAY,EAAE,CAAC;YAC9C,IAAI,CAAC;gBACH,MAAM,MAAM,GAAG,MAAM,YAAY,CAAC,GAAG,EAAE;oBACrC,SAAS;oBACT,MAAM;oBACN,SAAS;oBACT,UAAU;oBACV,kBAAkB;oBAClB,OAAO;oBACP,OAAO;oBACP,OAAO,EAAE,IAAI,EAAE,2BAA2B;oBAC1C,OAAO;oBACP,YAAY;iBACb,CAAC,CAAC;gBACH,OAAO;oBACL,GAAG,MAAM;oBACT,MAAM,EAAE,SAAS;iBAClB,CAAC;YACJ,CAAC;YAAC,OAAO,YAAY,EAAE,CAAC;gBACtB,kDAAkD;gBAClD,MAAM,YAAY,CAAC;YACrB,CAAC;QACH,CAAC;QAED,+DAA+D;QAC/D,IACE,KAAK,YAAY,YAAY;YAC7B,KAAK,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,YAAY,CAAC,EAClD,CAAC;YACD,MAAM,MAAM,GAAG,MAAM,YAAY,CAAC,GAAG,EAAE;gBACrC,SAAS;gBACT,MAAM,EAAE,IAAI,EAAE,mCAAmC;gBACjD,SAAS;gBACT,
UAAU;gBACV,kBAAkB;gBAClB,OAAO;gBACP,OAAO;gBACP,OAAO,EAAE,uBAAuB;gBAChC,OAAO;gBACP,YAAY;aACb,CAAC,CAAC;YACH,OAAO;gBACL,GAAG,MAAM;gBACT,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS;aACxC,CAAC;QACJ,CAAC;QAED,MAAM,KAAK,CAAC;IACd,CAAC;AACH,CAAC"}
@@ -0,0 +1,18 @@
1
+ /**
2
+ * AI-powered content summarization using OpenAI-compatible APIs
3
+ */
4
+ export interface SummarizeOptions {
5
+ /** OpenAI-compatible API base URL (default: https://api.openai.com/v1); a trailing slash is stripped before `/chat/completions` is appended */
6
+ apiBase?: string;
7
+ /** API key for the LLM; sent as a Bearer token, and a blank key is rejected before any request is made */
8
+ apiKey: string;
9
+ /** Model to use (default: gpt-4o-mini) */
10
+ model?: string;
11
+ /** Max length of summary in words (default: 150); also used to size the request's `max_tokens` */
12
+ maxWords?: number;
13
+ }
14
+ /**
15
+ * Summarize content using an OpenAI-compatible LLM API; resolves with the trimmed summary text and rejects with an Error when the API key or content is blank, the HTTP call fails, or the reply is empty
16
+ */
17
+ export declare function summarizeContent(content: string, options: SummarizeOptions): Promise<string>;
18
+ //# sourceMappingURL=summarize.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"summarize.d.ts","sourceRoot":"","sources":["../../src/core/summarize.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,MAAM,WAAW,gBAAgB;IAC/B,0EAA0E;IAC1E,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,0BAA0B;IAC1B,MAAM,EAAE,MAAM,CAAC;IACf,0CAA0C;IAC1C,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,qCAAqC;IACrC,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAiBD;;GAEG;AACH,wBAAsB,gBAAgB,CACpC,OAAO,EAAE,MAAM,EACf,OAAO,EAAE,gBAAgB,GACxB,OAAO,CAAC,MAAM,CAAC,CAoFjB"}
@@ -0,0 +1,79 @@
1
+ /**
2
+ * AI-powered content summarization using OpenAI-compatible APIs
3
+ */
4
/**
 * Truncate content to roughly 4000 tokens (~16000 characters).
 * This leaves room for the prompt wrapper and the model's response.
 *
 * The cut is made on UTF-16 code units, so it is moved back by one unit when
 * it would otherwise separate a high surrogate from its low surrogate. That
 * keeps the truncated text well-formed instead of ending in half of an
 * astral character (emoji, some CJK, etc.).
 *
 * @param {string} content - Text to shorten.
 * @returns {string} `content` unchanged when it is at most 16000 code units
 *   long; otherwise its leading part followed by a truncation marker.
 */
function truncateContent(content) {
    const MAX_CHARS = 16000; // ~4000 tokens
    if (content.length <= MAX_CHARS) {
        return content;
    }
    let end = MAX_CHARS;
    const lastUnit = content.charCodeAt(end - 1);
    // 0xD800–0xDBFF is the high-surrogate range: the unit that would complete
    // the pair lies beyond the cut, so drop the orphaned half as well.
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
        end -= 1;
    }
    // Truncate and add a marker so the model knows the text is incomplete
    return `${content.slice(0, end)}\n\n[Content truncated for summarization...]`;
}
16
/**
 * Summarize content using an OpenAI-compatible LLM API.
 *
 * The content is shortened with `truncateContent`, wrapped in a summarization
 * prompt and POSTed to `<apiBase>/chat/completions` as a single user message.
 *
 * @param {string} content - Text to summarize; must not be blank.
 * @param {object} options
 * @param {string} [options.apiBase='https://api.openai.com/v1'] - API base URL;
 *   trailing slashes are ignored.
 * @param {string} options.apiKey - Bearer token for the API; must not be blank.
 * @param {string} [options.model='gpt-4o-mini'] - Model identifier.
 * @param {number} [options.maxWords=150] - Target upper bound for the summary
 *   length, in words.
 * @returns {Promise<string>} The trimmed summary text.
 * @throws {Error} 'API key is required for summarization' or
 *   'Content is required for summarization' for blank inputs (thrown before
 *   any request is made).
 * @throws {Error} 'Summarization failed: …' for transport errors, non-2xx
 *   replies, API-reported errors and empty completions; the underlying error
 *   is available as `cause`.
 */
export async function summarizeContent(content, options) {
    const { apiBase = 'https://api.openai.com/v1', apiKey, model = 'gpt-4o-mini', maxWords = 150, } = options;
    // Validate inputs
    if (!apiKey || apiKey.trim().length === 0) {
        throw new Error('API key is required for summarization');
    }
    if (!content || content.trim().length === 0) {
        throw new Error('Content is required for summarization');
    }
    // Truncate content to fit within token limits
    const truncatedContent = truncateContent(content);
    // Build the prompt
    const prompt = `Summarize the following web page content concisely in ${maxWords} words or fewer. Focus on the key information.\n\nContent:\n${truncatedContent}`;
    // Strip every trailing slash so "…/v1/" and "…/v1//" do not produce "//chat/completions"
    const apiUrl = `${apiBase.replace(/\/+$/, '')}/chat/completions`;
    try {
        const response = await fetch(apiUrl, {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                'Authorization': `Bearer ${apiKey}`,
            },
            body: JSON.stringify({
                model,
                messages: [
                    {
                        role: 'user',
                        content: prompt,
                    },
                ],
                temperature: 0.3, // Lower temperature for more focused summaries
                // Rough estimate: 1 word ≈ 1.5-2 tokens. The API expects a positive
                // integer, so round up and never send less than 1.
                max_tokens: Math.max(1, Math.ceil(maxWords * 2)),
            }),
        });
        if (!response.ok) {
            const errorText = await response.text();
            throw new Error(`LLM API error: HTTP ${response.status} - ${errorText}`);
        }
        const result = await response.json();
        // Some providers report failures in a 200 body, either as
        // `{ error: { message } }` or as a bare string.
        if (result?.error) {
            const detail = typeof result.error === 'string'
                ? result.error
                : (result.error.message ?? JSON.stringify(result.error));
            throw new Error(`LLM API error: ${detail}`);
        }
        // Extract summary from response; anything that is not text counts as empty
        const text = result?.choices?.[0]?.message?.content;
        const summary = typeof text === 'string' ? text.trim() : '';
        if (!summary) {
            throw new Error('LLM API returned empty response');
        }
        return summary;
    }
    catch (error) {
        // Keep the original error reachable via `cause` so callers can still
        // inspect its type and stack after the message has been prefixed.
        if (error instanceof Error) {
            throw new Error(`Summarization failed: ${error.message}`, { cause: error });
        }
        throw new Error('Summarization failed: Unknown error', { cause: error });
    }
}
79
+ //# sourceMappingURL=summarize.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"summarize.js","sourceRoot":"","sources":["../../src/core/summarize.ts"],"names":[],"mappings":"AAAA;;GAEG;AAaH;;;GAGG;AACH,SAAS,eAAe,CAAC,OAAe;IACtC,MAAM,SAAS,GAAG,KAAK,CAAC,CAAC,eAAe;IAExC,IAAI,OAAO,CAAC,MAAM,IAAI,SAAS,EAAE,CAAC;QAChC,OAAO,OAAO,CAAC;IACjB,CAAC;IAED,4BAA4B;IAC5B,OAAO,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,GAAG,8CAA8C,CAAC;AACtF,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,gBAAgB,CACpC,OAAe,EACf,OAAyB;IAEzB,MAAM,EACJ,OAAO,GAAG,2BAA2B,EACrC,MAAM,EACN,KAAK,GAAG,aAAa,EACrB,QAAQ,GAAG,GAAG,GACf,GAAG,OAAO,CAAC;IAEZ,kBAAkB;IAClB,IAAI,CAAC,MAAM,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC1C,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAC;IAC3D,CAAC;IAED,IAAI,CAAC,OAAO,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC5C,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAC;IAC3D,CAAC;IAED,8CAA8C;IAC9C,MAAM,gBAAgB,GAAG,eAAe,CAAC,OAAO,CAAC,CAAC;IAElD,mBAAmB;IACnB,MAAM,MAAM,GAAG,yDAAyD,QAAQ;;;EAGhF,gBAAgB,EAAE,CAAC;IAEnB,iCAAiC;IACjC,MAAM,MAAM,GAAG,GAAG,OAAO,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,mBAAmB,CAAC;IAEhE,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,MAAM,EAAE;YACnC,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,eAAe,EAAE,UAAU,MAAM,EAAE;aACpC;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;gBACnB,KAAK;gBACL,QAAQ,EAAE;oBACR;wBACE,IAAI,EAAE,MAAM;wBACZ,OAAO,EAAE,MAAM;qBAChB;iBACF;gBACD,WAAW,EAAE,GAAG,EAAE,+CAA+C;gBACjE,UAAU,EAAE,QAAQ,GAAG,CAAC,EAAE,wCAAwC;aACnE,CAAC;SACH,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,SAAS,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YACxC,MAAM,IAAI,KAAK,CAAC,uBAAuB,QAAQ,CAAC,MAAM,MAAM,SAAS,EAAE,CAAC,CAAC;QAC3E,CAAC;QAED,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,IAAI,EASjC,CAAC;QAEF,sBAAsB;QACtB,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;YACjB,MAAM,IAAI,KAAK,CAAC,kBAAkB,MAAM,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;QAC5D,CAAC;QAED,gCAAgC;QAChC,MAAM,OAAO,GAAG,MAAM,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC;QAE9D,IAAI,CAAC,OAAO,EAAE,CAAC;YACb,MAAM,IAAI,KAAK,CAAC,iCAAiC,CAAC,CAAC;QACrD,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,
CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,KAAK,YAAY,KAAK,EAAE,CAAC;YAC3B,MAAM,IAAI,KAAK,CAAC,yBAAyB,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;QAC5D,CAAC;QACD,MAAM,IAAI,KAAK,CAAC,qCAAqC,CAAC,CAAC;IACzD,CAAC;AACH,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAMH,OAAO,EAAE,OAAO,EAAE,MAAM,mBAAmB,CAAC;AAE5C,OAAO,KAAK,EAAE,WAAW,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAE1D,cAAc,YAAY,CAAC;AAC3B,OAAO,EAAE,KAAK,EAAE,KAAK,YAAY,EAAE,KAAK,WAAW,EAAE,KAAK,aAAa,EAAE,MAAM,mBAAmB,CAAC;AACnG,OAAO,EAAE,eAAe,EAAE,KAAK,UAAU,EAAE,KAAK,aAAa,EAAE,MAAM,mBAAmB,CAAC;AACzF,OAAO,EAAE,SAAS,EAAE,KAAK,UAAU,EAAE,KAAK,SAAS,EAAE,MAAM,eAAe,CAAC;AAC3E,OAAO,EAAE,eAAe,EAAE,KAAK,eAAe,EAAE,MAAM,oBAAoB,CAAC;AAC3E,OAAO,EAAE,WAAW,EAAE,WAAW,EAAE,cAAc,EAAE,KAAK,YAAY,EAAE,KAAK,QAAQ,EAAE,MAAM,2BAA2B,CAAC;AACvH,OAAO,EAAE,cAAc,EAAE,MAAM,mBAAmB,CAAC;AAEnD;;;;;;;;;;;;;;;GAeG;AACH,wBAAsB,IAAI,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,WAAgB,GAAG,OAAO,CAAC,UAAU,CAAC,CA8RtF;AAED;;;;;;;;;;;;;;GAcG;AACH,wBAAsB,SAAS,CAC7B,IAAI,EAAE,MAAM,EAAE,EACd,OAAO,GAAE,WAAW,GAAG;IAAE,WAAW,CAAC,EAAE,MAAM,CAAA;CAAO,GACnD,OAAO,CAAC,CAAC,UAAU,GAAG;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,CAAC,EAAE,CAAC,CAwB1D;AAED;;;GAGG;AACH,OAAO,EAAE,OAAO,EAAE,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAMH,OAAO,EAAE,OAAO,EAAE,MAAM,mBAAmB,CAAC;AAE5C,OAAO,KAAK,EAAE,WAAW,EAAE,UAAU,EAAa,MAAM,YAAY,CAAC;AAErE,cAAc,YAAY,CAAC;AAC3B,OAAO,EAAE,KAAK,EAAE,KAAK,YAAY,EAAE,KAAK,WAAW,EAAE,KAAK,aAAa,EAAE,MAAM,mBAAmB,CAAC;AACnG,OAAO,EAAE,eAAe,EAAE,KAAK,UAAU,EAAE,KAAK,aAAa,EAAE,MAAM,mBAAmB,CAAC;AACzF,OAAO,EAAE,SAAS,EAAE,KAAK,UAAU,EAAE,KAAK,SAAS,EAAE,MAAM,eAAe,CAAC;AAC3E,OAAO,EAAE,eAAe,EAAE,KAAK,eAAe,EAAE,MAAM,oBAAoB,CAAC;AAC3E,OAAO,EAAE,WAAW,EAAE,WAAW,EAAE,cAAc,EAAE,KAAK,YAAY,EAAE,KAAK,QAAQ,EAAE,MAAM,2BAA2B,CAAC;AACvH,OAAO,EAAE,cAAc,EAAE,MAAM,mBAAmB,CAAC;AAEnD;;;;;;;;;;;;;;;GAeG;AACH,wBAAsB,IAAI,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,WAAgB,GAAG,OAAO,CAAC,UAAU,CAAC,CA+RtF;AAED;;;;;;;;;;;;;;GAcG;AACH,wBAAsB,SAAS,CAC7B,IAAI,EAAE,MAAM,EAAE,EACd,OAAO,GAAE,WAAW,GAAG;IAAE,WAAW,CAAC,EAAE,MAAM,CAAA;CAAO,GACnD,OAAO,CAAC,CAAC,UAAU,GAAG;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,CAAC,EAAE,CAAC,CAwB1D;AAED;;;GAGG;AACH,OAAO,EAAE,OAAO,EAAE,CAAC"}
package/dist/index.js CHANGED
@@ -5,8 +5,8 @@
5
5
  */
6
6
  import { createHash } from 'crypto';
7
7
  import { smartFetch } from './core/strategies.js';
8
- import { htmlToMarkdown, htmlToText, estimateTokens, selectContent, detectMainContent, calculateQuality, truncateToTokenBudget } from './core/markdown.js';
9
- import { extractMetadata, extractLinks } from './core/metadata.js';
8
+ import { htmlToMarkdown, htmlToText, estimateTokens, selectContent, detectMainContent, calculateQuality, truncateToTokenBudget, filterByTags } from './core/markdown.js';
9
+ import { extractMetadata, extractLinks, extractImages } from './core/metadata.js';
10
10
  import { cleanup } from './core/fetcher.js';
11
11
  import { extractStructured } from './core/extract.js';
12
12
  export * from './types.js';
@@ -34,7 +34,7 @@ export { extractWithLLM } from './core/extract.js';
34
34
  */
35
35
  export async function peel(url, options = {}) {
36
36
  const startTime = Date.now();
37
- let { render = false, stealth = false, wait = 0, format = 'markdown', timeout = 30000, userAgent, screenshot = false, screenshotFullPage = false, selector, exclude, headers, cookies, raw = false, actions, extract, maxTokens, } = options;
37
+ let { render = false, stealth = false, wait = 0, format = 'markdown', timeout = 30000, userAgent, screenshot = false, screenshotFullPage = false, selector, exclude, includeTags, excludeTags, headers, cookies, raw = false, actions, extract, maxTokens, images: extractImagesFlag = false, location: _location, } = options;
38
38
  // Detect PDF URLs and force browser rendering
39
39
  const isPdf = url.toLowerCase().endsWith('.pdf');
40
40
  if (isPdf) {
@@ -57,7 +57,8 @@ export async function peel(url, options = {}) {
57
57
  render = true;
58
58
  }
59
59
  try {
60
- // Fetch the page
60
+ // Fetch the page (keep browser open if branding extraction is needed)
61
+ const needsBranding = options.branding && render;
61
62
  const fetchResult = await smartFetch(url, {
62
63
  forceBrowser: render,
63
64
  stealth,
@@ -69,6 +70,7 @@ export async function peel(url, options = {}) {
69
70
  headers,
70
71
  cookies,
71
72
  actions,
73
+ keepPageOpen: needsBranding,
72
74
  });
73
75
  // Detect content type from the response
74
76
  const ct = (fetchResult.contentType || '').toLowerCase();
@@ -85,6 +87,10 @@ export async function peel(url, options = {}) {
85
87
  if (isHTML) {
86
88
  // Standard HTML pipeline
87
89
  let html = fetchResult.html;
90
+ // Apply include/exclude tags filtering first (before selector)
91
+ if (includeTags || excludeTags) {
92
+ html = filterByTags(html, includeTags, excludeTags);
93
+ }
88
94
  if (selector) {
89
95
  html = selectContent(html, selector, exclude);
90
96
  }
@@ -172,6 +178,11 @@ export async function peel(url, options = {}) {
172
178
  links = [...new Set(found)];
173
179
  quality = 1.0;
174
180
  }
181
+ // Extract images if requested
182
+ let imagesList;
183
+ if (extractImagesFlag && isHTML) {
184
+ imagesList = extractImages(fetchResult.html, fetchResult.url);
185
+ }
175
186
  // Extract structured data if requested
176
187
  let extracted;
177
188
  if (extract && isHTML) {
@@ -195,34 +206,26 @@ export async function peel(url, options = {}) {
195
206
  const fingerprint = createHash('sha256').update(content).digest('hex').slice(0, 16);
196
207
  // Convert screenshot buffer to base64 if present
197
208
  const screenshotBase64 = fetchResult.screenshot?.toString('base64');
198
- // Extract branding if requested (requires browser)
209
+ // Extract branding if requested (reuses existing browser page when available)
199
210
  let brandingProfile;
200
- if (options.branding && render) {
211
+ if (options.branding && render && fetchResult.page) {
201
212
  try {
202
- // Import playwright and create a page for branding extraction
203
- const { chromium } = await import('playwright-core');
204
- const browser = await chromium.launch({ headless: true });
205
- const page = await browser.newPage({
206
- userAgent: userAgent || undefined,
207
- });
208
- // Navigate to the URL
209
- await page.goto(fetchResult.url, {
210
- waitUntil: 'domcontentloaded',
211
- timeout: timeout || 30000,
212
- });
213
- // Wait if specified
214
- if (wait > 0) {
215
- await page.waitForTimeout(wait);
216
- }
217
- // Extract branding
218
213
  const { extractBranding } = await import('./core/branding.js');
219
- brandingProfile = await extractBranding(page);
220
- // Clean up
221
- await browser.close();
214
+ brandingProfile = await extractBranding(fetchResult.page);
222
215
  }
223
216
  catch (error) {
224
217
  console.error('Branding extraction failed:', error);
225
218
  }
219
+ finally {
220
+ // Clean up the kept-open page and browser
221
+ try {
222
+ await fetchResult.page.close().catch(() => { });
223
+ if (fetchResult.browser) {
224
+ await fetchResult.browser.close().catch(() => { });
225
+ }
226
+ }
227
+ catch { /* ignore cleanup errors */ }
228
+ }
226
229
  }
227
230
  // Track content changes if requested
228
231
  let changeResult;
@@ -239,20 +242,16 @@ export async function peel(url, options = {}) {
239
242
  let summaryText;
240
243
  if (options.summary && options.llm) {
241
244
  try {
242
- const { extractWithLLM } = await import('./core/extract.js');
243
- const summaryPrompt = typeof options.summary === 'object' && options.summary.prompt
244
- ? options.summary.prompt
245
- : 'Summarize the main points of this content in a concise paragraph.';
245
+ const { summarizeContent } = await import('./core/summarize.js');
246
246
  const maxLength = typeof options.summary === 'object' && options.summary.maxLength
247
247
  ? options.summary.maxLength
248
- : 200;
249
- const result = await extractWithLLM(content, {
250
- prompt: `${summaryPrompt} Keep it under ${maxLength} words.`,
251
- llmApiKey: options.llm.apiKey,
252
- llmModel: options.llm.model,
253
- llmBaseUrl: options.llm.baseUrl,
248
+ : 150;
249
+ summaryText = await summarizeContent(content, {
250
+ apiKey: options.llm.apiKey,
251
+ model: options.llm.model,
252
+ apiBase: options.llm.baseUrl,
253
+ maxWords: maxLength,
254
254
  });
255
- summaryText = result.summary || Object.values(result)[0];
256
255
  }
257
256
  catch (error) {
258
257
  console.error('Summary generation failed:', error);
@@ -275,6 +274,7 @@ export async function peel(url, options = {}) {
275
274
  branding: brandingProfile,
276
275
  changeTracking: changeResult,
277
276
  summary: summaryText,
277
+ images: imagesList,
278
278
  };
279
279
  }
280
280
  catch (error) {