webpeel 0.1.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"cli.js","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AAEA;;;;;;;;;;;;GAYG;AAEH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,GAAG,MAAM,KAAK,CAAC;AACtB,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,YAAY,CAAC;AAG3C,MAAM,OAAO,GAAG,IAAI,OAAO,EAAE,CAAC;AAE9B,OAAO;KACJ,IAAI,CAAC,SAAS,CAAC;KACf,WAAW,CAAC,gCAAgC,CAAC;KAC7C,OAAO,CAAC,OAAO,CAAC,CAAC;AAEpB,OAAO;KACJ,QAAQ,CAAC,OAAO,EAAE,cAAc,CAAC;KACjC,MAAM,CAAC,cAAc,EAAE,2CAA2C,CAAC;KACnE,MAAM,CAAC,iBAAiB,EAAE,gCAAgC,EAAE,QAAQ,CAAC;KACrE,MAAM,CAAC,QAAQ,EAAE,qCAAqC,CAAC;KACvD,MAAM,CAAC,QAAQ,EAAE,uCAAuC,CAAC;KACzD,MAAM,CAAC,QAAQ,EAAE,gBAAgB,CAAC;KAClC,MAAM,CAAC,oBAAoB,EAAE,sBAAsB,EAAE,QAAQ,EAAE,KAAK,CAAC;KACrE,MAAM,CAAC,cAAc,EAAE,mBAAmB,CAAC;KAC3C,MAAM,CAAC,cAAc,EAAE,0BAA0B,CAAC;KAClD,MAAM,CAAC,KAAK,EAAE,GAAuB,EAAE,OAAO,EAAE,EAAE;IACjD,IAAI,CAAC,GAAG,EAAE,CAAC;QACT,OAAO,CAAC,KAAK,CAAC,0BAA0B,CAAC,CAAC;QAC1C,OAAO,CAAC,IAAI,EAAE,CAAC;QACf,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,oCAAoC;IACpC,IAAI,GAAG,CAAC,MAAM,GAAG,IAAI,EAAE,CAAC;QACtB,OAAO,CAAC,KAAK,CAAC,2CAA2C,CAAC,CAAC;QAC3D,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,+BAA+B;IAC/B,IAAI,iBAAiB,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;QAChC,OAAO,CAAC,KAAK,CAAC,gDAAgD,CAAC,CAAC;QAChE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,sBAAsB;IACtB,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QAC5B,IAAI,CAAC,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC;YACnD,OAAO,CAAC,KAAK,CAAC,kDAAkD,CAAC,CAAC;YAClE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;IACH,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,CAAC,KAAK,CAAC,8BAA8B,GAAG,EAAE,CAAC,CAAC;QACnD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,MAAM,OAAO,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,aAAa,CAAC,CAAC,KAAK,EAAE,CAAC;IAEnE,IAAI,CAAC;QACH,mBAAmB;QACnB,IAAI,OAAO,CAAC,IAAI,IAAI,CAAC,OAAO,CAAC,IAAI,GAAG,CAAC,IAAI,OAAO,CAAC,IAAI,GAAG,KAAK,CAAC,EAAE,CAAC;YAC/D,OAAO,CAAC,KAAK,CAAC,gDAAgD,CAAC,CAAC;YAChE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC
lB,CAAC;QAED,qBAAqB;QACrB,MAAM,WAAW,GAAgB;YAC/B,MAAM,EAAE,OAAO,CAAC,MAAM,IAAI,KAAK;YAC/B,IAAI,EAAE,OAAO,CAAC,IAAI,IAAI,CAAC;YACvB,OAAO,EAAE,OAAO,CAAC,OAAO;YACxB,SAAS,EAAE,OAAO,CAAC,EAAE;SACtB,CAAC;QAEF,mBAAmB;QACnB,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;YACjB,WAAW,CAAC,MAAM,GAAG,MAAM,CAAC;QAC9B,CAAC;aAAM,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;YACxB,WAAW,CAAC,MAAM,GAAG,MAAM,CAAC;QAC9B,CAAC;aAAM,CAAC;YACN,WAAW,CAAC,MAAM,GAAG,UAAU,CAAC;QAClC,CAAC;QAED,iBAAiB;QACjB,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,GAAG,EAAE,WAAW,CAAC,CAAC;QAE5C,IAAI,OAAO,EAAE,CAAC;YACZ,OAAO,CAAC,OAAO,CAAC,cAAc,MAAM,CAAC,OAAO,YAAY,MAAM,CAAC,MAAM,SAAS,CAAC,CAAC;QAClF,CAAC;QAED,iBAAiB;QACjB,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;YACjB,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;QAC/C,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAC9B,CAAC;QAED,oBAAoB;QACpB,MAAM,OAAO,EAAE,CAAC;QAChB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,OAAO,EAAE,CAAC;YACZ,OAAO,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAC;QAClC,CAAC;QAED,IAAI,KAAK,YAAY,KAAK,EAAE,CAAC;YAC3B,OAAO,CAAC,KAAK,CAAC,YAAY,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;QAC7C,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,KAAK,CAAC,iCAAiC,CAAC,CAAC;QACnD,CAAC;QAED,MAAM,OAAO,EAAE,CAAC;QAChB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC,CAAC,CAAC;AAEL,kBAAkB;AAClB,OAAO;KACJ,OAAO,CAAC,QAAQ,CAAC;KACjB,QAAQ,CAAC,SAAS,EAAE,cAAc,CAAC;KACnC,WAAW,CAAC,kCAAkC,CAAC;KAC/C,MAAM,CAAC,GAAG,EAAE;IACX,OAAO,CAAC,GAAG,CAAC,oCAAoC,CAAC,CAAC;IAClD,OAAO,CAAC,GAAG,CAAC,4CAA4C,CAAC,CAAC;IAC1D,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC;AAEL,OAAO;KACJ,OAAO,CAAC,OAAO,CAAC;KAChB,WAAW,CAAC,kBAAkB,CAAC;KAC/B,MAAM,CAAC,mBAAmB,EAAE,aAAa,EAAE,MAAM,CAAC;KAClD,MAAM,CAAC,KAAK,EAAE,OAAO,EAAE,EAAE;IACxB,MAAM,EAAE,WAAW,EAAE,GAAG,MAAM,MAAM,CAAC,iBAAiB,CAAC,CAAC;IACxD,WAAW,CAAC,EAAE,IAAI,EAAE,QAAQ,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC,EAAE,CAAC,CAAC;AACpD,CAAC,CAAC,CAAC;AAEL,OAAO;KACJ,OAAO,CAAC,KAAK,CAAC;KACd,WAAW,CAAC,8CAA8C,CAAC;KAC3D,MAAM,CAAC,KAAK,IAAI,EAAE;IACj
B,MAAM,MAAM,CAAC,iBAAiB,CAAC,CAAC;AAClC,CAAC,CAAC,CAAC;AAEL,OAAO,CAAC,KAAK,EAAE,CAAC"}
1
+ {"version":3,"file":"cli.js","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AAEA;;;;;;;;;;;;GAYG;AAEH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,GAAG,MAAM,KAAK,CAAC;AACtB,OAAO,EAAE,aAAa,EAAE,MAAM,IAAI,CAAC;AACnC,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,MAAM,YAAY,CAAC;AAGtD,MAAM,OAAO,GAAG,IAAI,OAAO,EAAE,CAAC;AAE9B,OAAO;KACJ,IAAI,CAAC,SAAS,CAAC;KACf,WAAW,CAAC,gCAAgC,CAAC;KAC7C,OAAO,CAAC,OAAO,CAAC;KAChB,uBAAuB,EAAE,CAAC;AAE7B,OAAO;KACJ,QAAQ,CAAC,OAAO,EAAE,cAAc,CAAC;KACjC,MAAM,CAAC,cAAc,EAAE,2CAA2C,CAAC;KACnE,MAAM,CAAC,WAAW,EAAE,kEAAkE,CAAC;KACvF,MAAM,CAAC,iBAAiB,EAAE,gCAAgC,EAAE,QAAQ,CAAC;KACrE,MAAM,CAAC,QAAQ,EAAE,qCAAqC,CAAC;KACvD,MAAM,CAAC,QAAQ,EAAE,uCAAuC,CAAC;KACzD,MAAM,CAAC,QAAQ,EAAE,gBAAgB,CAAC;KAClC,MAAM,CAAC,oBAAoB,EAAE,sBAAsB,EAAE,QAAQ,EAAE,KAAK,CAAC;KACrE,MAAM,CAAC,cAAc,EAAE,mBAAmB,CAAC;KAC3C,MAAM,CAAC,cAAc,EAAE,0BAA0B,CAAC;KAClD,MAAM,CAAC,qBAAqB,EAAE,kDAAkD,CAAC;KACjF,MAAM,CAAC,aAAa,EAAE,8CAA8C,CAAC;KACrE,MAAM,CAAC,kBAAkB,EAAE,uDAAuD,CAAC;KACnF,MAAM,CAAC,0BAA0B,EAAE,oDAAoD,CAAC;KACxF,MAAM,CAAC,0BAA0B,EAAE,sDAAsD,CAAC;KAC1F,MAAM,CAAC,sBAAsB,EAAE,yCAAyC,CAAC;KACzE,MAAM,CAAC,KAAK,EAAE,GAAuB,EAAE,OAAO,EAAE,EAAE;IACjD,IAAI,CAAC,GAAG,EAAE,CAAC;QACT,OAAO,CAAC,KAAK,CAAC,0BAA0B,CAAC,CAAC;QAC1C,OAAO,CAAC,IAAI,EAAE,CAAC;QACf,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,oCAAoC;IACpC,IAAI,GAAG,CAAC,MAAM,GAAG,IAAI,EAAE,CAAC;QACtB,OAAO,CAAC,KAAK,CAAC,2CAA2C,CAAC,CAAC;QAC3D,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,+BAA+B;IAC/B,IAAI,iBAAiB,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;QAChC,OAAO,CAAC,KAAK,CAAC,gDAAgD,CAAC,CAAC;QAChE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,sBAAsB;IACtB,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QAC5B,IAAI,CAAC,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC;YACnD,OAAO,CAAC,KAAK,CAAC,kDAAkD,CAAC,CAAC;YAClE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;IACH,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,CAAC,KAAK,CAAC,8BAA8B,GAAG,EAAE,CAAC,CAAC;QACnD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CA
AC;IAClB,CAAC;IAED,MAAM,OAAO,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,aAAa,CAAC,CAAC,KAAK,EAAE,CAAC;IAEnE,IAAI,CAAC;QACH,mBAAmB;QACnB,IAAI,OAAO,CAAC,IAAI,IAAI,CAAC,OAAO,CAAC,IAAI,GAAG,CAAC,IAAI,OAAO,CAAC,IAAI,GAAG,KAAK,CAAC,EAAE,CAAC;YAC/D,OAAO,CAAC,KAAK,CAAC,gDAAgD,CAAC,CAAC;YAChE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;QAED,uBAAuB;QACvB,IAAI,OAA2C,CAAC;QAChD,IAAI,OAAO,CAAC,MAAM,IAAI,OAAO,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAChD,OAAO,GAAG,EAAE,CAAC;YACb,KAAK,MAAM,MAAM,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;gBACpC,MAAM,UAAU,GAAG,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;gBACvC,IAAI,UAAU,KAAK,CAAC,CAAC,EAAE,CAAC;oBACtB,OAAO,CAAC,KAAK,CAAC,iCAAiC,MAAM,EAAE,CAAC,CAAC;oBACzD,OAAO,CAAC,KAAK,CAAC,+BAA+B,CAAC,CAAC;oBAC/C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;gBAClB,CAAC;gBACD,MAAM,GAAG,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC,IAAI,EAAE,CAAC;gBAC/C,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,UAAU,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;gBAClD,OAAO,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC;YACvB,CAAC;QACH,CAAC;QAED,qBAAqB;QACrB,MAAM,WAAW,GAAgB;YAC/B,MAAM,EAAE,OAAO,CAAC,MAAM,IAAI,KAAK;YAC/B,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,KAAK;YACjC,IAAI,EAAE,OAAO,CAAC,IAAI,IAAI,CAAC;YACvB,OAAO,EAAE,OAAO,CAAC,OAAO;YACxB,SAAS,EAAE,OAAO,CAAC,EAAE;YACrB,UAAU,EAAE,OAAO,CAAC,UAAU,KAAK,SAAS;YAC5C,kBAAkB,EAAE,OAAO,CAAC,QAAQ,IAAI,KAAK;YAC7C,QAAQ,EAAE,OAAO,CAAC,QAAQ;YAC1B,OAAO,EAAE,OAAO,CAAC,OAAO;YACxB,OAAO;YACP,OAAO,EAAE,OAAO,CAAC,MAAM;SACxB,CAAC;QAEF,mBAAmB;QACnB,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;YACjB,WAAW,CAAC,MAAM,GAAG,MAAM,CAAC;QAC9B,CAAC;aAAM,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;YACxB,WAAW,CAAC,MAAM,GAAG,MAAM,CAAC;QAC9B,CAAC;aAAM,CAAC;YACN,WAAW,CAAC,MAAM,GAAG,UAAU,CAAC;QAClC,CAAC;QAED,iBAAiB;QACjB,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,GAAG,EAAE,WAAW,CAAC,CAAC;QAE5C,IAAI,OAAO,EAAE,CAAC;YACZ,OAAO,CAAC,OAAO,CAAC,cAAc,MAAM,CAAC,OAAO,YAAY,MAAM,CAAC,MAAM,SAAS,CAAC,CAAC;QAClF,CAAC;QAED,2BAA2B;QAC3B,IAAI,OAAO,CAAC,UAAU,IAAI,MAAM,CAAC,UAAU,EAAE,CAAC;YAC5C,MAAM,cAAc,GAAG,OAAO,OAAO,CAAC,UAAU,KAAK,QAAQ;gBAC3D,CAAC,CAAC,OAAO,CAA
C,UAAU;gBACpB,CAAC,CAAC,gBAAgB,CAAC;YAErB,MAAM,gBAAgB,GAAG,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,UAAU,EAAE,QAAQ,CAAC,CAAC;YAClE,aAAa,CAAC,cAAc,EAAE,gBAAgB,CAAC,CAAC;YAEhD,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC;gBACpB,OAAO,CAAC,KAAK,CAAC,wBAAwB,cAAc,EAAE,CAAC,CAAC;YAC1D,CAAC;YAED,uDAAuD;YACvD,IAAI,OAAO,OAAO,CAAC,UAAU,KAAK,QAAQ,EAAE,CAAC;gBAC3C,OAAO,MAAM,CAAC,UAAU,CAAC;YAC3B,CAAC;QACH,CAAC;QAED,6CAA6C;QAC7C,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;YACjB,MAAM,OAAO,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;YAChD,MAAM,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;gBAC1C,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,OAAO,GAAG,IAAI,EAAE,CAAC,GAAG,EAAE,EAAE;oBAC3C,IAAI,GAAG;wBAAE,MAAM,CAAC,GAAG,CAAC,CAAC;;wBAChB,OAAO,EAAE,CAAC;gBACjB,CAAC,CAAC,CAAC;YACL,CAAC,CAAC,CAAC;QACL,CAAC;aAAM,CAAC;YACN,MAAM,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;gBAC1C,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,OAAO,GAAG,IAAI,EAAE,CAAC,GAAG,EAAE,EAAE;oBAClD,IAAI,GAAG;wBAAE,MAAM,CAAC,GAAG,CAAC,CAAC;;wBAChB,OAAO,EAAE,CAAC;gBACjB,CAAC,CAAC,CAAC;YACL,CAAC,CAAC,CAAC;QACL,CAAC;QAED,oBAAoB;QACpB,MAAM,OAAO,EAAE,CAAC;QAChB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,OAAO,EAAE,CAAC;YACZ,OAAO,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAC;QAClC,CAAC;QAED,IAAI,KAAK,YAAY,KAAK,EAAE,CAAC;YAC3B,OAAO,CAAC,KAAK,CAAC,YAAY,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;QAC7C,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,KAAK,CAAC,iCAAiC,CAAC,CAAC;QACnD,CAAC;QAED,MAAM,OAAO,EAAE,CAAC;QAChB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC,CAAC,CAAC;AAEL,iBAAiB;AACjB,OAAO;KACJ,OAAO,CAAC,gBAAgB,CAAC;KACzB,WAAW,CAAC,yBAAyB,CAAC;KACtC,MAAM,CAAC,iBAAiB,EAAE,0BAA0B,EAAE,GAAG,CAAC;KAC1D,MAAM,CAAC,QAAQ,EAAE,gBAAgB,CAAC;KAClC,MAAM,CAAC,cAAc,EAAE,aAAa,CAAC;KACrC,MAAM,CAAC,KAAK,EAAE,KAAa,EAAE,OAAO,EAAE,EAAE;IACvC,MAAM,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAC5B,MAAM,QAAQ,GAAG,OAAO,CAAC,MAAM,CAAC;IAChC,MAAM,KAAK,GAAG,QAAQ,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAE3C,MAAM,OAAO,GAAG,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,cAAc,CAAC,CAAC,KAAK,EAAE,CAAC;IAE9
D,IAAI,CAAC;QACH,yCAAyC;QACzC,MAAM,EAAE,KAAK,EAAE,WAAW,EAAE,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,CAAC;QACtD,MAAM,EAAE,IAAI,EAAE,GAAG,MAAM,MAAM,CAAC,SAAS,CAAC,CAAC;QAEzC,MAAM,SAAS,GAAG,uCAAuC,kBAAkB,CAAC,KAAK,CAAC,EAAE,CAAC;QAErF,MAAM,QAAQ,GAAG,MAAM,WAAW,CAAC,SAAS,EAAE;YAC5C,OAAO,EAAE;gBACP,YAAY,EAAE,oEAAoE;aACnF;SACF,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,IAAI,KAAK,CAAC,uBAAuB,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;QAC5D,CAAC;QAED,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QACnC,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC;QAErB,MAAM,OAAO,GAA2D,EAAE,CAAC;QAE3E,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,IAAI,EAAE,EAAE;YAC7B,IAAI,OAAO,CAAC,MAAM,IAAI,KAAK;gBAAE,OAAO;YAEpC,MAAM,OAAO,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC;YACxB,MAAM,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;YAC3D,MAAM,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC;YAC7D,MAAM,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;YAE/D,IAAI,CAAC,KAAK,IAAI,CAAC,MAAM;gBAAE,OAAO;YAE9B,8CAA8C;YAC9C,IAAI,GAAG,GAAG,MAAM,CAAC;YACjB,IAAI,CAAC;gBACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,MAAM,EAAE,wBAAwB,CAAC,CAAC;gBACzD,MAAM,IAAI,GAAG,MAAM,CAAC,YAAY,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;gBAC7C,IAAI,IAAI,EAAE,CAAC;oBACT,GAAG,GAAG,kBAAkB,CAAC,IAAI,CAAC,CAAC;gBACjC,CAAC;YACH,CAAC;YAAC,MAAM,CAAC;gBACP,+BAA+B;YACjC,CAAC;YAED,qBAAqB;YACrB,IAAI,CAAC;gBACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;gBAC5B,IAAI,CAAC,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC;oBACnD,OAAO;gBACT,CAAC;gBACD,GAAG,GAAG,MAAM,CAAC,IAAI,CAAC;YACpB,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO;YACT,CAAC;YAED,OAAO,CAAC,IAAI,CAAC;gBACX,KAAK,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC;gBAC1B,GAAG;gBACH,OAAO,EAAE,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC;aAC/B,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;QAEH,IAAI,OAAO,EAAE,CAAC;YACZ,OAAO,CAAC,OAAO,CAAC,SAAS,OAAO,CAAC,MAAM,UAAU,CAAC,CAAC;QACrD,CAAC;QAED,IAAI,MAAM,EAAE,CAAC;YACX,MAAM,OAAO,GAAG,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,IAAI,EAAE
,CAAC,CAAC,CAAC;YACjD,MAAM,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;gBAC1C,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,OAAO,GAAG,IAAI,EAAE,CAAC,GAAG,EAAE,EAAE;oBAC3C,IAAI,GAAG;wBAAE,MAAM,CAAC,GAAG,CAAC,CAAC;;wBAChB,OAAO,EAAE,CAAC;gBACjB,CAAC,CAAC,CAAC;YACL,CAAC,CAAC,CAAC;QACL,CAAC;aAAM,CAAC;YACN,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;gBAC7B,OAAO,CAAC,GAAG,CAAC,KAAK,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC;gBACjC,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;gBACxB,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;YAC9B,CAAC;QACH,CAAC;QAED,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,OAAO,EAAE,CAAC;YACZ,OAAO,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;QAChC,CAAC;QAED,IAAI,KAAK,YAAY,KAAK,EAAE,CAAC;YAC3B,OAAO,CAAC,KAAK,CAAC,YAAY,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;QAC7C,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,KAAK,CAAC,iCAAiC,CAAC,CAAC;QACnD,CAAC;QAED,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC,CAAC,CAAC;AAEL,gBAAgB;AAChB,OAAO;KACJ,OAAO,CAAC,cAAc,CAAC;KACvB,WAAW,CAAC,qBAAqB,CAAC;KAClC,MAAM,CAAC,uBAAuB,EAAE,qCAAqC,EAAE,GAAG,CAAC;KAC3E,MAAM,CAAC,oBAAoB,EAAE,qCAAqC,CAAC;KACnE,MAAM,CAAC,QAAQ,EAAE,sBAAsB,CAAC;KACxC,MAAM,CAAC,cAAc,EAAE,aAAa,CAAC;KACrC,MAAM,CAAC,cAAc,EAAE,sBAAsB,CAAC;KAC9C,MAAM,CAAC,kBAAkB,EAAE,yBAAyB,CAAC;KACrD,MAAM,CAAC,KAAK,EAAE,IAAY,EAAE,OAAO,EAAE,EAAE;IACtC,MAAM,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAC5B,MAAM,QAAQ,GAAG,OAAO,CAAC,MAAM,CAAC;IAChC,MAAM,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC;IACpC,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,CAAC;IAElC,MAAM,OAAO,GAAG,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,iBAAiB,CAAC,CAAC,KAAK,EAAE,CAAC;IAEjE,IAAI,CAAC;QACH,MAAM,EAAE,YAAY,EAAE,GAAG,MAAM,MAAM,CAAC,IAAI,CAAC,CAAC;QAE5C,sBAAsB;QACtB,IAAI,IAAc,CAAC;QACnB,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;YAC5C,IAAI,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC;iBACvB,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;iBACxB,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC;QACnD,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,IAAI,KAAK,CAAC,wBAAwB,IAAI,EAAE,CAA
C,CAAC;QAClD,CAAC;QAED,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACtB,MAAM,IAAI,KAAK,CAAC,uBAAuB,CAAC,CAAC;QAC3C,CAAC;QAED,IAAI,OAAO,EAAE,CAAC;YACZ,OAAO,CAAC,IAAI,GAAG,YAAY,IAAI,CAAC,MAAM,uBAAuB,OAAO,CAAC,WAAW,MAAM,CAAC;QACzF,CAAC;QAED,cAAc;QACd,MAAM,OAAO,GAAG,MAAM,SAAS,CAAC,IAAI,EAAE;YACpC,WAAW,EAAE,QAAQ,CAAC,OAAO,CAAC,WAAW,CAAC,IAAI,CAAC;YAC/C,MAAM,EAAE,YAAY;YACpB,QAAQ,EAAE,QAAQ;SACnB,CAAC,CAAC;QAEH,IAAI,OAAO,EAAE,CAAC;YACZ,MAAM,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,SAAS,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC;YAChE,OAAO,CAAC,OAAO,CAAC,cAAc,YAAY,IAAI,IAAI,CAAC,MAAM,aAAa,CAAC,CAAC;QAC1E,CAAC;QAED,iBAAiB;QACjB,IAAI,MAAM,EAAE,CAAC;YACX,MAAM,OAAO,GAAG,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;YACjD,MAAM,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;gBAC1C,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,OAAO,GAAG,IAAI,EAAE,CAAC,GAAG,EAAE,EAAE;oBAC3C,IAAI,GAAG;wBAAE,MAAM,CAAC,GAAG,CAAC,CAAC;;wBAChB,OAAO,EAAE,CAAC;gBACjB,CAAC,CAAC,CAAC;YACL,CAAC,CAAC,CAAC;QACL,CAAC;aAAM,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;YAC1B,MAAM,EAAE,aAAa,EAAE,SAAS,EAAE,GAAG,MAAM,MAAM,CAAC,IAAI,CAAC,CAAC;YACxD,MAAM,EAAE,IAAI,EAAE,GAAG,MAAM,MAAM,CAAC,MAAM,CAAC,CAAC;YAEtC,0BAA0B;YAC1B,SAAS,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;YAE/C,OAAO,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBAC5B,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;gBAChC,MAAM,QAAQ,GAAG,GAAG,CAAC,GAAG,CAAC,IAAI,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,aAAa,EAAE,GAAG,CAAC,KAAK,CAAC;gBAC9E,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC;gBAEhD,IAAI,SAAS,IAAI,MAAM,EAAE,CAAC;oBACxB,aAAa,CAAC,QAAQ,EAAE,MAAM,CAAC,OAAO,CAAC,CAAC;gBAC1C,CAAC;qBAAM,CAAC;oBACN,aAAa,CAAC,QAAQ,EAAE,UAAU,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC;gBACpD,CAAC;YACH,CAAC,CAAC,CAAC;YAEH,IAAI,CAAC,QAAQ,EAAE,CAAC;gBACd,OAAO,CAAC,GAAG,CAAC,uBAAuB,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;YACvD,CAAC;QACH,CAAC;aAAM,CAAC;YACN,0BAA0B;YAC1B,OAAO,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBAC5B,OAAO,CAAC,GAAG,CAAC,SAAS,IAAI,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;gBACtC,IAAI,SAAS,I
AAI,MAAM,EAAE,CAAC;oBACxB,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,GAAG,KAAK,CAAC,CAAC;gBACpD,CAAC;qBAAM,CAAC;oBACN,OAAO,CAAC,GAAG,CAAC,UAAU,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC;gBACxC,CAAC;YACH,CAAC,CAAC,CAAC;QACL,CAAC;QAED,MAAM,OAAO,EAAE,CAAC;QAChB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,OAAO,EAAE,CAAC;YACZ,OAAO,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC;QACrC,CAAC;QAED,IAAI,KAAK,YAAY,KAAK,EAAE,CAAC;YAC3B,OAAO,CAAC,KAAK,CAAC,YAAY,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;QAC7C,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,KAAK,CAAC,iCAAiC,CAAC,CAAC;QACnD,CAAC;QAED,MAAM,OAAO,EAAE,CAAC;QAChB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC,CAAC,CAAC;AAEL,OAAO;KACJ,OAAO,CAAC,aAAa,CAAC;KACtB,WAAW,CAAC,qCAAqC,CAAC;KAClD,MAAM,CAAC,sBAAsB,EAAE,0DAA0D,EAAE,QAAQ,EAAE,EAAE,CAAC;KACxG,MAAM,CAAC,sBAAsB,EAAE,6CAA6C,EAAE,QAAQ,EAAE,CAAC,CAAC;KAC1F,MAAM,CAAC,gCAAgC,EAAE,0DAA0D,CAAC;KACpG,MAAM,CAAC,yBAAyB,EAAE,4CAA4C,CAAC;KAC/E,MAAM,CAAC,iBAAiB,EAAE,iDAAiD,CAAC;KAC5E,MAAM,CAAC,mBAAmB,EAAE,mDAAmD,EAAE,QAAQ,EAAE,IAAI,CAAC;KAChG,MAAM,CAAC,cAAc,EAAE,oCAAoC,CAAC;KAC5D,MAAM,CAAC,WAAW,EAAE,gCAAgC,CAAC;KACrD,MAAM,CAAC,cAAc,EAAE,0BAA0B,CAAC;KAClD,MAAM,CAAC,QAAQ,EAAE,gBAAgB,CAAC;KAClC,MAAM,CAAC,KAAK,EAAE,GAAW,EAAE,OAAO,EAAE,EAAE;IACrC,MAAM,EAAE,KAAK,EAAE,GAAG,MAAM,MAAM,CAAC,mBAAmB,CAAC,CAAC;IAEpD,MAAM,OAAO,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,aAAa,CAAC,CAAC,KAAK,EAAE,CAAC;IAEnE,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;YAC/B,QAAQ,EAAE,OAAO,CAAC,QAAQ;YAC1B,QAAQ,EAAE,OAAO,CAAC,QAAQ;YAC1B,cAAc,EAAE,OAAO,CAAC,cAAc;YACtC,eAAe,EAAE,OAAO,CAAC,OAAO;YAChC,gBAAgB,EAAE,CAAC,OAAO,CAAC,YAAY;YACvC,WAAW,EAAE,OAAO,CAAC,SAAS;YAC9B,MAAM,EAAE,OAAO,CAAC,MAAM,IAAI,KAAK;YAC/B,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,KAAK;SAClC,CAAC,CAAC;QAEH,IAAI,OAAO,EAAE,CAAC;YACZ,OAAO,CAAC,OAAO,CAAC,WAAW,OAAO,CAAC,MAAM,QAAQ,CAAC,CAAC;QACrD,CAAC;QAED,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;YACjB,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;QA
ChD,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBAC5B,OAAO,CAAC,GAAG,CAAC,KAAK,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC;gBACnC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,OAAO,CAAC,MAAM,KAAK,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC;gBAC5D,OAAO,CAAC,GAAG,CAAC,QAAQ,MAAM,CAAC,GAAG,EAAE,CAAC,CAAC;gBAClC,OAAO,CAAC,GAAG,CAAC,UAAU,MAAM,CAAC,KAAK,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,WAAW,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBACzF,OAAO,CAAC,GAAG,CAAC,gBAAgB,MAAM,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC;gBACnD,OAAO,CAAC,GAAG,CAAC,YAAY,MAAM,CAAC,OAAO,IAAI,CAAC,CAAC;gBAE5C,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;oBACjB,OAAO,CAAC,GAAG,CAAC,UAAU,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC;gBACxC,CAAC;qBAAM,CAAC;oBACN,OAAO,CAAC,GAAG,CAAC,KAAK,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,GAAG,MAAM,CAAC,QAAQ,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBAChG,CAAC;YACH,CAAC,CAAC,CAAC;QACL,CAAC;QAED,MAAM,OAAO,EAAE,CAAC;QAChB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,OAAO,EAAE,CAAC;YACZ,OAAO,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;QAC/B,CAAC;QAED,IAAI,KAAK,YAAY,KAAK,EAAE,CAAC;YAC3B,OAAO,CAAC,KAAK,CAAC,YAAY,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;QAC7C,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,KAAK,CAAC,iCAAiC,CAAC,CAAC;QACnD,CAAC;QAED,MAAM,OAAO,EAAE,CAAC;QAChB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC,CAAC,CAAC;AAEL,OAAO;KACJ,OAAO,CAAC,OAAO,CAAC;KAChB,WAAW,CAAC,kBAAkB,CAAC;KAC/B,MAAM,CAAC,mBAAmB,EAAE,aAAa,EAAE,MAAM,CAAC;KAClD,MAAM,CAAC,KAAK,EAAE,OAAO,EAAE,EAAE;IACxB,MAAM,EAAE,WAAW,EAAE,GAAG,MAAM,MAAM,CAAC,iBAAiB,CAAC,CAAC;IACxD,WAAW,CAAC,EAAE,IAAI,EAAE,QAAQ,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC,EAAE,CAAC,CAAC;AACpD,CAAC,CAAC,CAAC;AAEL,OAAO;KACJ,OAAO,CAAC,KAAK,CAAC;KACd,WAAW,CAAC,8CAA8C,CAAC;KAC3D,MAAM,CAAC,KAAK,IAAI,EAAE;IACjB,MAAM,MAAM,CAAC,iBAAiB,CAAC,CAAC;AAClC,CAAC,CAAC,CAAC;AAEL,OAAO,CAAC,KAAK,EAAE,CAAC"}
@@ -0,0 +1,58 @@
1
+ /**
2
+ * Web crawler functionality
3
+ * Crawls a starting URL and follows links matching specified patterns
4
+ */
5
+ import type { PeelOptions } from '../types.js';
6
+ export interface CrawlOptions extends Omit<PeelOptions, 'format'> {
7
+ /** Maximum number of pages to crawl (default: 10; crawl() clamps the value to the range 1-100) */
8
+ maxPages?: number;
9
+ /** Maximum link depth to follow, where the starting URL is depth 0 (default: 2; crawl() clamps the value to the range 1-5) */
10
+ maxDepth?: number;
11
+ /** Only crawl URLs whose hostname exactly equals one of these entries (default: the hostname of the starting URL) */
12
+ allowedDomains?: string[];
13
+ /** Skip URLs matching any of these patterns; each string is compiled with `new RegExp` and tested against the full URL */
14
+ excludePatterns?: string[];
15
+ /** Respect robots.txt (default: true); the rules are fetched once, from the starting URL's hostname */
16
+ respectRobotsTxt?: boolean;
17
+ /** Delay between requests in milliseconds (default: 1000ms = 1 req/sec; values below 100 are raised to 100) */
18
+ rateLimitMs?: number;
19
+ }
20
+ export interface CrawlResult {
21
+ /** URL of the crawled page (as reported by the fetch on success; the requested URL when the fetch failed) */
22
+ url: string;
23
+ /** Page title (empty string when the fetch failed) */
24
+ title: string;
25
+ /** Markdown content (empty string when the fetch failed) */
26
+ markdown: string;
27
+ /** All links found on this page (absolute URLs); empty array when the fetch failed */
28
+ links: string[];
29
+ /** Depth level (0 = starting URL) */
30
+ depth: number;
31
+ /** Parent URL that linked to this page (null for starting URL) */
32
+ parent: string | null;
33
+ /** Time elapsed fetching this page (ms); 0 when the fetch failed */
34
+ elapsed: number;
35
+ /** Error message if page failed to fetch; absent on success */
36
+ error?: string;
37
+ }
38
+ /**
39
+ * Crawl a website breadth-first starting from a URL, fetching one page at a time
40
+ *
41
+ * @param startUrl - Starting URL to crawl from (must be an absolute URL; an unparsable value makes the call reject)
42
+ * @param options - Crawl options (limits, domain/pattern filters, robots.txt handling, plus options passed through to the page fetch)
43
+ * @returns One result per attempted page, in crawl order; pages that failed to fetch are included with `error` set
44
+ *
45
+ * @example
46
+ * ```typescript
47
+ * import { crawl } from 'webpeel';
48
+ *
49
+ * const results = await crawl('https://example.com', {
50
+ * maxPages: 20,
51
+ * maxDepth: 2,
52
+ * });
53
+ *
54
+ * console.log(`Crawled ${results.length} pages`);
55
+ * ```
56
+ */
57
+ export declare function crawl(startUrl: string, options?: CrawlOptions): Promise<CrawlResult[]>;
58
+ //# sourceMappingURL=crawler.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"crawler.d.ts","sourceRoot":"","sources":["../../src/core/crawler.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAGH,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAG/C,MAAM,WAAW,YAAa,SAAQ,IAAI,CAAC,WAAW,EAAE,QAAQ,CAAC;IAC/D,+DAA+D;IAC/D,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,kDAAkD;IAClD,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,gFAAgF;IAChF,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC;IAC1B,2DAA2D;IAC3D,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;IAC3B,yCAAyC;IACzC,gBAAgB,CAAC,EAAE,OAAO,CAAC;IAC3B,gFAAgF;IAChF,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,WAAW;IAC1B,8BAA8B;IAC9B,GAAG,EAAE,MAAM,CAAC;IACZ,iBAAiB;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,uBAAuB;IACvB,QAAQ,EAAE,MAAM,CAAC;IACjB,mDAAmD;IACnD,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,qCAAqC;IACrC,KAAK,EAAE,MAAM,CAAC;IACd,kEAAkE;IAClE,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IACtB,2CAA2C;IAC3C,OAAO,EAAE,MAAM,CAAC;IAChB,4CAA4C;IAC5C,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAsFD;;;;;;;;;;;;;;;;;;GAkBG;AACH,wBAAsB,KAAK,CACzB,QAAQ,EAAE,MAAM,EAChB,OAAO,GAAE,YAAiB,GACzB,OAAO,CAAC,WAAW,EAAE,CAAC,CAwIxB"}
@@ -0,0 +1,205 @@
1
+ /**
2
+ * Web crawler functionality
3
+ * Crawls a starting URL and follows links matching specified patterns
4
+ */
5
+ import { peel } from '../index.js';
6
+ import { fetch as undiciFetch } from 'undici';
7
/**
 * Fetch `https://<domain>/robots.txt` and extract the rules that apply to
 * `User-agent: *`.
 *
 * This is deliberately best-effort: a non-2xx response, a network error or a
 * timeout (5 seconds) all result in "allow everything" rather than a failure.
 *
 * @param {string} domain - Hostname to fetch robots.txt from.
 * @returns {Promise<{ disallowedPaths: string[], crawlDelay?: number }>}
 *   Disallowed path patterns and, when present, the Crawl-delay converted to
 *   milliseconds.
 */
async function fetchRobotsTxt(domain) {
    const robotsUrl = `https://${domain}/robots.txt`;
    try {
        const response = await undiciFetch(robotsUrl, {
            headers: {
                'User-Agent': 'WebPeel/0.3.0 (+https://webpeel.dev)',
            },
            signal: AbortSignal.timeout(5000), // 5 second timeout
        });
        if (!response.ok) {
            // If robots.txt doesn't exist, allow everything
            return { disallowedPaths: [] };
        }
        return parseRobotsTxt(await response.text());
    }
    catch {
        // If we can't fetch robots.txt, allow everything
        return { disallowedPaths: [] };
    }
}

/**
 * Parse robots.txt text into the rules of the groups that name `User-agent: *`.
 *
 * @param {string} text - Raw robots.txt body.
 * @returns {{ disallowedPaths: string[], crawlDelay: (number|undefined) }}
 */
function parseRobotsTxt(text) {
    const disallowedPaths = [];
    let crawlDelay;
    // Whether the group currently being read applies to "*".
    let groupApplies = false;
    // True while reading a run of consecutive User-agent lines; such a run
    // names several agents that all share the rules that follow.
    let readingAgents = false;
    for (const rawLine of text.split('\n')) {
        // Everything after '#' is a comment; trim() also drops a trailing '\r'.
        const hashAt = rawLine.indexOf('#');
        const line = (hashAt === -1 ? rawLine : rawLine.slice(0, hashAt)).trim();
        const colonAt = line.indexOf(':');
        if (colonAt === -1) {
            continue;
        }
        const field = line.slice(0, colonAt).trim().toLowerCase();
        const value = line.slice(colonAt + 1).trim();
        if (field === 'user-agent') {
            const isWildcard = value === '*';
            // A new run of User-agent lines starts a new group; further lines
            // in the same run only widen the set of agents it covers.
            groupApplies = readingAgents ? groupApplies || isWildcard : isWildcard;
            readingAgents = true;
            continue;
        }
        if (field !== 'allow' && field !== 'disallow' && field !== 'crawl-delay') {
            // Unknown records (e.g. Sitemap) must not disturb group parsing.
            continue;
        }
        readingAgents = false;
        if (!groupApplies) {
            continue;
        }
        if (field === 'disallow') {
            // An empty Disallow means "nothing is disallowed".
            if (value) {
                disallowedPaths.push(value);
            }
        }
        else if (field === 'crawl-delay') {
            // Crawl-delay is given in seconds and may be fractional.
            const seconds = Number.parseFloat(value);
            if (Number.isFinite(seconds) && seconds >= 0) {
                crawlDelay = Math.round(seconds * 1000); // Convert to milliseconds
            }
        }
    }
    return { disallowedPaths, crawlDelay };
}
60
/**
 * Check if a URL is allowed by robots.txt rules.
 *
 * Every Disallow pattern is matched from the start of the URL's path plus
 * query string. A pattern without special characters is a plain prefix
 * match; `*` matches any run of characters and a trailing `$` anchors the
 * pattern to the end of the URL.
 *
 * @param {string} url - Absolute URL to check.
 * @param {{ disallowedPaths: string[] }} rules - Parsed robots.txt rules.
 * @returns {boolean} `false` when any Disallow pattern matches, else `true`.
 * @throws {TypeError} If `url` cannot be parsed as an absolute URL.
 */
function isAllowedByRobots(url, rules) {
    const { pathname, search } = new URL(url);
    // robots.txt patterns apply to the path including the query string.
    const target = `${pathname}${search}`;
    return !rules.disallowedPaths.some((rule) => matchesRobotsRule(target, rule));
}

/**
 * Test one robots.txt path pattern against a path+query string.
 * Uses sequential substring search instead of a RegExp so that patterns
 * from an untrusted robots.txt cannot trigger catastrophic backtracking.
 *
 * @param {string} target - Path plus query string of the URL.
 * @param {string} rule - Disallow pattern, possibly containing `*` / trailing `$`.
 * @returns {boolean} Whether the pattern matches.
 */
function matchesRobotsRule(target, rule) {
    // An empty pattern disallows nothing.
    if (rule === '') {
        return false;
    }
    const anchored = rule.endsWith('$');
    const segments = (anchored ? rule.slice(0, -1) : rule).split('*');
    const [head] = segments;
    if (!target.startsWith(head)) {
        return false;
    }
    let cursor = head.length;
    const lastIndex = segments.length - 1;
    for (let i = 1; i <= lastIndex; i++) {
        const segment = segments[i];
        if (anchored && i === lastIndex) {
            // Final literal must be a suffix that starts at or after the cursor.
            return target.length - segment.length >= cursor && target.endsWith(segment);
        }
        // Leftmost match leaves the most room for the remaining segments.
        const foundAt = target.indexOf(segment, cursor);
        if (foundAt === -1) {
            return false;
        }
        cursor = foundAt + segment.length;
    }
    // No wildcard was present: anchored patterns need an exact match,
    // unanchored ones are satisfied by the prefix match above.
    return anchored ? cursor === target.length : true;
}
74
/**
 * Crawl a website starting from a URL.
 *
 * Pages are fetched one at a time in breadth-first order. A page that fails
 * to fetch does not abort the crawl; it is recorded in the results with its
 * `error` field set.
 *
 * @param startUrl - Starting URL to crawl from (absolute URL)
 * @param options - Crawl options; anything that is not a crawl setting is
 *   forwarded to `peel` for each page
 * @returns Array of crawl results, one per attempted page
 * @throws {TypeError} If `startUrl` is not a valid absolute URL
 *
 * @example
 * ```typescript
 * import { crawl } from 'webpeel';
 *
 * const results = await crawl('https://example.com', {
 *   maxPages: 20,
 *   maxDepth: 2,
 * });
 *
 * console.log(`Crawled ${results.length} pages`);
 * ```
 */
export async function crawl(startUrl, options = {}) {
    const {
        maxPages = 10,
        maxDepth = 2,
        allowedDomains,
        excludePatterns = [],
        respectRobotsTxt = true,
        rateLimitMs = 1000,
        ...peelOptions
    } = options;
    // Validate limits
    const validatedMaxPages = Math.min(Math.max(maxPages, 1), 100);
    const validatedMaxDepth = Math.min(Math.max(maxDepth, 1), 5);
    const validatedRateLimit = Math.max(rateLimitMs, 100); // Min 100ms between requests
    // Parse starting URL (throws on an invalid URL)
    const startDomain = new URL(startUrl).hostname;
    // Default: only crawl same domain as starting URL
    const validatedAllowedDomains = allowedDomains && allowedDomains.length > 0
        ? allowedDomains
        : [startDomain];
    // Compile exclude patterns
    const excludeRegexes = excludePatterns.map((pattern) => new RegExp(pattern));
    // Fetch robots.txt if needed
    // NOTE(review): rules come from the start domain only but are applied to
    // every allowed domain below — confirm whether that is intended.
    let robotsRules = { disallowedPaths: [] };
    if (respectRobotsTxt) {
        robotsRules = await fetchRobotsTxt(startDomain);
        // Use crawl-delay from robots.txt if it's larger than our rate limit
        if (robotsRules.crawlDelay && robotsRules.crawlDelay > validatedRateLimit) {
            console.error(`[Crawler] Using Crawl-delay from robots.txt: ${robotsRules.crawlDelay}ms`);
        }
    }
    // The slower of the two wins: a small Crawl-delay must never undercut the
    // configured (and clamped) rate limit.
    const effectiveRateLimit = Math.max(robotsRules.crawlDelay || 0, validatedRateLimit);
    // State tracking
    const results = [];
    const visited = new Set();
    const queue = [
        { url: startUrl, depth: 0, parent: null },
    ];
    while (queue.length > 0 && results.length < validatedMaxPages) {
        const { url, depth, parent } = queue.shift();
        // Skip if already visited
        if (visited.has(url)) {
            continue;
        }
        visited.add(url);
        // Skip if depth exceeded
        if (depth > validatedMaxDepth) {
            continue;
        }
        // Validate URL
        let urlObj;
        try {
            urlObj = new URL(url);
        }
        catch {
            continue; // Skip invalid URLs
        }
        // Check if domain is allowed
        if (!validatedAllowedDomains.includes(urlObj.hostname)) {
            continue;
        }
        // Check exclude patterns
        if (excludeRegexes.some((regex) => regex.test(url))) {
            continue;
        }
        // Check robots.txt
        if (respectRobotsTxt && !isAllowedByRobots(url, robotsRules)) {
            console.error(`[Crawler] Skipping ${url} (disallowed by robots.txt)`);
            continue;
        }
        // Fetch the page
        try {
            const result = await peel(url, {
                ...peelOptions,
                format: 'markdown',
            });
            results.push({
                url: result.url,
                title: result.title,
                markdown: result.content,
                links: result.links,
                depth,
                parent,
                elapsed: result.elapsed,
            });
            // Add discovered links to queue
            if (depth < validatedMaxDepth) {
                for (const link of result.links) {
                    if (!visited.has(link)) {
                        queue.push({
                            url: link,
                            depth: depth + 1,
                            parent: url,
                        });
                    }
                }
            }
        }
        catch (error) {
            // Log error and continue
            const errorMessage = error instanceof Error ? error.message : 'Unknown error';
            console.error(`[Crawler] Failed to fetch ${url}: ${errorMessage}`);
            results.push({
                url,
                title: '',
                markdown: '',
                links: [],
                depth,
                parent,
                elapsed: 0,
                error: errorMessage,
            });
        }
        // Rate limiting: pause after every attempted fetch (successful or
        // not), but only when another candidate is still waiting.
        if (results.length < validatedMaxPages && queue.length > 0) {
            await new Promise((resolve) => setTimeout(resolve, effectiveRateLimit));
        }
    }
    return results;
}
205
+ //# sourceMappingURL=crawler.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"crawler.js","sourceRoot":"","sources":["../../src/core/crawler.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,IAAI,EAAE,MAAM,aAAa,CAAC;AAEnC,OAAO,EAAE,KAAK,IAAI,WAAW,EAAE,MAAM,QAAQ,CAAC;AAyC9C;;GAEG;AACH,KAAK,UAAU,cAAc,CAAC,MAAc;IAC1C,MAAM,SAAS,GAAG,WAAW,MAAM,aAAa,CAAC;IAEjD,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,WAAW,CAAC,SAAS,EAAE;YAC5C,OAAO,EAAE;gBACP,YAAY,EAAE,sCAAsC;aACrD;YACD,MAAM,EAAE,WAAW,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,mBAAmB;SACvD,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,gDAAgD;YAChD,OAAO,EAAE,eAAe,EAAE,EAAE,EAAE,CAAC;QACjC,CAAC;QAED,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QACnC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAE/B,MAAM,eAAe,GAAa,EAAE,CAAC;QACrC,IAAI,UAA8B,CAAC;QACnC,IAAI,eAAe,GAAG,KAAK,CAAC;QAE5B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;YAE5B,0BAA0B;YAC1B,IAAI,OAAO,CAAC,WAAW,EAAE,CAAC,UAAU,CAAC,aAAa,CAAC,EAAE,CAAC;gBACpD,MAAM,KAAK,GAAG,OAAO,CAAC,SAAS,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC;gBAC7D,eAAe,GAAG,KAAK,KAAK,GAAG,CAAC;gBAChC,SAAS;YACX,CAAC;YAED,IAAI,CAAC,eAAe;gBAAE,SAAS;YAE/B,4BAA4B;YAC5B,IAAI,OAAO,CAAC,WAAW,EAAE,CAAC,UAAU,CAAC,WAAW,CAAC,EAAE,CAAC;gBAClD,MAAM,IAAI,GAAG,OAAO,CAAC,SAAS,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC;gBAC1D,IAAI,IAAI,EAAE,CAAC;oBACT,eAAe,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBAC7B,CAAC;YACH,CAAC;YAED,8BAA8B;YAC9B,IAAI,OAAO,CAAC,WAAW,EAAE,CAAC,UAAU,CAAC,cAAc,CAAC,EAAE,CAAC;gBACrD,MAAM,KAAK,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;gBACxE,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC;oBAClB,UAAU,GAAG,KAAK,GAAG,IAAI,CAAC,CAAC,0BAA0B;gBACvD,CAAC;YACH,CAAC;QACH,CAAC;QAED,OAAO,EAAE,eAAe,EAAE,UAAU,EAAE,CAAC;IACzC,CAAC;IAAC,MAAM,CAAC;QACP,iDAAiD;QACjD,OAAO,EAAE,eAAe,EAAE,EAAE,EAAE,CAAC;IACjC,CAAC;AACH,CAAC;AAED;;GAEG;AACH,SAAS,iBAAiB,CAAC,GAAW,EAAE,KAAkB;IACxD,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;IAC5B,MAAM,IAAI,GAAG,MAAM,CAAC,QAAQ,CAAC;IAE7B,KAAK,MAAM,UAAU,IAAI,KAAK,CAAC,eAAe,EAAE,CAAC;Q
AC/C,4EAA4E;QAC5E,IAAI,IAAI,CAAC,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;YAChC,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;;;;;;;;;;;;;;;;;GAkBG;AACH,MAAM,CAAC,KAAK,UAAU,KAAK,CACzB,QAAgB,EAChB,UAAwB,EAAE;IAE1B,MAAM,EACJ,QAAQ,GAAG,EAAE,EACb,QAAQ,GAAG,CAAC,EACZ,cAAc,EACd,eAAe,GAAG,EAAE,EACpB,gBAAgB,GAAG,IAAI,EACvB,WAAW,GAAG,IAAI,EAClB,GAAG,WAAW,EACf,GAAG,OAAO,CAAC;IAEZ,kBAAkB;IAClB,MAAM,iBAAiB,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;IAC/D,MAAM,iBAAiB,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;IAC7D,MAAM,kBAAkB,GAAG,IAAI,CAAC,GAAG,CAAC,WAAW,EAAE,GAAG,CAAC,CAAC,CAAC,6BAA6B;IAEpF,qBAAqB;IACrB,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,QAAQ,CAAC,CAAC;IACtC,MAAM,WAAW,GAAG,WAAW,CAAC,QAAQ,CAAC;IAEzC,kDAAkD;IAClD,MAAM,uBAAuB,GAAG,cAAc,IAAI,cAAc,CAAC,MAAM,GAAG,CAAC;QACzE,CAAC,CAAC,cAAc;QAChB,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC;IAElB,2BAA2B;IAC3B,MAAM,cAAc,GAAG,eAAe,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC;IAE3E,6BAA6B;IAC7B,IAAI,WAAW,GAAgB,EAAE,eAAe,EAAE,EAAE,EAAE,CAAC;IACvD,IAAI,gBAAgB,EAAE,CAAC;QACrB,WAAW,GAAG,MAAM,cAAc,CAAC,WAAW,CAAC,CAAC;QAEhD,qEAAqE;QACrE,IAAI,WAAW,CAAC,UAAU,IAAI,WAAW,CAAC,UAAU,GAAG,kBAAkB,EAAE,CAAC;YAC1E,OAAO,CAAC,KAAK,CAAC,gDAAgD,WAAW,CAAC,UAAU,IAAI,CAAC,CAAC;QAC5F,CAAC;IACH,CAAC;IAED,MAAM,kBAAkB,GAAG,WAAW,CAAC,UAAU,IAAI,kBAAkB,CAAC;IAExE,iBAAiB;IACjB,MAAM,OAAO,GAAkB,EAAE,CAAC;IAClC,MAAM,OAAO,GAAG,IAAI,GAAG,EAAU,CAAC;IAClC,MAAM,KAAK,GAAiE;QAC1E,EAAE,GAAG,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,IAAI,EAAE;KAC1C,CAAC;IAEF,OAAO,KAAK,CAAC,MAAM,GAAG,CAAC,IAAI,OAAO,CAAC,MAAM,GAAG,iBAAiB,EAAE,CAAC;QAC9D,MAAM,IAAI,GAAG,KAAK,CAAC,KAAK,EAAG,CAAC;QAC5B,MAAM,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,IAAI,CAAC;QAEpC,0BAA0B;QAC1B,IAAI,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC;YAAE,SAAS;QAC/B,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QAEjB,yBAAyB;QACzB,IAAI,KAAK,GAAG,iBAAiB;YAAE,SAAS;QAExC,eAAe;QACf,IAAI,MAAW,CAAC;QAChB,IAAI,CAAC;YACH,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAA
C;QACxB,CAAC;QAAC,MAAM,CAAC;YACP,SAAS,CAAC,oBAAoB;QAChC,CAAC;QAED,6BAA6B;QAC7B,IAAI,CAAC,uBAAuB,CAAC,QAAQ,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC;YACvD,SAAS;QACX,CAAC;QAED,yBAAyB;QACzB,IAAI,cAAc,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC;YAClD,SAAS;QACX,CAAC;QAED,mBAAmB;QACnB,IAAI,gBAAgB,IAAI,CAAC,iBAAiB,CAAC,GAAG,EAAE,WAAW,CAAC,EAAE,CAAC;YAC7D,OAAO,CAAC,KAAK,CAAC,sBAAsB,GAAG,6BAA6B,CAAC,CAAC;YACtE,SAAS;QACX,CAAC;QAED,iBAAiB;QACjB,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,GAAG,EAAE;gBAC7B,GAAG,WAAW;gBACd,MAAM,EAAE,UAAU;aACnB,CAAC,CAAC;YAEH,OAAO,CAAC,IAAI,CAAC;gBACX,GAAG,EAAE,MAAM,CAAC,GAAG;gBACf,KAAK,EAAE,MAAM,CAAC,KAAK;gBACnB,QAAQ,EAAE,MAAM,CAAC,OAAO;gBACxB,KAAK,EAAE,MAAM,CAAC,KAAK;gBACnB,KAAK;gBACL,MAAM;gBACN,OAAO,EAAE,MAAM,CAAC,OAAO;aACxB,CAAC,CAAC;YAEH,gCAAgC;YAChC,IAAI,KAAK,GAAG,iBAAiB,EAAE,CAAC;gBAC9B,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;oBAChC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;wBACvB,KAAK,CAAC,IAAI,CAAC;4BACT,GAAG,EAAE,IAAI;4BACT,KAAK,EAAE,KAAK,GAAG,CAAC;4BAChB,MAAM,EAAE,GAAG;yBACZ,CAAC,CAAC;oBACL,CAAC;gBACH,CAAC;YACH,CAAC;YAED,gBAAgB;YAChB,IAAI,OAAO,CAAC,MAAM,GAAG,iBAAiB,EAAE,CAAC;gBACvC,MAAM,IAAI,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,kBAAkB,CAAC,CAAC,CAAC;YACxE,CAAC;QACH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,yBAAyB;YACzB,MAAM,YAAY,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC;YAC9E,OAAO,CAAC,KAAK,CAAC,6BAA6B,GAAG,KAAK,YAAY,EAAE,CAAC,CAAC;YAEnE,OAAO,CAAC,IAAI,CAAC;gBACX,GAAG;gBACH,KAAK,EAAE,EAAE;gBACT,QAAQ,EAAE,EAAE;gBACZ,KAAK,EAAE,EAAE;gBACT,KAAK;gBACL,MAAM;gBACN,OAAO,EAAE,CAAC;gBACV,KAAK,EAAE,YAAY;aACpB,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC"}
@@ -5,13 +5,15 @@ export interface FetchResult {
5
5
  html: string;
6
6
  url: string;
7
7
  statusCode?: number;
8
+ screenshot?: Buffer;
9
+ contentType?: string;
8
10
  }
9
11
  /**
10
12
  * Simple HTTP fetch using native fetch + Cheerio
11
13
  * Fast and lightweight, but can be blocked by Cloudflare/bot detection
12
14
  * SECURITY: Manual redirect handling with SSRF re-validation
13
15
  */
14
- export declare function simpleFetch(url: string, userAgent?: string, timeoutMs?: number): Promise<FetchResult>;
16
+ export declare function simpleFetch(url: string, userAgent?: string, timeoutMs?: number, customHeaders?: Record<string, string>): Promise<FetchResult>;
15
17
  /**
16
18
  * Fetch using headless Chromium via Playwright
17
19
  * Slower but can handle JavaScript-heavy sites and bypass some bot detection
@@ -20,6 +22,11 @@ export declare function browserFetch(url: string, options?: {
20
22
  userAgent?: string;
21
23
  waitMs?: number;
22
24
  timeoutMs?: number;
25
+ screenshot?: boolean;
26
+ screenshotFullPage?: boolean;
27
+ headers?: Record<string, string>;
28
+ cookies?: string[];
29
+ stealth?: boolean;
23
30
  }): Promise<FetchResult>;
24
31
  /**
25
32
  * Retry a fetch operation with exponential backoff
@@ -1 +1 @@
1
- {"version":3,"file":"fetcher.d.ts","sourceRoot":"","sources":["../../src/core/fetcher.ts"],"names":[],"mappings":"AAAA;;GAEG;AA2PH,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,MAAM,CAAC;IACb,GAAG,EAAE,MAAM,CAAC;IACZ,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED;;;;GAIG;AACH,wBAAsB,WAAW,CAC/B,GAAG,EAAE,MAAM,EACX,SAAS,CAAC,EAAE,MAAM,EAClB,SAAS,GAAE,MAAc,GACxB,OAAO,CAAC,WAAW,CAAC,CAyItB;AAuBD;;;GAGG;AACH,wBAAsB,YAAY,CAChC,GAAG,EAAE,MAAM,EACX,OAAO,GAAE;IACP,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,SAAS,CAAC,EAAE,MAAM,CAAC;CACf,GACL,OAAO,CAAC,WAAW,CAAC,CAoGtB;AAED;;GAEG;AACH,wBAAsB,UAAU,CAAC,CAAC,EAChC,EAAE,EAAE,MAAM,OAAO,CAAC,CAAC,CAAC,EACpB,WAAW,GAAE,MAAU,EACvB,WAAW,GAAE,MAAa,GACzB,OAAO,CAAC,CAAC,CAAC,CAsBZ;AAED;;GAEG;AACH,wBAAsB,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC,CAK7C"}
1
+ {"version":3,"file":"fetcher.d.ts","sourceRoot":"","sources":["../../src/core/fetcher.ts"],"names":[],"mappings":"AAAA;;GAEG;AAgQH,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,MAAM,CAAC;IACb,GAAG,EAAE,MAAM,CAAC;IACZ,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;;;GAIG;AACH,wBAAsB,WAAW,CAC/B,GAAG,EAAE,MAAM,EACX,SAAS,CAAC,EAAE,MAAM,EAClB,SAAS,GAAE,MAAc,EACzB,aAAa,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GACrC,OAAO,CAAC,WAAW,CAAC,CA2JtB;AAyCD;;;GAGG;AACH,wBAAsB,YAAY,CAChC,GAAG,EAAE,MAAM,EACX,OAAO,GAAE;IACP,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,OAAO,CAAC,EAAE,OAAO,CAAC;CACd,GACL,OAAO,CAAC,WAAW,CAAC,CAkKtB;AAED;;GAEG;AACH,wBAAsB,UAAU,CAAC,CAAC,EAChC,EAAE,EAAE,MAAM,OAAO,CAAC,CAAC,CAAC,EACpB,WAAW,GAAE,MAAU,EACvB,WAAW,GAAE,MAAa,GACzB,OAAO,CAAC,CAAC,CAAC,CAsBZ;AAED;;GAEG;AACH,wBAAsB,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC,CAS7C"}
@@ -2,7 +2,11 @@
2
2
  * Core fetching logic: simple HTTP and browser-based fetching
3
3
  */
4
4
  import { chromium } from 'playwright';
5
+ import { chromium as stealthChromium } from 'playwright-extra';
6
+ import StealthPlugin from 'puppeteer-extra-plugin-stealth';
5
7
  import { TimeoutError, BlockedError, NetworkError, WebPeelError } from '../types.js';
8
+ // Add stealth plugin to playwright-extra
9
+ stealthChromium.use(StealthPlugin());
6
10
  const USER_AGENTS = [
7
11
  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
8
12
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
@@ -224,11 +228,31 @@ function validateUserAgent(userAgent) {
224
228
  * Fast and lightweight, but can be blocked by Cloudflare/bot detection
225
229
  * SECURITY: Manual redirect handling with SSRF re-validation
226
230
  */
227
- export async function simpleFetch(url, userAgent, timeoutMs = 30000) {
231
+ export async function simpleFetch(url, userAgent, timeoutMs = 30000, customHeaders) {
228
232
  // SECURITY: Validate URL to prevent SSRF
229
233
  validateUrl(url);
230
234
  // Validate user agent if provided
231
235
  const validatedUserAgent = userAgent ? validateUserAgent(userAgent) : getRandomUserAgent();
236
+ // SECURITY: Merge custom headers with defaults, block Host header override
237
+ const defaultHeaders = {
238
+ 'User-Agent': validatedUserAgent,
239
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
240
+ 'Accept-Language': 'en-US,en;q=0.9',
241
+ 'Accept-Encoding': 'gzip, deflate, br',
242
+ 'DNT': '1',
243
+ 'Connection': 'keep-alive',
244
+ 'Upgrade-Insecure-Requests': '1',
245
+ };
246
+ const mergedHeaders = { ...defaultHeaders };
247
+ if (customHeaders) {
248
+ for (const [key, value] of Object.entries(customHeaders)) {
249
+ // SECURITY: Block Host header override
250
+ if (key.toLowerCase() === 'host') {
251
+ throw new WebPeelError('Custom Host header is not allowed');
252
+ }
253
+ mergedHeaders[key] = value;
254
+ }
255
+ }
232
256
  const MAX_REDIRECTS = 10;
233
257
  let redirectCount = 0;
234
258
  let currentUrl = url;
@@ -245,15 +269,7 @@ export async function simpleFetch(url, userAgent, timeoutMs = 30000) {
245
269
  const timer = setTimeout(() => controller.abort(), timeoutMs);
246
270
  try {
247
271
  const response = await fetch(currentUrl, {
248
- headers: {
249
- 'User-Agent': validatedUserAgent,
250
- 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
251
- 'Accept-Language': 'en-US,en;q=0.9',
252
- 'Accept-Encoding': 'gzip, deflate, br',
253
- 'DNT': '1',
254
- 'Connection': 'keep-alive',
255
- 'Upgrade-Insecure-Requests': '1',
256
- },
272
+ headers: mergedHeaders,
257
273
  signal: controller.signal,
258
274
  redirect: 'manual', // SECURITY: Manual redirect handling
259
275
  });
@@ -277,8 +293,10 @@ export async function simpleFetch(url, userAgent, timeoutMs = 30000) {
277
293
  }
278
294
  // SECURITY: Validate Content-Type
279
295
  const contentType = response.headers.get('content-type') || '';
280
- if (!contentType.includes('text/html') && !contentType.includes('application/xhtml+xml')) {
281
- throw new WebPeelError('Unsupported content type. Only HTML is supported.');
296
+ if (!contentType.includes('text/html') &&
297
+ !contentType.includes('application/xhtml+xml') &&
298
+ !contentType.includes('application/pdf')) {
299
+ throw new WebPeelError(`Unsupported content type: ${contentType}. Supported: HTML, PDF`);
282
300
  }
283
301
  // SECURITY: Stream response with size limit (prevent memory exhaustion)
284
302
  const chunks = [];
@@ -323,6 +341,7 @@ export async function simpleFetch(url, userAgent, timeoutMs = 30000) {
323
341
  html,
324
342
  url: currentUrl,
325
343
  statusCode: response.status,
344
+ contentType,
326
345
  };
327
346
  }
328
347
  catch (error) {
@@ -339,6 +358,7 @@ export async function simpleFetch(url, userAgent, timeoutMs = 30000) {
339
358
  throw new WebPeelError(`Too many redirects (max ${MAX_REDIRECTS})`);
340
359
  }
341
360
  let sharedBrowser = null;
361
+ let sharedStealthBrowser = null;
342
362
  let activePagesCount = 0;
343
363
  const MAX_CONCURRENT_PAGES = 5;
344
364
  async function getBrowser() {
@@ -357,6 +377,22 @@ async function getBrowser() {
357
377
  sharedBrowser = await chromium.launch({ headless: true });
358
378
  return sharedBrowser;
359
379
  }
380
+ async function getStealthBrowser() {
381
+ // SECURITY: Check if stealth browser is still connected and healthy
382
+ if (sharedStealthBrowser) {
383
+ try {
384
+ if (sharedStealthBrowser.isConnected()) {
385
+ return sharedStealthBrowser;
386
+ }
387
+ }
388
+ catch {
389
+ // Browser is dead, recreate
390
+ sharedStealthBrowser = null;
391
+ }
392
+ }
393
+ sharedStealthBrowser = await stealthChromium.launch({ headless: true });
394
+ return sharedStealthBrowser;
395
+ }
360
396
  /**
361
397
  * Fetch using headless Chromium via Playwright
362
398
  * Slower but can handle JavaScript-heavy sites and bypass some bot detection
@@ -364,13 +400,25 @@ async function getBrowser() {
364
400
  export async function browserFetch(url, options = {}) {
365
401
  // SECURITY: Validate URL to prevent SSRF
366
402
  validateUrl(url);
367
- const { userAgent, waitMs = 0, timeoutMs = 30000 } = options;
403
+ const { userAgent, waitMs = 0, timeoutMs = 30000, screenshot = false, screenshotFullPage = false, headers, cookies, stealth = false } = options;
368
404
  // Validate user agent if provided
369
405
  const validatedUserAgent = userAgent ? validateUserAgent(userAgent) : getRandomUserAgent();
370
406
  // Validate wait time
371
407
  if (waitMs < 0 || waitMs > 60000) {
372
408
  throw new WebPeelError('Wait time must be between 0 and 60000ms');
373
409
  }
410
+ // SECURITY: Validate custom headers if provided
411
+ if (headers) {
412
+ for (const [key, value] of Object.entries(headers)) {
413
+ // Block Host header override
414
+ if (key.toLowerCase() === 'host') {
415
+ throw new WebPeelError('Custom Host header is not allowed');
416
+ }
417
+ if (typeof value !== 'string' || value.length > 500) {
418
+ throw new WebPeelError('Invalid header value');
419
+ }
420
+ }
421
+ }
374
422
  // SECURITY: Limit concurrent browser pages with timeout
375
423
  const queueStartTime = Date.now();
376
424
  const QUEUE_TIMEOUT_MS = 30000; // 30 second max wait
@@ -383,20 +431,46 @@ export async function browserFetch(url, options = {}) {
383
431
  activePagesCount++;
384
432
  let page = null;
385
433
  try {
386
- const browser = await getBrowser();
434
+ const browser = stealth ? await getStealthBrowser() : await getBrowser();
387
435
  page = await browser.newPage({
388
436
  userAgent: validatedUserAgent,
389
437
  });
390
- // Block images, fonts, and other heavy resources for speed
391
- await page.route('**/*', (route) => {
392
- const resourceType = route.request().resourceType();
393
- if (['image', 'font', 'media', 'stylesheet'].includes(resourceType)) {
394
- route.abort();
395
- }
396
- else {
397
- route.continue();
398
- }
399
- });
438
+ // Set custom headers if provided
439
+ if (headers && Object.keys(headers).length > 0) {
440
+ await page.setExtraHTTPHeaders(headers);
441
+ }
442
+ // Set cookies if provided
443
+ if (cookies && cookies.length > 0) {
444
+ const parsedCookies = cookies.map(cookie => {
445
+ const [nameValue] = cookie.split(';').map(s => s.trim());
446
+ const [name, value] = nameValue.split('=');
447
+ if (!name || value === undefined) {
448
+ throw new WebPeelError(`Invalid cookie format: ${cookie}`);
449
+ }
450
+ return {
451
+ name: name.trim(),
452
+ value: value.trim(),
453
+ url,
454
+ };
455
+ });
456
+ await page.context().addCookies(parsedCookies);
457
+ }
458
+ // Block images, fonts, and other heavy resources for speed (unless screenshot is requested)
459
+ if (!screenshot) {
460
+ await page.route('**/*', (route) => {
461
+ const resourceType = route.request().resourceType();
462
+ if (['image', 'font', 'media', 'stylesheet'].includes(resourceType)) {
463
+ route.abort();
464
+ }
465
+ else {
466
+ route.continue();
467
+ }
468
+ });
469
+ }
470
+ else {
471
+ // For screenshots, allow all resources
472
+ await page.route('**/*', (route) => route.continue());
473
+ }
400
474
  // SECURITY: Wrap entire operation in timeout
401
475
  const fetchPromise = (async () => {
402
476
  await page.goto(url, {
@@ -422,9 +496,18 @@ export async function browserFetch(url, options = {}) {
422
496
  if (!html || html.length < 100) {
423
497
  throw new BlockedError('Empty or suspiciously small response from browser.');
424
498
  }
499
+ // Capture screenshot if requested
500
+ let screenshotBuffer;
501
+ if (screenshot) {
502
+ screenshotBuffer = await page.screenshot({
503
+ fullPage: screenshotFullPage,
504
+ type: 'png'
505
+ });
506
+ }
425
507
  return {
426
508
  html,
427
509
  url: finalUrl,
510
+ screenshot: screenshotBuffer,
428
511
  };
429
512
  }
430
513
  catch (error) {
@@ -475,5 +558,9 @@ export async function cleanup() {
475
558
  await sharedBrowser.close();
476
559
  sharedBrowser = null;
477
560
  }
561
+ if (sharedStealthBrowser) {
562
+ await sharedStealthBrowser.close();
563
+ sharedStealthBrowser = null;
564
+ }
478
565
  }
479
566
  //# sourceMappingURL=fetcher.js.map