webpeel 0.17.1 → 0.17.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/dist/cache.d.ts.map +1 -1
  2. package/dist/cache.js +1 -0
  3. package/dist/cache.js.map +1 -1
  4. package/dist/cli.js +218 -13
  5. package/dist/cli.js.map +1 -1
  6. package/dist/core/content-pruner.d.ts.map +1 -1
  7. package/dist/core/content-pruner.js +27 -0
  8. package/dist/core/content-pruner.js.map +1 -1
  9. package/dist/core/domain-extractors.js +4 -4
  10. package/dist/core/http-fetch.js +1 -1
  11. package/dist/core/http-fetch.js.map +1 -1
  12. package/dist/core/markdown.d.ts.map +1 -1
  13. package/dist/core/markdown.js +30 -1
  14. package/dist/core/markdown.js.map +1 -1
  15. package/dist/core/metadata.d.ts +3 -1
  16. package/dist/core/metadata.d.ts.map +1 -1
  17. package/dist/core/metadata.js +14 -2
  18. package/dist/core/metadata.js.map +1 -1
  19. package/dist/core/pipeline.d.ts +2 -0
  20. package/dist/core/pipeline.d.ts.map +1 -1
  21. package/dist/core/pipeline.js +45 -2
  22. package/dist/core/pipeline.js.map +1 -1
  23. package/dist/core/readability.d.ts.map +1 -1
  24. package/dist/core/readability.js +24 -0
  25. package/dist/core/readability.js.map +1 -1
  26. package/dist/server/app.d.ts.map +1 -1
  27. package/dist/server/app.js +10 -10
  28. package/dist/server/app.js.map +1 -1
  29. package/dist/server/job-queue.d.ts.map +1 -1
  30. package/dist/server/job-queue.js +4 -2
  31. package/dist/server/job-queue.js.map +1 -1
  32. package/dist/server/logger.d.ts +11 -0
  33. package/dist/server/logger.d.ts.map +1 -0
  34. package/dist/server/logger.js +38 -0
  35. package/dist/server/logger.js.map +1 -0
  36. package/dist/server/middleware/auth.js +4 -4
  37. package/dist/server/middleware/auth.js.map +1 -1
  38. package/dist/server/middleware/rate-limit.d.ts.map +1 -1
  39. package/dist/server/middleware/rate-limit.js +24 -7
  40. package/dist/server/middleware/rate-limit.js.map +1 -1
  41. package/dist/server/routes/cli-usage.js +1 -1
  42. package/dist/server/routes/cli-usage.js.map +1 -1
  43. package/dist/server/routes/stripe.d.ts.map +1 -1
  44. package/dist/server/routes/stripe.js +15 -13
  45. package/dist/server/routes/stripe.js.map +1 -1
  46. package/dist/server/routes/users.d.ts.map +1 -1
  47. package/dist/server/routes/users.js +44 -0
  48. package/dist/server/routes/users.js.map +1 -1
  49. package/dist/types.d.ts +2 -0
  50. package/dist/types.d.ts.map +1 -1
  51. package/dist/types.js.map +1 -1
  52. package/package.json +1 -1
package/dist/cache.d.ts.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"cache.d.ts","sourceRoot":"","sources":["../src/cache.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAiBH;;GAEG;AACH,wBAAgB,QAAQ,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAc5C;AAiBD;;GAEG;AACH,wBAAgB,QAAQ,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,GAAG,GAAG,IAAI,CAsB/E;AAED;;GAEG;AACH,wBAAgB,QAAQ,CAAC,GAAG,EAAE,MAAM,EAAE,MAAM,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,IAAI,CAerG;AAED;;GAEG;AACH,wBAAgB,UAAU,CAAC,GAAG,UAAQ,GAAG,MAAM,CAyB9C;AAED;;GAEG;AACH,wBAAgB,UAAU,IAAI;IAAE,OAAO,EAAE,MAAM,CAAC;IAAC,SAAS,EAAE,MAAM,CAAC;IAAC,GAAG,EAAE,MAAM,CAAA;CAAE,CAgBhF"}
1
+ {"version":3,"file":"cache.d.ts","sourceRoot":"","sources":["../src/cache.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAiBH;;GAEG;AACH,wBAAgB,QAAQ,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAc5C;AAkBD;;GAEG;AACH,wBAAgB,QAAQ,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,GAAG,GAAG,IAAI,CAsB/E;AAED;;GAEG;AACH,wBAAgB,QAAQ,CAAC,GAAG,EAAE,MAAM,EAAE,MAAM,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,IAAI,CAerG;AAED;;GAEG;AACH,wBAAgB,UAAU,CAAC,GAAG,UAAQ,GAAG,MAAM,CAyB9C;AAED;;GAEG;AACH,wBAAgB,UAAU,IAAI;IAAE,OAAO,EAAE,MAAM,CAAC;IAAC,SAAS,EAAE,MAAM,CAAC;IAAC,GAAG,EAAE,MAAM,CAAA;CAAE,CAgBhF"}
package/dist/cache.js CHANGED
@@ -37,6 +37,7 @@ function cacheKey(url, options) {
37
37
  stealth: options?.stealth || false,
38
38
  selector: options?.selector || null,
39
39
  format: options?.format || 'markdown',
40
+ readable: options?.readable || false,
40
41
  };
41
42
  const hash = createHash('sha256').update(JSON.stringify(relevant)).digest('hex').slice(0, 16);
42
43
  return hash;
package/dist/cache.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"cache.js","sourceRoot":"","sources":["../src/cache.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,YAAY,EAAE,aAAa,EAAE,UAAU,EAAE,SAAS,EAAE,WAAW,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,IAAI,CAAC;AAC3G,OAAO,EAAE,OAAO,EAAE,MAAM,IAAI,CAAC;AAC7B,OAAO,EAAE,IAAI,EAAE,MAAM,MAAM,CAAC;AAC5B,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAEpC,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,EAAE,EAAE,UAAU,EAAE,OAAO,CAAC,CAAC;AAUvD;;GAEG;AACH,MAAM,UAAU,QAAQ,CAAC,GAAW;IAClC,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,kBAAkB,CAAC,CAAC;IAC5C,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,MAAM,IAAI,KAAK,CAAC,wBAAwB,GAAG,yBAAyB,CAAC,CAAC;IACxE,CAAC;IACD,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IACjC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;IACtB,QAAQ,IAAI,EAAE,CAAC;QACb,KAAK,GAAG,CAAC,CAAC,OAAO,KAAK,GAAG,IAAI,CAAC;QAC9B,KAAK,GAAG,CAAC,CAAC,OAAO,KAAK,GAAG,EAAE,GAAG,IAAI,CAAC;QACnC,KAAK,GAAG,CAAC,CAAC,OAAO,KAAK,GAAG,EAAE,GAAG,EAAE,GAAG,IAAI,CAAC;QACxC,KAAK,GAAG,CAAC,CAAC,OAAO,KAAK,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,IAAI,CAAC;QAC7C,OAAO,CAAC,CAAC,MAAM,IAAI,KAAK,CAAC,qBAAqB,IAAI,EAAE,CAAC,CAAC;IACxD,CAAC;AACH,CAAC;AAED;;GAEG;AACH,SAAS,QAAQ,CAAC,GAAW,EAAE,OAA6B;IAC1D,MAAM,QAAQ,GAAG;QACf,GAAG;QACH,MAAM,EAAE,OAAO,EAAE,MAAM,IAAI,KAAK;QAChC,OAAO,EAAE,OAAO,EAAE,OAAO,IAAI,KAAK;QAClC,QAAQ,EAAE,OAAO,EAAE,QAAQ,IAAI,IAAI;QACnC,MAAM,EAAE,OAAO,EAAE,MAAM,IAAI,UAAU;KACtC,CAAC;IACF,MAAM,IAAI,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;IAC9F,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,QAAQ,CAAC,GAAW,EAAE,OAA6B;IACjE,MAAM,GAAG,GAAG,QAAQ,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;IACnC,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,EAAE,GAAG,GAAG,OAAO,CAAC,CAAC;IAEhD,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC;QAAE,OAAO,IAAI,CAAC;IAEvC,IAAI,CAAC;QACH,MAAM,KAAK,GAAe,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC,CAAC;QACtE,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC,QAAQ,CAAC;QAExC,IAAI,GAAG,GAAG,KAAK,CAAC,KAAK,EAAE,CAAC;YACtB,mCAAmC;YACnC,IAAI,CAAC;gBAAC
,UAAU,CAAC,QAAQ,CAAC,CAAC;YAAC,CAAC;YAAC,OAAO,CAAC,EAAE,CAAC;gBACvC,IAAI,OAAO,CAAC,GAAG,CAAC,KAAK;oBAAE,OAAO,CAAC,KAAK,CAAC,WAAW,EAAE,qBAAqB,EAAE,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YAC/G,CAAC;YACD,OAAO,IAAI,CAAC;QACd,CAAC;QAED,OAAO,KAAK,CAAC,MAAM,CAAC;IACtB,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,QAAQ,CAAC,GAAW,EAAE,MAAW,EAAE,KAAa,EAAE,OAA6B;IAC7F,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;QAC3B,SAAS,CAAC,SAAS,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAC5C,CAAC;IAED,MAAM,GAAG,GAAG,QAAQ,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;IACnC,MAAM,KAAK,GAAe;QACxB,GAAG;QACH,MAAM;QACN,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE;QACpB,KAAK;QACL,OAAO,EAAE,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,SAAS;KACvD,CAAC;IAEF,aAAa,CAAC,IAAI,CAAC,SAAS,EAAE,GAAG,GAAG,OAAO,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC;AACvE,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,UAAU,CAAC,GAAG,GAAG,KAAK;IACpC,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC;QAAE,OAAO,CAAC,CAAC;IAErC,MAAM,KAAK,GAAG,WAAW,CAAC,SAAS,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC;IACtE,IAAI,OAAO,GAAG,CAAC,CAAC;IAEhB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC;QACvC,IAAI,CAAC;YACH,IAAI,GAAG,EAAE,CAAC;gBACR,UAAU,CAAC,QAAQ,CAAC,CAAC;gBACrB,OAAO,EAAE,CAAC;YACZ,CAAC;iBAAM,CAAC;gBACN,MAAM,KAAK,GAAe,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC,CAAC;gBACtE,IAAI,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC,QAAQ,GAAG,KAAK,CAAC,KAAK,EAAE,CAAC;oBAC9C,UAAU,CAAC,QAAQ,CAAC,CAAC;oBACrB,OAAO,EAAE,CAAC;gBACZ,CAAC;YACH,CAAC;QACH,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,IAAI,OAAO,CAAC,GAAG,CAAC,KAAK;gBAAE,OAAO,CAAC,KAAK,CAAC,WAAW,EAAE,0BAA0B,EAAE,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACpH,CAAC;IACH,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,UAAU;IACxB,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC;QAAE,OAAO,EAAE,OAAO,EAAE,CAAC,EAAE,SAAS,EAAE,CAAC,EAAE,GAAG,EAAE,SAAS,EAAE,CAAC;IAEhF,
MAAM,KAAK,GAAG,WAAW,CAAC,SAAS,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC;IACtE,IAAI,SAAS,GAAG,CAAC,CAAC;IAElB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC;YACH,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC,CAAC;YAC7C,SAAS,IAAI,IAAI,CAAC,IAAI,CAAC;QACzB,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,IAAI,OAAO,CAAC,GAAG,CAAC,KAAK;gBAAE,OAAO,CAAC,KAAK,CAAC,WAAW,EAAE,mBAAmB,EAAE,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAC7G,CAAC;IACH,CAAC;IAED,OAAO,EAAE,OAAO,EAAE,KAAK,CAAC,MAAM,EAAE,SAAS,EAAE,GAAG,EAAE,SAAS,EAAE,CAAC;AAC9D,CAAC"}
1
+ {"version":3,"file":"cache.js","sourceRoot":"","sources":["../src/cache.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,YAAY,EAAE,aAAa,EAAE,UAAU,EAAE,SAAS,EAAE,WAAW,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,IAAI,CAAC;AAC3G,OAAO,EAAE,OAAO,EAAE,MAAM,IAAI,CAAC;AAC7B,OAAO,EAAE,IAAI,EAAE,MAAM,MAAM,CAAC;AAC5B,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAEpC,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,EAAE,EAAE,UAAU,EAAE,OAAO,CAAC,CAAC;AAUvD;;GAEG;AACH,MAAM,UAAU,QAAQ,CAAC,GAAW;IAClC,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,kBAAkB,CAAC,CAAC;IAC5C,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,MAAM,IAAI,KAAK,CAAC,wBAAwB,GAAG,yBAAyB,CAAC,CAAC;IACxE,CAAC;IACD,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IACjC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;IACtB,QAAQ,IAAI,EAAE,CAAC;QACb,KAAK,GAAG,CAAC,CAAC,OAAO,KAAK,GAAG,IAAI,CAAC;QAC9B,KAAK,GAAG,CAAC,CAAC,OAAO,KAAK,GAAG,EAAE,GAAG,IAAI,CAAC;QACnC,KAAK,GAAG,CAAC,CAAC,OAAO,KAAK,GAAG,EAAE,GAAG,EAAE,GAAG,IAAI,CAAC;QACxC,KAAK,GAAG,CAAC,CAAC,OAAO,KAAK,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,IAAI,CAAC;QAC7C,OAAO,CAAC,CAAC,MAAM,IAAI,KAAK,CAAC,qBAAqB,IAAI,EAAE,CAAC,CAAC;IACxD,CAAC;AACH,CAAC;AAED;;GAEG;AACH,SAAS,QAAQ,CAAC,GAAW,EAAE,OAA6B;IAC1D,MAAM,QAAQ,GAAG;QACf,GAAG;QACH,MAAM,EAAE,OAAO,EAAE,MAAM,IAAI,KAAK;QAChC,OAAO,EAAE,OAAO,EAAE,OAAO,IAAI,KAAK;QAClC,QAAQ,EAAE,OAAO,EAAE,QAAQ,IAAI,IAAI;QACnC,MAAM,EAAE,OAAO,EAAE,MAAM,IAAI,UAAU;QACrC,QAAQ,EAAE,OAAO,EAAE,QAAQ,IAAI,KAAK;KACrC,CAAC;IACF,MAAM,IAAI,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;IAC9F,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,QAAQ,CAAC,GAAW,EAAE,OAA6B;IACjE,MAAM,GAAG,GAAG,QAAQ,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;IACnC,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,EAAE,GAAG,GAAG,OAAO,CAAC,CAAC;IAEhD,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC;QAAE,OAAO,IAAI,CAAC;IAEvC,IAAI,CAAC;QACH,MAAM,KAAK,GAAe,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC,CAAC;QACtE,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC,QAAQ,CAAC;QAExC,IAAI,GAAG,GAAG,KAAK,CAAC,KAAK,EAA
E,CAAC;YACtB,mCAAmC;YACnC,IAAI,CAAC;gBAAC,UAAU,CAAC,QAAQ,CAAC,CAAC;YAAC,CAAC;YAAC,OAAO,CAAC,EAAE,CAAC;gBACvC,IAAI,OAAO,CAAC,GAAG,CAAC,KAAK;oBAAE,OAAO,CAAC,KAAK,CAAC,WAAW,EAAE,qBAAqB,EAAE,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YAC/G,CAAC;YACD,OAAO,IAAI,CAAC;QACd,CAAC;QAED,OAAO,KAAK,CAAC,MAAM,CAAC;IACtB,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,QAAQ,CAAC,GAAW,EAAE,MAAW,EAAE,KAAa,EAAE,OAA6B;IAC7F,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;QAC3B,SAAS,CAAC,SAAS,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAC5C,CAAC;IAED,MAAM,GAAG,GAAG,QAAQ,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;IACnC,MAAM,KAAK,GAAe;QACxB,GAAG;QACH,MAAM;QACN,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE;QACpB,KAAK;QACL,OAAO,EAAE,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,SAAS;KACvD,CAAC;IAEF,aAAa,CAAC,IAAI,CAAC,SAAS,EAAE,GAAG,GAAG,OAAO,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC;AACvE,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,UAAU,CAAC,GAAG,GAAG,KAAK;IACpC,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC;QAAE,OAAO,CAAC,CAAC;IAErC,MAAM,KAAK,GAAG,WAAW,CAAC,SAAS,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC;IACtE,IAAI,OAAO,GAAG,CAAC,CAAC;IAEhB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC;QACvC,IAAI,CAAC;YACH,IAAI,GAAG,EAAE,CAAC;gBACR,UAAU,CAAC,QAAQ,CAAC,CAAC;gBACrB,OAAO,EAAE,CAAC;YACZ,CAAC;iBAAM,CAAC;gBACN,MAAM,KAAK,GAAe,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC,CAAC;gBACtE,IAAI,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC,QAAQ,GAAG,KAAK,CAAC,KAAK,EAAE,CAAC;oBAC9C,UAAU,CAAC,QAAQ,CAAC,CAAC;oBACrB,OAAO,EAAE,CAAC;gBACZ,CAAC;YACH,CAAC;QACH,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,IAAI,OAAO,CAAC,GAAG,CAAC,KAAK;gBAAE,OAAO,CAAC,KAAK,CAAC,WAAW,EAAE,0BAA0B,EAAE,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACpH,CAAC;IACH,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,UAAU;IACxB,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC;QAAE,OAAO,EAAE,OAAO,EAAE,CAAC,EAAE,SAAS,EAAE,
CAAC,EAAE,GAAG,EAAE,SAAS,EAAE,CAAC;IAEhF,MAAM,KAAK,GAAG,WAAW,CAAC,SAAS,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC;IACtE,IAAI,SAAS,GAAG,CAAC,CAAC;IAElB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC;YACH,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC,CAAC;YAC7C,SAAS,IAAI,IAAI,CAAC,IAAI,CAAC;QACzB,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,IAAI,OAAO,CAAC,GAAG,CAAC,KAAK;gBAAE,OAAO,CAAC,KAAK,CAAC,WAAW,EAAE,mBAAmB,EAAE,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAC7G,CAAC;IACH,CAAC;IAED,OAAO,EAAE,OAAO,EAAE,KAAK,CAAC,MAAM,EAAE,SAAS,EAAE,GAAG,EAAE,SAAS,EAAE,CAAC;AAC9D,CAAC"}
package/dist/cli.js CHANGED
@@ -187,7 +187,7 @@ program
187
187
  .option('-w, --wait <ms>', 'Wait time after page load (ms)', parseInt)
188
188
  .option('--html', 'Output raw HTML instead of markdown')
189
189
  .option('--text', 'Output plain text instead of markdown')
190
- .option('--clean', 'Output clean text optimized for AI (strips URLs, keeps structure)')
190
+ .option('--clean', 'Clean output article content only, no links or metadata (alias for --readable with URL-stripped markdown)')
191
191
  .option('--json', 'Output as JSON')
192
192
  .option('-t, --timeout <ms>', 'Request timeout (ms)', (v) => parseInt(v, 10), 30000)
193
193
  .option('--ua <agent>', 'Custom user agent')
@@ -201,6 +201,7 @@ program
201
201
  .option('--only-main-content', 'Shortcut for --include-tags main,article')
202
202
  .option('--full-content', 'Return full page content (disable automatic content density pruning)')
203
203
  .option('--readable', 'Reader mode — extract only the main article content, strip all noise (like browser Reader Mode)')
204
+ .option('--full-nav', 'Keep full navigation/content (disable auto-readability when piped or in agent mode)')
204
205
  .option('--focus <query>', 'Query-focused filtering — only return content relevant to this query (BM25 ranking)')
205
206
  .option('--chunk', 'Split content into RAG-ready chunks')
206
207
  .option('--chunk-size <tokens>', 'Max tokens per chunk (default: 512)', parseInt)
@@ -214,6 +215,7 @@ program
214
215
  .option('--images', 'Output image URLs from the page')
215
216
  .option('--meta', 'Output only the page metadata (title, description, author, etc.)')
216
217
  .option('--raw', 'Return full page without smart content extraction')
218
+ .option('--full', 'Alias for --raw — full page content, no budget')
217
219
  .option('--lite', 'Lite mode — minimal processing, maximum speed (skip pruning, budget, metadata)')
218
220
  .option('--action <actions...>', 'Page actions before scraping (e.g., "click:.btn" "wait:2000" "scroll:bottom")')
219
221
  .option('--extract <json>', 'Extract structured data using CSS selectors (JSON object of field:selector pairs)')
@@ -247,11 +249,60 @@ program
247
249
  .option('--wait-until <event>', 'Page load event: domcontentloaded, networkidle, load, commit (auto-enables --render)')
248
250
  .option('--wait-selector <css>', 'Wait for CSS selector before extracting (auto-enables --render)')
249
251
  .option('--block-resources <types>', 'Block resource types, comma-separated: image,stylesheet,font,media,script (auto-enables --render)');
250
- program.configureHelp({
251
- sortSubcommands: true,
252
- showGlobalOptions: false,
253
- });
254
- program.addHelpText('afterAll', `
252
+ // ─── Help System ─────────────────────────────────────────────────────────────
253
+ // Detect --help-all early, before Commander parses argv.
254
+ const isHelpAll = process.argv.slice(2).some(a => a === '--help-all');
255
+ if (isHelpAll) {
256
+ // Translate --help-all → --help so Commander generates its standard output.
257
+ const idx = process.argv.indexOf('--help-all');
258
+ if (idx !== -1)
259
+ process.argv[idx] = '--help';
260
+ }
261
+ // ANSI helpers (fall back gracefully when colors are disabled).
262
+ const NO_COLOR = process.env.NO_COLOR !== undefined || !process.stdout.isTTY;
263
+ const bold = (s) => NO_COLOR ? s : `\x1b[1m${s}\x1b[0m`;
264
+ const dim = (s) => NO_COLOR ? s : `\x1b[2m${s}\x1b[0m`;
265
+ const cyan = (s) => NO_COLOR ? s : `\x1b[36m${s}\x1b[0m`;
266
+ /**
267
+ * Reconstruct the standard Commander help layout for --help-all and subcommands.
268
+ * This mirrors Commander's own default formatHelp() so subcommand help keeps working.
269
+ */
270
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
271
+ function buildCommanderHelp(cmd, helper) {
272
+ const termWidth = helper.padWidth(cmd, helper);
273
+ const helpWidth = helper.helpWidth ?? 80;
274
+ const pad = ' ';
275
+ const formatItem = (term, description) => {
276
+ if (description) {
277
+ const full = `${term.padEnd(termWidth + 2)}${description}`;
278
+ return helper.wrap(full, helpWidth - pad.length, termWidth + 2);
279
+ }
280
+ return term;
281
+ };
282
+ const formatList = (items) => items.join('\n').replace(/^/gm, pad);
283
+ let out = [`Usage: ${helper.commandUsage(cmd)}`, ''];
284
+ const desc = helper.commandDescription(cmd);
285
+ if (desc.length > 0) {
286
+ out = out.concat([helper.wrap(desc, helpWidth, 0), '']);
287
+ }
288
+ // Arguments
289
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
290
+ const args = helper.visibleArguments(cmd).map(a => formatItem(helper.argumentTerm(a), helper.argumentDescription(a)));
291
+ if (args.length > 0)
292
+ out = out.concat(['Arguments:', formatList(args), '']);
293
+ // Options
294
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
295
+ const opts = helper.visibleOptions(cmd).map(o => formatItem(helper.optionTerm(o), helper.optionDescription(o)));
296
+ if (opts.length > 0)
297
+ out = out.concat(['Options:', formatList(opts), '']);
298
+ // Subcommands
299
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
300
+ const cmds = helper.visibleCommands(cmd).map(c => formatItem(helper.subcommandTerm(c), helper.subcommandDescription(c)));
301
+ if (cmds.length > 0)
302
+ out = out.concat(['Commands:', formatList(cmds), '']);
303
+ // Append grouped option sections only on root command (--help-all)
304
+ if (cmd.parent === null) {
305
+ out = out.concat([`
255
306
  Output Formats:
256
307
  --json JSON output with full metadata
257
308
  --html Raw HTML output
@@ -295,16 +346,106 @@ Agent Integration:
295
346
  $ webpeel pipe "https://example.com" | jq .content Pipe-friendly JSON
296
347
  $ webpeel "https://site.com" --json --silent Same as pipe
297
348
  $ curl https://webpeel.dev/llms.txt AI-readable docs
298
- `);
349
+ `]);
350
+ }
351
+ return out.join('\n');
352
+ }
353
+ /**
354
+ * Condensed, Anthropic-style help for the root command (default --help).
355
+ */
356
+ function buildCondensedHelp() {
357
+ const v = cliVersion;
358
+ return [
359
+ '',
360
+ ` ${bold('◆ WebPeel')} ${dim(`v${v}`)}`,
361
+ ` ${dim('The web data platform for AI agents')}`,
362
+ '',
363
+ ` ${bold('Usage:')} webpeel [url] [options]`,
364
+ ` webpeel <command> [options]`,
365
+ '',
366
+ ` ${bold('Examples:')}`,
367
+ ` webpeel https://example.com ${dim('Clean content (reader mode)')}`,
368
+ ` webpeel read https://example.com ${dim('Explicit reader mode')}`,
369
+ ` webpeel screenshot https://example.com ${dim('Screenshot any page')}`,
370
+ ` webpeel ask https://news.com "summary" ${dim('Ask about any page')}`,
371
+ ` webpeel search "webpeel vs jina" ${dim('Web search')}`,
372
+ ` echo "url" | webpeel ${dim('Pipe mode (auto JSON)')}`,
373
+ '',
374
+ ` ${bold('Commands:')}`,
375
+ ` fetch (default) Fetch a URL as clean markdown`,
376
+ ` read <url> Reader mode (article content only)`,
377
+ ` screenshot <url> Take a screenshot`,
378
+ ` ask <url> <question> Ask about any page`,
379
+ ` search <query> Search the web (DuckDuckGo + sources)`,
380
+ ` crawl <url> Crawl a website`,
381
+ ` mcp Start MCP server for AI tools`,
382
+ ` ${dim('... (use --help-all for all 25+ commands)')}`,
383
+ '',
384
+ ` ${bold('Common Options:')}`,
385
+ ` -r, --render Browser rendering (JS-heavy sites)`,
386
+ ` --stealth Stealth mode (anti-bot bypass)`,
387
+ ` --raw Full page (disable auto reader mode)`,
388
+ ` --full Full page, no budget limit`,
389
+ ` --json JSON output with metadata`,
390
+ ` --budget <n> Token budget (default: 4000)`,
391
+ ` -q, --question <q> Ask about the content`,
392
+ ` -s, --silent No spinner output`,
393
+ '',
394
+ ` Use ${cyan("'webpeel <command> --help'")} for command-specific options.`,
395
+ ` Use ${cyan("'webpeel --help-all'")} for the full option reference.`,
396
+ '',
397
+ ` Docs: ${cyan('https://webpeel.dev/docs')}`,
398
+ '',
399
+ ].join('\n');
400
+ }
401
+ program.configureHelp({
402
+ sortSubcommands: true,
403
+ showGlobalOptions: false,
404
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
405
+ formatHelp: (cmd, helper) => {
406
+ // Subcommands always get standard Commander help.
407
+ // Root command with --help-all also gets standard full help.
408
+ if (cmd.parent !== null || isHelpAll) {
409
+ return buildCommanderHelp(cmd, helper);
410
+ }
411
+ // Root command default: beautiful condensed help.
412
+ return buildCondensedHelp();
413
+ },
414
+ });
299
415
  // Main fetch handler — shared with the `pipe` subcommand
300
416
  async function runFetch(url, options) {
301
- // Smart defaults: when piped (not a TTY), default to silent JSON
417
+ // Smart defaults: when piped (not a TTY), default to silent JSON + budget
302
418
  const isPiped = !process.stdout.isTTY;
303
419
  if (isPiped && !options.html && !options.text) {
304
420
  if (!options.json)
305
421
  options.json = true;
306
422
  if (!options.silent)
307
423
  options.silent = true;
424
+ // Auto-enable readability for AI consumers — clean content by default
425
+ if (!options.readable && !options.fullNav) {
426
+ options.readable = true;
427
+ }
428
+ // Auto token budget for piped mode (AI consumers want concise content)
429
+ if (options.budget === undefined && !options.fullContent && !options.raw && !options.full) {
430
+ options.budget = 4000;
431
+ }
432
+ }
433
+ // --full alias: sets raw + fullContent
434
+ if (options.full) {
435
+ options.raw = true;
436
+ options.fullContent = true;
437
+ }
438
+ // Smart defaults for terminal (interactive) mode
439
+ const isTerminal = process.stdout.isTTY && !isPiped;
440
+ if (isTerminal && !options.raw && !options.html && !options.text) {
441
+ // Auto-readable: clean content by default (like browser Reader Mode)
442
+ if (!options.readable && !options.fullNav && !options.selector) {
443
+ options.readable = true;
444
+ }
445
+ // Default token budget: don't flood the terminal with 20K tokens
446
+ if (options.budget === undefined && !options.fullContent && !options.raw) {
447
+ options.budget = 4000;
448
+ }
308
449
  }
309
450
  // --agent sets sensible defaults for AI agents; explicit flags override
310
451
  if (options.agent) {
@@ -316,6 +457,10 @@ async function runFetch(url, options) {
316
457
  options.extractAll = true;
317
458
  if (options.budget === undefined)
318
459
  options.budget = 4000;
460
+ // Agent mode = clean content by default
461
+ if (!options.readable && !options.fullNav) {
462
+ options.readable = true;
463
+ }
319
464
  }
320
465
  const isJson = options.json;
321
466
  // --- --list-schemas: print all available schemas and exit ---
@@ -414,8 +559,9 @@ async function runFetch(url, options) {
414
559
  render: options.render,
415
560
  stealth: options.stealth,
416
561
  selector: options.selector,
417
- format: options.html ? 'html' : options.text ? 'text' : 'markdown',
562
+ format: options.html ? 'html' : options.text ? 'text' : options.clean ? 'clean' : 'markdown',
418
563
  budget: null, // Budget excluded from cache key — cache stores full content
564
+ readable: options.readable || false,
419
565
  };
420
566
  const cachedResult = getCache(url, cacheOptions);
421
567
  if (cachedResult) {
@@ -643,6 +789,7 @@ async function runFetch(url, options) {
643
789
  || !!options.waitUntil
644
790
  || !!options.waitSelector
645
791
  || !!options.blockResources
792
+ || !!options.screenshot // Auto-enable render for screenshot (needs browser)
646
793
  || false;
647
794
  // Inject scroll actions when --scroll-extract N (fixed count) is used
648
795
  if (scrollExtractCount > 0) {
@@ -731,6 +878,8 @@ async function runFetch(url, options) {
731
878
  }
732
879
  else if (options.clean) {
733
880
  peelOptions.format = 'clean';
881
+ // --clean implies readable mode (article content only, no navs/footers)
882
+ peelOptions.readable = true;
734
883
  }
735
884
  else {
736
885
  peelOptions.format = 'markdown';
@@ -787,6 +936,7 @@ async function runFetch(url, options) {
787
936
  selector: options.selector,
788
937
  format: peelOptions.format,
789
938
  budget: null, // Budget excluded — cache stores full content, budget applied post-cache
939
+ readable: options.readable || false,
790
940
  });
791
941
  }
792
942
  // Apply smart budget distillation AFTER caching (cache always stores full content)
@@ -1114,6 +1264,34 @@ program
1114
1264
  .action(async (url, options) => {
1115
1265
  await runFetch(url, options);
1116
1266
  });
1267
+ // Read subcommand (explicit readable mode)
1268
+ program
1269
+ .command('read <url>')
1270
+ .description('Read a page in clean reader mode (like browser Reader View)')
1271
+ .option('--json', 'Output as JSON')
1272
+ .option('-s, --silent', 'Silent mode')
1273
+ .option('--budget <n>', 'Token budget (default: 4000)', parseInt)
1274
+ .option('--focus <query>', 'Focus on content relevant to this query')
1275
+ .action(async (url, opts) => {
1276
+ await runFetch(url, {
1277
+ ...opts,
1278
+ readable: true,
1279
+ budget: 4000,
1280
+ });
1281
+ });
1282
+ // Ask subcommand (question mode)
1283
+ program
1284
+ .command('ask <url> <question>')
1285
+ .description('Ask a question about any page')
1286
+ .option('--json', 'Output as JSON')
1287
+ .option('-s, --silent', 'Silent mode')
1288
+ .action(async (url, question, opts) => {
1289
+ await runFetch(url, {
1290
+ ...opts,
1291
+ question,
1292
+ readable: true,
1293
+ });
1294
+ });
1117
1295
  // Search command
1118
1296
  program
1119
1297
  .command('search <query>')
@@ -1288,7 +1466,7 @@ program
1288
1466
  }
1289
1467
  }
1290
1468
  else if (isJson) {
1291
- const jsonStr = JSON.stringify(results, null, 2);
1469
+ const jsonStr = JSON.stringify({ query, results, count: results.length }, null, 2);
1292
1470
  await writeStdout(jsonStr + '\n');
1293
1471
  }
1294
1472
  else {
@@ -1534,7 +1712,7 @@ program
1534
1712
  showUsageFooter(usageCheck.usageInfo, usageCheck.isAnonymous || false, options.stealth || false);
1535
1713
  }
1536
1714
  if (options.json) {
1537
- console.log(JSON.stringify(results, null, 2));
1715
+ console.log(JSON.stringify({ pages: results, count: results.length }, null, 2));
1538
1716
  }
1539
1717
  else {
1540
1718
  results.forEach((result, i) => {
@@ -1876,6 +2054,7 @@ program
1876
2054
  .option('-q, --question <q>', 'Quick answer')
1877
2055
  .option('--proxy <url>', 'Proxy URL')
1878
2056
  .option('--timeout <ms>', 'Timeout in ms', parseInt)
2057
+ .option('-s, --silent', 'Silent mode (always on for pipe, accepted for compatibility)')
1879
2058
  .action(async (url, opts) => {
1880
2059
  // Force JSON + silent — always, unconditionally
1881
2060
  opts.json = true;
@@ -2922,6 +3101,7 @@ program
2922
3101
  // Screenshot command
2923
3102
  program
2924
3103
  .command('screenshot <url>')
3104
+ .alias('snap')
2925
3105
  .description('Take a screenshot of a URL and save as PNG/JPEG')
2926
3106
  .option('--full-page', 'Capture full page (not just viewport)')
2927
3107
  .option('--width <px>', 'Viewport width in pixels (default: 1280)', parseInt)
@@ -3575,7 +3755,7 @@ program
3575
3755
  .option('--llm-key <key>', 'LLM API key for synthesis (or env OPENAI_API_KEY)')
3576
3756
  .option('--llm-model <model>', 'LLM model for synthesis (default: gpt-4o-mini)')
3577
3757
  .option('--llm-base-url <url>', 'LLM API base URL (default: https://api.openai.com/v1)')
3578
- .option('--timeout <ms>', 'Max research time in ms (default: 60000)', '60000')
3758
+ .option('--timeout <ms>', 'Max research time in ms (default: 40000)', '60000')
3579
3759
  .option('--json', 'Output result as JSON')
3580
3760
  .option('-s, --silent', 'Suppress progress output')
3581
3761
  .action(async (query, options) => {
@@ -3861,11 +4041,36 @@ async function outputResult(result, options, extra = {}) {
3861
4041
  output.truncated = true;
3862
4042
  if (extra.totalAvailable !== undefined)
3863
4043
  output.totalAvailable = extra.totalAvailable;
3864
- output._meta = { version: cliVersion, method: result.method || 'simple', timing: result.timing };
4044
+ output._meta = { version: cliVersion, method: result.method || 'simple', timing: result.timing, serverMarkdown: result.serverMarkdown || false };
3865
4045
  await writeStdout(JSON.stringify(output, null, 2) + '\n');
3866
4046
  }
3867
4047
  else {
4048
+ // Smart terminal header (interactive mode only)
4049
+ const isTerminalOutput = process.stdout.isTTY && !options.silent;
4050
+ if (isTerminalOutput) {
4051
+ const meta = result.metadata || {};
4052
+ const parts = [];
4053
+ if (meta.title || result.title)
4054
+ parts.push(`\x1b[1m${meta.title || result.title}\x1b[0m`);
4055
+ if (meta.author)
4056
+ parts.push(`By ${meta.author}`);
4057
+ if (meta.wordCount)
4058
+ parts.push(`${meta.wordCount} words`);
4059
+ const totalMs = result.timing?.total ?? result.elapsed;
4060
+ if (totalMs)
4061
+ parts.push(`${totalMs}ms`);
4062
+ if (parts.length > 0) {
4063
+ await writeStdout(`\n ${parts.join(' · ')}\n`);
4064
+ await writeStdout(' ' + '─'.repeat(60) + '\n\n');
4065
+ }
4066
+ }
4067
+ // Stream content immediately to stdout — consumer gets it without waiting
3868
4068
  await writeStdout(result.content + '\n');
4069
+ // Append timing summary to stderr so it doesn't pollute piped content
4070
+ if (!options.silent) {
4071
+ const totalMs = result.timing?.total ?? result.elapsed;
4072
+ process.stderr.write(`\n--- ${result.tokens} tokens · ${totalMs}ms ---\n`);
4073
+ }
3869
4074
  }
3870
4075
  }
3871
4076
  function writeStdout(data) {