@j0hanz/superfetch 1.0.2 → 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +345 -57
- package/dist/config/index.d.ts.map +1 -1
- package/dist/config/index.js +6 -10
- package/dist/config/index.js.map +1 -1
- package/dist/config/types.d.ts +256 -0
- package/dist/config/types.d.ts.map +1 -0
- package/dist/config/types.js +2 -0
- package/dist/config/types.js.map +1 -0
- package/dist/errors/app-error.d.ts +6 -20
- package/dist/errors/app-error.d.ts.map +1 -1
- package/dist/errors/app-error.js +7 -18
- package/dist/errors/app-error.js.map +1 -1
- package/dist/index.js +75 -62
- package/dist/index.js.map +1 -1
- package/dist/middleware/error-handler.d.ts +1 -5
- package/dist/middleware/error-handler.d.ts.map +1 -1
- package/dist/middleware/error-handler.js +4 -12
- package/dist/middleware/error-handler.js.map +1 -1
- package/dist/middleware/rate-limiter.d.ts +2 -20
- package/dist/middleware/rate-limiter.d.ts.map +1 -1
- package/dist/middleware/rate-limiter.js +22 -47
- package/dist/middleware/rate-limiter.js.map +1 -1
- package/dist/prompts/index.d.ts +0 -3
- package/dist/prompts/index.d.ts.map +1 -1
- package/dist/prompts/index.js +2 -10
- package/dist/prompts/index.js.map +1 -1
- package/dist/resources/cached-content.d.ts +5 -0
- package/dist/resources/cached-content.d.ts.map +1 -0
- package/dist/resources/cached-content.js +93 -0
- package/dist/resources/cached-content.js.map +1 -0
- package/dist/resources/index.d.ts +0 -3
- package/dist/resources/index.d.ts.map +1 -1
- package/dist/resources/index.js +40 -5
- package/dist/resources/index.js.map +1 -1
- package/dist/server.d.ts +0 -4
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +11 -6
- package/dist/server.js.map +1 -1
- package/dist/services/cache.d.ts +20 -6
- package/dist/services/cache.d.ts.map +1 -1
- package/dist/services/cache.js +128 -20
- package/dist/services/cache.js.map +1 -1
- package/dist/services/card-extractor.d.ts +10 -0
- package/dist/services/card-extractor.d.ts.map +1 -0
- package/dist/services/card-extractor.js +194 -0
- package/dist/services/card-extractor.js.map +1 -0
- package/dist/services/extractor.d.ts +12 -19
- package/dist/services/extractor.d.ts.map +1 -1
- package/dist/services/extractor.js +60 -46
- package/dist/services/extractor.js.map +1 -1
- package/dist/services/fetcher.d.ts +13 -11
- package/dist/services/fetcher.d.ts.map +1 -1
- package/dist/services/fetcher.js +143 -54
- package/dist/services/fetcher.js.map +1 -1
- package/dist/services/logger.d.ts.map +1 -1
- package/dist/services/logger.js +4 -6
- package/dist/services/logger.js.map +1 -1
- package/dist/services/parser.d.ts +1 -6
- package/dist/services/parser.d.ts.map +1 -1
- package/dist/services/parser.js +57 -27
- package/dist/services/parser.js.map +1 -1
- package/dist/tools/handlers/fetch-links.tool.d.ts +6 -18
- package/dist/tools/handlers/fetch-links.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-links.tool.js +104 -79
- package/dist/tools/handlers/fetch-links.tool.js.map +1 -1
- package/dist/tools/handlers/fetch-markdown.tool.d.ts +6 -10
- package/dist/tools/handlers/fetch-markdown.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-markdown.tool.js +83 -84
- package/dist/tools/handlers/fetch-markdown.tool.js.map +1 -1
- package/dist/tools/handlers/fetch-url.tool.d.ts +6 -12
- package/dist/tools/handlers/fetch-url.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-url.tool.js +51 -93
- package/dist/tools/handlers/fetch-url.tool.js.map +1 -1
- package/dist/tools/handlers/fetch-urls.tool.d.ts +12 -0
- package/dist/tools/handlers/fetch-urls.tool.d.ts.map +1 -0
- package/dist/tools/handlers/fetch-urls.tool.js +184 -0
- package/dist/tools/handlers/fetch-urls.tool.js.map +1 -0
- package/dist/tools/index.d.ts +0 -4
- package/dist/tools/index.d.ts.map +1 -1
- package/dist/tools/index.js +145 -15
- package/dist/tools/index.js.map +1 -1
- package/dist/tools/utils/common.d.ts +8 -0
- package/dist/tools/utils/common.d.ts.map +1 -0
- package/dist/tools/utils/common.js +35 -0
- package/dist/tools/utils/common.js.map +1 -0
- package/dist/tools/utils/fetch-pipeline.d.ts +3 -0
- package/dist/tools/utils/fetch-pipeline.d.ts.map +1 -0
- package/dist/tools/utils/fetch-pipeline.js +78 -0
- package/dist/tools/utils/fetch-pipeline.js.map +1 -0
- package/dist/tools/utils/index.d.ts +4 -0
- package/dist/tools/utils/index.d.ts.map +1 -0
- package/dist/tools/utils/index.js +3 -0
- package/dist/tools/utils/index.js.map +1 -0
- package/dist/tools/utils/response-builder.d.ts +3 -0
- package/dist/tools/utils/response-builder.d.ts.map +1 -0
- package/dist/tools/utils/response-builder.js +24 -0
- package/dist/tools/utils/response-builder.js.map +1 -0
- package/dist/transformers/jsonl.transformer.d.ts +1 -1
- package/dist/transformers/jsonl.transformer.d.ts.map +1 -1
- package/dist/transformers/jsonl.transformer.js +2 -1
- package/dist/transformers/jsonl.transformer.js.map +1 -1
- package/dist/transformers/markdown.transformer.d.ts +1 -1
- package/dist/transformers/markdown.transformer.d.ts.map +1 -1
- package/dist/transformers/markdown.transformer.js +99 -5
- package/dist/transformers/markdown.transformer.js.map +1 -1
- package/dist/types/content.types.d.ts +11 -11
- package/dist/types/content.types.d.ts.map +1 -1
- package/dist/types/index.d.ts +1 -2
- package/dist/types/index.d.ts.map +1 -1
- package/dist/types/index.js +1 -2
- package/dist/types/index.js.map +1 -1
- package/dist/types/schemas.d.ts +39 -12
- package/dist/types/schemas.d.ts.map +1 -1
- package/dist/utils/concurrency.d.ts +6 -0
- package/dist/utils/concurrency.d.ts.map +1 -0
- package/dist/utils/concurrency.js +38 -0
- package/dist/utils/concurrency.js.map +1 -0
- package/dist/utils/content-cleaner.d.ts +32 -0
- package/dist/utils/content-cleaner.d.ts.map +1 -0
- package/dist/utils/content-cleaner.js +238 -0
- package/dist/utils/content-cleaner.js.map +1 -0
- package/dist/utils/language-detector.d.ts +5 -0
- package/dist/utils/language-detector.d.ts.map +1 -0
- package/dist/utils/language-detector.js +50 -0
- package/dist/utils/language-detector.js.map +1 -0
- package/dist/utils/sanitizer.d.ts +0 -10
- package/dist/utils/sanitizer.d.ts.map +1 -1
- package/dist/utils/sanitizer.js +4 -12
- package/dist/utils/sanitizer.js.map +1 -1
- package/dist/utils/tool-error-handler.d.ts +1 -15
- package/dist/utils/tool-error-handler.d.ts.map +1 -1
- package/dist/utils/tool-error-handler.js +34 -6
- package/dist/utils/tool-error-handler.js.map +1 -1
- package/dist/utils/url-validator.d.ts +0 -8
- package/dist/utils/url-validator.d.ts.map +1 -1
- package/dist/utils/url-validator.js +17 -31
- package/dist/utils/url-validator.js.map +1 -1
- package/package.json +81 -79
package/dist/tools/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/tools/index.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/tools/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAIxB,OAAO,EACL,4BAA4B,EAC5B,qBAAqB,EACrB,qBAAqB,GACtB,MAAM,gCAAgC,CAAC;AACxC,OAAO,EACL,+BAA+B,EAC/B,wBAAwB,EACxB,wBAAwB,GACzB,MAAM,mCAAmC,CAAC;AAC3C,OAAO,EACL,0BAA0B,EAC1B,mBAAmB,EACnB,mBAAmB,GACpB,MAAM,8BAA8B,CAAC;AACtC,OAAO,EACL,2BAA2B,EAC3B,oBAAoB,EACpB,oBAAoB,GACrB,MAAM,+BAA+B,CAAC;AAEvC,8DAA8D;AAE9D,6CAA6C;AAC7C,MAAM,oBAAoB,GAAG;IAC3B,aAAa,EAAE,CAAC;SACb,MAAM,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC;SAClB,QAAQ,EAAE;SACV,QAAQ,CAAC,qCAAqC,CAAC;IAClD,OAAO,EAAE,CAAC;SACP,MAAM,EAAE;SACR,GAAG,CAAC,IAAI,CAAC;SACT,GAAG,CAAC,KAAK,CAAC;SACV,QAAQ,EAAE;SACV,QAAQ,CAAC,8CAA8C,CAAC;IAC3D,OAAO,EAAE,CAAC;SACP,MAAM,EAAE;SACR,GAAG,CAAC,CAAC,CAAC;SACN,GAAG,CAAC,EAAE,CAAC;SACP,QAAQ,EAAE;SACV,QAAQ,CAAC,iCAAiC,CAAC;CAC/C,CAAC;AAEF,gBAAgB;AAChB,MAAM,mBAAmB,GAAG;IAC1B,GAAG,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,kBAAkB,CAAC;IACnD,kBAAkB,EAAE,CAAC;SAClB,OAAO,EAAE;SACT,QAAQ,EAAE;SACV,OAAO,CAAC,IAAI,CAAC;SACb,QAAQ,CAAC,iDAAiD,CAAC;IAC9D,eAAe,EAAE,CAAC;SACf,OAAO,EAAE;SACT,QAAQ,EAAE;SACV,OAAO,CAAC,IAAI,CAAC;SACb,QAAQ,CAAC,kDAAkD,CAAC;IAC/D,gBAAgB,EAAE,CAAC;SAChB,MAAM,EAAE;SACR,QAAQ,EAAE;SACV,QAAQ,EAAE;SACV,QAAQ,CAAC,sCAAsC,CAAC;IACnD,MAAM,EAAE,CAAC;SACN,IAAI,CAAC,CAAC,OAAO,EAAE,UAAU,CAAC,CAAC;SAC3B,QAAQ,EAAE;SACV,OAAO,CAAC,OAAO,CAAC;SAChB,QAAQ,CAAC,eAAe,CAAC;IAC5B,GAAG,oBAAoB;CACxB,CAAC;AAEF,MAAM,qBAAqB,GAAG;IAC5B,GAAG,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,+BAA+B,CAAC;IAChE,eAAe,EAAE,CAAC;SACf,OAAO,EAAE;SACT,QAAQ,EAAE;SACV,OAAO,CAAC,IAAI,CAAC;SACb,QAAQ,CAAC,wBAAwB,CAAC;IACrC,eAAe,EAAE,CAAC;SACf,OAAO,EAAE;SACT,QAAQ,EAAE;SACV,OAAO,CAAC,IAAI,CAAC;SACb,QAAQ,CAAC,wBAAwB,CAAC;IACrC,QAAQ,EAAE,CAAC;SACR,MAAM,EAAE;SACR,QAAQ,EAAE;SACV,GAAG,CAAC,IAAI,CAAC;SACT,QAAQ,EAAE;SACV,QAAQ,CAAC,4CAA4C,CAAC;IACzD,aAAa,EAAE,CAAC;SACb,MAAM,EAAE;SACR,QAAQ,EAAE;SACV,QAAQ,CAAC,sDAAsD,CAAC;IACnE,aAAa,EAAE,CAAC;SACb,OAAO,EAAE;SACT,QAAQ,EAAE;SACV,OAAO,CAAC,KAAK,CAAC;SACd,QAAQ,CAAC,0CAA0C,CAAC;IACvD,GAAG,oBAAoB;CACxB,CAAC;AAEF,MAAM,wBAAwB,GAAG;IAC/B,GAAG,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,kBAAkB,CAAC;IACnD,kBAAkB,EAAE,CAAC;SAClB,OAAO,EAAE;SACT,QAAQ,EAAE;SACV,OAAO,CAAC,IAAI,CAAC;SACb,QAAQ,CAAC,gDAAgD,CAAC;IAC7D,eAAe,EAAE,CAAC;SACf,OAAO,EAAE;SACT,QAAQ,EAAE;SACV,OAAO,CAAC,IAAI,CAAC;SACb,QAAQ,CAAC,mCAAmC,CAAC;IAChD,gBAAgB,EAAE,CAAC;SAChB,MAAM,EAAE;SACR,QAAQ,EAAE;SACV,QAAQ,EAAE;SACV,QAAQ,CAAC,sCAAsC,CAAC;IACnD,WAAW,EAAE,CAAC;SACX,OAAO,EAAE;SACT,QAAQ,EAAE;SACV,OAAO,CAAC,KAAK,CAAC;SACd,QAAQ,CAAC,0CAA0C,CAAC;IACvD,GAAG,oBAAoB;CACxB,CAAC;AAEF,MAAM,oBAAoB,GAAG;IAC3B,IAAI,EAAE,CAAC;SACJ,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;SACxB,GAAG,CAAC,CAAC,CAAC;SACN,GAAG,CAAC,EAAE,CAAC;SACP,QAAQ,CAAC,oCAAoC,CAAC;IACjD,kBAAkB,EAAE,CAAC;SAClB,OAAO,EAAE;SACT,QAAQ,EAAE;SACV,OAAO,CAAC,IAAI,CAAC;SACb,QAAQ,CAAC,iDAAiD,CAAC;IAC9D,eAAe,EAAE,CAAC;SACf,OAAO,EAAE;SACT,QAAQ,EAAE;SACV,OAAO,CAAC,IAAI,CAAC;SACb,QAAQ,CAAC,kDAAkD,CAAC;IAC/D,gBAAgB,EAAE,CAAC;SAChB,MAAM,EAAE;SACR,QAAQ,EAAE;SACV,QAAQ,EAAE;SACV,QAAQ,CAAC,8CAA8C,CAAC;IAC3D,MAAM,EAAE,CAAC;SACN,IAAI,CAAC,CAAC,OAAO,EAAE,UAAU,CAAC,CAAC;SAC3B,QAAQ,EAAE;SACV,OAAO,CAAC,OAAO,CAAC;SAChB,QAAQ,CAAC,4BAA4B,CAAC;IACzC,WAAW,EAAE,CAAC;SACX,MAAM,EAAE;SACR,GAAG,CAAC,CAAC,CAAC;SACN,GAAG,CAAC,CAAC,CAAC;SACN,QAAQ,EAAE;SACV,OAAO,CAAC,CAAC,CAAC;SACV,QAAQ,CAAC,mCAAmC,CAAC;IAChD,eAAe,EAAE,CAAC;SACf,OAAO,EAAE;SACT,QAAQ,EAAE;SACV,OAAO,CAAC,IAAI,CAAC;SACb,QAAQ,CAAC,uCAAuC,CAAC;IACpD,GAAG,oBAAoB;CACxB,CAAC;AAEF,mDAAmD;AACnD,MAAM,oBAAoB,GAAG;IAC3B,GAAG,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,iBAAiB,CAAC;IAC3C,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,YAAY,CAAC;IACnD,aAAa,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,oCAAoC,CAAC;IACxE,SAAS,EAAE,CAAC;SACT,MAAM,EAAE;SACR,QAAQ,CAAC,+CAA+C,CAAC;IAC5D,MAAM,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE,UAAU,CAAC,CAAC,CAAC,QAAQ,CAAC,oBAAoB,CAAC;IACpE,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,uCAAuC,CAAC;IACrE,MAAM,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,QAAQ,CAAC,0CAA0C,CAAC;IACxE,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,qCAAqC,CAAC;IAC5E,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,kCAAkC,CAAC;CAC9E,CAAC;AAEF,MAAM,sBAAsB,GAAG;IAC7B,GAAG,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,gBAAgB,CAAC;IAC1C,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,iCAAiC,CAAC;IACjE,KAAK,EAAE,CAAC;SACL,KAAK,CACJ,CAAC,CAAC,MAAM,CAAC;QACP,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,cAAc,CAAC;QACzC,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,sBAAsB,CAAC;QACjD,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,UAAU,EAAE,UAAU,EAAE,OAAO,CAAC,CAAC,CAAC,QAAQ,CAAC,WAAW,CAAC;KACtE,CAAC,CACH;SACA,QAAQ,CAAC,0BAA0B,CAAC;IACvC,QAAQ,EAAE,CAAC;SACR,MAAM,EAAE;SACR,QAAQ,EAAE;SACV,QAAQ,CAAC,yCAAyC,CAAC;IACtD,SAAS,EAAE,CAAC;SACT,OAAO,EAAE;SACT,QAAQ,EAAE;SACV,QAAQ,CAAC,4CAA4C,CAAC;IACzD,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,qCAAqC,CAAC;IAC5E,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,kCAAkC,CAAC;CAC9E,CAAC;AAEF,MAAM,yBAAyB,GAAG;IAChC,GAAG,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,iBAAiB,CAAC;IAC3C,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,YAAY,CAAC;IACnD,SAAS,EAAE,CAAC;SACT,MAAM,EAAE;SACR,QAAQ,CAAC,+CAA+C,CAAC;IAC5D,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,0CAA0C,CAAC;IACzE,GAAG,EAAE,CAAC;SACH,KAAK,CACJ,CAAC,CAAC,MAAM,CAAC;QACP,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,qBAAqB,CAAC;QACjD,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,cAAc,CAAC;QACzC,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,0BAA0B,CAAC;KACtD,CAAC,CACH;SACA,QAAQ,EAAE;SACV,QAAQ,CAAC,4CAA4C,CAAC;IACzD,MAAM,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,QAAQ,CAAC,0CAA0C,CAAC;IACxE,SAAS,EAAE,CAAC;SACT,OAAO,EAAE;SACT,QAAQ,EAAE;SACV,QAAQ,CAAC,mDAAmD,CAAC;IAChE,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,qCAAqC,CAAC;IAC5E,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,kCAAkC,CAAC;CAC9E,CAAC;AAEF,MAAM,qBAAqB,GAAG;IAC5B,OAAO,EAAE,CAAC;SACP,KAAK,CACJ,CAAC,CAAC,MAAM,CAAC;QACP,GAAG,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,iBAAiB,CAAC;QAC3C,OAAO,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,QAAQ,CAAC,kCAAkC,CAAC;QACjE,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,YAAY,CAAC;QACnD,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,uBAAuB,CAAC;QAChE,aAAa,EAAE,CAAC;aACb,MAAM,EAAE;aACR,QAAQ,EAAE;aACV,QAAQ,CAAC,uCAAuC,CAAC;QACpD,MAAM,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,2BAA2B,CAAC;QACpE,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,yBAAyB,CAAC;QAChE,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,sBAAsB,CAAC;KAClE,CAAC,CACH;SACA,QAAQ,CAAC,+BAA+B,CAAC;IAC5C,OAAO,EAAE,CAAC;SACP,MAAM,CAAC;QACN,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,sBAAsB,CAAC;QAClD,UAAU,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,8BAA8B,CAAC;QAC/D,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,0BAA0B,CAAC;QACvD,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,0BAA0B,CAAC;QACvD,kBAAkB,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,gCAAgC,CAAC;KAC1E,CAAC;SACD,QAAQ,CAAC,oBAAoB,CAAC;IACjC,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,mCAAmC,CAAC;CACpE,CAAC;AAEF,MAAM,UAAU,aAAa,CAAC,MAAiB;IAC7C,MAAM,CAAC,YAAY,CACjB,mBAAmB,EACnB;QACE,KAAK,EAAE,WAAW;QAClB,WAAW,EAAE,0BAA0B;QACvC,WAAW,EAAE,mBAAmB;QAChC,YAAY,EAAE,oBAAoB;KACnC,EACD,KAAK,EAAE,IAAI,EAAE,EAAE,CAAC,mBAAmB,CAAC,IAAI,CAAC,CAC1C,CAAC;IAEF,MAAM,CAAC,YAAY,CACjB,qBAAqB,EACrB;QACE,KAAK,EAAE,aAAa;QACpB,WAAW,EAAE,4BAA4B;QACzC,WAAW,EAAE,qBAAqB;QAClC,YAAY,EAAE,sBAAsB;KACrC,EACD,KAAK,EAAE,IAAI,EAAE,EAAE,CAAC,qBAAqB,CAAC,IAAI,CAAC,CAC5C,CAAC;IAEF,MAAM,CAAC,YAAY,CACjB,wBAAwB,EACxB;QACE,KAAK,EAAE,gBAAgB;QACvB,WAAW,EAAE,+BAA+B;QAC5C,WAAW,EAAE,wBAAwB;QACrC,YAAY,EAAE,yBAAyB;KACxC,EACD,KAAK,EAAE,IAAI,EAAE,EAAE,CAAC,wBAAwB,CAAC,IAAI,CAAC,CAC/C,CAAC;IAEF,MAAM,CAAC,YAAY,CACjB,oBAAoB,EACpB;QACE,KAAK,EAAE,oBAAoB;QAC3B,WAAW,EAAE,2BAA2B;QACxC,WAAW,EAAE,oBAAoB;QACjC,YAAY,EAAE,qBAAqB;KACpC,EACD,KAAK,EAAE,IAAI,EAAE,EAAE,CAAC,oBAAoB,CAAC,IAAI,CAAC,CAC3C,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import type { ExtractedArticle, ExtractedMetadata, MetadataBlock } from '../../config/types.js';
|
|
2
|
+
export declare function shouldUseArticle(extractMainContent: boolean, article: ExtractedArticle | null): article is ExtractedArticle;
|
|
3
|
+
export declare function buildMetadata(url: string, article: ExtractedArticle | null, extractedMeta: ExtractedMetadata, useArticle: boolean, includeMetadata: boolean): MetadataBlock | undefined;
|
|
4
|
+
export declare function truncateContent(content: string, maxLength?: number): {
|
|
5
|
+
content: string;
|
|
6
|
+
truncated: boolean;
|
|
7
|
+
};
|
|
8
|
+
//# sourceMappingURL=common.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"common.d.ts","sourceRoot":"","sources":["../../../src/tools/utils/common.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EACV,gBAAgB,EAChB,iBAAiB,EACjB,aAAa,EACd,MAAM,uBAAuB,CAAC;AAE/B,wBAAgB,gBAAgB,CAC9B,kBAAkB,EAAE,OAAO,EAC3B,OAAO,EAAE,gBAAgB,GAAG,IAAI,GAC/B,OAAO,IAAI,gBAAgB,CAI7B;AAED,wBAAgB,aAAa,CAC3B,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,gBAAgB,GAAG,IAAI,EAChC,aAAa,EAAE,iBAAiB,EAChC,UAAU,EAAE,OAAO,EACnB,eAAe,EAAE,OAAO,GACvB,aAAa,GAAG,SAAS,CAmB3B;AAED,wBAAgB,eAAe,CAC7B,OAAO,EAAE,MAAM,EACf,SAAS,CAAC,EAAE,MAAM,GACjB;IAAE,OAAO,EAAE,MAAM,CAAC;IAAC,SAAS,EAAE,OAAO,CAAA;CAAE,CAQzC"}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import { config } from '../../config/index.js';
|
|
2
|
+
export function shouldUseArticle(extractMainContent, article) {
|
|
3
|
+
return (extractMainContent && config.extraction.extractMainContent && !!article);
|
|
4
|
+
}
|
|
5
|
+
export function buildMetadata(url, article, extractedMeta, useArticle, includeMetadata) {
|
|
6
|
+
if (!includeMetadata || !config.extraction.includeMetadata)
|
|
7
|
+
return undefined;
|
|
8
|
+
const now = new Date().toISOString();
|
|
9
|
+
return useArticle && article
|
|
10
|
+
? {
|
|
11
|
+
type: 'metadata',
|
|
12
|
+
title: article.title,
|
|
13
|
+
author: article.byline,
|
|
14
|
+
url,
|
|
15
|
+
fetchedAt: now,
|
|
16
|
+
}
|
|
17
|
+
: {
|
|
18
|
+
type: 'metadata',
|
|
19
|
+
title: extractedMeta.title,
|
|
20
|
+
description: extractedMeta.description,
|
|
21
|
+
author: extractedMeta.author,
|
|
22
|
+
url,
|
|
23
|
+
fetchedAt: now,
|
|
24
|
+
};
|
|
25
|
+
}
|
|
26
|
+
export function truncateContent(content, maxLength) {
|
|
27
|
+
if (!maxLength || maxLength <= 0 || content.length <= maxLength) {
|
|
28
|
+
return { content, truncated: false };
|
|
29
|
+
}
|
|
30
|
+
return {
|
|
31
|
+
content: `${content.substring(0, maxLength)}\n...[truncated]`,
|
|
32
|
+
truncated: true,
|
|
33
|
+
};
|
|
34
|
+
}
|
|
35
|
+
//# sourceMappingURL=common.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"common.js","sourceRoot":"","sources":["../../../src/tools/utils/common.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,uBAAuB,CAAC;AAO/C,MAAM,UAAU,gBAAgB,CAC9B,kBAA2B,EAC3B,OAAgC;IAEhC,OAAO,CACL,kBAAkB,IAAI,MAAM,CAAC,UAAU,CAAC,kBAAkB,IAAI,CAAC,CAAC,OAAO,CACxE,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,aAAa,CAC3B,GAAW,EACX,OAAgC,EAChC,aAAgC,EAChC,UAAmB,EACnB,eAAwB;IAExB,IAAI,CAAC,eAAe,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC,eAAe;QAAE,OAAO,SAAS,CAAC;IAC7E,MAAM,GAAG,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IACrC,OAAO,UAAU,IAAI,OAAO;QAC1B,CAAC,CAAC;YACE,IAAI,EAAE,UAAU;YAChB,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,MAAM,EAAE,OAAO,CAAC,MAAM;YACtB,GAAG;YACH,SAAS,EAAE,GAAG;SACf;QACH,CAAC,CAAC;YACE,IAAI,EAAE,UAAU;YAChB,KAAK,EAAE,aAAa,CAAC,KAAK;YAC1B,WAAW,EAAE,aAAa,CAAC,WAAW;YACtC,MAAM,EAAE,aAAa,CAAC,MAAM;YAC5B,GAAG;YACH,SAAS,EAAE,GAAG;SACf,CAAC;AACR,CAAC;AAED,MAAM,UAAU,eAAe,CAC7B,OAAe,EACf,SAAkB;IAElB,IAAI,CAAC,SAAS,IAAI,SAAS,IAAI,CAAC,IAAI,OAAO,CAAC,MAAM,IAAI,SAAS,EAAE,CAAC;QAChE,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,KAAK,EAAE,CAAC;IACvC,CAAC;IACD,OAAO;QACL,OAAO,EAAE,GAAG,OAAO,CAAC,SAAS,CAAC,CAAC,EAAE,SAAS,CAAC,kBAAkB;QAC7D,SAAS,EAAE,IAAI;KAChB,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fetch-pipeline.d.ts","sourceRoot":"","sources":["../../../src/tools/utils/fetch-pipeline.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,oBAAoB,EACpB,cAAc,EACf,MAAM,uBAAuB,CAAC;AA+B/B,wBAAsB,oBAAoB,CAAC,CAAC,EAC1C,OAAO,EAAE,oBAAoB,CAAC,CAAC,CAAC,GAC/B,OAAO,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,CAiF5B"}
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
import * as cache from '../../services/cache.js';
|
|
2
|
+
import { fetchUrlWithRetry } from '../../services/fetcher.js';
|
|
3
|
+
import { logDebug } from '../../services/logger.js';
|
|
4
|
+
import { validateAndNormalizeUrl } from '../../utils/url-validator.js';
|
|
5
|
+
const pendingRequests = new Map();
|
|
6
|
+
const DEDUPLICATION_TIMEOUT = 60000; // 1 minute TTL
|
|
7
|
+
// Cleanup stale pending requests every 30 seconds to prevent memory leak
|
|
8
|
+
const cleanupInterval = setInterval(() => {
|
|
9
|
+
const now = Date.now();
|
|
10
|
+
for (const [key, value] of pendingRequests.entries()) {
|
|
11
|
+
if (now - value.timestamp > DEDUPLICATION_TIMEOUT) {
|
|
12
|
+
pendingRequests.delete(key);
|
|
13
|
+
}
|
|
14
|
+
}
|
|
15
|
+
}, 30000);
|
|
16
|
+
// Allow Node.js to exit if this is the only active timer
|
|
17
|
+
cleanupInterval.unref();
|
|
18
|
+
export async function executeFetchPipeline(options) {
|
|
19
|
+
const { url, cacheNamespace, customHeaders, retries, signal, timeout, transform, serialize = JSON.stringify, deserialize = (cached) => JSON.parse(cached), } = options;
|
|
20
|
+
const normalizedUrl = validateAndNormalizeUrl(url);
|
|
21
|
+
const cacheKey = cache.createCacheKey(cacheNamespace, normalizedUrl);
|
|
22
|
+
// Check cache first
|
|
23
|
+
if (cacheKey) {
|
|
24
|
+
const cached = cache.get(cacheKey);
|
|
25
|
+
if (cached) {
|
|
26
|
+
logDebug('Cache hit', { namespace: cacheNamespace, url: normalizedUrl });
|
|
27
|
+
const data = deserialize(cached.content);
|
|
28
|
+
return {
|
|
29
|
+
data,
|
|
30
|
+
fromCache: true,
|
|
31
|
+
url: normalizedUrl,
|
|
32
|
+
fetchedAt: cached.fetchedAt,
|
|
33
|
+
};
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
// Check for pending request to prevent duplicate fetches
|
|
37
|
+
const dedupeKey = `${cacheNamespace}:${normalizedUrl}`;
|
|
38
|
+
const pending = pendingRequests.get(dedupeKey);
|
|
39
|
+
if (pending) {
|
|
40
|
+
logDebug('Request deduplication hit', { url: normalizedUrl });
|
|
41
|
+
return pending.promise;
|
|
42
|
+
}
|
|
43
|
+
// Build fetch options
|
|
44
|
+
const fetchOptions = {
|
|
45
|
+
customHeaders,
|
|
46
|
+
signal,
|
|
47
|
+
timeout,
|
|
48
|
+
};
|
|
49
|
+
// Create new request
|
|
50
|
+
const request = (async () => {
|
|
51
|
+
try {
|
|
52
|
+
logDebug('Fetching URL', { url: normalizedUrl, retries });
|
|
53
|
+
const fetchResult = await fetchUrlWithRetry(normalizedUrl, fetchOptions, retries);
|
|
54
|
+
const { html } = fetchResult;
|
|
55
|
+
const data = transform(html, normalizedUrl);
|
|
56
|
+
if (cacheKey) {
|
|
57
|
+
const serialized = serialize(data);
|
|
58
|
+
cache.set(cacheKey, serialized);
|
|
59
|
+
}
|
|
60
|
+
return {
|
|
61
|
+
data,
|
|
62
|
+
fromCache: false,
|
|
63
|
+
url: normalizedUrl,
|
|
64
|
+
fetchedAt: new Date().toISOString(),
|
|
65
|
+
};
|
|
66
|
+
}
|
|
67
|
+
finally {
|
|
68
|
+
// Clean up pending request
|
|
69
|
+
pendingRequests.delete(dedupeKey);
|
|
70
|
+
}
|
|
71
|
+
})();
|
|
72
|
+
pendingRequests.set(dedupeKey, {
|
|
73
|
+
promise: request,
|
|
74
|
+
timestamp: Date.now(),
|
|
75
|
+
});
|
|
76
|
+
return request;
|
|
77
|
+
}
|
|
78
|
+
//# sourceMappingURL=fetch-pipeline.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fetch-pipeline.js","sourceRoot":"","sources":["../../../src/tools/utils/fetch-pipeline.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,KAAK,MAAM,yBAAyB,CAAC;AAEjD,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAC9D,OAAO,EAAE,QAAQ,EAAE,MAAM,0BAA0B,CAAC;AAEpD,OAAO,EAAE,uBAAuB,EAAE,MAAM,8BAA8B,CAAC;AAQvE,MAAM,eAAe,GAAG,IAAI,GAAG,EAA0B,CAAC;AAC1D,MAAM,qBAAqB,GAAG,KAAK,CAAC,CAAC,eAAe;AAEpD,yEAAyE;AACzE,MAAM,eAAe,GAAG,WAAW,CAAC,GAAG,EAAE;IACvC,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IACvB,KAAK,MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,eAAe,CAAC,OAAO,EAAE,EAAE,CAAC;QACrD,IAAI,GAAG,GAAG,KAAK,CAAC,SAAS,GAAG,qBAAqB,EAAE,CAAC;YAClD,eAAe,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;QAC9B,CAAC;IACH,CAAC;AACH,CAAC,EAAE,KAAK,CAAC,CAAC;AAEV,yDAAyD;AACzD,eAAe,CAAC,KAAK,EAAE,CAAC;AAExB,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACxC,OAAgC;IAEhC,MAAM,EACJ,GAAG,EACH,cAAc,EACd,aAAa,EACb,OAAO,EACP,MAAM,EACN,OAAO,EACP,SAAS,EACT,SAAS,GAAG,IAAI,CAAC,SAAS,EAC1B,WAAW,GAAG,CAAC,MAAc,EAAE,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,CAAM,GAC1D,GAAG,OAAO,CAAC;IAEZ,MAAM,aAAa,GAAG,uBAAuB,CAAC,GAAG,CAAC,CAAC;IACnD,MAAM,QAAQ,GAAG,KAAK,CAAC,cAAc,CAAC,cAAc,EAAE,aAAa,CAAC,CAAC;IAErE,oBAAoB;IACpB,IAAI,QAAQ,EAAE,CAAC;QACb,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QACnC,IAAI,MAAM,EAAE,CAAC;YACX,QAAQ,CAAC,WAAW,EAAE,EAAE,SAAS,EAAE,cAAc,EAAE,GAAG,EAAE,aAAa,EAAE,CAAC,CAAC;YACzE,MAAM,IAAI,GAAG,WAAW,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;YAEzC,OAAO;gBACL,IAAI;gBACJ,SAAS,EAAE,IAAI;gBACf,GAAG,EAAE,aAAa;gBAClB,SAAS,EAAE,MAAM,CAAC,SAAS;aAC5B,CAAC;QACJ,CAAC;IACH,CAAC;IAED,yDAAyD;IACzD,MAAM,SAAS,GAAG,GAAG,cAAc,IAAI,aAAa,EAAE,CAAC;IACvD,MAAM,OAAO,GAAG,eAAe,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;IAC/C,IAAI,OAAO,EAAE,CAAC;QACZ,QAAQ,CAAC,2BAA2B,EAAE,EAAE,GAAG,EAAE,aAAa,EAAE,CAAC,CAAC;QAC9D,OAAO,OAAO,CAAC,OAAqC,CAAC;IACvD,CAAC;IAED,sBAAsB;IACtB,MAAM,YAAY,GAAiB;QACjC,aAAa;QACb,MAAM;QACN,OAAO;KACR,CAAC;IAEF,qBAAqB;IACrB,MAAM,OAAO,GAAG,CAAC,KAAK,IAAI,EAAE;QAC1B,IAAI,CAAC;YACH,QAAQ,CAAC,cAAc,EAAE,EAAE,GAAG,EAAE,aAAa,EAAE,OAAO,EAAE,CAAC,CAAC;YAC1D,MAAM,WAAW,GAAG,MAAM,iBAAiB,CACzC,aAAa,EACb,YAAY,EACZ,OAAO,CACR,CAAC;YACF,MAAM,EAAE,IAAI,EAAE,GAAG,WAAW,CAAC;YAC7B,MAAM,IAAI,GAAG,SAAS,CAAC,IAAI,EAAE,aAAa,CAAC,CAAC;YAE5C,IAAI,QAAQ,EAAE,CAAC;gBACb,MAAM,UAAU,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;gBACnC,KAAK,CAAC,GAAG,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;YAClC,CAAC;YAED,OAAO;gBACL,IAAI;gBACJ,SAAS,EAAE,KAAK;gBAChB,GAAG,EAAE,aAAa;gBAClB,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;aACpC,CAAC;QACJ,CAAC;gBAAS,CAAC;YACT,2BAA2B;YAC3B,eAAe,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;QACpC,CAAC;IACH,CAAC,CAAC,EAAE,CAAC;IAEL,eAAe,CAAC,GAAG,CAAC,SAAS,EAAE;QAC7B,OAAO,EAAE,OAA2C;QACpD,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE;KACtB,CAAC,CAAC;IACH,OAAO,OAAO,CAAC;AACjB,CAAC"}
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
export { executeFetchPipeline } from './fetch-pipeline.js';
|
|
2
|
+
export type { FetchPipelineOptions, PipelineResult } from './fetch-pipeline.js';
|
|
3
|
+
export { createSuccessResponse, createCachedResponse, createBatchResponse, } from './response-builder.js';
|
|
4
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/tools/utils/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,oBAAoB,EAAE,MAAM,qBAAqB,CAAC;AAC3D,YAAY,EAAE,oBAAoB,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AAChF,OAAO,EACL,qBAAqB,EACrB,oBAAoB,EACpB,mBAAmB,GACpB,MAAM,uBAAuB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/tools/utils/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,oBAAoB,EAAE,MAAM,qBAAqB,CAAC;AAE3D,OAAO,EACL,qBAAqB,EACrB,oBAAoB,EACpB,mBAAmB,GACpB,MAAM,uBAAuB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"response-builder.d.ts","sourceRoot":"","sources":["../../../src/tools/utils/response-builder.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,oBAAoB,EAEpB,cAAc,EACd,YAAY,EACb,MAAM,uBAAuB,CAAC;AAE/B,wBAAgB,mBAAmB,CACjC,OAAO,EAAE,cAAc,EAAE,GACxB,YAAY,CAAC,oBAAoB,CAAC,CA2BpC"}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
export function createBatchResponse(results) {
|
|
2
|
+
const summary = {
|
|
3
|
+
total: results.length,
|
|
4
|
+
successful: results.filter((r) => r.success).length,
|
|
5
|
+
failed: results.filter((r) => !r.success).length,
|
|
6
|
+
cached: results.filter((r) => r.cached).length,
|
|
7
|
+
totalContentBlocks: results.reduce((sum, r) => sum + (r.contentBlocks ?? 0), 0),
|
|
8
|
+
};
|
|
9
|
+
const structuredContent = {
|
|
10
|
+
results,
|
|
11
|
+
summary,
|
|
12
|
+
fetchedAt: new Date().toISOString(),
|
|
13
|
+
};
|
|
14
|
+
return {
|
|
15
|
+
content: [
|
|
16
|
+
{
|
|
17
|
+
type: 'text',
|
|
18
|
+
text: JSON.stringify(structuredContent, null, 2),
|
|
19
|
+
},
|
|
20
|
+
],
|
|
21
|
+
structuredContent,
|
|
22
|
+
};
|
|
23
|
+
}
|
|
24
|
+
//# sourceMappingURL=response-builder.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"response-builder.js","sourceRoot":"","sources":["../../../src/tools/utils/response-builder.ts"],"names":[],"mappings":"AAOA,MAAM,UAAU,mBAAmB,CACjC,OAAyB;IAEzB,MAAM,OAAO,GAAiB;QAC5B,KAAK,EAAE,OAAO,CAAC,MAAM;QACrB,UAAU,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM;QACnD,MAAM,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM;QAChD,MAAM,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM;QAC9C,kBAAkB,EAAE,OAAO,CAAC,MAAM,CAChC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,aAAa,IAAI,CAAC,CAAC,EACxC,CAAC,CACF;KACF,CAAC;IAEF,MAAM,iBAAiB,GAAyB;QAC9C,OAAO;QACP,OAAO;QACP,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;KACpC,CAAC;IAEF,OAAO;QACL,OAAO,EAAE;YACP;gBACE,IAAI,EAAE,MAAe;gBACrB,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,iBAAiB,EAAE,IAAI,EAAE,CAAC,CAAC;aACjD;SACF;QACD,iBAAiB;KAClB,CAAC;AACJ,CAAC"}
|
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
import type { ContentBlockUnion, MetadataBlock } from '../types
|
|
1
|
+
import type { ContentBlockUnion, MetadataBlock } from '../config/types.js';
|
|
2
2
|
export declare function toJsonl(blocks: ContentBlockUnion[], metadata?: MetadataBlock): string;
|
|
3
3
|
//# sourceMappingURL=jsonl.transformer.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"jsonl.transformer.d.ts","sourceRoot":"","sources":["../../src/transformers/jsonl.transformer.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"jsonl.transformer.d.ts","sourceRoot":"","sources":["../../src/transformers/jsonl.transformer.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,iBAAiB,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AA6B3E,wBAAgB,OAAO,CACrB,MAAM,EAAE,iBAAiB,EAAE,EAC3B,QAAQ,CAAC,EAAE,aAAa,GACvB,MAAM,CA0BR"}
|
|
@@ -5,7 +5,8 @@ function truncateBlock(block) {
|
|
|
5
5
|
switch (block.type) {
|
|
6
6
|
case 'paragraph':
|
|
7
7
|
case 'heading':
|
|
8
|
-
case 'code':
|
|
8
|
+
case 'code':
|
|
9
|
+
case 'blockquote': {
|
|
9
10
|
const truncated = truncateText(block.text, maxLength);
|
|
10
11
|
return truncated === block.text ? block : { ...block, text: truncated };
|
|
11
12
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"jsonl.transformer.js","sourceRoot":"","sources":["../../src/transformers/jsonl.transformer.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"jsonl.transformer.js","sourceRoot":"","sources":["../../src/transformers/jsonl.transformer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAG5C,OAAO,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AAErD,SAAS,aAAa,CAAC,KAAwB;IAC7C,MAAM,SAAS,GAAG,MAAM,CAAC,UAAU,CAAC,cAAc,CAAC;IAEnD,QAAQ,KAAK,CAAC,IAAI,EAAE,CAAC;QACnB,KAAK,WAAW,CAAC;QACjB,KAAK,SAAS,CAAC;QACf,KAAK,MAAM,CAAC;QACZ,KAAK,YAAY,CAAC,CAAC,CAAC;YAClB,MAAM,SAAS,GAAG,YAAY,CAAC,KAAK,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;YACtD,OAAO,SAAS,KAAK,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,GAAG,KAAK,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC;QAC1E,CAAC;QACD,KAAK,MAAM,CAAC,CAAC,CAAC;YACZ,MAAM,cAAc,GAAG,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAC9C,YAAY,CAAC,IAAI,EAAE,SAAS,CAAC,CAC9B,CAAC;YACF,MAAM,UAAU,GAAG,cAAc,CAAC,IAAI,CACpC,CAAC,IAAI,EAAE,CAAC,EAAE,EAAE,CAAC,IAAI,KAAK,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CACrC,CAAC;YACF,OAAO,UAAU,CAAC,CAAC,CAAC,EAAE,GAAG,KAAK,EAAE,KAAK,EAAE,cAAc,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC;QAClE,CAAC;QACD;YACE,OAAO,KAAK,CAAC;IACjB,CAAC;AACH,CAAC;AAED,MAAM,UAAU,OAAO,CACrB,MAA2B,EAC3B,QAAwB;IAExB,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,oDAAoD;IACpD,IAAI,QAAQ,EAAE,CAAC;QACb,IAAI,CAAC;YACH,MAAM,OAAO,GAAG;gBACd,IAAI,EAAE,QAAQ,CAAC,IAAI;gBACnB,KAAK,EAAE,QAAQ,CAAC,KAAK;gBACrB,GAAG,EAAE,QAAQ,CAAC,GAAG;aAClB,CAAC;YACF,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC;QACtC,CAAC;QAAC,MAAM,CAAC;YACP,wBAAwB;QAC1B,CAAC;IACH,CAAC;IAED,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,IAAI,CAAC;YACH,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QACnD,CAAC;QAAC,MAAM,CAAC;YACP,qCAAqC;QACvC,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"markdown.transformer.d.ts","sourceRoot":"","sources":["../../src/transformers/markdown.transformer.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"markdown.transformer.d.ts","sourceRoot":"","sources":["../../src/transformers/markdown.transformer.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AAiKxD,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,aAAa,GAAG,MAAM,CAsB7E"}
|
|
@@ -1,4 +1,69 @@
|
|
|
1
1
|
import TurndownService from 'turndown';
|
|
2
|
+
import { detectLanguage } from '../utils/language-detector.js';
|
|
3
|
+
// Markdown-specific noise patterns (minimal set - content-cleaner.ts handles most filtering)
|
|
4
|
+
// Only patterns that commonly appear as standalone lines in markdown output
|
|
5
|
+
const NOISE_LINE_PATTERNS = [
|
|
6
|
+
// Single letters or panel labels (common in code examples)
|
|
7
|
+
/^[A-Z]$/,
|
|
8
|
+
/^Panel\s+[A-Z]$/i,
|
|
9
|
+
// Empty structural elements that survive HTML->Markdown conversion
|
|
10
|
+
/^[•·→←↑↓►▼▲◄▶◀■□●○★☆✓✗✔✘×]+$/,
|
|
11
|
+
/^[,;:\-–—]+$/,
|
|
12
|
+
/^\[\d+\]$/,
|
|
13
|
+
/^\(\d+\)$/,
|
|
14
|
+
];
|
|
15
|
+
/**
|
|
16
|
+
* Check if a line is noise that should be removed
|
|
17
|
+
*/
|
|
18
|
+
function isNoiseLine(line) {
|
|
19
|
+
const trimmed = line.trim();
|
|
20
|
+
// Empty lines are fine
|
|
21
|
+
if (!trimmed)
|
|
22
|
+
return false;
|
|
23
|
+
// Don't filter lines inside code blocks, headings, or lists
|
|
24
|
+
if (trimmed.startsWith('#') ||
|
|
25
|
+
trimmed.startsWith('-') ||
|
|
26
|
+
trimmed.startsWith('*') ||
|
|
27
|
+
trimmed.startsWith('`') ||
|
|
28
|
+
trimmed.startsWith('>') ||
|
|
29
|
+
trimmed.startsWith('|')) {
|
|
30
|
+
return false;
|
|
31
|
+
}
|
|
32
|
+
// Check against noise patterns
|
|
33
|
+
for (const pattern of NOISE_LINE_PATTERNS) {
|
|
34
|
+
if (pattern.test(trimmed)) {
|
|
35
|
+
return true;
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
return false;
|
|
39
|
+
}
|
|
40
|
+
/**
|
|
41
|
+
* Post-process markdown to remove noise lines
|
|
42
|
+
*/
|
|
43
|
+
function cleanMarkdownContent(markdown) {
|
|
44
|
+
// Split by lines but preserve code blocks
|
|
45
|
+
const lines = markdown.split('\n');
|
|
46
|
+
const cleanedLines = [];
|
|
47
|
+
let inCodeBlock = false;
|
|
48
|
+
for (const line of lines) {
|
|
49
|
+
// Track code block boundaries
|
|
50
|
+
if (line.trim().startsWith('```')) {
|
|
51
|
+
inCodeBlock = !inCodeBlock;
|
|
52
|
+
cleanedLines.push(line);
|
|
53
|
+
continue;
|
|
54
|
+
}
|
|
55
|
+
// Don't filter inside code blocks
|
|
56
|
+
if (inCodeBlock) {
|
|
57
|
+
cleanedLines.push(line);
|
|
58
|
+
continue;
|
|
59
|
+
}
|
|
60
|
+
// Filter noise lines outside code blocks
|
|
61
|
+
if (!isNoiseLine(line)) {
|
|
62
|
+
cleanedLines.push(line);
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
return cleanedLines.join('\n');
|
|
66
|
+
}
|
|
2
67
|
const turndown = new TurndownService({
|
|
3
68
|
headingStyle: 'atx',
|
|
4
69
|
codeBlockStyle: 'fenced',
|
|
@@ -10,13 +75,37 @@ turndown.addRule('removeNoise', {
|
|
|
10
75
|
filter: ['script', 'style', 'noscript', 'nav', 'footer', 'aside', 'iframe'],
|
|
11
76
|
replacement: () => '',
|
|
12
77
|
});
|
|
78
|
+
// Enhanced code block handling with language detection
|
|
79
|
+
turndown.addRule('fencedCodeBlockWithLanguage', {
|
|
80
|
+
filter: (node, options) => {
|
|
81
|
+
return (options.codeBlockStyle === 'fenced' &&
|
|
82
|
+
node.nodeName === 'PRE' &&
|
|
83
|
+
node.firstChild !== null &&
|
|
84
|
+
node.firstChild.nodeName === 'CODE');
|
|
85
|
+
},
|
|
86
|
+
replacement: (_content, node) => {
|
|
87
|
+
const codeNode = node.firstChild;
|
|
88
|
+
const code = codeNode.textContent || '';
|
|
89
|
+
// Try to get language from class
|
|
90
|
+
const className = codeNode.getAttribute('class') ?? '';
|
|
91
|
+
const dataLang = codeNode.getAttribute('data-language') ?? '';
|
|
92
|
+
const languageMatch = /language-(\w+)/.exec(className) ??
|
|
93
|
+
/lang-(\w+)/.exec(className) ??
|
|
94
|
+
/highlight-(\w+)/.exec(className) ??
|
|
95
|
+
/^(\w+)$/.exec(dataLang);
|
|
96
|
+
// Use detected language from class, or try to detect from content
|
|
97
|
+
const language = languageMatch?.[1] ?? detectLanguage(code) ?? '';
|
|
98
|
+
return `\n\n\`\`\`${language}\n${code.replace(/\n$/, '')}\n\`\`\`\n\n`;
|
|
99
|
+
},
|
|
100
|
+
});
|
|
13
101
|
// Pre-compiled regex patterns
|
|
14
|
-
const YAML_SPECIAL_CHARS = /[:[\]{}"\n\r'|>&*!?,#]/;
|
|
102
|
+
const YAML_SPECIAL_CHARS = /[:[\]{}"\n\r\t'|>&*!?,#]/;
|
|
15
103
|
const YAML_NUMERIC = /^[\d.]+$/;
|
|
16
104
|
const YAML_RESERVED_WORDS = /^(true|false|null|yes|no|on|off)$/i;
|
|
17
105
|
const ESCAPE_BACKSLASH = /\\/g;
|
|
18
106
|
const ESCAPE_QUOTE = /"/g;
|
|
19
107
|
const ESCAPE_NEWLINE = /\n/g;
|
|
108
|
+
const ESCAPE_TAB = /\t/g;
|
|
20
109
|
const MULTIPLE_NEWLINES = /\n{3,}/g;
|
|
21
110
|
function escapeYamlValue(value) {
|
|
22
111
|
const needsQuoting = YAML_SPECIAL_CHARS.test(value) ||
|
|
@@ -30,7 +119,8 @@ function escapeYamlValue(value) {
|
|
|
30
119
|
return `"${value
|
|
31
120
|
.replace(ESCAPE_BACKSLASH, '\\\\')
|
|
32
121
|
.replace(ESCAPE_QUOTE, '\\"')
|
|
33
|
-
.replace(ESCAPE_NEWLINE, '\\n')
|
|
122
|
+
.replace(ESCAPE_NEWLINE, '\\n')
|
|
123
|
+
.replace(ESCAPE_TAB, '\\t')}"`;
|
|
34
124
|
}
|
|
35
125
|
function createFrontmatter(metadata) {
|
|
36
126
|
const lines = ['---'];
|
|
@@ -43,18 +133,22 @@ function createFrontmatter(metadata) {
|
|
|
43
133
|
}
|
|
44
134
|
export function htmlToMarkdown(html, metadata) {
|
|
45
135
|
if (!html || typeof html !== 'string') {
|
|
46
|
-
return metadata ? createFrontmatter(metadata)
|
|
136
|
+
return metadata ? `${createFrontmatter(metadata)}\n\n` : '';
|
|
47
137
|
}
|
|
48
138
|
let content = '';
|
|
49
139
|
try {
|
|
50
140
|
content = turndown.turndown(html);
|
|
51
141
|
content = content.replace(MULTIPLE_NEWLINES, '\n\n').trim();
|
|
142
|
+
// Clean up noise lines from the markdown
|
|
143
|
+
content = cleanMarkdownContent(content);
|
|
144
|
+
// Final cleanup of multiple newlines after removing noise
|
|
145
|
+
content = content.replace(MULTIPLE_NEWLINES, '\n\n').trim();
|
|
52
146
|
}
|
|
53
147
|
catch {
|
|
54
|
-
return metadata ? createFrontmatter(metadata)
|
|
148
|
+
return metadata ? `${createFrontmatter(metadata)}\n\n` : '';
|
|
55
149
|
}
|
|
56
150
|
if (metadata) {
|
|
57
|
-
return createFrontmatter(metadata)
|
|
151
|
+
return `${createFrontmatter(metadata)}\n\n${content}`;
|
|
58
152
|
}
|
|
59
153
|
return content;
|
|
60
154
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"markdown.transformer.js","sourceRoot":"","sources":["../../src/transformers/markdown.transformer.ts"],"names":[],"mappings":"AAAA,OAAO,eAAe,MAAM,UAAU,CAAC;
|
|
1
|
+
{"version":3,"file":"markdown.transformer.js","sourceRoot":"","sources":["../../src/transformers/markdown.transformer.ts"],"names":[],"mappings":"AAAA,OAAO,eAAe,MAAM,UAAU,CAAC;AAIvC,OAAO,EAAE,cAAc,EAAE,MAAM,+BAA+B,CAAC;AAE/D,6FAA6F;AAC7F,4EAA4E;AAC5E,MAAM,mBAAmB,GAAsB;IAC7C,2DAA2D;IAC3D,SAAS;IACT,kBAAkB;IAElB,mEAAmE;IACnE,8BAA8B;IAC9B,cAAc;IACd,WAAW;IACX,WAAW;CACH,CAAC;AAEX;;GAEG;AACH,SAAS,WAAW,CAAC,IAAY;IAC/B,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAE5B,uBAAuB;IACvB,IAAI,CAAC,OAAO;QAAE,OAAO,KAAK,CAAC;IAE3B,4DAA4D;IAC5D,IACE,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC;QACvB,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC;QACvB,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC;QACvB,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC;QACvB,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC;QACvB,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC,EACvB,CAAC;QACD,OAAO,KAAK,CAAC;IACf,CAAC;IAED,+BAA+B;IAC/B,KAAK,MAAM,OAAO,IAAI,mBAAmB,EAAE,CAAC;QAC1C,IAAI,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;YAC1B,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;GAEG;AACH,SAAS,oBAAoB,CAAC,QAAgB;IAC5C,0CAA0C;IAC1C,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACnC,MAAM,YAAY,GAAa,EAAE,CAAC;IAClC,IAAI,WAAW,GAAG,KAAK,CAAC;IAExB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,8BAA8B;QAC9B,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC,UAAU,CAAC,KAAK,CAAC,EAAE,CAAC;YAClC,WAAW,GAAG,CAAC,WAAW,CAAC;YAC3B,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACxB,SAAS;QACX,CAAC;QAED,kCAAkC;QAClC,IAAI,WAAW,EAAE,CAAC;YAChB,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACxB,SAAS;QACX,CAAC;QAED,yCAAyC;QACzC,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,EAAE,CAAC;YACvB,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC1B,CAAC;IACH,CAAC;IAED,OAAO,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACjC,CAAC;AAED,MAAM,QAAQ,GAAG,IAAI,eAAe,CAAC;IACnC,YAAY,EAAE,KAAK;IACnB,cAAc,EAAE,QAAQ;IACxB,WAAW,EAAE,GAAG;IAChB,gBAAgB,EAAE,GAAG;CACtB,CAAC,CAAC;AAEH,wBAAwB;AACxB,QAAQ,CAAC,OAAO,CAAC,aAAa,EAAE;IAC9B,MAAM,EAAE,CAAC,QAAQ,EAAE,OAAO,EAAE,UAAU,EAAE,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,QAAQ,CAAC;IAC3E,WAAW,EAAE,GAAG,EAAE,CAAC,EAAE;CACtB,CAAC,CAAC;AAEH,uDAAuD;AACvD,QAAQ,CAAC,OAAO,CAAC,6BAA6B,EAAE;IAC9C,MAAM,EAAE,CAAC,IAAI,EAAE,OAAO,EAAE,EAAE;QACxB,OAAO,CACL,OAAO,CAAC,cAAc,KAAK,QAAQ;YACnC,IAAI,CAAC,QAAQ,KAAK,KAAK;YACvB,IAAI,CAAC,UAAU,KAAK,IAAI;YACxB,IAAI,CAAC,UAAU,CAAC,QAAQ,KAAK,MAAM,CACpC,CAAC;IACJ,CAAC;IACD,WAAW,EAAE,CAAC,QAAQ,EAAE,IAAI,EAAE,EAAE;QAC9B,MAAM,QAAQ,GAAG,IAAI,CAAC,UAAyB,CAAC;QAChD,MAAM,IAAI,GAAG,QAAQ,CAAC,WAAW,IAAI,EAAE,CAAC;QAExC,iCAAiC;QACjC,MAAM,SAAS,GAAG,QAAQ,CAAC,YAAY,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;QACvD,MAAM,QAAQ,GAAG,QAAQ,CAAC,YAAY,CAAC,eAAe,CAAC,IAAI,EAAE,CAAC;QAE9D,MAAM,aAAa,GACjB,gBAAgB,CAAC,IAAI,CAAC,SAAS,CAAC;YAChC,YAAY,CAAC,IAAI,CAAC,SAAS,CAAC;YAC5B,iBAAiB,CAAC,IAAI,CAAC,SAAS,CAAC;YACjC,SAAS,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAE3B,kEAAkE;QAClE,MAAM,QAAQ,GAAG,aAAa,EAAE,CAAC,CAAC,CAAC,IAAI,cAAc,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;QAElE,OAAO,aAAa,QAAQ,KAAK,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,cAAc,CAAC;IACzE,CAAC;CACF,CAAC,CAAC;AAEH,8BAA8B;AAC9B,MAAM,kBAAkB,GAAG,0BAA0B,CAAC;AACtD,MAAM,YAAY,GAAG,UAAU,CAAC;AAChC,MAAM,mBAAmB,GAAG,oCAAoC,CAAC;AACjE,MAAM,gBAAgB,GAAG,KAAK,CAAC;AAC/B,MAAM,YAAY,GAAG,IAAI,CAAC;AAC1B,MAAM,cAAc,GAAG,KAAK,CAAC;AAC7B,MAAM,UAAU,GAAG,KAAK,CAAC;AACzB,MAAM,iBAAiB,GAAG,SAAS,CAAC;AAEpC,SAAS,eAAe,CAAC,KAAa;IACpC,MAAM,YAAY,GAChB,kBAAkB,CAAC,IAAI,CAAC,KAAK,CAAC;QAC9B,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC;QACrB,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC;QACnB,KAAK,KAAK,EAAE;QACZ,YAAY,CAAC,IAAI,CAAC,KAAK,CAAC;QACxB,mBAAmB,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAElC,IAAI,CAAC,YAAY;QAAE,OAAO,KAAK,CAAC;IAEhC,OAAO,IAAI,KAAK;SACb,OAAO,CAAC,gBAAgB,EAAE,MAAM,CAAC;SACjC,OAAO,CAAC,YAAY,EAAE,KAAK,CAAC;SAC5B,OAAO,CAAC,cAAc,EAAE,KAAK,CAAC;SAC9B,OAAO,CAAC,UAAU,EAAE,KAAK,CAAC,GAAG,CAAC;AACnC,CAAC;AAED,SAAS,iBAAiB,CAAC,QAAuB;IAChD,MAAM,KAAK,GAAG,CAAC,KAAK,CAAC,CAAC;IACtB,IAAI,QAAQ,CAAC,KAAK;QAAE,KAAK,CAAC,IAAI,CAAC,UAAU,eAAe,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;IAC5E,IAAI,QAAQ,CAAC,GAAG;QAAE,KAAK,CAAC,IAAI,CAAC,WAAW,eAAe,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IACzE,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAClB,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED,MAAM,UAAU,cAAc,CAAC,IAAY,EAAE,QAAwB;IACnE,IAAI,CAAC,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;QACtC,OAAO,QAAQ,CAAC,CAAC,CAAC,GAAG,iBAAiB,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC;IAC9D,CAAC;IAED,IAAI,OAAO,GAAG,EAAE,CAAC;IACjB,IAAI,CAAC;QACH,OAAO,GAAG,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;QAClC,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,iBAAiB,EAAE,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC;QAC5D,yCAAyC;QACzC,OAAO,GAAG,oBAAoB,CAAC,OAAO,CAAC,CAAC;QACxC,0DAA0D;QAC1D,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,iBAAiB,EAAE,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC;IAC9D,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,QAAQ,CAAC,CAAC,CAAC,GAAG,iBAAiB,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC;IAC9D,CAAC;IAED,IAAI,QAAQ,EAAE,CAAC;QACb,OAAO,GAAG,iBAAiB,CAAC,QAAQ,CAAC,OAAO,OAAO,EAAE,CAAC;IACxD,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC"}
|
|
@@ -4,9 +4,9 @@ interface ContentBlock {
|
|
|
4
4
|
}
|
|
5
5
|
export interface MetadataBlock extends ContentBlock {
|
|
6
6
|
type: 'metadata';
|
|
7
|
-
title?: string;
|
|
8
|
-
description?: string;
|
|
9
|
-
author?: string;
|
|
7
|
+
title?: string | undefined;
|
|
8
|
+
description?: string | undefined;
|
|
9
|
+
author?: string | undefined;
|
|
10
10
|
url: string;
|
|
11
11
|
fetchedAt: string;
|
|
12
12
|
}
|
|
@@ -26,27 +26,27 @@ export interface ListBlock extends ContentBlock {
|
|
|
26
26
|
}
|
|
27
27
|
export interface CodeBlock extends ContentBlock {
|
|
28
28
|
type: 'code';
|
|
29
|
-
language?: string;
|
|
29
|
+
language?: string | undefined;
|
|
30
30
|
text: string;
|
|
31
31
|
}
|
|
32
32
|
export interface TableBlock extends ContentBlock {
|
|
33
33
|
type: 'table';
|
|
34
|
-
headers?: string[];
|
|
34
|
+
headers?: string[] | undefined;
|
|
35
35
|
rows: string[][];
|
|
36
36
|
}
|
|
37
37
|
export interface ImageBlock extends ContentBlock {
|
|
38
38
|
type: 'image';
|
|
39
39
|
src: string;
|
|
40
|
-
alt?: string;
|
|
40
|
+
alt?: string | undefined;
|
|
41
41
|
}
|
|
42
42
|
export type ContentBlockUnion = MetadataBlock | HeadingBlock | ParagraphBlock | ListBlock | CodeBlock | TableBlock | ImageBlock;
|
|
43
43
|
export interface ExtractedArticle {
|
|
44
|
-
title?: string;
|
|
45
|
-
byline?: string;
|
|
44
|
+
title?: string | undefined;
|
|
45
|
+
byline?: string | undefined;
|
|
46
46
|
content: string;
|
|
47
47
|
textContent: string;
|
|
48
|
-
excerpt?: string;
|
|
49
|
-
siteName?: string;
|
|
48
|
+
excerpt?: string | undefined;
|
|
49
|
+
siteName?: string | undefined;
|
|
50
50
|
}
|
|
51
51
|
export interface CacheEntry {
|
|
52
52
|
url: string;
|
|
@@ -57,7 +57,7 @@ export interface CacheEntry {
|
|
|
57
57
|
export interface ExtractedLink {
|
|
58
58
|
href: string;
|
|
59
59
|
text: string;
|
|
60
|
-
type: 'internal' | 'external';
|
|
60
|
+
type: 'internal' | 'external' | 'image';
|
|
61
61
|
}
|
|
62
62
|
export {};
|
|
63
63
|
//# sourceMappingURL=content.types.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"content.types.d.ts","sourceRoot":"","sources":["../../src/types/content.types.ts"],"names":[],"mappings":"AACA,KAAK,gBAAgB,GACjB,UAAU,GACV,SAAS,GACT,WAAW,GACX,MAAM,GACN,MAAM,GACN,OAAO,GACP,OAAO,CAAC;AAGZ,UAAU,YAAY;IACpB,IAAI,EAAE,gBAAgB,CAAC;CACxB;AAGD,MAAM,WAAW,aAAc,SAAQ,YAAY;IACjD,IAAI,EAAE,UAAU,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;
|
|
1
|
+
{"version":3,"file":"content.types.d.ts","sourceRoot":"","sources":["../../src/types/content.types.ts"],"names":[],"mappings":"AACA,KAAK,gBAAgB,GACjB,UAAU,GACV,SAAS,GACT,WAAW,GACX,MAAM,GACN,MAAM,GACN,OAAO,GACP,OAAO,CAAC;AAGZ,UAAU,YAAY;IACpB,IAAI,EAAE,gBAAgB,CAAC;CACxB;AAGD,MAAM,WAAW,aAAc,SAAQ,YAAY;IACjD,IAAI,EAAE,UAAU,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC3B,WAAW,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IACjC,MAAM,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC5B,GAAG,EAAE,MAAM,CAAC;IACZ,SAAS,EAAE,MAAM,CAAC;CACnB;AAGD,MAAM,WAAW,YAAa,SAAQ,YAAY;IAChD,IAAI,EAAE,SAAS,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;CACd;AAGD,MAAM,WAAW,cAAe,SAAQ,YAAY;IAClD,IAAI,EAAE,WAAW,CAAC;IAClB,IAAI,EAAE,MAAM,CAAC;CACd;AAGD,MAAM,WAAW,SAAU,SAAQ,YAAY;IAC7C,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,OAAO,CAAC;IACjB,KAAK,EAAE,MAAM,EAAE,CAAC;CACjB;AAGD,MAAM,WAAW,SAAU,SAAQ,YAAY;IAC7C,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC9B,IAAI,EAAE,MAAM,CAAC;CACd;AAGD,MAAM,WAAW,UAAW,SAAQ,YAAY;IAC9C,IAAI,EAAE,OAAO,CAAC;IACd,OAAO,CAAC,EAAE,MAAM,EAAE,GAAG,SAAS,CAAC;IAC/B,IAAI,EAAE,MAAM,EAAE,EAAE,CAAC;CAClB;AAGD,MAAM,WAAW,UAAW,SAAQ,YAAY;IAC9C,IAAI,EAAE,OAAO,CAAC;IACd,GAAG,EAAE,MAAM,CAAC;IACZ,GAAG,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;CAC1B;AAGD,MAAM,MAAM,iBAAiB,GACzB,aAAa,GACb,YAAY,GACZ,cAAc,GACd,SAAS,GACT,SAAS,GACT,UAAU,GACV,UAAU,CAAC;AAGf,MAAM,WAAW,gBAAgB;IAC/B,KAAK,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC3B,MAAM,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC5B,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC7B,QAAQ,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;CAC/B;AAGD,MAAM,WAAW,UAAU;IACzB,GAAG,EAAE,MAAM,CAAC;IACZ,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;CACnB;AAGD,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,UAAU,GAAG,UAAU,GAAG,OAAO,CAAC;CACzC"}
|
package/dist/types/index.d.ts
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/types/index.ts"],"names":[],"mappings":"AAAA,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/types/index.ts"],"names":[],"mappings":"AAAA,mBAAmB,oBAAoB,CAAC"}
|
package/dist/types/index.js
CHANGED
package/dist/types/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/types/index.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/types/index.ts"],"names":[],"mappings":""}
|
package/dist/types/schemas.d.ts
CHANGED
|
@@ -1,22 +1,49 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Tool input types - used for type safety in tool handlers
|
|
3
3
|
*/
|
|
4
|
-
|
|
4
|
+
/** Common request options shared across tools */
|
|
5
|
+
export interface RequestOptions {
|
|
6
|
+
/** Custom HTTP headers for the request */
|
|
7
|
+
customHeaders?: Record<string, string> | undefined;
|
|
8
|
+
/** Request timeout in milliseconds (1000-60000) */
|
|
9
|
+
timeout?: number | undefined;
|
|
10
|
+
/** Number of retry attempts (1-10) */
|
|
11
|
+
retries?: number | undefined;
|
|
12
|
+
}
|
|
13
|
+
export interface FetchUrlInput extends RequestOptions {
|
|
5
14
|
url: string;
|
|
6
|
-
extractMainContent?: boolean;
|
|
7
|
-
includeMetadata?: boolean;
|
|
8
|
-
maxContentLength?: number;
|
|
9
|
-
format?: 'jsonl' | 'markdown';
|
|
10
|
-
customHeaders?: Record<string, string>;
|
|
15
|
+
extractMainContent?: boolean | undefined;
|
|
16
|
+
includeMetadata?: boolean | undefined;
|
|
17
|
+
maxContentLength?: number | undefined;
|
|
18
|
+
format?: 'jsonl' | 'markdown' | undefined;
|
|
11
19
|
}
|
|
12
|
-
export interface FetchLinksInput {
|
|
20
|
+
export interface FetchLinksInput extends RequestOptions {
|
|
13
21
|
url: string;
|
|
14
|
-
includeExternal?: boolean;
|
|
15
|
-
includeInternal?: boolean;
|
|
22
|
+
includeExternal?: boolean | undefined;
|
|
23
|
+
includeInternal?: boolean | undefined;
|
|
24
|
+
/** Maximum number of links to return */
|
|
25
|
+
maxLinks?: number | undefined;
|
|
26
|
+
/** Regex pattern to filter links (matches against href) */
|
|
27
|
+
filterPattern?: string | undefined;
|
|
28
|
+
/** Include image links (img src attributes) */
|
|
29
|
+
includeImages?: boolean | undefined;
|
|
16
30
|
}
|
|
17
|
-
export interface FetchMarkdownInput {
|
|
31
|
+
export interface FetchMarkdownInput extends RequestOptions {
|
|
18
32
|
url: string;
|
|
19
|
-
extractMainContent?: boolean;
|
|
20
|
-
includeMetadata?: boolean;
|
|
33
|
+
extractMainContent?: boolean | undefined;
|
|
34
|
+
includeMetadata?: boolean | undefined;
|
|
35
|
+
/** Maximum content length in characters */
|
|
36
|
+
maxContentLength?: number | undefined;
|
|
37
|
+
/** Generate table of contents from headings */
|
|
38
|
+
generateToc?: boolean | undefined;
|
|
39
|
+
}
|
|
40
|
+
export interface FetchUrlsInput extends RequestOptions {
|
|
41
|
+
urls: string[];
|
|
42
|
+
extractMainContent?: boolean | undefined;
|
|
43
|
+
includeMetadata?: boolean | undefined;
|
|
44
|
+
maxContentLength?: number | undefined;
|
|
45
|
+
format?: 'jsonl' | 'markdown' | undefined;
|
|
46
|
+
concurrency?: number | undefined;
|
|
47
|
+
continueOnError?: boolean | undefined;
|
|
21
48
|
}
|
|
22
49
|
//# sourceMappingURL=schemas.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"schemas.d.ts","sourceRoot":"","sources":["../../src/types/schemas.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,MAAM,WAAW,aAAa;
|
|
1
|
+
{"version":3,"file":"schemas.d.ts","sourceRoot":"","sources":["../../src/types/schemas.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,iDAAiD;AACjD,MAAM,WAAW,cAAc;IAC7B,0CAA0C;IAC1C,aAAa,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,SAAS,CAAC;IACnD,mDAAmD;IACnD,OAAO,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC7B,sCAAsC;IACtC,OAAO,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;CAC9B;AAED,MAAM,WAAW,aAAc,SAAQ,cAAc;IACnD,GAAG,EAAE,MAAM,CAAC;IACZ,kBAAkB,CAAC,EAAE,OAAO,GAAG,SAAS,CAAC;IACzC,eAAe,CAAC,EAAE,OAAO,GAAG,SAAS,CAAC;IACtC,gBAAgB,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IACtC,MAAM,CAAC,EAAE,OAAO,GAAG,UAAU,GAAG,SAAS,CAAC;CAC3C;AAED,MAAM,WAAW,eAAgB,SAAQ,cAAc;IACrD,GAAG,EAAE,MAAM,CAAC;IACZ,eAAe,CAAC,EAAE,OAAO,GAAG,SAAS,CAAC;IACtC,eAAe,CAAC,EAAE,OAAO,GAAG,SAAS,CAAC;IACtC,wCAAwC;IACxC,QAAQ,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC9B,2DAA2D;IAC3D,aAAa,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IACnC,+CAA+C;IAC/C,aAAa,CAAC,EAAE,OAAO,GAAG,SAAS,CAAC;CACrC;AAED,MAAM,WAAW,kBAAmB,SAAQ,cAAc;IACxD,GAAG,EAAE,MAAM,CAAC;IACZ,kBAAkB,CAAC,EAAE,OAAO,GAAG,SAAS,CAAC;IACzC,eAAe,CAAC,EAAE,OAAO,GAAG,SAAS,CAAC;IACtC,2CAA2C;IAC3C,gBAAgB,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IACtC,+CAA+C;IAC/C,WAAW,CAAC,EAAE,OAAO,GAAG,SAAS,CAAC;CACnC;AAED,MAAM,WAAW,cAAe,SAAQ,cAAc;IACpD,IAAI,EAAE,MAAM,EAAE,CAAC;IACf,kBAAkB,CAAC,EAAE,OAAO,GAAG,SAAS,CAAC;IACzC,eAAe,CAAC,EAAE,OAAO,GAAG,SAAS,CAAC;IACtC,gBAAgB,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IACtC,MAAM,CAAC,EAAE,OAAO,GAAG,UAAU,GAAG,SAAS,CAAC;IAC1C,WAAW,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IACjC,eAAe,CAAC,EAAE,OAAO,GAAG,SAAS,CAAC;CACvC"}
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
interface ConcurrencyOptions {
|
|
2
|
+
onProgress?: (completed: number, total: number) => void;
|
|
3
|
+
}
|
|
4
|
+
export declare function runWithConcurrency<T>(limit: number, tasks: (() => Promise<T>)[], options?: ConcurrencyOptions): Promise<PromiseSettledResult<T>[]>;
|
|
5
|
+
export {};
|
|
6
|
+
//# sourceMappingURL=concurrency.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"concurrency.d.ts","sourceRoot":"","sources":["../../src/utils/concurrency.ts"],"names":[],"mappings":"AAEA,UAAU,kBAAkB;IAC1B,UAAU,CAAC,EAAE,CAAC,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,KAAK,IAAI,CAAC;CACzD;AAsBD,wBAAsB,kBAAkB,CAAC,CAAC,EACxC,KAAK,EAAE,MAAM,EACb,KAAK,EAAE,CAAC,MAAM,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,EAC3B,OAAO,CAAC,EAAE,kBAAkB,GAC3B,OAAO,CAAC,oBAAoB,CAAC,CAAC,CAAC,EAAE,CAAC,CAiBpC"}
|