@j0hanz/superfetch 1.1.9 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +179 -469
- package/dist/config/constants.d.ts +19 -0
- package/dist/config/constants.d.ts.map +1 -0
- package/dist/config/constants.js +24 -0
- package/dist/config/constants.js.map +1 -0
- package/dist/config/formatting.d.ts +0 -2
- package/dist/config/formatting.d.ts.map +1 -1
- package/dist/config/formatting.js +1 -3
- package/dist/config/formatting.js.map +1 -1
- package/dist/config/index.d.ts +9 -3
- package/dist/config/index.d.ts.map +1 -1
- package/dist/config/index.js +19 -16
- package/dist/config/index.js.map +1 -1
- package/dist/config/types/content.d.ts +1 -20
- package/dist/config/types/content.d.ts.map +1 -1
- package/dist/config/types/content.js +0 -1
- package/dist/config/types/runtime.d.ts +7 -5
- package/dist/config/types/runtime.d.ts.map +1 -1
- package/dist/config/types/runtime.js +0 -1
- package/dist/config/types/tools.d.ts +5 -50
- package/dist/config/types/tools.d.ts.map +1 -1
- package/dist/config/types/tools.js +0 -1
- package/dist/errors/app-error.d.ts +0 -1
- package/dist/errors/app-error.js +0 -1
- package/dist/http/auth.d.ts +0 -1
- package/dist/http/auth.d.ts.map +1 -1
- package/dist/http/auth.js +17 -13
- package/dist/http/auth.js.map +1 -1
- package/dist/http/cors.d.ts +0 -1
- package/dist/http/cors.js +4 -1
- package/dist/http/cors.js.map +1 -1
- package/dist/http/download-routes.d.ts +14 -0
- package/dist/http/download-routes.d.ts.map +1 -0
- package/dist/http/download-routes.js +131 -0
- package/dist/http/download-routes.js.map +1 -0
- package/dist/http/mcp-routes.d.ts +1 -2
- package/dist/http/mcp-routes.d.ts.map +1 -1
- package/dist/http/mcp-routes.js +1 -2
- package/dist/http/mcp-routes.js.map +1 -1
- package/dist/http/mcp-session-helpers.d.ts +13 -0
- package/dist/http/mcp-session-helpers.d.ts.map +1 -0
- package/dist/http/mcp-session-helpers.js +64 -0
- package/dist/http/mcp-session-helpers.js.map +1 -0
- package/dist/http/mcp-session.d.ts +1 -3
- package/dist/http/mcp-session.d.ts.map +1 -1
- package/dist/http/mcp-session.js +7 -71
- package/dist/http/mcp-session.js.map +1 -1
- package/dist/http/mcp-validation.d.ts +1 -2
- package/dist/http/mcp-validation.d.ts.map +1 -1
- package/dist/http/mcp-validation.js +6 -27
- package/dist/http/mcp-validation.js.map +1 -1
- package/dist/http/rate-limit.d.ts +1 -2
- package/dist/http/rate-limit.d.ts.map +1 -1
- package/dist/http/rate-limit.js +0 -1
- package/dist/http/rate-limit.js.map +1 -1
- package/dist/http/server-middleware.d.ts +9 -0
- package/dist/http/server-middleware.d.ts.map +1 -0
- package/dist/http/server-middleware.js +111 -0
- package/dist/http/server-middleware.js.map +1 -0
- package/dist/http/server.d.ts +0 -1
- package/dist/http/server.d.ts.map +1 -1
- package/dist/http/server.js +20 -99
- package/dist/http/server.js.map +1 -1
- package/dist/http/session-cleanup.d.ts +2 -0
- package/dist/http/session-cleanup.d.ts.map +1 -0
- package/dist/http/session-cleanup.js +37 -0
- package/dist/http/session-cleanup.js.map +1 -0
- package/dist/http/sessions.d.ts +1 -2
- package/dist/http/sessions.d.ts.map +1 -1
- package/dist/http/sessions.js +0 -1
- package/dist/index.d.ts +0 -1
- package/dist/index.js +13 -6
- package/dist/index.js.map +1 -1
- package/dist/middleware/error-handler.d.ts +0 -1
- package/dist/middleware/error-handler.js +0 -1
- package/dist/resources/cached-content.d.ts +0 -1
- package/dist/resources/cached-content.d.ts.map +1 -1
- package/dist/resources/cached-content.js +76 -12
- package/dist/resources/cached-content.js.map +1 -1
- package/dist/resources/index.d.ts +0 -1
- package/dist/resources/index.js +0 -1
- package/dist/server.d.ts +0 -1
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +8 -3
- package/dist/server.js.map +1 -1
- package/dist/services/cache.d.ts +7 -4
- package/dist/services/cache.d.ts.map +1 -1
- package/dist/services/cache.js +86 -26
- package/dist/services/cache.js.map +1 -1
- package/dist/services/context.d.ts +2 -2
- package/dist/services/context.d.ts.map +1 -1
- package/dist/services/context.js +0 -1
- package/dist/services/extractor.d.ts +1 -2
- package/dist/services/extractor.d.ts.map +1 -1
- package/dist/services/extractor.js +45 -18
- package/dist/services/extractor.js.map +1 -1
- package/dist/services/fetcher/agents.d.ts +0 -1
- package/dist/services/fetcher/agents.d.ts.map +1 -1
- package/dist/services/fetcher/agents.js +3 -7
- package/dist/services/fetcher/agents.js.map +1 -1
- package/dist/services/fetcher/errors.d.ts +0 -1
- package/dist/services/fetcher/errors.js +0 -1
- package/dist/services/fetcher/headers.d.ts.map +1 -1
- package/dist/services/fetcher/headers.js +2 -24
- package/dist/services/fetcher/headers.js.map +1 -1
- package/dist/services/fetcher/interceptors.d.ts +2 -2
- package/dist/services/fetcher/interceptors.d.ts.map +1 -1
- package/dist/services/fetcher/interceptors.js +30 -21
- package/dist/services/fetcher/interceptors.js.map +1 -1
- package/dist/services/fetcher/redirects.d.ts +0 -2
- package/dist/services/fetcher/redirects.d.ts.map +1 -1
- package/dist/services/fetcher/redirects.js +20 -18
- package/dist/services/fetcher/redirects.js.map +1 -1
- package/dist/services/fetcher/response.d.ts +0 -1
- package/dist/services/fetcher/response.js +4 -5
- package/dist/services/fetcher/retry-policy.d.ts +1 -28
- package/dist/services/fetcher/retry-policy.d.ts.map +1 -1
- package/dist/services/fetcher/retry-policy.js +119 -126
- package/dist/services/fetcher/retry-policy.js.map +1 -1
- package/dist/services/fetcher.d.ts +1 -2
- package/dist/services/fetcher.d.ts.map +1 -1
- package/dist/services/fetcher.js +18 -13
- package/dist/services/fetcher.js.map +1 -1
- package/dist/services/logger.d.ts +1 -2
- package/dist/services/logger.d.ts.map +1 -1
- package/dist/services/logger.js +0 -1
- package/dist/services/parser.d.ts +1 -3
- package/dist/services/parser.d.ts.map +1 -1
- package/dist/services/parser.js +5 -39
- package/dist/services/parser.js.map +1 -1
- package/dist/tools/handlers/fetch-links/link-extractor.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-links/link-extractor.js +15 -19
- package/dist/tools/handlers/fetch-links/link-extractor.js.map +1 -1
- package/dist/tools/handlers/fetch-links.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-links.tool.js +0 -2
- package/dist/tools/handlers/fetch-links.tool.js.map +1 -1
- package/dist/tools/handlers/fetch-markdown.tool.d.ts +1 -2
- package/dist/tools/handlers/fetch-markdown.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-markdown.tool.js +50 -20
- package/dist/tools/handlers/fetch-markdown.tool.js.map +1 -1
- package/dist/tools/handlers/fetch-single.shared.d.ts +14 -3
- package/dist/tools/handlers/fetch-single.shared.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-single.shared.js +66 -3
- package/dist/tools/handlers/fetch-single.shared.js.map +1 -1
- package/dist/tools/handlers/fetch-url.tool.d.ts +1 -2
- package/dist/tools/handlers/fetch-url.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-url.tool.js +39 -17
- package/dist/tools/handlers/fetch-url.tool.js.map +1 -1
- package/dist/tools/handlers/fetch-urls/validation.d.ts +0 -1
- package/dist/tools/handlers/fetch-urls/validation.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-urls/validation.js +1 -1
- package/dist/tools/handlers/fetch-urls/validation.js.map +1 -1
- package/dist/tools/index.d.ts +0 -1
- package/dist/tools/index.d.ts.map +1 -1
- package/dist/tools/index.js +1 -20
- package/dist/tools/index.js.map +1 -1
- package/dist/tools/schemas.d.ts +57 -250
- package/dist/tools/schemas.d.ts.map +1 -1
- package/dist/tools/schemas.js +38 -198
- package/dist/tools/schemas.js.map +1 -1
- package/dist/tools/utils/cache-vary.d.ts +0 -2
- package/dist/tools/utils/cache-vary.d.ts.map +1 -1
- package/dist/tools/utils/cache-vary.js +8 -40
- package/dist/tools/utils/cache-vary.js.map +1 -1
- package/dist/tools/utils/common.d.ts +2 -4
- package/dist/tools/utils/common.d.ts.map +1 -1
- package/dist/tools/utils/common.js +6 -7
- package/dist/tools/utils/common.js.map +1 -1
- package/dist/tools/utils/content-transform.d.ts +1 -3
- package/dist/tools/utils/content-transform.d.ts.map +1 -1
- package/dist/tools/utils/content-transform.js +65 -14
- package/dist/tools/utils/content-transform.js.map +1 -1
- package/dist/tools/utils/fetch-pipeline.d.ts +1 -2
- package/dist/tools/utils/fetch-pipeline.d.ts.map +1 -1
- package/dist/tools/utils/fetch-pipeline.js +25 -21
- package/dist/tools/utils/fetch-pipeline.js.map +1 -1
- package/dist/tools/utils/inline-content.d.ts +3 -3
- package/dist/tools/utils/inline-content.d.ts.map +1 -1
- package/dist/tools/utils/inline-content.js +0 -1
- package/dist/transformers/jsonl.transformer.d.ts +1 -2
- package/dist/transformers/jsonl.transformer.d.ts.map +1 -1
- package/dist/transformers/jsonl.transformer.js +0 -1
- package/dist/transformers/jsonl.transformer.js.map +1 -1
- package/dist/transformers/markdown.transformer.d.ts +1 -2
- package/dist/transformers/markdown.transformer.d.ts.map +1 -1
- package/dist/transformers/markdown.transformer.js +11 -7
- package/dist/transformers/markdown.transformer.js.map +1 -1
- package/dist/utils/code-language.d.ts +2 -0
- package/dist/utils/code-language.d.ts.map +1 -0
- package/dist/utils/code-language.js +56 -0
- package/dist/utils/code-language.js.map +1 -0
- package/dist/utils/content-cleaner.d.ts +0 -2
- package/dist/utils/content-cleaner.d.ts.map +1 -1
- package/dist/utils/content-cleaner.js +0 -4
- package/dist/utils/content-cleaner.js.map +1 -1
- package/dist/utils/crypto.d.ts +2 -0
- package/dist/utils/crypto.d.ts.map +1 -0
- package/dist/utils/crypto.js +32 -0
- package/dist/utils/crypto.js.map +1 -0
- package/dist/utils/download-url.d.ts +8 -0
- package/dist/utils/download-url.d.ts.map +1 -0
- package/dist/utils/download-url.js +27 -0
- package/dist/utils/download-url.js.map +1 -0
- package/dist/utils/error-utils.d.ts +3 -0
- package/dist/utils/error-utils.d.ts.map +1 -0
- package/dist/utils/error-utils.js +12 -0
- package/dist/utils/error-utils.js.map +1 -0
- package/dist/utils/filename-generator.d.ts +1 -0
- package/dist/utils/filename-generator.d.ts.map +1 -0
- package/dist/utils/filename-generator.js +59 -0
- package/dist/utils/filename-generator.js.map +1 -0
- package/dist/utils/header-normalizer.d.ts +7 -4
- package/dist/utils/header-normalizer.d.ts.map +1 -1
- package/dist/utils/header-normalizer.js +23 -17
- package/dist/utils/header-normalizer.js.map +1 -1
- package/dist/utils/html-truncator.d.ts +0 -1
- package/dist/utils/html-truncator.js +0 -1
- package/dist/utils/sanitizer.d.ts +0 -1
- package/dist/utils/sanitizer.js +0 -1
- package/dist/utils/tool-error-handler.d.ts +1 -3
- package/dist/utils/tool-error-handler.d.ts.map +1 -1
- package/dist/utils/tool-error-handler.js +11 -6
- package/dist/utils/tool-error-handler.js.map +1 -1
- package/dist/utils/url-sanitizer.d.ts +2 -0
- package/dist/utils/url-sanitizer.d.ts.map +1 -0
- package/dist/utils/url-sanitizer.js +12 -0
- package/dist/utils/url-sanitizer.js.map +1 -0
- package/dist/utils/url-validator.d.ts +1 -3
- package/dist/utils/url-validator.d.ts.map +1 -1
- package/dist/utils/url-validator.js +89 -53
- package/dist/utils/url-validator.js.map +1 -1
- package/package.json +7 -9
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"schemas.js","sourceRoot":"","sources":["../../src/tools/schemas.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,MAAM,oBAAoB,GAAG,CAAC,CAAC,MAAM,CAAC;IACpC,aAAa,EAAE,CAAC;SACb,MAAM,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC;SAClB,QAAQ,EAAE;SACV,QAAQ,CAAC,qCAAqC,CAAC;IAClD,OAAO,EAAE,CAAC;SACP,MAAM,EAAE;SACR,GAAG,CAAC,IAAI,CAAC;SACT,GAAG,CAAC,
|
|
1
|
+
{"version":3,"file":"schemas.js","sourceRoot":"","sources":["../../src/tools/schemas.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAE5C,MAAM,oBAAoB,GAAG,CAAC,CAAC,MAAM,CAAC;IACpC,aAAa,EAAE,CAAC;SACb,MAAM,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC;SAClB,QAAQ,EAAE;SACV,QAAQ,CAAC,qCAAqC,CAAC;IAClD,OAAO,EAAE,CAAC;SACP,MAAM,EAAE;SACR,GAAG,CAAC,IAAI,CAAC;SACT,GAAG,CAAC,MAAM,CAAC;SACX,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC;SAC/B,QAAQ,CAAC,+CAA+C,CAAC;IAC5D,OAAO,EAAE,CAAC;SACP,MAAM,EAAE;SACR,GAAG,CAAC,CAAC,CAAC;SACN,GAAG,CAAC,EAAE,CAAC;SACP,OAAO,CAAC,CAAC,CAAC;SACV,QAAQ,CAAC,iCAAiC,CAAC;CAC/C,CAAC,CAAC;AAEH,MAAM,uBAAuB,GAAG,CAAC,CAAC,MAAM,CAAC;IACvC,kBAAkB,EAAE,CAAC;SAClB,OAAO,EAAE;SACT,OAAO,CAAC,IAAI,CAAC;SACb,QAAQ,CAAC,iDAAiD,CAAC;IAC9D,eAAe,EAAE,CAAC;SACf,OAAO,EAAE;SACT,OAAO,CAAC,IAAI,CAAC;SACb,QAAQ,CAAC,kDAAkD,CAAC;IAC/D,gBAAgB,EAAE,CAAC;SAChB,MAAM,EAAE;SACR,QAAQ,EAAE;SACV,QAAQ,EAAE;SACV,QAAQ,CAAC,sCAAsC,CAAC;CACpD,CAAC,CAAC;AAEH,MAAM,mBAAmB,GAAG,CAAC,CAAC,MAAM,CAAC;IACnC,MAAM,EAAE,CAAC;SACN,IAAI,CAAC,CAAC,OAAO,EAAE,UAAU,CAAC,CAAC;SAC3B,OAAO,CAAC,OAAO,CAAC;SAChB,QAAQ,CAAC,eAAe,CAAC;CAC7B,CAAC,CAAC;AAEH,MAAM,oBAAoB,GAAG,CAAC,CAAC,MAAM,CAAC;IACpC,WAAW,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,8BAA8B,CAAC;IAC3E,WAAW,EAAE,CAAC;SACX,MAAM,EAAE;SACR,QAAQ,EAAE;SACV,QAAQ,CAAC,kDAAkD,CAAC;IAC/D,gBAAgB,EAAE,CAAC;SAChB,MAAM,EAAE;SACR,QAAQ,EAAE;SACV,QAAQ,CAAC,gCAAgC,CAAC;IAC7C,MAAM,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,QAAQ,CAAC,0CAA0C,CAAC;IACxE,SAAS,EAAE,CAAC;SACT,OAAO,EAAE;SACT,QAAQ,EAAE;SACV,QAAQ,CAAC,mDAAmD,CAAC;IAChE,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,qCAAqC,CAAC;IAC5E,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,kCAAkC,CAAC;CAC9E,CAAC,CAAC;AAEH,MAAM,kBAAkB,GAAG,CAAC,CAAC,MAAM,CAAC;IAClC,WAAW,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,uCAAuC,CAAC;IACzE,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,iCAAiC,CAAC;IAChE,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,qCAAqC,CAAC;CACtE,CAAC,CAAC;AAEH,MAAM,CAAC,MAAM,mBAAmB,GAAG,oBAAoB;KACpD,MAAM,CAAC;IACN,GAAG,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,kBAAkB,CAAC;CACpD,CAAC;KACD,KAAK,CAAC,uBAAuB,CAAC;KAC9B,KAAK,CAAC,mBAAmB,CAAC;KAC1B,MAAM,EAAE,CAAC;AAEZ,MAAM,CAAC,MAAM,wBAAwB,GAAG,oBAAoB;KACzD,MAAM,CAAC;IACN,GAAG,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,kBAAkB,CAAC;CACpD,CAAC;KACD,KAAK,CAAC,uBAAuB,CAAC;KAC9B,MAAM,EAAE,CAAC;AAEZ,MAAM,CAAC,MAAM,oBAAoB,GAAG,CAAC;KAClC,MAAM,CAAC;IACN,GAAG,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,iBAAiB,CAAC;IAC3C,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,YAAY,CAAC;IACnD,aAAa,EAAE,CAAC;SACb,MAAM,EAAE;SACR,QAAQ,CAAC,iDAAiD,CAAC;IAC9D,SAAS,EAAE,CAAC;SACT,MAAM,EAAE;SACR,QAAQ,CAAC,+CAA+C,CAAC;IAC5D,MAAM,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE,UAAU,CAAC,CAAC,CAAC,QAAQ,CAAC,oBAAoB,CAAC;IACpE,OAAO,EAAE,CAAC;SACP,MAAM,EAAE;SACR,QAAQ,EAAE;SACV,QAAQ,CAAC,mDAAmD,CAAC;CACjE,CAAC;KACD,KAAK,CAAC,oBAAoB,CAAC;KAC3B,MAAM,EAAE,CAAC;AAEZ,MAAM,CAAC,MAAM,yBAAyB,GAAG,CAAC;KACvC,MAAM,CAAC;IACN,GAAG,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,iBAAiB,CAAC;IAC3C,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,YAAY,CAAC;IACnD,SAAS,EAAE,CAAC;SACT,MAAM,EAAE;SACR,QAAQ,CAAC,+CAA+C,CAAC;IAC5D,QAAQ,EAAE,CAAC;SACR,MAAM,EAAE;SACR,QAAQ,EAAE;SACV,QAAQ,CAAC,0CAA0C,CAAC;IACvD,IAAI,EAAE,kBAAkB;SACrB,QAAQ,EAAE;SACV,QAAQ,CAAC,6CAA6C,CAAC;CAC3D,CAAC;KACD,KAAK,CAAC,oBAAoB,CAAC;KAC3B,MAAM,EAAE,CAAC"}
|
|
@@ -1,3 +1 @@
|
|
|
1
|
-
export declare function normalizeHeadersForCache(headers?: Record<string, string>): Record<string, string> | undefined;
|
|
2
1
|
export declare function appendHeaderVary(cacheVary: Record<string, unknown> | string | undefined, customHeaders?: Record<string, string>): Record<string, unknown> | string | undefined;
|
|
3
|
-
//# sourceMappingURL=cache-vary.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"cache-vary.d.ts","sourceRoot":"","sources":["../../../src/tools/utils/cache-vary.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"cache-vary.d.ts","sourceRoot":"","sources":["../../../src/tools/utils/cache-vary.ts"],"names":[],"mappings":"AAIA,wBAAgB,gBAAgB,CAC9B,SAAS,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,MAAM,GAAG,SAAS,EACvD,aAAa,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GACrC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,MAAM,GAAG,SAAS,CAY9C"}
|
|
@@ -1,44 +1,12 @@
|
|
|
1
1
|
import { config } from '../../config/index.js';
|
|
2
|
-
|
|
3
|
-
if (!headers || Object.keys(headers).length === 0)
|
|
4
|
-
return undefined;
|
|
5
|
-
const normalized = buildNormalizedHeaders(headers, config.security.blockedHeaders);
|
|
6
|
-
const iterator = normalized.keys();
|
|
7
|
-
if (iterator.next().done)
|
|
8
|
-
return undefined;
|
|
9
|
-
return Object.fromEntries(normalized.entries());
|
|
10
|
-
}
|
|
2
|
+
import { normalizeHeaderRecord } from '../../utils/header-normalizer.js';
|
|
11
3
|
export function appendHeaderVary(cacheVary, customHeaders) {
|
|
12
|
-
const
|
|
13
|
-
if (!
|
|
14
|
-
return undefined;
|
|
15
|
-
if (typeof cacheVary === 'string') {
|
|
16
|
-
return buildStringVary(cacheVary, headerVary);
|
|
17
|
-
}
|
|
18
|
-
if (!headerVary)
|
|
4
|
+
const headers = normalizeHeaderRecord(customHeaders, config.security.blockedHeaders, { trimValues: true });
|
|
5
|
+
if (!headers)
|
|
19
6
|
return cacheVary;
|
|
20
|
-
|
|
21
|
-
}
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
if (blockedHeaders.has(key.toLowerCase()))
|
|
26
|
-
continue;
|
|
27
|
-
setHeaderValue(normalized, key, value);
|
|
28
|
-
}
|
|
29
|
-
return normalized;
|
|
30
|
-
}
|
|
31
|
-
function setHeaderValue(headers, key, value) {
|
|
32
|
-
try {
|
|
33
|
-
headers.set(key, value.trim());
|
|
34
|
-
}
|
|
35
|
-
catch {
|
|
36
|
-
// Ignore invalid headers for cache keys
|
|
37
|
-
}
|
|
38
|
-
}
|
|
39
|
-
function buildStringVary(key, headerVary) {
|
|
40
|
-
if (!headerVary)
|
|
41
|
-
return { key };
|
|
42
|
-
return { key, headers: headerVary };
|
|
7
|
+
if (!cacheVary)
|
|
8
|
+
return { headers };
|
|
9
|
+
return typeof cacheVary === 'string'
|
|
10
|
+
? { key: cacheVary, headers }
|
|
11
|
+
: { ...cacheVary, headers };
|
|
43
12
|
}
|
|
44
|
-
//# sourceMappingURL=cache-vary.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"cache-vary.js","sourceRoot":"","sources":["../../../src/tools/utils/cache-vary.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,uBAAuB,CAAC;AAE/C,
|
|
1
|
+
{"version":3,"file":"cache-vary.js","sourceRoot":"","sources":["../../../src/tools/utils/cache-vary.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,uBAAuB,CAAC;AAE/C,OAAO,EAAE,qBAAqB,EAAE,MAAM,kCAAkC,CAAC;AAEzE,MAAM,UAAU,gBAAgB,CAC9B,SAAuD,EACvD,aAAsC;IAEtC,MAAM,OAAO,GAAG,qBAAqB,CACnC,aAAa,EACb,MAAM,CAAC,QAAQ,CAAC,cAAc,EAC9B,EAAE,UAAU,EAAE,IAAI,EAAE,CACrB,CAAC;IAEF,IAAI,CAAC,OAAO;QAAE,OAAO,SAAS,CAAC;IAC/B,IAAI,CAAC,SAAS;QAAE,OAAO,EAAE,OAAO,EAAE,CAAC;IACnC,OAAO,OAAO,SAAS,KAAK,QAAQ;QAClC,CAAC,CAAC,EAAE,GAAG,EAAE,SAAS,EAAE,OAAO,EAAE;QAC7B,CAAC,CAAC,EAAE,GAAG,SAAS,EAAE,OAAO,EAAE,CAAC;AAChC,CAAC"}
|
|
@@ -1,7 +1,5 @@
|
|
|
1
|
-
import type {
|
|
2
|
-
|
|
1
|
+
import type { ExtractedArticle, ExtractedMetadata, MetadataBlock } from '../../config/types/content.js';
|
|
2
|
+
import type { TruncationResult } from '../../config/types/runtime.js';
|
|
3
3
|
export declare function determineContentExtractionSource(extractMainContent: boolean, article: ExtractedArticle | null): article is ExtractedArticle;
|
|
4
4
|
export declare function createContentMetadataBlock(url: string, article: ExtractedArticle | null, extractedMeta: ExtractedMetadata, shouldExtractFromArticle: boolean, includeMetadata: boolean): MetadataBlock | undefined;
|
|
5
|
-
export declare function enforceContentLengthLimit(content: string, maxLength?: number): TruncationResult;
|
|
6
5
|
export declare function truncateContent(content: string, maxLength?: number, suffix?: string): TruncationResult;
|
|
7
|
-
//# sourceMappingURL=common.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"common.d.ts","sourceRoot":"","sources":["../../../src/tools/utils/common.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EACV,
|
|
1
|
+
{"version":3,"file":"common.d.ts","sourceRoot":"","sources":["../../../src/tools/utils/common.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EACV,gBAAgB,EAChB,iBAAiB,EACjB,aAAa,EACd,MAAM,+BAA+B,CAAC;AACvC,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,+BAA+B,CAAC;AAEtE,wBAAgB,gCAAgC,CAC9C,kBAAkB,EAAE,OAAO,EAC3B,OAAO,EAAE,gBAAgB,GAAG,IAAI,GAC/B,OAAO,IAAI,gBAAgB,CAE7B;AAED,wBAAgB,0BAA0B,CACxC,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,gBAAgB,GAAG,IAAI,EAChC,aAAa,EAAE,iBAAiB,EAChC,wBAAwB,EAAE,OAAO,EACjC,eAAe,EAAE,OAAO,GACvB,aAAa,GAAG,SAAS,CAmB3B;AAED,wBAAgB,eAAe,CAC7B,OAAO,EAAE,MAAM,EACf,SAAS,CAAC,EAAE,MAAM,EAClB,MAAM,SAAoB,GACzB,gBAAgB,CAYlB"}
|
|
@@ -23,17 +23,16 @@ export function createContentMetadataBlock(url, article, extractedMeta, shouldEx
|
|
|
23
23
|
fetchedAt: now,
|
|
24
24
|
};
|
|
25
25
|
}
|
|
26
|
-
export function enforceContentLengthLimit(content, maxLength) {
|
|
27
|
-
return truncateContent(content, maxLength);
|
|
28
|
-
}
|
|
29
26
|
export function truncateContent(content, maxLength, suffix = TRUNCATION_MARKER) {
|
|
30
|
-
|
|
31
|
-
|
|
27
|
+
if (maxLength === undefined ||
|
|
28
|
+
maxLength <= 0 ||
|
|
29
|
+
content.length <= maxLength) {
|
|
32
30
|
return { content, truncated: false };
|
|
33
31
|
}
|
|
32
|
+
const safeMax = Math.max(0, maxLength - suffix.length);
|
|
33
|
+
const marker = suffix.length > maxLength ? suffix.substring(0, maxLength) : suffix;
|
|
34
34
|
return {
|
|
35
|
-
content: `${content.substring(0,
|
|
35
|
+
content: `${content.substring(0, safeMax)}${marker}`,
|
|
36
36
|
truncated: true,
|
|
37
37
|
};
|
|
38
38
|
}
|
|
39
|
-
//# sourceMappingURL=common.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"common.js","sourceRoot":"","sources":["../../../src/tools/utils/common.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,iBAAiB,EAAE,MAAM,4BAA4B,CAAC;
|
|
1
|
+
{"version":3,"file":"common.js","sourceRoot":"","sources":["../../../src/tools/utils/common.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,iBAAiB,EAAE,MAAM,4BAA4B,CAAC;AAQ/D,MAAM,UAAU,gCAAgC,CAC9C,kBAA2B,EAC3B,OAAgC;IAEhC,OAAO,kBAAkB,IAAI,CAAC,CAAC,OAAO,CAAC;AACzC,CAAC;AAED,MAAM,UAAU,0BAA0B,CACxC,GAAW,EACX,OAAgC,EAChC,aAAgC,EAChC,wBAAiC,EACjC,eAAwB;IAExB,IAAI,CAAC,eAAe;QAAE,OAAO,SAAS,CAAC;IACvC,MAAM,GAAG,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IACrC,OAAO,wBAAwB,IAAI,OAAO;QACxC,CAAC,CAAC;YACE,IAAI,EAAE,UAAU;YAChB,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,MAAM,EAAE,OAAO,CAAC,MAAM;YACtB,GAAG;YACH,SAAS,EAAE,GAAG;SACf;QACH,CAAC,CAAC;YACE,IAAI,EAAE,UAAU;YAChB,KAAK,EAAE,aAAa,CAAC,KAAK;YAC1B,WAAW,EAAE,aAAa,CAAC,WAAW;YACtC,MAAM,EAAE,aAAa,CAAC,MAAM;YAC5B,GAAG;YACH,SAAS,EAAE,GAAG;SACf,CAAC;AACR,CAAC;AAED,MAAM,UAAU,eAAe,CAC7B,OAAe,EACf,SAAkB,EAClB,MAAM,GAAG,iBAAiB;IAE1B,MAAM,cAAc,GAClB,SAAS,KAAK,SAAS,IAAI,SAAS,GAAG,CAAC,IAAI,OAAO,CAAC,MAAM,GAAG,SAAS,CAAC;IAEzE,IAAI,CAAC,cAAc,EAAE,CAAC;QACpB,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,KAAK,EAAE,CAAC;IACvC,CAAC;IAED,OAAO;QACL,OAAO,EAAE,GAAG,OAAO,CAAC,SAAS,CAAC,CAAC,EAAE,SAAS,CAAC,GAAG,MAAM,EAAE;QACtD,SAAS,EAAE,IAAI;KAChB,CAAC;AACJ,CAAC"}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { JsonlTransformResult, MarkdownTransformResult } from '../../config/types.js';
|
|
1
|
+
import type { JsonlTransformResult, MarkdownTransformResult } from '../../config/types/content.js';
|
|
2
2
|
interface ExtractionOptions {
|
|
3
3
|
readonly extractMainContent: boolean;
|
|
4
4
|
readonly includeMetadata: boolean;
|
|
@@ -7,10 +7,8 @@ interface ContentLengthOptions {
|
|
|
7
7
|
readonly maxContentLength?: number;
|
|
8
8
|
}
|
|
9
9
|
interface MarkdownOptions extends ExtractionOptions, ContentLengthOptions {
|
|
10
|
-
readonly generateToc?: boolean;
|
|
11
10
|
}
|
|
12
11
|
export declare function transformHtmlToJsonl(html: string, url: string, options: ExtractionOptions & ContentLengthOptions): JsonlTransformResult;
|
|
13
12
|
export declare function transformHtmlToMarkdown(html: string, url: string, options: MarkdownOptions): MarkdownTransformResult;
|
|
14
13
|
export declare function transformHtmlToMarkdownWithBlocks(html: string, url: string, options: ExtractionOptions & ContentLengthOptions): JsonlTransformResult;
|
|
15
14
|
export {};
|
|
16
|
-
//# sourceMappingURL=content-transform.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"content-transform.d.ts","sourceRoot":"","sources":["../../../src/tools/utils/content-transform.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EACV,oBAAoB,EACpB,uBAAuB,EACxB,MAAM,
|
|
1
|
+
{"version":3,"file":"content-transform.d.ts","sourceRoot":"","sources":["../../../src/tools/utils/content-transform.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EACV,oBAAoB,EACpB,uBAAuB,EACxB,MAAM,+BAA+B,CAAC;AAcvC,UAAU,iBAAiB;IACzB,QAAQ,CAAC,kBAAkB,EAAE,OAAO,CAAC;IACrC,QAAQ,CAAC,eAAe,EAAE,OAAO,CAAC;CACnC;AAQD,UAAU,oBAAoB;IAC5B,QAAQ,CAAC,gBAAgB,CAAC,EAAE,MAAM,CAAC;CACpC;AAED,UAAU,eAAgB,SAAQ,iBAAiB,EAAE,oBAAoB;CAAG;AA4D5E,wBAAgB,oBAAoB,CAClC,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,iBAAiB,GAAG,oBAAoB,GAChD,oBAAoB,CAatB;AAED,wBAAgB,uBAAuB,CACrC,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,eAAe,GACvB,uBAAuB,CAYzB;AAED,wBAAgB,iCAAiC,CAC/C,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,iBAAiB,GAAG,oBAAoB,GAChD,oBAAoB,CActB"}
|
|
@@ -1,10 +1,19 @@
|
|
|
1
1
|
import { TRUNCATION_MARKER } from '../../config/formatting.js';
|
|
2
2
|
import { extractContent } from '../../services/extractor.js';
|
|
3
3
|
import { parseHtml } from '../../services/parser.js';
|
|
4
|
+
import { sanitizeText } from '../../utils/sanitizer.js';
|
|
4
5
|
import { toJsonl } from '../../transformers/jsonl.transformer.js';
|
|
5
6
|
import { htmlToMarkdown } from '../../transformers/markdown.transformer.js';
|
|
6
7
|
import { createContentMetadataBlock, determineContentExtractionSource, truncateContent, } from './common.js';
|
|
8
|
+
const TITLE_PATTERN = /<title[^>]*>([\s\S]*?)<\/title>/i;
|
|
7
9
|
function resolveContentSource(html, url, options) {
|
|
10
|
+
if (!options.extractMainContent && !options.includeMetadata) {
|
|
11
|
+
return {
|
|
12
|
+
sourceHtml: html,
|
|
13
|
+
title: extractTitleFromHtml(html),
|
|
14
|
+
metadata: undefined,
|
|
15
|
+
};
|
|
16
|
+
}
|
|
8
17
|
const { article, metadata: extractedMeta } = extractContent(html, url, {
|
|
9
18
|
extractArticle: options.extractMainContent,
|
|
10
19
|
});
|
|
@@ -14,36 +23,78 @@ function resolveContentSource(html, url, options) {
|
|
|
14
23
|
const title = shouldExtractFromArticle ? article.title : extractedMeta.title;
|
|
15
24
|
return { sourceHtml, title, metadata };
|
|
16
25
|
}
|
|
17
|
-
|
|
18
|
-
const
|
|
19
|
-
|
|
20
|
-
|
|
26
|
+
function extractTitleFromHtml(html) {
|
|
27
|
+
const match = TITLE_PATTERN.exec(html);
|
|
28
|
+
if (!match?.[1])
|
|
29
|
+
return undefined;
|
|
30
|
+
const decoded = decodeHtmlEntities(match[1]);
|
|
31
|
+
const text = sanitizeText(decoded);
|
|
32
|
+
return text || undefined;
|
|
33
|
+
}
|
|
34
|
+
function decodeHtmlEntities(value) {
|
|
35
|
+
if (!value.includes('&'))
|
|
36
|
+
return value;
|
|
37
|
+
const basicDecoded = value
|
|
38
|
+
.replace(/&/g, '&')
|
|
39
|
+
.replace(/</g, '<')
|
|
40
|
+
.replace(/>/g, '>')
|
|
41
|
+
.replace(/"/g, '"')
|
|
42
|
+
.replace(/'/g, "'");
|
|
43
|
+
return basicDecoded
|
|
44
|
+
.replace(/&#(\d+);/g, (match, code) => {
|
|
45
|
+
const parsed = Number.parseInt(code, 10);
|
|
46
|
+
return Number.isFinite(parsed) && parsed >= 0 && parsed <= 0x10ffff
|
|
47
|
+
? String.fromCodePoint(parsed)
|
|
48
|
+
: match;
|
|
49
|
+
})
|
|
50
|
+
.replace(/&#x([0-9a-fA-F]+);/g, (match, code) => {
|
|
51
|
+
const parsed = Number.parseInt(code, 16);
|
|
52
|
+
return Number.isFinite(parsed) && parsed >= 0 && parsed <= 0x10ffff
|
|
53
|
+
? String.fromCodePoint(parsed)
|
|
54
|
+
: match;
|
|
55
|
+
});
|
|
56
|
+
}
|
|
57
|
+
function buildJsonlPayload(context, maxContentLength) {
|
|
58
|
+
const contentBlocks = parseHtml(context.sourceHtml);
|
|
59
|
+
const { content, truncated } = truncateContent(toJsonl(contentBlocks, context.metadata), maxContentLength);
|
|
21
60
|
return {
|
|
22
61
|
content,
|
|
23
62
|
contentBlocks: contentBlocks.length,
|
|
24
|
-
|
|
63
|
+
truncated,
|
|
64
|
+
};
|
|
65
|
+
}
|
|
66
|
+
function buildMarkdownPayload(context, maxContentLength) {
|
|
67
|
+
const markdown = htmlToMarkdown(context.sourceHtml, context.metadata);
|
|
68
|
+
const { content, truncated } = truncateContent(markdown, maxContentLength, TRUNCATION_MARKER);
|
|
69
|
+
return { content, truncated };
|
|
70
|
+
}
|
|
71
|
+
export function transformHtmlToJsonl(html, url, options) {
|
|
72
|
+
const context = resolveContentSource(html, url, options);
|
|
73
|
+
const { content, contentBlocks, truncated } = buildJsonlPayload(context, options.maxContentLength);
|
|
74
|
+
return {
|
|
75
|
+
content,
|
|
76
|
+
contentBlocks,
|
|
77
|
+
title: context.title,
|
|
25
78
|
...(truncated && { truncated }),
|
|
26
79
|
};
|
|
27
80
|
}
|
|
28
81
|
export function transformHtmlToMarkdown(html, url, options) {
|
|
29
|
-
const
|
|
30
|
-
const
|
|
31
|
-
const { content, truncated } = truncateContent(markdown, options.maxContentLength, TRUNCATION_MARKER);
|
|
82
|
+
const context = resolveContentSource(html, url, options);
|
|
83
|
+
const { content, truncated } = buildMarkdownPayload(context, options.maxContentLength);
|
|
32
84
|
return {
|
|
33
85
|
markdown: content,
|
|
34
|
-
title,
|
|
86
|
+
title: context.title,
|
|
35
87
|
truncated,
|
|
36
88
|
};
|
|
37
89
|
}
|
|
38
90
|
export function transformHtmlToMarkdownWithBlocks(html, url, options) {
|
|
39
|
-
const
|
|
40
|
-
const contentBlocks = parseHtml(sourceHtml);
|
|
41
|
-
const { content, truncated } =
|
|
91
|
+
const context = resolveContentSource(html, url, options);
|
|
92
|
+
const contentBlocks = parseHtml(context.sourceHtml);
|
|
93
|
+
const { content, truncated } = buildMarkdownPayload(context, options.maxContentLength);
|
|
42
94
|
return {
|
|
43
95
|
content,
|
|
44
96
|
contentBlocks: contentBlocks.length,
|
|
45
|
-
title,
|
|
97
|
+
title: context.title,
|
|
46
98
|
...(truncated && { truncated }),
|
|
47
99
|
};
|
|
48
100
|
}
|
|
49
|
-
//# sourceMappingURL=content-transform.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"content-transform.js","sourceRoot":"","sources":["../../../src/tools/utils/content-transform.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,iBAAiB,EAAE,MAAM,4BAA4B,CAAC;AAM/D,OAAO,EAAE,cAAc,EAAE,MAAM,6BAA6B,CAAC;AAC7D,OAAO,EAAE,SAAS,EAAE,MAAM,0BAA0B,CAAC;AAErD,OAAO,EAAE,OAAO,EAAE,MAAM,yCAAyC,CAAC;AAClE,OAAO,EAAE,cAAc,EAAE,MAAM,4CAA4C,CAAC;AAE5E,OAAO,EACL,0BAA0B,EAC1B,gCAAgC,EAChC,eAAe,GAChB,MAAM,aAAa,CAAC;
|
|
1
|
+
{"version":3,"file":"content-transform.js","sourceRoot":"","sources":["../../../src/tools/utils/content-transform.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,iBAAiB,EAAE,MAAM,4BAA4B,CAAC;AAM/D,OAAO,EAAE,cAAc,EAAE,MAAM,6BAA6B,CAAC;AAC7D,OAAO,EAAE,SAAS,EAAE,MAAM,0BAA0B,CAAC;AAErD,OAAO,EAAE,OAAO,EAAE,MAAM,yCAAyC,CAAC;AAClE,OAAO,EAAE,cAAc,EAAE,MAAM,4CAA4C,CAAC;AAE5E,OAAO,EACL,0BAA0B,EAC1B,gCAAgC,EAChC,eAAe,GAChB,MAAM,aAAa,CAAC;AAmBrB,SAAS,oBAAoB,CAC3B,IAAY,EACZ,GAAW,EACX,OAA0B;IAE1B,MAAM,EAAE,OAAO,EAAE,QAAQ,EAAE,aAAa,EAAE,GAAG,cAAc,CAAC,IAAI,EAAE,GAAG,EAAE;QACrE,cAAc,EAAE,OAAO,CAAC,kBAAkB;KAC3C,CAAC,CAAC;IAEH,MAAM,wBAAwB,GAAG,gCAAgC,CAC/D,OAAO,CAAC,kBAAkB,EAC1B,OAAO,CACR,CAAC;IAEF,MAAM,UAAU,GAAG,wBAAwB,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC;IACrE,MAAM,QAAQ,GAAG,0BAA0B,CACzC,GAAG,EACH,OAAO,EACP,aAAa,EACb,wBAAwB,EACxB,OAAO,CAAC,eAAe,CACxB,CAAC;IACF,MAAM,KAAK,GAAG,wBAAwB,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,aAAa,CAAC,KAAK,CAAC;IAE7E,OAAO,EAAE,UAAU,EAAE,KAAK,EAAE,QAAQ,EAAE,CAAC;AACzC,CAAC;AAED,SAAS,iBAAiB,CACxB,OAAsB,EACtB,gBAAyB;IAEzB,MAAM,aAAa,GAAG,SAAS,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;IACpD,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE,GAAG,eAAe,CAC5C,OAAO,CAAC,aAAa,EAAE,OAAO,CAAC,QAAQ,CAAC,EACxC,gBAAgB,CACjB,CAAC;IAEF,OAAO;QACL,OAAO;QACP,aAAa,EAAE,aAAa,CAAC,MAAM;QACnC,SAAS;KACV,CAAC;AACJ,CAAC;AAED,SAAS,oBAAoB,CAC3B,OAAsB,EACtB,gBAAyB;IAEzB,MAAM,QAAQ,GAAG,cAAc,CAAC,OAAO,CAAC,UAAU,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC;IACtE,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE,GAAG,eAAe,CAC5C,QAAQ,EACR,gBAAgB,EAChB,iBAAiB,CAClB,CAAC;IAEF,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,CAAC;AAChC,CAAC;AAED,MAAM,UAAU,oBAAoB,CAClC,IAAY,EACZ,GAAW,EACX,OAAiD;IAEjD,MAAM,OAAO,GAAG,oBAAoB,CAAC,IAAI,EAAE,GAAG,EAAE,OAAO,CAAC,CAAC;IACzD,MAAM,EAAE,OAAO,EAAE,aAAa,EAAE,SAAS,EAAE,GAAG,iBAAiB,CAC7D,OAAO,EACP,OAAO,CAAC,gBAAgB,CACzB,CAAC;IAEF,OAAO;QACL,OAAO;QACP,aAAa;QACb,KAAK,EAAE,OAAO,CAAC,KAAK;QACpB,GAAG,CAAC,SAAS,IAAI,EAAE,SAAS,EAAE,CAAC;KAChC,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,uBAAuB,CACrC,IAAY,EACZ,GAAW,EACX,OAAwB;IAExB,MAAM,OAAO,GAAG,oBAAoB,CAAC,IAAI,EAAE,GAAG,EAAE,OAAO,CAAC,CAAC;IACzD,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE,GAAG,oBAAoB,CACjD,OAAO,EACP,OAAO,CAAC,gBAAgB,CACzB,CAAC;IAEF,OAAO;QACL,QAAQ,EAAE,OAAO;QACjB,KAAK,EAAE,OAAO,CAAC,KAAK;QACpB,SAAS;KACV,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,iCAAiC,CAC/C,IAAY,EACZ,GAAW,EACX,OAAiD;IAEjD,MAAM,OAAO,GAAG,oBAAoB,CAAC,IAAI,EAAE,GAAG,EAAE,OAAO,CAAC,CAAC;IACzD,MAAM,aAAa,GAAG,SAAS,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;IACpD,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE,GAAG,oBAAoB,CACjD,OAAO,EACP,OAAO,CAAC,gBAAgB,CACzB,CAAC;IAEF,OAAO;QACL,OAAO;QACP,aAAa,EAAE,aAAa,CAAC,MAAM;QACnC,KAAK,EAAE,OAAO,CAAC,KAAK;QACpB,GAAG,CAAC,SAAS,IAAI,EAAE,SAAS,EAAE,CAAC;KAChC,CAAC;AACJ,CAAC"}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { FetchPipelineOptions, PipelineResult } from '../../config/types.js';
|
|
1
|
+
import type { FetchPipelineOptions, PipelineResult } from '../../config/types/runtime.js';
|
|
2
2
|
/**
|
|
3
3
|
* Unified fetch pipeline that handles caching, fetching, and transformation.
|
|
4
4
|
* Implements cache-first strategy with automatic serialization.
|
|
@@ -8,4 +8,3 @@ import type { FetchPipelineOptions, PipelineResult } from '../../config/types.js
|
|
|
8
8
|
* @returns Promise resolving to the pipeline result
|
|
9
9
|
*/
|
|
10
10
|
export declare function executeFetchPipeline<T>(options: FetchPipelineOptions<T>): Promise<PipelineResult<T>>;
|
|
11
|
-
//# sourceMappingURL=fetch-pipeline.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"fetch-pipeline.d.ts","sourceRoot":"","sources":["../../../src/tools/utils/fetch-pipeline.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAEV,oBAAoB,EACpB,cAAc,EACf,MAAM,
|
|
1
|
+
{"version":3,"file":"fetch-pipeline.d.ts","sourceRoot":"","sources":["../../../src/tools/utils/fetch-pipeline.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAEV,oBAAoB,EACpB,cAAc,EACf,MAAM,+BAA+B,CAAC;AAuDvC;;;;;;;GAOG;AACH,wBAAsB,oBAAoB,CAAC,CAAC,EAC1C,OAAO,EAAE,oBAAoB,CAAC,CAAC,CAAC,GAC/B,OAAO,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,CAwB5B"}
|
|
@@ -1,29 +1,22 @@
|
|
|
1
1
|
import * as cache from '../../services/cache.js';
|
|
2
2
|
import { fetchUrlWithRetry } from '../../services/fetcher.js';
|
|
3
|
-
import { logDebug
|
|
3
|
+
import { logDebug } from '../../services/logger.js';
|
|
4
4
|
import { validateAndNormalizeUrl } from '../../utils/url-validator.js';
|
|
5
5
|
import { appendHeaderVary } from './cache-vary.js';
|
|
6
|
-
function safeJsonParse(cached, cacheKey) {
|
|
7
|
-
try {
|
|
8
|
-
return JSON.parse(cached);
|
|
9
|
-
}
|
|
10
|
-
catch {
|
|
11
|
-
logWarn('Cache deserialize failed, treating as miss', {
|
|
12
|
-
key: cacheKey.substring(0, 100),
|
|
13
|
-
});
|
|
14
|
-
return undefined;
|
|
15
|
-
}
|
|
16
|
-
}
|
|
17
6
|
function attemptCacheRetrieval(cacheKey, deserialize, cacheNamespace, normalizedUrl) {
|
|
18
7
|
if (!cacheKey)
|
|
19
8
|
return null;
|
|
20
9
|
const cached = cache.get(cacheKey);
|
|
21
10
|
if (!cached)
|
|
22
11
|
return null;
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
12
|
+
if (!deserialize) {
|
|
13
|
+
logDebug('Cache miss due to missing deserializer', {
|
|
14
|
+
namespace: cacheNamespace,
|
|
15
|
+
url: normalizedUrl,
|
|
16
|
+
});
|
|
17
|
+
return null;
|
|
18
|
+
}
|
|
19
|
+
const data = deserialize(cached.content);
|
|
27
20
|
if (data === undefined) {
|
|
28
21
|
logDebug('Cache miss due to deserialize failure', {
|
|
29
22
|
namespace: cacheNamespace,
|
|
@@ -31,6 +24,7 @@ function attemptCacheRetrieval(cacheKey, deserialize, cacheNamespace, normalized
|
|
|
31
24
|
});
|
|
32
25
|
return null;
|
|
33
26
|
}
|
|
27
|
+
logDebug('Cache hit', { namespace: cacheNamespace, url: normalizedUrl });
|
|
34
28
|
return {
|
|
35
29
|
data,
|
|
36
30
|
fromCache: true,
|
|
@@ -48,7 +42,7 @@ function attemptCacheRetrieval(cacheKey, deserialize, cacheNamespace, normalized
|
|
|
48
42
|
* @returns Promise resolving to the pipeline result
|
|
49
43
|
*/
|
|
50
44
|
export async function executeFetchPipeline(options) {
|
|
51
|
-
const normalizedUrl = validateAndNormalizeUrl(options.url);
|
|
45
|
+
const normalizedUrl = await validateAndNormalizeUrl(options.url);
|
|
52
46
|
const cacheKey = resolveCacheKey(options, normalizedUrl);
|
|
53
47
|
const cachedResult = attemptCacheRetrieval(cacheKey, options.deserialize, options.cacheNamespace, normalizedUrl);
|
|
54
48
|
if (cachedResult)
|
|
@@ -57,7 +51,7 @@ export async function executeFetchPipeline(options) {
|
|
|
57
51
|
logDebug('Fetching URL', { url: normalizedUrl, retries: options.retries });
|
|
58
52
|
const html = await fetchUrlWithRetry(normalizedUrl, fetchOptions, options.retries);
|
|
59
53
|
const data = options.transform(html, normalizedUrl);
|
|
60
|
-
persistCache(cacheKey, data, options.serialize);
|
|
54
|
+
persistCache(cacheKey, data, options.serialize, normalizedUrl);
|
|
61
55
|
return buildPipelineResult(normalizedUrl, data, cacheKey);
|
|
62
56
|
}
|
|
63
57
|
function resolveCacheKey(options, normalizedUrl) {
|
|
@@ -71,11 +65,22 @@ function buildFetchOptions(options) {
|
|
|
71
65
|
timeout: options.timeout,
|
|
72
66
|
};
|
|
73
67
|
}
|
|
74
|
-
function persistCache(cacheKey, data, serialize) {
|
|
68
|
+
function persistCache(cacheKey, data, serialize, normalizedUrl) {
|
|
75
69
|
if (!cacheKey)
|
|
76
70
|
return;
|
|
77
71
|
const serializer = serialize ?? JSON.stringify;
|
|
78
|
-
cache.set(cacheKey, serializer(data)
|
|
72
|
+
cache.set(cacheKey, serializer(data), {
|
|
73
|
+
url: normalizedUrl,
|
|
74
|
+
title: extractTitle(data),
|
|
75
|
+
});
|
|
76
|
+
}
|
|
77
|
+
function extractTitle(value) {
|
|
78
|
+
if (!value || typeof value !== 'object')
|
|
79
|
+
return undefined;
|
|
80
|
+
if (!('title' in value))
|
|
81
|
+
return undefined;
|
|
82
|
+
const { title } = value;
|
|
83
|
+
return typeof title === 'string' ? title : undefined;
|
|
79
84
|
}
|
|
80
85
|
function buildPipelineResult(url, data, cacheKey) {
|
|
81
86
|
return {
|
|
@@ -86,4 +91,3 @@ function buildPipelineResult(url, data, cacheKey) {
|
|
|
86
91
|
cacheKey,
|
|
87
92
|
};
|
|
88
93
|
}
|
|
89
|
-
//# sourceMappingURL=fetch-pipeline.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"fetch-pipeline.js","sourceRoot":"","sources":["../../../src/tools/utils/fetch-pipeline.ts"],"names":[],"mappings":"AAMA,OAAO,KAAK,KAAK,MAAM,yBAAyB,CAAC;AACjD,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAC9D,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,0BAA0B,CAAC;AAE7D,OAAO,EAAE,uBAAuB,EAAE,MAAM,8BAA8B,CAAC;AAEvE,OAAO,EAAE,gBAAgB,EAAE,MAAM,iBAAiB,CAAC;AAEnD,SAAS,aAAa,CAAC,MAAc,EAAE,QAAgB;IACrD,IAAI,CAAC;QACH,OAAO,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;IAC5B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,CAAC,4CAA4C,EAAE;YACpD,GAAG,EAAE,QAAQ,CAAC,SAAS,CAAC,CAAC,EAAE,GAAG,CAAC;SAChC,CAAC,CAAC;QACH,OAAO,SAAS,CAAC;IACnB,CAAC;AACH,CAAC;AAED,SAAS,qBAAqB,CAC5B,QAAuB,EACvB,WAA4D,EAC5D,cAAsB,EACtB,aAAqB;IAErB,IAAI,CAAC,QAAQ;QAAE,OAAO,IAAI,CAAC;IAE3B,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;IACnC,IAAI,CAAC,MAAM;QAAE,OAAO,IAAI,CAAC;IAEzB,QAAQ,CAAC,WAAW,EAAE,EAAE,SAAS,EAAE,cAAc,EAAE,GAAG,EAAE,aAAa,EAAE,CAAC,CAAC;IAEzE,MAAM,IAAI,GAAG,WAAW;QACtB,CAAC,CAAC,WAAW,CAAC,MAAM,CAAC,OAAO,CAAC;QAC7B,CAAC,CAAE,aAAa,CAAC,MAAM,CAAC,OAAO,EAAE,QAAQ,CAAmB,CAAC;IAE/D,IAAI,IAAI,KAAK,SAAS,EAAE,CAAC;QACvB,QAAQ,CAAC,uCAAuC,EAAE;YAChD,SAAS,EAAE,cAAc;YACzB,GAAG,EAAE,aAAa;SACnB,CAAC,CAAC;QACH,OAAO,IAAI,CAAC;IACd,CAAC;IAED,OAAO;QACL,IAAI;QACJ,SAAS,EAAE,IAAI;QACf,GAAG,EAAE,aAAa;QAClB,SAAS,EAAE,MAAM,CAAC,SAAS;QAC3B,QAAQ;KACT,CAAC;AACJ,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACxC,OAAgC;IAEhC,MAAM,aAAa,GAAG,uBAAuB,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;IAC3D,MAAM,QAAQ,GAAG,eAAe,CAAC,OAAO,EAAE,aAAa,CAAC,CAAC;IAEzD,MAAM,YAAY,GAAG,qBAAqB,CACxC,QAAQ,EACR,OAAO,CAAC,WAAW,EACnB,OAAO,CAAC,cAAc,EACtB,aAAa,CACd,CAAC;IACF,IAAI,YAAY;QAAE,OAAO,YAAY,CAAC;IAEtC,MAAM,YAAY,GAAG,iBAAiB,CAAC,OAAO,CAAC,CAAC;IAChD,QAAQ,CAAC,cAAc,EAAE,EAAE,GAAG,EAAE,aAAa,EAAE,OAAO,EAAE,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC;IAE3E,MAAM,IAAI,GAAG,MAAM,iBAAiB,CAClC,aAAa,EACb,YAAY,EACZ,OAAO,CAAC,OAAO,CAChB,CAAC;IACF,MAAM,IAAI,GAAG,OAAO,CAAC,SAAS,CAAC,IAAI,EAAE,aAAa,CAAC,CAAC;IACpD,YAAY,CAAC,QAAQ,EAAE,IAAI,EAAE,OAAO,CAAC,SAAS,CAAC,CAAC;
|
|
1
|
+
{"version":3,"file":"fetch-pipeline.js","sourceRoot":"","sources":["../../../src/tools/utils/fetch-pipeline.ts"],"names":[],"mappings":"AAMA,OAAO,KAAK,KAAK,MAAM,yBAAyB,CAAC;AACjD,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAC9D,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,0BAA0B,CAAC;AAE7D,OAAO,EAAE,uBAAuB,EAAE,MAAM,8BAA8B,CAAC;AAEvE,OAAO,EAAE,gBAAgB,EAAE,MAAM,iBAAiB,CAAC;AAEnD,SAAS,aAAa,CAAC,MAAc,EAAE,QAAgB;IACrD,IAAI,CAAC;QACH,OAAO,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;IAC5B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,CAAC,4CAA4C,EAAE;YACpD,GAAG,EAAE,QAAQ,CAAC,SAAS,CAAC,CAAC,EAAE,GAAG,CAAC;SAChC,CAAC,CAAC;QACH,OAAO,SAAS,CAAC;IACnB,CAAC;AACH,CAAC;AAED,SAAS,qBAAqB,CAC5B,QAAuB,EACvB,WAA4D,EAC5D,cAAsB,EACtB,aAAqB;IAErB,IAAI,CAAC,QAAQ;QAAE,OAAO,IAAI,CAAC;IAE3B,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;IACnC,IAAI,CAAC,MAAM;QAAE,OAAO,IAAI,CAAC;IAEzB,QAAQ,CAAC,WAAW,EAAE,EAAE,SAAS,EAAE,cAAc,EAAE,GAAG,EAAE,aAAa,EAAE,CAAC,CAAC;IAEzE,MAAM,IAAI,GAAG,WAAW;QACtB,CAAC,CAAC,WAAW,CAAC,MAAM,CAAC,OAAO,CAAC;QAC7B,CAAC,CAAE,aAAa,CAAC,MAAM,CAAC,OAAO,EAAE,QAAQ,CAAmB,CAAC;IAE/D,IAAI,IAAI,KAAK,SAAS,EAAE,CAAC;QACvB,QAAQ,CAAC,uCAAuC,EAAE;YAChD,SAAS,EAAE,cAAc;YACzB,GAAG,EAAE,aAAa;SACnB,CAAC,CAAC;QACH,OAAO,IAAI,CAAC;IACd,CAAC;IAED,OAAO;QACL,IAAI;QACJ,SAAS,EAAE,IAAI;QACf,GAAG,EAAE,aAAa;QAClB,SAAS,EAAE,MAAM,CAAC,SAAS;QAC3B,QAAQ;KACT,CAAC;AACJ,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACxC,OAAgC;IAEhC,MAAM,aAAa,GAAG,uBAAuB,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;IAC3D,MAAM,QAAQ,GAAG,eAAe,CAAC,OAAO,EAAE,aAAa,CAAC,CAAC;IAEzD,MAAM,YAAY,GAAG,qBAAqB,CACxC,QAAQ,EACR,OAAO,CAAC,WAAW,EACnB,OAAO,CAAC,cAAc,EACtB,aAAa,CACd,CAAC;IACF,IAAI,YAAY;QAAE,OAAO,YAAY,CAAC;IAEtC,MAAM,YAAY,GAAG,iBAAiB,CAAC,OAAO,CAAC,CAAC;IAChD,QAAQ,CAAC,cAAc,EAAE,EAAE,GAAG,EAAE,aAAa,EAAE,OAAO,EAAE,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC;IAE3E,MAAM,IAAI,GAAG,MAAM,iBAAiB,CAClC,aAAa,EACb,YAAY,EACZ,OAAO,CAAC,OAAO,CAChB,CAAC;IACF,MAAM,IAAI,GAAG,OAAO,CAAC,SAAS,CAAC,IAAI,EAAE,aAAa,CAAC,CAAC;IACpD,YAAY,CAAC,QAAQ,EAAE,IAAI,EAAE,OAAO,CAAC,SAAS,EAAE,aAAa,CAAC,CAAC;IAE/D,OAAO,mBAAmB,CAAC,aAAa,EAAE,IAAI,EAAE,QAAQ,CAAC,CAAC;AAC5D,CAAC;AAED,SAAS,eAAe,CACtB,OAAgC,EAChC,aAAqB;IAErB,MAAM,SAAS,GAAG,gBAAgB,CAAC,OAAO,CAAC,SAAS,EAAE,OAAO,CAAC,aAAa,CAAC,CAAC;IAC7E,OAAO,KAAK,CAAC,cAAc,CAAC,OAAO,CAAC,cAAc,EAAE,aAAa,EAAE,SAAS,CAAC,CAAC;AAChF,CAAC;AAED,SAAS,iBAAiB,CAAI,OAAgC;IAC5D,OAAO;QACL,aAAa,EAAE,OAAO,CAAC,aAAa;QACpC,MAAM,EAAE,OAAO,CAAC,MAAM;QACtB,OAAO,EAAE,OAAO,CAAC,OAAO;KACzB,CAAC;AACJ,CAAC;AAED,SAAS,YAAY,CACnB,QAAuB,EACvB,IAAO,EACP,SAA8C,EAC9C,aAAqB;IAErB,IAAI,CAAC,QAAQ;QAAE,OAAO;IACtB,MAAM,UAAU,GAAG,SAAS,IAAI,IAAI,CAAC,SAAS,CAAC;IAC/C,KAAK,CAAC,GAAG,CAAC,QAAQ,EAAE,UAAU,CAAC,IAAI,CAAC,EAAE;QACpC,GAAG,EAAE,aAAa;QAClB,KAAK,EAAE,YAAY,CAAC,IAAI,CAAC;KAC1B,CAAC,CAAC;AACL,CAAC;AAED,SAAS,YAAY,CAAC,KAAc;IAClC,IAAI,CAAC,KAAK,IAAI,OAAO,KAAK,KAAK,QAAQ;QAAE,OAAO,SAAS,CAAC;IAC1D,IAAI,CAAC,CAAC,OAAO,IAAI,KAAK,CAAC;QAAE,OAAO,SAAS,CAAC;IAC1C,MAAM,EAAE,KAAK,EAAE,GAAG,KAA4B,CAAC;IAC/C,OAAO,OAAO,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS,CAAC;AACvD,CAAC;AAED,SAAS,mBAAmB,CAC1B,GAAW,EACX,IAAO,EACP,QAAuB;IAEvB,OAAO;QACL,IAAI;QACJ,SAAS,EAAE,KAAK;QAChB,GAAG;QACH,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACnC,QAAQ;KACT,CAAC;AACJ,CAAC"}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
1
|
+
type InlineContentFormat = 'jsonl' | 'markdown';
|
|
2
|
+
interface InlineContentResult {
|
|
3
3
|
content?: string;
|
|
4
4
|
contentSize: number;
|
|
5
5
|
resourceUri?: string;
|
|
@@ -8,4 +8,4 @@ export interface InlineContentResult {
|
|
|
8
8
|
truncated?: boolean;
|
|
9
9
|
}
|
|
10
10
|
export declare function applyInlineContentLimit(content: string, cacheKey: string | null, format: InlineContentFormat): InlineContentResult;
|
|
11
|
-
|
|
11
|
+
export {};
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"inline-content.d.ts","sourceRoot":"","sources":["../../../src/tools/utils/inline-content.ts"],"names":[],"mappings":"AAKA,
|
|
1
|
+
{"version":3,"file":"inline-content.d.ts","sourceRoot":"","sources":["../../../src/tools/utils/inline-content.ts"],"names":[],"mappings":"AAKA,KAAK,mBAAmB,GAAG,OAAO,GAAG,UAAU,CAAC;AAEhD,UAAU,mBAAmB;IAC3B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,SAAS,CAAC,EAAE,OAAO,CAAC;CACrB;AAED,wBAAgB,uBAAuB,CACrC,OAAO,EAAE,MAAM,EACf,QAAQ,EAAE,MAAM,GAAG,IAAI,EACvB,MAAM,EAAE,mBAAmB,GAC1B,mBAAmB,CAkBrB"}
|
|
@@ -1,3 +1,2 @@
|
|
|
1
|
-
import type { ContentBlockUnion, MetadataBlock } from '../config/types.js';
|
|
1
|
+
import type { ContentBlockUnion, MetadataBlock } from '../config/types/content.js';
|
|
2
2
|
export declare function toJsonl(blocks: readonly ContentBlockUnion[], metadata?: MetadataBlock): string;
|
|
3
|
-
//# sourceMappingURL=jsonl.transformer.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"jsonl.transformer.d.ts","sourceRoot":"","sources":["../../src/transformers/jsonl.transformer.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,
|
|
1
|
+
{"version":3,"file":"jsonl.transformer.d.ts","sourceRoot":"","sources":["../../src/transformers/jsonl.transformer.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EACV,iBAAiB,EACjB,aAAa,EACd,MAAM,4BAA4B,CAAC;AAkEpC,wBAAgB,OAAO,CACrB,MAAM,EAAE,SAAS,iBAAiB,EAAE,EACpC,QAAQ,CAAC,EAAE,aAAa,GACvB,MAAM,CAGR"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"jsonl.transformer.js","sourceRoot":"","sources":["../../src/transformers/jsonl.transformer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,yBAAyB,CAAC;AACpD,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;
|
|
1
|
+
{"version":3,"file":"jsonl.transformer.js","sourceRoot":"","sources":["../../src/transformers/jsonl.transformer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,yBAAyB,CAAC;AACpD,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAM5C,OAAO,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AAErD,MAAM,gBAAgB,GAAG,IAAI,GAAG,CAAC;IAC/B,WAAW;IACX,SAAS;IACT,MAAM;IACN,YAAY;CACb,CAAC,CAAC;AAEH,SAAS,WAAW,CAClB,KAAwB;IAExB,OAAO,MAAM,IAAI,KAAK,CAAC;AACzB,CAAC;AAED,SAAS,WAAW,CAClB,KAAwB;IAExB,OAAO,KAAK,CAAC,IAAI,KAAK,MAAM,CAAC;AAC/B,CAAC;AAED,SAAS,iBAAiB,CACxB,KAAmD,EACnD,SAAiB;IAEjB,MAAM,SAAS,GAAG,YAAY,CAAC,KAAK,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;IACtD,OAAO,SAAS,KAAK,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,GAAG,KAAK,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC;AAC1E,CAAC;AAED,SAAS,iBAAiB,CACxB,KAAsD,EACtD,SAAiB;IAEjB,MAAM,cAAc,GAAG,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAC9C,YAAY,CAAC,IAAI,EAAE,SAAS,CAAC,CAC9B,CAAC;IACF,MAAM,UAAU,GAAG,cAAc,CAAC,IAAI,CACpC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC,IAAI,KAAK,KAAK,CAAC,KAAK,CAAC,KAAK,CAAC,CAC7C,CAAC;IACF,OAAO,UAAU,CAAC,CAAC,CAAC,EAAE,GAAG,KAAK,EAAE,KAAK,EAAE,cAAc,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC;AAClE,CAAC;AAED,SAAS,aAAa,CAAC,KAAwB;IAC7C,MAAM,SAAS,GAAG,MAAM,CAAC,UAAU,CAAC,cAAc,CAAC;IAEnD,IAAI,gBAAgB,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,WAAW,CAAC,KAAK,CAAC,EAAE,CAAC;QAC3D,OAAO,iBAAiB,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;IAC7C,CAAC;IAED,IAAI,WAAW,CAAC,KAAK,CAAC,EAAE,CAAC;QACvB,OAAO,iBAAiB,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;IAC7C,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,cAAc,CAAC,KAAwB;IAC9C,IAAI,CAAC;QACH,OAAO,IAAI,CAAC,SAAS,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC,CAAC;IAC9C,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,MAAM,UAAU,OAAO,CACrB,MAAoC,EACpC,QAAwB;IAExB,MAAM,KAAK,GAAG,iBAAiB,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC;IAClD,OAAO,SAAS,CAAC,KAAK,CAAC,CAAC;AAC1B,CAAC;AAED,SAAS,iBAAiB,CACxB,MAAoC,EACpC,QAAwB;IAExB,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,MAAM,MAAM,GAAG,iBAAiB,CAAC,QAAQ,CAAC,CAAC;IAC3C,IAAI,MAAM,EAAE,CAAC;QACX,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IACrB,CAAC;IAED,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,MAAM,UAAU,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC;QACzC,IAAI,UAAU,EAAE,CAAC;YACf,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QACzB,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,iBAAiB,CAAC,QAAwB;IACjD,IAAI,CAAC,QAAQ;QAAE,OAAO,IAAI,CAAC;IAC3B,IAAI,CAAC;QACH,MAAM,eAAe,GAAG;YACtB,IAAI,EAAE,QAAQ,CAAC,IAAI;YACnB,KAAK,EAAE,QAAQ,CAAC,KAAK;YACrB,GAAG,EAAE,QAAQ,CAAC,GAAG;SAClB,CAAC;QACF,OAAO,IAAI,CAAC,SAAS,CAAC,eAAe,CAAC,CAAC;IACzC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"markdown.transformer.d.ts","sourceRoot":"","sources":["../../src/transformers/markdown.transformer.ts"],"names":[],"mappings":"AAOA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,
|
|
1
|
+
{"version":3,"file":"markdown.transformer.d.ts","sourceRoot":"","sources":["../../src/transformers/markdown.transformer.ts"],"names":[],"mappings":"AAOA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,4BAA4B,CAAC;AA+HhE,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,aAAa,GAAG,MAAM,CAa7E"}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import TurndownService from 'turndown';
|
|
2
2
|
import { CODE_BLOCK, FRONTMATTER_DELIMITER, joinLines, } from '../config/formatting.js';
|
|
3
|
-
import { detectLanguageFromCode } from '../
|
|
3
|
+
import { detectLanguageFromCode, resolveLanguageFromAttributes, } from '../utils/code-language.js';
|
|
4
4
|
let turndownInstance = null;
|
|
5
5
|
function getTurndown() {
|
|
6
6
|
if (turndownInstance)
|
|
@@ -41,8 +41,16 @@ function isFencedCodeBlock(node, options) {
|
|
|
41
41
|
return false;
|
|
42
42
|
return firstChild.nodeName === 'CODE';
|
|
43
43
|
}
|
|
44
|
+
function isElement(node) {
|
|
45
|
+
return (node !== null &&
|
|
46
|
+
typeof node === 'object' &&
|
|
47
|
+
'getAttribute' in node &&
|
|
48
|
+
typeof node.getAttribute === 'function');
|
|
49
|
+
}
|
|
44
50
|
function formatFencedCodeBlock(node) {
|
|
45
51
|
const codeNode = node.firstChild;
|
|
52
|
+
if (!isElement(codeNode))
|
|
53
|
+
return '';
|
|
46
54
|
const code = codeNode.textContent || '';
|
|
47
55
|
const language = resolveCodeLanguage(codeNode, code);
|
|
48
56
|
return CODE_BLOCK.format(code, language);
|
|
@@ -50,11 +58,8 @@ function formatFencedCodeBlock(node) {
|
|
|
50
58
|
function resolveCodeLanguage(codeNode, code) {
|
|
51
59
|
const className = codeNode.getAttribute('class') ?? '';
|
|
52
60
|
const dataLang = codeNode.getAttribute('data-language') ?? '';
|
|
53
|
-
const
|
|
54
|
-
|
|
55
|
-
/highlight-(\w+)/.exec(className) ??
|
|
56
|
-
/^(\w+)$/.exec(dataLang);
|
|
57
|
-
return languageMatch?.[1] ?? detectLanguageFromCode(code) ?? '';
|
|
61
|
+
const attributeLanguage = resolveLanguageFromAttributes(className, dataLang);
|
|
62
|
+
return attributeLanguage ?? detectLanguageFromCode(code) ?? '';
|
|
58
63
|
}
|
|
59
64
|
const YAML_SPECIAL_CHARS = /[:[\]{}"\r\t'|>&*!?,#]|\n/;
|
|
60
65
|
const YAML_NUMERIC = /^[\d.]+$/;
|
|
@@ -119,4 +124,3 @@ export function htmlToMarkdown(html, metadata) {
|
|
|
119
124
|
function isValidHtmlInput(html) {
|
|
120
125
|
return Boolean(html && typeof html === 'string');
|
|
121
126
|
}
|
|
122
|
-
//# sourceMappingURL=markdown.transformer.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"markdown.transformer.js","sourceRoot":"","sources":["../../src/transformers/markdown.transformer.ts"],"names":[],"mappings":"AAAA,OAAO,eAAe,MAAM,UAAU,CAAC;AAEvC,OAAO,EACL,UAAU,EACV,qBAAqB,EACrB,SAAS,GACV,MAAM,yBAAyB,CAAC;AAGjC,OAAO,
|
|
1
|
+
{"version":3,"file":"markdown.transformer.js","sourceRoot":"","sources":["../../src/transformers/markdown.transformer.ts"],"names":[],"mappings":"AAAA,OAAO,eAAe,MAAM,UAAU,CAAC;AAEvC,OAAO,EACL,UAAU,EACV,qBAAqB,EACrB,SAAS,GACV,MAAM,yBAAyB,CAAC;AAGjC,OAAO,EACL,sBAAsB,EACtB,6BAA6B,GAC9B,MAAM,2BAA2B,CAAC;AAEnC,IAAI,gBAAgB,GAA2B,IAAI,CAAC;AAEpD,SAAS,WAAW;IAClB,IAAI,gBAAgB;QAAE,OAAO,gBAAgB,CAAC;IAC9C,gBAAgB,GAAG,sBAAsB,EAAE,CAAC;IAC5C,OAAO,gBAAgB,CAAC;AAC1B,CAAC;AAED,SAAS,sBAAsB;IAC7B,MAAM,QAAQ,GAAG,IAAI,eAAe,CAAC;QACnC,YAAY,EAAE,KAAK;QACnB,cAAc,EAAE,QAAQ;QACxB,WAAW,EAAE,GAAG;QAChB,gBAAgB,EAAE,GAAG;KACtB,CAAC,CAAC;IAEH,YAAY,CAAC,QAAQ,CAAC,CAAC;IACvB,iBAAiB,CAAC,QAAQ,CAAC,CAAC;IAE5B,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,SAAS,YAAY,CAAC,QAAyB;IAC7C,QAAQ,CAAC,OAAO,CAAC,aAAa,EAAE;QAC9B,MAAM,EAAE,CAAC,QAAQ,EAAE,OAAO,EAAE,UAAU,EAAE,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,QAAQ,CAAC;QAC3E,WAAW,EAAE,GAAG,EAAE,CAAC,EAAE;KACtB,CAAC,CAAC;AACL,CAAC;AAED,SAAS,iBAAiB,CAAC,QAAyB;IAClD,QAAQ,CAAC,OAAO,CAAC,6BAA6B,EAAE;QAC9C,MAAM,EAAE,CAAC,IAAI,EAAE,OAAO,EAAE,EAAE,CAAC,iBAAiB,CAAC,IAAI,EAAE,OAAO,CAAC;QAC3D,WAAW,EAAE,CAAC,QAAQ,EAAE,IAAI,EAAE,EAAE,CAAC,qBAAqB,CAAC,IAAI,CAAC;KAC7D,CAAC,CAAC;AACL,CAAC;AAED,SAAS,iBAAiB,CACxB,IAA0B,EAC1B,OAAgC;IAEhC,IAAI,OAAO,CAAC,cAAc,KAAK,QAAQ;QAAE,OAAO,KAAK,CAAC;IACtD,IAAI,IAAI,CAAC,QAAQ,KAAK,KAAK;QAAE,OAAO,KAAK,CAAC;IAC1C,MAAM,EAAE,UAAU,EAAE,GAAG,IAAI,CAAC;IAC5B,IAAI,CAAC,UAAU;QAAE,OAAO,KAAK,CAAC;IAC9B,OAAO,UAAU,CAAC,QAAQ,KAAK,MAAM,CAAC;AACxC,CAAC;AAED,SAAS,qBAAqB,CAAC,IAA0B;IACvD,MAAM,QAAQ,GAAG,IAAI,CAAC,UAAyB,CAAC;IAChD,MAAM,IAAI,GAAG,QAAQ,CAAC,WAAW,IAAI,EAAE,CAAC;IACxC,MAAM,QAAQ,GAAG,mBAAmB,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC;IACrD,OAAO,UAAU,CAAC,MAAM,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;AAC3C,CAAC;AAED,SAAS,mBAAmB,CAAC,QAAqB,EAAE,IAAY;IAC9D,MAAM,SAAS,GAAG,QAAQ,CAAC,YAAY,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;IACvD,MAAM,QAAQ,GAAG,QAAQ,CAAC,YAAY,CAAC,eAAe,CAAC,IAAI,EAAE,CAAC;IAC9D,MAAM,iBAAiB,GAAG,6BAA6B,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;IAC7E,OAAO,iBAAiB,IAAI,sBAAsB,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;AACjE,CAAC;AAED,MAAM,kBAAkB,GAAG,2BAA2B,CAAC;AACvD,MAAM,YAAY,GAAG,UAAU,CAAC;AAChC,MAAM,mBAAmB,GAAG,oCAAoC,CAAC;AAEjE,MAAM,eAAe,GAAG;IACtB,SAAS,EAAE,KAAK;IAChB,KAAK,EAAE,IAAI;IACX,OAAO,EAAE,KAAK;IACd,GAAG,EAAE,KAAK;CACF,CAAC;AAEX,SAAS,eAAe,CAAC,KAAa;IACpC,MAAM,MAAM,GAAG;QACb,CAAC,KAAa,EAAE,EAAE,CAAC,kBAAkB,CAAC,IAAI,CAAC,KAAK,CAAC;QACjD,CAAC,KAAa,EAAE,EAAE,CAAC,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC;QAC/D,CAAC,KAAa,EAAE,EAAE,CAAC,KAAK,KAAK,EAAE;QAC/B,CAAC,KAAa,EAAE,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,KAAK,CAAC;QAC3C,CAAC,KAAa,EAAE,EAAE,CAAC,mBAAmB,CAAC,IAAI,CAAC,KAAK,CAAC;KACnD,CAAC;IAEF,OAAO,MAAM,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC;AAC9C,CAAC;AAED,SAAS,eAAe,CAAC,KAAa;IACpC,IAAI,CAAC,eAAe,CAAC,KAAK,CAAC,EAAE,CAAC;QAC5B,OAAO,KAAK,CAAC;IACf,CAAC;IAED,MAAM,OAAO,GAAG,KAAK;SAClB,OAAO,CAAC,eAAe,CAAC,SAAS,EAAE,MAAM,CAAC;SAC1C,OAAO,CAAC,eAAe,CAAC,KAAK,EAAE,KAAK,CAAC;SACrC,OAAO,CAAC,eAAe,CAAC,OAAO,EAAE,KAAK,CAAC;SACvC,OAAO,CAAC,eAAe,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;IAEvC,OAAO,IAAI,OAAO,GAAG,CAAC;AACxB,CAAC;AAED,SAAS,iBAAiB,CAAC,QAAuB;IAChD,MAAM,KAAK,GAAa,CAAC,qBAAqB,CAAC,CAAC;IAEhD,IAAI,QAAQ,CAAC,KAAK,EAAE,CAAC;QACnB,KAAK,CAAC,IAAI,CAAC,UAAU,eAAe,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;IAC1D,CAAC;IACD,IAAI,QAAQ,CAAC,GAAG,EAAE,CAAC;QACjB,KAAK,CAAC,IAAI,CAAC,WAAW,eAAe,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IACzD,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,qBAAqB,CAAC,CAAC;IAClC,OAAO,SAAS,CAAC,KAAK,CAAC,CAAC;AAC1B,CAAC;AAED,SAAS,qBAAqB,CAAC,IAAY;IACzC,OAAO,WAAW,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;AAC7C,CAAC;AAED,SAAS,qBAAqB,CAAC,QAAwB;IACrD,OAAO,QAAQ,CAAC,CAAC,CAAC,iBAAiB,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;AACrD,CAAC;AAED,MAAM,UAAU,cAAc,CAAC,IAAY,EAAE,QAAwB;IACnE,MAAM,WAAW,GAAG,qBAAqB,CAAC,QAAQ,CAAC,CAAC;IAEpD,IAAI,CAAC,gBAAgB,CAAC,IAAI,CAAC,EAAE,CAAC;QAC5B,OAAO,WAAW,CAAC;IACrB,CAAC;IAED,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,qBAAqB,CAAC,IAAI,CAAC,CAAC;QAC5C,OAAO,WAAW,CAAC,CAAC,CAAC,GAAG,WAAW,KAAK,OAAO,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC;IAC9D,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,WAAW,CAAC;IACrB,CAAC;AACH,CAAC;AAED,SAAS,gBAAgB,CAAC,IAAY;IACpC,OAAO,OAAO,CAAC,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ,CAAC,CAAC;AACnD,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"code-language.d.ts","sourceRoot":"","sources":["../../src/utils/code-language.ts"],"names":[],"mappings":"AAkCA,wBAAgB,sBAAsB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS,CAOvE;AAED,wBAAgB,6BAA6B,CAC3C,SAAS,EAAE,MAAM,EACjB,QAAQ,EAAE,MAAM,GACf,MAAM,GAAG,SAAS,CAGpB"}
|