@j0hanz/superfetch 2.1.3 → 2.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cache.d.ts +1 -0
- package/dist/cache.d.ts.map +1 -0
- package/dist/cache.js +37 -6
- package/dist/cache.js.map +1 -0
- package/dist/config.d.ts +1 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +1 -0
- package/dist/config.js.map +1 -0
- package/dist/crypto.d.ts +1 -0
- package/dist/crypto.d.ts.map +1 -0
- package/dist/crypto.js +1 -0
- package/dist/crypto.js.map +1 -0
- package/dist/errors.d.ts +1 -0
- package/dist/errors.d.ts.map +1 -0
- package/dist/errors.js +1 -0
- package/dist/errors.js.map +1 -0
- package/dist/fetch.d.ts +1 -0
- package/dist/fetch.d.ts.map +1 -0
- package/dist/fetch.js +1 -0
- package/dist/fetch.js.map +1 -0
- package/dist/http.d.ts +2 -0
- package/dist/http.d.ts.map +1 -0
- package/dist/http.js +34 -5
- package/dist/http.js.map +1 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.js +1 -0
- package/dist/index.js.map +1 -1
- package/dist/mcp.d.ts +1 -0
- package/dist/mcp.d.ts.map +1 -0
- package/dist/mcp.js +1 -0
- package/dist/mcp.js.map +1 -0
- package/dist/observability.d.ts +1 -0
- package/dist/observability.d.ts.map +1 -0
- package/dist/observability.js +1 -0
- package/dist/observability.js.map +1 -0
- package/dist/tools.d.ts +1 -0
- package/dist/tools.d.ts.map +1 -0
- package/dist/tools.js +1 -0
- package/dist/tools.js.map +1 -0
- package/dist/transform.d.ts +1 -0
- package/dist/transform.d.ts.map +1 -0
- package/dist/transform.js +1 -0
- package/dist/transform.js.map +1 -0
- package/dist/utils.d.ts +1 -0
- package/dist/utils.d.ts.map +1 -0
- package/dist/utils.js +1 -0
- package/dist/utils.js.map +1 -0
- package/dist/workers/transform-worker.d.ts +1 -0
- package/dist/workers/transform-worker.d.ts.map +1 -0
- package/dist/workers/transform-worker.js +1 -0
- package/dist/workers/transform-worker.js.map +1 -0
- package/package.json +8 -5
- package/dist/config/auth-config.d.ts +0 -16
- package/dist/config/auth-config.js +0 -53
- package/dist/config/constants.d.ts +0 -17
- package/dist/config/constants.d.ts.map +0 -1
- package/dist/config/constants.js +0 -22
- package/dist/config/constants.js.map +0 -1
- package/dist/config/env-parsers.d.ts +0 -8
- package/dist/config/env-parsers.js +0 -96
- package/dist/config/formatting.d.ts +0 -7
- package/dist/config/formatting.d.ts.map +0 -1
- package/dist/config/formatting.js +0 -9
- package/dist/config/formatting.js.map +0 -1
- package/dist/config/index.d.ts +0 -61
- package/dist/config/index.d.ts.map +0 -1
- package/dist/config/index.js +0 -107
- package/dist/config/index.js.map +0 -1
- package/dist/config/types/content.d.ts +0 -41
- package/dist/config/types/content.d.ts.map +0 -1
- package/dist/config/types/content.js +0 -1
- package/dist/config/types/content.js.map +0 -1
- package/dist/config/types/runtime.d.ts +0 -72
- package/dist/config/types/runtime.d.ts.map +0 -1
- package/dist/config/types/runtime.js +0 -1
- package/dist/config/types/runtime.js.map +0 -1
- package/dist/config/types/tools.d.ts +0 -28
- package/dist/config/types/tools.d.ts.map +0 -1
- package/dist/config/types/tools.js +0 -1
- package/dist/config/types/tools.js.map +0 -1
- package/dist/config/types.d.ts +0 -4
- package/dist/config/types.d.ts.map +0 -1
- package/dist/config/types.js +0 -2
- package/dist/config/types.js.map +0 -1
- package/dist/errors/app-error.d.ts +0 -7
- package/dist/errors/app-error.d.ts.map +0 -1
- package/dist/errors/app-error.js +0 -16
- package/dist/errors/app-error.js.map +0 -1
- package/dist/errors/index.d.ts +0 -2
- package/dist/errors/index.d.ts.map +0 -1
- package/dist/errors/index.js +0 -2
- package/dist/errors/index.js.map +0 -1
- package/dist/http/accept-policy.d.ts +0 -3
- package/dist/http/accept-policy.js +0 -45
- package/dist/http/async-handler.d.ts +0 -2
- package/dist/http/async-handler.js +0 -5
- package/dist/http/auth-introspection.d.ts +0 -2
- package/dist/http/auth-introspection.js +0 -141
- package/dist/http/auth-static.d.ts +0 -2
- package/dist/http/auth-static.js +0 -23
- package/dist/http/auth.d.ts +0 -3
- package/dist/http/auth.d.ts.map +0 -1
- package/dist/http/auth.js +0 -269
- package/dist/http/auth.js.map +0 -1
- package/dist/http/base-middleware.d.ts +0 -7
- package/dist/http/base-middleware.js +0 -143
- package/dist/http/cors.d.ts +0 -2
- package/dist/http/cors.d.ts.map +0 -1
- package/dist/http/cors.js +0 -9
- package/dist/http/cors.js.map +0 -1
- package/dist/http/download-routes.d.ts +0 -2
- package/dist/http/download-routes.d.ts.map +0 -1
- package/dist/http/download-routes.js +0 -104
- package/dist/http/download-routes.js.map +0 -1
- package/dist/http/error-handler.d.ts +0 -2
- package/dist/http/error-handler.js +0 -55
- package/dist/http/host-allowlist.d.ts +0 -3
- package/dist/http/host-allowlist.js +0 -117
- package/dist/http/jsonrpc-http.d.ts +0 -2
- package/dist/http/jsonrpc-http.js +0 -10
- package/dist/http/mcp-routes.d.ts +0 -9
- package/dist/http/mcp-routes.d.ts.map +0 -1
- package/dist/http/mcp-routes.js +0 -204
- package/dist/http/mcp-routes.js.map +0 -1
- package/dist/http/mcp-session-eviction.d.ts +0 -3
- package/dist/http/mcp-session-eviction.js +0 -24
- package/dist/http/mcp-session-helpers.d.ts +0 -12
- package/dist/http/mcp-session-helpers.d.ts.map +0 -1
- package/dist/http/mcp-session-helpers.js +0 -64
- package/dist/http/mcp-session-helpers.js.map +0 -1
- package/dist/http/mcp-session-init.d.ts +0 -7
- package/dist/http/mcp-session-init.js +0 -94
- package/dist/http/mcp-session-slots.d.ts +0 -17
- package/dist/http/mcp-session-slots.js +0 -55
- package/dist/http/mcp-session-transport-init.d.ts +0 -7
- package/dist/http/mcp-session-transport-init.js +0 -41
- package/dist/http/mcp-session-transport.d.ts +0 -7
- package/dist/http/mcp-session-transport.js +0 -57
- package/dist/http/mcp-session-types.d.ts +0 -5
- package/dist/http/mcp-session-types.js +0 -1
- package/dist/http/mcp-session.d.ts +0 -10
- package/dist/http/mcp-session.d.ts.map +0 -1
- package/dist/http/mcp-session.js +0 -23
- package/dist/http/mcp-session.js.map +0 -1
- package/dist/http/mcp-sessions.d.ts +0 -41
- package/dist/http/mcp-sessions.js +0 -392
- package/dist/http/mcp-validation.d.ts +0 -3
- package/dist/http/mcp-validation.d.ts.map +0 -1
- package/dist/http/mcp-validation.js +0 -14
- package/dist/http/mcp-validation.js.map +0 -1
- package/dist/http/protocol-policy.d.ts +0 -2
- package/dist/http/protocol-policy.js +0 -31
- package/dist/http/rate-limit.d.ts +0 -12
- package/dist/http/rate-limit.d.ts.map +0 -1
- package/dist/http/rate-limit.js +0 -93
- package/dist/http/rate-limit.js.map +0 -1
- package/dist/http/server-config.d.ts +0 -1
- package/dist/http/server-config.js +0 -40
- package/dist/http/server-middleware.d.ts +0 -7
- package/dist/http/server-middleware.d.ts.map +0 -1
- package/dist/http/server-middleware.js +0 -52
- package/dist/http/server-middleware.js.map +0 -1
- package/dist/http/server-shutdown.d.ts +0 -4
- package/dist/http/server-shutdown.js +0 -43
- package/dist/http/server-tuning.d.ts +0 -9
- package/dist/http/server-tuning.js +0 -45
- package/dist/http/server.d.ts +0 -3
- package/dist/http/server.d.ts.map +0 -1
- package/dist/http/server.js +0 -291
- package/dist/http/server.js.map +0 -1
- package/dist/http/session-cleanup.d.ts +0 -2
- package/dist/http/session-cleanup.d.ts.map +0 -1
- package/dist/http/session-cleanup.js +0 -40
- package/dist/http/session-cleanup.js.map +0 -1
- package/dist/http/sessions.d.ts +0 -14
- package/dist/http/sessions.d.ts.map +0 -1
- package/dist/http/sessions.js +0 -63
- package/dist/http/sessions.js.map +0 -1
- package/dist/middleware/error-handler.d.ts +0 -2
- package/dist/middleware/error-handler.d.ts.map +0 -1
- package/dist/middleware/error-handler.js +0 -56
- package/dist/middleware/error-handler.js.map +0 -1
- package/dist/middleware/rate-limiter.d.ts +0 -16
- package/dist/middleware/rate-limiter.d.ts.map +0 -1
- package/dist/middleware/rate-limiter.js +0 -111
- package/dist/middleware/rate-limiter.js.map +0 -1
- package/dist/parsers/base-html-element-parser.d.ts +0 -43
- package/dist/parsers/base-html-element-parser.d.ts.map +0 -1
- package/dist/parsers/base-html-element-parser.js +0 -59
- package/dist/parsers/base-html-element-parser.js.map +0 -1
- package/dist/parsers/heading-element-parser.d.ts +0 -14
- package/dist/parsers/heading-element-parser.d.ts.map +0 -1
- package/dist/parsers/heading-element-parser.js +0 -26
- package/dist/parsers/heading-element-parser.js.map +0 -1
- package/dist/parsers/image-element-parser.d.ts +0 -16
- package/dist/parsers/image-element-parser.d.ts.map +0 -1
- package/dist/parsers/image-element-parser.js +0 -33
- package/dist/parsers/image-element-parser.js.map +0 -1
- package/dist/parsers/link-element-parser.d.ts +0 -15
- package/dist/parsers/link-element-parser.d.ts.map +0 -1
- package/dist/parsers/link-element-parser.js +0 -28
- package/dist/parsers/link-element-parser.js.map +0 -1
- package/dist/parsers/open-graph-parser.d.ts +0 -17
- package/dist/parsers/open-graph-parser.d.ts.map +0 -1
- package/dist/parsers/open-graph-parser.js +0 -41
- package/dist/parsers/open-graph-parser.js.map +0 -1
- package/dist/parsers/schema-org-parser.d.ts +0 -17
- package/dist/parsers/schema-org-parser.d.ts.map +0 -1
- package/dist/parsers/schema-org-parser.js +0 -32
- package/dist/parsers/schema-org-parser.js.map +0 -1
- package/dist/parsers/standard-meta-parser.d.ts +0 -18
- package/dist/parsers/standard-meta-parser.d.ts.map +0 -1
- package/dist/parsers/standard-meta-parser.js +0 -32
- package/dist/parsers/standard-meta-parser.js.map +0 -1
- package/dist/parsers/twitter-card-parser.d.ts +0 -17
- package/dist/parsers/twitter-card-parser.d.ts.map +0 -1
- package/dist/parsers/twitter-card-parser.js +0 -41
- package/dist/parsers/twitter-card-parser.js.map +0 -1
- package/dist/prompts/index.d.ts +0 -3
- package/dist/prompts/index.d.ts.map +0 -1
- package/dist/prompts/index.js +0 -73
- package/dist/prompts/index.js.map +0 -1
- package/dist/resources/cached-content-params.d.ts +0 -5
- package/dist/resources/cached-content-params.js +0 -36
- package/dist/resources/cached-content.d.ts +0 -2
- package/dist/resources/cached-content.d.ts.map +0 -1
- package/dist/resources/cached-content.js +0 -132
- package/dist/resources/cached-content.js.map +0 -1
- package/dist/resources/index.d.ts +0 -2
- package/dist/resources/index.d.ts.map +0 -1
- package/dist/resources/index.js +0 -4
- package/dist/resources/index.js.map +0 -1
- package/dist/server.d.ts +0 -3
- package/dist/server.d.ts.map +0 -1
- package/dist/server.js +0 -94
- package/dist/server.js.map +0 -1
- package/dist/services/cache-events.d.ts +0 -8
- package/dist/services/cache-events.js +0 -19
- package/dist/services/cache-keys.d.ts +0 -7
- package/dist/services/cache-keys.js +0 -57
- package/dist/services/cache.d.ts +0 -17
- package/dist/services/cache.d.ts.map +0 -1
- package/dist/services/cache.js +0 -145
- package/dist/services/cache.js.map +0 -1
- package/dist/services/cache.service.d.ts +0 -52
- package/dist/services/cache.service.d.ts.map +0 -1
- package/dist/services/cache.service.js +0 -113
- package/dist/services/cache.service.js.map +0 -1
- package/dist/services/card-extractor.d.ts +0 -6
- package/dist/services/card-extractor.d.ts.map +0 -1
- package/dist/services/card-extractor.js +0 -199
- package/dist/services/card-extractor.js.map +0 -1
- package/dist/services/context.d.ts +0 -10
- package/dist/services/context.d.ts.map +0 -1
- package/dist/services/context.js +0 -14
- package/dist/services/context.js.map +0 -1
- package/dist/services/extractor.d.ts +0 -5
- package/dist/services/extractor.d.ts.map +0 -1
- package/dist/services/extractor.js +0 -142
- package/dist/services/extractor.js.map +0 -1
- package/dist/services/extractor.service.d.ts +0 -18
- package/dist/services/extractor.service.d.ts.map +0 -1
- package/dist/services/extractor.service.js +0 -75
- package/dist/services/extractor.service.js.map +0 -1
- package/dist/services/fetcher/agents.d.ts +0 -3
- package/dist/services/fetcher/agents.d.ts.map +0 -1
- package/dist/services/fetcher/agents.js +0 -100
- package/dist/services/fetcher/agents.js.map +0 -1
- package/dist/services/fetcher/dns-selection.d.ts +0 -2
- package/dist/services/fetcher/dns-selection.js +0 -72
- package/dist/services/fetcher/errors.d.ts +0 -4
- package/dist/services/fetcher/errors.d.ts.map +0 -1
- package/dist/services/fetcher/errors.js +0 -70
- package/dist/services/fetcher/errors.js.map +0 -1
- package/dist/services/fetcher/headers.d.ts +0 -2
- package/dist/services/fetcher/headers.d.ts.map +0 -1
- package/dist/services/fetcher/headers.js +0 -6
- package/dist/services/fetcher/headers.js.map +0 -1
- package/dist/services/fetcher/interceptors.d.ts +0 -10
- package/dist/services/fetcher/interceptors.d.ts.map +0 -1
- package/dist/services/fetcher/interceptors.js +0 -108
- package/dist/services/fetcher/interceptors.js.map +0 -1
- package/dist/services/fetcher/redirects.d.ts +0 -4
- package/dist/services/fetcher/redirects.d.ts.map +0 -1
- package/dist/services/fetcher/redirects.js +0 -78
- package/dist/services/fetcher/redirects.js.map +0 -1
- package/dist/services/fetcher/response.d.ts +0 -4
- package/dist/services/fetcher/response.d.ts.map +0 -1
- package/dist/services/fetcher/response.js +0 -104
- package/dist/services/fetcher/response.js.map +0 -1
- package/dist/services/fetcher/retry-policy.d.ts +0 -1
- package/dist/services/fetcher/retry-policy.d.ts.map +0 -1
- package/dist/services/fetcher/retry-policy.js +0 -131
- package/dist/services/fetcher/retry-policy.js.map +0 -1
- package/dist/services/fetcher.d.ts +0 -25
- package/dist/services/fetcher.d.ts.map +0 -1
- package/dist/services/fetcher.js +0 -607
- package/dist/services/fetcher.js.map +0 -1
- package/dist/services/fetcher.service.d.ts +0 -18
- package/dist/services/fetcher.service.d.ts.map +0 -1
- package/dist/services/fetcher.service.js +0 -122
- package/dist/services/fetcher.service.js.map +0 -1
- package/dist/services/fifo-queue.d.ts +0 -8
- package/dist/services/fifo-queue.js +0 -25
- package/dist/services/logger.d.ts +0 -5
- package/dist/services/logger.d.ts.map +0 -1
- package/dist/services/logger.js +0 -52
- package/dist/services/logger.js.map +0 -1
- package/dist/services/logger.service.d.ts +0 -5
- package/dist/services/logger.service.d.ts.map +0 -1
- package/dist/services/logger.service.js +0 -57
- package/dist/services/logger.service.js.map +0 -1
- package/dist/services/metadata-collector.d.ts +0 -2
- package/dist/services/metadata-collector.js +0 -80
- package/dist/services/parser.d.ts +0 -6
- package/dist/services/parser.d.ts.map +0 -1
- package/dist/services/parser.js +0 -278
- package/dist/services/parser.js.map +0 -1
- package/dist/services/parser.service.d.ts +0 -42
- package/dist/services/parser.service.d.ts.map +0 -1
- package/dist/services/parser.service.js +0 -209
- package/dist/services/parser.service.js.map +0 -1
- package/dist/services/session-manager.d.ts +0 -18
- package/dist/services/session-manager.d.ts.map +0 -1
- package/dist/services/session-manager.js +0 -73
- package/dist/services/session-manager.js.map +0 -1
- package/dist/services/telemetry.d.ts +0 -19
- package/dist/services/telemetry.js +0 -43
- package/dist/services/transform-worker-pool.d.ts +0 -11
- package/dist/services/transform-worker-pool.js +0 -244
- package/dist/services/transform-worker-types.d.ts +0 -32
- package/dist/services/transform-worker-types.js +0 -14
- package/dist/strategies/exponential-backoff-strategy.d.ts +0 -13
- package/dist/strategies/exponential-backoff-strategy.d.ts.map +0 -1
- package/dist/strategies/exponential-backoff-strategy.js +0 -32
- package/dist/strategies/exponential-backoff-strategy.js.map +0 -1
- package/dist/tools/handlers/fetch-links/link-extractor.d.ts +0 -4
- package/dist/tools/handlers/fetch-links/link-extractor.d.ts.map +0 -1
- package/dist/tools/handlers/fetch-links/link-extractor.js +0 -159
- package/dist/tools/handlers/fetch-links/link-extractor.js.map +0 -1
- package/dist/tools/handlers/fetch-links.tool.d.ts +0 -5
- package/dist/tools/handlers/fetch-links.tool.d.ts.map +0 -1
- package/dist/tools/handlers/fetch-links.tool.js +0 -98
- package/dist/tools/handlers/fetch-links.tool.js.map +0 -1
- package/dist/tools/handlers/fetch-markdown.tool.d.ts +0 -11
- package/dist/tools/handlers/fetch-markdown.tool.d.ts.map +0 -1
- package/dist/tools/handlers/fetch-markdown.tool.js +0 -97
- package/dist/tools/handlers/fetch-markdown.tool.js.map +0 -1
- package/dist/tools/handlers/fetch-single.shared.d.ts +0 -31
- package/dist/tools/handlers/fetch-single.shared.d.ts.map +0 -1
- package/dist/tools/handlers/fetch-single.shared.js +0 -223
- package/dist/tools/handlers/fetch-single.shared.js.map +0 -1
- package/dist/tools/handlers/fetch-url.tool.d.ts +0 -10
- package/dist/tools/handlers/fetch-url.tool.d.ts.map +0 -1
- package/dist/tools/handlers/fetch-url.tool.js +0 -119
- package/dist/tools/handlers/fetch-url.tool.js.map +0 -1
- package/dist/tools/handlers/fetch-urls/processor.d.ts +0 -13
- package/dist/tools/handlers/fetch-urls/processor.d.ts.map +0 -1
- package/dist/tools/handlers/fetch-urls/processor.js +0 -153
- package/dist/tools/handlers/fetch-urls/processor.js.map +0 -1
- package/dist/tools/handlers/fetch-urls/response.d.ts +0 -3
- package/dist/tools/handlers/fetch-urls/response.d.ts.map +0 -1
- package/dist/tools/handlers/fetch-urls/response.js +0 -79
- package/dist/tools/handlers/fetch-urls/response.js.map +0 -1
- package/dist/tools/handlers/fetch-urls/validation.d.ts +0 -5
- package/dist/tools/handlers/fetch-urls/validation.d.ts.map +0 -1
- package/dist/tools/handlers/fetch-urls/validation.js +0 -18
- package/dist/tools/handlers/fetch-urls/validation.js.map +0 -1
- package/dist/tools/handlers/fetch-urls.tool.d.ts +0 -5
- package/dist/tools/handlers/fetch-urls.tool.d.ts.map +0 -1
- package/dist/tools/handlers/fetch-urls.tool.js +0 -124
- package/dist/tools/handlers/fetch-urls.tool.js.map +0 -1
- package/dist/tools/index.d.ts +0 -3
- package/dist/tools/index.d.ts.map +0 -1
- package/dist/tools/index.js +0 -37
- package/dist/tools/index.js.map +0 -1
- package/dist/tools/schemas.d.ts +0 -12
- package/dist/tools/schemas.d.ts.map +0 -1
- package/dist/tools/schemas.js +0 -21
- package/dist/tools/schemas.js.map +0 -1
- package/dist/tools/utils/cache-vary.d.ts +0 -1
- package/dist/tools/utils/cache-vary.d.ts.map +0 -1
- package/dist/tools/utils/cache-vary.js +0 -12
- package/dist/tools/utils/cache-vary.js.map +0 -1
- package/dist/tools/utils/cached-markdown.d.ts +0 -5
- package/dist/tools/utils/cached-markdown.js +0 -46
- package/dist/tools/utils/common.d.ts +0 -5
- package/dist/tools/utils/common.d.ts.map +0 -1
- package/dist/tools/utils/common.js +0 -42
- package/dist/tools/utils/common.js.map +0 -1
- package/dist/tools/utils/content-shaping.d.ts +0 -4
- package/dist/tools/utils/content-shaping.js +0 -67
- package/dist/tools/utils/content-transform-async.d.ts +0 -6
- package/dist/tools/utils/content-transform-async.js +0 -33
- package/dist/tools/utils/content-transform-core.d.ts +0 -5
- package/dist/tools/utils/content-transform-core.js +0 -180
- package/dist/tools/utils/content-transform-workers.d.ts +0 -1
- package/dist/tools/utils/content-transform-workers.js +0 -1
- package/dist/tools/utils/content-transform.d.ts +0 -3
- package/dist/tools/utils/content-transform.d.ts.map +0 -1
- package/dist/tools/utils/content-transform.js +0 -41
- package/dist/tools/utils/content-transform.js.map +0 -1
- package/dist/tools/utils/fetch-pipeline.d.ts +0 -2
- package/dist/tools/utils/fetch-pipeline.d.ts.map +0 -1
- package/dist/tools/utils/fetch-pipeline.js +0 -98
- package/dist/tools/utils/fetch-pipeline.js.map +0 -1
- package/dist/tools/utils/frontmatter.d.ts +0 -3
- package/dist/tools/utils/frontmatter.js +0 -73
- package/dist/tools/utils/index.d.ts +0 -4
- package/dist/tools/utils/index.d.ts.map +0 -1
- package/dist/tools/utils/index.js +0 -3
- package/dist/tools/utils/index.js.map +0 -1
- package/dist/tools/utils/inline-content.d.ts +0 -10
- package/dist/tools/utils/inline-content.d.ts.map +0 -1
- package/dist/tools/utils/inline-content.js +0 -35
- package/dist/tools/utils/inline-content.js.map +0 -1
- package/dist/tools/utils/markdown-heuristics.d.ts +0 -1
- package/dist/tools/utils/markdown-heuristics.js +0 -19
- package/dist/tools/utils/markdown-signals.d.ts +0 -1
- package/dist/tools/utils/markdown-signals.js +0 -19
- package/dist/tools/utils/markdown-toc.d.ts +0 -3
- package/dist/tools/utils/markdown-toc.d.ts.map +0 -1
- package/dist/tools/utils/markdown-toc.js +0 -35
- package/dist/tools/utils/markdown-toc.js.map +0 -1
- package/dist/tools/utils/raw-markdown-frontmatter.d.ts +0 -3
- package/dist/tools/utils/raw-markdown-frontmatter.js +0 -73
- package/dist/tools/utils/raw-markdown.d.ts +0 -6
- package/dist/tools/utils/raw-markdown.js +0 -149
- package/dist/tools/utils/response-builder.d.ts +0 -3
- package/dist/tools/utils/response-builder.d.ts.map +0 -1
- package/dist/tools/utils/response-builder.js +0 -24
- package/dist/tools/utils/response-builder.js.map +0 -1
- package/dist/tools/utils/tool-response.d.ts +0 -9
- package/dist/tools/utils/tool-response.d.ts.map +0 -1
- package/dist/tools/utils/tool-response.js +0 -19
- package/dist/tools/utils/tool-response.js.map +0 -1
- package/dist/transformers/jsonl.transformer.d.ts +0 -2
- package/dist/transformers/jsonl.transformer.d.ts.map +0 -1
- package/dist/transformers/jsonl.transformer.js +0 -75
- package/dist/transformers/jsonl.transformer.js.map +0 -1
- package/dist/transformers/markdown/fenced-code-rule.d.ts +0 -2
- package/dist/transformers/markdown/fenced-code-rule.js +0 -38
- package/dist/transformers/markdown/frontmatter.d.ts +0 -2
- package/dist/transformers/markdown/frontmatter.js +0 -45
- package/dist/transformers/markdown/noise-rule.d.ts +0 -2
- package/dist/transformers/markdown/noise-rule.js +0 -80
- package/dist/transformers/markdown/turndown-instance.d.ts +0 -2
- package/dist/transformers/markdown/turndown-instance.js +0 -19
- package/dist/transformers/markdown.d.ts +0 -5
- package/dist/transformers/markdown.js +0 -314
- package/dist/transformers/markdown.transformer.d.ts +0 -2
- package/dist/transformers/markdown.transformer.d.ts.map +0 -1
- package/dist/transformers/markdown.transformer.js +0 -14
- package/dist/transformers/markdown.transformer.js.map +0 -1
- package/dist/types/content.types.d.ts +0 -63
- package/dist/types/content.types.d.ts.map +0 -1
- package/dist/types/content.types.js +0 -2
- package/dist/types/content.types.js.map +0 -1
- package/dist/types/index.d.ts +0 -2
- package/dist/types/index.d.ts.map +0 -1
- package/dist/types/index.js +0 -2
- package/dist/types/index.js.map +0 -1
- package/dist/types/schemas.d.ts +0 -49
- package/dist/types/schemas.d.ts.map +0 -1
- package/dist/types/schemas.js +0 -5
- package/dist/types/schemas.js.map +0 -1
- package/dist/utils/cached-payload.d.ts +0 -7
- package/dist/utils/cached-payload.js +0 -36
- package/dist/utils/cancellation.d.ts +0 -1
- package/dist/utils/cancellation.js +0 -18
- package/dist/utils/code-language-bash.d.ts +0 -1
- package/dist/utils/code-language-bash.js +0 -48
- package/dist/utils/code-language-core.d.ts +0 -2
- package/dist/utils/code-language-core.js +0 -13
- package/dist/utils/code-language-detectors.d.ts +0 -5
- package/dist/utils/code-language-detectors.js +0 -142
- package/dist/utils/code-language-helpers.d.ts +0 -5
- package/dist/utils/code-language-helpers.js +0 -62
- package/dist/utils/code-language-parsing.d.ts +0 -5
- package/dist/utils/code-language-parsing.js +0 -62
- package/dist/utils/code-language.d.ts +0 -2
- package/dist/utils/code-language.d.ts.map +0 -1
- package/dist/utils/code-language.js +0 -260
- package/dist/utils/code-language.js.map +0 -1
- package/dist/utils/concurrency.d.ts +0 -3
- package/dist/utils/concurrency.d.ts.map +0 -1
- package/dist/utils/concurrency.js +0 -38
- package/dist/utils/concurrency.js.map +0 -1
- package/dist/utils/content-cleaner.d.ts +0 -5
- package/dist/utils/content-cleaner.d.ts.map +0 -1
- package/dist/utils/content-cleaner.js +0 -77
- package/dist/utils/content-cleaner.js.map +0 -1
- package/dist/utils/crypto.d.ts +0 -2
- package/dist/utils/crypto.d.ts.map +0 -1
- package/dist/utils/crypto.js +0 -32
- package/dist/utils/crypto.js.map +0 -1
- package/dist/utils/download-url.d.ts +0 -16
- package/dist/utils/download-url.d.ts.map +0 -1
- package/dist/utils/download-url.js +0 -30
- package/dist/utils/download-url.js.map +0 -1
- package/dist/utils/error-details.d.ts +0 -3
- package/dist/utils/error-details.js +0 -12
- package/dist/utils/error-utils.d.ts +0 -3
- package/dist/utils/error-utils.d.ts.map +0 -1
- package/dist/utils/error-utils.js +0 -12
- package/dist/utils/error-utils.js.map +0 -1
- package/dist/utils/filename-generator.d.ts +0 -1
- package/dist/utils/filename-generator.d.ts.map +0 -1
- package/dist/utils/filename-generator.js +0 -81
- package/dist/utils/filename-generator.js.map +0 -1
- package/dist/utils/guards.d.ts +0 -1
- package/dist/utils/guards.js +0 -3
- package/dist/utils/header-normalizer.d.ts +0 -5
- package/dist/utils/header-normalizer.d.ts.map +0 -1
- package/dist/utils/header-normalizer.js +0 -31
- package/dist/utils/header-normalizer.js.map +0 -1
- package/dist/utils/host-normalizer.d.ts +0 -1
- package/dist/utils/host-normalizer.js +0 -37
- package/dist/utils/html-truncator.d.ts +0 -1
- package/dist/utils/html-truncator.d.ts.map +0 -1
- package/dist/utils/html-truncator.js +0 -13
- package/dist/utils/html-truncator.js.map +0 -1
- package/dist/utils/ip-address.d.ts +0 -4
- package/dist/utils/ip-address.js +0 -6
- package/dist/utils/language-detector.d.ts +0 -2
- package/dist/utils/language-detector.d.ts.map +0 -1
- package/dist/utils/language-detector.js +0 -39
- package/dist/utils/language-detector.js.map +0 -1
- package/dist/utils/sanitizer.d.ts +0 -2
- package/dist/utils/sanitizer.d.ts.map +0 -1
- package/dist/utils/sanitizer.js +0 -20
- package/dist/utils/sanitizer.js.map +0 -1
- package/dist/utils/tool-error-handler.d.ts +0 -3
- package/dist/utils/tool-error-handler.d.ts.map +0 -1
- package/dist/utils/tool-error-handler.js +0 -31
- package/dist/utils/tool-error-handler.js.map +0 -1
- package/dist/utils/url-redactor.d.ts +0 -1
- package/dist/utils/url-redactor.js +0 -13
- package/dist/utils/url-sanitizer.d.ts +0 -2
- package/dist/utils/url-sanitizer.d.ts.map +0 -1
- package/dist/utils/url-sanitizer.js +0 -12
- package/dist/utils/url-sanitizer.js.map +0 -1
- package/dist/utils/url-transformer.d.ts +0 -7
- package/dist/utils/url-transformer.js +0 -147
- package/dist/utils/url-validator.d.ts +0 -6
- package/dist/utils/url-validator.d.ts.map +0 -1
- package/dist/utils/url-validator.js +0 -156
- package/dist/utils/url-validator.js.map +0 -1
- package/dist/workers/content-transform.worker.d.ts +0 -1
- package/dist/workers/content-transform.worker.js +0 -40
|
@@ -1,42 +0,0 @@
|
|
|
1
|
-
import { TRUNCATION_MARKER } from '../../config/formatting.js';
|
|
2
|
-
export function determineContentExtractionSource(extractMainContent, article) {
|
|
3
|
-
return extractMainContent && !!article;
|
|
4
|
-
}
|
|
5
|
-
export function createContentMetadataBlock(url, article, extractedMeta, shouldExtractFromArticle, includeMetadata) {
|
|
6
|
-
if (!includeMetadata)
|
|
7
|
-
return undefined;
|
|
8
|
-
const now = new Date().toISOString();
|
|
9
|
-
const metadata = {
|
|
10
|
-
type: 'metadata',
|
|
11
|
-
url,
|
|
12
|
-
fetchedAt: now,
|
|
13
|
-
};
|
|
14
|
-
if (shouldExtractFromArticle && article) {
|
|
15
|
-
if (article.title !== undefined)
|
|
16
|
-
metadata.title = article.title;
|
|
17
|
-
if (article.byline !== undefined)
|
|
18
|
-
metadata.author = article.byline;
|
|
19
|
-
return metadata;
|
|
20
|
-
}
|
|
21
|
-
if (extractedMeta.title !== undefined)
|
|
22
|
-
metadata.title = extractedMeta.title;
|
|
23
|
-
if (extractedMeta.description !== undefined) {
|
|
24
|
-
metadata.description = extractedMeta.description;
|
|
25
|
-
}
|
|
26
|
-
if (extractedMeta.author !== undefined)
|
|
27
|
-
metadata.author = extractedMeta.author;
|
|
28
|
-
return metadata;
|
|
29
|
-
}
|
|
30
|
-
export function truncateContent(content, maxLength, suffix = TRUNCATION_MARKER) {
|
|
31
|
-
if (maxLength === undefined ||
|
|
32
|
-
maxLength <= 0 ||
|
|
33
|
-
content.length <= maxLength) {
|
|
34
|
-
return { content, truncated: false };
|
|
35
|
-
}
|
|
36
|
-
const safeMax = Math.max(0, maxLength - suffix.length);
|
|
37
|
-
const marker = suffix.length > maxLength ? suffix.substring(0, maxLength) : suffix;
|
|
38
|
-
return {
|
|
39
|
-
content: `${content.substring(0, safeMax)}${marker}`,
|
|
40
|
-
truncated: true,
|
|
41
|
-
};
|
|
42
|
-
}
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"common.js","sourceRoot":"","sources":["../../../src/tools/utils/common.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,iBAAiB,EAAE,MAAM,4BAA4B,CAAC;AAQ/D,MAAM,UAAU,gCAAgC,CAC9C,kBAA2B,EAC3B,OAAgC;IAEhC,OAAO,kBAAkB,IAAI,CAAC,CAAC,OAAO,CAAC;AACzC,CAAC;AAED,MAAM,UAAU,0BAA0B,CACxC,GAAW,EACX,OAAgC,EAChC,aAAgC,EAChC,wBAAiC,EACjC,eAAwB;IAExB,IAAI,CAAC,eAAe;QAAE,OAAO,SAAS,CAAC;IACvC,MAAM,GAAG,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IACrC,OAAO,wBAAwB,IAAI,OAAO;QACxC,CAAC,CAAC;YACE,IAAI,EAAE,UAAU;YAChB,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,MAAM,EAAE,OAAO,CAAC,MAAM;YACtB,GAAG;YACH,SAAS,EAAE,GAAG;SACf;QACH,CAAC,CAAC;YACE,IAAI,EAAE,UAAU;YAChB,KAAK,EAAE,aAAa,CAAC,KAAK;YAC1B,WAAW,EAAE,aAAa,CAAC,WAAW;YACtC,MAAM,EAAE,aAAa,CAAC,MAAM;YAC5B,GAAG;YACH,SAAS,EAAE,GAAG;SACf,CAAC;AACR,CAAC;AAED,MAAM,UAAU,eAAe,CAC7B,OAAe,EACf,SAAkB,EAClB,MAAM,GAAG,iBAAiB;IAE1B,MAAM,cAAc,GAClB,SAAS,KAAK,SAAS,IAAI,SAAS,GAAG,CAAC,IAAI,OAAO,CAAC,MAAM,GAAG,SAAS,CAAC;IAEzE,IAAI,CAAC,cAAc,EAAE,CAAC;QACpB,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,KAAK,EAAE,CAAC;IACvC,CAAC;IAED,OAAO;QACL,OAAO,EAAE,GAAG,OAAO,CAAC,SAAS,CAAC,CAAC,EAAE,SAAS,CAAC,GAAG,MAAM,EAAE;QACtD,SAAS,EAAE,IAAI;KAChB,CAAC;AACJ,CAAC"}
|
|
@@ -1,4 +0,0 @@
|
|
|
1
|
-
import type { ExtractedArticle, ExtractedMetadata, MetadataBlock } from '../../config/types/content.js';
|
|
2
|
-
export declare function isExtractionSufficient(article: ExtractedArticle | null, originalHtml: string): boolean;
|
|
3
|
-
export declare function determineContentExtractionSource(article: ExtractedArticle | null): article is ExtractedArticle;
|
|
4
|
-
export declare function createContentMetadataBlock(url: string, article: ExtractedArticle | null, extractedMeta: ExtractedMetadata, shouldExtractFromArticle: boolean, includeMetadata: boolean): MetadataBlock | undefined;
|
|
@@ -1,67 +0,0 @@
|
|
|
1
|
-
const MIN_CONTENT_RATIO = 0.3;
|
|
2
|
-
const MIN_HTML_LENGTH_FOR_GATE = 100;
|
|
3
|
-
function stripHtmlTags(html) {
|
|
4
|
-
const parts = [];
|
|
5
|
-
let inTag = false;
|
|
6
|
-
for (const char of html) {
|
|
7
|
-
if (char === '<') {
|
|
8
|
-
inTag = true;
|
|
9
|
-
continue;
|
|
10
|
-
}
|
|
11
|
-
if (char === '>') {
|
|
12
|
-
inTag = false;
|
|
13
|
-
continue;
|
|
14
|
-
}
|
|
15
|
-
if (!inTag) {
|
|
16
|
-
parts.push(char);
|
|
17
|
-
}
|
|
18
|
-
}
|
|
19
|
-
return parts.join('');
|
|
20
|
-
}
|
|
21
|
-
function estimateTextLength(html) {
|
|
22
|
-
return stripHtmlTags(html).replace(/\s+/g, ' ').trim().length;
|
|
23
|
-
}
|
|
24
|
-
export function isExtractionSufficient(article, originalHtml) {
|
|
25
|
-
if (!article)
|
|
26
|
-
return false;
|
|
27
|
-
const articleLength = article.textContent.length;
|
|
28
|
-
const originalLength = estimateTextLength(originalHtml);
|
|
29
|
-
if (originalLength < MIN_HTML_LENGTH_FOR_GATE)
|
|
30
|
-
return true;
|
|
31
|
-
return articleLength / originalLength >= MIN_CONTENT_RATIO;
|
|
32
|
-
}
|
|
33
|
-
export function determineContentExtractionSource(article) {
|
|
34
|
-
return !!article;
|
|
35
|
-
}
|
|
36
|
-
function applyArticleMetadata(metadata, article) {
|
|
37
|
-
if (article.title !== undefined)
|
|
38
|
-
metadata.title = article.title;
|
|
39
|
-
if (article.byline !== undefined)
|
|
40
|
-
metadata.author = article.byline;
|
|
41
|
-
}
|
|
42
|
-
function applyExtractedMetadata(metadata, extractedMeta) {
|
|
43
|
-
if (extractedMeta.title !== undefined)
|
|
44
|
-
metadata.title = extractedMeta.title;
|
|
45
|
-
if (extractedMeta.description !== undefined) {
|
|
46
|
-
metadata.description = extractedMeta.description;
|
|
47
|
-
}
|
|
48
|
-
if (extractedMeta.author !== undefined) {
|
|
49
|
-
metadata.author = extractedMeta.author;
|
|
50
|
-
}
|
|
51
|
-
}
|
|
52
|
-
export function createContentMetadataBlock(url, article, extractedMeta, shouldExtractFromArticle, includeMetadata) {
|
|
53
|
-
if (!includeMetadata)
|
|
54
|
-
return undefined;
|
|
55
|
-
const now = new Date().toISOString();
|
|
56
|
-
const metadata = {
|
|
57
|
-
type: 'metadata',
|
|
58
|
-
url,
|
|
59
|
-
fetchedAt: now,
|
|
60
|
-
};
|
|
61
|
-
if (shouldExtractFromArticle && article) {
|
|
62
|
-
applyArticleMetadata(metadata, article);
|
|
63
|
-
return metadata;
|
|
64
|
-
}
|
|
65
|
-
applyExtractedMetadata(metadata, extractedMeta);
|
|
66
|
-
return metadata;
|
|
67
|
-
}
|
|
@@ -1,6 +0,0 @@
|
|
|
1
|
-
import type { JsonlTransformResult, MarkdownTransformResult, TransformOptions } from '../../config/types/content.js';
|
|
2
|
-
export declare function transformHtmlToJsonlAsync(html: string, url: string, options: TransformOptions): Promise<JsonlTransformResult>;
|
|
3
|
-
export declare function transformHtmlToMarkdownAsync(html: string, url: string, options: TransformOptions): Promise<MarkdownTransformResult>;
|
|
4
|
-
export declare function transformHtmlToMarkdownWithBlocksAsync(html: string, url: string, options: TransformOptions & {
|
|
5
|
-
includeContentBlocks?: boolean;
|
|
6
|
-
}): Promise<JsonlTransformResult>;
|
|
@@ -1,33 +0,0 @@
|
|
|
1
|
-
import { logWarn } from '../../services/logger.js';
|
|
2
|
-
import { runTransformInWorker, } from '../../services/transform-worker-pool.js';
|
|
3
|
-
import { transformHtmlToJsonl, transformHtmlToMarkdown, transformHtmlToMarkdownWithBlocks, } from './content-transform.js';
|
|
4
|
-
async function runOrFallback(job, fallback) {
|
|
5
|
-
try {
|
|
6
|
-
const result = await runTransformInWorker(job);
|
|
7
|
-
if (result)
|
|
8
|
-
return result;
|
|
9
|
-
}
|
|
10
|
-
catch (error) {
|
|
11
|
-
logWarn('Transform worker unavailable; using main thread', {
|
|
12
|
-
error: error instanceof Error ? error.message : String(error),
|
|
13
|
-
});
|
|
14
|
-
}
|
|
15
|
-
return fallback();
|
|
16
|
-
}
|
|
17
|
-
export async function transformHtmlToJsonlAsync(html, url, options) {
|
|
18
|
-
const result = await runOrFallback({ mode: 'jsonl', html, url, options }, () => transformHtmlToJsonl(html, url, options));
|
|
19
|
-
return result;
|
|
20
|
-
}
|
|
21
|
-
export async function transformHtmlToMarkdownAsync(html, url, options) {
|
|
22
|
-
const result = await runOrFallback({ mode: 'markdown', html, url, options }, () => transformHtmlToMarkdown(html, url, options));
|
|
23
|
-
return result;
|
|
24
|
-
}
|
|
25
|
-
export async function transformHtmlToMarkdownWithBlocksAsync(html, url, options) {
|
|
26
|
-
const result = await runOrFallback({
|
|
27
|
-
mode: 'markdown-blocks',
|
|
28
|
-
html,
|
|
29
|
-
url,
|
|
30
|
-
options,
|
|
31
|
-
}, () => transformHtmlToMarkdownWithBlocks(html, url, options));
|
|
32
|
-
return result;
|
|
33
|
-
}
|
|
@@ -1,5 +0,0 @@
|
|
|
1
|
-
import type { ExtractedArticle, ExtractedMetadata, MarkdownTransformResult, MetadataBlock, TransformOptions } from '../../config/types/content.js';
|
|
2
|
-
export declare function isExtractionSufficient(article: ExtractedArticle | null, originalHtml: string): boolean;
|
|
3
|
-
export declare function determineContentExtractionSource(article: ExtractedArticle | null): article is ExtractedArticle;
|
|
4
|
-
export declare function createContentMetadataBlock(url: string, article: ExtractedArticle | null, extractedMeta: ExtractedMetadata, shouldExtractFromArticle: boolean, includeMetadata: boolean): MetadataBlock | undefined;
|
|
5
|
-
export declare function transformHtmlToMarkdownInProcess(html: string, url: string, options: TransformOptions): MarkdownTransformResult;
|
|
@@ -1,180 +0,0 @@
|
|
|
1
|
-
import { extractContent } from '../../services/extractor.js';
|
|
2
|
-
import { logDebug } from '../../services/logger.js';
|
|
3
|
-
import { endTransformStage, startTransformStage, } from '../../services/telemetry.js';
|
|
4
|
-
import { throwIfAborted } from '../../utils/cancellation.js';
|
|
5
|
-
import { htmlToMarkdown } from '../../transformers/markdown.js';
|
|
6
|
-
import { tryTransformRawContent } from './raw-markdown.js';
|
|
7
|
-
const MIN_CONTENT_RATIO = 0.3;
|
|
8
|
-
const MIN_HTML_LENGTH_FOR_GATE = 100;
|
|
9
|
-
function stripHtmlTags(html) {
|
|
10
|
-
const parts = [];
|
|
11
|
-
let inTag = false;
|
|
12
|
-
for (const char of html) {
|
|
13
|
-
if (char === '<') {
|
|
14
|
-
inTag = true;
|
|
15
|
-
continue;
|
|
16
|
-
}
|
|
17
|
-
if (char === '>') {
|
|
18
|
-
inTag = false;
|
|
19
|
-
continue;
|
|
20
|
-
}
|
|
21
|
-
if (!inTag) {
|
|
22
|
-
parts.push(char);
|
|
23
|
-
}
|
|
24
|
-
}
|
|
25
|
-
return parts.join('');
|
|
26
|
-
}
|
|
27
|
-
function estimateTextLength(html) {
|
|
28
|
-
return stripHtmlTags(html).replace(/\s+/g, ' ').trim().length;
|
|
29
|
-
}
|
|
30
|
-
export function isExtractionSufficient(article, originalHtml) {
|
|
31
|
-
if (!article)
|
|
32
|
-
return false;
|
|
33
|
-
const articleLength = article.textContent.length;
|
|
34
|
-
const originalLength = estimateTextLength(originalHtml);
|
|
35
|
-
if (originalLength < MIN_HTML_LENGTH_FOR_GATE)
|
|
36
|
-
return true;
|
|
37
|
-
return articleLength / originalLength >= MIN_CONTENT_RATIO;
|
|
38
|
-
}
|
|
39
|
-
export function determineContentExtractionSource(article) {
|
|
40
|
-
return !!article;
|
|
41
|
-
}
|
|
42
|
-
function applyArticleMetadata(metadata, article) {
|
|
43
|
-
if (article.title !== undefined)
|
|
44
|
-
metadata.title = article.title;
|
|
45
|
-
if (article.byline !== undefined)
|
|
46
|
-
metadata.author = article.byline;
|
|
47
|
-
}
|
|
48
|
-
function applyExtractedMetadata(metadata, extractedMeta) {
|
|
49
|
-
if (extractedMeta.title !== undefined)
|
|
50
|
-
metadata.title = extractedMeta.title;
|
|
51
|
-
if (extractedMeta.description !== undefined) {
|
|
52
|
-
metadata.description = extractedMeta.description;
|
|
53
|
-
}
|
|
54
|
-
if (extractedMeta.author !== undefined) {
|
|
55
|
-
metadata.author = extractedMeta.author;
|
|
56
|
-
}
|
|
57
|
-
}
|
|
58
|
-
export function createContentMetadataBlock(url, article, extractedMeta, shouldExtractFromArticle, includeMetadata) {
|
|
59
|
-
if (!includeMetadata)
|
|
60
|
-
return undefined;
|
|
61
|
-
const now = new Date().toISOString();
|
|
62
|
-
const metadata = {
|
|
63
|
-
type: 'metadata',
|
|
64
|
-
url,
|
|
65
|
-
fetchedAt: now,
|
|
66
|
-
};
|
|
67
|
-
if (shouldExtractFromArticle && article) {
|
|
68
|
-
applyArticleMetadata(metadata, article);
|
|
69
|
-
return metadata;
|
|
70
|
-
}
|
|
71
|
-
applyExtractedMetadata(metadata, extractedMeta);
|
|
72
|
-
return metadata;
|
|
73
|
-
}
|
|
74
|
-
function buildArticleContentSource({ url, article, extractedMeta, includeMetadata, }) {
|
|
75
|
-
const metadata = createContentMetadataBlock(url, article, extractedMeta, true, includeMetadata);
|
|
76
|
-
return {
|
|
77
|
-
sourceHtml: article.content,
|
|
78
|
-
title: article.title,
|
|
79
|
-
metadata,
|
|
80
|
-
};
|
|
81
|
-
}
|
|
82
|
-
function buildFullHtmlContentSource({ html, url, article, extractedMeta, includeMetadata, }) {
|
|
83
|
-
const metadata = createContentMetadataBlock(url, article, extractedMeta, false, includeMetadata);
|
|
84
|
-
return {
|
|
85
|
-
sourceHtml: html,
|
|
86
|
-
title: extractedMeta.title,
|
|
87
|
-
metadata,
|
|
88
|
-
};
|
|
89
|
-
}
|
|
90
|
-
function logQualityGateFallback({ url, articleLength, }) {
|
|
91
|
-
logDebug('Quality gate: Readability extraction below threshold, using full HTML', {
|
|
92
|
-
url: url.substring(0, 80),
|
|
93
|
-
articleLength,
|
|
94
|
-
});
|
|
95
|
-
}
|
|
96
|
-
function tryBuildExtractedArticleContentSource({ html, url, article, extractedMeta, includeMetadata, }) {
|
|
97
|
-
if (!article)
|
|
98
|
-
return null;
|
|
99
|
-
const shouldExtractFromArticle = determineContentExtractionSource(article);
|
|
100
|
-
if (shouldExtractFromArticle && isExtractionSufficient(article, html)) {
|
|
101
|
-
return buildArticleContentSource({
|
|
102
|
-
url,
|
|
103
|
-
article,
|
|
104
|
-
extractedMeta,
|
|
105
|
-
includeMetadata,
|
|
106
|
-
});
|
|
107
|
-
}
|
|
108
|
-
if (shouldExtractFromArticle) {
|
|
109
|
-
logQualityGateFallback({
|
|
110
|
-
url,
|
|
111
|
-
articleLength: article.textContent.length,
|
|
112
|
-
});
|
|
113
|
-
}
|
|
114
|
-
return null;
|
|
115
|
-
}
|
|
116
|
-
function resolveContentSource({ html, url, includeMetadata, signal, }) {
|
|
117
|
-
const { article, metadata: extractedMeta } = extractContent(html, url, {
|
|
118
|
-
extractArticle: true,
|
|
119
|
-
...(signal ? { signal } : {}),
|
|
120
|
-
});
|
|
121
|
-
const extracted = tryBuildExtractedArticleContentSource({
|
|
122
|
-
html,
|
|
123
|
-
url,
|
|
124
|
-
article,
|
|
125
|
-
extractedMeta,
|
|
126
|
-
includeMetadata,
|
|
127
|
-
});
|
|
128
|
-
if (extracted)
|
|
129
|
-
return extracted;
|
|
130
|
-
return buildFullHtmlContentSource({
|
|
131
|
-
html,
|
|
132
|
-
url,
|
|
133
|
-
article,
|
|
134
|
-
extractedMeta,
|
|
135
|
-
includeMetadata,
|
|
136
|
-
});
|
|
137
|
-
}
|
|
138
|
-
export function transformHtmlToMarkdownInProcess(html, url, options) {
|
|
139
|
-
const totalStage = startTransformStage(url, 'transform:total');
|
|
140
|
-
let success = false;
|
|
141
|
-
try {
|
|
142
|
-
throwIfAborted(options.signal, url, 'transform:begin');
|
|
143
|
-
const rawStage = startTransformStage(url, 'transform:raw');
|
|
144
|
-
const raw = tryTransformRawContent({
|
|
145
|
-
html,
|
|
146
|
-
url,
|
|
147
|
-
includeMetadata: options.includeMetadata,
|
|
148
|
-
});
|
|
149
|
-
endTransformStage(rawStage);
|
|
150
|
-
if (raw) {
|
|
151
|
-
success = true;
|
|
152
|
-
return raw;
|
|
153
|
-
}
|
|
154
|
-
const extractStage = startTransformStage(url, 'transform:extract');
|
|
155
|
-
const context = resolveContentSource({
|
|
156
|
-
html,
|
|
157
|
-
url,
|
|
158
|
-
includeMetadata: options.includeMetadata,
|
|
159
|
-
...(options.signal ? { signal: options.signal } : {}),
|
|
160
|
-
});
|
|
161
|
-
endTransformStage(extractStage);
|
|
162
|
-
const markdownStage = startTransformStage(url, 'transform:markdown');
|
|
163
|
-
const content = htmlToMarkdown(context.sourceHtml, context.metadata, {
|
|
164
|
-
url,
|
|
165
|
-
...(options.signal ? { signal: options.signal } : {}),
|
|
166
|
-
});
|
|
167
|
-
endTransformStage(markdownStage);
|
|
168
|
-
success = true;
|
|
169
|
-
return {
|
|
170
|
-
markdown: content,
|
|
171
|
-
title: context.title,
|
|
172
|
-
truncated: false,
|
|
173
|
-
};
|
|
174
|
-
}
|
|
175
|
-
finally {
|
|
176
|
-
if (success) {
|
|
177
|
-
endTransformStage(totalStage, { truncated: false });
|
|
178
|
-
}
|
|
179
|
-
}
|
|
180
|
-
}
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
export {};
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
export {};
|
|
@@ -1,3 +0,0 @@
|
|
|
1
|
-
import type { MarkdownTransformResult, TransformOptions } from '../../config/types/content.js';
|
|
2
|
-
export { createContentMetadataBlock, determineContentExtractionSource, isExtractionSufficient, } from './content-transform-core.js';
|
|
3
|
-
export declare function transformHtmlToMarkdown(html: string, url: string, options: TransformOptions): Promise<MarkdownTransformResult>;
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"content-transform.d.ts","sourceRoot":"","sources":["../../../src/tools/utils/content-transform.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EACV,oBAAoB,EACpB,uBAAuB,EACxB,MAAM,+BAA+B,CAAC;AAcvC,UAAU,iBAAiB;IACzB,QAAQ,CAAC,kBAAkB,EAAE,OAAO,CAAC;IACrC,QAAQ,CAAC,eAAe,EAAE,OAAO,CAAC;CACnC;AAQD,UAAU,oBAAoB;IAC5B,QAAQ,CAAC,gBAAgB,CAAC,EAAE,MAAM,CAAC;CACpC;AAED,UAAU,eAAgB,SAAQ,iBAAiB,EAAE,oBAAoB;CAAG;AA4D5E,wBAAgB,oBAAoB,CAClC,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,iBAAiB,GAAG,oBAAoB,GAChD,oBAAoB,CAatB;AAED,wBAAgB,uBAAuB,CACrC,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,eAAe,GACvB,uBAAuB,CAYzB;AAED,wBAAgB,iCAAiC,CAC/C,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,iBAAiB,GAAG,oBAAoB,GAChD,oBAAoB,CActB"}
|
|
@@ -1,41 +0,0 @@
|
|
|
1
|
-
import { FetchError } from '../../errors/app-error.js';
|
|
2
|
-
import { endTransformStage, startTransformStage, } from '../../services/telemetry.js';
|
|
3
|
-
import { getOrCreateTransformWorkerPool } from '../../services/transform-worker-pool.js';
|
|
4
|
-
import { throwIfAborted } from '../../utils/cancellation.js';
|
|
5
|
-
import { transformHtmlToMarkdownInProcess } from './content-transform-core.js';
|
|
6
|
-
export { createContentMetadataBlock, determineContentExtractionSource, isExtractionSufficient, } from './content-transform-core.js';
|
|
7
|
-
export async function transformHtmlToMarkdown(html, url, options) {
|
|
8
|
-
const totalStage = startTransformStage(url, 'transform:total');
|
|
9
|
-
let success = false;
|
|
10
|
-
try {
|
|
11
|
-
throwIfAborted(options.signal, url, 'transform:begin');
|
|
12
|
-
const workerStage = startTransformStage(url, 'transform:worker');
|
|
13
|
-
try {
|
|
14
|
-
const pool = getOrCreateTransformWorkerPool();
|
|
15
|
-
const result = await pool.transform(html, url, {
|
|
16
|
-
includeMetadata: options.includeMetadata,
|
|
17
|
-
...(options.signal ? { signal: options.signal } : {}),
|
|
18
|
-
});
|
|
19
|
-
success = true;
|
|
20
|
-
return result;
|
|
21
|
-
}
|
|
22
|
-
catch (error) {
|
|
23
|
-
if (error instanceof FetchError) {
|
|
24
|
-
throw error;
|
|
25
|
-
}
|
|
26
|
-
// Stability-first: if worker infrastructure fails, fall back to in-process.
|
|
27
|
-
throwIfAborted(options.signal, url, 'transform:worker-fallback');
|
|
28
|
-
const fallback = transformHtmlToMarkdownInProcess(html, url, options);
|
|
29
|
-
success = true;
|
|
30
|
-
return fallback;
|
|
31
|
-
}
|
|
32
|
-
finally {
|
|
33
|
-
endTransformStage(workerStage);
|
|
34
|
-
}
|
|
35
|
-
}
|
|
36
|
-
finally {
|
|
37
|
-
if (success) {
|
|
38
|
-
endTransformStage(totalStage, { truncated: false });
|
|
39
|
-
}
|
|
40
|
-
}
|
|
41
|
-
}
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"content-transform.js","sourceRoot":"","sources":["../../../src/tools/utils/content-transform.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,iBAAiB,EAAE,MAAM,4BAA4B,CAAC;AAM/D,OAAO,EAAE,cAAc,EAAE,MAAM,6BAA6B,CAAC;AAC7D,OAAO,EAAE,SAAS,EAAE,MAAM,0BAA0B,CAAC;AAErD,OAAO,EAAE,OAAO,EAAE,MAAM,yCAAyC,CAAC;AAClE,OAAO,EAAE,cAAc,EAAE,MAAM,4CAA4C,CAAC;AAE5E,OAAO,EACL,0BAA0B,EAC1B,gCAAgC,EAChC,eAAe,GAChB,MAAM,aAAa,CAAC;AAmBrB,SAAS,oBAAoB,CAC3B,IAAY,EACZ,GAAW,EACX,OAA0B;IAE1B,MAAM,EAAE,OAAO,EAAE,QAAQ,EAAE,aAAa,EAAE,GAAG,cAAc,CAAC,IAAI,EAAE,GAAG,EAAE;QACrE,cAAc,EAAE,OAAO,CAAC,kBAAkB;KAC3C,CAAC,CAAC;IAEH,MAAM,wBAAwB,GAAG,gCAAgC,CAC/D,OAAO,CAAC,kBAAkB,EAC1B,OAAO,CACR,CAAC;IAEF,MAAM,UAAU,GAAG,wBAAwB,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC;IACrE,MAAM,QAAQ,GAAG,0BAA0B,CACzC,GAAG,EACH,OAAO,EACP,aAAa,EACb,wBAAwB,EACxB,OAAO,CAAC,eAAe,CACxB,CAAC;IACF,MAAM,KAAK,GAAG,wBAAwB,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,aAAa,CAAC,KAAK,CAAC;IAE7E,OAAO,EAAE,UAAU,EAAE,KAAK,EAAE,QAAQ,EAAE,CAAC;AACzC,CAAC;AAED,SAAS,iBAAiB,CACxB,OAAsB,EACtB,gBAAyB;IAEzB,MAAM,aAAa,GAAG,SAAS,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;IACpD,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE,GAAG,eAAe,CAC5C,OAAO,CAAC,aAAa,EAAE,OAAO,CAAC,QAAQ,CAAC,EACxC,gBAAgB,CACjB,CAAC;IAEF,OAAO;QACL,OAAO;QACP,aAAa,EAAE,aAAa,CAAC,MAAM;QACnC,SAAS;KACV,CAAC;AACJ,CAAC;AAED,SAAS,oBAAoB,CAC3B,OAAsB,EACtB,gBAAyB;IAEzB,MAAM,QAAQ,GAAG,cAAc,CAAC,OAAO,CAAC,UAAU,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC;IACtE,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE,GAAG,eAAe,CAC5C,QAAQ,EACR,gBAAgB,EAChB,iBAAiB,CAClB,CAAC;IAEF,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,CAAC;AAChC,CAAC;AAED,MAAM,UAAU,oBAAoB,CAClC,IAAY,EACZ,GAAW,EACX,OAAiD;IAEjD,MAAM,OAAO,GAAG,oBAAoB,CAAC,IAAI,EAAE,GAAG,EAAE,OAAO,CAAC,CAAC;IACzD,MAAM,EAAE,OAAO,EAAE,aAAa,EAAE,SAAS,EAAE,GAAG,iBAAiB,CAC7D,OAAO,EACP,OAAO,CAAC,gBAAgB,CACzB,CAAC;IAEF,OAAO;QACL,OAAO;QACP,aAAa;QACb,KAAK,EAAE,OAAO,CAAC,KAAK;QACpB,GAAG,CAAC,SAAS,IAAI,EAAE,SAAS,EAAE,CAAC;KAChC,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,uBAAuB,CACrC,IAAY,EACZ,GAAW,EACX,OAAwB;IAExB,MAAM,OAAO,GAAG,oBAAoB,CAAC,IAAI,EAAE,GAAG,EAAE,OAAO,CAAC,CAAC;IACzD,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE,GAAG,oBAAoB,CACjD,OAAO,EACP,OAAO,CAAC,gBAAgB,CACzB,CAAC;IAEF,OAAO;QACL,QAAQ,EAAE,OAAO;QACjB,KAAK,EAAE,OAAO,CAAC,KAAK;QACpB,SAAS;KACV,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,iCAAiC,CAC/C,IAAY,EACZ,GAAW,EACX,OAAiD;IAEjD,MAAM,OAAO,GAAG,oBAAoB,CAAC,IAAI,EAAE,GAAG,EAAE,OAAO,CAAC,CAAC;IACzD,MAAM,aAAa,GAAG,SAAS,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;IACpD,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE,GAAG,oBAAoB,CACjD,OAAO,EACP,OAAO,CAAC,gBAAgB,CACzB,CAAC;IAEF,OAAO;QACL,OAAO;QACP,aAAa,EAAE,aAAa,CAAC,MAAM;QACnC,KAAK,EAAE,OAAO,CAAC,KAAK;QACpB,GAAG,CAAC,SAAS,IAAI,EAAE,SAAS,EAAE,CAAC;KAChC,CAAC;AACJ,CAAC"}
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"fetch-pipeline.d.ts","sourceRoot":"","sources":["../../../src/tools/utils/fetch-pipeline.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAEV,oBAAoB,EACpB,cAAc,EACf,MAAM,+BAA+B,CAAC;AAuDvC;;;;;;;GAOG;AACH,wBAAsB,oBAAoB,CAAC,CAAC,EAC1C,OAAO,EAAE,oBAAoB,CAAC,CAAC,CAAC,GAC/B,OAAO,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,CAwB5B"}
|
|
@@ -1,98 +0,0 @@
|
|
|
1
|
-
import * as cache from '../../services/cache.js';
|
|
2
|
-
import { createCacheKey } from '../../services/cache-keys.js';
|
|
3
|
-
import { fetchNormalizedUrl } from '../../services/fetcher.js';
|
|
4
|
-
import { logDebug } from '../../services/logger.js';
|
|
5
|
-
import { isRecord } from '../../utils/guards.js';
|
|
6
|
-
import { transformToRawUrl } from '../../utils/url-transformer.js';
|
|
7
|
-
import { normalizeUrl } from '../../utils/url-validator.js';
|
|
8
|
-
function attemptCacheRetrieval({ cacheKey, deserialize, cacheNamespace, normalizedUrl, }) {
|
|
9
|
-
if (!cacheKey)
|
|
10
|
-
return null;
|
|
11
|
-
const cached = cache.get(cacheKey);
|
|
12
|
-
if (!cached)
|
|
13
|
-
return null;
|
|
14
|
-
if (!deserialize) {
|
|
15
|
-
logCacheMiss('missing deserializer', cacheNamespace, normalizedUrl);
|
|
16
|
-
return null;
|
|
17
|
-
}
|
|
18
|
-
const data = deserialize(cached.content);
|
|
19
|
-
if (data === undefined) {
|
|
20
|
-
logCacheMiss('deserialize failure', cacheNamespace, normalizedUrl);
|
|
21
|
-
return null;
|
|
22
|
-
}
|
|
23
|
-
logDebug('Cache hit', { namespace: cacheNamespace, url: normalizedUrl });
|
|
24
|
-
return {
|
|
25
|
-
data,
|
|
26
|
-
fromCache: true,
|
|
27
|
-
url: normalizedUrl,
|
|
28
|
-
fetchedAt: cached.fetchedAt,
|
|
29
|
-
cacheKey,
|
|
30
|
-
};
|
|
31
|
-
}
|
|
32
|
-
function resolveNormalizedUrl(url) {
|
|
33
|
-
const { normalizedUrl: validatedUrl } = normalizeUrl(url);
|
|
34
|
-
const { url: normalizedUrl, transformed } = transformToRawUrl(validatedUrl);
|
|
35
|
-
return { normalizedUrl, originalUrl: validatedUrl, transformed };
|
|
36
|
-
}
|
|
37
|
-
export async function executeFetchPipeline(options) {
|
|
38
|
-
const resolvedUrl = resolveNormalizedUrl(options.url);
|
|
39
|
-
logRawUrlTransformation(resolvedUrl);
|
|
40
|
-
const cacheKey = createCacheKey(options.cacheNamespace, resolvedUrl.normalizedUrl, options.cacheVary);
|
|
41
|
-
const cachedResult = attemptCacheRetrieval({
|
|
42
|
-
cacheKey,
|
|
43
|
-
deserialize: options.deserialize,
|
|
44
|
-
cacheNamespace: options.cacheNamespace,
|
|
45
|
-
normalizedUrl: resolvedUrl.normalizedUrl,
|
|
46
|
-
});
|
|
47
|
-
if (cachedResult)
|
|
48
|
-
return cachedResult;
|
|
49
|
-
logDebug('Fetching URL', { url: resolvedUrl.normalizedUrl });
|
|
50
|
-
const fetchOptions = options.signal === undefined ? {} : { signal: options.signal };
|
|
51
|
-
const html = await fetchNormalizedUrl(resolvedUrl.normalizedUrl, fetchOptions);
|
|
52
|
-
const data = await options.transform(html, resolvedUrl.normalizedUrl);
|
|
53
|
-
if (cache.isEnabled()) {
|
|
54
|
-
persistCache({
|
|
55
|
-
cacheKey,
|
|
56
|
-
data,
|
|
57
|
-
serialize: options.serialize,
|
|
58
|
-
normalizedUrl: resolvedUrl.normalizedUrl,
|
|
59
|
-
});
|
|
60
|
-
}
|
|
61
|
-
return {
|
|
62
|
-
data,
|
|
63
|
-
fromCache: false,
|
|
64
|
-
url: resolvedUrl.normalizedUrl,
|
|
65
|
-
fetchedAt: new Date().toISOString(),
|
|
66
|
-
cacheKey,
|
|
67
|
-
};
|
|
68
|
-
}
|
|
69
|
-
function persistCache({ cacheKey, data, serialize, normalizedUrl, }) {
|
|
70
|
-
if (!cacheKey)
|
|
71
|
-
return;
|
|
72
|
-
const serializer = serialize ?? JSON.stringify;
|
|
73
|
-
const title = extractTitle(data);
|
|
74
|
-
const metadata = {
|
|
75
|
-
url: normalizedUrl,
|
|
76
|
-
...(title === undefined ? {} : { title }),
|
|
77
|
-
};
|
|
78
|
-
cache.set(cacheKey, serializer(data), metadata);
|
|
79
|
-
}
|
|
80
|
-
function extractTitle(value) {
|
|
81
|
-
if (!isRecord(value))
|
|
82
|
-
return undefined;
|
|
83
|
-
const { title } = value;
|
|
84
|
-
return typeof title === 'string' ? title : undefined;
|
|
85
|
-
}
|
|
86
|
-
function logCacheMiss(reason, cacheNamespace, normalizedUrl) {
|
|
87
|
-
logDebug(`Cache miss due to ${reason}`, {
|
|
88
|
-
namespace: cacheNamespace,
|
|
89
|
-
url: normalizedUrl,
|
|
90
|
-
});
|
|
91
|
-
}
|
|
92
|
-
function logRawUrlTransformation(resolvedUrl) {
|
|
93
|
-
if (!resolvedUrl.transformed)
|
|
94
|
-
return;
|
|
95
|
-
logDebug('Using transformed raw content URL', {
|
|
96
|
-
original: resolvedUrl.originalUrl,
|
|
97
|
-
});
|
|
98
|
-
}
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"fetch-pipeline.js","sourceRoot":"","sources":["../../../src/tools/utils/fetch-pipeline.ts"],"names":[],"mappings":"AAMA,OAAO,KAAK,KAAK,MAAM,yBAAyB,CAAC;AACjD,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAC9D,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,0BAA0B,CAAC;AAE7D,OAAO,EAAE,uBAAuB,EAAE,MAAM,8BAA8B,CAAC;AAEvE,OAAO,EAAE,gBAAgB,EAAE,MAAM,iBAAiB,CAAC;AAEnD,SAAS,aAAa,CAAC,MAAc,EAAE,QAAgB;IACrD,IAAI,CAAC;QACH,OAAO,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;IAC5B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,CAAC,4CAA4C,EAAE;YACpD,GAAG,EAAE,QAAQ,CAAC,SAAS,CAAC,CAAC,EAAE,GAAG,CAAC;SAChC,CAAC,CAAC;QACH,OAAO,SAAS,CAAC;IACnB,CAAC;AACH,CAAC;AAED,SAAS,qBAAqB,CAC5B,QAAuB,EACvB,WAA4D,EAC5D,cAAsB,EACtB,aAAqB;IAErB,IAAI,CAAC,QAAQ;QAAE,OAAO,IAAI,CAAC;IAE3B,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;IACnC,IAAI,CAAC,MAAM;QAAE,OAAO,IAAI,CAAC;IAEzB,QAAQ,CAAC,WAAW,EAAE,EAAE,SAAS,EAAE,cAAc,EAAE,GAAG,EAAE,aAAa,EAAE,CAAC,CAAC;IAEzE,MAAM,IAAI,GAAG,WAAW;QACtB,CAAC,CAAC,WAAW,CAAC,MAAM,CAAC,OAAO,CAAC;QAC7B,CAAC,CAAE,aAAa,CAAC,MAAM,CAAC,OAAO,EAAE,QAAQ,CAAmB,CAAC;IAE/D,IAAI,IAAI,KAAK,SAAS,EAAE,CAAC;QACvB,QAAQ,CAAC,uCAAuC,EAAE;YAChD,SAAS,EAAE,cAAc;YACzB,GAAG,EAAE,aAAa;SACnB,CAAC,CAAC;QACH,OAAO,IAAI,CAAC;IACd,CAAC;IAED,OAAO;QACL,IAAI;QACJ,SAAS,EAAE,IAAI;QACf,GAAG,EAAE,aAAa;QAClB,SAAS,EAAE,MAAM,CAAC,SAAS;QAC3B,QAAQ;KACT,CAAC;AACJ,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACxC,OAAgC;IAEhC,MAAM,aAAa,GAAG,uBAAuB,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;IAC3D,MAAM,QAAQ,GAAG,eAAe,CAAC,OAAO,EAAE,aAAa,CAAC,CAAC;IAEzD,MAAM,YAAY,GAAG,qBAAqB,CACxC,QAAQ,EACR,OAAO,CAAC,WAAW,EACnB,OAAO,CAAC,cAAc,EACtB,aAAa,CACd,CAAC;IACF,IAAI,YAAY;QAAE,OAAO,YAAY,CAAC;IAEtC,MAAM,YAAY,GAAG,iBAAiB,CAAC,OAAO,CAAC,CAAC;IAChD,QAAQ,CAAC,cAAc,EAAE,EAAE,GAAG,EAAE,aAAa,EAAE,OAAO,EAAE,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC;IAE3E,MAAM,IAAI,GAAG,MAAM,iBAAiB,CAClC,aAAa,EACb,YAAY,EACZ,OAAO,CAAC,OAAO,CAChB,CAAC;IACF,MAAM,IAAI,GAAG,OAAO,CAAC,SAAS,CAAC,IAAI,EAAE,aAAa,CAAC,CAAC;IACpD,YAAY,CAAC,QAAQ,EAAE,IAAI,EAAE,OAAO,CAAC,SAAS,EAAE,aAAa,CAAC,CAAC;IAE/D,OAAO,mBAAmB,CAAC,aAAa,EAAE,IAAI,EAAE,QAAQ,CAAC,CAAC;AAC5D,CAAC;AAED,SAAS,eAAe,CACtB,OAAgC,EAChC,aAAqB;IAErB,MAAM,SAAS,GAAG,gBAAgB,CAAC,OAAO,CAAC,SAAS,EAAE,OAAO,CAAC,aAAa,CAAC,CAAC;IAC7E,OAAO,KAAK,CAAC,cAAc,CAAC,OAAO,CAAC,cAAc,EAAE,aAAa,EAAE,SAAS,CAAC,CAAC;AAChF,CAAC;AAED,SAAS,iBAAiB,CAAI,OAAgC;IAC5D,OAAO;QACL,aAAa,EAAE,OAAO,CAAC,aAAa;QACpC,MAAM,EAAE,OAAO,CAAC,MAAM;QACtB,OAAO,EAAE,OAAO,CAAC,OAAO;KACzB,CAAC;AACJ,CAAC;AAED,SAAS,YAAY,CACnB,QAAuB,EACvB,IAAO,EACP,SAA8C,EAC9C,aAAqB;IAErB,IAAI,CAAC,QAAQ;QAAE,OAAO;IACtB,MAAM,UAAU,GAAG,SAAS,IAAI,IAAI,CAAC,SAAS,CAAC;IAC/C,KAAK,CAAC,GAAG,CAAC,QAAQ,EAAE,UAAU,CAAC,IAAI,CAAC,EAAE;QACpC,GAAG,EAAE,aAAa;QAClB,KAAK,EAAE,YAAY,CAAC,IAAI,CAAC;KAC1B,CAAC,CAAC;AACL,CAAC;AAED,SAAS,YAAY,CAAC,KAAc;IAClC,IAAI,CAAC,KAAK,IAAI,OAAO,KAAK,KAAK,QAAQ;QAAE,OAAO,SAAS,CAAC;IAC1D,IAAI,CAAC,CAAC,OAAO,IAAI,KAAK,CAAC;QAAE,OAAO,SAAS,CAAC;IAC1C,MAAM,EAAE,KAAK,EAAE,GAAG,KAA4B,CAAC;IAC/C,OAAO,OAAO,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS,CAAC;AACvD,CAAC;AAED,SAAS,mBAAmB,CAC1B,GAAW,EACX,IAAO,EACP,QAAuB;IAEvB,OAAO;QACL,IAAI;QACJ,SAAS,EAAE,KAAK;QAChB,GAAG;QACH,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACnC,QAAQ;KACT,CAAC;AACJ,CAAC"}
|
|
@@ -1,73 +0,0 @@
|
|
|
1
|
-
function detectLineEnding(content) {
|
|
2
|
-
return content.includes('\r\n') ? '\r\n' : '\n';
|
|
3
|
-
}
|
|
4
|
-
function findFrontmatterLines(content) {
|
|
5
|
-
const lineEnding = detectLineEnding(content);
|
|
6
|
-
const lines = content.split(lineEnding);
|
|
7
|
-
if (lines[0] !== '---')
|
|
8
|
-
return null;
|
|
9
|
-
const endIndex = lines.indexOf('---', 1);
|
|
10
|
-
if (endIndex === -1)
|
|
11
|
-
return null;
|
|
12
|
-
return { lineEnding, lines, endIndex };
|
|
13
|
-
}
|
|
14
|
-
function stripOptionalQuotes(value) {
|
|
15
|
-
const trimmed = value.trim();
|
|
16
|
-
if (trimmed.length < 2)
|
|
17
|
-
return trimmed;
|
|
18
|
-
const first = trimmed[0];
|
|
19
|
-
const last = trimmed[trimmed.length - 1];
|
|
20
|
-
if ((first === '"' && last === '"') || (first === "'" && last === "'")) {
|
|
21
|
-
return trimmed.slice(1, -1).trim();
|
|
22
|
-
}
|
|
23
|
-
return trimmed;
|
|
24
|
-
}
|
|
25
|
-
function parseFrontmatterEntry(line) {
|
|
26
|
-
const trimmed = line.trim();
|
|
27
|
-
if (!trimmed)
|
|
28
|
-
return null;
|
|
29
|
-
const separatorIndex = trimmed.indexOf(':');
|
|
30
|
-
if (separatorIndex <= 0)
|
|
31
|
-
return null;
|
|
32
|
-
const key = trimmed.slice(0, separatorIndex).trim().toLowerCase();
|
|
33
|
-
const value = trimmed.slice(separatorIndex + 1);
|
|
34
|
-
return { key, value };
|
|
35
|
-
}
|
|
36
|
-
function isTitleKey(key) {
|
|
37
|
-
return key === 'title' || key === 'name';
|
|
38
|
-
}
|
|
39
|
-
export function extractTitleFromRawMarkdown(content) {
|
|
40
|
-
const frontmatter = findFrontmatterLines(content);
|
|
41
|
-
if (!frontmatter)
|
|
42
|
-
return undefined;
|
|
43
|
-
const { lines, endIndex } = frontmatter;
|
|
44
|
-
const entry = lines
|
|
45
|
-
.slice(1, endIndex)
|
|
46
|
-
.map((line) => parseFrontmatterEntry(line))
|
|
47
|
-
.find((parsed) => parsed !== null && isTitleKey(parsed.key));
|
|
48
|
-
if (!entry)
|
|
49
|
-
return undefined;
|
|
50
|
-
const value = stripOptionalQuotes(entry.value);
|
|
51
|
-
return value || undefined;
|
|
52
|
-
}
|
|
53
|
-
export function addSourceToMarkdown(content, url) {
|
|
54
|
-
const frontmatter = findFrontmatterLines(content);
|
|
55
|
-
if (!frontmatter) {
|
|
56
|
-
return `---\nsource: "${url}"\n---\n\n${content}`;
|
|
57
|
-
}
|
|
58
|
-
const { lineEnding, lines, endIndex } = frontmatter;
|
|
59
|
-
const bodyLines = lines.slice(1, endIndex);
|
|
60
|
-
const hasSource = bodyLines.some((line) => line.trimStart().toLowerCase().startsWith('source:'));
|
|
61
|
-
if (hasSource)
|
|
62
|
-
return content;
|
|
63
|
-
const updatedLines = [
|
|
64
|
-
lines[0],
|
|
65
|
-
...bodyLines,
|
|
66
|
-
`source: "${url}"`,
|
|
67
|
-
...lines.slice(endIndex),
|
|
68
|
-
];
|
|
69
|
-
return updatedLines.join(lineEnding);
|
|
70
|
-
}
|
|
71
|
-
export function hasFrontmatter(trimmed) {
|
|
72
|
-
return trimmed.startsWith('---\n') || trimmed.startsWith('---\r\n');
|
|
73
|
-
}
|
|
@@ -1,4 +0,0 @@
|
|
|
1
|
-
export { executeFetchPipeline } from './fetch-pipeline.js';
|
|
2
|
-
export type { FetchPipelineOptions, PipelineResult } from './fetch-pipeline.js';
|
|
3
|
-
export { createSuccessResponse, createCachedResponse, createBatchResponse, } from './response-builder.js';
|
|
4
|
-
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/tools/utils/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,oBAAoB,EAAE,MAAM,qBAAqB,CAAC;AAC3D,YAAY,EAAE,oBAAoB,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AAChF,OAAO,EACL,qBAAqB,EACrB,oBAAoB,EACpB,mBAAmB,GACpB,MAAM,uBAAuB,CAAC"}
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/tools/utils/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,oBAAoB,EAAE,MAAM,qBAAqB,CAAC;AAE3D,OAAO,EACL,qBAAqB,EACrB,oBAAoB,EACpB,mBAAmB,GACpB,MAAM,uBAAuB,CAAC"}
|
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
interface InlineContentResult {
|
|
2
|
-
content?: string;
|
|
3
|
-
contentSize: number;
|
|
4
|
-
resourceUri?: string;
|
|
5
|
-
resourceMimeType?: string;
|
|
6
|
-
error?: string;
|
|
7
|
-
truncated?: boolean;
|
|
8
|
-
}
|
|
9
|
-
export declare function applyInlineContentLimit(content: string, cacheKey: string | null): InlineContentResult;
|
|
10
|
-
export {};
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"inline-content.d.ts","sourceRoot":"","sources":["../../../src/tools/utils/inline-content.ts"],"names":[],"mappings":"AAKA,KAAK,mBAAmB,GAAG,OAAO,GAAG,UAAU,CAAC;AAEhD,UAAU,mBAAmB;IAC3B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,SAAS,CAAC,EAAE,OAAO,CAAC;CACrB;AAED,wBAAgB,uBAAuB,CACrC,OAAO,EAAE,MAAM,EACf,QAAQ,EAAE,MAAM,GAAG,IAAI,EACvB,MAAM,EAAE,mBAAmB,GAC1B,mBAAmB,CAkBrB"}
|