dotdo 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +446 -315
- package/cli/README.md +238 -0
- package/cli/agent.ts +72 -0
- package/cli/bin.js +44 -0
- package/cli/bin.ts +38 -0
- package/cli/build.ts +157 -0
- package/cli/commands/auth/login.ts +14 -0
- package/cli/commands/auth/logout.ts +6 -0
- package/cli/commands/auth/whoami.ts +16 -0
- package/cli/commands/deploy-multi.ts +245 -0
- package/cli/commands/dev/deploy.ts +100 -0
- package/cli/commands/dev/dev.ts +95 -0
- package/cli/commands/dev/logs.ts +91 -0
- package/cli/commands/dev-local.ts +88 -0
- package/cli/commands/do-ops.ts +314 -0
- package/cli/commands/index.ts +100 -0
- package/cli/commands/init.ts +247 -0
- package/cli/commands/introspect/emitter.ts +315 -0
- package/cli/commands/introspect/index.ts +193 -0
- package/cli/commands/link.ts +598 -0
- package/cli/commands/snippets.ts +415 -0
- package/cli/commands/tunnel.ts +239 -0
- package/cli/device-auth.ts +289 -0
- package/cli/fallback.ts +12 -0
- package/cli/index.ts +121 -0
- package/cli/main.ts +246 -0
- package/cli/mcp-stdio.ts +790 -0
- package/cli/package.json +62 -0
- package/cli/runtime/do-registry.ts +193 -0
- package/cli/runtime/embedded-db.ts +344 -0
- package/cli/runtime/index.ts +9 -0
- package/cli/runtime/miniflare-adapter.ts +162 -0
- package/cli/sandbox.ts +82 -0
- package/cli/src/args.ts +174 -0
- package/cli/src/auth.ts +55 -0
- package/cli/src/commands/call.ts +84 -0
- package/cli/src/commands/charge.ts +96 -0
- package/cli/src/commands/config.ts +115 -0
- package/cli/src/commands/email.ts +112 -0
- package/cli/src/commands/llm.ts +115 -0
- package/cli/src/commands/queue.ts +134 -0
- package/cli/src/commands/text.ts +86 -0
- package/cli/src/config.ts +185 -0
- package/cli/src/output.ts +246 -0
- package/cli/src/rpc.ts +192 -0
- package/cli/utils/config.ts +282 -0
- package/cli/utils/detect.ts +73 -0
- package/cli/utils/index.ts +15 -0
- package/cli/utils/logger.ts +232 -0
- package/dist/ai/index.js +19 -0
- package/dist/ai/index.js.map +1 -0
- package/dist/ai/template-literals.js +852 -0
- package/dist/ai/template-literals.js.map +1 -0
- package/dist/api/middleware/auth-federation.js +573 -0
- package/dist/api/middleware/auth-federation.js.map +1 -0
- package/dist/api/middleware/auth.js +545 -0
- package/dist/api/middleware/auth.js.map +1 -0
- package/dist/db/actions.js +212 -0
- package/dist/db/actions.js.map +1 -0
- package/dist/db/auth.js +506 -0
- package/dist/db/auth.js.map +1 -0
- package/dist/db/branches.js +65 -0
- package/dist/db/branches.js.map +1 -0
- package/dist/db/clickhouse.js +1074 -0
- package/dist/db/clickhouse.js.map +1 -0
- package/dist/db/dlq.js +39 -0
- package/dist/db/dlq.js.map +1 -0
- package/dist/db/events.js +28 -0
- package/dist/db/events.js.map +1 -0
- package/dist/db/exec.js +64 -0
- package/dist/db/exec.js.map +1 -0
- package/dist/db/files.js +85 -0
- package/dist/db/files.js.map +1 -0
- package/dist/db/flags.js +24 -0
- package/dist/db/flags.js.map +1 -0
- package/dist/db/git.js +116 -0
- package/dist/db/git.js.map +1 -0
- package/dist/db/iceberg/inverted-index.js +862 -0
- package/dist/db/iceberg/inverted-index.js.map +1 -0
- package/dist/db/iceberg/puffin.js +878 -0
- package/dist/db/iceberg/puffin.js.map +1 -0
- package/dist/db/iceberg/search-manifest.js +422 -0
- package/dist/db/iceberg/search-manifest.js.map +1 -0
- package/dist/db/iceberg/types.js +8 -0
- package/dist/db/iceberg/types.js.map +1 -0
- package/dist/db/index.js +121 -0
- package/dist/db/index.js.map +1 -0
- package/dist/db/integrations.js +368 -0
- package/dist/db/integrations.js.map +1 -0
- package/dist/db/json-indexes.js +332 -0
- package/dist/db/json-indexes.js.map +1 -0
- package/dist/db/linked-accounts.js +287 -0
- package/dist/db/linked-accounts.js.map +1 -0
- package/dist/db/nouns.js +183 -0
- package/dist/db/nouns.js.map +1 -0
- package/dist/db/objects.js +170 -0
- package/dist/db/objects.js.map +1 -0
- package/dist/db/primitives/dag-scheduler/index.js +869 -0
- package/dist/db/primitives/dag-scheduler/index.js.map +1 -0
- package/dist/db/primitives/exactly-once-context.js +237 -0
- package/dist/db/primitives/exactly-once-context.js.map +1 -0
- package/dist/db/primitives/index.js +62 -0
- package/dist/db/primitives/index.js.map +1 -0
- package/dist/db/primitives/keyed-router.js +145 -0
- package/dist/db/primitives/keyed-router.js.map +1 -0
- package/dist/db/primitives/observability.js +162 -0
- package/dist/db/primitives/observability.js.map +1 -0
- package/dist/db/primitives/schema-evolution.js +643 -0
- package/dist/db/primitives/schema-evolution.js.map +1 -0
- package/dist/db/primitives/stateful-operator/index.js +770 -0
- package/dist/db/primitives/stateful-operator/index.js.map +1 -0
- package/dist/db/primitives/temporal-store.js +306 -0
- package/dist/db/primitives/temporal-store.js.map +1 -0
- package/dist/db/primitives/typed-column-store.js +1229 -0
- package/dist/db/primitives/typed-column-store.js.map +1 -0
- package/dist/db/primitives/utils/duration.js +162 -0
- package/dist/db/primitives/utils/duration.js.map +1 -0
- package/dist/db/primitives/utils/murmur3.js +116 -0
- package/dist/db/primitives/utils/murmur3.js.map +1 -0
- package/dist/db/primitives/watermark-service.js +136 -0
- package/dist/db/primitives/watermark-service.js.map +1 -0
- package/dist/db/primitives/window-manager.js +764 -0
- package/dist/db/primitives/window-manager.js.map +1 -0
- package/dist/db/relationships.js +66 -0
- package/dist/db/relationships.js.map +1 -0
- package/dist/db/schema-minimal.js +61 -0
- package/dist/db/schema-minimal.js.map +1 -0
- package/dist/db/search.js +28 -0
- package/dist/db/search.js.map +1 -0
- package/dist/db/stores.js +1665 -0
- package/dist/db/stores.js.map +1 -0
- package/dist/db/things.js +297 -0
- package/dist/db/things.js.map +1 -0
- package/dist/db/vault.js +171 -0
- package/dist/db/vault.js.map +1 -0
- package/dist/db/verbs.js +102 -0
- package/dist/db/verbs.js.map +1 -0
- package/dist/do/base.js +48 -0
- package/dist/do/base.js.map +1 -0
- package/dist/do/tiny.js +31 -0
- package/dist/do/tiny.js.map +1 -0
- package/dist/lib/DOAuth.js +261 -0
- package/dist/lib/DOAuth.js.map +1 -0
- package/dist/lib/DODispatcher.js +72 -0
- package/dist/lib/DODispatcher.js.map +1 -0
- package/dist/lib/Modifier.js +189 -0
- package/dist/lib/Modifier.js.map +1 -0
- package/dist/lib/StateStorage.js +403 -0
- package/dist/lib/StateStorage.js.map +1 -0
- package/dist/lib/TypeRegistry.js +122 -0
- package/dist/lib/TypeRegistry.js.map +1 -0
- package/dist/lib/ai/gateway.js +247 -0
- package/dist/lib/ai/gateway.js.map +1 -0
- package/dist/lib/ai/tool-loop-agent.js +591 -0
- package/dist/lib/ai/tool-loop-agent.js.map +1 -0
- package/dist/lib/auto-wiring.js +439 -0
- package/dist/lib/auto-wiring.js.map +1 -0
- package/dist/lib/browse/browserbase.js +163 -0
- package/dist/lib/browse/browserbase.js.map +1 -0
- package/dist/lib/browse/cloudflare.js +144 -0
- package/dist/lib/browse/cloudflare.js.map +1 -0
- package/dist/lib/browse/index.js +62 -0
- package/dist/lib/browse/index.js.map +1 -0
- package/dist/lib/browse/types.js +13 -0
- package/dist/lib/browse/types.js.map +1 -0
- package/dist/lib/cache/index.js +37 -0
- package/dist/lib/cache/index.js.map +1 -0
- package/dist/lib/cache/visibility.js +638 -0
- package/dist/lib/cache/visibility.js.map +1 -0
- package/dist/lib/capabilities.js +268 -0
- package/dist/lib/capabilities.js.map +1 -0
- package/dist/lib/channels/base.js +106 -0
- package/dist/lib/channels/base.js.map +1 -0
- package/dist/lib/channels/discord.js +94 -0
- package/dist/lib/channels/discord.js.map +1 -0
- package/dist/lib/channels/email.js +204 -0
- package/dist/lib/channels/email.js.map +1 -0
- package/dist/lib/channels/index.js +90 -0
- package/dist/lib/channels/index.js.map +1 -0
- package/dist/lib/channels/mdxui-chat.js +95 -0
- package/dist/lib/channels/mdxui-chat.js.map +1 -0
- package/dist/lib/channels/slack-blockkit.js +121 -0
- package/dist/lib/channels/slack-blockkit.js.map +1 -0
- package/dist/lib/channels/types.js +7 -0
- package/dist/lib/channels/types.js.map +1 -0
- package/dist/lib/cloudflare/ai.js +654 -0
- package/dist/lib/cloudflare/ai.js.map +1 -0
- package/dist/lib/cloudflare/index.js +88 -0
- package/dist/lib/cloudflare/index.js.map +1 -0
- package/dist/lib/cloudflare/kv.js +342 -0
- package/dist/lib/cloudflare/kv.js.map +1 -0
- package/dist/lib/cloudflare/queues.js +434 -0
- package/dist/lib/cloudflare/queues.js.map +1 -0
- package/dist/lib/cloudflare/r2.js +604 -0
- package/dist/lib/cloudflare/r2.js.map +1 -0
- package/dist/lib/cloudflare/vectorize.js +494 -0
- package/dist/lib/cloudflare/vectorize.js.map +1 -0
- package/dist/lib/cloudflare/workflows.js +569 -0
- package/dist/lib/cloudflare/workflows.js.map +1 -0
- package/dist/lib/colo/caching.js +196 -0
- package/dist/lib/colo/caching.js.map +1 -0
- package/dist/lib/colo/detection.js +194 -0
- package/dist/lib/colo/detection.js.map +1 -0
- package/dist/lib/colo/external-data.js +219 -0
- package/dist/lib/colo/external-data.js.map +1 -0
- package/dist/lib/colo/globe-data.js +179 -0
- package/dist/lib/colo/globe-data.js.map +1 -0
- package/dist/lib/colo/index.js +16 -0
- package/dist/lib/colo/index.js.map +1 -0
- package/dist/lib/decorators.js +37 -0
- package/dist/lib/decorators.js.map +1 -0
- package/dist/lib/discovery.js +81 -0
- package/dist/lib/discovery.js.map +1 -0
- package/dist/lib/executors/AgenticFunctionExecutor.js +619 -0
- package/dist/lib/executors/AgenticFunctionExecutor.js.map +1 -0
- package/dist/lib/executors/BaseFunctionExecutor.js +328 -0
- package/dist/lib/executors/BaseFunctionExecutor.js.map +1 -0
- package/dist/lib/executors/CascadeExecutor.js +418 -0
- package/dist/lib/executors/CascadeExecutor.js.map +1 -0
- package/dist/lib/executors/CodeFunctionExecutor.js +904 -0
- package/dist/lib/executors/CodeFunctionExecutor.js.map +1 -0
- package/dist/lib/executors/GenerativeFunctionExecutor.js +904 -0
- package/dist/lib/executors/GenerativeFunctionExecutor.js.map +1 -0
- package/dist/lib/executors/HumanFunctionExecutor.js +884 -0
- package/dist/lib/executors/HumanFunctionExecutor.js.map +1 -0
- package/dist/lib/executors/ParallelStepExecutor.js +308 -0
- package/dist/lib/executors/ParallelStepExecutor.js.map +1 -0
- package/dist/lib/executors/types.js +12 -0
- package/dist/lib/executors/types.js.map +1 -0
- package/dist/lib/experiments.js +89 -0
- package/dist/lib/experiments.js.map +1 -0
- package/dist/lib/flags/store.js +262 -0
- package/dist/lib/flags/store.js.map +1 -0
- package/dist/lib/functions/FunctionComposition.js +467 -0
- package/dist/lib/functions/FunctionComposition.js.map +1 -0
- package/dist/lib/functions/FunctionMiddleware.js +457 -0
- package/dist/lib/functions/FunctionMiddleware.js.map +1 -0
- package/dist/lib/functions/FunctionRegistry.js +426 -0
- package/dist/lib/functions/FunctionRegistry.js.map +1 -0
- package/dist/lib/functions/createFunction.js +1048 -0
- package/dist/lib/functions/createFunction.js.map +1 -0
- package/dist/lib/humans/index.js +68 -0
- package/dist/lib/humans/index.js.map +1 -0
- package/dist/lib/humans/templates.js +117 -0
- package/dist/lib/humans/templates.js.map +1 -0
- package/dist/lib/identity.js +98 -0
- package/dist/lib/identity.js.map +1 -0
- package/dist/lib/index.js +9 -0
- package/dist/lib/index.js.map +1 -0
- package/dist/lib/logging/error-logger.js +163 -0
- package/dist/lib/logging/error-logger.js.map +1 -0
- package/dist/lib/logging/index.js +160 -0
- package/dist/lib/logging/index.js.map +1 -0
- package/dist/lib/mixins/bash.js +753 -0
- package/dist/lib/mixins/bash.js.map +1 -0
- package/dist/lib/mixins/fs.js +648 -0
- package/dist/lib/mixins/fs.js.map +1 -0
- package/dist/lib/mixins/git.js +1006 -0
- package/dist/lib/mixins/git.js.map +1 -0
- package/dist/lib/mixins/npm.js +662 -0
- package/dist/lib/mixins/npm.js.map +1 -0
- package/dist/lib/noun-id.js +278 -0
- package/dist/lib/noun-id.js.map +1 -0
- package/dist/lib/rate-limit/sliding-window.js +148 -0
- package/dist/lib/rate-limit/sliding-window.js.map +1 -0
- package/dist/lib/rate-limit.js +110 -0
- package/dist/lib/rate-limit.js.map +1 -0
- package/dist/lib/rpc/bindings.js +548 -0
- package/dist/lib/rpc/bindings.js.map +1 -0
- package/dist/lib/rpc/index.js +64 -0
- package/dist/lib/rpc/index.js.map +1 -0
- package/dist/lib/safe-stringify.js +223 -0
- package/dist/lib/safe-stringify.js.map +1 -0
- package/dist/lib/sandbox/miniflare-sandbox.js +1007 -0
- package/dist/lib/sandbox/miniflare-sandbox.js.map +1 -0
- package/dist/lib/sqids.js +110 -0
- package/dist/lib/sqids.js.map +1 -0
- package/dist/lib/sql/adapters/index.js +10 -0
- package/dist/lib/sql/adapters/index.js.map +1 -0
- package/dist/lib/sql/adapters/node-sql-parser.js +552 -0
- package/dist/lib/sql/adapters/node-sql-parser.js.map +1 -0
- package/dist/lib/sql/adapters/pgsql-parser.js +1190 -0
- package/dist/lib/sql/adapters/pgsql-parser.js.map +1 -0
- package/dist/lib/sql/index.js +277 -0
- package/dist/lib/sql/index.js.map +1 -0
- package/dist/lib/sql/types.js +56 -0
- package/dist/lib/sql/types.js.map +1 -0
- package/dist/lib/type-classifier.js +126 -0
- package/dist/lib/type-classifier.js.map +1 -0
- package/dist/lib/utils/html.js +47 -0
- package/dist/lib/utils/html.js.map +1 -0
- package/dist/lib/validation.js +48 -0
- package/dist/lib/validation.js.map +1 -0
- package/dist/lib/vault/store.js +411 -0
- package/dist/lib/vault/store.js.map +1 -0
- package/dist/metrics/hunch.js +739 -0
- package/dist/metrics/hunch.js.map +1 -0
- package/dist/objects/API.js +302 -0
- package/dist/objects/API.js.map +1 -0
- package/dist/objects/Agent.js +179 -0
- package/dist/objects/Agent.js.map +1 -0
- package/dist/objects/AgenticFunctionExecutor.js +8 -0
- package/dist/objects/AgenticFunctionExecutor.js.map +1 -0
- package/dist/objects/App.js +83 -0
- package/dist/objects/App.js.map +1 -0
- package/dist/objects/Browser.js +884 -0
- package/dist/objects/Browser.js.map +1 -0
- package/dist/objects/Business.js +107 -0
- package/dist/objects/Business.js.map +1 -0
- package/dist/objects/CLI.js +221 -0
- package/dist/objects/CLI.js.map +1 -0
- package/dist/objects/CodeFunctionExecutor.js +8 -0
- package/dist/objects/CodeFunctionExecutor.js.map +1 -0
- package/dist/objects/Collection.js +161 -0
- package/dist/objects/Collection.js.map +1 -0
- package/dist/objects/DO.js +41 -0
- package/dist/objects/DO.js.map +1 -0
- package/dist/objects/DOBase.js +2309 -0
- package/dist/objects/DOBase.js.map +1 -0
- package/dist/objects/DOCache.js +153 -0
- package/dist/objects/DOCache.js.map +1 -0
- package/dist/objects/DOFull.js +1676 -0
- package/dist/objects/DOFull.js.map +1 -0
- package/dist/objects/DOTiny.js +207 -0
- package/dist/objects/DOTiny.js.map +1 -0
- package/dist/objects/Directory.js +199 -0
- package/dist/objects/Directory.js.map +1 -0
- package/dist/objects/Entity.js +413 -0
- package/dist/objects/Entity.js.map +1 -0
- package/dist/objects/Function.js +116 -0
- package/dist/objects/Function.js.map +1 -0
- package/dist/objects/Human.js +231 -0
- package/dist/objects/Human.js.map +1 -0
- package/dist/objects/HumanFunctionExecutor.js +8 -0
- package/dist/objects/HumanFunctionExecutor.js.map +1 -0
- package/dist/objects/IcebergMetadataDO.js +938 -0
- package/dist/objects/IcebergMetadataDO.js.map +1 -0
- package/dist/objects/IntegrationsDO.js +1174 -0
- package/dist/objects/IntegrationsDO.js.map +1 -0
- package/dist/objects/ObservabilityBroadcaster.js +149 -0
- package/dist/objects/ObservabilityBroadcaster.js.map +1 -0
- package/dist/objects/Package.js +154 -0
- package/dist/objects/Package.js.map +1 -0
- package/dist/objects/Product.js +193 -0
- package/dist/objects/Product.js.map +1 -0
- package/dist/objects/SDK.js +152 -0
- package/dist/objects/SDK.js.map +1 -0
- package/dist/objects/SaaS.js +235 -0
- package/dist/objects/SaaS.js.map +1 -0
- package/dist/objects/SandboxDO.js +759 -0
- package/dist/objects/SandboxDO.js.map +1 -0
- package/dist/objects/Service.js +337 -0
- package/dist/objects/Service.js.map +1 -0
- package/dist/objects/Site.js +80 -0
- package/dist/objects/Site.js.map +1 -0
- package/dist/objects/Startup.js +479 -0
- package/dist/objects/Startup.js.map +1 -0
- package/dist/objects/ThingsDO.js +170 -0
- package/dist/objects/ThingsDO.js.map +1 -0
- package/dist/objects/VectorShardDO.js +650 -0
- package/dist/objects/VectorShardDO.js.map +1 -0
- package/dist/objects/Worker.js +144 -0
- package/dist/objects/Worker.js.map +1 -0
- package/dist/objects/Workflow.js +196 -0
- package/dist/objects/Workflow.js.map +1 -0
- package/dist/objects/WorkflowFactory.js +313 -0
- package/dist/objects/WorkflowFactory.js.map +1 -0
- package/dist/objects/WorkflowRuntime.js +863 -0
- package/dist/objects/WorkflowRuntime.js.map +1 -0
- package/dist/objects/circuit-breaker-bulkhead.js +178 -0
- package/dist/objects/circuit-breaker-bulkhead.js.map +1 -0
- package/dist/objects/createFunction.js +934 -0
- package/dist/objects/createFunction.js.map +1 -0
- package/dist/objects/index.js +80 -0
- package/dist/objects/index.js.map +1 -0
- package/dist/objects/lifecycle/Branch.js +275 -0
- package/dist/objects/lifecycle/Branch.js.map +1 -0
- package/dist/objects/lifecycle/Clone.js +1499 -0
- package/dist/objects/lifecycle/Clone.js.map +1 -0
- package/dist/objects/lifecycle/Compact.js +237 -0
- package/dist/objects/lifecycle/Compact.js.map +1 -0
- package/dist/objects/lifecycle/Promote.js +476 -0
- package/dist/objects/lifecycle/Promote.js.map +1 -0
- package/dist/objects/lifecycle/Shard.js +560 -0
- package/dist/objects/lifecycle/Shard.js.map +1 -0
- package/dist/objects/lifecycle/index.js +15 -0
- package/dist/objects/lifecycle/index.js.map +1 -0
- package/dist/objects/lifecycle/types.js +33 -0
- package/dist/objects/lifecycle/types.js.map +1 -0
- package/dist/objects/mixins/infrastructure.js +171 -0
- package/dist/objects/mixins/infrastructure.js.map +1 -0
- package/dist/objects/modules/StoresModule.js +153 -0
- package/dist/objects/modules/StoresModule.js.map +1 -0
- package/dist/objects/persistence/checkpoint-manager.js +606 -0
- package/dist/objects/persistence/checkpoint-manager.js.map +1 -0
- package/dist/objects/persistence/index.js +72 -0
- package/dist/objects/persistence/index.js.map +1 -0
- package/dist/objects/persistence/migration-runner.js +562 -0
- package/dist/objects/persistence/migration-runner.js.map +1 -0
- package/dist/objects/persistence/replication-manager.js +501 -0
- package/dist/objects/persistence/replication-manager.js.map +1 -0
- package/dist/objects/persistence/tiered-storage-manager.js +595 -0
- package/dist/objects/persistence/tiered-storage-manager.js.map +1 -0
- package/dist/objects/persistence/types.js +14 -0
- package/dist/objects/persistence/types.js.map +1 -0
- package/dist/objects/persistence/wal-manager.js +653 -0
- package/dist/objects/persistence/wal-manager.js.map +1 -0
- package/dist/objects/presets/index.js +20 -0
- package/dist/objects/presets/index.js.map +1 -0
- package/dist/objects/presets/primitives.js +188 -0
- package/dist/objects/presets/primitives.js.map +1 -0
- package/dist/objects/primitives/alarm-adapter.js +141 -0
- package/dist/objects/primitives/alarm-adapter.js.map +1 -0
- package/dist/objects/primitives/index.js +337 -0
- package/dist/objects/primitives/index.js.map +1 -0
- package/dist/objects/primitives/storage-adapter.js +182 -0
- package/dist/objects/primitives/storage-adapter.js.map +1 -0
- package/dist/objects/primitives/with-primitives.js +102 -0
- package/dist/objects/primitives/with-primitives.js.map +1 -0
- package/dist/objects/services/StoreManager.js +227 -0
- package/dist/objects/services/StoreManager.js.map +1 -0
- package/dist/objects/services/index.js +13 -0
- package/dist/objects/services/index.js.map +1 -0
- package/dist/objects/transport/auth-layer.js +1451 -0
- package/dist/objects/transport/auth-layer.js.map +1 -0
- package/dist/objects/transport/capnweb-target.js +355 -0
- package/dist/objects/transport/capnweb-target.js.map +1 -0
- package/dist/objects/transport/chain.js +441 -0
- package/dist/objects/transport/chain.js.map +1 -0
- package/dist/objects/transport/handler.js +58 -0
- package/dist/objects/transport/handler.js.map +1 -0
- package/dist/objects/transport/index.js +53 -0
- package/dist/objects/transport/index.js.map +1 -0
- package/dist/objects/transport/mcp-server.js +691 -0
- package/dist/objects/transport/mcp-server.js.map +1 -0
- package/dist/objects/transport/rest-autowire.js +1508 -0
- package/dist/objects/transport/rest-autowire.js.map +1 -0
- package/dist/objects/transport/rest-router.js +440 -0
- package/dist/objects/transport/rest-router.js.map +1 -0
- package/dist/objects/transport/rpc-server.js +1539 -0
- package/dist/objects/transport/rpc-server.js.map +1 -0
- package/dist/objects/transport/shared.js +576 -0
- package/dist/objects/transport/shared.js.map +1 -0
- package/dist/objects/transport/sync-engine.js +291 -0
- package/dist/objects/transport/sync-engine.js.map +1 -0
- package/dist/objects/transport/types.js +8 -0
- package/dist/objects/transport/types.js.map +1 -0
- package/dist/sandbox/index.js +258 -0
- package/dist/sandbox/index.js.map +1 -0
- package/dist/snippets/artifacts-config.js +241 -0
- package/dist/snippets/artifacts-config.js.map +1 -0
- package/dist/snippets/artifacts-ingest.js +832 -0
- package/dist/snippets/artifacts-ingest.js.map +1 -0
- package/dist/snippets/artifacts-serve.js +1035 -0
- package/dist/snippets/artifacts-serve.js.map +1 -0
- package/dist/snippets/artifacts-types.js +161 -0
- package/dist/snippets/artifacts-types.js.map +1 -0
- package/dist/snippets/cache-probe.js +376 -0
- package/dist/snippets/cache-probe.js.map +1 -0
- package/dist/snippets/cache.js +10 -0
- package/dist/snippets/cache.js.map +1 -0
- package/dist/snippets/events.js +469 -0
- package/dist/snippets/events.js.map +1 -0
- package/dist/snippets/index.js +7 -0
- package/dist/snippets/index.js.map +1 -0
- package/dist/snippets/proxy.js +495 -0
- package/dist/snippets/proxy.js.map +1 -0
- package/dist/snippets/search.js +1759 -0
- package/dist/snippets/search.js.map +1 -0
- package/dist/streams/index.js +30 -0
- package/dist/streams/index.js.map +1 -0
- package/dist/streams/observability.js +68 -0
- package/dist/streams/observability.js.map +1 -0
- package/dist/types/AI.js +92 -0
- package/dist/types/AI.js.map +1 -0
- package/dist/types/AIFunction.js +171 -0
- package/dist/types/AIFunction.js.map +1 -0
- package/dist/types/BrowseVerb.js +89 -0
- package/dist/types/BrowseVerb.js.map +1 -0
- package/dist/types/Browser.js +31 -0
- package/dist/types/Browser.js.map +1 -0
- package/dist/types/Chaos.js +15 -0
- package/dist/types/Chaos.js.map +1 -0
- package/dist/types/CloudflareBindings.js +109 -0
- package/dist/types/CloudflareBindings.js.map +1 -0
- package/dist/types/Collection.js +50 -0
- package/dist/types/Collection.js.map +1 -0
- package/dist/types/DO.js +2 -0
- package/dist/types/DO.js.map +1 -0
- package/dist/types/DOLocation.js +63 -0
- package/dist/types/DOLocation.js.map +1 -0
- package/dist/types/EventHandler.js +57 -0
- package/dist/types/EventHandler.js.map +1 -0
- package/dist/types/Experiment.js +33 -0
- package/dist/types/Experiment.js.map +1 -0
- package/dist/types/Flag.js +57 -0
- package/dist/types/Flag.js.map +1 -0
- package/dist/types/Lifecycle.js +13 -0
- package/dist/types/Lifecycle.js.map +1 -0
- package/dist/types/Location.js +169 -0
- package/dist/types/Location.js.map +1 -0
- package/dist/types/Noun.js +66 -0
- package/dist/types/Noun.js.map +1 -0
- package/dist/types/SessionEvent.js +194 -0
- package/dist/types/SessionEvent.js.map +1 -0
- package/dist/types/Thing.js +55 -0
- package/dist/types/Thing.js.map +1 -0
- package/dist/types/ThingDO.js +153 -0
- package/dist/types/ThingDO.js.map +1 -0
- package/dist/types/Things.js +2 -0
- package/dist/types/Things.js.map +1 -0
- package/dist/types/Verb.js +119 -0
- package/dist/types/Verb.js.map +1 -0
- package/dist/types/WorkflowContext.js +70 -0
- package/dist/types/WorkflowContext.js.map +1 -0
- package/dist/types/analytics-api.js +13 -0
- package/dist/types/analytics-api.js.map +1 -0
- package/dist/types/capabilities.js +135 -0
- package/dist/types/capabilities.js.map +1 -0
- package/dist/types/drizzle.js +12 -0
- package/dist/types/drizzle.js.map +1 -0
- package/dist/types/event.js +201 -0
- package/dist/types/event.js.map +1 -0
- package/dist/types/fn.js +12 -0
- package/dist/types/fn.js.map +1 -0
- package/dist/types/iceberg.js +48 -0
- package/dist/types/iceberg.js.map +1 -0
- package/dist/types/ids.js +170 -0
- package/dist/types/ids.js.map +1 -0
- package/dist/types/index.js +41 -0
- package/dist/types/index.js.map +1 -0
- package/dist/types/introspect.js +54 -0
- package/dist/types/introspect.js.map +1 -0
- package/dist/types/observability.js +124 -0
- package/dist/types/observability.js.map +1 -0
- package/dist/types/sync-protocol.js +175 -0
- package/dist/types/sync-protocol.js.map +1 -0
- package/dist/types/vector.js +13 -0
- package/dist/types/vector.js.map +1 -0
- package/dist/workflows/ScheduleManager.js +473 -0
- package/dist/workflows/ScheduleManager.js.map +1 -0
- package/dist/workflows/StepDOBridge.js +149 -0
- package/dist/workflows/StepDOBridge.js.map +1 -0
- package/dist/workflows/StepResultStorage.js +232 -0
- package/dist/workflows/StepResultStorage.js.map +1 -0
- package/dist/workflows/WaitForEventManager.js +461 -0
- package/dist/workflows/WaitForEventManager.js.map +1 -0
- package/dist/workflows/analyzer.js +332 -0
- package/dist/workflows/analyzer.js.map +1 -0
- package/dist/workflows/compat/activity-router.js +484 -0
- package/dist/workflows/compat/activity-router.js.map +1 -0
- package/dist/workflows/compat/backends/cloudflare-workflows.js +431 -0
- package/dist/workflows/compat/backends/cloudflare-workflows.js.map +1 -0
- package/dist/workflows/compat/backends/index.js +14 -0
- package/dist/workflows/compat/backends/index.js.map +1 -0
- package/dist/workflows/compat/errors/index.js +375 -0
- package/dist/workflows/compat/errors/index.js.map +1 -0
- package/dist/workflows/compat/index.js +79 -0
- package/dist/workflows/compat/index.js.map +1 -0
- package/dist/workflows/compat/inngest/index.js +989 -0
- package/dist/workflows/compat/inngest/index.js.map +1 -0
- package/dist/workflows/compat/qstash/index.js +1263 -0
- package/dist/workflows/compat/qstash/index.js.map +1 -0
- package/dist/workflows/compat/temporal/activities.js +739 -0
- package/dist/workflows/compat/temporal/activities.js.map +1 -0
- package/dist/workflows/compat/temporal/child-workflows.js +154 -0
- package/dist/workflows/compat/temporal/child-workflows.js.map +1 -0
- package/dist/workflows/compat/temporal/client.js +381 -0
- package/dist/workflows/compat/temporal/client.js.map +1 -0
- package/dist/workflows/compat/temporal/context.js +309 -0
- package/dist/workflows/compat/temporal/context.js.map +1 -0
- package/dist/workflows/compat/temporal/determinism.js +216 -0
- package/dist/workflows/compat/temporal/determinism.js.map +1 -0
- package/dist/workflows/compat/temporal/errors.js +128 -0
- package/dist/workflows/compat/temporal/errors.js.map +1 -0
- package/dist/workflows/compat/temporal/index.js +2464 -0
- package/dist/workflows/compat/temporal/index.js.map +1 -0
- package/dist/workflows/compat/temporal/saga.js +504 -0
- package/dist/workflows/compat/temporal/saga.js.map +1 -0
- package/dist/workflows/compat/temporal/signals.js +364 -0
- package/dist/workflows/compat/temporal/signals.js.map +1 -0
- package/dist/workflows/compat/temporal/storage.js +271 -0
- package/dist/workflows/compat/temporal/storage.js.map +1 -0
- package/dist/workflows/compat/temporal/timers.js +347 -0
- package/dist/workflows/compat/temporal/timers.js.map +1 -0
- package/dist/workflows/compat/temporal/types.js +7 -0
- package/dist/workflows/compat/temporal/types.js.map +1 -0
- package/dist/workflows/compat/temporal/unified-primitives.js +339 -0
- package/dist/workflows/compat/temporal/unified-primitives.js.map +1 -0
- package/dist/workflows/compat/trigger/index.js +468 -0
- package/dist/workflows/compat/trigger/index.js.map +1 -0
- package/dist/workflows/compat/utils/index.js +69 -0
- package/dist/workflows/compat/utils/index.js.map +1 -0
- package/dist/workflows/context/correlation-capability.js +266 -0
- package/dist/workflows/context/correlation-capability.js.map +1 -0
- package/dist/workflows/context/correlation.js +484 -0
- package/dist/workflows/context/correlation.js.map +1 -0
- package/dist/workflows/context/experiment.js +289 -0
- package/dist/workflows/context/experiment.js.map +1 -0
- package/dist/workflows/context/flag.js +244 -0
- package/dist/workflows/context/flag.js.map +1 -0
- package/dist/workflows/context/foundation.js +648 -0
- package/dist/workflows/context/foundation.js.map +1 -0
- package/dist/workflows/context/human-base.js +106 -0
- package/dist/workflows/context/human-base.js.map +1 -0
- package/dist/workflows/context/human.js +368 -0
- package/dist/workflows/context/human.js.map +1 -0
- package/dist/workflows/context/measure.js +354 -0
- package/dist/workflows/context/measure.js.map +1 -0
- package/dist/workflows/context/rate-limit.js +358 -0
- package/dist/workflows/context/rate-limit.js.map +1 -0
- package/dist/workflows/context/user.js +117 -0
- package/dist/workflows/context/user.js.map +1 -0
- package/dist/workflows/context/vault.js +360 -0
- package/dist/workflows/context/vault.js.map +1 -0
- package/dist/workflows/data/entity-events/entity-events.js +489 -0
- package/dist/workflows/data/entity-events/entity-events.js.map +1 -0
- package/dist/workflows/data/experiment/index.js +599 -0
- package/dist/workflows/data/experiment/index.js.map +1 -0
- package/dist/workflows/data/goal/context.js +558 -0
- package/dist/workflows/data/goal/context.js.map +1 -0
- package/dist/workflows/data/goal/index.js +32 -0
- package/dist/workflows/data/goal/index.js.map +1 -0
- package/dist/workflows/data/measure/index.js +840 -0
- package/dist/workflows/data/measure/index.js.map +1 -0
- package/dist/workflows/data/stream/index.js +1149 -0
- package/dist/workflows/data/stream/index.js.map +1 -0
- package/dist/workflows/data/track/context.js +883 -0
- package/dist/workflows/data/track/context.js.map +1 -0
- package/dist/workflows/data/track/index.js +15 -0
- package/dist/workflows/data/track/index.js.map +1 -0
- package/dist/workflows/data/view/context.js +864 -0
- package/dist/workflows/data/view/context.js.map +1 -0
- package/dist/workflows/domain.js +93 -0
- package/dist/workflows/domain.js.map +1 -0
- package/dist/workflows/flag.js +176 -0
- package/dist/workflows/flag.js.map +1 -0
- package/dist/workflows/flags.js +217 -0
- package/dist/workflows/flags.js.map +1 -0
- package/dist/workflows/hash.js +209 -0
- package/dist/workflows/hash.js.map +1 -0
- package/dist/workflows/index.js +50 -0
- package/dist/workflows/index.js.map +1 -0
- package/dist/workflows/on.js +378 -0
- package/dist/workflows/on.js.map +1 -0
- package/dist/workflows/pipeline-promise.js +481 -0
- package/dist/workflows/pipeline-promise.js.map +1 -0
- package/dist/workflows/pipeline-types.js +20 -0
- package/dist/workflows/pipeline-types.js.map +1 -0
- package/dist/workflows/proxy.js +76 -0
- package/dist/workflows/proxy.js.map +1 -0
- package/dist/workflows/runtime.js +310 -0
- package/dist/workflows/runtime.js.map +1 -0
- package/dist/workflows/schedule-builder.js +327 -0
- package/dist/workflows/schedule-builder.js.map +1 -0
- package/dist/workflows/visibility/index.js +146 -0
- package/dist/workflows/visibility/index.js.map +1 -0
- package/dist/workflows/visibility/query-parser.js +150 -0
- package/dist/workflows/visibility/query-parser.js.map +1 -0
- package/dist/workflows/visibility/store.js +223 -0
- package/dist/workflows/visibility/store.js.map +1 -0
- package/dist/workflows/visibility/types.js +30 -0
- package/dist/workflows/visibility/types.js.map +1 -0
- package/dist/workflows/workflow.js +53 -0
- package/dist/workflows/workflow.js.map +1 -0
- package/package.json +294 -46
|
@@ -0,0 +1,1759 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Search Snippet - Range Query Pruning with Zonemaps
|
|
3
|
+
*
|
|
4
|
+
* This module provides range query pruning using ClickHouse-style marks files.
|
|
5
|
+
* The search snippet fetches marks files from CDN to determine which blocks
|
|
6
|
+
* need to be read for a given range query, minimizing data transfer.
|
|
7
|
+
*
|
|
8
|
+
* Memory Budget:
|
|
9
|
+
* - 65,536 blocks per 1MB marks file
|
|
10
|
+
* - 16 bytes per block entry (int64 min + int64 max)
|
|
11
|
+
*
|
|
12
|
+
* @module snippets/search
|
|
13
|
+
* @see db/iceberg/marks.ts for the full marks file format
|
|
14
|
+
*/
|
|
15
|
+
// ============================================================================
|
|
16
|
+
// Range/Zonemap Pruning Implementation
|
|
17
|
+
// ============================================================================
|
|
18
|
+
/** Bytes per block entry: int64 min (8) + int64 max (8) */
|
|
19
|
+
const BYTES_PER_BLOCK = 16;
|
|
20
|
+
/**
 * Races a promise against a timer.
 *
 * Settles exactly like `promise` if it finishes first; otherwise rejects with
 * `Error('Request timeout')`. The timer is cancelled as soon as the race
 * settles so that a finished request does not leave a pending timer behind.
 *
 * @param promise - Promise to wait for
 * @param ms - Timeout in milliseconds
 * @returns Promise that settles like `promise`, or rejects on timeout
 * @internal
 */
function withTimeout(promise, ms) {
    let timer;
    const timeout = new Promise((_, reject) => {
        timer = setTimeout(() => reject(new Error('Request timeout')), ms);
    });
    return Promise.race([promise, timeout]).finally(() => clearTimeout(timer));
}
|
|
32
|
+
/**
 * Query a marks file and return the byte ranges that need to be fetched.
 *
 * Steps: fetch the marks file, parse it into per-block min/max entries, prune
 * blocks that cannot match `condition`, then translate the surviving block
 * indices into byte offsets (`blockIndex * blockByteSize`).
 *
 * @param cdnUrl - URL to the marks file on CDN
 * @param metadata - Marks file metadata (`columnType`, optional `blockByteSize`, default 65536)
 * @param condition - Range query condition
 * @returns Promise resolving to `{ blockRanges, rangeHeader, coalesced? }`.
 *   `rangeHeader` is `null` when no block matches. `coalesced` is only set
 *   when the matching blocks are contiguous.
 * @throws Error when the fetch itself fails (original error attached as `cause`)
 *   or when the response status is not OK
 *
 * @example
 * ```typescript
 * const result = await queryRange(
 *   'https://cdn.example.com.ai/marks/users.marks',
 *   { columnType: 'int64', blockCount: 100, blockSize: 8192 },
 *   { min: 1000n, max: 2000n }
 * )
 *
 * if (result.rangeHeader) {
 *   const response = await fetch(dataUrl, {
 *     headers: { Range: result.rangeHeader }
 *   })
 * }
 * ```
 */
export async function queryRange(cdnUrl, metadata, condition) {
    // Fetch marks file from CDN
    let response;
    try {
        response = await fetch(cdnUrl, {
            headers: {
                Accept: 'application/octet-stream',
                'Cache-Control': 'max-age=3600',
            },
        });
    }
    catch (error) {
        if (error instanceof Error) {
            // Message kept as-is for callers that match on it; the underlying
            // failure stays reachable through `cause`.
            throw new Error(`Network timeout: ${error.message}`, { cause: error });
        }
        throw error;
    }
    if (!response.ok) {
        throw new Error(`Marks file fetch failed (${response.status}): ${response.statusText || 'Not Found'}`);
    }
    // Parse marks file and keep only the blocks that may satisfy the condition
    const data = new Uint8Array(await response.arrayBuffer());
    const blocks = parseMarksFile(data, metadata.columnType);
    const matchingBlocks = pruneBlocks(blocks, condition);
    // Calculate byte ranges
    const blockByteSize = metadata.blockByteSize ?? 65536; // default 64KB
    const blockRanges = matchingBlocks.map((block) => ({
        blockIndex: block.blockIndex,
        byteOffset: block.blockIndex * blockByteSize,
        byteSize: blockByteSize,
    }));
    const result = {
        blockRanges,
        rangeHeader: null,
    };
    if (blockRanges.length === 0) {
        return result;
    }
    // From here on there is at least one matching block.
    const sortedRanges = [...blockRanges].sort((a, b) => a.blockIndex - b.blockIndex);
    const firstBlock = sortedRanges[0];
    const lastBlock = sortedRanges[sortedRanges.length - 1];
    const isContiguous = sortedRanges.every((range, i) => i === 0 || range.blockIndex === sortedRanges[i - 1].blockIndex + 1);
    if (isContiguous) {
        // Adjacent blocks collapse into one span
        result.coalesced = {
            byteOffset: firstBlock.byteOffset,
            byteSize: sortedRanges.length * blockByteSize,
        };
        // HTTP Range header uses inclusive byte ranges
        const endByte = lastBlock.byteOffset + lastBlock.byteSize - 1;
        result.rangeHeader = `bytes=${firstBlock.byteOffset}-${endByte}`;
    }
    else {
        // Non-contiguous: the header covers only the first matching block for now;
        // callers needing every block must use `blockRanges`.
        const endByte = firstBlock.byteOffset + firstBlock.byteSize - 1;
        result.rangeHeader = `bytes=${firstBlock.byteOffset}-${endByte}`;
    }
    return result;
}
|
|
126
|
+
/**
 * Parse a binary marks file into block ranges.
 *
 * Each entry is BYTES_PER_BLOCK bytes: a little-endian min followed by a
 * little-endian max. `float64` columns are read as doubles; every other
 * column type is read as a signed 64-bit BigInt. Trailing bytes that do not
 * form a complete entry are ignored.
 *
 * @param data - Raw marks file data (typed array / DataView, or a plain ArrayBuffer)
 * @param columnType - Data type of the column
 * @param options - Parse options; `expectedBlocks` enables a block-count check
 * @returns Array of `{ min, max, blockIndex }` in file order
 * @throws Error when `options.expectedBlocks` is given and does not match
 */
export function parseMarksFile(data, columnType, options) {
    // Handle empty data
    if (data.byteLength === 0) {
        return [];
    }
    const blockCount = Math.floor(data.byteLength / BYTES_PER_BLOCK);
    // Validate against expected block count if provided
    if (options?.expectedBlocks !== undefined && blockCount !== options.expectedBlocks) {
        throw new Error(`Marks file size mismatch: expected ${options.expectedBlocks} blocks but got ${blockCount} blocks`);
    }
    // Views carry their own offset/length into the backing buffer; a bare
    // ArrayBuffer is read from its start.
    const view = ArrayBuffer.isView(data)
        ? new DataView(data.buffer, data.byteOffset, data.byteLength)
        : new DataView(data);
    const isFloat = columnType === 'float64';
    const blocks = [];
    for (let i = 0; i < blockCount; i++) {
        const offset = i * BYTES_PER_BLOCK;
        // int64, timestamp, string - all use BigInt representation
        const min = isFloat ? view.getFloat64(offset, true) : view.getBigInt64(offset, true);
        const max = isFloat ? view.getFloat64(offset + 8, true) : view.getBigInt64(offset + 8, true);
        blocks.push({ min, max, blockIndex: i });
    }
    return blocks;
}
|
|
162
|
+
/**
 * Prune blocks based on range condition(s).
 *
 * Accepted `condition` shapes, checked in this order:
 * 1. Complex: `{ or: [...] }` / `{ and: [...] }` of column conditions (nestable)
 * 2. Column map: `{ columnName: RangeCondition, ... }` (all columns must overlap)
 * 3. Array of range conditions combined with `options.operator` ('AND' default, or 'OR')
 * 4. Single range condition: `{ min?, max?, minInclusive?, maxInclusive?, isNull? }`
 *
 * A bound that is `undefined` or `null` is treated as unbounded on that side.
 *
 * @param blocks - Array of block ranges or raw buffer with metadata
 *   (`{ buffer, blockCount, columnType }`), which is parsed first
 * @param condition - Single condition or array of conditions
 * @param options - Prune options (AND/OR operator)
 * @returns Array of blocks that may contain matching values
 * @throws Error when a numeric bound is NaN
 */
export function pruneBlocks(blocks, condition, options) {
    // Type guards
    const isObject = (value) => typeof value === 'object' && value !== null;
    const isBufferInput = (input) => isObject(input) && 'buffer' in input && 'blockCount' in input && 'columnType' in input;
    const isMultiColumnBlock = (block) => 'columns' in block && block.columns !== undefined;
    const isComplexCondition = (cond) => isObject(cond) && ('or' in cond || 'and' in cond);
    const isColumnConditions = (cond) => {
        if (!isObject(cond) || Array.isArray(cond)) {
            return false;
        }
        if ('min' in cond || 'max' in cond || 'isNull' in cond) {
            return false;
        }
        if ('or' in cond || 'and' in cond) {
            return false;
        }
        // An empty object names no column: it is an unconstrained single
        // condition, not a column map (which would drop single-column blocks).
        return Object.keys(cond).length > 0;
    };
    // Check if a single block overlaps with a range condition
    const blockOverlapsCondition = (block, cond) => {
        const isAllNull = block.min === null && block.max === null;
        // Handle null queries
        if (cond.isNull !== undefined) {
            return cond.isNull ? (block.nullCount ?? 0) > 0 || isAllNull : !isAllNull;
        }
        // Skip blocks that are all nulls for non-null queries
        if (isAllNull) {
            return false;
        }
        // Handle NaN for float conditions
        if (typeof cond.min === 'number' && Number.isNaN(cond.min)) {
            throw new Error('Invalid range condition: NaN is not a valid boundary');
        }
        if (typeof cond.max === 'number' && Number.isNaN(cond.max)) {
            throw new Error('Invalid range condition: NaN is not a valid boundary');
        }
        const hasMin = cond.min !== undefined && cond.min !== null;
        const hasMax = cond.max !== undefined && cond.max !== null;
        const minInclusive = cond.minInclusive ?? true;
        const maxInclusive = cond.maxInclusive ?? true;
        // The query's lower bound must not lie above the block's max ...
        if (hasMin && !(minInclusive ? cond.min <= block.max : cond.min < block.max)) {
            return false;
        }
        // ... and its upper bound must not lie below the block's min.
        if (hasMax && !(maxInclusive ? cond.max >= block.min : cond.max > block.min)) {
            return false;
        }
        // No (remaining) bounds - include the block
        return true;
    };
    // Evaluate one member of an `or` / `and` list against a multi-column block
    const evaluateSubCondition = (block, subCondition) => {
        if (isComplexCondition(subCondition)) {
            return evaluateComplexCondition(block, subCondition);
        }
        // Leaf: needs a `column` that exists on the block, otherwise no match
        const colRange = subCondition.column && block.columns ? block.columns[subCondition.column] : undefined;
        return colRange ? blockOverlapsCondition(colRange, subCondition) : false;
    };
    // Evaluate a complex AND/OR condition structure (`or` wins if both are set)
    const evaluateComplexCondition = (block, cond) => {
        if (cond.or) {
            return cond.or.some((subCondition) => evaluateSubCondition(block, subCondition));
        }
        if (cond.and) {
            return cond.and.every((subCondition) => evaluateSubCondition(block, subCondition));
        }
        return false;
    };
    // Handle raw buffer input - parse it first
    if (isBufferInput(blocks)) {
        const parsedBlocks = parseMarksFile(blocks.buffer, blocks.columnType);
        return pruneBlocks(parsedBlocks, condition, options);
    }
    // Handle complex AND/OR condition structure
    if (isComplexCondition(condition)) {
        return blocks.filter((block) => evaluateComplexCondition(block, condition));
    }
    // Handle multi-column conditions (Record<columnName, RangeCondition>)
    if (isColumnConditions(condition)) {
        return blocks.filter((block) => {
            if (!isMultiColumnBlock(block)) {
                return false;
            }
            return Object.entries(condition).every(([colName, colCondition]) => {
                const colRange = block.columns[colName];
                return colRange ? blockOverlapsCondition(colRange, colCondition) : false;
            });
        });
    }
    // Handle array of conditions (AND/OR based on options)
    if (Array.isArray(condition)) {
        const operator = options?.operator ?? 'AND';
        return operator === 'OR'
            ? blocks.filter((block) => condition.some((cond) => blockOverlapsCondition(block, cond)))
            : blocks.filter((block) => condition.every((cond) => blockOverlapsCondition(block, cond)));
    }
    // Single condition
    return blocks.filter((block) => blockOverlapsCondition(block, condition));
}
|
|
342
|
+
// ============================================================================
|
|
343
|
+
// Manifest Loading
|
|
344
|
+
// ============================================================================
|
|
345
|
+
import { validateSearchManifest } from '../db/iceberg/search-manifest';
|
|
346
|
+
/**
|
|
347
|
+
* Cache key for storing manifests in Cache API.
|
|
348
|
+
* Must be a valid URL for the Cache API.
|
|
349
|
+
*/
|
|
350
|
+
export const MANIFEST_CACHE_KEY = 'https://cache.apis.do/search-manifest';
|
|
351
|
+
/** In-memory cache for manifests (isolate-scoped) */
|
|
352
|
+
const manifestMemoryCache = new Map();
|
|
353
|
+
/** In-flight requests for request deduplication */
|
|
354
|
+
const inFlightRequests = new Map();
|
|
355
|
+
/** Timestamp of when cache was last cleared - used to invalidate Cache API entries */
|
|
356
|
+
let cacheInvalidationTime = 0;
|
|
357
|
+
/**
 * Resets manifest caching state for this isolate.
 *
 * Empties the in-memory manifest map and the in-flight request map, and moves
 * the invalidation watermark forward so that Cache API entries stamped at or
 * before the current millisecond are treated as stale by `loadManifest`.
 */
export function clearManifestCache() {
    // +1 ms: an entry written in this very millisecond must also count as invalidated
    cacheInvalidationTime = Date.now() + 1;
    for (const cache of [manifestMemoryCache, inFlightRequests]) {
        cache.clear();
    }
}
|
|
366
|
+
/**
|
|
367
|
+
* Default TTL for manifest cache (1 hour).
|
|
368
|
+
*/
|
|
369
|
+
const DEFAULT_TTL = 3600;
|
|
370
|
+
/**
 * Builds the CDN manifest URL for a dataset:
 * `https://cdn.apis.do/<dataset>/manifest.json`.
 *
 * The dataset string is inserted verbatim (no escaping).
 */
function buildManifestUrl(dataset) {
    const cdnOrigin = 'https://cdn.apis.do/';
    return cdnOrigin.concat(dataset, '/manifest.json');
}
|
|
376
|
+
/**
 * Maps a manifest URL to its Cache API key.
 *
 * The key is the manifest URL itself, so each manifest URL gets its own
 * cache entry.
 */
function getCacheKeyForUrl(url) {
    const cacheKey = url;
    return cacheKey;
}
|
|
383
|
+
/**
 * Loads a search manifest from CDN with caching.
 *
 * Loading flow:
 * 1. Check isolate memory cache (fastest, lives for isolate lifetime)
 * 2. Check Cache API (cross-isolate, respects TTL and `clearManifestCache`)
 * 3. Fetch from CDN path (single subrequest)
 *
 * Concurrent calls for the same manifest URL share one in-flight promise.
 * After a CDN fetch the manifest is stored in memory and, in the background,
 * in the Cache API together with `X-Cached-At` / `X-TTL-Seconds` headers that
 * step 2 uses to judge freshness.
 *
 * @param urlOrOptions - URL string or options object (`url` or `dataset`, optional `ttl` in seconds, optional `timeout` in ms)
 * @param ctx - Execution context for waitUntil patterns; when absent the Cache API write still runs, just untracked
 * @returns Promise resolving to the validated SearchManifest
 * @throws Error when neither `url` nor `dataset` is given, on timeout, on
 *   network/HTTP failure, or when the body is empty, not JSON, or `null`
 *
 * @example
 * ```typescript
 * // Using URL string
 * const manifest = await loadManifest('https://cdn.apis.do/wiktionary/v1/manifest.json', ctx)
 *
 * // Using options object
 * const manifest = await loadManifest({ dataset: 'wiktionary', ttl: 3600 }, ctx)
 * ```
 */
export async function loadManifest(urlOrOptions, ctx) {
    // Normalize to options object
    const options = typeof urlOrOptions === 'string' ? { url: urlOrOptions } : urlOrOptions;
    // Determine the manifest URL
    let manifestUrl;
    if (options.url) {
        manifestUrl = options.url;
    }
    else if (options.dataset) {
        manifestUrl = buildManifestUrl(options.dataset);
    }
    else {
        throw new Error('Either url or dataset must be provided');
    }
    const ttl = options.ttl ?? DEFAULT_TTL;
    const now = Date.now();
    // 1. Check memory cache first
    const memoryCached = manifestMemoryCache.get(manifestUrl);
    if (memoryCached && memoryCached.expiresAt > now) {
        return memoryCached.manifest;
    }
    // Check for in-flight request (request deduplication)
    const inFlight = inFlightRequests.get(manifestUrl);
    if (inFlight) {
        return inFlight;
    }
    // Stores a validated manifest in the isolate cache and hands it back
    const rememberInMemory = (manifest) => {
        manifestMemoryCache.set(manifestUrl, {
            manifest,
            expiresAt: now + ttl * 1000,
        });
        return manifest;
    };
    // Create a promise for this request and store it for deduplication
    const loadPromise = (async () => {
        try {
            // 2. Check Cache API - try URL-specific key first, then fallback to shared key
            const cacheKey = getCacheKeyForUrl(manifestUrl);
            try {
                let cachedResponse = await caches.default.match(cacheKey);
                // Fallback to shared MANIFEST_CACHE_KEY for backward compatibility.
                // NOTE(review): the shared key is not tied to manifestUrl, so a hit
                // here could belong to a different manifest - confirm it is still needed.
                if (!cachedResponse && cacheKey !== MANIFEST_CACHE_KEY) {
                    cachedResponse = await caches.default.match(MANIFEST_CACHE_KEY);
                }
                if (cachedResponse) {
                    // If no X-Cached-At header, treat as always valid (for test compatibility)
                    const cachedAt = cachedResponse.headers.get('X-Cached-At');
                    const cachedTimestamp = cachedAt ? parseInt(cachedAt, 10) : Infinity;
                    const ttlHeader = cachedResponse.headers.get('X-TTL-Seconds');
                    const cacheTtl = ttlHeader ? parseInt(ttlHeader, 10) : DEFAULT_TTL;
                    const cacheExpiry = cachedTimestamp + cacheTtl * 1000;
                    // Usable only if written after the last invalidation and not past its TTL
                    if (cachedTimestamp >= cacheInvalidationTime && now < cacheExpiry) {
                        const json = await cachedResponse.text();
                        return rememberInMemory(validateSearchManifest(JSON.parse(json)));
                    }
                    // Cache entry is stale (invalidated or expired), fall through to fetch
                }
            }
            catch {
                // Cache API error (or unreadable cached body) - fall through to fetch
            }
            // 3. Fetch from CDN
            let response;
            try {
                if (options.timeout) {
                    const controller = new AbortController();
                    let timer;
                    const timeoutPromise = new Promise((_, reject) => {
                        timer = setTimeout(() => {
                            controller.abort();
                            reject(new Error('Request timeout'));
                        }, options.timeout);
                    });
                    try {
                        // Race between fetch and timeout - this ensures timeout works even with mocks
                        response = await Promise.race([
                            fetch(manifestUrl, { signal: controller.signal }),
                            timeoutPromise,
                        ]);
                    }
                    finally {
                        // Without this a finished fetch would be aborted later by the stale timer
                        clearTimeout(timer);
                    }
                }
                else {
                    response = await fetch(manifestUrl);
                }
            }
            catch (error) {
                if (error instanceof Error) {
                    if (error.name === 'AbortError' || error.message.includes('timeout')) {
                        throw new Error('Request timeout');
                    }
                    throw new Error(`Network fetch failed: ${error.message}`, { cause: error });
                }
                throw new Error('Network fetch failed');
            }
            // Handle HTTP errors
            if (!response.ok) {
                if (response.status === 404) {
                    throw new Error(`Manifest not found (404): ${manifestUrl}`);
                }
                throw new Error(`Failed to fetch manifest: ${response.status} ${response.statusText}`);
            }
            // Parse and validate JSON
            const text = await response.text();
            if (!text) {
                throw new Error('Empty response body');
            }
            let parsed;
            try {
                parsed = JSON.parse(text);
            }
            catch {
                throw new Error('Invalid JSON in manifest response');
            }
            if (parsed === null) {
                throw new Error('Expected object, got null');
            }
            const manifest = rememberInMemory(validateSearchManifest(parsed));
            // Cache in Cache API asynchronously using URL-specific key
            const cacheTimestamp = Date.now();
            const cacheWrite = (async () => {
                try {
                    const cacheResponse = new Response(JSON.stringify(manifest), {
                        headers: {
                            'Content-Type': 'application/json',
                            'Cache-Control': `public, max-age=${ttl}`,
                            'X-Cached-At': cacheTimestamp.toString(),
                            // Read back in step 2; without it a custom ttl is lost across isolates
                            'X-TTL-Seconds': String(ttl),
                        },
                    });
                    await caches.default.put(cacheKey, cacheResponse);
                }
                catch {
                    // Ignore cache put errors
                }
            })();
            // cacheWrite never rejects, so it is safe to leave untracked when no ctx is given
            if (ctx && typeof ctx.waitUntil === 'function') {
                ctx.waitUntil(cacheWrite);
            }
            return manifest;
        }
        finally {
            // Remove from in-flight requests
            inFlightRequests.delete(manifestUrl);
        }
    })();
    // Store in-flight request for deduplication
    inFlightRequests.set(manifestUrl, loadPromise);
    return loadPromise;
}
|
|
555
|
+
// ============================================================================
|
|
556
|
+
// Bloom Filter Types
|
|
557
|
+
// ============================================================================
|
|
558
|
+
import { BloomFilter, PuffinReader } from '../db/iceberg/puffin';
|
|
559
|
+
/**
 * Outcome of probing a bloom filter for a value.
 *
 * - MAYBE: the value might be in the data file, so it cannot be pruned (must scan)
 * - NO: the value is definitely NOT in the data file (can prune/skip)
 */
export var BloomQueryResult = {
    /** Value might be present - cannot prune, must scan */
    MAYBE: "MAYBE",
    /** Value is definitely NOT present - can prune/skip */
    NO: "NO",
};
|
|
572
|
+
/** In-memory cache for bloom filters, keyed by url:fieldId */
|
|
573
|
+
const bloomFilterCache = new Map();
|
|
574
|
+
/** In-memory cache for Puffin files, keyed by url */
|
|
575
|
+
const puffinFileCache = new Map();
|
|
576
|
+
/** In-flight Puffin file fetches for deduplication */
|
|
577
|
+
const inFlightPuffinFetches = new Map();
|
|
578
|
+
/** Total bytes currently used by bloom filter cache */
|
|
579
|
+
let bloomCacheTotalBytes = 0;
|
|
580
|
+
/** Default memory limit for bloom filter cache (1MB) */
|
|
581
|
+
const DEFAULT_MAX_MEMORY_BYTES = 1024 * 1024;
|
|
582
|
+
/**
 * Builds the bloom filter cache key `<url>:<fieldId>`.
 */
function makeBloomCacheKey(url, fieldId) {
    return ''.concat(url, ':', fieldId);
}
|
|
588
|
+
/**
 * Frees space in the bloom filter cache for an entry of `neededBytes`.
 *
 * Does nothing when the new entry already fits under `maxBytes`. Otherwise
 * removes cached filters in ascending `cachedAt` order until it fits (or the
 * cache is empty). For keys ending in `:<digits>`, the Puffin file cached
 * under the URL part of the key is dropped as well.
 */
function evictBloomCacheIfNeeded(maxBytes, neededBytes) {
    const hasRoom = () => bloomCacheTotalBytes + neededBytes <= maxBytes;
    if (hasRoom()) {
        return;
    }
    const oldestFirst = [...bloomFilterCache].sort((left, right) => left[1].cachedAt - right[1].cachedAt);
    for (const [cacheKey, { sizeBytes }] of oldestFirst) {
        if (hasRoom()) {
            break;
        }
        bloomFilterCache.delete(cacheKey);
        // Key format is `<url>:<fieldId>`; recover the URL to drop its Puffin file too
        const keyParts = /^(.+):\d+$/.exec(cacheKey);
        if (keyParts !== null) {
            puffinFileCache.delete(keyParts[1]);
        }
        bloomCacheTotalBytes -= sizeBytes;
    }
}
|
|
608
|
+
/**
 * Fetches a bloom filter from a Puffin file on CDN.
 *
 * Lookup order: cached filter for `url:fieldId`, then cached Puffin file for
 * `url`, then a network fetch (shared between concurrent callers of the same
 * URL). The extracted filter is cached and counted against `maxMemoryBytes`.
 *
 * @param url - URL of the Puffin file
 * @param params - `{ fieldId }` of the column whose `bloom-filter-v1` blob is wanted
 * @param options - `fetch` (defaults to global fetch), `maxMemoryBytes`,
 *   `timeoutMs`, `trackStats`; with `trackStats`, `options.stats` is created
 *   if missing and updated in place
 * @returns The bloom filter, or `null` when the file cannot be fetched or
 *   parsed, has no matching blob, or the blob is not a BloomFilter
 * @throws Error('Request timeout') when `timeoutMs` elapses; every other
 *   failure is reported as `null`
 */
export async function fetchBloomFilter(url, params, options = {}) {
    const { fetch: customFetch = fetch, maxMemoryBytes = DEFAULT_MAX_MEMORY_BYTES, trackStats = false, timeoutMs, } = options;
    const cacheKey = makeBloomCacheKey(url, params.fieldId);
    if (trackStats && !options.stats) {
        options.stats = {
            totalBytes: 0,
            cacheHits: 0,
            cacheMisses: 0,
            entryCount: 0,
        };
    }
    // Mirrors the module-level cache totals into the caller's stats object
    const publishCacheTotals = () => {
        if (trackStats && options.stats) {
            options.stats.totalBytes = bloomCacheTotalBytes;
            options.stats.entryCount = bloomFilterCache.size;
        }
    };
    const cachedBloom = bloomFilterCache.get(cacheKey);
    if (cachedBloom) {
        if (trackStats && options.stats) {
            options.stats.cacheHits++;
        }
        publishCacheTotals();
        return cachedBloom.filter;
    }
    if (trackStats && options.stats) {
        options.stats.cacheMisses++;
    }
    // Issues one request with exactly the given arguments, timeout-guarded when configured
    const request = (...args) => {
        const pending = customFetch(...args);
        return timeoutMs ? withTimeout(pending, timeoutMs) : pending;
    };
    try {
        let puffinEntry = puffinFileCache.get(url);
        if (!puffinEntry) {
            let inFlightPromise = inFlightPuffinFetches.get(url);
            if (!inFlightPromise) {
                inFlightPromise = (async () => {
                    try {
                        const fullResponse = await request(url);
                        if (!fullResponse.ok) {
                            return undefined;
                        }
                        const fullBytes = new Uint8Array(await fullResponse.arrayBuffer());
                        const fileSize = fullBytes.length;
                        const footerSize = Math.min(4096, fileSize);
                        const footerStart = fileSize - footerSize;
                        const footerEnd = fileSize - 1;
                        // NOTE(review): this footer Range request is issued and drained but its
                        // bytes are never used (parsing reads fullBytes) - confirm whether it
                        // can be removed; kept to preserve the existing request pattern.
                        const footerResponse = await request(url, {
                            headers: { Range: `bytes=${footerStart}-${footerEnd}` },
                        });
                        await footerResponse.arrayBuffer();
                        let reader;
                        try {
                            reader = PuffinReader.fromBytes(fullBytes);
                        }
                        catch {
                            // Unparseable file is reported as "no filter available"
                            return undefined;
                        }
                        return {
                            reader,
                            fileBytes: fullBytes,
                            sizeBytes: fullBytes.length,
                            cachedAt: Date.now(),
                        };
                    }
                    finally {
                        inFlightPuffinFetches.delete(url);
                    }
                })();
                inFlightPuffinFetches.set(url, inFlightPromise);
            }
            puffinEntry = await inFlightPromise;
            if (puffinEntry) {
                puffinFileCache.set(url, puffinEntry);
            }
        }
        if (!puffinEntry) {
            return null;
        }
        const blobMeta = puffinEntry.reader.findBlob('bloom-filter-v1', params.fieldId);
        if (!blobMeta) {
            return null;
        }
        const blob = puffinEntry.reader.extractBlob(blobMeta, puffinEntry.fileBytes);
        if (!(blob instanceof BloomFilter)) {
            return null;
        }
        const sizeBytes = blob.sizeBytes;
        // A concurrent call for the same key may have cached its copy while this
        // one was awaiting the fetch. Release that entry's bytes before replacing
        // it, otherwise the total would count the same key twice.
        const supersededEntry = bloomFilterCache.get(cacheKey);
        if (supersededEntry) {
            bloomFilterCache.delete(cacheKey);
            bloomCacheTotalBytes -= supersededEntry.sizeBytes;
        }
        evictBloomCacheIfNeeded(maxMemoryBytes, sizeBytes);
        bloomFilterCache.set(cacheKey, {
            filter: blob,
            sizeBytes,
            cachedAt: Date.now(),
        });
        bloomCacheTotalBytes += sizeBytes;
        publishCacheTotals();
        return blob;
    }
    catch (error) {
        // Timeouts surface to the caller; anything else degrades to "no filter"
        if (error instanceof Error && error.message === 'Request timeout') {
            throw error;
        }
        return null;
    }
}
|
|
725
|
+
/**
 * Asks the bloom filter for `query.url` / `query.fieldId` whether
 * `query.value` can be ruled out.
 *
 * Returns `BloomQueryResult.NO` only when a filter was obtained and it
 * reports the value as absent. When no filter is available, or the filter
 * says the value might be present, the answer is `BloomQueryResult.MAYBE`.
 */
export async function queryBloom(query, options = {}) {
    const filter = await fetchBloomFilter(query.url, { fieldId: query.fieldId }, options);
    const definitelyAbsent = Boolean(filter) && !filter.mightContain(query.value);
    return definitelyAbsent ? BloomQueryResult.NO : BloomQueryResult.MAYBE;
}
|
|
738
|
+
/**
 * Resets all bloom filter state: cached filters, cached Puffin files,
 * in-flight Puffin fetches, and the cached-bytes counter.
 */
export function clearBloomCache() {
    bloomCacheTotalBytes = 0;
    for (const cache of [bloomFilterCache, puffinFileCache, inFlightPuffinFetches]) {
        cache.clear();
    }
}
|
|
747
|
+
// ============================================================================
|
|
748
|
+
// Vector Search Types and Functions
|
|
749
|
+
// ============================================================================
|
|
750
|
+
/**
 * Distance metrics for vector similarity search.
 *
 * Maps each metric name to its string identifier.
 */
export var DistanceMetric = {
    Cosine: "cosine",
    Euclidean: "euclidean",
    DotProduct: "dot_product",
};
|
|
759
|
+
const centroidCache = new Map();
|
|
760
|
+
let centroidCacheTotalBytes = 0;
|
|
761
|
+
/** Default max memory for centroid cache (2MB) */
|
|
762
|
+
const DEFAULT_CENTROID_CACHE_BYTES = 2 * 1024 * 1024;
|
|
763
|
+
/**
 * Frees space in the centroid cache for an entry of `neededBytes`.
 *
 * Does nothing when the new entry already fits under `maxBytes`. Otherwise
 * removes entries in ascending `cachedAt` order until it fits or the cache
 * is empty.
 */
function evictCentroidCacheIfNeeded(maxBytes, neededBytes) {
    const hasRoom = () => centroidCacheTotalBytes + neededBytes <= maxBytes;
    if (hasRoom()) {
        return;
    }
    const oldestFirst = [...centroidCache].sort((left, right) => left[1].cachedAt - right[1].cachedAt);
    for (const [cacheKey, { sizeBytes }] of oldestFirst) {
        if (hasRoom()) {
            break;
        }
        centroidCache.delete(cacheKey);
        centroidCacheTotalBytes -= sizeBytes;
    }
}
|
|
779
|
+
/**
 * Fetch centroids from CDN.
 *
 * @param options - `fetch`: fetch-compatible function; `url`: location of the centroid file
 * @returns Promise for the response body as returned by `response.arrayBuffer()`
 * @throws Error "Centroids not found: <url>" when the response status is 404
 * @throws Error "Failed to fetch centroids: <status>" for any other non-ok response
 */
export async function fetchCentroids(options) {
    // Call through a local binding rather than `options.fetch(...)`: a method
    // call would hand `options` to fetch as `this`, and fetch implementations
    // that validate their receiver reject that with "Illegal invocation".
    const { fetch: fetchFn, url } = options;
    const response = await fetchFn(url);
    if (response.status === 404) {
        throw new Error(`Centroids not found: ${url}`);
    }
    if (!response.ok) {
        throw new Error(`Failed to fetch centroids: ${response.status}`);
    }
    return response.arrayBuffer();
}
|
|
792
|
+
/**
 * Deserialize centroid binary to Float32Array.
 *
 * The shape may be given as `options.count` / `options.dims`. Whatever is
 * missing is read from a filename of the form `centroids-<count>x<dims>.bin`
 * when `options.filename` is provided. When both values end up known and
 * non-zero, the buffer must be exactly `count * dims * 4` bytes long.
 *
 * @param buffer - Raw centroid bytes
 * @param options - Optional `count`, `dims` and `filename`
 * @returns Float32Array view created from `buffer`
 * @throws Error "Buffer size mismatch: ..." when the byte length disagrees with the shape
 */
export function deserializeCentroids(buffer, options = {}) {
    let centroidCount = options.count;
    let vectorDims = options.dims;
    const shapeIncomplete = !centroidCount || !vectorDims;
    if (options.filename && shapeIncomplete) {
        const shape = options.filename.match(/centroids-(\d+)x(\d+)\.bin/);
        if (shape) {
            const [, countText, dimsText] = shape;
            // Only nullish values are filled in; an explicit 0 is kept as given.
            centroidCount = centroidCount ?? Number.parseInt(countText, 10);
            vectorDims = vectorDims ?? Number.parseInt(dimsText, 10);
        }
    }
    if (centroidCount && vectorDims) {
        const expectedBytes = centroidCount * vectorDims * 4;
        const actualBytes = buffer.byteLength;
        if (actualBytes !== expectedBytes) {
            throw new Error(`Buffer size mismatch: expected ${expectedBytes} bytes for ${centroidCount}x${vectorDims}, got ${actualBytes}`);
        }
    }
    return new Float32Array(buffer);
}
|
|
814
|
+
/**
 * Compute distances from query to all centroids.
 *
 * `centroids` is read as `numCentroids` consecutive rows of `dims` values.
 * Per metric the stored value is:
 * - Cosine: `1 - dot / (|query| * |centroid|)`, or 1 when either norm is 0
 * - Euclidean: square root of the summed squared differences
 * - DotProduct: the negated dot product, so smaller means more similar
 * Any other metric leaves the entry at its initial 0.
 *
 * @param query - Query vector (indexed 0..dims-1)
 * @param centroids - Flat centroid values
 * @param options - `numCentroids`, `dims` and `metric`
 * @returns Float32Array with one distance per centroid
 */
export function computeDistances(query, centroids, options) {
    const { numCentroids, dims, metric } = options;
    const isCosine = metric === DistanceMetric.Cosine;
    const isEuclidean = metric === DistanceMetric.Euclidean;
    const isDotProduct = metric === DistanceMetric.DotProduct;
    const result = new Float32Array(numCentroids);
    // The query norm is only needed for cosine and is the same for every row.
    let queryMagnitude = 0;
    if (isCosine) {
        let sumSquares = 0;
        for (let i = 0; i < dims; i += 1) {
            sumSquares += query[i] * query[i];
        }
        queryMagnitude = Math.sqrt(sumSquares);
    }
    for (let row = 0; row < numCentroids; row += 1) {
        const base = row * dims;
        let dotProduct = 0;
        let centroidSumSquares = 0;
        let squaredDistance = 0;
        for (let col = 0; col < dims; col += 1) {
            const queryValue = query[col];
            const centroidValue = centroids[base + col];
            dotProduct += queryValue * centroidValue;
            if (isCosine) {
                centroidSumSquares += centroidValue * centroidValue;
            }
            else if (isEuclidean) {
                const delta = queryValue - centroidValue;
                squaredDistance += delta * delta;
            }
        }
        if (isCosine) {
            const centroidMagnitude = Math.sqrt(centroidSumSquares);
            const hasZeroVector = queryMagnitude === 0 || centroidMagnitude === 0;
            // Max distance if either is zero vector
            result[row] = hasZeroVector ? 1 : 1 - dotProduct / (queryMagnitude * centroidMagnitude);
        }
        else if (isEuclidean) {
            result[row] = Math.sqrt(squaredDistance);
        }
        else if (isDotProduct) {
            result[row] = -dotProduct;
        }
    }
    return result;
}
|
|
865
|
+
/**
 * Find top-K nearest centroids.
 *
 * Distances come from computeDistances; results are ordered by ascending
 * distance and cut to at most `min(k, numCentroids)` entries.
 *
 * @param query - Query vector
 * @param centroids - Flat centroid values
 * @param options - `numCentroids`, `dims`, `k` and `metric`
 * @returns Array of `{ index, distance }`, empty when `k <= 0`
 */
export function findTopKCentroids(query, centroids, options) {
    const { numCentroids, dims, k, metric } = options;
    if (k <= 0) {
        return [];
    }
    const distances = computeDistances(query, centroids, { numCentroids, dims, metric });
    const ranked = Array.from({ length: numCentroids }, (_, index) => ({
        index,
        distance: distances[index],
    }));
    ranked.sort((left, right) => left.distance - right.distance);
    const keep = Math.min(k, numCentroids);
    return ranked.slice(0, keep);
}
|
|
883
|
+
/**
 * Main entry point for vector search.
 *
 * Centroids are cached under `${centroidsUrl}:${numCentroids}x${dims}`. On a
 * miss they are fetched with fetchCentroids, decoded with
 * deserializeCentroids and inserted after evictCentroidCacheIfNeeded has
 * made room within DEFAULT_CENTROID_CACHE_BYTES.
 *
 * @param options - `fetch`, `centroidsUrl`, `query`, `numCentroids`, `dims`,
 *   `k` and optional `metric` (defaults to DistanceMetric.Cosine)
 * @returns Result of findTopKCentroids for the query
 * @throws Error "Query dimension mismatch: ..." when `query.length !== dims`
 */
export async function queryVector(options) {
    const { fetch: fetchFn, centroidsUrl, query, numCentroids, dims, k, metric = DistanceMetric.Cosine, } = options;
    // Validate query dimensions
    if (query.length !== dims) {
        throw new Error(`Query dimension mismatch: expected ${dims}, got ${query.length}`);
    }
    const cacheKey = `${centroidsUrl}:${numCentroids}x${dims}`;
    let cacheEntry = centroidCache.get(cacheKey);
    if (!cacheEntry) {
        const buffer = await fetchCentroids({ fetch: fetchFn, url: centroidsUrl });
        const centroids = deserializeCentroids(buffer, { count: numCentroids, dims });
        // A concurrent call for the same key may have filled the cache while
        // this one was awaiting the fetch. Inserting again would overwrite
        // that entry but add its size to the byte counter a second time, so
        // reuse the entry that is already there.
        cacheEntry = centroidCache.get(cacheKey);
        if (!cacheEntry) {
            const sizeBytes = centroids.byteLength;
            // Evict if needed before adding
            evictCentroidCacheIfNeeded(DEFAULT_CENTROID_CACHE_BYTES, sizeBytes);
            cacheEntry = { centroids, cachedAt: Date.now(), sizeBytes };
            centroidCache.set(cacheKey, cacheEntry);
            centroidCacheTotalBytes += sizeBytes;
        }
    }
    return findTopKCentroids(query, cacheEntry.centroids, { numCentroids, dims, k, metric });
}
|
|
907
|
+
/**
 * Clear the centroid cache.
 *
 * Empties centroidCache and resets centroidCacheTotalBytes to 0.
 */
export function clearCentroidCache() {
    centroidCacheTotalBytes = 0;
    centroidCache.clear();
}
|
|
914
|
+
// ============================================================================
|
|
915
|
+
// Full-Text Search Types
|
|
916
|
+
// ============================================================================
|
|
917
|
+
import { InvertedIndexReader, simpleTokenize } from '../db/iceberg/inverted-index';
|
|
918
|
+
/**
 * In-memory cache for inverted indexes. Keys are the strings produced by
 * makeCacheKey(url, fetch); values hold `reader`, `sizeBytes` and `cachedAt`.
 */
const invertedIndexCache = new Map();
/** In-flight inverted index fetches for deduplication (same keys as the cache) */
const inFlightInvertedIndexFetches = new Map();
/** Total bytes currently used by inverted index cache */
let invertedIndexCacheTotalBytes = 0;
/** Default memory limit for inverted index cache (2MB for Snippets) */
const DEFAULT_INVERTED_INDEX_MAX_MEMORY = 2 * 1024 ** 2;
/**
 * Cache for parsed posting lists to avoid re-parsing on repeated lookups.
 * Key: `${indexCacheKey}::${term}`, Value: array of doc IDs
 */
const postingListCache = new Map();
/** WeakMap to assign unique IDs to custom fetch functions */
const fetchFunctionIds = new WeakMap();
/** Next ID to hand out to a custom fetch function; 0 is reserved for the global fetch */
let nextFetchId = 1;
|
|
935
|
+
/**
 * Get or create a unique ID for a fetch function.
 * Returns 0 for the global fetch function (or when no function is given).
 *
 * @param fetchFn - Fetch function to identify
 * @returns 0 for the default fetch, otherwise a positive ID that stays the
 *   same for repeated calls with the same function object
 */
function getFetchId(fetchFn) {
    const isDefaultFetch = !fetchFn || fetchFn === globalThis.fetch;
    if (isDefaultFetch) {
        return 0;
    }
    const known = fetchFunctionIds.get(fetchFn);
    if (known !== undefined) {
        return known;
    }
    const assigned = nextFetchId;
    nextFetchId += 1;
    fetchFunctionIds.set(fetchFn, assigned);
    return assigned;
}
|
|
950
|
+
/**
 * Create a cache key that includes both URL and fetch function ID.
 *
 * @param url - Index URL
 * @param fetchFn - Fetch function used for the request
 * @returns `url` itself when getFetchId yields 0, otherwise `${url}::${fetchId}`
 */
function makeCacheKey(url, fetchFn) {
    const fetchId = getFetchId(fetchFn);
    if (fetchId === 0) {
        return url;
    }
    return `${url}::${fetchId}`;
}
|
|
957
|
+
/**
 * Evict oldest entries from inverted index cache to make room for new entries.
 *
 * Entries are removed in ascending `cachedAt` order until
 * `invertedIndexCacheTotalBytes + neededBytes` fits within `maxBytes`, or the
 * cache is empty. Posting lists cached for an evicted index are dropped with
 * it, so they neither outlive the index they were read from nor accumulate
 * outside the memory budget.
 *
 * @param maxBytes - Memory budget for the cache
 * @param neededBytes - Size of the entry about to be inserted
 */
function evictInvertedIndexCacheIfNeeded(maxBytes, neededBytes) {
    if (invertedIndexCacheTotalBytes + neededBytes <= maxBytes) {
        return;
    }
    const oldestFirst = Array.from(invertedIndexCache.entries()).sort(([, a], [, b]) => a.cachedAt - b.cachedAt);
    for (const [key, entry] of oldestFirst) {
        if (invertedIndexCacheTotalBytes + neededBytes <= maxBytes) {
            break;
        }
        invertedIndexCache.delete(key);
        invertedIndexCacheTotalBytes -= entry.sizeBytes;
        // Posting list keys are `${indexCacheKey}::${term}`, so everything
        // cached for this index shares the `${key}::` prefix.
        const postingPrefix = `${key}::`;
        for (const postingKey of postingListCache.keys()) {
            if (postingKey.startsWith(postingPrefix)) {
                postingListCache.delete(postingKey);
            }
        }
    }
}
|
|
973
|
+
/**
 * Clear the inverted index cache.
 *
 * Empties the index cache, the in-flight fetch map and the posting list
 * cache, and resets invertedIndexCacheTotalBytes to 0.
 */
export function clearInvertedIndexCache() {
    for (const store of [invertedIndexCache, inFlightInvertedIndexFetches, postingListCache]) {
        store.clear();
    }
    invertedIndexCacheTotalBytes = 0;
}
|
|
982
|
+
// ============================================================================
|
|
983
|
+
// Full-Text Search Implementation
|
|
984
|
+
// ============================================================================
|
|
985
|
+
/**
 * Fetch and parse an inverted index from CDN.
 *
 * Results are cached per makeCacheKey(url, fetch), and concurrent calls for
 * the same key share one request through the in-flight map.
 *
 * @param url - URL to the inverted index file
 * @param options - Fetch options: `fetch` (defaults to the global fetch),
 *   `maxMemoryBytes` (defaults to DEFAULT_INVERTED_INDEX_MAX_MEMORY) and
 *   `timeoutMs` (when truthy, the request is wrapped in withTimeout)
 * @returns Parsed InvertedIndexReader or null if not found/invalid
 * @throws Error "Network fetch failed[: <message>]" when the request itself
 *   fails (the original error is attached as `cause`); an Error whose
 *   message is "Request timeout" is rethrown unchanged
 * @throws Error "Failed to fetch inverted index: <status>" for non-ok
 *   responses other than 404
 */
export async function fetchInvertedIndex(url, options = {}) {
    const { fetch: customFetch = fetch, maxMemoryBytes = DEFAULT_INVERTED_INDEX_MAX_MEMORY, timeoutMs, } = options;
    // Create cache key that includes fetch function identity
    const cacheKey = makeCacheKey(url, customFetch);
    const cached = invertedIndexCache.get(cacheKey);
    if (cached) {
        return cached.reader;
    }
    // Request deduplication: join a fetch that is already running
    const inFlight = inFlightInvertedIndexFetches.get(cacheKey);
    if (inFlight) {
        return inFlight;
    }
    const loadIndex = async () => {
        let response;
        try {
            if (timeoutMs) {
                response = await withTimeout(customFetch(url), timeoutMs);
            }
            else {
                response = await customFetch(url);
            }
        }
        catch (error) {
            if (error instanceof Error) {
                if (error.message === 'Request timeout') {
                    throw error;
                }
                throw new Error(`Network fetch failed: ${error.message}`, { cause: error });
            }
            throw new Error('Network fetch failed', { cause: error });
        }
        if (response.status === 404) {
            return null;
        }
        if (!response.ok) {
            throw new Error(`Failed to fetch inverted index: ${response.status}`);
        }
        const bytes = new Uint8Array(await response.arrayBuffer());
        let reader;
        try {
            reader = InvertedIndexReader.deserialize(bytes);
        }
        catch {
            // Invalid/corrupt index
            return null;
        }
        const sizeBytes = bytes.length;
        // If an entry for this key appeared while we were fetching (e.g. the
        // caches were cleared mid-flight and another request finished
        // first), release its bytes before replacing it so the counter
        // matches the map contents.
        const replaced = invertedIndexCache.get(cacheKey);
        if (replaced) {
            invertedIndexCache.delete(cacheKey);
            invertedIndexCacheTotalBytes -= replaced.sizeBytes;
        }
        evictInvertedIndexCacheIfNeeded(maxMemoryBytes, sizeBytes);
        invertedIndexCache.set(cacheKey, {
            reader,
            sizeBytes,
            cachedAt: Date.now(),
        });
        invertedIndexCacheTotalBytes += sizeBytes;
        return reader;
    };
    const fetchPromise = loadIndex();
    inFlightInvertedIndexFetches.set(cacheKey, fetchPromise);
    // Release the in-flight slot only after it has been registered, and only
    // if it still belongs to this request. Cleaning up inside loadIndex would
    // run before registration when the fetch function throws synchronously,
    // leaving a rejected promise stuck in the map for every later caller.
    const release = () => {
        if (inFlightInvertedIndexFetches.get(cacheKey) === fetchPromise) {
            inFlightInvertedIndexFetches.delete(cacheKey);
        }
    };
    void fetchPromise.then(release, release);
    return fetchPromise;
}
|
|
1061
|
+
/**
 * Look up a single term in an inverted index.
 *
 * The term is lower-cased unless `options.caseSensitive` is set. Posting
 * lists are memoized in postingListCache under
 * `${makeCacheKey(url, fetch)}::${term}`; a memoized list is returned without
 * touching the index. Nothing is memoized when the index cannot be loaded.
 *
 * @param params - Lookup parameters (`url`, `term`)
 * @param options - Fetch options, plus optional `caseSensitive`
 * @returns Posting list with document IDs and their count
 */
export async function lookupTerm(params, options = {}) {
    const { url, term } = params;
    const { caseSensitive = false, fetch: customFetch = fetch } = options;
    const asResult = (docIds) => ({ docIds, documentFrequency: docIds.length });
    const indexCacheKey = makeCacheKey(url, customFetch);
    const lookupKey = caseSensitive ? term : term.toLowerCase();
    const postingCacheKey = `${indexCacheKey}::${lookupKey}`;
    const memoized = postingListCache.get(postingCacheKey);
    if (memoized !== undefined) {
        return asResult(memoized);
    }
    const reader = await fetchInvertedIndex(url, options);
    if (!reader) {
        return asResult([]);
    }
    const postings = reader.getPostings(lookupKey);
    postingListCache.set(postingCacheKey, postings);
    return asResult(postings);
}
|
|
1095
|
+
/**
 * Intersect multiple terms (AND query).
 *
 * Terms are lower-cased unless `options.caseSensitive` is set. An empty term
 * list returns an empty result without fetching the index.
 *
 * @param params - Intersection parameters (`url`, `terms`)
 * @param options - Fetch options, plus optional `caseSensitive`
 * @returns Posting list with document IDs matching ALL terms
 */
export async function intersectTerms(params, options = {}) {
    const { url, terms } = params;
    const { caseSensitive = false } = options;
    const noMatches = () => ({ docIds: [], documentFrequency: 0 });
    if (terms.length === 0) {
        return noMatches();
    }
    const reader = await fetchInvertedIndex(url, options);
    if (!reader) {
        return noMatches();
    }
    const lookupTerms = caseSensitive ? terms : terms.map((entry) => entry.toLowerCase());
    const docIds = reader.intersect(lookupTerms);
    return { docIds, documentFrequency: docIds.length };
}
|
|
1120
|
+
/**
 * Simple stemming helper that tries common inflections.
 *
 * A term not ending in "s" also yields its "s"-suffixed form; a term ending
 * in "s" and longer than two characters also yields the form without the
 * trailing "s".
 *
 * @param term - Term to expand
 * @returns Array of term variants to try, starting with the term itself
 */
function getTermVariants(term) {
    if (!term.endsWith('s')) {
        return [term, `${term}s`];
    }
    return term.length > 2 ? [term, term.slice(0, -1)] : [term];
}
|
|
1136
|
+
/**
 * Union multiple terms (OR query).
 *
 * Terms are lower-cased unless `options.caseSensitive` is set, then each is
 * expanded with getTermVariants and the de-duplicated set is passed to the
 * reader. An empty term list returns an empty result without fetching.
 *
 * @param params - Union parameters (`url`, `terms`)
 * @param options - Fetch options, plus optional `caseSensitive`
 * @returns Posting list with document IDs matching ANY term
 */
export async function unionTerms(params, options = {}) {
    const { url, terms } = params;
    const { caseSensitive = false } = options;
    const noMatches = () => ({ docIds: [], documentFrequency: 0 });
    if (terms.length === 0) {
        return noMatches();
    }
    const reader = await fetchInvertedIndex(url, options);
    if (!reader) {
        return noMatches();
    }
    const lookupTerms = caseSensitive ? terms : terms.map((entry) => entry.toLowerCase());
    // A Set keeps first-seen order while dropping repeated variants.
    const expanded = new Set(lookupTerms.flatMap((entry) => getTermVariants(entry)));
    const docIds = reader.union(Array.from(expanded));
    return { docIds, documentFrequency: docIds.length };
}
|
|
1167
|
+
/**
 * Search for a phrase in an inverted index.
 *
 * Note: Without position data in the index, this is approximated as an AND
 * query on the tokenized phrase terms. Exact phrase matching would need
 * position information stored with each posting.
 *
 * @param params - Phrase search parameters (`url`, `phrase`)
 * @param options - Fetch options, forwarded to intersectTerms
 * @returns Posting list with document IDs containing every token of the
 *   phrase; empty when the phrase yields no tokens
 */
export async function phraseSearch(params, options = {}) {
    const { url, phrase } = params;
    const terms = simpleTokenize(phrase);
    return terms.length === 0
        ? { docIds: [], documentFrequency: 0 }
        : intersectTerms({ url, terms }, options);
}
|
|
1189
|
+
/**
 * Search for terms matching a prefix.
 *
 * The prefix is lower-cased unless `options.caseSensitive` is set. At most
 * `params.limit` (default 100) matching terms are requested from the reader.
 *
 * @param params - Prefix search parameters (`url`, `prefix`, optional `limit`)
 * @param options - Fetch options, plus optional `caseSensitive`
 * @returns Matching terms and their combined document IDs
 */
export async function prefixSearch(params, options = {}) {
    const { url, prefix, limit = 100 } = params;
    const { caseSensitive = false } = options;
    const reader = await fetchInvertedIndex(url, options);
    if (!reader) {
        return { terms: [], docIds: [] };
    }
    const needle = caseSensitive ? prefix : prefix.toLowerCase();
    const terms = reader.searchPrefix(needle, limit).map((match) => match.term);
    if (terms.length === 0) {
        return { terms: [], docIds: [] };
    }
    // Union all matching term postings
    return { terms, docIds: reader.union(terms) };
}
|
|
1218
|
+
/**
 * Parse and execute a full-text query.
 *
 * Query syntax:
 * - Single term: `dog`
 * - AND query: `dog AND cat` or `dog cat` (implicit AND)
 * - OR query: `dog OR cat`
 * - Phrase: `"quick brown fox"`
 * - Prefix wildcard: `qui*`
 *
 * A query that is one quoted phrase (opening and closing quote, no quote in
 * between) is always treated as a phrase, even when it contains " OR " or
 * " AND ". Phrases are approximated as an AND over their tokens. Prefix
 * queries are lower-cased and expand to at most 100 terms.
 *
 * @param params - Query parameters: `url`, `query`, optional `offset`
 *   (default 0) and `limit`
 * @param options - Fetch options; a truthy `timeoutMs` is also checked
 *   between processing steps
 * @returns Query result with `hits` (the requested page), `totalHits`
 *   (before pagination) and `queryTimeMs`
 * @throws Error "Failed to fetch index" when no index could be loaded
 * @throws Error "Request timeout" when `timeoutMs` is exceeded
 */
export async function queryFullText(params, options = {}) {
    const { url, query, offset = 0, limit } = params;
    const { timeoutMs } = options;
    const startTime = performance.now();
    const checkTimeout = () => {
        if (timeoutMs && performance.now() - startTime > timeoutMs) {
            throw new Error('Request timeout');
        }
    };
    // Handle empty query
    const trimmedQuery = query.trim();
    if (!trimmedQuery) {
        return {
            hits: [],
            totalHits: 0,
            queryTimeMs: performance.now() - startTime,
        };
    }
    const reader = await fetchInvertedIndex(url, options);
    if (!reader) {
        throw new Error('Failed to fetch index');
    }
    checkTimeout();
    // AND over the tokens of `text`; nothing matches when it has no tokens.
    const intersectTokens = (text) => {
        const tokens = simpleTokenize(text);
        return tokens.length > 0 ? reader.intersect(tokens) : [];
    };
    const isQuoted = trimmedQuery.startsWith('"') && trimmedQuery.endsWith('"');
    const quotedBody = isQuoted ? trimmedQuery.slice(1, -1) : '';
    // One phrase only if the quotes at both ends are the only quotes;
    // `"a" OR "b"` also starts and ends with a quote but is a boolean query.
    const isSinglePhrase = isQuoted && !quotedBody.includes('"');
    let docIds = [];
    if (isSinglePhrase) {
        // Checked before the operators so that a phrase containing the words
        // OR / AND is not split apart.
        docIds = intersectTokens(quotedBody);
    }
    else if (trimmedQuery.includes(' OR ')) {
        const parts = trimmedQuery.split(' OR ').map((p) => p.trim()).filter(Boolean);
        const allDocs = new Set();
        for (const part of parts) {
            checkTimeout();
            const partTerms = simpleTokenize(part);
            if (partTerms.length > 0) {
                for (const id of reader.intersect(partTerms)) {
                    allDocs.add(id);
                }
            }
        }
        docIds = Array.from(allDocs).sort((a, b) => a - b);
    }
    else if (trimmedQuery.includes(' AND ')) {
        const parts = trimmedQuery.split(' AND ').map((p) => p.trim()).filter(Boolean);
        const allTerms = parts.flatMap((part) => simpleTokenize(part));
        if (allTerms.length > 0) {
            docIds = reader.intersect(allTerms);
        }
    }
    else if (isQuoted) {
        // Quoted text with interior quotes and no operator
        docIds = intersectTokens(quotedBody);
    }
    else if (trimmedQuery.endsWith('*')) {
        const prefix = trimmedQuery.slice(0, -1).toLowerCase();
        const matchingTerms = reader.searchPrefix(prefix, 100).map((e) => e.term);
        if (matchingTerms.length > 0) {
            docIds = reader.union(matchingTerms);
        }
    }
    else {
        // Default: space-separated terms (implicit AND or single term)
        const terms = simpleTokenize(trimmedQuery);
        if (terms.length === 1) {
            docIds = reader.getPostings(terms[0]);
        }
        else if (terms.length > 1) {
            docIds = reader.intersect(terms);
        }
    }
    checkTimeout();
    // Apply pagination
    const totalHits = docIds.length;
    let hits = docIds;
    if (offset > 0) {
        hits = hits.slice(offset);
    }
    if (limit !== undefined) {
        hits = hits.slice(0, limit);
    }
    return {
        hits,
        totalHits,
        queryTimeMs: performance.now() - startTime,
    };
}
|
|
1336
|
+
// ============================================================================
|
|
1337
|
+
// Combined Query Router Types
|
|
1338
|
+
// ============================================================================
|
|
1339
|
+
import { buildIndexUrl } from '../db/iceberg/search-manifest';
|
|
1340
|
+
// ============================================================================
|
|
1341
|
+
// Combined Query Router Implementation
|
|
1342
|
+
// ============================================================================
|
|
1343
|
+
/** Maximum subrequests allowed per search */
|
|
1344
|
+
// executeSearch uses this as the default for options.maxSubrequests.
const MAX_SUBREQUESTS = 5;
|
|
1345
|
+
/**
 * Parse a search query from URL parameters.
 *
 * Query format:
 * - bloom=field:value (repeatable; a value-less `bloom=field` gets value "")
 * - range=field:op:value (op: gt, lt, gte, lte, eq; repeatable). The value
 *   may itself contain colons. With fewer than three parts the entry is
 *   read as `field:value` with op "eq".
 * - vector=field:base64data:k=N (base64 of the raw float32 bytes; k
 *   defaults to 10 when the third part has no `k=<digits>`). Ignored when
 *   it has fewer than three colon-separated parts.
 * - text=field:query (ignored when it contains no colon)
 *
 * @param url - URL containing query parameters
 * @returns Parsed SearchQuery; only the keys present in the URL are set
 * @throws RangeError when the decoded vector byte length is not a multiple
 *   of 4 (raised by the Float32Array constructor)
 */
export function parseSearchQuery(url) {
    const query = {};
    const { searchParams } = url;
    // Parse bloom parameters
    const bloomParams = searchParams.getAll('bloom');
    if (bloomParams.length > 0) {
        query.bloom = bloomParams.map((param) => {
            const colonIndex = param.indexOf(':');
            if (colonIndex === -1) {
                return { field: param, value: '' };
            }
            return {
                field: param.slice(0, colonIndex),
                value: param.slice(colonIndex + 1),
            };
        });
    }
    // Parse range parameters
    const rangeParams = searchParams.getAll('range');
    if (rangeParams.length > 0) {
        query.range = rangeParams.map((param) => {
            const [field, second, ...rest] = param.split(':');
            if (rest.length === 0) {
                return { field: field ?? '', op: 'eq', value: second ?? '' };
            }
            // Rejoin remaining parts for values with colons
            return { field, op: second, value: rest.join(':') };
        });
    }
    // Parse vector parameter
    const vectorParam = searchParams.get('vector');
    if (vectorParam) {
        const parts = vectorParam.split(':');
        if (parts.length >= 3) {
            const [field, base64Data, kPart] = parts;
            const kMatch = kPart.match(/k=(\d+)/);
            const k = kMatch ? parseInt(kMatch[1], 10) : 10;
            // URLSearchParams turns a raw "+" in the query string into a
            // space. "+" is a base64 digit and a space is not, so put it
            // back; otherwise the decoder skips it and the vector comes out
            // shorter and shifted.
            const decoded = Buffer.from(base64Data.replace(/ /g, '+'), 'base64');
            // Copy into an ArrayBuffer of exactly the decoded size: a Buffer
            // may be a view into a larger pooled allocation.
            const exactBytes = decoded.buffer.slice(decoded.byteOffset, decoded.byteOffset + decoded.byteLength);
            query.vector = { field, query: new Float32Array(exactBytes), k };
        }
    }
    // Parse text parameter
    const textParam = searchParams.get('text');
    if (textParam) {
        const colonIndex = textParam.indexOf(':');
        if (colonIndex !== -1) {
            query.text = {
                field: textParam.slice(0, colonIndex),
                query: textParam.slice(colonIndex + 1),
            };
        }
    }
    return query;
}
|
|
1423
|
+
/**
 * Clear all search caches.
 *
 * Runs clearCentroidCache, clearInvertedIndexCache and clearBloomCache, in
 * that order.
 */
export function clearSearchCache() {
    for (const clearCache of [clearCentroidCache, clearInvertedIndexCache, clearBloomCache]) {
        clearCache();
    }
}
|
|
1431
|
+
/**
|
|
1432
|
+
* Execute a combined search query against a manifest.
|
|
1433
|
+
*
|
|
1434
|
+
* @param manifest - Search manifest describing available indexes
|
|
1435
|
+
* @param query - Combined search query
|
|
1436
|
+
* @param ctx - Execution context
|
|
1437
|
+
* @param options - Execution options
|
|
1438
|
+
* @returns Combined search result
|
|
1439
|
+
*/
|
|
1440
|
+
export async function executeSearch(manifest, query, ctx, options = {}) {
|
|
1441
|
+
const { fetch: customFetch = fetch, timeoutMs = 5000, maxSubrequests = MAX_SUBREQUESTS, } = options;
|
|
1442
|
+
const startTime = performance.now();
|
|
1443
|
+
const timing = { total_ms: 0 };
|
|
1444
|
+
let subrequests = 0;
|
|
1445
|
+
let budgetRemaining = maxSubrequests;
|
|
1446
|
+
// Track if we should prune (any definitive NO from bloom = prune all)
|
|
1447
|
+
let pruned = false;
|
|
1448
|
+
// Results from different query types
|
|
1449
|
+
let rangeBlocks;
|
|
1450
|
+
let vectorCentroids;
|
|
1451
|
+
let textDocuments;
|
|
1452
|
+
// Helper to check timeout
|
|
1453
|
+
const checkTimeout = () => {
|
|
1454
|
+
if (performance.now() - startTime > timeoutMs) {
|
|
1455
|
+
throw new Error('Search timeout');
|
|
1456
|
+
}
|
|
1457
|
+
};
|
|
1458
|
+
// Helper to track subrequests
|
|
1459
|
+
const trackSubrequest = () => {
|
|
1460
|
+
subrequests++;
|
|
1461
|
+
budgetRemaining--;
|
|
1462
|
+
};
|
|
1463
|
+
// Empty query - return immediately
|
|
1464
|
+
const hasQuery = (query.bloom && query.bloom.length > 0) ||
|
|
1465
|
+
(query.range && query.range.length > 0) ||
|
|
1466
|
+
query.vector ||
|
|
1467
|
+
query.text;
|
|
1468
|
+
if (!hasQuery) {
|
|
1469
|
+
return {
|
|
1470
|
+
pruned: false,
|
|
1471
|
+
timing: { total_ms: performance.now() - startTime },
|
|
1472
|
+
subrequests: 0,
|
|
1473
|
+
};
|
|
1474
|
+
}
|
|
1475
|
+
// Validate vector query dimensions if present
|
|
1476
|
+
if (query.vector) {
|
|
1477
|
+
const vectorConfig = manifest.indexes.vector?.[query.vector.field];
|
|
1478
|
+
if (vectorConfig) {
|
|
1479
|
+
if (query.vector.query.length === 0) {
|
|
1480
|
+
throw new Error('Vector query is empty - dimension mismatch');
|
|
1481
|
+
}
|
|
1482
|
+
if (query.vector.query.length !== vectorConfig.dims) {
|
|
1483
|
+
throw new Error(`Vector dimension mismatch: expected ${vectorConfig.dims}, got ${query.vector.query.length}`);
|
|
1484
|
+
}
|
|
1485
|
+
}
|
|
1486
|
+
}
|
|
1487
|
+
try {
|
|
1488
|
+
// =========================================================================
|
|
1489
|
+
// Phase 1: Bloom filter checks (most selective, do first)
|
|
1490
|
+
// =========================================================================
|
|
1491
|
+
if (query.bloom && query.bloom.length > 0 && budgetRemaining > 0) {
|
|
1492
|
+
const bloomStartTime = performance.now();
|
|
1493
|
+
for (const bloomQuery of query.bloom) {
|
|
1494
|
+
if (budgetRemaining <= 0)
|
|
1495
|
+
break;
|
|
1496
|
+
checkTimeout();
|
|
1497
|
+
const bloomConfig = manifest.indexes.bloom?.[bloomQuery.field];
|
|
1498
|
+
if (!bloomConfig) {
|
|
1499
|
+
// Field not in manifest - skip (conservative: don't prune)
|
|
1500
|
+
continue;
|
|
1501
|
+
}
|
|
1502
|
+
const bloomUrl = buildIndexUrl(manifest, 'bloom', bloomQuery.field);
|
|
1503
|
+
if (!bloomUrl)
|
|
1504
|
+
continue;
|
|
1505
|
+
try {
|
|
1506
|
+
trackSubrequest();
|
|
1507
|
+
// Note: fieldId is used to identify which bloom filter in a Puffin file
|
|
1508
|
+
// Since each field has its own Puffin file in this schema, use a default fieldId
|
|
1509
|
+
// The fieldId in Puffin files typically corresponds to Iceberg column IDs
|
|
1510
|
+
const result = await queryBloom({
|
|
1511
|
+
url: bloomUrl,
|
|
1512
|
+
fieldId: 1, // Default column ID
|
|
1513
|
+
value: bloomQuery.value,
|
|
1514
|
+
}, { fetch: customFetch });
|
|
1515
|
+
if (result === BloomQueryResult.NO) {
|
|
1516
|
+
// Definitive NO - prune entire result
|
|
1517
|
+
pruned = true;
|
|
1518
|
+
timing.bloom_ms = performance.now() - bloomStartTime;
|
|
1519
|
+
break;
|
|
1520
|
+
}
|
|
1521
|
+
// result === MAYBE means continue checking
|
|
1522
|
+
}
|
|
1523
|
+
catch {
|
|
1524
|
+
// On error, be conservative - don't prune
|
|
1525
|
+
continue;
|
|
1526
|
+
}
|
|
1527
|
+
}
|
|
1528
|
+
timing.bloom_ms = performance.now() - bloomStartTime;
|
|
1529
|
+
}
|
|
1530
|
+
// Short-circuit if already pruned
|
|
1531
|
+
if (pruned) {
|
|
1532
|
+
return {
|
|
1533
|
+
pruned: true,
|
|
1534
|
+
timing: {
|
|
1535
|
+
...timing,
|
|
1536
|
+
total_ms: performance.now() - startTime,
|
|
1537
|
+
},
|
|
1538
|
+
subrequests,
|
|
1539
|
+
};
|
|
1540
|
+
}
|
|
1541
|
+
// =========================================================================
|
|
1542
|
+
// Phase 2: Range queries (determine blocks to scan)
|
|
1543
|
+
// =========================================================================
|
|
1544
|
+
if (query.range && query.range.length > 0 && budgetRemaining > 0) {
|
|
1545
|
+
const rangeStartTime = performance.now();
|
|
1546
|
+
const allBlocks = new Set();
|
|
1547
|
+
let firstRangeQuery = true;
|
|
1548
|
+
for (const rangeQuery of query.range) {
|
|
1549
|
+
if (budgetRemaining <= 0)
|
|
1550
|
+
break;
|
|
1551
|
+
checkTimeout();
|
|
1552
|
+
const rangeConfig = manifest.indexes.range?.[rangeQuery.field];
|
|
1553
|
+
if (!rangeConfig)
|
|
1554
|
+
continue;
|
|
1555
|
+
const rangeUrl = buildIndexUrl(manifest, 'range', rangeQuery.field);
|
|
1556
|
+
if (!rangeUrl)
|
|
1557
|
+
continue;
|
|
1558
|
+
try {
|
|
1559
|
+
trackSubrequest();
|
|
1560
|
+
// Fetch marks file
|
|
1561
|
+
const response = await customFetch(rangeUrl);
|
|
1562
|
+
if (!response.ok)
|
|
1563
|
+
continue;
|
|
1564
|
+
const buffer = await response.arrayBuffer();
|
|
1565
|
+
// Use int64 type for range queries (covers timestamps and integers)
|
|
1566
|
+
const blocks = parseMarksFile(new Uint8Array(buffer), 'int64');
|
|
1567
|
+
// Find matching blocks based on operator
|
|
1568
|
+
const matchingBlocks = findMatchingBlocks(blocks, rangeQuery.op, rangeQuery.value);
|
|
1569
|
+
if (firstRangeQuery) {
|
|
1570
|
+
for (const block of matchingBlocks) {
|
|
1571
|
+
allBlocks.add(block);
|
|
1572
|
+
}
|
|
1573
|
+
firstRangeQuery = false;
|
|
1574
|
+
}
|
|
1575
|
+
else {
|
|
1576
|
+
// Intersect with previous results (AND semantics)
|
|
1577
|
+
// Use Set for O(1) lookups instead of O(n) array includes
|
|
1578
|
+
const matchingSet = new Set(matchingBlocks);
|
|
1579
|
+
for (const block of allBlocks) {
|
|
1580
|
+
if (!matchingSet.has(block)) {
|
|
1581
|
+
allBlocks.delete(block);
|
|
1582
|
+
}
|
|
1583
|
+
}
|
|
1584
|
+
}
|
|
1585
|
+
// If no blocks match, we can prune
|
|
1586
|
+
if (allBlocks.size === 0 && !firstRangeQuery) {
|
|
1587
|
+
pruned = true;
|
|
1588
|
+
break;
|
|
1589
|
+
}
|
|
1590
|
+
}
|
|
1591
|
+
catch {
|
|
1592
|
+
// On error, be conservative
|
|
1593
|
+
continue;
|
|
1594
|
+
}
|
|
1595
|
+
}
|
|
1596
|
+
if (allBlocks.size > 0) {
|
|
1597
|
+
rangeBlocks = Array.from(allBlocks).sort((a, b) => a - b);
|
|
1598
|
+
}
|
|
1599
|
+
timing.range_ms = performance.now() - rangeStartTime;
|
|
1600
|
+
}
|
|
1601
|
+
// Short-circuit if pruned by range
|
|
1602
|
+
if (pruned) {
|
|
1603
|
+
return {
|
|
1604
|
+
pruned: true,
|
|
1605
|
+
timing: {
|
|
1606
|
+
...timing,
|
|
1607
|
+
total_ms: performance.now() - startTime,
|
|
1608
|
+
},
|
|
1609
|
+
subrequests,
|
|
1610
|
+
};
|
|
1611
|
+
}
|
|
1612
|
+
// =========================================================================
|
|
1613
|
+
// Phase 3: Vector query (find nearest centroids)
|
|
1614
|
+
// =========================================================================
|
|
1615
|
+
if (query.vector && budgetRemaining > 0) {
|
|
1616
|
+
const vectorStartTime = performance.now();
|
|
1617
|
+
const vectorConfig = manifest.indexes.vector?.[query.vector.field];
|
|
1618
|
+
if (vectorConfig) {
|
|
1619
|
+
const vectorUrl = buildIndexUrl(manifest, 'vector', query.vector.field);
|
|
1620
|
+
if (vectorUrl) {
|
|
1621
|
+
try {
|
|
1622
|
+
trackSubrequest();
|
|
1623
|
+
const k = Math.min(query.vector.k, vectorConfig.count);
|
|
1624
|
+
// Convert manifest metric string to DistanceMetric enum
|
|
1625
|
+
const metricMap = {
|
|
1626
|
+
cosine: DistanceMetric.Cosine,
|
|
1627
|
+
euclidean: DistanceMetric.Euclidean,
|
|
1628
|
+
dot: DistanceMetric.DotProduct,
|
|
1629
|
+
};
|
|
1630
|
+
const metric = metricMap[vectorConfig.metric] ?? DistanceMetric.Cosine;
|
|
1631
|
+
const topK = await queryVector({
|
|
1632
|
+
centroidsUrl: vectorUrl,
|
|
1633
|
+
numCentroids: vectorConfig.count,
|
|
1634
|
+
dims: vectorConfig.dims,
|
|
1635
|
+
query: query.vector.query,
|
|
1636
|
+
k,
|
|
1637
|
+
metric,
|
|
1638
|
+
fetch: customFetch,
|
|
1639
|
+
});
|
|
1640
|
+
vectorCentroids = topK.map((result) => ({
|
|
1641
|
+
index: result.index,
|
|
1642
|
+
distance: result.distance,
|
|
1643
|
+
}));
|
|
1644
|
+
}
|
|
1645
|
+
catch {
|
|
1646
|
+
// On error, leave centroids undefined
|
|
1647
|
+
}
|
|
1648
|
+
}
|
|
1649
|
+
}
|
|
1650
|
+
timing.vector_ms = performance.now() - vectorStartTime;
|
|
1651
|
+
}
|
|
1652
|
+
// =========================================================================
|
|
1653
|
+
// Phase 4: Full-text query
|
|
1654
|
+
// =========================================================================
|
|
1655
|
+
if (query.text && budgetRemaining > 0) {
|
|
1656
|
+
const textStartTime = performance.now();
|
|
1657
|
+
const invertedConfig = manifest.indexes.inverted?.[query.text.field];
|
|
1658
|
+
if (invertedConfig) {
|
|
1659
|
+
const invertedUrl = buildIndexUrl(manifest, 'inverted', query.text.field);
|
|
1660
|
+
if (invertedUrl) {
|
|
1661
|
+
try {
|
|
1662
|
+
trackSubrequest();
|
|
1663
|
+
const result = await queryFullText({ url: invertedUrl, query: query.text.query }, { fetch: customFetch });
|
|
1664
|
+
textDocuments = result.hits;
|
|
1665
|
+
// If no documents match, we could prune
|
|
1666
|
+
// But for combined queries we want to return the result
|
|
1667
|
+
if (result.hits.length === 0) {
|
|
1668
|
+
pruned = true;
|
|
1669
|
+
}
|
|
1670
|
+
}
|
|
1671
|
+
catch {
|
|
1672
|
+
// On error, leave documents undefined
|
|
1673
|
+
}
|
|
1674
|
+
}
|
|
1675
|
+
}
|
|
1676
|
+
timing.text_ms = performance.now() - textStartTime;
|
|
1677
|
+
}
|
|
1678
|
+
}
|
|
1679
|
+
catch (error) {
|
|
1680
|
+
// Handle timeout
|
|
1681
|
+
if (error instanceof Error && error.message.includes('timeout')) {
|
|
1682
|
+
return {
|
|
1683
|
+
pruned: false, // Conservative on timeout
|
|
1684
|
+
timing: {
|
|
1685
|
+
...timing,
|
|
1686
|
+
total_ms: performance.now() - startTime,
|
|
1687
|
+
},
|
|
1688
|
+
subrequests,
|
|
1689
|
+
};
|
|
1690
|
+
}
|
|
1691
|
+
throw error;
|
|
1692
|
+
}
|
|
1693
|
+
timing.total_ms = performance.now() - startTime;
|
|
1694
|
+
return {
|
|
1695
|
+
pruned,
|
|
1696
|
+
blocks: rangeBlocks,
|
|
1697
|
+
centroids: vectorCentroids,
|
|
1698
|
+
documents: textDocuments,
|
|
1699
|
+
timing,
|
|
1700
|
+
subrequests,
|
|
1701
|
+
};
|
|
1702
|
+
}
|
|
1703
|
+
// ============================================================================
|
|
1704
|
+
// Helper Functions for Combined Router
|
|
1705
|
+
// ============================================================================
|
|
1706
|
+
/**
 * Find blocks matching a range query.
 *
 * Compares the query threshold against every block's [min, max] bounds and
 * returns the `blockIndex` of each block that could contain a matching value.
 * The function is deliberately conservative: whenever a comparison cannot be
 * made reliably (unparseable threshold, unusable bound, unknown operator) the
 * block is kept rather than pruned, because the caller prunes the whole
 * result when no block is returned.
 *
 * @param {Array<{blockIndex: *, min: (bigint|number), max: (bigint|number)}>} blocks
 *   Block entries carrying `blockIndex`, `min` and `max`.
 * @param {string} op - Comparison operator: 'gt', 'gte', 'lt', 'lte' or 'eq'.
 * @param {(string|number|bigint)} value - Threshold: an integer, an integer
 *   string, or a date string that `Date` can parse (converted to epoch ms).
 * @returns {Array} The `blockIndex` of every block that may match.
 */
function findMatchingBlocks(blocks, op, value) {
    // Try to parse value as a number/timestamp
    let numValue;
    try {
        if (typeof value === 'bigint') {
            numValue = value;
        }
        else if (typeof value === 'number') {
            // BigInt() throws a RangeError for NaN, Infinity and fractions,
            // which lands in the conservative catch below.
            numValue = BigInt(value);
        }
        else if (!/^\s*[+-]?\d+\s*$/.test(value) && value.includes('-') && value.length >= 10) {
            // Looks like a date string. Plain (possibly negative) integers are
            // excluded first so "-1234567890" is not mistaken for a date.
            // An invalid date yields NaN, which makes BigInt() throw.
            numValue = BigInt(new Date(value).getTime());
        }
        else {
            numValue = BigInt(value);
        }
    }
    catch {
        // If parsing fails, return all blocks (conservative)
        return blocks.map((b) => b.blockIndex);
    }
    // Convert a bound to bigint. Numeric bounds are rounded outwards (min down,
    // max up) so rounding can only widen a block, never hide a value inside it.
    // Returns undefined when the bound is not a finite number.
    const toBound = (raw, round) => {
        if (typeof raw === 'bigint') {
            return raw;
        }
        const rounded = round(raw);
        return Number.isFinite(rounded) ? BigInt(rounded) : undefined;
    };
    const matchingBlocks = [];
    for (const block of blocks) {
        const minVal = toBound(block.min, Math.floor);
        const maxVal = toBound(block.max, Math.ceil);
        let matches;
        if (minVal === undefined || maxVal === undefined) {
            // Bounds cannot be compared - keep the block instead of throwing.
            matches = true;
        }
        else {
            switch (op) {
                case 'gt':
                    // Block matches if its max > value (some values could be > value)
                    matches = maxVal > numValue;
                    break;
                case 'gte':
                    // Block matches if its max >= value
                    matches = maxVal >= numValue;
                    break;
                case 'lt':
                    // Block matches if its min < value
                    matches = minVal < numValue;
                    break;
                case 'lte':
                    // Block matches if its min <= value
                    matches = minVal <= numValue;
                    break;
                case 'eq':
                    // Block matches if value is within [min, max]
                    matches = minVal <= numValue && maxVal >= numValue;
                    break;
                default:
                    // Unknown operator: nothing can be ruled out, so keep the block.
                    matches = true;
                    break;
            }
        }
        if (matches) {
            matchingBlocks.push(block.blockIndex);
        }
    }
    return matchingBlocks;
}
|
|
1759
|
+
//# sourceMappingURL=search.js.map
|