dotdo 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +446 -315
- package/cli/README.md +238 -0
- package/cli/agent.ts +72 -0
- package/cli/bin.js +44 -0
- package/cli/bin.ts +38 -0
- package/cli/build.ts +157 -0
- package/cli/commands/auth/login.ts +14 -0
- package/cli/commands/auth/logout.ts +6 -0
- package/cli/commands/auth/whoami.ts +16 -0
- package/cli/commands/deploy-multi.ts +245 -0
- package/cli/commands/dev/deploy.ts +100 -0
- package/cli/commands/dev/dev.ts +95 -0
- package/cli/commands/dev/logs.ts +91 -0
- package/cli/commands/dev-local.ts +88 -0
- package/cli/commands/do-ops.ts +314 -0
- package/cli/commands/index.ts +100 -0
- package/cli/commands/init.ts +247 -0
- package/cli/commands/introspect/emitter.ts +315 -0
- package/cli/commands/introspect/index.ts +193 -0
- package/cli/commands/link.ts +598 -0
- package/cli/commands/snippets.ts +415 -0
- package/cli/commands/tunnel.ts +239 -0
- package/cli/device-auth.ts +289 -0
- package/cli/fallback.ts +12 -0
- package/cli/index.ts +121 -0
- package/cli/main.ts +246 -0
- package/cli/mcp-stdio.ts +790 -0
- package/cli/package.json +62 -0
- package/cli/runtime/do-registry.ts +193 -0
- package/cli/runtime/embedded-db.ts +344 -0
- package/cli/runtime/index.ts +9 -0
- package/cli/runtime/miniflare-adapter.ts +162 -0
- package/cli/sandbox.ts +82 -0
- package/cli/src/args.ts +174 -0
- package/cli/src/auth.ts +55 -0
- package/cli/src/commands/call.ts +84 -0
- package/cli/src/commands/charge.ts +96 -0
- package/cli/src/commands/config.ts +115 -0
- package/cli/src/commands/email.ts +112 -0
- package/cli/src/commands/llm.ts +115 -0
- package/cli/src/commands/queue.ts +134 -0
- package/cli/src/commands/text.ts +86 -0
- package/cli/src/config.ts +185 -0
- package/cli/src/output.ts +246 -0
- package/cli/src/rpc.ts +192 -0
- package/cli/utils/config.ts +282 -0
- package/cli/utils/detect.ts +73 -0
- package/cli/utils/index.ts +15 -0
- package/cli/utils/logger.ts +232 -0
- package/dist/ai/index.js +19 -0
- package/dist/ai/index.js.map +1 -0
- package/dist/ai/template-literals.js +852 -0
- package/dist/ai/template-literals.js.map +1 -0
- package/dist/api/middleware/auth-federation.js +573 -0
- package/dist/api/middleware/auth-federation.js.map +1 -0
- package/dist/api/middleware/auth.js +545 -0
- package/dist/api/middleware/auth.js.map +1 -0
- package/dist/db/actions.js +212 -0
- package/dist/db/actions.js.map +1 -0
- package/dist/db/auth.js +506 -0
- package/dist/db/auth.js.map +1 -0
- package/dist/db/branches.js +65 -0
- package/dist/db/branches.js.map +1 -0
- package/dist/db/clickhouse.js +1074 -0
- package/dist/db/clickhouse.js.map +1 -0
- package/dist/db/dlq.js +39 -0
- package/dist/db/dlq.js.map +1 -0
- package/dist/db/events.js +28 -0
- package/dist/db/events.js.map +1 -0
- package/dist/db/exec.js +64 -0
- package/dist/db/exec.js.map +1 -0
- package/dist/db/files.js +85 -0
- package/dist/db/files.js.map +1 -0
- package/dist/db/flags.js +24 -0
- package/dist/db/flags.js.map +1 -0
- package/dist/db/git.js +116 -0
- package/dist/db/git.js.map +1 -0
- package/dist/db/iceberg/inverted-index.js +862 -0
- package/dist/db/iceberg/inverted-index.js.map +1 -0
- package/dist/db/iceberg/puffin.js +878 -0
- package/dist/db/iceberg/puffin.js.map +1 -0
- package/dist/db/iceberg/search-manifest.js +422 -0
- package/dist/db/iceberg/search-manifest.js.map +1 -0
- package/dist/db/iceberg/types.js +8 -0
- package/dist/db/iceberg/types.js.map +1 -0
- package/dist/db/index.js +121 -0
- package/dist/db/index.js.map +1 -0
- package/dist/db/integrations.js +368 -0
- package/dist/db/integrations.js.map +1 -0
- package/dist/db/json-indexes.js +332 -0
- package/dist/db/json-indexes.js.map +1 -0
- package/dist/db/linked-accounts.js +287 -0
- package/dist/db/linked-accounts.js.map +1 -0
- package/dist/db/nouns.js +183 -0
- package/dist/db/nouns.js.map +1 -0
- package/dist/db/objects.js +170 -0
- package/dist/db/objects.js.map +1 -0
- package/dist/db/primitives/dag-scheduler/index.js +869 -0
- package/dist/db/primitives/dag-scheduler/index.js.map +1 -0
- package/dist/db/primitives/exactly-once-context.js +237 -0
- package/dist/db/primitives/exactly-once-context.js.map +1 -0
- package/dist/db/primitives/index.js +62 -0
- package/dist/db/primitives/index.js.map +1 -0
- package/dist/db/primitives/keyed-router.js +145 -0
- package/dist/db/primitives/keyed-router.js.map +1 -0
- package/dist/db/primitives/observability.js +162 -0
- package/dist/db/primitives/observability.js.map +1 -0
- package/dist/db/primitives/schema-evolution.js +643 -0
- package/dist/db/primitives/schema-evolution.js.map +1 -0
- package/dist/db/primitives/stateful-operator/index.js +770 -0
- package/dist/db/primitives/stateful-operator/index.js.map +1 -0
- package/dist/db/primitives/temporal-store.js +306 -0
- package/dist/db/primitives/temporal-store.js.map +1 -0
- package/dist/db/primitives/typed-column-store.js +1229 -0
- package/dist/db/primitives/typed-column-store.js.map +1 -0
- package/dist/db/primitives/utils/duration.js +162 -0
- package/dist/db/primitives/utils/duration.js.map +1 -0
- package/dist/db/primitives/utils/murmur3.js +116 -0
- package/dist/db/primitives/utils/murmur3.js.map +1 -0
- package/dist/db/primitives/watermark-service.js +136 -0
- package/dist/db/primitives/watermark-service.js.map +1 -0
- package/dist/db/primitives/window-manager.js +764 -0
- package/dist/db/primitives/window-manager.js.map +1 -0
- package/dist/db/relationships.js +66 -0
- package/dist/db/relationships.js.map +1 -0
- package/dist/db/schema-minimal.js +61 -0
- package/dist/db/schema-minimal.js.map +1 -0
- package/dist/db/search.js +28 -0
- package/dist/db/search.js.map +1 -0
- package/dist/db/stores.js +1665 -0
- package/dist/db/stores.js.map +1 -0
- package/dist/db/things.js +297 -0
- package/dist/db/things.js.map +1 -0
- package/dist/db/vault.js +171 -0
- package/dist/db/vault.js.map +1 -0
- package/dist/db/verbs.js +102 -0
- package/dist/db/verbs.js.map +1 -0
- package/dist/do/base.js +48 -0
- package/dist/do/base.js.map +1 -0
- package/dist/do/tiny.js +31 -0
- package/dist/do/tiny.js.map +1 -0
- package/dist/lib/DOAuth.js +261 -0
- package/dist/lib/DOAuth.js.map +1 -0
- package/dist/lib/DODispatcher.js +72 -0
- package/dist/lib/DODispatcher.js.map +1 -0
- package/dist/lib/Modifier.js +189 -0
- package/dist/lib/Modifier.js.map +1 -0
- package/dist/lib/StateStorage.js +403 -0
- package/dist/lib/StateStorage.js.map +1 -0
- package/dist/lib/TypeRegistry.js +122 -0
- package/dist/lib/TypeRegistry.js.map +1 -0
- package/dist/lib/ai/gateway.js +247 -0
- package/dist/lib/ai/gateway.js.map +1 -0
- package/dist/lib/ai/tool-loop-agent.js +591 -0
- package/dist/lib/ai/tool-loop-agent.js.map +1 -0
- package/dist/lib/auto-wiring.js +439 -0
- package/dist/lib/auto-wiring.js.map +1 -0
- package/dist/lib/browse/browserbase.js +163 -0
- package/dist/lib/browse/browserbase.js.map +1 -0
- package/dist/lib/browse/cloudflare.js +144 -0
- package/dist/lib/browse/cloudflare.js.map +1 -0
- package/dist/lib/browse/index.js +62 -0
- package/dist/lib/browse/index.js.map +1 -0
- package/dist/lib/browse/types.js +13 -0
- package/dist/lib/browse/types.js.map +1 -0
- package/dist/lib/cache/index.js +37 -0
- package/dist/lib/cache/index.js.map +1 -0
- package/dist/lib/cache/visibility.js +638 -0
- package/dist/lib/cache/visibility.js.map +1 -0
- package/dist/lib/capabilities.js +268 -0
- package/dist/lib/capabilities.js.map +1 -0
- package/dist/lib/channels/base.js +106 -0
- package/dist/lib/channels/base.js.map +1 -0
- package/dist/lib/channels/discord.js +94 -0
- package/dist/lib/channels/discord.js.map +1 -0
- package/dist/lib/channels/email.js +204 -0
- package/dist/lib/channels/email.js.map +1 -0
- package/dist/lib/channels/index.js +90 -0
- package/dist/lib/channels/index.js.map +1 -0
- package/dist/lib/channels/mdxui-chat.js +95 -0
- package/dist/lib/channels/mdxui-chat.js.map +1 -0
- package/dist/lib/channels/slack-blockkit.js +121 -0
- package/dist/lib/channels/slack-blockkit.js.map +1 -0
- package/dist/lib/channels/types.js +7 -0
- package/dist/lib/channels/types.js.map +1 -0
- package/dist/lib/cloudflare/ai.js +654 -0
- package/dist/lib/cloudflare/ai.js.map +1 -0
- package/dist/lib/cloudflare/index.js +88 -0
- package/dist/lib/cloudflare/index.js.map +1 -0
- package/dist/lib/cloudflare/kv.js +342 -0
- package/dist/lib/cloudflare/kv.js.map +1 -0
- package/dist/lib/cloudflare/queues.js +434 -0
- package/dist/lib/cloudflare/queues.js.map +1 -0
- package/dist/lib/cloudflare/r2.js +604 -0
- package/dist/lib/cloudflare/r2.js.map +1 -0
- package/dist/lib/cloudflare/vectorize.js +494 -0
- package/dist/lib/cloudflare/vectorize.js.map +1 -0
- package/dist/lib/cloudflare/workflows.js +569 -0
- package/dist/lib/cloudflare/workflows.js.map +1 -0
- package/dist/lib/colo/caching.js +196 -0
- package/dist/lib/colo/caching.js.map +1 -0
- package/dist/lib/colo/detection.js +194 -0
- package/dist/lib/colo/detection.js.map +1 -0
- package/dist/lib/colo/external-data.js +219 -0
- package/dist/lib/colo/external-data.js.map +1 -0
- package/dist/lib/colo/globe-data.js +179 -0
- package/dist/lib/colo/globe-data.js.map +1 -0
- package/dist/lib/colo/index.js +16 -0
- package/dist/lib/colo/index.js.map +1 -0
- package/dist/lib/decorators.js +37 -0
- package/dist/lib/decorators.js.map +1 -0
- package/dist/lib/discovery.js +81 -0
- package/dist/lib/discovery.js.map +1 -0
- package/dist/lib/executors/AgenticFunctionExecutor.js +619 -0
- package/dist/lib/executors/AgenticFunctionExecutor.js.map +1 -0
- package/dist/lib/executors/BaseFunctionExecutor.js +328 -0
- package/dist/lib/executors/BaseFunctionExecutor.js.map +1 -0
- package/dist/lib/executors/CascadeExecutor.js +418 -0
- package/dist/lib/executors/CascadeExecutor.js.map +1 -0
- package/dist/lib/executors/CodeFunctionExecutor.js +904 -0
- package/dist/lib/executors/CodeFunctionExecutor.js.map +1 -0
- package/dist/lib/executors/GenerativeFunctionExecutor.js +904 -0
- package/dist/lib/executors/GenerativeFunctionExecutor.js.map +1 -0
- package/dist/lib/executors/HumanFunctionExecutor.js +884 -0
- package/dist/lib/executors/HumanFunctionExecutor.js.map +1 -0
- package/dist/lib/executors/ParallelStepExecutor.js +308 -0
- package/dist/lib/executors/ParallelStepExecutor.js.map +1 -0
- package/dist/lib/executors/types.js +12 -0
- package/dist/lib/executors/types.js.map +1 -0
- package/dist/lib/experiments.js +89 -0
- package/dist/lib/experiments.js.map +1 -0
- package/dist/lib/flags/store.js +262 -0
- package/dist/lib/flags/store.js.map +1 -0
- package/dist/lib/functions/FunctionComposition.js +467 -0
- package/dist/lib/functions/FunctionComposition.js.map +1 -0
- package/dist/lib/functions/FunctionMiddleware.js +457 -0
- package/dist/lib/functions/FunctionMiddleware.js.map +1 -0
- package/dist/lib/functions/FunctionRegistry.js +426 -0
- package/dist/lib/functions/FunctionRegistry.js.map +1 -0
- package/dist/lib/functions/createFunction.js +1048 -0
- package/dist/lib/functions/createFunction.js.map +1 -0
- package/dist/lib/humans/index.js +68 -0
- package/dist/lib/humans/index.js.map +1 -0
- package/dist/lib/humans/templates.js +117 -0
- package/dist/lib/humans/templates.js.map +1 -0
- package/dist/lib/identity.js +98 -0
- package/dist/lib/identity.js.map +1 -0
- package/dist/lib/index.js +9 -0
- package/dist/lib/index.js.map +1 -0
- package/dist/lib/logging/error-logger.js +163 -0
- package/dist/lib/logging/error-logger.js.map +1 -0
- package/dist/lib/logging/index.js +160 -0
- package/dist/lib/logging/index.js.map +1 -0
- package/dist/lib/mixins/bash.js +753 -0
- package/dist/lib/mixins/bash.js.map +1 -0
- package/dist/lib/mixins/fs.js +648 -0
- package/dist/lib/mixins/fs.js.map +1 -0
- package/dist/lib/mixins/git.js +1006 -0
- package/dist/lib/mixins/git.js.map +1 -0
- package/dist/lib/mixins/npm.js +662 -0
- package/dist/lib/mixins/npm.js.map +1 -0
- package/dist/lib/noun-id.js +278 -0
- package/dist/lib/noun-id.js.map +1 -0
- package/dist/lib/rate-limit/sliding-window.js +148 -0
- package/dist/lib/rate-limit/sliding-window.js.map +1 -0
- package/dist/lib/rate-limit.js +110 -0
- package/dist/lib/rate-limit.js.map +1 -0
- package/dist/lib/rpc/bindings.js +548 -0
- package/dist/lib/rpc/bindings.js.map +1 -0
- package/dist/lib/rpc/index.js +64 -0
- package/dist/lib/rpc/index.js.map +1 -0
- package/dist/lib/safe-stringify.js +223 -0
- package/dist/lib/safe-stringify.js.map +1 -0
- package/dist/lib/sandbox/miniflare-sandbox.js +1007 -0
- package/dist/lib/sandbox/miniflare-sandbox.js.map +1 -0
- package/dist/lib/sqids.js +110 -0
- package/dist/lib/sqids.js.map +1 -0
- package/dist/lib/sql/adapters/index.js +10 -0
- package/dist/lib/sql/adapters/index.js.map +1 -0
- package/dist/lib/sql/adapters/node-sql-parser.js +552 -0
- package/dist/lib/sql/adapters/node-sql-parser.js.map +1 -0
- package/dist/lib/sql/adapters/pgsql-parser.js +1190 -0
- package/dist/lib/sql/adapters/pgsql-parser.js.map +1 -0
- package/dist/lib/sql/index.js +277 -0
- package/dist/lib/sql/index.js.map +1 -0
- package/dist/lib/sql/types.js +56 -0
- package/dist/lib/sql/types.js.map +1 -0
- package/dist/lib/type-classifier.js +126 -0
- package/dist/lib/type-classifier.js.map +1 -0
- package/dist/lib/utils/html.js +47 -0
- package/dist/lib/utils/html.js.map +1 -0
- package/dist/lib/validation.js +48 -0
- package/dist/lib/validation.js.map +1 -0
- package/dist/lib/vault/store.js +411 -0
- package/dist/lib/vault/store.js.map +1 -0
- package/dist/metrics/hunch.js +739 -0
- package/dist/metrics/hunch.js.map +1 -0
- package/dist/objects/API.js +302 -0
- package/dist/objects/API.js.map +1 -0
- package/dist/objects/Agent.js +179 -0
- package/dist/objects/Agent.js.map +1 -0
- package/dist/objects/AgenticFunctionExecutor.js +8 -0
- package/dist/objects/AgenticFunctionExecutor.js.map +1 -0
- package/dist/objects/App.js +83 -0
- package/dist/objects/App.js.map +1 -0
- package/dist/objects/Browser.js +884 -0
- package/dist/objects/Browser.js.map +1 -0
- package/dist/objects/Business.js +107 -0
- package/dist/objects/Business.js.map +1 -0
- package/dist/objects/CLI.js +221 -0
- package/dist/objects/CLI.js.map +1 -0
- package/dist/objects/CodeFunctionExecutor.js +8 -0
- package/dist/objects/CodeFunctionExecutor.js.map +1 -0
- package/dist/objects/Collection.js +161 -0
- package/dist/objects/Collection.js.map +1 -0
- package/dist/objects/DO.js +41 -0
- package/dist/objects/DO.js.map +1 -0
- package/dist/objects/DOBase.js +2309 -0
- package/dist/objects/DOBase.js.map +1 -0
- package/dist/objects/DOCache.js +153 -0
- package/dist/objects/DOCache.js.map +1 -0
- package/dist/objects/DOFull.js +1676 -0
- package/dist/objects/DOFull.js.map +1 -0
- package/dist/objects/DOTiny.js +207 -0
- package/dist/objects/DOTiny.js.map +1 -0
- package/dist/objects/Directory.js +199 -0
- package/dist/objects/Directory.js.map +1 -0
- package/dist/objects/Entity.js +413 -0
- package/dist/objects/Entity.js.map +1 -0
- package/dist/objects/Function.js +116 -0
- package/dist/objects/Function.js.map +1 -0
- package/dist/objects/Human.js +231 -0
- package/dist/objects/Human.js.map +1 -0
- package/dist/objects/HumanFunctionExecutor.js +8 -0
- package/dist/objects/HumanFunctionExecutor.js.map +1 -0
- package/dist/objects/IcebergMetadataDO.js +938 -0
- package/dist/objects/IcebergMetadataDO.js.map +1 -0
- package/dist/objects/IntegrationsDO.js +1174 -0
- package/dist/objects/IntegrationsDO.js.map +1 -0
- package/dist/objects/ObservabilityBroadcaster.js +149 -0
- package/dist/objects/ObservabilityBroadcaster.js.map +1 -0
- package/dist/objects/Package.js +154 -0
- package/dist/objects/Package.js.map +1 -0
- package/dist/objects/Product.js +193 -0
- package/dist/objects/Product.js.map +1 -0
- package/dist/objects/SDK.js +152 -0
- package/dist/objects/SDK.js.map +1 -0
- package/dist/objects/SaaS.js +235 -0
- package/dist/objects/SaaS.js.map +1 -0
- package/dist/objects/SandboxDO.js +759 -0
- package/dist/objects/SandboxDO.js.map +1 -0
- package/dist/objects/Service.js +337 -0
- package/dist/objects/Service.js.map +1 -0
- package/dist/objects/Site.js +80 -0
- package/dist/objects/Site.js.map +1 -0
- package/dist/objects/Startup.js +479 -0
- package/dist/objects/Startup.js.map +1 -0
- package/dist/objects/ThingsDO.js +170 -0
- package/dist/objects/ThingsDO.js.map +1 -0
- package/dist/objects/VectorShardDO.js +650 -0
- package/dist/objects/VectorShardDO.js.map +1 -0
- package/dist/objects/Worker.js +144 -0
- package/dist/objects/Worker.js.map +1 -0
- package/dist/objects/Workflow.js +196 -0
- package/dist/objects/Workflow.js.map +1 -0
- package/dist/objects/WorkflowFactory.js +313 -0
- package/dist/objects/WorkflowFactory.js.map +1 -0
- package/dist/objects/WorkflowRuntime.js +863 -0
- package/dist/objects/WorkflowRuntime.js.map +1 -0
- package/dist/objects/circuit-breaker-bulkhead.js +178 -0
- package/dist/objects/circuit-breaker-bulkhead.js.map +1 -0
- package/dist/objects/createFunction.js +934 -0
- package/dist/objects/createFunction.js.map +1 -0
- package/dist/objects/index.js +80 -0
- package/dist/objects/index.js.map +1 -0
- package/dist/objects/lifecycle/Branch.js +275 -0
- package/dist/objects/lifecycle/Branch.js.map +1 -0
- package/dist/objects/lifecycle/Clone.js +1499 -0
- package/dist/objects/lifecycle/Clone.js.map +1 -0
- package/dist/objects/lifecycle/Compact.js +237 -0
- package/dist/objects/lifecycle/Compact.js.map +1 -0
- package/dist/objects/lifecycle/Promote.js +476 -0
- package/dist/objects/lifecycle/Promote.js.map +1 -0
- package/dist/objects/lifecycle/Shard.js +560 -0
- package/dist/objects/lifecycle/Shard.js.map +1 -0
- package/dist/objects/lifecycle/index.js +15 -0
- package/dist/objects/lifecycle/index.js.map +1 -0
- package/dist/objects/lifecycle/types.js +33 -0
- package/dist/objects/lifecycle/types.js.map +1 -0
- package/dist/objects/mixins/infrastructure.js +171 -0
- package/dist/objects/mixins/infrastructure.js.map +1 -0
- package/dist/objects/modules/StoresModule.js +153 -0
- package/dist/objects/modules/StoresModule.js.map +1 -0
- package/dist/objects/persistence/checkpoint-manager.js +606 -0
- package/dist/objects/persistence/checkpoint-manager.js.map +1 -0
- package/dist/objects/persistence/index.js +72 -0
- package/dist/objects/persistence/index.js.map +1 -0
- package/dist/objects/persistence/migration-runner.js +562 -0
- package/dist/objects/persistence/migration-runner.js.map +1 -0
- package/dist/objects/persistence/replication-manager.js +501 -0
- package/dist/objects/persistence/replication-manager.js.map +1 -0
- package/dist/objects/persistence/tiered-storage-manager.js +595 -0
- package/dist/objects/persistence/tiered-storage-manager.js.map +1 -0
- package/dist/objects/persistence/types.js +14 -0
- package/dist/objects/persistence/types.js.map +1 -0
- package/dist/objects/persistence/wal-manager.js +653 -0
- package/dist/objects/persistence/wal-manager.js.map +1 -0
- package/dist/objects/presets/index.js +20 -0
- package/dist/objects/presets/index.js.map +1 -0
- package/dist/objects/presets/primitives.js +188 -0
- package/dist/objects/presets/primitives.js.map +1 -0
- package/dist/objects/primitives/alarm-adapter.js +141 -0
- package/dist/objects/primitives/alarm-adapter.js.map +1 -0
- package/dist/objects/primitives/index.js +337 -0
- package/dist/objects/primitives/index.js.map +1 -0
- package/dist/objects/primitives/storage-adapter.js +182 -0
- package/dist/objects/primitives/storage-adapter.js.map +1 -0
- package/dist/objects/primitives/with-primitives.js +102 -0
- package/dist/objects/primitives/with-primitives.js.map +1 -0
- package/dist/objects/services/StoreManager.js +227 -0
- package/dist/objects/services/StoreManager.js.map +1 -0
- package/dist/objects/services/index.js +13 -0
- package/dist/objects/services/index.js.map +1 -0
- package/dist/objects/transport/auth-layer.js +1451 -0
- package/dist/objects/transport/auth-layer.js.map +1 -0
- package/dist/objects/transport/capnweb-target.js +355 -0
- package/dist/objects/transport/capnweb-target.js.map +1 -0
- package/dist/objects/transport/chain.js +441 -0
- package/dist/objects/transport/chain.js.map +1 -0
- package/dist/objects/transport/handler.js +58 -0
- package/dist/objects/transport/handler.js.map +1 -0
- package/dist/objects/transport/index.js +53 -0
- package/dist/objects/transport/index.js.map +1 -0
- package/dist/objects/transport/mcp-server.js +691 -0
- package/dist/objects/transport/mcp-server.js.map +1 -0
- package/dist/objects/transport/rest-autowire.js +1508 -0
- package/dist/objects/transport/rest-autowire.js.map +1 -0
- package/dist/objects/transport/rest-router.js +440 -0
- package/dist/objects/transport/rest-router.js.map +1 -0
- package/dist/objects/transport/rpc-server.js +1539 -0
- package/dist/objects/transport/rpc-server.js.map +1 -0
- package/dist/objects/transport/shared.js +576 -0
- package/dist/objects/transport/shared.js.map +1 -0
- package/dist/objects/transport/sync-engine.js +291 -0
- package/dist/objects/transport/sync-engine.js.map +1 -0
- package/dist/objects/transport/types.js +8 -0
- package/dist/objects/transport/types.js.map +1 -0
- package/dist/sandbox/index.js +258 -0
- package/dist/sandbox/index.js.map +1 -0
- package/dist/snippets/artifacts-config.js +241 -0
- package/dist/snippets/artifacts-config.js.map +1 -0
- package/dist/snippets/artifacts-ingest.js +832 -0
- package/dist/snippets/artifacts-ingest.js.map +1 -0
- package/dist/snippets/artifacts-serve.js +1035 -0
- package/dist/snippets/artifacts-serve.js.map +1 -0
- package/dist/snippets/artifacts-types.js +161 -0
- package/dist/snippets/artifacts-types.js.map +1 -0
- package/dist/snippets/cache-probe.js +376 -0
- package/dist/snippets/cache-probe.js.map +1 -0
- package/dist/snippets/cache.js +10 -0
- package/dist/snippets/cache.js.map +1 -0
- package/dist/snippets/events.js +469 -0
- package/dist/snippets/events.js.map +1 -0
- package/dist/snippets/index.js +7 -0
- package/dist/snippets/index.js.map +1 -0
- package/dist/snippets/proxy.js +495 -0
- package/dist/snippets/proxy.js.map +1 -0
- package/dist/snippets/search.js +1759 -0
- package/dist/snippets/search.js.map +1 -0
- package/dist/streams/index.js +30 -0
- package/dist/streams/index.js.map +1 -0
- package/dist/streams/observability.js +68 -0
- package/dist/streams/observability.js.map +1 -0
- package/dist/types/AI.js +92 -0
- package/dist/types/AI.js.map +1 -0
- package/dist/types/AIFunction.js +171 -0
- package/dist/types/AIFunction.js.map +1 -0
- package/dist/types/BrowseVerb.js +89 -0
- package/dist/types/BrowseVerb.js.map +1 -0
- package/dist/types/Browser.js +31 -0
- package/dist/types/Browser.js.map +1 -0
- package/dist/types/Chaos.js +15 -0
- package/dist/types/Chaos.js.map +1 -0
- package/dist/types/CloudflareBindings.js +109 -0
- package/dist/types/CloudflareBindings.js.map +1 -0
- package/dist/types/Collection.js +50 -0
- package/dist/types/Collection.js.map +1 -0
- package/dist/types/DO.js +2 -0
- package/dist/types/DO.js.map +1 -0
- package/dist/types/DOLocation.js +63 -0
- package/dist/types/DOLocation.js.map +1 -0
- package/dist/types/EventHandler.js +57 -0
- package/dist/types/EventHandler.js.map +1 -0
- package/dist/types/Experiment.js +33 -0
- package/dist/types/Experiment.js.map +1 -0
- package/dist/types/Flag.js +57 -0
- package/dist/types/Flag.js.map +1 -0
- package/dist/types/Lifecycle.js +13 -0
- package/dist/types/Lifecycle.js.map +1 -0
- package/dist/types/Location.js +169 -0
- package/dist/types/Location.js.map +1 -0
- package/dist/types/Noun.js +66 -0
- package/dist/types/Noun.js.map +1 -0
- package/dist/types/SessionEvent.js +194 -0
- package/dist/types/SessionEvent.js.map +1 -0
- package/dist/types/Thing.js +55 -0
- package/dist/types/Thing.js.map +1 -0
- package/dist/types/ThingDO.js +153 -0
- package/dist/types/ThingDO.js.map +1 -0
- package/dist/types/Things.js +2 -0
- package/dist/types/Things.js.map +1 -0
- package/dist/types/Verb.js +119 -0
- package/dist/types/Verb.js.map +1 -0
- package/dist/types/WorkflowContext.js +70 -0
- package/dist/types/WorkflowContext.js.map +1 -0
- package/dist/types/analytics-api.js +13 -0
- package/dist/types/analytics-api.js.map +1 -0
- package/dist/types/capabilities.js +135 -0
- package/dist/types/capabilities.js.map +1 -0
- package/dist/types/drizzle.js +12 -0
- package/dist/types/drizzle.js.map +1 -0
- package/dist/types/event.js +201 -0
- package/dist/types/event.js.map +1 -0
- package/dist/types/fn.js +12 -0
- package/dist/types/fn.js.map +1 -0
- package/dist/types/iceberg.js +48 -0
- package/dist/types/iceberg.js.map +1 -0
- package/dist/types/ids.js +170 -0
- package/dist/types/ids.js.map +1 -0
- package/dist/types/index.js +41 -0
- package/dist/types/index.js.map +1 -0
- package/dist/types/introspect.js +54 -0
- package/dist/types/introspect.js.map +1 -0
- package/dist/types/observability.js +124 -0
- package/dist/types/observability.js.map +1 -0
- package/dist/types/sync-protocol.js +175 -0
- package/dist/types/sync-protocol.js.map +1 -0
- package/dist/types/vector.js +13 -0
- package/dist/types/vector.js.map +1 -0
- package/dist/workflows/ScheduleManager.js +473 -0
- package/dist/workflows/ScheduleManager.js.map +1 -0
- package/dist/workflows/StepDOBridge.js +149 -0
- package/dist/workflows/StepDOBridge.js.map +1 -0
- package/dist/workflows/StepResultStorage.js +232 -0
- package/dist/workflows/StepResultStorage.js.map +1 -0
- package/dist/workflows/WaitForEventManager.js +461 -0
- package/dist/workflows/WaitForEventManager.js.map +1 -0
- package/dist/workflows/analyzer.js +332 -0
- package/dist/workflows/analyzer.js.map +1 -0
- package/dist/workflows/compat/activity-router.js +484 -0
- package/dist/workflows/compat/activity-router.js.map +1 -0
- package/dist/workflows/compat/backends/cloudflare-workflows.js +431 -0
- package/dist/workflows/compat/backends/cloudflare-workflows.js.map +1 -0
- package/dist/workflows/compat/backends/index.js +14 -0
- package/dist/workflows/compat/backends/index.js.map +1 -0
- package/dist/workflows/compat/errors/index.js +375 -0
- package/dist/workflows/compat/errors/index.js.map +1 -0
- package/dist/workflows/compat/index.js +79 -0
- package/dist/workflows/compat/index.js.map +1 -0
- package/dist/workflows/compat/inngest/index.js +989 -0
- package/dist/workflows/compat/inngest/index.js.map +1 -0
- package/dist/workflows/compat/qstash/index.js +1263 -0
- package/dist/workflows/compat/qstash/index.js.map +1 -0
- package/dist/workflows/compat/temporal/activities.js +739 -0
- package/dist/workflows/compat/temporal/activities.js.map +1 -0
- package/dist/workflows/compat/temporal/child-workflows.js +154 -0
- package/dist/workflows/compat/temporal/child-workflows.js.map +1 -0
- package/dist/workflows/compat/temporal/client.js +381 -0
- package/dist/workflows/compat/temporal/client.js.map +1 -0
- package/dist/workflows/compat/temporal/context.js +309 -0
- package/dist/workflows/compat/temporal/context.js.map +1 -0
- package/dist/workflows/compat/temporal/determinism.js +216 -0
- package/dist/workflows/compat/temporal/determinism.js.map +1 -0
- package/dist/workflows/compat/temporal/errors.js +128 -0
- package/dist/workflows/compat/temporal/errors.js.map +1 -0
- package/dist/workflows/compat/temporal/index.js +2464 -0
- package/dist/workflows/compat/temporal/index.js.map +1 -0
- package/dist/workflows/compat/temporal/saga.js +504 -0
- package/dist/workflows/compat/temporal/saga.js.map +1 -0
- package/dist/workflows/compat/temporal/signals.js +364 -0
- package/dist/workflows/compat/temporal/signals.js.map +1 -0
- package/dist/workflows/compat/temporal/storage.js +271 -0
- package/dist/workflows/compat/temporal/storage.js.map +1 -0
- package/dist/workflows/compat/temporal/timers.js +347 -0
- package/dist/workflows/compat/temporal/timers.js.map +1 -0
- package/dist/workflows/compat/temporal/types.js +7 -0
- package/dist/workflows/compat/temporal/types.js.map +1 -0
- package/dist/workflows/compat/temporal/unified-primitives.js +339 -0
- package/dist/workflows/compat/temporal/unified-primitives.js.map +1 -0
- package/dist/workflows/compat/trigger/index.js +468 -0
- package/dist/workflows/compat/trigger/index.js.map +1 -0
- package/dist/workflows/compat/utils/index.js +69 -0
- package/dist/workflows/compat/utils/index.js.map +1 -0
- package/dist/workflows/context/correlation-capability.js +266 -0
- package/dist/workflows/context/correlation-capability.js.map +1 -0
- package/dist/workflows/context/correlation.js +484 -0
- package/dist/workflows/context/correlation.js.map +1 -0
- package/dist/workflows/context/experiment.js +289 -0
- package/dist/workflows/context/experiment.js.map +1 -0
- package/dist/workflows/context/flag.js +244 -0
- package/dist/workflows/context/flag.js.map +1 -0
- package/dist/workflows/context/foundation.js +648 -0
- package/dist/workflows/context/foundation.js.map +1 -0
- package/dist/workflows/context/human-base.js +106 -0
- package/dist/workflows/context/human-base.js.map +1 -0
- package/dist/workflows/context/human.js +368 -0
- package/dist/workflows/context/human.js.map +1 -0
- package/dist/workflows/context/measure.js +354 -0
- package/dist/workflows/context/measure.js.map +1 -0
- package/dist/workflows/context/rate-limit.js +358 -0
- package/dist/workflows/context/rate-limit.js.map +1 -0
- package/dist/workflows/context/user.js +117 -0
- package/dist/workflows/context/user.js.map +1 -0
- package/dist/workflows/context/vault.js +360 -0
- package/dist/workflows/context/vault.js.map +1 -0
- package/dist/workflows/data/entity-events/entity-events.js +489 -0
- package/dist/workflows/data/entity-events/entity-events.js.map +1 -0
- package/dist/workflows/data/experiment/index.js +599 -0
- package/dist/workflows/data/experiment/index.js.map +1 -0
- package/dist/workflows/data/goal/context.js +558 -0
- package/dist/workflows/data/goal/context.js.map +1 -0
- package/dist/workflows/data/goal/index.js +32 -0
- package/dist/workflows/data/goal/index.js.map +1 -0
- package/dist/workflows/data/measure/index.js +840 -0
- package/dist/workflows/data/measure/index.js.map +1 -0
- package/dist/workflows/data/stream/index.js +1149 -0
- package/dist/workflows/data/stream/index.js.map +1 -0
- package/dist/workflows/data/track/context.js +883 -0
- package/dist/workflows/data/track/context.js.map +1 -0
- package/dist/workflows/data/track/index.js +15 -0
- package/dist/workflows/data/track/index.js.map +1 -0
- package/dist/workflows/data/view/context.js +864 -0
- package/dist/workflows/data/view/context.js.map +1 -0
- package/dist/workflows/domain.js +93 -0
- package/dist/workflows/domain.js.map +1 -0
- package/dist/workflows/flag.js +176 -0
- package/dist/workflows/flag.js.map +1 -0
- package/dist/workflows/flags.js +217 -0
- package/dist/workflows/flags.js.map +1 -0
- package/dist/workflows/hash.js +209 -0
- package/dist/workflows/hash.js.map +1 -0
- package/dist/workflows/index.js +50 -0
- package/dist/workflows/index.js.map +1 -0
- package/dist/workflows/on.js +378 -0
- package/dist/workflows/on.js.map +1 -0
- package/dist/workflows/pipeline-promise.js +481 -0
- package/dist/workflows/pipeline-promise.js.map +1 -0
- package/dist/workflows/pipeline-types.js +20 -0
- package/dist/workflows/pipeline-types.js.map +1 -0
- package/dist/workflows/proxy.js +76 -0
- package/dist/workflows/proxy.js.map +1 -0
- package/dist/workflows/runtime.js +310 -0
- package/dist/workflows/runtime.js.map +1 -0
- package/dist/workflows/schedule-builder.js +327 -0
- package/dist/workflows/schedule-builder.js.map +1 -0
- package/dist/workflows/visibility/index.js +146 -0
- package/dist/workflows/visibility/index.js.map +1 -0
- package/dist/workflows/visibility/query-parser.js +150 -0
- package/dist/workflows/visibility/query-parser.js.map +1 -0
- package/dist/workflows/visibility/store.js +223 -0
- package/dist/workflows/visibility/store.js.map +1 -0
- package/dist/workflows/visibility/types.js +30 -0
- package/dist/workflows/visibility/types.js.map +1 -0
- package/dist/workflows/workflow.js +53 -0
- package/dist/workflows/workflow.js.map +1 -0
- package/package.json +294 -46
|
@@ -0,0 +1,938 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* IcebergMetadataDO - Durable Object for Iceberg table metadata management
|
|
3
|
+
*
|
|
4
|
+
* This DO provides:
|
|
5
|
+
* 1. Parsing Iceberg metadata.json from R2
|
|
6
|
+
* 2. Parsing manifest-list (Avro) to get manifest paths
|
|
7
|
+
* 3. Parsing manifests (Avro) to get data file paths
|
|
8
|
+
* 4. Caching metadata with TTL in DO storage
|
|
9
|
+
* 5. Partition pruning for queries
|
|
10
|
+
*
|
|
11
|
+
* @module objects/IcebergMetadataDO
|
|
12
|
+
*/
|
|
13
|
+
import { DurableObject } from 'cloudflare:workers';
|
|
14
|
+
import * as avro from 'avsc';
|
|
15
|
+
// ============================================================================
// Constants
// ============================================================================
/**
 * Default TTL for cached metadata (5 minutes).
 * Used by the constructor when env.METADATA_TTL_MS is not set.
 */
const DEFAULT_METADATA_TTL_MS = 5 * 60 * 1000;
/**
 * Default TTL for cached manifests (10 minutes).
 * Used by the constructor when env.MANIFEST_TTL_MS is not set.
 */
const DEFAULT_MANIFEST_TTL_MS = 10 * 60 * 1000;
/** Maximum number of manifest list entries to cache (checked in loadManifestList before inserting) */
const MAX_CACHED_MANIFEST_LISTS = 100;
/** Maximum number of manifests to cache (checked in loadManifest before inserting) */
const MAX_CACHED_MANIFESTS = 500;
|
|
26
|
+
// ============================================================================
|
|
27
|
+
// Avro Schemas for Iceberg Files
|
|
28
|
+
// ============================================================================
|
|
29
|
+
/**
 * Avro schema for manifest list entries (simplified)
 * Based on Iceberg spec v2
 *
 * Each record describes one manifest file: its path and length, the partition
 * spec it was written with, sequence numbers, file/row counters, and an
 * optional per-partition-field summary (null/NaN flags plus byte-encoded bounds).
 *
 * NOTE(review): parseAvroOCF decodes with a bare avsc BlockDecoder and does not
 * pass this type to it; confirm where this schema is actually consumed.
 */
const manifestListSchema = avro.Type.forSchema({
    type: 'record',
    name: 'manifest_file',
    fields: [
        { name: 'manifest_path', type: 'string' },
        { name: 'manifest_length', type: 'long' },
        { name: 'partition_spec_id', type: 'int' },
        { name: 'content', type: 'int', default: 0 },
        { name: 'sequence_number', type: 'long', default: 0 },
        { name: 'min_sequence_number', type: 'long', default: 0 },
        { name: 'added_snapshot_id', type: 'long' },
        { name: 'added_files_count', type: 'int', default: 0 },
        { name: 'existing_files_count', type: 'int', default: 0 },
        { name: 'deleted_files_count', type: 'int', default: 0 },
        { name: 'added_rows_count', type: 'long', default: 0 },
        { name: 'existing_rows_count', type: 'long', default: 0 },
        { name: 'deleted_rows_count', type: 'long', default: 0 },
        {
            // Optional list with one summary per partition field.
            name: 'partitions',
            type: [
                'null',
                {
                    type: 'array',
                    items: {
                        type: 'record',
                        name: 'field_summary',
                        fields: [
                            { name: 'contains_null', type: 'boolean' },
                            { name: 'contains_nan', type: ['null', 'boolean'], default: null },
                            { name: 'lower_bound', type: ['null', 'bytes'], default: null },
                            { name: 'upper_bound', type: ['null', 'bytes'], default: null },
                        ],
                    },
                },
            ],
            default: null,
        },
    ],
});
|
|
72
|
+
/**
 * Avro schema for manifest entries (data files)
 * Based on Iceberg spec v2
 *
 * Each record carries an entry status, optional snapshot/sequence numbers and a
 * nested `data_file` record with the file path, format, partition values,
 * size/row counts and optional per-column statistics.
 *
 * NOTE(review): `partition` is declared here as an Avro map of primitive
 * values, whereas the Iceberg spec describes it as a struct shaped by the
 * partition spec — confirm this simplification is intentional.
 */
const manifestEntrySchema = avro.Type.forSchema({
    type: 'record',
    name: 'manifest_entry',
    fields: [
        // parseAvroManifest skips entries whose status is 2.
        { name: 'status', type: 'int' },
        { name: 'snapshot_id', type: ['null', 'long'], default: null },
        { name: 'sequence_number', type: ['null', 'long'], default: null },
        { name: 'file_sequence_number', type: ['null', 'long'], default: null },
        {
            name: 'data_file',
            type: {
                type: 'record',
                name: 'data_file',
                fields: [
                    { name: 'content', type: 'int', default: 0 },
                    { name: 'file_path', type: 'string' },
                    { name: 'file_format', type: 'string' },
                    {
                        name: 'partition',
                        type: {
                            type: 'map',
                            values: ['null', 'boolean', 'int', 'long', 'float', 'double', 'string', 'bytes'],
                        },
                    },
                    { name: 'record_count', type: 'long' },
                    { name: 'file_size_in_bytes', type: 'long' },
                    {
                        name: 'column_sizes',
                        type: ['null', { type: 'map', values: 'long' }],
                        default: null,
                    },
                    {
                        name: 'value_counts',
                        type: ['null', { type: 'map', values: 'long' }],
                        default: null,
                    },
                    {
                        name: 'null_value_counts',
                        type: ['null', { type: 'map', values: 'long' }],
                        default: null,
                    },
                    {
                        name: 'nan_value_counts',
                        type: ['null', { type: 'map', values: 'long' }],
                        default: null,
                    },
                    {
                        // Byte-encoded bounds; parseAvroManifest passes them through convertBounds.
                        name: 'lower_bounds',
                        type: ['null', { type: 'map', values: 'bytes' }],
                        default: null,
                    },
                    {
                        name: 'upper_bounds',
                        type: ['null', { type: 'map', values: 'bytes' }],
                        default: null,
                    },
                    {
                        name: 'split_offsets',
                        type: ['null', { type: 'array', items: 'long' }],
                        default: null,
                    },
                    { name: 'sort_order_id', type: ['null', 'int'], default: null },
                ],
            },
        },
    ],
});
|
|
143
|
+
// ============================================================================
|
|
144
|
+
// IcebergMetadataDO Class
|
|
145
|
+
// ============================================================================
|
|
146
|
+
/**
|
|
147
|
+
* Durable Object for managing Iceberg table metadata.
|
|
148
|
+
*
|
|
149
|
+
* This DO caches Iceberg metadata, manifest lists, and manifests
|
|
150
|
+
* to enable efficient partition pruning for queries.
|
|
151
|
+
*
|
|
152
|
+
* @example
|
|
153
|
+
* ```typescript
|
|
154
|
+
* // Get table metadata
|
|
155
|
+
* const metadata = await metadataDO.getTableMetadata('do_resources')
|
|
156
|
+
*
|
|
157
|
+
* // Get file scan plan with partition pruning
|
|
158
|
+
* const plan = await metadataDO.getPartitionPlan('do_resources', [
|
|
159
|
+
* { column: 'ns', operator: 'eq', value: 'payments.do' },
|
|
160
|
+
* { column: 'type', operator: 'eq', value: 'Function' }
|
|
161
|
+
* ])
|
|
162
|
+
*
|
|
163
|
+
* // Invalidate cache on version change
|
|
164
|
+
* await metadataDO.invalidateCache('do_resources')
|
|
165
|
+
* ```
|
|
166
|
+
*/
|
|
167
|
+
export class IcebergMetadataDO extends DurableObject {
|
|
168
|
+
// ==========================================================================
|
|
169
|
+
// Private Fields
|
|
170
|
+
// ==========================================================================
|
|
171
|
+
/** Cache for table metadata */
|
|
172
|
+
metadataCache = new Map();
|
|
173
|
+
/** Cache for manifest lists */
|
|
174
|
+
manifestListCache = new Map();
|
|
175
|
+
/** Cache for manifests */
|
|
176
|
+
manifestCache = new Map();
|
|
177
|
+
/** Cache hit/miss statistics */
|
|
178
|
+
cacheHits = 0;
|
|
179
|
+
cacheMisses = 0;
|
|
180
|
+
/** Base path for Iceberg tables */
|
|
181
|
+
basePath;
|
|
182
|
+
/** Default TTLs */
|
|
183
|
+
metadataTtlMs;
|
|
184
|
+
manifestTtlMs;
|
|
185
|
+
// ==========================================================================
|
|
186
|
+
// Constructor
|
|
187
|
+
// ==========================================================================
|
|
188
|
+
constructor(ctx, env) {
|
|
189
|
+
super(ctx, env);
|
|
190
|
+
this.basePath = env.ICEBERG_BASE_PATH ?? 'iceberg/';
|
|
191
|
+
this.metadataTtlMs = env.METADATA_TTL_MS ? parseInt(env.METADATA_TTL_MS, 10) : DEFAULT_METADATA_TTL_MS;
|
|
192
|
+
this.manifestTtlMs = env.MANIFEST_TTL_MS ? parseInt(env.MANIFEST_TTL_MS, 10) : DEFAULT_MANIFEST_TTL_MS;
|
|
193
|
+
// Restore cache from storage on startup
|
|
194
|
+
this.ctx.blockConcurrencyWhile(async () => {
|
|
195
|
+
await this.restoreCacheFromStorage();
|
|
196
|
+
});
|
|
197
|
+
}
|
|
198
|
+
// ==========================================================================
|
|
199
|
+
// Public API
|
|
200
|
+
// ==========================================================================
|
|
201
|
+
/**
|
|
202
|
+
* Get table metadata (cached with TTL)
|
|
203
|
+
*
|
|
204
|
+
* @param tableId - The table identifier (e.g., 'do_resources')
|
|
205
|
+
* @param options - Optional settings for cache behavior
|
|
206
|
+
* @returns The parsed Iceberg metadata
|
|
207
|
+
*/
|
|
208
|
+
async getTableMetadata(tableId, options = {}) {
|
|
209
|
+
const { forceRefresh = false, ttlMs = this.metadataTtlMs } = options;
|
|
210
|
+
// Check cache first (unless force refresh)
|
|
211
|
+
if (!forceRefresh) {
|
|
212
|
+
const cached = this.metadataCache.get(tableId);
|
|
213
|
+
if (cached && !this.isCacheExpired(cached.cachedAt, cached.ttlMs)) {
|
|
214
|
+
this.cacheHits++;
|
|
215
|
+
return cached.metadata;
|
|
216
|
+
}
|
|
217
|
+
}
|
|
218
|
+
this.cacheMisses++;
|
|
219
|
+
// Load metadata from R2
|
|
220
|
+
const metadataLocation = await this.findLatestMetadata(tableId);
|
|
221
|
+
const metadata = await this.loadMetadataJson(metadataLocation);
|
|
222
|
+
// Cache the metadata
|
|
223
|
+
const cachedEntry = {
|
|
224
|
+
metadata,
|
|
225
|
+
cachedAt: Date.now(),
|
|
226
|
+
ttlMs,
|
|
227
|
+
snapshotId: metadata.currentSnapshotId ?? null,
|
|
228
|
+
metadataLocation,
|
|
229
|
+
};
|
|
230
|
+
this.metadataCache.set(tableId, cachedEntry);
|
|
231
|
+
await this.persistCacheEntry('metadata', tableId, cachedEntry);
|
|
232
|
+
return metadata;
|
|
233
|
+
}
|
|
234
|
+
/**
|
|
235
|
+
* Get file scan plan with partition pruning
|
|
236
|
+
*
|
|
237
|
+
* @param tableId - The table identifier
|
|
238
|
+
* @param filters - Filters to apply for partition pruning
|
|
239
|
+
* @returns A plan of files to scan
|
|
240
|
+
*/
|
|
241
|
+
async getPartitionPlan(tableId, filters = []) {
|
|
242
|
+
// Get metadata
|
|
243
|
+
const metadata = await this.getTableMetadata(tableId);
|
|
244
|
+
// Get current snapshot
|
|
245
|
+
const currentSnapshot = metadata.snapshots?.find((s) => s.snapshotId === metadata.currentSnapshotId);
|
|
246
|
+
if (!currentSnapshot) {
|
|
247
|
+
return this.emptyPlan(tableId, metadata.currentSnapshotId ?? 0);
|
|
248
|
+
}
|
|
249
|
+
// Load manifest list
|
|
250
|
+
const manifestList = await this.loadManifestList(currentSnapshot.manifestList, currentSnapshot.snapshotId);
|
|
251
|
+
// Apply partition pruning to manifests
|
|
252
|
+
const relevantManifests = this.pruneManifests(manifestList.manifests, filters, metadata);
|
|
253
|
+
// Load data files from relevant manifests
|
|
254
|
+
const allDataFiles = [];
|
|
255
|
+
let prunedManifests = 0;
|
|
256
|
+
for (const manifest of relevantManifests) {
|
|
257
|
+
const dataFiles = await this.loadManifest(manifest.manifestPath);
|
|
258
|
+
// Apply partition/column pruning to data files
|
|
259
|
+
const prunedFiles = this.pruneDataFiles(dataFiles.dataFiles, filters);
|
|
260
|
+
allDataFiles.push(...prunedFiles);
|
|
261
|
+
if (prunedFiles.length < dataFiles.dataFiles.length) {
|
|
262
|
+
prunedManifests++;
|
|
263
|
+
}
|
|
264
|
+
}
|
|
265
|
+
// Convert to file scan plan
|
|
266
|
+
const files = allDataFiles.map((df) => ({
|
|
267
|
+
filePath: df.filePath,
|
|
268
|
+
fileFormat: df.fileFormat,
|
|
269
|
+
partition: df.partition,
|
|
270
|
+
recordCount: df.recordCount,
|
|
271
|
+
fileSizeBytes: df.fileSizeBytes,
|
|
272
|
+
columnStats: this.convertColumnStats(df),
|
|
273
|
+
}));
|
|
274
|
+
const totalManifests = manifestList.manifests.length;
|
|
275
|
+
const totalDataFiles = allDataFiles.length + (totalManifests - relevantManifests.length) * 10; // Estimate
|
|
276
|
+
return {
|
|
277
|
+
tableId,
|
|
278
|
+
snapshotId: currentSnapshot.snapshotId,
|
|
279
|
+
files,
|
|
280
|
+
totalRecords: files.reduce((sum, f) => sum + f.recordCount, 0),
|
|
281
|
+
totalSizeBytes: files.reduce((sum, f) => sum + f.fileSizeBytes, 0),
|
|
282
|
+
pruningStats: {
|
|
283
|
+
totalManifests,
|
|
284
|
+
prunedManifests: totalManifests - relevantManifests.length,
|
|
285
|
+
totalDataFiles,
|
|
286
|
+
prunedDataFiles: totalDataFiles - files.length,
|
|
287
|
+
},
|
|
288
|
+
createdAt: Date.now(),
|
|
289
|
+
};
|
|
290
|
+
}
|
|
291
|
+
/**
|
|
292
|
+
* Invalidate cache for a specific table
|
|
293
|
+
*
|
|
294
|
+
* @param tableId - The table identifier to invalidate
|
|
295
|
+
* @returns Result of the invalidation
|
|
296
|
+
*/
|
|
297
|
+
async invalidateCache(tableId) {
|
|
298
|
+
let entriesRemoved = 0;
|
|
299
|
+
// Remove metadata cache
|
|
300
|
+
if (this.metadataCache.has(tableId)) {
|
|
301
|
+
this.metadataCache.delete(tableId);
|
|
302
|
+
await this.ctx.storage.delete(`cache:metadata:${tableId}`);
|
|
303
|
+
entriesRemoved++;
|
|
304
|
+
}
|
|
305
|
+
// Remove related manifest list caches
|
|
306
|
+
for (const [key] of this.manifestListCache) {
|
|
307
|
+
if (key.startsWith(`${tableId}:`)) {
|
|
308
|
+
this.manifestListCache.delete(key);
|
|
309
|
+
await this.ctx.storage.delete(`cache:manifestList:${key}`);
|
|
310
|
+
entriesRemoved++;
|
|
311
|
+
}
|
|
312
|
+
}
|
|
313
|
+
// Remove related manifest caches
|
|
314
|
+
for (const [key] of this.manifestCache) {
|
|
315
|
+
if (key.includes(`/${tableId}/`)) {
|
|
316
|
+
this.manifestCache.delete(key);
|
|
317
|
+
await this.ctx.storage.delete(`cache:manifest:${key}`);
|
|
318
|
+
entriesRemoved++;
|
|
319
|
+
}
|
|
320
|
+
}
|
|
321
|
+
return {
|
|
322
|
+
success: true,
|
|
323
|
+
entriesRemoved,
|
|
324
|
+
tableId,
|
|
325
|
+
invalidatedAt: Date.now(),
|
|
326
|
+
};
|
|
327
|
+
}
|
|
328
|
+
/**
|
|
329
|
+
* Get cache statistics
|
|
330
|
+
*/
|
|
331
|
+
getCacheStats() {
|
|
332
|
+
const totalRequests = this.cacheHits + this.cacheMisses;
|
|
333
|
+
return {
|
|
334
|
+
cachedTables: this.metadataCache.size,
|
|
335
|
+
cachedManifestLists: this.manifestListCache.size,
|
|
336
|
+
cachedManifests: this.manifestCache.size,
|
|
337
|
+
hitRate: totalRequests > 0 ? this.cacheHits / totalRequests : 0,
|
|
338
|
+
cacheSizeBytes: this.estimateCacheSize(),
|
|
339
|
+
};
|
|
340
|
+
}
|
|
341
|
+
/**
|
|
342
|
+
* Clear all caches
|
|
343
|
+
*/
|
|
344
|
+
async clearAllCaches() {
|
|
345
|
+
this.metadataCache.clear();
|
|
346
|
+
this.manifestListCache.clear();
|
|
347
|
+
this.manifestCache.clear();
|
|
348
|
+
this.cacheHits = 0;
|
|
349
|
+
this.cacheMisses = 0;
|
|
350
|
+
// Clear from storage
|
|
351
|
+
const keys = await this.ctx.storage.list({ prefix: 'cache:' });
|
|
352
|
+
for (const key of keys.keys()) {
|
|
353
|
+
await this.ctx.storage.delete(key);
|
|
354
|
+
}
|
|
355
|
+
}
|
|
356
|
+
// ==========================================================================
|
|
357
|
+
// HTTP Fetch Handler
|
|
358
|
+
// ==========================================================================
|
|
359
|
+
async fetch(request) {
|
|
360
|
+
const url = new URL(request.url);
|
|
361
|
+
const path = url.pathname;
|
|
362
|
+
try {
|
|
363
|
+
// GET /metadata/:tableId - Get table metadata
|
|
364
|
+
if (request.method === 'GET' && path.match(/^\/metadata\/[^/]+$/)) {
|
|
365
|
+
const tableId = path.split('/')[2];
|
|
366
|
+
const forceRefresh = url.searchParams.get('refresh') === 'true';
|
|
367
|
+
const metadata = await this.getTableMetadata(tableId, { forceRefresh });
|
|
368
|
+
return Response.json(metadata);
|
|
369
|
+
}
|
|
370
|
+
// POST /plan/:tableId - Get partition plan
|
|
371
|
+
if (request.method === 'POST' && path.match(/^\/plan\/[^/]+$/)) {
|
|
372
|
+
const tableId = path.split('/')[2];
|
|
373
|
+
const body = (await request.json());
|
|
374
|
+
const plan = await this.getPartitionPlan(tableId, body.filters ?? []);
|
|
375
|
+
return Response.json(plan);
|
|
376
|
+
}
|
|
377
|
+
// DELETE /cache/:tableId - Invalidate cache
|
|
378
|
+
if (request.method === 'DELETE' && path.match(/^\/cache\/[^/]+$/)) {
|
|
379
|
+
const tableId = path.split('/')[2];
|
|
380
|
+
const result = await this.invalidateCache(tableId);
|
|
381
|
+
return Response.json(result);
|
|
382
|
+
}
|
|
383
|
+
// GET /stats - Get cache statistics
|
|
384
|
+
if (request.method === 'GET' && path === '/stats') {
|
|
385
|
+
return Response.json(this.getCacheStats());
|
|
386
|
+
}
|
|
387
|
+
// DELETE /cache - Clear all caches
|
|
388
|
+
if (request.method === 'DELETE' && path === '/cache') {
|
|
389
|
+
await this.clearAllCaches();
|
|
390
|
+
return Response.json({ success: true });
|
|
391
|
+
}
|
|
392
|
+
return new Response('Not Found', { status: 404 });
|
|
393
|
+
}
|
|
394
|
+
catch (error) {
|
|
395
|
+
const message = error instanceof Error ? error.message : 'Unknown error';
|
|
396
|
+
return Response.json({ error: message }, { status: 500 });
|
|
397
|
+
}
|
|
398
|
+
}
|
|
399
|
+
// ==========================================================================
|
|
400
|
+
// Private: Metadata Loading
|
|
401
|
+
// ==========================================================================
|
|
402
|
+
/**
|
|
403
|
+
* Find the latest metadata.json for a table
|
|
404
|
+
*/
|
|
405
|
+
async findLatestMetadata(tableId) {
|
|
406
|
+
const tablePath = `${this.basePath}${tableId}/`;
|
|
407
|
+
// Try to find version-hint.text first (R2 Data Catalog pattern)
|
|
408
|
+
const hintPath = `${tablePath}version-hint.text`;
|
|
409
|
+
const hint = await this.env.R2.get(hintPath);
|
|
410
|
+
if (hint) {
|
|
411
|
+
const version = await hint.text();
|
|
412
|
+
return `${tablePath}metadata/v${version.trim()}.metadata.json`;
|
|
413
|
+
}
|
|
414
|
+
// Fall back to listing metadata directory
|
|
415
|
+
const metadataPrefix = `${tablePath}metadata/`;
|
|
416
|
+
const listing = await this.env.R2.list({ prefix: metadataPrefix });
|
|
417
|
+
// Find the latest metadata file
|
|
418
|
+
const metadataFiles = listing.objects
|
|
419
|
+
.filter((obj) => obj.key.endsWith('.metadata.json'))
|
|
420
|
+
.sort((a, b) => {
|
|
421
|
+
// Sort by version number (v1, v2, etc.)
|
|
422
|
+
const versionA = this.extractVersion(a.key);
|
|
423
|
+
const versionB = this.extractVersion(b.key);
|
|
424
|
+
return versionB - versionA;
|
|
425
|
+
});
|
|
426
|
+
if (metadataFiles.length === 0) {
|
|
427
|
+
throw new Error(`No metadata found for table: ${tableId}`);
|
|
428
|
+
}
|
|
429
|
+
return metadataFiles[0].key;
|
|
430
|
+
}
|
|
431
|
+
/**
|
|
432
|
+
* Load and parse metadata.json
|
|
433
|
+
*/
|
|
434
|
+
async loadMetadataJson(path) {
|
|
435
|
+
const obj = await this.env.R2.get(path);
|
|
436
|
+
if (!obj) {
|
|
437
|
+
throw new Error(`Metadata file not found: ${path}`);
|
|
438
|
+
}
|
|
439
|
+
const text = await obj.text();
|
|
440
|
+
return JSON.parse(text);
|
|
441
|
+
}
|
|
442
|
+
/**
|
|
443
|
+
* Load and parse manifest list (Avro)
|
|
444
|
+
*/
|
|
445
|
+
async loadManifestList(manifestListPath, snapshotId) {
|
|
446
|
+
const cacheKey = `${snapshotId}:${manifestListPath}`;
|
|
447
|
+
// Check cache
|
|
448
|
+
const cached = this.manifestListCache.get(cacheKey);
|
|
449
|
+
if (cached && !this.isCacheExpired(cached.cachedAt, cached.ttlMs)) {
|
|
450
|
+
this.cacheHits++;
|
|
451
|
+
return cached;
|
|
452
|
+
}
|
|
453
|
+
this.cacheMisses++;
|
|
454
|
+
// Load from R2
|
|
455
|
+
const obj = await this.env.R2.get(manifestListPath);
|
|
456
|
+
if (!obj) {
|
|
457
|
+
throw new Error(`Manifest list not found: ${manifestListPath}`);
|
|
458
|
+
}
|
|
459
|
+
const buffer = await obj.arrayBuffer();
|
|
460
|
+
const manifests = this.parseAvroManifestList(buffer);
|
|
461
|
+
const cachedEntry = {
|
|
462
|
+
manifests,
|
|
463
|
+
snapshotId,
|
|
464
|
+
cachedAt: Date.now(),
|
|
465
|
+
ttlMs: this.manifestTtlMs,
|
|
466
|
+
};
|
|
467
|
+
// Enforce cache size limit
|
|
468
|
+
if (this.manifestListCache.size >= MAX_CACHED_MANIFEST_LISTS) {
|
|
469
|
+
this.evictOldestFromCache(this.manifestListCache);
|
|
470
|
+
}
|
|
471
|
+
this.manifestListCache.set(cacheKey, cachedEntry);
|
|
472
|
+
await this.persistCacheEntry('manifestList', cacheKey, cachedEntry);
|
|
473
|
+
return cachedEntry;
|
|
474
|
+
}
|
|
475
|
+
/**
|
|
476
|
+
* Load and parse manifest (Avro)
|
|
477
|
+
*/
|
|
478
|
+
async loadManifest(manifestPath) {
|
|
479
|
+
// Check cache
|
|
480
|
+
const cached = this.manifestCache.get(manifestPath);
|
|
481
|
+
if (cached && !this.isCacheExpired(cached.cachedAt, cached.ttlMs)) {
|
|
482
|
+
this.cacheHits++;
|
|
483
|
+
return cached;
|
|
484
|
+
}
|
|
485
|
+
this.cacheMisses++;
|
|
486
|
+
// Load from R2
|
|
487
|
+
const obj = await this.env.R2.get(manifestPath);
|
|
488
|
+
if (!obj) {
|
|
489
|
+
throw new Error(`Manifest not found: ${manifestPath}`);
|
|
490
|
+
}
|
|
491
|
+
const buffer = await obj.arrayBuffer();
|
|
492
|
+
const dataFiles = this.parseAvroManifest(buffer);
|
|
493
|
+
const cachedEntry = {
|
|
494
|
+
manifestPath,
|
|
495
|
+
dataFiles,
|
|
496
|
+
cachedAt: Date.now(),
|
|
497
|
+
ttlMs: this.manifestTtlMs,
|
|
498
|
+
};
|
|
499
|
+
// Enforce cache size limit
|
|
500
|
+
if (this.manifestCache.size >= MAX_CACHED_MANIFESTS) {
|
|
501
|
+
this.evictOldestFromCache(this.manifestCache);
|
|
502
|
+
}
|
|
503
|
+
this.manifestCache.set(manifestPath, cachedEntry);
|
|
504
|
+
await this.persistCacheEntry('manifest', manifestPath, cachedEntry);
|
|
505
|
+
return cachedEntry;
|
|
506
|
+
}
|
|
507
|
+
// ==========================================================================
|
|
508
|
+
// Private: Avro Parsing
|
|
509
|
+
// ==========================================================================
|
|
510
|
+
/**
|
|
511
|
+
* Parse Avro manifest list file (Object Container Format)
|
|
512
|
+
*
|
|
513
|
+
* Iceberg stores manifest lists and manifests as Avro OCF files.
|
|
514
|
+
* We use avsc's BlockDecoder to parse the Avro data.
|
|
515
|
+
*/
|
|
516
|
+
parseAvroManifestList(buffer) {
|
|
517
|
+
const manifests = [];
|
|
518
|
+
try {
|
|
519
|
+
// Parse Avro Object Container Format
|
|
520
|
+
const records = this.parseAvroOCF(buffer);
|
|
521
|
+
for (const entry of records) {
|
|
522
|
+
manifests.push({
|
|
523
|
+
manifestPath: entry.manifest_path,
|
|
524
|
+
manifestLength: entry.manifest_length,
|
|
525
|
+
partitionSpecId: entry.partition_spec_id,
|
|
526
|
+
sequenceNumber: entry.sequence_number,
|
|
527
|
+
minSequenceNumber: entry.min_sequence_number,
|
|
528
|
+
addedSnapshotId: entry.added_snapshot_id,
|
|
529
|
+
addedFilesCount: entry.added_files_count,
|
|
530
|
+
existingFilesCount: entry.existing_files_count,
|
|
531
|
+
deletedFilesCount: entry.deleted_files_count,
|
|
532
|
+
addedRowsCount: entry.added_rows_count,
|
|
533
|
+
existingRowsCount: entry.existing_rows_count,
|
|
534
|
+
deletedRowsCount: entry.deleted_rows_count,
|
|
535
|
+
partitions: entry.partitions?.map((p) => ({
|
|
536
|
+
containsNull: p.contains_null,
|
|
537
|
+
containsNan: p.contains_nan ?? undefined,
|
|
538
|
+
lowerBound: p.lower_bound ?? undefined,
|
|
539
|
+
upperBound: p.upper_bound ?? undefined,
|
|
540
|
+
})),
|
|
541
|
+
});
|
|
542
|
+
}
|
|
543
|
+
}
|
|
544
|
+
catch (error) {
|
|
545
|
+
// If Avro parsing fails, return empty array
|
|
546
|
+
// This can happen with mock data in tests
|
|
547
|
+
console.warn('Failed to parse Avro manifest list:', error);
|
|
548
|
+
}
|
|
549
|
+
return manifests;
|
|
550
|
+
}
|
|
551
|
+
/**
|
|
552
|
+
* Parse Avro manifest file (Object Container Format)
|
|
553
|
+
*/
|
|
554
|
+
parseAvroManifest(buffer) {
|
|
555
|
+
const dataFiles = [];
|
|
556
|
+
try {
|
|
557
|
+
// Parse Avro Object Container Format
|
|
558
|
+
const records = this.parseAvroOCF(buffer);
|
|
559
|
+
for (const entry of records) {
|
|
560
|
+
// Only include existing and added files (not deleted)
|
|
561
|
+
if (entry.status === 2)
|
|
562
|
+
continue;
|
|
563
|
+
const df = entry.data_file;
|
|
564
|
+
dataFiles.push({
|
|
565
|
+
status: entry.status,
|
|
566
|
+
filePath: df.file_path,
|
|
567
|
+
fileFormat: df.file_format,
|
|
568
|
+
partition: df.partition,
|
|
569
|
+
recordCount: df.record_count,
|
|
570
|
+
fileSizeBytes: df.file_size_in_bytes,
|
|
571
|
+
columnSizes: df.column_sizes,
|
|
572
|
+
valueCounts: df.value_counts,
|
|
573
|
+
nullValueCounts: df.null_value_counts,
|
|
574
|
+
lowerBounds: this.convertBounds(df.lower_bounds),
|
|
575
|
+
upperBounds: this.convertBounds(df.upper_bounds),
|
|
576
|
+
});
|
|
577
|
+
}
|
|
578
|
+
}
|
|
579
|
+
catch (error) {
|
|
580
|
+
// If Avro parsing fails, return empty array
|
|
581
|
+
// This can happen with mock data in tests
|
|
582
|
+
console.warn('Failed to parse Avro manifest:', error);
|
|
583
|
+
}
|
|
584
|
+
return dataFiles;
|
|
585
|
+
}
|
|
586
|
+
/**
 * Parse Avro Object Container Format synchronously
 *
 * Avro OCF structure:
 * 1. Header: magic bytes, schema JSON, sync marker
 * 2. Data blocks: each block has count, size, compressed data, sync marker
 *
 * @param buffer - Raw file contents (ArrayBuffer)
 * @returns The records collected from the decoder's 'data' events by the time
 *   this function returns
 * @throws Error when the buffer does not start with the Avro magic bytes
 *
 * NOTE(review): BlockDecoder is a stream. This function returns `results`
 * immediately after write()/end(), so it is only correct if every 'data'
 * event has already fired by then. Node streams commonly deliver data on a
 * later tick, in which case this would always return an empty array — confirm
 * against avsc's BlockDecoder behaviour (and consider an awaited variant).
 * NOTE(review): no 'error' listener is attached to the decoder; verify how
 * decode errors surface to the callers' try/catch.
 */
parseAvroOCF(buffer) {
    const results = [];
    const bytes = new Uint8Array(buffer);
    // Check for Avro magic bytes: 'Obj' followed by version 1
    const AVRO_MAGIC = [0x4f, 0x62, 0x6a, 0x01]; // "Obj\x01"
    if (bytes.length < 4 || !AVRO_MAGIC.every((b, i) => bytes[i] === b)) {
        throw new Error('Invalid Avro file: missing magic bytes');
    }
    // Use avsc's streams.BlockDecoder for proper OCF parsing
    const BlockDecoder = avro.streams.BlockDecoder;
    // Create a decoder from the buffer
    const decoder = new BlockDecoder();
    // Relies on a global Buffer being available in the runtime.
    const buf = Buffer.from(buffer);
    // Process the buffer synchronously by collecting all records
    // Note: In production, we'd want async streaming, but for small manifests
    // this sync approach works well
    decoder.on('data', (record) => {
        results.push(record);
    });
    // Feed the buffer to the decoder
    decoder.write(buf);
    decoder.end();
    // For sync processing, we process the buffer in one go
    // The code assumes the BlockDecoder emits 'data' events synchronously
    // during write() — see the review note above.
    return results;
}
|
|
619
|
+
// ==========================================================================
|
|
620
|
+
// Private: Partition Pruning
|
|
621
|
+
// ==========================================================================
|
|
622
|
+
/**
|
|
623
|
+
* Prune manifests based on partition bounds in filters
|
|
624
|
+
*/
|
|
625
|
+
pruneManifests(manifests, filters, metadata) {
|
|
626
|
+
if (filters.length === 0) {
|
|
627
|
+
return manifests;
|
|
628
|
+
}
|
|
629
|
+
// Get partition spec
|
|
630
|
+
const partitionSpec = metadata.partitionSpecs.find((s) => s.specId === metadata.defaultSpecId);
|
|
631
|
+
if (!partitionSpec) {
|
|
632
|
+
return manifests;
|
|
633
|
+
}
|
|
634
|
+
// Map filter columns to partition field indices
|
|
635
|
+
const partitionFilters = this.mapFiltersToPartition(filters, partitionSpec, metadata);
|
|
636
|
+
if (partitionFilters.length === 0) {
|
|
637
|
+
// No filters apply to partition columns
|
|
638
|
+
return manifests;
|
|
639
|
+
}
|
|
640
|
+
return manifests.filter((manifest) => {
|
|
641
|
+
// If no partition summaries, we can't prune
|
|
642
|
+
if (!manifest.partitions) {
|
|
643
|
+
return true;
|
|
644
|
+
}
|
|
645
|
+
// Check each partition filter against manifest partition summaries
|
|
646
|
+
for (const { fieldIndex, filter } of partitionFilters) {
|
|
647
|
+
const summary = manifest.partitions[fieldIndex];
|
|
648
|
+
if (!summary)
|
|
649
|
+
continue;
|
|
650
|
+
// Check if we can prune this manifest based on bounds
|
|
651
|
+
if (this.canPruneByBounds(filter, summary.lowerBound, summary.upperBound)) {
|
|
652
|
+
return false;
|
|
653
|
+
}
|
|
654
|
+
}
|
|
655
|
+
return true;
|
|
656
|
+
});
|
|
657
|
+
}
|
|
658
|
+
/**
|
|
659
|
+
* Prune data files based on partition values and column stats
|
|
660
|
+
*/
|
|
661
|
+
pruneDataFiles(dataFiles, filters) {
|
|
662
|
+
if (filters.length === 0) {
|
|
663
|
+
return dataFiles;
|
|
664
|
+
}
|
|
665
|
+
return dataFiles.filter((df) => {
|
|
666
|
+
for (const filter of filters) {
|
|
667
|
+
// Check partition values
|
|
668
|
+
const partitionValue = df.partition[filter.column];
|
|
669
|
+
if (partitionValue !== undefined) {
|
|
670
|
+
if (!this.matchesPartitionFilter(partitionValue, filter)) {
|
|
671
|
+
return false;
|
|
672
|
+
}
|
|
673
|
+
}
|
|
674
|
+
// Check column statistics if available
|
|
675
|
+
if (df.lowerBounds && df.upperBounds) {
|
|
676
|
+
// Note: This requires knowing the field ID for the column
|
|
677
|
+
// For now, we do basic string comparison
|
|
678
|
+
const lowerBound = df.lowerBounds[filter.column];
|
|
679
|
+
const upperBound = df.upperBounds[filter.column];
|
|
680
|
+
if (lowerBound !== undefined && upperBound !== undefined) {
|
|
681
|
+
if (this.canPruneByBounds(filter, lowerBound, upperBound)) {
|
|
682
|
+
return false;
|
|
683
|
+
}
|
|
684
|
+
}
|
|
685
|
+
}
|
|
686
|
+
}
|
|
687
|
+
return true;
|
|
688
|
+
});
|
|
689
|
+
}
|
|
690
|
+
/**
|
|
691
|
+
* Map filters to partition field indices
|
|
692
|
+
*/
|
|
693
|
+
mapFiltersToPartition(filters, partitionSpec, metadata) {
|
|
694
|
+
const result = [];
|
|
695
|
+
// Get current schema
|
|
696
|
+
const schema = metadata.schemas.find((s) => s.schemaId === metadata.currentSchemaId);
|
|
697
|
+
if (!schema)
|
|
698
|
+
return result;
|
|
699
|
+
for (const filter of filters) {
|
|
700
|
+
// Find schema field by name
|
|
701
|
+
const schemaField = schema.fields.find((f) => f.name === filter.column);
|
|
702
|
+
if (!schemaField)
|
|
703
|
+
continue;
|
|
704
|
+
// Find partition field that sources from this schema field
|
|
705
|
+
const partitionFieldIndex = partitionSpec.fields.findIndex((f) => f.sourceId === schemaField.id);
|
|
706
|
+
if (partitionFieldIndex >= 0) {
|
|
707
|
+
result.push({ fieldIndex: partitionFieldIndex, filter });
|
|
708
|
+
}
|
|
709
|
+
}
|
|
710
|
+
return result;
|
|
711
|
+
}
|
|
712
|
+
/**
|
|
713
|
+
* Check if a filter matches a partition value
|
|
714
|
+
*/
|
|
715
|
+
matchesPartitionFilter(value, filter) {
|
|
716
|
+
switch (filter.operator) {
|
|
717
|
+
case 'eq':
|
|
718
|
+
return value === filter.value;
|
|
719
|
+
case 'neq':
|
|
720
|
+
return value !== filter.value;
|
|
721
|
+
case 'gt':
|
|
722
|
+
return typeof value === 'number' && typeof filter.value === 'number' && value > filter.value;
|
|
723
|
+
case 'gte':
|
|
724
|
+
return typeof value === 'number' && typeof filter.value === 'number' && value >= filter.value;
|
|
725
|
+
case 'lt':
|
|
726
|
+
return typeof value === 'number' && typeof filter.value === 'number' && value < filter.value;
|
|
727
|
+
case 'lte':
|
|
728
|
+
return typeof value === 'number' && typeof filter.value === 'number' && value <= filter.value;
|
|
729
|
+
case 'in':
|
|
730
|
+
return Array.isArray(filter.values) && filter.values.includes(value);
|
|
731
|
+
case 'not_in':
|
|
732
|
+
return !Array.isArray(filter.values) || !filter.values.includes(value);
|
|
733
|
+
case 'is_null':
|
|
734
|
+
return value === null;
|
|
735
|
+
case 'is_not_null':
|
|
736
|
+
return value !== null;
|
|
737
|
+
default:
|
|
738
|
+
return true;
|
|
739
|
+
}
|
|
740
|
+
}
|
|
741
|
+
/**
|
|
742
|
+
* Check if we can prune based on min/max bounds
|
|
743
|
+
*/
|
|
744
|
+
canPruneByBounds(filter, lowerBound, upperBound) {
|
|
745
|
+
const { operator, value } = filter;
|
|
746
|
+
// Convert bounds to comparable values
|
|
747
|
+
const lower = this.toComparable(lowerBound);
|
|
748
|
+
const upper = this.toComparable(upperBound);
|
|
749
|
+
const filterValue = this.toComparable(value);
|
|
750
|
+
if (lower === undefined || upper === undefined || filterValue === undefined) {
|
|
751
|
+
return false;
|
|
752
|
+
}
|
|
753
|
+
switch (operator) {
|
|
754
|
+
case 'eq':
|
|
755
|
+
// Prune if value is outside [lower, upper]
|
|
756
|
+
return filterValue < lower || filterValue > upper;
|
|
757
|
+
case 'gt':
|
|
758
|
+
// Prune if upper <= filter value
|
|
759
|
+
return upper <= filterValue;
|
|
760
|
+
case 'gte':
|
|
761
|
+
// Prune if upper < filter value
|
|
762
|
+
return upper < filterValue;
|
|
763
|
+
case 'lt':
|
|
764
|
+
// Prune if lower >= filter value
|
|
765
|
+
return lower >= filterValue;
|
|
766
|
+
case 'lte':
|
|
767
|
+
// Prune if lower > filter value
|
|
768
|
+
return lower > filterValue;
|
|
769
|
+
default:
|
|
770
|
+
return false;
|
|
771
|
+
}
|
|
772
|
+
}
|
|
773
|
+
// ==========================================================================
|
|
774
|
+
// Private: Utility Methods
|
|
775
|
+
// ==========================================================================
|
|
776
|
+
/**
|
|
777
|
+
* Check if a cache entry is expired
|
|
778
|
+
*/
|
|
779
|
+
isCacheExpired(cachedAt, ttlMs) {
|
|
780
|
+
return Date.now() - cachedAt > ttlMs;
|
|
781
|
+
}
|
|
782
|
+
/**
|
|
783
|
+
* Extract version number from metadata file path
|
|
784
|
+
*/
|
|
785
|
+
extractVersion(path) {
|
|
786
|
+
const match = path.match(/v(\d+)\.metadata\.json$/);
|
|
787
|
+
return match ? parseInt(match[1], 10) : 0;
|
|
788
|
+
}
|
|
789
|
+
/**
|
|
790
|
+
* Convert column bounds from Buffer to comparable values
|
|
791
|
+
*/
|
|
792
|
+
convertBounds(bounds) {
|
|
793
|
+
if (!bounds)
|
|
794
|
+
return undefined;
|
|
795
|
+
const result = {};
|
|
796
|
+
for (const [key, value] of Object.entries(bounds)) {
|
|
797
|
+
// Try to decode as UTF-8 string, fall back to Uint8Array
|
|
798
|
+
try {
|
|
799
|
+
result[parseInt(key, 10)] = value.toString('utf-8');
|
|
800
|
+
}
|
|
801
|
+
catch {
|
|
802
|
+
result[parseInt(key, 10)] = new Uint8Array(value);
|
|
803
|
+
}
|
|
804
|
+
}
|
|
805
|
+
return result;
|
|
806
|
+
}
|
|
807
|
+
/**
|
|
808
|
+
* Convert DataFileEntry column stats to DataFileInfo format
|
|
809
|
+
*/
|
|
810
|
+
convertColumnStats(df) {
|
|
811
|
+
if (!df.lowerBounds && !df.upperBounds && !df.nullValueCounts) {
|
|
812
|
+
return undefined;
|
|
813
|
+
}
|
|
814
|
+
const stats = {};
|
|
815
|
+
// Combine all column IDs
|
|
816
|
+
const columnIds = new Set();
|
|
817
|
+
if (df.lowerBounds)
|
|
818
|
+
Object.keys(df.lowerBounds).forEach((k) => columnIds.add(parseInt(k, 10)));
|
|
819
|
+
if (df.upperBounds)
|
|
820
|
+
Object.keys(df.upperBounds).forEach((k) => columnIds.add(parseInt(k, 10)));
|
|
821
|
+
if (df.nullValueCounts)
|
|
822
|
+
Object.keys(df.nullValueCounts).forEach((k) => columnIds.add(parseInt(k, 10)));
|
|
823
|
+
for (const id of columnIds) {
|
|
824
|
+
stats[id.toString()] = {
|
|
825
|
+
lowerBound: df.lowerBounds?.[id],
|
|
826
|
+
upperBound: df.upperBounds?.[id],
|
|
827
|
+
nullCount: df.nullValueCounts?.[id],
|
|
828
|
+
};
|
|
829
|
+
}
|
|
830
|
+
return stats;
|
|
831
|
+
}
|
|
832
|
+
/**
|
|
833
|
+
* Convert a value to a comparable string/number
|
|
834
|
+
*/
|
|
835
|
+
toComparable(value) {
|
|
836
|
+
if (typeof value === 'string')
|
|
837
|
+
return value;
|
|
838
|
+
if (typeof value === 'number')
|
|
839
|
+
return value;
|
|
840
|
+
if (value instanceof Buffer)
|
|
841
|
+
return value.toString('utf-8');
|
|
842
|
+
if (value instanceof Uint8Array)
|
|
843
|
+
return new TextDecoder().decode(value);
|
|
844
|
+
return undefined;
|
|
845
|
+
}
|
|
846
|
+
/**
|
|
847
|
+
* Evict the oldest entry from a cache map
|
|
848
|
+
*/
|
|
849
|
+
evictOldestFromCache(cache) {
|
|
850
|
+
let oldestKey;
|
|
851
|
+
let oldestTime = Infinity;
|
|
852
|
+
for (const [key, value] of cache) {
|
|
853
|
+
if (value.cachedAt < oldestTime) {
|
|
854
|
+
oldestTime = value.cachedAt;
|
|
855
|
+
oldestKey = key;
|
|
856
|
+
}
|
|
857
|
+
}
|
|
858
|
+
if (oldestKey) {
|
|
859
|
+
cache.delete(oldestKey);
|
|
860
|
+
}
|
|
861
|
+
}
|
|
862
|
+
/**
|
|
863
|
+
* Estimate total cache size in bytes
|
|
864
|
+
*/
|
|
865
|
+
estimateCacheSize() {
|
|
866
|
+
let size = 0;
|
|
867
|
+
// Rough estimate: JSON stringify and measure length
|
|
868
|
+
for (const cached of this.metadataCache.values()) {
|
|
869
|
+
size += JSON.stringify(cached.metadata).length;
|
|
870
|
+
}
|
|
871
|
+
for (const cached of this.manifestListCache.values()) {
|
|
872
|
+
size += JSON.stringify(cached.manifests).length;
|
|
873
|
+
}
|
|
874
|
+
for (const cached of this.manifestCache.values()) {
|
|
875
|
+
size += JSON.stringify(cached.dataFiles).length;
|
|
876
|
+
}
|
|
877
|
+
return size;
|
|
878
|
+
}
|
|
879
|
+
/**
|
|
880
|
+
* Create an empty file scan plan
|
|
881
|
+
*/
|
|
882
|
+
emptyPlan(tableId, snapshotId) {
|
|
883
|
+
return {
|
|
884
|
+
tableId,
|
|
885
|
+
snapshotId,
|
|
886
|
+
files: [],
|
|
887
|
+
totalRecords: 0,
|
|
888
|
+
totalSizeBytes: 0,
|
|
889
|
+
pruningStats: {
|
|
890
|
+
totalManifests: 0,
|
|
891
|
+
prunedManifests: 0,
|
|
892
|
+
totalDataFiles: 0,
|
|
893
|
+
prunedDataFiles: 0,
|
|
894
|
+
},
|
|
895
|
+
createdAt: Date.now(),
|
|
896
|
+
};
|
|
897
|
+
}
|
|
898
|
+
// ==========================================================================
|
|
899
|
+
// Private: Storage Persistence
|
|
900
|
+
// ==========================================================================
|
|
901
|
+
/**
|
|
902
|
+
* Persist a cache entry to DO storage
|
|
903
|
+
*/
|
|
904
|
+
async persistCacheEntry(type, key, entry) {
|
|
905
|
+
await this.ctx.storage.put(`cache:${type}:${key}`, entry);
|
|
906
|
+
}
|
|
907
|
+
/**
|
|
908
|
+
* Restore cache from DO storage on startup
|
|
909
|
+
*/
|
|
910
|
+
async restoreCacheFromStorage() {
|
|
911
|
+
const entries = await this.ctx.storage.list({ prefix: 'cache:' });
|
|
912
|
+
for (const [key, value] of entries) {
|
|
913
|
+
if (key.startsWith('cache:metadata:')) {
|
|
914
|
+
const tableId = key.slice('cache:metadata:'.length);
|
|
915
|
+
const cached = value;
|
|
916
|
+
if (!this.isCacheExpired(cached.cachedAt, cached.ttlMs)) {
|
|
917
|
+
this.metadataCache.set(tableId, cached);
|
|
918
|
+
}
|
|
919
|
+
}
|
|
920
|
+
else if (key.startsWith('cache:manifestList:')) {
|
|
921
|
+
const cacheKey = key.slice('cache:manifestList:'.length);
|
|
922
|
+
const cached = value;
|
|
923
|
+
if (!this.isCacheExpired(cached.cachedAt, cached.ttlMs)) {
|
|
924
|
+
this.manifestListCache.set(cacheKey, cached);
|
|
925
|
+
}
|
|
926
|
+
}
|
|
927
|
+
else if (key.startsWith('cache:manifest:')) {
|
|
928
|
+
const manifestPath = key.slice('cache:manifest:'.length);
|
|
929
|
+
const cached = value;
|
|
930
|
+
if (!this.isCacheExpired(cached.cachedAt, cached.ttlMs)) {
|
|
931
|
+
this.manifestCache.set(manifestPath, cached);
|
|
932
|
+
}
|
|
933
|
+
}
|
|
934
|
+
}
|
|
935
|
+
}
|
|
936
|
+
}
|
|
937
|
+
export default IcebergMetadataDO;
|
|
938
|
+
//# sourceMappingURL=IcebergMetadataDO.js.map
|