dotdo 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +446 -315
- package/dist/ai/index.js +19 -0
- package/dist/ai/index.js.map +1 -0
- package/dist/ai/template-literals.js +852 -0
- package/dist/ai/template-literals.js.map +1 -0
- package/dist/api/analytics/router.js +601 -0
- package/dist/api/analytics/router.js.map +1 -0
- package/dist/api/index.js +158 -0
- package/dist/api/index.js.map +1 -0
- package/dist/api/middleware/auth-federation.js +573 -0
- package/dist/api/middleware/auth-federation.js.map +1 -0
- package/dist/api/middleware/auth.js +544 -0
- package/dist/api/middleware/auth.js.map +1 -0
- package/dist/api/middleware/error-handling.js +176 -0
- package/dist/api/middleware/error-handling.js.map +1 -0
- package/dist/api/middleware/request-id.js +21 -0
- package/dist/api/middleware/request-id.js.map +1 -0
- package/dist/api/pages.js +1180 -0
- package/dist/api/pages.js.map +1 -0
- package/dist/api/routes/api.js +612 -0
- package/dist/api/routes/api.js.map +1 -0
- package/dist/api/routes/browsers.js +471 -0
- package/dist/api/routes/browsers.js.map +1 -0
- package/dist/api/routes/do.js +188 -0
- package/dist/api/routes/do.js.map +1 -0
- package/dist/api/routes/mcp.js +459 -0
- package/dist/api/routes/mcp.js.map +1 -0
- package/dist/api/routes/obs.js +445 -0
- package/dist/api/routes/obs.js.map +1 -0
- package/dist/api/routes/openapi.js +794 -0
- package/dist/api/routes/openapi.js.map +1 -0
- package/dist/api/routes/rpc.js +1103 -0
- package/dist/api/routes/rpc.js.map +1 -0
- package/dist/api/routes/sandboxes.js +389 -0
- package/dist/api/routes/sandboxes.js.map +1 -0
- package/dist/api/test-do.js +38 -0
- package/dist/api/test-do.js.map +1 -0
- package/dist/api/types.js +11 -0
- package/dist/api/types.js.map +1 -0
- package/dist/cli/bin.js +2 -0
- package/dist/cli/main.js +52342 -0
- package/dist/db/actions.js +212 -0
- package/dist/db/actions.js.map +1 -0
- package/dist/db/auth.js +506 -0
- package/dist/db/auth.js.map +1 -0
- package/dist/db/branches.js +65 -0
- package/dist/db/branches.js.map +1 -0
- package/dist/db/clickhouse.js +1074 -0
- package/dist/db/clickhouse.js.map +1 -0
- package/dist/db/dlq.js +39 -0
- package/dist/db/dlq.js.map +1 -0
- package/dist/db/events.js +28 -0
- package/dist/db/events.js.map +1 -0
- package/dist/db/exec.js +64 -0
- package/dist/db/exec.js.map +1 -0
- package/dist/db/files.js +85 -0
- package/dist/db/files.js.map +1 -0
- package/dist/db/flags.js +24 -0
- package/dist/db/flags.js.map +1 -0
- package/dist/db/git.js +116 -0
- package/dist/db/git.js.map +1 -0
- package/dist/db/iceberg/inverted-index.js +862 -0
- package/dist/db/iceberg/inverted-index.js.map +1 -0
- package/dist/db/iceberg/puffin.js +878 -0
- package/dist/db/iceberg/puffin.js.map +1 -0
- package/dist/db/iceberg/search-manifest.js +422 -0
- package/dist/db/iceberg/search-manifest.js.map +1 -0
- package/dist/db/iceberg/types.js +8 -0
- package/dist/db/iceberg/types.js.map +1 -0
- package/dist/db/index.js +121 -0
- package/dist/db/index.js.map +1 -0
- package/dist/db/integrations.js +368 -0
- package/dist/db/integrations.js.map +1 -0
- package/dist/db/json-indexes.js +332 -0
- package/dist/db/json-indexes.js.map +1 -0
- package/dist/db/linked-accounts.js +287 -0
- package/dist/db/linked-accounts.js.map +1 -0
- package/dist/db/nouns.js +183 -0
- package/dist/db/nouns.js.map +1 -0
- package/dist/db/objects.js +170 -0
- package/dist/db/objects.js.map +1 -0
- package/dist/db/primitives/dag-scheduler/index.js +869 -0
- package/dist/db/primitives/dag-scheduler/index.js.map +1 -0
- package/dist/db/primitives/exactly-once-context.js +237 -0
- package/dist/db/primitives/exactly-once-context.js.map +1 -0
- package/dist/db/primitives/index.js +62 -0
- package/dist/db/primitives/index.js.map +1 -0
- package/dist/db/primitives/keyed-router.js +145 -0
- package/dist/db/primitives/keyed-router.js.map +1 -0
- package/dist/db/primitives/observability.js +162 -0
- package/dist/db/primitives/observability.js.map +1 -0
- package/dist/db/primitives/schema-evolution.js +643 -0
- package/dist/db/primitives/schema-evolution.js.map +1 -0
- package/dist/db/primitives/stateful-operator/index.js +770 -0
- package/dist/db/primitives/stateful-operator/index.js.map +1 -0
- package/dist/db/primitives/temporal-store.js +306 -0
- package/dist/db/primitives/temporal-store.js.map +1 -0
- package/dist/db/primitives/typed-column-store.js +1229 -0
- package/dist/db/primitives/typed-column-store.js.map +1 -0
- package/dist/db/primitives/utils/duration.js +162 -0
- package/dist/db/primitives/utils/duration.js.map +1 -0
- package/dist/db/primitives/utils/murmur3.js +118 -0
- package/dist/db/primitives/utils/murmur3.js.map +1 -0
- package/dist/db/primitives/watermark-service.js +136 -0
- package/dist/db/primitives/watermark-service.js.map +1 -0
- package/dist/db/primitives/window-manager.js +764 -0
- package/dist/db/primitives/window-manager.js.map +1 -0
- package/dist/db/relationships.js +66 -0
- package/dist/db/relationships.js.map +1 -0
- package/dist/db/schema-minimal.js +61 -0
- package/dist/db/schema-minimal.js.map +1 -0
- package/dist/db/search.js +28 -0
- package/dist/db/search.js.map +1 -0
- package/dist/db/stores.js +1665 -0
- package/dist/db/stores.js.map +1 -0
- package/dist/db/things.js +297 -0
- package/dist/db/things.js.map +1 -0
- package/dist/db/vault.js +171 -0
- package/dist/db/vault.js.map +1 -0
- package/dist/db/verbs.js +102 -0
- package/dist/db/verbs.js.map +1 -0
- package/dist/do/base.js +48 -0
- package/dist/do/base.js.map +1 -0
- package/dist/do/bash.js +35 -0
- package/dist/do/bash.js.map +1 -0
- package/dist/do/fs.js +25 -0
- package/dist/do/fs.js.map +1 -0
- package/dist/do/full.js +61 -0
- package/dist/do/full.js.map +1 -0
- package/dist/do/git.js +28 -0
- package/dist/do/git.js.map +1 -0
- package/dist/do/index.js +52 -0
- package/dist/do/index.js.map +1 -0
- package/dist/do/tiny.js +31 -0
- package/dist/do/tiny.js.map +1 -0
- package/dist/lib/DOAuth.js +261 -0
- package/dist/lib/DOAuth.js.map +1 -0
- package/dist/lib/DODispatcher.js +72 -0
- package/dist/lib/DODispatcher.js.map +1 -0
- package/dist/lib/Modifier.js +189 -0
- package/dist/lib/Modifier.js.map +1 -0
- package/dist/lib/StateStorage.js +403 -0
- package/dist/lib/StateStorage.js.map +1 -0
- package/dist/lib/TypeRegistry.js +122 -0
- package/dist/lib/TypeRegistry.js.map +1 -0
- package/dist/lib/agent/tools/bash.js +336 -0
- package/dist/lib/agent/tools/bash.js.map +1 -0
- package/dist/lib/agent/tools/edit.js +157 -0
- package/dist/lib/agent/tools/edit.js.map +1 -0
- package/dist/lib/agent/tools/glob.js +137 -0
- package/dist/lib/agent/tools/glob.js.map +1 -0
- package/dist/lib/agent/tools/grep.js +315 -0
- package/dist/lib/agent/tools/grep.js.map +1 -0
- package/dist/lib/agent/tools/index.js +71 -0
- package/dist/lib/agent/tools/index.js.map +1 -0
- package/dist/lib/agent/tools/read.js +212 -0
- package/dist/lib/agent/tools/read.js.map +1 -0
- package/dist/lib/agent/tools/types.js +197 -0
- package/dist/lib/agent/tools/types.js.map +1 -0
- package/dist/lib/agent/tools/write.js +159 -0
- package/dist/lib/agent/tools/write.js.map +1 -0
- package/dist/lib/ai/gateway.js +247 -0
- package/dist/lib/ai/gateway.js.map +1 -0
- package/dist/lib/ai/tool-loop-agent.js +591 -0
- package/dist/lib/ai/tool-loop-agent.js.map +1 -0
- package/dist/lib/auto-wiring.js +439 -0
- package/dist/lib/auto-wiring.js.map +1 -0
- package/dist/lib/browse/browserbase.js +163 -0
- package/dist/lib/browse/browserbase.js.map +1 -0
- package/dist/lib/browse/cloudflare.js +144 -0
- package/dist/lib/browse/cloudflare.js.map +1 -0
- package/dist/lib/browse/index.js +62 -0
- package/dist/lib/browse/index.js.map +1 -0
- package/dist/lib/browse/types.js +13 -0
- package/dist/lib/browse/types.js.map +1 -0
- package/dist/lib/cache/index.js +37 -0
- package/dist/lib/cache/index.js.map +1 -0
- package/dist/lib/cache/visibility.js +638 -0
- package/dist/lib/cache/visibility.js.map +1 -0
- package/dist/lib/capabilities.js +268 -0
- package/dist/lib/capabilities.js.map +1 -0
- package/dist/lib/channels/base.js +106 -0
- package/dist/lib/channels/base.js.map +1 -0
- package/dist/lib/channels/discord.js +94 -0
- package/dist/lib/channels/discord.js.map +1 -0
- package/dist/lib/channels/email.js +204 -0
- package/dist/lib/channels/email.js.map +1 -0
- package/dist/lib/channels/index.js +90 -0
- package/dist/lib/channels/index.js.map +1 -0
- package/dist/lib/channels/mdxui-chat.js +95 -0
- package/dist/lib/channels/mdxui-chat.js.map +1 -0
- package/dist/lib/channels/slack-blockkit.js +121 -0
- package/dist/lib/channels/slack-blockkit.js.map +1 -0
- package/dist/lib/channels/types.js +7 -0
- package/dist/lib/channels/types.js.map +1 -0
- package/dist/lib/cloudflare/ai.js +654 -0
- package/dist/lib/cloudflare/ai.js.map +1 -0
- package/dist/lib/cloudflare/index.js +88 -0
- package/dist/lib/cloudflare/index.js.map +1 -0
- package/dist/lib/cloudflare/kv.js +342 -0
- package/dist/lib/cloudflare/kv.js.map +1 -0
- package/dist/lib/cloudflare/queues.js +434 -0
- package/dist/lib/cloudflare/queues.js.map +1 -0
- package/dist/lib/cloudflare/r2.js +604 -0
- package/dist/lib/cloudflare/r2.js.map +1 -0
- package/dist/lib/cloudflare/vectorize.js +494 -0
- package/dist/lib/cloudflare/vectorize.js.map +1 -0
- package/dist/lib/cloudflare/workflows.js +569 -0
- package/dist/lib/cloudflare/workflows.js.map +1 -0
- package/dist/lib/colo/caching.js +196 -0
- package/dist/lib/colo/caching.js.map +1 -0
- package/dist/lib/colo/detection.js +194 -0
- package/dist/lib/colo/detection.js.map +1 -0
- package/dist/lib/colo/external-data.js +219 -0
- package/dist/lib/colo/external-data.js.map +1 -0
- package/dist/lib/colo/globe-data.js +179 -0
- package/dist/lib/colo/globe-data.js.map +1 -0
- package/dist/lib/colo/index.js +16 -0
- package/dist/lib/colo/index.js.map +1 -0
- package/dist/lib/decorators.js +37 -0
- package/dist/lib/decorators.js.map +1 -0
- package/dist/lib/discovery.js +81 -0
- package/dist/lib/discovery.js.map +1 -0
- package/dist/lib/executors/AgenticFunctionExecutor.js +619 -0
- package/dist/lib/executors/AgenticFunctionExecutor.js.map +1 -0
- package/dist/lib/executors/BaseFunctionExecutor.js +328 -0
- package/dist/lib/executors/BaseFunctionExecutor.js.map +1 -0
- package/dist/lib/executors/CascadeExecutor.js +418 -0
- package/dist/lib/executors/CascadeExecutor.js.map +1 -0
- package/dist/lib/executors/CodeFunctionExecutor.js +904 -0
- package/dist/lib/executors/CodeFunctionExecutor.js.map +1 -0
- package/dist/lib/executors/GenerativeFunctionExecutor.js +904 -0
- package/dist/lib/executors/GenerativeFunctionExecutor.js.map +1 -0
- package/dist/lib/executors/HumanFunctionExecutor.js +884 -0
- package/dist/lib/executors/HumanFunctionExecutor.js.map +1 -0
- package/dist/lib/executors/ParallelStepExecutor.js +308 -0
- package/dist/lib/executors/ParallelStepExecutor.js.map +1 -0
- package/dist/lib/executors/types.js +12 -0
- package/dist/lib/executors/types.js.map +1 -0
- package/dist/lib/experiments.js +89 -0
- package/dist/lib/experiments.js.map +1 -0
- package/dist/lib/flags/store.js +262 -0
- package/dist/lib/flags/store.js.map +1 -0
- package/dist/lib/functions/FunctionComposition.js +467 -0
- package/dist/lib/functions/FunctionComposition.js.map +1 -0
- package/dist/lib/functions/FunctionMiddleware.js +457 -0
- package/dist/lib/functions/FunctionMiddleware.js.map +1 -0
- package/dist/lib/functions/FunctionRegistry.js +426 -0
- package/dist/lib/functions/FunctionRegistry.js.map +1 -0
- package/dist/lib/functions/createFunction.js +1048 -0
- package/dist/lib/functions/createFunction.js.map +1 -0
- package/dist/lib/humans/index.js +68 -0
- package/dist/lib/humans/index.js.map +1 -0
- package/dist/lib/humans/templates.js +117 -0
- package/dist/lib/humans/templates.js.map +1 -0
- package/dist/lib/identity.js +98 -0
- package/dist/lib/identity.js.map +1 -0
- package/dist/lib/index.js +9 -0
- package/dist/lib/index.js.map +1 -0
- package/dist/lib/logging/error-logger.js +163 -0
- package/dist/lib/logging/error-logger.js.map +1 -0
- package/dist/lib/logging/index.js +160 -0
- package/dist/lib/logging/index.js.map +1 -0
- package/dist/lib/mixins/bash.js +825 -0
- package/dist/lib/mixins/bash.js.map +1 -0
- package/dist/lib/mixins/fs.js +648 -0
- package/dist/lib/mixins/fs.js.map +1 -0
- package/dist/lib/mixins/git.js +1011 -0
- package/dist/lib/mixins/git.js.map +1 -0
- package/dist/lib/mixins/index.js +29 -0
- package/dist/lib/mixins/index.js.map +1 -0
- package/dist/lib/mixins/npm.js +662 -0
- package/dist/lib/mixins/npm.js.map +1 -0
- package/dist/lib/noun-id.js +278 -0
- package/dist/lib/noun-id.js.map +1 -0
- package/dist/lib/rate-limit/sliding-window.js +148 -0
- package/dist/lib/rate-limit/sliding-window.js.map +1 -0
- package/dist/lib/rate-limit.js +110 -0
- package/dist/lib/rate-limit.js.map +1 -0
- package/dist/lib/rpc/bindings.js +548 -0
- package/dist/lib/rpc/bindings.js.map +1 -0
- package/dist/lib/rpc/index.js +64 -0
- package/dist/lib/rpc/index.js.map +1 -0
- package/dist/lib/safe-stringify.js +223 -0
- package/dist/lib/safe-stringify.js.map +1 -0
- package/dist/lib/sandbox/miniflare-sandbox.js +1007 -0
- package/dist/lib/sandbox/miniflare-sandbox.js.map +1 -0
- package/dist/lib/sqids.js +110 -0
- package/dist/lib/sqids.js.map +1 -0
- package/dist/lib/sql/adapters/index.js +10 -0
- package/dist/lib/sql/adapters/index.js.map +1 -0
- package/dist/lib/sql/adapters/node-sql-parser.js +552 -0
- package/dist/lib/sql/adapters/node-sql-parser.js.map +1 -0
- package/dist/lib/sql/adapters/pgsql-parser.js +1189 -0
- package/dist/lib/sql/adapters/pgsql-parser.js.map +1 -0
- package/dist/lib/sql/index.js +277 -0
- package/dist/lib/sql/index.js.map +1 -0
- package/dist/lib/sql/types.js +56 -0
- package/dist/lib/sql/types.js.map +1 -0
- package/dist/lib/type-classifier.js +126 -0
- package/dist/lib/type-classifier.js.map +1 -0
- package/dist/lib/utils/html.js +47 -0
- package/dist/lib/utils/html.js.map +1 -0
- package/dist/lib/validation.js +48 -0
- package/dist/lib/validation.js.map +1 -0
- package/dist/lib/vault/store.js +411 -0
- package/dist/lib/vault/store.js.map +1 -0
- package/dist/metrics/hunch.js +739 -0
- package/dist/metrics/hunch.js.map +1 -0
- package/dist/objects/API.js +302 -0
- package/dist/objects/API.js.map +1 -0
- package/dist/objects/Agent.js +179 -0
- package/dist/objects/Agent.js.map +1 -0
- package/dist/objects/AgenticFunctionExecutor.js +8 -0
- package/dist/objects/AgenticFunctionExecutor.js.map +1 -0
- package/dist/objects/App.js +83 -0
- package/dist/objects/App.js.map +1 -0
- package/dist/objects/Browser.js +884 -0
- package/dist/objects/Browser.js.map +1 -0
- package/dist/objects/Business.js +107 -0
- package/dist/objects/Business.js.map +1 -0
- package/dist/objects/CLI.js +221 -0
- package/dist/objects/CLI.js.map +1 -0
- package/dist/objects/CodeFunctionExecutor.js +8 -0
- package/dist/objects/CodeFunctionExecutor.js.map +1 -0
- package/dist/objects/Collection.js +161 -0
- package/dist/objects/Collection.js.map +1 -0
- package/dist/objects/DO.js +41 -0
- package/dist/objects/DO.js.map +1 -0
- package/dist/objects/DOBase.js +2309 -0
- package/dist/objects/DOBase.js.map +1 -0
- package/dist/objects/DOFull.js +1676 -0
- package/dist/objects/DOFull.js.map +1 -0
- package/dist/objects/DOTiny.js +207 -0
- package/dist/objects/DOTiny.js.map +1 -0
- package/dist/objects/Directory.js +199 -0
- package/dist/objects/Directory.js.map +1 -0
- package/dist/objects/Entity.js +413 -0
- package/dist/objects/Entity.js.map +1 -0
- package/dist/objects/Function.js +116 -0
- package/dist/objects/Function.js.map +1 -0
- package/dist/objects/Human.js +231 -0
- package/dist/objects/Human.js.map +1 -0
- package/dist/objects/HumanFunctionExecutor.js +8 -0
- package/dist/objects/HumanFunctionExecutor.js.map +1 -0
- package/dist/objects/IcebergMetadataDO.js +938 -0
- package/dist/objects/IcebergMetadataDO.js.map +1 -0
- package/dist/objects/IntegrationsDO.js +1174 -0
- package/dist/objects/IntegrationsDO.js.map +1 -0
- package/dist/objects/ObservabilityBroadcaster.js +149 -0
- package/dist/objects/ObservabilityBroadcaster.js.map +1 -0
- package/dist/objects/Package.js +154 -0
- package/dist/objects/Package.js.map +1 -0
- package/dist/objects/Product.js +193 -0
- package/dist/objects/Product.js.map +1 -0
- package/dist/objects/SDK.js +152 -0
- package/dist/objects/SDK.js.map +1 -0
- package/dist/objects/SaaS.js +235 -0
- package/dist/objects/SaaS.js.map +1 -0
- package/dist/objects/SandboxDO.js +759 -0
- package/dist/objects/SandboxDO.js.map +1 -0
- package/dist/objects/Service.js +337 -0
- package/dist/objects/Service.js.map +1 -0
- package/dist/objects/Site.js +80 -0
- package/dist/objects/Site.js.map +1 -0
- package/dist/objects/Startup.js +479 -0
- package/dist/objects/Startup.js.map +1 -0
- package/dist/objects/ThingsDO.js +170 -0
- package/dist/objects/ThingsDO.js.map +1 -0
- package/dist/objects/VectorShardDO.js +648 -0
- package/dist/objects/VectorShardDO.js.map +1 -0
- package/dist/objects/Worker.js +144 -0
- package/dist/objects/Worker.js.map +1 -0
- package/dist/objects/Workflow.js +196 -0
- package/dist/objects/Workflow.js.map +1 -0
- package/dist/objects/WorkflowFactory.js +313 -0
- package/dist/objects/WorkflowFactory.js.map +1 -0
- package/dist/objects/WorkflowRuntime.js +863 -0
- package/dist/objects/WorkflowRuntime.js.map +1 -0
- package/dist/objects/circuit-breaker-bulkhead.js +178 -0
- package/dist/objects/circuit-breaker-bulkhead.js.map +1 -0
- package/dist/objects/createFunction.js +934 -0
- package/dist/objects/createFunction.js.map +1 -0
- package/dist/objects/index.js +80 -0
- package/dist/objects/index.js.map +1 -0
- package/dist/objects/lifecycle/Branch.js +275 -0
- package/dist/objects/lifecycle/Branch.js.map +1 -0
- package/dist/objects/lifecycle/Clone.js +1499 -0
- package/dist/objects/lifecycle/Clone.js.map +1 -0
- package/dist/objects/lifecycle/Compact.js +237 -0
- package/dist/objects/lifecycle/Compact.js.map +1 -0
- package/dist/objects/lifecycle/Promote.js +476 -0
- package/dist/objects/lifecycle/Promote.js.map +1 -0
- package/dist/objects/lifecycle/Shard.js +560 -0
- package/dist/objects/lifecycle/Shard.js.map +1 -0
- package/dist/objects/lifecycle/index.js +15 -0
- package/dist/objects/lifecycle/index.js.map +1 -0
- package/dist/objects/lifecycle/types.js +33 -0
- package/dist/objects/lifecycle/types.js.map +1 -0
- package/dist/objects/mixins/infrastructure.js +171 -0
- package/dist/objects/mixins/infrastructure.js.map +1 -0
- package/dist/objects/modules/StoresModule.js +153 -0
- package/dist/objects/modules/StoresModule.js.map +1 -0
- package/dist/objects/persistence/checkpoint-manager.js +606 -0
- package/dist/objects/persistence/checkpoint-manager.js.map +1 -0
- package/dist/objects/persistence/index.js +72 -0
- package/dist/objects/persistence/index.js.map +1 -0
- package/dist/objects/persistence/migration-runner.js +562 -0
- package/dist/objects/persistence/migration-runner.js.map +1 -0
- package/dist/objects/persistence/replication-manager.js +501 -0
- package/dist/objects/persistence/replication-manager.js.map +1 -0
- package/dist/objects/persistence/tiered-storage-manager.js +595 -0
- package/dist/objects/persistence/tiered-storage-manager.js.map +1 -0
- package/dist/objects/persistence/types.js +14 -0
- package/dist/objects/persistence/types.js.map +1 -0
- package/dist/objects/persistence/wal-manager.js +653 -0
- package/dist/objects/persistence/wal-manager.js.map +1 -0
- package/dist/objects/presets/index.js +20 -0
- package/dist/objects/presets/index.js.map +1 -0
- package/dist/objects/presets/primitives.js +188 -0
- package/dist/objects/presets/primitives.js.map +1 -0
- package/dist/objects/primitives/alarm-adapter.js +141 -0
- package/dist/objects/primitives/alarm-adapter.js.map +1 -0
- package/dist/objects/primitives/index.js +337 -0
- package/dist/objects/primitives/index.js.map +1 -0
- package/dist/objects/primitives/storage-adapter.js +182 -0
- package/dist/objects/primitives/storage-adapter.js.map +1 -0
- package/dist/objects/primitives/with-primitives.js +102 -0
- package/dist/objects/primitives/with-primitives.js.map +1 -0
- package/dist/objects/services/StoreManager.js +227 -0
- package/dist/objects/services/StoreManager.js.map +1 -0
- package/dist/objects/services/index.js +13 -0
- package/dist/objects/services/index.js.map +1 -0
- package/dist/objects/transport/auth-layer.js +1451 -0
- package/dist/objects/transport/auth-layer.js.map +1 -0
- package/dist/objects/transport/capnweb-target.js +355 -0
- package/dist/objects/transport/capnweb-target.js.map +1 -0
- package/dist/objects/transport/chain.js +441 -0
- package/dist/objects/transport/chain.js.map +1 -0
- package/dist/objects/transport/handler.js +58 -0
- package/dist/objects/transport/handler.js.map +1 -0
- package/dist/objects/transport/index.js +53 -0
- package/dist/objects/transport/index.js.map +1 -0
- package/dist/objects/transport/mcp-server.js +690 -0
- package/dist/objects/transport/mcp-server.js.map +1 -0
- package/dist/objects/transport/rest-autowire.js +1507 -0
- package/dist/objects/transport/rest-autowire.js.map +1 -0
- package/dist/objects/transport/rest-router.js +440 -0
- package/dist/objects/transport/rest-router.js.map +1 -0
- package/dist/objects/transport/rpc-server.js +1536 -0
- package/dist/objects/transport/rpc-server.js.map +1 -0
- package/dist/objects/transport/shared.js +575 -0
- package/dist/objects/transport/shared.js.map +1 -0
- package/dist/objects/transport/sync-engine.js +291 -0
- package/dist/objects/transport/sync-engine.js.map +1 -0
- package/dist/objects/transport/types.js +8 -0
- package/dist/objects/transport/types.js.map +1 -0
- package/dist/primitives/bashx/src/ast/analyze.js +1472 -0
- package/dist/primitives/bashx/src/ast/analyze.js.map +1 -0
- package/dist/primitives/bashx/src/ast/parser.js +1488 -0
- package/dist/primitives/bashx/src/ast/parser.js.map +1 -0
- package/dist/primitives/bashx/src/do/commands/crypto.js +1954 -0
- package/dist/primitives/bashx/src/do/commands/crypto.js.map +1 -0
- package/dist/primitives/bashx/src/do/commands/data-processing.js +1812 -0
- package/dist/primitives/bashx/src/do/commands/data-processing.js.map +1 -0
- package/dist/primitives/bashx/src/do/commands/extended-utils.js +804 -0
- package/dist/primitives/bashx/src/do/commands/extended-utils.js.map +1 -0
- package/dist/primitives/bashx/src/do/commands/math-control.js +1122 -0
- package/dist/primitives/bashx/src/do/commands/math-control.js.map +1 -0
- package/dist/primitives/bashx/src/do/commands/posix-utils.js +1015 -0
- package/dist/primitives/bashx/src/do/commands/posix-utils.js.map +1 -0
- package/dist/primitives/bashx/src/do/commands/system-utils.js +687 -0
- package/dist/primitives/bashx/src/do/commands/system-utils.js.map +1 -0
- package/dist/primitives/bashx/src/do/commands/test-command.js +523 -0
- package/dist/primitives/bashx/src/do/commands/test-command.js.map +1 -0
- package/dist/primitives/bashx/src/do/commands/text-processing.js +1550 -0
- package/dist/primitives/bashx/src/do/commands/text-processing.js.map +1 -0
- package/dist/primitives/bashx/src/do/container-executor.js +429 -0
- package/dist/primitives/bashx/src/do/container-executor.js.map +1 -0
- package/dist/primitives/bashx/src/do/index.js +668 -0
- package/dist/primitives/bashx/src/do/index.js.map +1 -0
- package/dist/primitives/bashx/src/do/tiered-executor.js +2647 -0
- package/dist/primitives/bashx/src/do/tiered-executor.js.map +1 -0
- package/dist/primitives/bashx/src/do/worker.js +352 -0
- package/dist/primitives/bashx/src/do/worker.js.map +1 -0
- package/dist/primitives/bashx/src/types.js +10 -0
- package/dist/primitives/bashx/src/types.js.map +1 -0
- package/dist/primitives/fsx/core/backend.js +480 -0
- package/dist/primitives/fsx/core/backend.js.map +1 -0
- package/dist/primitives/fsx/core/constants.js +140 -0
- package/dist/primitives/fsx/core/constants.js.map +1 -0
- package/dist/primitives/fsx/core/fsx.js +1184 -0
- package/dist/primitives/fsx/core/fsx.js.map +1 -0
- package/dist/primitives/fsx/core/glob/glob.js +438 -0
- package/dist/primitives/fsx/core/glob/glob.js.map +1 -0
- package/dist/primitives/fsx/core/glob/index.js +8 -0
- package/dist/primitives/fsx/core/glob/index.js.map +1 -0
- package/dist/primitives/fsx/core/glob/match.js +392 -0
- package/dist/primitives/fsx/core/glob/match.js.map +1 -0
- package/dist/primitives/fsx/core/types.js +307 -0
- package/dist/primitives/fsx/core/types.js.map +1 -0
- package/dist/sandbox/index.js +258 -0
- package/dist/sandbox/index.js.map +1 -0
- package/dist/sdk/capnweb-compat.js +42 -0
- package/dist/sdk/capnweb-compat.js.map +1 -0
- package/dist/sdk/client.js +20 -0
- package/dist/sdk/client.js.map +1 -0
- package/dist/sdk/index.js +17 -0
- package/dist/sdk/index.js.map +1 -0
- package/dist/snippets/artifacts-config.js +241 -0
- package/dist/snippets/artifacts-config.js.map +1 -0
- package/dist/snippets/artifacts-ingest.js +832 -0
- package/dist/snippets/artifacts-ingest.js.map +1 -0
- package/dist/snippets/artifacts-serve.js +1035 -0
- package/dist/snippets/artifacts-serve.js.map +1 -0
- package/dist/snippets/artifacts-types.js +161 -0
- package/dist/snippets/artifacts-types.js.map +1 -0
- package/dist/snippets/cache-probe.js +376 -0
- package/dist/snippets/cache-probe.js.map +1 -0
- package/dist/snippets/cache.js +10 -0
- package/dist/snippets/cache.js.map +1 -0
- package/dist/snippets/events.js +469 -0
- package/dist/snippets/events.js.map +1 -0
- package/dist/snippets/index.js +7 -0
- package/dist/snippets/index.js.map +1 -0
- package/dist/snippets/proxy.js +495 -0
- package/dist/snippets/proxy.js.map +1 -0
- package/dist/snippets/search.js +1759 -0
- package/dist/snippets/search.js.map +1 -0
- package/dist/streams/index.js +30 -0
- package/dist/streams/index.js.map +1 -0
- package/dist/streams/observability.js +68 -0
- package/dist/streams/observability.js.map +1 -0
- package/dist/types/AI.js +92 -0
- package/dist/types/AI.js.map +1 -0
- package/dist/types/AIFunction.js +171 -0
- package/dist/types/AIFunction.js.map +1 -0
- package/dist/types/BrowseVerb.js +89 -0
- package/dist/types/BrowseVerb.js.map +1 -0
- package/dist/types/Browser.js +31 -0
- package/dist/types/Browser.js.map +1 -0
- package/dist/types/Chaos.js +15 -0
- package/dist/types/Chaos.js.map +1 -0
- package/dist/types/CloudflareBindings.js +109 -0
- package/dist/types/CloudflareBindings.js.map +1 -0
- package/dist/types/Collection.js +50 -0
- package/dist/types/Collection.js.map +1 -0
- package/dist/types/DO.js +2 -0
- package/dist/types/DO.js.map +1 -0
- package/dist/types/DOLocation.js +63 -0
- package/dist/types/DOLocation.js.map +1 -0
- package/dist/types/EventHandler.js +57 -0
- package/dist/types/EventHandler.js.map +1 -0
- package/dist/types/Experiment.js +33 -0
- package/dist/types/Experiment.js.map +1 -0
- package/dist/types/Flag.js +57 -0
- package/dist/types/Flag.js.map +1 -0
- package/dist/types/Lifecycle.js +13 -0
- package/dist/types/Lifecycle.js.map +1 -0
- package/dist/types/Location.js +169 -0
- package/dist/types/Location.js.map +1 -0
- package/dist/types/Noun.js +66 -0
- package/dist/types/Noun.js.map +1 -0
- package/dist/types/SessionEvent.js +194 -0
- package/dist/types/SessionEvent.js.map +1 -0
- package/dist/types/Thing.js +55 -0
- package/dist/types/Thing.js.map +1 -0
- package/dist/types/ThingDO.js +153 -0
- package/dist/types/ThingDO.js.map +1 -0
- package/dist/types/Things.js +2 -0
- package/dist/types/Things.js.map +1 -0
- package/dist/types/Verb.js +119 -0
- package/dist/types/Verb.js.map +1 -0
- package/dist/types/WorkflowContext.js +70 -0
- package/dist/types/WorkflowContext.js.map +1 -0
- package/dist/types/analytics-api.js +13 -0
- package/dist/types/analytics-api.js.map +1 -0
- package/dist/types/capabilities.js +135 -0
- package/dist/types/capabilities.js.map +1 -0
- package/dist/types/drizzle.js +12 -0
- package/dist/types/drizzle.js.map +1 -0
- package/dist/types/event.js +201 -0
- package/dist/types/event.js.map +1 -0
- package/dist/types/fn.js +12 -0
- package/dist/types/fn.js.map +1 -0
- package/dist/types/iceberg.js +48 -0
- package/dist/types/iceberg.js.map +1 -0
- package/dist/types/ids.js +170 -0
- package/dist/types/ids.js.map +1 -0
- package/dist/types/index.js +41 -0
- package/dist/types/index.js.map +1 -0
- package/dist/types/introspect.js +54 -0
- package/dist/types/introspect.js.map +1 -0
- package/dist/types/observability.js +124 -0
- package/dist/types/observability.js.map +1 -0
- package/dist/types/sync-protocol.js +175 -0
- package/dist/types/sync-protocol.js.map +1 -0
- package/dist/types/vector.js +13 -0
- package/dist/types/vector.js.map +1 -0
- package/dist/workflows/ScheduleManager.js +473 -0
- package/dist/workflows/ScheduleManager.js.map +1 -0
- package/dist/workflows/StepDOBridge.js +149 -0
- package/dist/workflows/StepDOBridge.js.map +1 -0
- package/dist/workflows/StepResultStorage.js +232 -0
- package/dist/workflows/StepResultStorage.js.map +1 -0
- package/dist/workflows/WaitForEventManager.js +461 -0
- package/dist/workflows/WaitForEventManager.js.map +1 -0
- package/dist/workflows/analyzer.js +332 -0
- package/dist/workflows/analyzer.js.map +1 -0
- package/dist/workflows/compat/activity-router.js +484 -0
- package/dist/workflows/compat/activity-router.js.map +1 -0
- package/dist/workflows/compat/backends/cloudflare-workflows.js +431 -0
- package/dist/workflows/compat/backends/cloudflare-workflows.js.map +1 -0
- package/dist/workflows/compat/backends/index.js +14 -0
- package/dist/workflows/compat/backends/index.js.map +1 -0
- package/dist/workflows/compat/errors/index.js +375 -0
- package/dist/workflows/compat/errors/index.js.map +1 -0
- package/dist/workflows/compat/index.js +79 -0
- package/dist/workflows/compat/index.js.map +1 -0
- package/dist/workflows/compat/inngest/index.js +989 -0
- package/dist/workflows/compat/inngest/index.js.map +1 -0
- package/dist/workflows/compat/qstash/index.js +1263 -0
- package/dist/workflows/compat/qstash/index.js.map +1 -0
- package/dist/workflows/compat/temporal/activities.js +739 -0
- package/dist/workflows/compat/temporal/activities.js.map +1 -0
- package/dist/workflows/compat/temporal/child-workflows.js +154 -0
- package/dist/workflows/compat/temporal/child-workflows.js.map +1 -0
- package/dist/workflows/compat/temporal/client.js +381 -0
- package/dist/workflows/compat/temporal/client.js.map +1 -0
- package/dist/workflows/compat/temporal/context.js +309 -0
- package/dist/workflows/compat/temporal/context.js.map +1 -0
- package/dist/workflows/compat/temporal/determinism.js +216 -0
- package/dist/workflows/compat/temporal/determinism.js.map +1 -0
- package/dist/workflows/compat/temporal/errors.js +128 -0
- package/dist/workflows/compat/temporal/errors.js.map +1 -0
- package/dist/workflows/compat/temporal/index.js +2464 -0
- package/dist/workflows/compat/temporal/index.js.map +1 -0
- package/dist/workflows/compat/temporal/saga.js +504 -0
- package/dist/workflows/compat/temporal/saga.js.map +1 -0
- package/dist/workflows/compat/temporal/signals.js +364 -0
- package/dist/workflows/compat/temporal/signals.js.map +1 -0
- package/dist/workflows/compat/temporal/storage.js +271 -0
- package/dist/workflows/compat/temporal/storage.js.map +1 -0
- package/dist/workflows/compat/temporal/timers.js +347 -0
- package/dist/workflows/compat/temporal/timers.js.map +1 -0
- package/dist/workflows/compat/temporal/types.js +7 -0
- package/dist/workflows/compat/temporal/types.js.map +1 -0
- package/dist/workflows/compat/temporal/unified-primitives.js +339 -0
- package/dist/workflows/compat/temporal/unified-primitives.js.map +1 -0
- package/dist/workflows/compat/trigger/index.js +468 -0
- package/dist/workflows/compat/trigger/index.js.map +1 -0
- package/dist/workflows/compat/utils/index.js +69 -0
- package/dist/workflows/compat/utils/index.js.map +1 -0
- package/dist/workflows/context/correlation-capability.js +266 -0
- package/dist/workflows/context/correlation-capability.js.map +1 -0
- package/dist/workflows/context/correlation.js +484 -0
- package/dist/workflows/context/correlation.js.map +1 -0
- package/dist/workflows/context/experiment.js +289 -0
- package/dist/workflows/context/experiment.js.map +1 -0
- package/dist/workflows/context/flag.js +244 -0
- package/dist/workflows/context/flag.js.map +1 -0
- package/dist/workflows/context/foundation.js +648 -0
- package/dist/workflows/context/foundation.js.map +1 -0
- package/dist/workflows/context/human-base.js +106 -0
- package/dist/workflows/context/human-base.js.map +1 -0
- package/dist/workflows/context/human.js +368 -0
- package/dist/workflows/context/human.js.map +1 -0
- package/dist/workflows/context/measure.js +354 -0
- package/dist/workflows/context/measure.js.map +1 -0
- package/dist/workflows/context/rate-limit.js +358 -0
- package/dist/workflows/context/rate-limit.js.map +1 -0
- package/dist/workflows/context/user.js +117 -0
- package/dist/workflows/context/user.js.map +1 -0
- package/dist/workflows/context/vault.js +360 -0
- package/dist/workflows/context/vault.js.map +1 -0
- package/dist/workflows/data/entity-events/entity-events.js +489 -0
- package/dist/workflows/data/entity-events/entity-events.js.map +1 -0
- package/dist/workflows/data/experiment/index.js +599 -0
- package/dist/workflows/data/experiment/index.js.map +1 -0
- package/dist/workflows/data/goal/context.js +558 -0
- package/dist/workflows/data/goal/context.js.map +1 -0
- package/dist/workflows/data/goal/index.js +32 -0
- package/dist/workflows/data/goal/index.js.map +1 -0
- package/dist/workflows/data/measure/index.js +840 -0
- package/dist/workflows/data/measure/index.js.map +1 -0
- package/dist/workflows/data/stream/index.js +1215 -0
- package/dist/workflows/data/stream/index.js.map +1 -0
- package/dist/workflows/data/track/context.js +883 -0
- package/dist/workflows/data/track/context.js.map +1 -0
- package/dist/workflows/data/track/index.js +15 -0
- package/dist/workflows/data/track/index.js.map +1 -0
- package/dist/workflows/data/view/context.js +864 -0
- package/dist/workflows/data/view/context.js.map +1 -0
- package/dist/workflows/domain.js +93 -0
- package/dist/workflows/domain.js.map +1 -0
- package/dist/workflows/flag.js +176 -0
- package/dist/workflows/flag.js.map +1 -0
- package/dist/workflows/flags.js +217 -0
- package/dist/workflows/flags.js.map +1 -0
- package/dist/workflows/hash.js +209 -0
- package/dist/workflows/hash.js.map +1 -0
- package/dist/workflows/index.js +50 -0
- package/dist/workflows/index.js.map +1 -0
- package/dist/workflows/on.js +378 -0
- package/dist/workflows/on.js.map +1 -0
- package/dist/workflows/pipeline-promise.js +481 -0
- package/dist/workflows/pipeline-promise.js.map +1 -0
- package/dist/workflows/pipeline-types.js +20 -0
- package/dist/workflows/pipeline-types.js.map +1 -0
- package/dist/workflows/proxy.js +76 -0
- package/dist/workflows/proxy.js.map +1 -0
- package/dist/workflows/runtime.js +310 -0
- package/dist/workflows/runtime.js.map +1 -0
- package/dist/workflows/schedule-builder.js +327 -0
- package/dist/workflows/schedule-builder.js.map +1 -0
- package/dist/workflows/visibility/index.js +148 -0
- package/dist/workflows/visibility/index.js.map +1 -0
- package/dist/workflows/visibility/query-parser.js +150 -0
- package/dist/workflows/visibility/query-parser.js.map +1 -0
- package/dist/workflows/visibility/store.js +223 -0
- package/dist/workflows/visibility/store.js.map +1 -0
- package/dist/workflows/visibility/types.js +30 -0
- package/dist/workflows/visibility/types.js.map +1 -0
- package/dist/workflows/workflow.js +53 -0
- package/dist/workflows/workflow.js.map +1 -0
- package/package.json +279 -46
|
@@ -0,0 +1,878 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Iceberg Puffin Sidecar File Format
|
|
3
|
+
*
|
|
4
|
+
* Puffin is Iceberg's format for storing additional table statistics like
|
|
5
|
+
* bloom filters, theta sketches, and other indexed structures. These sidecars
|
|
6
|
+
* enable efficient query pruning without scanning Parquet files.
|
|
7
|
+
*
|
|
8
|
+
* File format:
|
|
9
|
+
* - Magic "PFA1" (4 bytes)
|
|
10
|
+
* - Blob data (variable length blobs concatenated)
|
|
11
|
+
* - Footer JSON (blob metadata)
|
|
12
|
+
* - Footer length (4 bytes, little-endian)
|
|
13
|
+
* - Magic "PFA1" (4 bytes)
|
|
14
|
+
*
|
|
15
|
+
* Key features:
|
|
16
|
+
* - Range-addressable: Footer can be read independently, then specific blobs
|
|
17
|
+
* - Supports multiple blob types per file (bloom, ngram, set index)
|
|
18
|
+
* - Designed for Cloudflare Snippets (<1ms blob lookup from cache)
|
|
19
|
+
*
|
|
20
|
+
* @see https://iceberg.apache.org/puffin-spec/
|
|
21
|
+
* @module db/iceberg/puffin
|
|
22
|
+
*/
|
|
23
|
+
// ============================================================================
|
|
24
|
+
// Constants
|
|
25
|
+
// ============================================================================
|
|
26
|
+
/** Magic byte sequence written at both ends of a Puffin file: ASCII "PFA1". */
export const PUFFIN_MAGIC = Uint8Array.of(0x50, 0x46, 0x41, 0x31);

/** False positive rate applied to bloom filters when the caller supplies none. */
export const DEFAULT_FPR = 1e-2;

/** Bits per element at 1% FPR: -ln(0.01) / ln(2)^2 ≈ 9.6, rounded up to 10. */
export const BITS_PER_ELEMENT_1_PERCENT = 10;

/** Hash function count at 1% FPR: ln(2) * (bits per element) ≈ 7. */
export const HASH_FUNCTIONS_1_PERCENT = 7;
|
|
34
|
+
// ============================================================================
|
|
35
|
+
// MurmurHash3 Implementation
|
|
36
|
+
// ============================================================================
|
|
37
|
+
/**
 * 32-bit MurmurHash3 (x86_32 variant).
 *
 * A fast, non-cryptographic hash used here to derive bloom filter bit
 * positions. Input is consumed as little-endian 4-byte words, followed by a
 * 1-3 byte tail, followed by the standard avalanche finalizer.
 *
 * @param {ArrayLike<number>} key - Input bytes; only the low 8 bits of each element are used
 * @param {number} [seed=0] - Hash seed; reduced to an unsigned 32-bit value
 * @returns {number} Unsigned 32-bit hash value
 */
export function murmurHash3_32(key, seed = 0) {
    const C1 = 0xcc9e2d51;
    const C2 = 0x1b873593;
    const rotateLeft = (word, count) => (word << count) | (word >>> (32 - count));
    // Per-word mixing shared by the block loop and the tail.
    const scramble = (word) => Math.imul(rotateLeft(Math.imul(word, C1), 15), C2);

    const length = key.length;
    const tailLength = length & 3;
    const bodyEnd = length - tailLength;
    let state = seed >>> 0;

    // Body: one little-endian 32-bit word per iteration.
    for (let pos = 0; pos < bodyEnd; pos += 4) {
        const word = (key[pos] & 0xff) |
            ((key[pos + 1] & 0xff) << 8) |
            ((key[pos + 2] & 0xff) << 16) |
            ((key[pos + 3] & 0xff) << 24);
        state ^= scramble(word);
        state = rotateLeft(state, 13);
        // May temporarily exceed 32 bits; the next bitwise operation truncates it.
        state = Math.imul(state, 5) + 0xe6546b64;
    }

    // Tail: pack the remaining 1-3 bytes into a single partial word.
    if (tailLength > 0) {
        let partial = 0;
        for (let j = tailLength - 1; j >= 0; j--) {
            partial |= (key[bodyEnd + j] & 0xff) << (8 * j);
        }
        state ^= scramble(partial);
    }

    // Finalization: fold in the length, then avalanche.
    state ^= length;
    state ^= state >>> 16;
    state = Math.imul(state, 0x85ebca6b);
    state ^= state >>> 13;
    state = Math.imul(state, 0xc2b2ae35);
    state ^= state >>> 16;
    return state >>> 0;
}
|
|
96
|
+
/**
 * Produce the two base hashes used for double hashing.
 *
 * The first hash is MurmurHash3 with seed 0; the second re-hashes the same
 * key using the first hash as its seed. Callers combine them into k probe
 * positions as h(i) = h1 + i * h2.
 *
 * @param {ArrayLike<number>} key - Input bytes
 * @returns {[number, number]} Tuple of [hash1, hash2]
 */
export function doubleHash(key) {
    const primary = murmurHash3_32(key, 0);
    return [primary, murmurHash3_32(key, primary)];
}
|
|
110
|
+
// ============================================================================
|
|
111
|
+
// Bloom Filter Implementation
|
|
112
|
+
// ============================================================================
|
|
113
|
+
/**
 * Memory-efficient bloom filter for probabilistic set membership.
 *
 * Sizing uses the standard formulas m = -n * ln(p) / ln(2)^2 bits and
 * k = (m / n) * ln(2) hash functions, so 10,000 items at 1% FPR need about
 * 95,851 bits (~12 KB). Probe positions come from double hashing:
 * position(i) = ((h1 + i * h2) mod 2^32) mod numBits.
 *
 * Serialized layout: [numHashFunctions (1 byte)][numBits (uint32 LE)][bit array].
 *
 * @example
 * ```typescript
 * // Create a bloom filter for 10,000 emails at 1% FPR
 * const bloom = new BloomFilter({ expectedElements: 10000 })
 *
 * // Add items
 * bloom.add('user@example.com.ai')
 * bloom.add('other@example.com.ai')
 *
 * // Check membership
 * bloom.mightContain('user@example.com.ai') // true
 * bloom.mightContain('unknown@test.com') // false (probably)
 *
 * // Serialize for Puffin file
 * const bytes = bloom.serialize()
 * ```
 */
export class BloomFilter {
    /** Shared UTF-8 encoder so string operations do not allocate one per call. */
    static #encoder = new TextEncoder();
    /** Bit array stored as bytes (bit i lives in byte i >> 3, at bit i & 7) */
    bits;
    /** Number of hash functions */
    numHashFunctions;
    /** Total number of bits */
    numBits;
    /**
     * Create a new, empty bloom filter.
     *
     * @param {{ expectedElements: number, falsePositiveRate?: number }} config -
     *   Expected element count and target false positive rate (defaults to DEFAULT_FPR)
     */
    constructor(config) {
        const { expectedElements, falsePositiveRate = DEFAULT_FPR } = config;
        // bits = -n * ln(p) / (ln(2)^2), with a floor of 64 bits (8 bytes)
        const ln2Squared = Math.LN2 * Math.LN2;
        this.numBits = Math.max(64, Math.ceil((-expectedElements * Math.log(falsePositiveRate)) / ln2Squared));
        // Round up to whole bytes
        this.bits = new Uint8Array(Math.ceil(this.numBits / 8));
        // k = (m/n) * ln(2), clamped to [1, 16]
        this.numHashFunctions = Math.max(1, Math.min(16, Math.round((this.numBits / expectedElements) * Math.LN2)));
    }
    /**
     * Create a bloom filter from serialized bytes.
     *
     * @param {Uint8Array} bytes - Serialized bloom filter data
     * @returns {BloomFilter} Deserialized bloom filter
     * @throws {Error} If the header is missing, declares zero hash functions or
     *   zero bits, or the bit array is shorter than the declared bit count
     */
    static deserialize(bytes) {
        // Format: [numHashFunctions(1 byte), numBits(4 bytes LE), bits...]
        if (bytes.length < 5) {
            throw new Error('Invalid bloom filter: too short');
        }
        const numHashFunctions = bytes[0];
        const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
        const numBits = view.getUint32(1, true);
        const bits = bytes.slice(5);
        if (numHashFunctions === 0) {
            throw new Error('Invalid bloom filter: zero hash functions');
        }
        if (numBits === 0) {
            throw new Error('Invalid bloom filter: zero bits');
        }
        // A short bit array would make out-of-range probes read as "unset" and
        // silently report present values as absent.
        if (bits.length < Math.ceil(numBits / 8)) {
            throw new Error('Invalid bloom filter: bit array shorter than declared size');
        }
        // Use internal construction bypassing size calculation
        return BloomFilter._fromRaw(numHashFunctions, numBits, bits);
    }
    /**
     * Internal factory for deserialization. Builds an instance without running
     * the constructor and installs the three fields as read-only properties.
     * @internal
     */
    static _fromRaw(numHashFunctions, numBits, bits) {
        const filter = Object.create(BloomFilter.prototype);
        Object.defineProperty(filter, 'numHashFunctions', { value: numHashFunctions, writable: false });
        Object.defineProperty(filter, 'numBits', { value: numBits, writable: false });
        Object.defineProperty(filter, 'bits', { value: bits, writable: false });
        return filter;
    }
    /**
     * Add a string value to the filter (hashed as UTF-8 bytes).
     *
     * @param {string} value - Value to add
     */
    add(value) {
        this.addBytes(BloomFilter.#encoder.encode(value));
    }
    /**
     * Add raw bytes to the filter.
     *
     * @param {Uint8Array} value - Bytes to add
     */
    addBytes(value) {
        const [h1, h2] = doubleHash(value);
        for (let i = 0; i < this.numHashFunctions; i++) {
            const combinedHash = (h1 + i * h2) >>> 0;
            const bitIndex = combinedHash % this.numBits;
            const byteIndex = Math.floor(bitIndex / 8);
            const bitOffset = bitIndex % 8;
            this.bits[byteIndex] |= 1 << bitOffset;
        }
    }
    /**
     * Check if a string value might be in the set.
     *
     * @param {string} value - Value to test
     * @returns {boolean} true if the value might be present, false if definitely not
     */
    mightContain(value) {
        return this.mightContainBytes(BloomFilter.#encoder.encode(value));
    }
    /**
     * Check if raw bytes might be in the set.
     *
     * @param {Uint8Array} value - Bytes to test
     * @returns {boolean} true if every probed bit is set, false otherwise
     */
    mightContainBytes(value) {
        const [h1, h2] = doubleHash(value);
        for (let i = 0; i < this.numHashFunctions; i++) {
            const combinedHash = (h1 + i * h2) >>> 0;
            const bitIndex = combinedHash % this.numBits;
            const byteIndex = Math.floor(bitIndex / 8);
            const bitOffset = bitIndex % 8;
            if ((this.bits[byteIndex] & (1 << bitOffset)) === 0) {
                return false;
            }
        }
        return true;
    }
    /**
     * Serialize the bloom filter to bytes.
     *
     * Format: [numHashFunctions(1), numBits(4 LE), bits...]
     *
     * @returns {Uint8Array} Newly allocated serialized form
     */
    serialize() {
        const result = new Uint8Array(5 + this.bits.length);
        result[0] = this.numHashFunctions;
        const view = new DataView(result.buffer);
        view.setUint32(1, this.numBits, true);
        result.set(this.bits, 5);
        return result;
    }
    /**
     * Get the size of the serialized filter in bytes (5-byte header + bit array).
     */
    get sizeBytes() {
        return 5 + this.bits.length;
    }
    /**
     * Get the estimated false positive rate for the filter's current contents.
     *
     * FPR ≈ (1 - e^(-kn/m))^k, where the inner term is the expected fraction
     * of set bits. The element count n is not tracked, so the observed
     * fraction of set bits is used instead: FPR ≈ (setBits / numBits)^k.
     * An empty filter therefore reports 0.
     */
    get estimatedFPR() {
        const usedBytes = Math.min(this.bits.length, Math.ceil(this.numBits / 8));
        let setBits = 0;
        for (let i = 0; i < usedBytes; i++) {
            // Kernighan popcount: each step clears the lowest set bit.
            for (let byte = this.bits[i]; byte !== 0; byte &= byte - 1) {
                setBits++;
            }
        }
        const fillRatio = Math.min(1, setBits / this.numBits);
        return Math.pow(fillRatio, this.numHashFunctions);
    }
}
|
|
263
|
+
// ============================================================================
|
|
264
|
+
// N-gram Bloom Filter Implementation
|
|
265
|
+
// ============================================================================
|
|
266
|
+
/**
 * N-gram bloom filter for substring/LIKE query support.
 *
 * Stores every n-gram (run of `ngramSize` UTF-16 code units) of each added
 * value in an underlying BloomFilter, enabling LIKE '%pattern%' pruning: if
 * any n-gram of the search pattern is missing, no stored value can contain
 * the pattern.
 *
 * Serialized layout: [ngramSize (1 byte)][padStrings (1 byte)][bloom filter bytes].
 * Because ngramSize occupies one byte, it must be an integer from 1 to 255.
 *
 * @example
 * ```typescript
 * const ngram = new NgramBloomFilter({
 *   expectedElements: 1000,
 *   ngramSize: 3
 * })
 *
 * ngram.add('hello@example.com.ai')
 *
 * // Check if a pattern might match any value
 * ngram.mightContainSubstring('example') // true
 * ngram.mightContainSubstring('xyz') // false (probably)
 * ```
 */
export class NgramBloomFilter {
    bloom;
    ngramSize;
    padStrings;
    /**
     * Create a new n-gram bloom filter.
     *
     * @param {{ expectedElements: number, falsePositiveRate?: number, ngramSize?: number, padStrings?: boolean }} config -
     *   Bloom sizing options plus n-gram size (default 3) and padding flag (default true)
     * @throws {RangeError} If ngramSize is not an integer between 1 and 255
     */
    constructor(config) {
        const { ngramSize = 3, padStrings = true, ...bloomConfig } = config;
        // serialize() stores ngramSize in a single byte; anything else would not
        // survive a round trip and the restored filter would query different n-grams.
        if (!Number.isInteger(ngramSize) || ngramSize < 1 || ngramSize > 255) {
            throw new RangeError(`ngramSize must be an integer between 1 and 255, got ${ngramSize}`);
        }
        // Estimate total n-grams assuming an average string length of 20 characters
        const avgStringLength = 20;
        const estimatedNgrams = (avgStringLength - ngramSize + 1) * bloomConfig.expectedElements;
        this.bloom = new BloomFilter({
            // Never size below one entry per element (matters when ngramSize > 20)
            expectedElements: Math.max(estimatedNgrams, bloomConfig.expectedElements),
            falsePositiveRate: bloomConfig.falsePositiveRate,
        });
        this.ngramSize = ngramSize;
        this.padStrings = padStrings;
    }
    /**
     * Create from serialized bytes.
     *
     * @param {Uint8Array} bytes - Output of serialize()
     * @returns {NgramBloomFilter} Deserialized filter
     * @throws {Error} If the header is missing or declares an n-gram size of zero
     */
    static deserialize(bytes) {
        // Format: [ngramSize(1), padStrings(1), bloom...]
        if (bytes.length < 2) {
            throw new Error('Invalid ngram bloom filter: too short');
        }
        const ngramSize = bytes[0];
        if (ngramSize === 0) {
            throw new Error('Invalid ngram bloom filter: ngramSize is zero');
        }
        const padStrings = bytes[1] === 1;
        const bloom = BloomFilter.deserialize(bytes.slice(2));
        return NgramBloomFilter._fromRaw(ngramSize, padStrings, bloom);
    }
    /**
     * Internal factory for deserialization. Builds an instance without running
     * the constructor and installs the three fields as read-only properties.
     * @internal
     */
    static _fromRaw(ngramSize, padStrings, bloom) {
        const filter = Object.create(NgramBloomFilter.prototype);
        Object.defineProperty(filter, 'ngramSize', { value: ngramSize, writable: false });
        Object.defineProperty(filter, 'padStrings', { value: padStrings, writable: false });
        Object.defineProperty(filter, 'bloom', { value: bloom, writable: false });
        return filter;
    }
    /**
     * Generate n-grams from a string.
     *
     * When padding is enabled the value is wrapped in two NUL characters on
     * each side, so extra n-grams anchored at the start and end are produced.
     *
     * @param {string} value - Source string
     * @returns {string[]} All consecutive n-grams, in order
     */
    generateNgrams(value) {
        const ngrams = [];
        const padded = this.padStrings ? `\x00\x00${value}\x00\x00` : value;
        for (let i = 0; i <= padded.length - this.ngramSize; i++) {
            ngrams.push(padded.substring(i, i + this.ngramSize));
        }
        return ngrams;
    }
    /**
     * Add a string value (all its n-grams).
     *
     * @param {string} value - Value to index
     */
    add(value) {
        for (const ngram of this.generateNgrams(value)) {
            this.bloom.add(ngram);
        }
    }
    /**
     * Check if a substring pattern might match any stored value.
     *
     * Requires ALL n-grams of the pattern to be present. The pattern is not
     * padded, because it may occur anywhere inside a stored value rather than
     * at its start or end.
     *
     * @param {string} pattern - Substring to look for
     * @returns {boolean} false if no stored value can contain the pattern, true otherwise
     */
    mightContainSubstring(pattern) {
        // Patterns shorter than one n-gram cannot be used for pruning
        if (pattern.length < this.ngramSize) {
            return true;
        }
        for (let i = 0; i <= pattern.length - this.ngramSize; i++) {
            const ngram = pattern.substring(i, i + this.ngramSize);
            if (!this.bloom.mightContain(ngram)) {
                return false;
            }
        }
        return true;
    }
    /**
     * Serialize to bytes.
     *
     * @returns {Uint8Array} [ngramSize, padStrings flag, ...bloom filter bytes]
     */
    serialize() {
        const bloomBytes = this.bloom.serialize();
        const result = new Uint8Array(2 + bloomBytes.length);
        result[0] = this.ngramSize;
        result[1] = this.padStrings ? 1 : 0;
        result.set(bloomBytes, 2);
        return result;
    }
    /**
     * Get serialized size in bytes (2-byte header + bloom filter).
     */
    get sizeBytes() {
        return 2 + this.bloom.sizeBytes;
    }
}
|
|
394
|
+
// ============================================================================
|
|
395
|
+
// Set Index Implementation
|
|
396
|
+
// ============================================================================
|
|
397
|
+
/**
 * Direct set index for low-cardinality columns.
 *
 * For columns with few distinct values (like status, type, visibility),
 * the actual values are stored rather than a probabilistic structure,
 * which enables exact pruning with no false positives.
 *
 * Serialized form is the UTF-8 encoding of a JSON array of the values, in
 * insertion order.
 *
 * @example
 * ```typescript
 * const setIndex = new SetIndex()
 *
 * setIndex.add('active')
 * setIndex.add('pending')
 * setIndex.add('active') // Deduplicated
 *
 * setIndex.contains('active') // true
 * setIndex.contains('deleted') // false (exactly)
 * ```
 */
export class SetIndex {
    values = new Set();
    /**
     * Create from serialized bytes.
     *
     * @param {Uint8Array} bytes - UTF-8 JSON array as produced by serialize()
     * @returns {SetIndex} Index containing the decoded values
     * @throws {SyntaxError} If the bytes are not valid JSON
     * @throws {Error} If the JSON document is not an array
     */
    static deserialize(bytes) {
        const parsed = JSON.parse(new TextDecoder().decode(bytes));
        // Without this check a JSON string would be iterated character by
        // character and silently produce a bogus index.
        if (!Array.isArray(parsed)) {
            throw new Error('Invalid set index: expected a JSON array');
        }
        const index = new SetIndex();
        for (const value of parsed) {
            index.values.add(value);
        }
        return index;
    }
    /**
     * Add a value to the set (duplicates are ignored).
     */
    add(value) {
        this.values.add(value);
    }
    /**
     * Check if the set contains a value (exact match).
     *
     * @returns {boolean} true if the value was added
     */
    contains(value) {
        return this.values.has(value);
    }
    /**
     * Check if at least one of the given values is in the set.
     *
     * @param {Iterable<*>} values - Candidate values to look up
     * @returns {boolean} true as soon as one candidate is found, false if none are
     */
    containsAny(values) {
        for (const value of values) {
            if (this.values.has(value)) {
                return true;
            }
        }
        return false;
    }
    /**
     * Get all values in the set, in insertion order.
     *
     * @returns {Array<*>} Fresh array copy of the values
     */
    getValues() {
        return Array.from(this.values);
    }
    /**
     * Get the number of distinct values.
     */
    get size() {
        return this.values.size;
    }
    /**
     * Serialize to bytes (UTF-8 encoded JSON array).
     *
     * @returns {Uint8Array} Serialized values
     */
    serialize() {
        const json = JSON.stringify(Array.from(this.values));
        return new TextEncoder().encode(json);
    }
    /**
     * Get serialized size in bytes. Computed by serializing on each access.
     */
    get sizeBytes() {
        return this.serialize().length;
    }
}
|
|
480
|
+
// ============================================================================
|
|
481
|
+
// Puffin Writer
|
|
482
|
+
// ============================================================================
|
|
483
|
+
/**
 * Builder that assembles a Puffin sidecar file in memory.
 *
 * Blobs (bloom filters, set indices, or caller-supplied raw blobs) are
 * collected in insertion order; finish() lays them out between the magic
 * markers and appends a JSON footer describing each blob's position.
 *
 * @example
 * ```typescript
 * const writer = new PuffinWriter({
 *   snapshotId: 123456789,
 *   sequenceNumber: 1
 * })
 *
 * // Add a bloom filter for email column
 * const emailBloom = new BloomFilter({ expectedElements: 10000 })
 * emailBloom.add('user@example.com.ai')
 * writer.addBloomFilter(5, emailBloom)
 *
 * // Add a set index for status column
 * const statusIndex = new SetIndex()
 * statusIndex.add('active')
 * statusIndex.add('pending')
 * writer.addSetIndex(6, statusIndex)
 *
 * // Generate the Puffin file
 * const puffinBytes = writer.finish()
 * ```
 */
export class PuffinWriter {
    snapshotId;
    sequenceNumber;
    blobs = [];
    properties;
    /**
     * @param {{ snapshotId: number, sequenceNumber: number, properties?: object }} options -
     *   Values recorded on every blob's metadata, plus optional file-level properties
     */
    constructor(options) {
        const { snapshotId, sequenceNumber, properties } = options;
        this.snapshotId = snapshotId;
        this.sequenceNumber = sequenceNumber;
        this.properties = properties ?? {};
    }
    /**
     * Queue a 'bloom-filter-v1' blob for one column.
     *
     * @param {number} fieldId - Column field ID
     * @param {{ serialize(): Uint8Array }} filter - Filter whose serialized bytes become the blob
     * @param {object} [properties] - Optional blob-level properties
     */
    addBloomFilter(fieldId, filter, properties) {
        const data = filter.serialize();
        this.blobs.push({ type: 'bloom-filter-v1', fields: [fieldId], data, properties });
    }
    /**
     * Queue an 'ngram-bloom-filter-v1' blob (substring queries) for one column.
     *
     * @param {number} fieldId - Column field ID
     * @param {{ serialize(): Uint8Array }} filter - Filter whose serialized bytes become the blob
     * @param {object} [properties] - Optional blob-level properties
     */
    addNgramBloomFilter(fieldId, filter, properties) {
        const data = filter.serialize();
        this.blobs.push({ type: 'ngram-bloom-filter-v1', fields: [fieldId], data, properties });
    }
    /**
     * Queue a 'set-index-v1' blob (low-cardinality columns) for one column.
     *
     * @param {number} fieldId - Column field ID
     * @param {{ serialize(): Uint8Array }} index - Index whose serialized bytes become the blob
     * @param {object} [properties] - Optional blob-level properties
     */
    addSetIndex(fieldId, index, properties) {
        const data = index.serialize();
        this.blobs.push({ type: 'set-index-v1', fields: [fieldId], data, properties });
    }
    /**
     * Queue a caller-built blob as-is.
     *
     * @param {{ type: string, fields: number[], data: Uint8Array, compressionCodec?: string, properties?: object }} blob
     */
    addBlob(blob) {
        this.blobs.push(blob);
    }
    /**
     * Produce the complete Puffin file.
     *
     * File layout:
     * [Magic 4B][Blob1][Blob2]...[Footer JSON][Footer Length 4B][Magic 4B]
     *
     * @returns {Uint8Array} Newly allocated file bytes
     */
    finish() {
        const magicSize = PUFFIN_MAGIC.length;
        const lengthFieldSize = 4;

        // Walk the blobs once, assigning each its absolute offset; the cursor
        // ends up at the first byte after the blob region.
        let cursor = magicSize;
        const blobMetadata = this.blobs.map((blob) => {
            const entry = {
                type: blob.type,
                fields: blob.fields,
                snapshotId: this.snapshotId,
                sequenceNumber: this.sequenceNumber,
                offset: cursor,
                length: blob.data.length,
                compressionCodec: blob.compressionCodec,
                properties: blob.properties,
            };
            cursor += blob.data.length;
            return entry;
        });
        const footerStart = cursor;

        const footerBytes = new TextEncoder().encode(JSON.stringify({
            blobs: blobMetadata,
            properties: this.properties,
        }));

        const output = new Uint8Array(footerStart + footerBytes.length + lengthFieldSize + magicSize);
        // Header magic
        output.set(PUFFIN_MAGIC, 0);
        // Blob payloads at the offsets recorded in the footer
        this.blobs.forEach((blob, i) => {
            output.set(blob.data, blobMetadata[i].offset);
        });
        // Footer JSON, then its length as little-endian uint32
        output.set(footerBytes, footerStart);
        const lengthPos = footerStart + footerBytes.length;
        new DataView(output.buffer).setUint32(lengthPos, footerBytes.length, true);
        // Trailer magic
        output.set(PUFFIN_MAGIC, lengthPos + lengthFieldSize);
        return output;
    }
}
|
|
623
|
+
/**
|
|
624
|
+
* Reader for parsing Puffin sidecar files
|
|
625
|
+
*
|
|
626
|
+
* Designed for range-addressable access from R2 or cache:
|
|
627
|
+
* 1. Fetch footer (last 8+ bytes) to get metadata
|
|
628
|
+
* 2. Parse footer to find blob offsets
|
|
629
|
+
* 3. Fetch specific blobs by type/column as needed
|
|
630
|
+
*
|
|
631
|
+
* @example
|
|
632
|
+
* ```typescript
|
|
633
|
+
* // Step 1: Get footer info for range request
|
|
634
|
+
* const footerRange = PuffinReader.getFooterRange(fileSize)
|
|
635
|
+
*
|
|
636
|
+
* // Step 2: Fetch footer bytes and parse
|
|
637
|
+
* const footerBytes = await fetch(url, {
|
|
638
|
+
* headers: { Range: `bytes=${footerRange.start}-${footerRange.end}` }
|
|
639
|
+
* }).then(r => r.arrayBuffer())
|
|
640
|
+
*
|
|
641
|
+
* const reader = PuffinReader.fromFooterBytes(new Uint8Array(footerBytes), fileSize)
|
|
642
|
+
*
|
|
643
|
+
* // Step 3: Get range for specific blob
|
|
644
|
+
* const bloomMeta = reader.findBlob('bloom-filter-v1', 5) // field ID 5
|
|
645
|
+
* const blobRange = reader.getBlobRange(bloomMeta)
|
|
646
|
+
*
|
|
647
|
+
* // Step 4: Fetch and parse blob
|
|
648
|
+
* const blobBytes = await fetch(url, {
|
|
649
|
+
* headers: { Range: `bytes=${blobRange.start}-${blobRange.end}` }
|
|
650
|
+
* }).then(r => r.arrayBuffer())
|
|
651
|
+
*
|
|
652
|
+
* const bloom = reader.parseBlob(bloomMeta, new Uint8Array(blobBytes))
|
|
653
|
+
* ```
|
|
654
|
+
*/
|
|
655
|
+
export class PuffinReader {
    /** Parsed footer object, as decoded from the footer JSON. */
    footer;
    /** Total file size in bytes, as supplied by the caller. */
    fileSize;
    /**
     * @param footer - Parsed footer object; `footer.blobs` and
     *   `footer.properties` are read by the accessor methods
     * @param fileSize - Total file size in bytes
     */
    constructor(footer, fileSize) {
        this.footer = footer;
        this.fileSize = fileSize;
    }
    /**
     * Calculate the range needed to fetch the footer
     *
     * The footer is at the end of the file:
     * [...][Footer JSON][Footer Length 4B][Magic 4B]
     *
     * The range covers the last `estimatedFooterSize` bytes of the file so
     * that, when the estimate is large enough, the length trailer and the
     * footer JSON arrive in a single request. The fetch size is never
     * smaller than the 8-byte trailer (unless the file itself is smaller)
     * and never larger than the file.
     *
     * @param fileSize - Total file size in bytes
     * @param estimatedFooterSize - Estimated footer size (default: 4KB)
     * @returns `{ start, end }` byte offsets, with `end` equal to `fileSize`
     */
    static getFooterRange(fileSize, estimatedFooterSize = 4096) {
        // parseFooterLength() rejects anything shorter than 8 bytes
        // (footer length + magic), so a smaller estimate would produce a
        // range that can never be parsed. Clamp up to that minimum.
        const minTailBytes = 8;
        const fetchSize = Math.min(Math.max(estimatedFooterSize, minTailBytes), fileSize);
        return {
            start: fileSize - fetchSize,
            end: fileSize,
        };
    }
    /**
     * Parse the footer length from the last 8 bytes
     *
     * @param tailBytes - Last 8+ bytes of the file
     * @returns Footer JSON length in bytes
     * @throws Error if fewer than 8 bytes are given or the trailing magic
     *   does not match PUFFIN_MAGIC
     */
    static parseFooterLength(tailBytes) {
        if (tailBytes.length < 8) {
            throw new Error('Need at least 8 bytes to parse footer length');
        }
        // Validate trailer magic
        const trailerOffset = tailBytes.length - PUFFIN_MAGIC.length;
        const trailer = tailBytes.slice(trailerOffset);
        if (!arraysEqual(trailer, PUFFIN_MAGIC)) {
            throw new Error('Invalid Puffin file: trailer magic mismatch');
        }
        // Read footer length (4 bytes before trailer, little-endian).
        // The view honours byteOffset so that subarray views of a larger
        // buffer are read at the right position.
        const lengthOffset = trailerOffset - 4;
        const view = new DataView(tailBytes.buffer, tailBytes.byteOffset, tailBytes.byteLength);
        return view.getUint32(lengthOffset, true);
    }
    /**
     * Create a reader from footer bytes
     *
     * @param bytes - Bytes from the footer range request (must end at the
     *   end of the file)
     * @param fileSize - Total file size
     * @returns A reader wrapping the parsed footer
     * @throws Error if the trailer is invalid or `bytes` does not contain
     *   the whole footer JSON; JSON.parse errors propagate unchanged
     */
    static fromFooterBytes(bytes, fileSize) {
        const footerLength = PuffinReader.parseFooterLength(bytes);
        // Extract footer JSON
        const trailerSize = 4 + PUFFIN_MAGIC.length; // length + magic
        const footerStart = bytes.length - trailerSize - footerLength;
        if (footerStart < 0) {
            throw new Error('Footer extends beyond fetched bytes - need larger range');
        }
        const footerBytes = bytes.slice(footerStart, footerStart + footerLength);
        const footerJson = new TextDecoder().decode(footerBytes);
        const footer = JSON.parse(footerJson);
        return new PuffinReader(footer, fileSize);
    }
    /**
     * Parse a complete Puffin file
     *
     * Use this when you have the entire file in memory.
     *
     * @param bytes - The complete file contents
     * @returns A reader whose `fileSize` is `bytes.length`
     * @throws Error if the leading magic does not match PUFFIN_MAGIC, plus
     *   anything thrown by fromFooterBytes()
     */
    static fromBytes(bytes) {
        // Validate header magic
        const header = bytes.slice(0, PUFFIN_MAGIC.length);
        if (!arraysEqual(header, PUFFIN_MAGIC)) {
            throw new Error('Invalid Puffin file: header magic mismatch');
        }
        return PuffinReader.fromFooterBytes(bytes, bytes.length);
    }
    /**
     * Get all blob metadata
     *
     * @returns The footer's `blobs` array (not copied)
     */
    getBlobs() {
        return this.footer.blobs;
    }
    /**
     * Get file properties
     *
     * @returns The footer's `properties`, or an empty object when absent
     */
    getProperties() {
        return this.footer.properties ?? {};
    }
    /**
     * Find a blob by type and optional field ID
     *
     * @param type - Blob type to match exactly
     * @param fieldId - When provided, the blob's `fields` must include it
     * @returns The first matching blob metadata, or null if none matches
     */
    findBlob(type, fieldId) {
        for (const blob of this.footer.blobs) {
            if (blob.type !== type)
                continue;
            if (fieldId !== undefined && !blob.fields.includes(fieldId))
                continue;
            return blob;
        }
        return null;
    }
    /**
     * Find all blobs for a specific field
     *
     * @param fieldId - Field ID that must appear in the blob's `fields`
     * @returns Matching blob metadata (possibly empty)
     */
    findBlobsForField(fieldId) {
        return this.footer.blobs.filter((blob) => blob.fields.includes(fieldId));
    }
    /**
     * Find all blobs of a specific type
     *
     * @param type - Blob type to match exactly
     * @returns Matching blob metadata (possibly empty)
     */
    findBlobsByType(type) {
        return this.footer.blobs.filter((blob) => blob.type === type);
    }
    /**
     * Get the range request for a specific blob
     *
     * @param blob - Blob metadata with `offset` and `length`
     * @returns `{ start, end }` where `end` is `offset + length`
     */
    getBlobRange(blob) {
        return {
            start: blob.offset,
            end: blob.offset + blob.length,
        };
    }
    /**
     * Parse blob data into the appropriate structure
     *
     * @param metadata - Blob metadata
     * @param data - Raw blob bytes
     * @returns Parsed bloom filter, set index, or raw bytes
     */
    parseBlob(metadata, data) {
        switch (metadata.type) {
            case 'bloom-filter-v1':
                return BloomFilter.deserialize(data);
            case 'ngram-bloom-filter-v1':
                return NgramBloomFilter.deserialize(data);
            case 'set-index-v1':
                return SetIndex.deserialize(data);
            default:
                // Unknown blob types are handed back untouched.
                return data;
        }
    }
    /**
     * Extract a blob from the full file bytes
     *
     * Use when you have the complete file in memory.
     *
     * @param metadata - Blob metadata giving `offset`, `length` and `type`
     * @param fileBytes - The complete file contents
     * @returns The result of parseBlob() on the blob's byte slice
     */
    extractBlob(metadata, fileBytes) {
        const blobData = fileBytes.slice(metadata.offset, metadata.offset + metadata.length);
        return this.parseBlob(metadata, blobData);
    }
}
|
|
811
|
+
// ============================================================================
|
|
812
|
+
// Helper Functions
|
|
813
|
+
// ============================================================================
|
|
814
|
+
/**
 * Element-wise equality check for two Uint8Arrays.
 *
 * @param a - First array
 * @param b - Second array
 * @returns true when both have the same length and every index holds a
 *   strictly equal value
 */
function arraysEqual(a, b) {
    const size = a.length;
    if (size !== b.length) {
        return false;
    }
    // Walk forward until the first mismatch; reaching the end means equal.
    let idx = 0;
    while (idx < size && a[idx] === b[idx]) {
        idx += 1;
    }
    return idx === size;
}
|
|
826
|
+
/**
 * Build a BloomFilter already populated with a list of string values.
 *
 * The filter is sized for `values.length` expected elements, with a floor
 * of 100.
 *
 * @param values - Values to insert; each is passed to `add`
 * @param fpr - Target false positive rate (default: DEFAULT_FPR)
 * @returns The populated BloomFilter
 */
export function createBloomFilterFromValues(values, fpr = DEFAULT_FPR) {
    const expectedElements = Math.max(values.length, 100);
    const bloom = new BloomFilter({ expectedElements, falsePositiveRate: fpr });
    for (const entry of values) {
        bloom.add(entry);
    }
    return bloom;
}
|
|
841
|
+
/**
 * Build an NgramBloomFilter already populated with a list of string values.
 *
 * The filter is sized for `values.length` expected elements, with a floor
 * of 100.
 *
 * @param values - Values to insert; each is passed to `add`
 * @param ngramSize - N-gram size handed to the filter (default: 3)
 * @param fpr - Target false positive rate (default: DEFAULT_FPR)
 * @returns The populated NgramBloomFilter
 */
export function createNgramBloomFromValues(values, ngramSize = 3, fpr = DEFAULT_FPR) {
    const expectedElements = Math.max(values.length, 100);
    const ngramBloom = new NgramBloomFilter({
        expectedElements,
        falsePositiveRate: fpr,
        ngramSize,
    });
    for (const entry of values) {
        ngramBloom.add(entry);
    }
    return ngramBloom;
}
|
|
855
|
+
/**
 * Build a SetIndex containing every value from the given list.
 *
 * @param values - Values to insert; each is passed to `add`
 * @returns The populated SetIndex
 */
export function createSetIndexFromValues(values) {
    const setIndex = new SetIndex();
    for (const entry of values) {
        setIndex.add(entry);
    }
    return setIndex;
}
|
|
865
|
+
/**
 * Estimate the serialized size of a bloom filter.
 *
 * The bit count is ceil(-n * ln(fpr) / (ln 2)^2), rounded up to whole
 * bytes, plus a 5-byte header.
 *
 * @param elements - Expected number of elements
 * @param fpr - False positive rate (default: DEFAULT_FPR)
 * @returns Estimated size in bytes
 */
export function estimateBloomFilterSize(elements, fpr = DEFAULT_FPR) {
    const HEADER_BYTES = 5;
    const BITS_PER_BYTE = 8;
    const bitCount = Math.ceil((-elements * Math.log(fpr)) / (Math.LN2 * Math.LN2));
    return HEADER_BYTES + Math.ceil(bitCount / BITS_PER_BYTE);
}
|
|
878
|
+
//# sourceMappingURL=puffin.js.map
|