dotdo 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +446 -315
- package/dist/ai/index.js +19 -0
- package/dist/ai/index.js.map +1 -0
- package/dist/ai/template-literals.js +852 -0
- package/dist/ai/template-literals.js.map +1 -0
- package/dist/api/analytics/router.js +601 -0
- package/dist/api/analytics/router.js.map +1 -0
- package/dist/api/index.js +158 -0
- package/dist/api/index.js.map +1 -0
- package/dist/api/middleware/auth-federation.js +573 -0
- package/dist/api/middleware/auth-federation.js.map +1 -0
- package/dist/api/middleware/auth.js +544 -0
- package/dist/api/middleware/auth.js.map +1 -0
- package/dist/api/middleware/error-handling.js +176 -0
- package/dist/api/middleware/error-handling.js.map +1 -0
- package/dist/api/middleware/request-id.js +21 -0
- package/dist/api/middleware/request-id.js.map +1 -0
- package/dist/api/pages.js +1180 -0
- package/dist/api/pages.js.map +1 -0
- package/dist/api/routes/api.js +612 -0
- package/dist/api/routes/api.js.map +1 -0
- package/dist/api/routes/browsers.js +471 -0
- package/dist/api/routes/browsers.js.map +1 -0
- package/dist/api/routes/do.js +188 -0
- package/dist/api/routes/do.js.map +1 -0
- package/dist/api/routes/mcp.js +459 -0
- package/dist/api/routes/mcp.js.map +1 -0
- package/dist/api/routes/obs.js +445 -0
- package/dist/api/routes/obs.js.map +1 -0
- package/dist/api/routes/openapi.js +794 -0
- package/dist/api/routes/openapi.js.map +1 -0
- package/dist/api/routes/rpc.js +1103 -0
- package/dist/api/routes/rpc.js.map +1 -0
- package/dist/api/routes/sandboxes.js +389 -0
- package/dist/api/routes/sandboxes.js.map +1 -0
- package/dist/api/test-do.js +38 -0
- package/dist/api/test-do.js.map +1 -0
- package/dist/api/types.js +11 -0
- package/dist/api/types.js.map +1 -0
- package/dist/cli/bin.js +2 -0
- package/dist/cli/main.js +52342 -0
- package/dist/db/actions.js +212 -0
- package/dist/db/actions.js.map +1 -0
- package/dist/db/auth.js +506 -0
- package/dist/db/auth.js.map +1 -0
- package/dist/db/branches.js +65 -0
- package/dist/db/branches.js.map +1 -0
- package/dist/db/clickhouse.js +1074 -0
- package/dist/db/clickhouse.js.map +1 -0
- package/dist/db/dlq.js +39 -0
- package/dist/db/dlq.js.map +1 -0
- package/dist/db/events.js +28 -0
- package/dist/db/events.js.map +1 -0
- package/dist/db/exec.js +64 -0
- package/dist/db/exec.js.map +1 -0
- package/dist/db/files.js +85 -0
- package/dist/db/files.js.map +1 -0
- package/dist/db/flags.js +24 -0
- package/dist/db/flags.js.map +1 -0
- package/dist/db/git.js +116 -0
- package/dist/db/git.js.map +1 -0
- package/dist/db/iceberg/inverted-index.js +862 -0
- package/dist/db/iceberg/inverted-index.js.map +1 -0
- package/dist/db/iceberg/puffin.js +878 -0
- package/dist/db/iceberg/puffin.js.map +1 -0
- package/dist/db/iceberg/search-manifest.js +422 -0
- package/dist/db/iceberg/search-manifest.js.map +1 -0
- package/dist/db/iceberg/types.js +8 -0
- package/dist/db/iceberg/types.js.map +1 -0
- package/dist/db/index.js +121 -0
- package/dist/db/index.js.map +1 -0
- package/dist/db/integrations.js +368 -0
- package/dist/db/integrations.js.map +1 -0
- package/dist/db/json-indexes.js +332 -0
- package/dist/db/json-indexes.js.map +1 -0
- package/dist/db/linked-accounts.js +287 -0
- package/dist/db/linked-accounts.js.map +1 -0
- package/dist/db/nouns.js +183 -0
- package/dist/db/nouns.js.map +1 -0
- package/dist/db/objects.js +170 -0
- package/dist/db/objects.js.map +1 -0
- package/dist/db/primitives/dag-scheduler/index.js +869 -0
- package/dist/db/primitives/dag-scheduler/index.js.map +1 -0
- package/dist/db/primitives/exactly-once-context.js +237 -0
- package/dist/db/primitives/exactly-once-context.js.map +1 -0
- package/dist/db/primitives/index.js +62 -0
- package/dist/db/primitives/index.js.map +1 -0
- package/dist/db/primitives/keyed-router.js +145 -0
- package/dist/db/primitives/keyed-router.js.map +1 -0
- package/dist/db/primitives/observability.js +162 -0
- package/dist/db/primitives/observability.js.map +1 -0
- package/dist/db/primitives/schema-evolution.js +643 -0
- package/dist/db/primitives/schema-evolution.js.map +1 -0
- package/dist/db/primitives/stateful-operator/index.js +770 -0
- package/dist/db/primitives/stateful-operator/index.js.map +1 -0
- package/dist/db/primitives/temporal-store.js +306 -0
- package/dist/db/primitives/temporal-store.js.map +1 -0
- package/dist/db/primitives/typed-column-store.js +1229 -0
- package/dist/db/primitives/typed-column-store.js.map +1 -0
- package/dist/db/primitives/utils/duration.js +162 -0
- package/dist/db/primitives/utils/duration.js.map +1 -0
- package/dist/db/primitives/utils/murmur3.js +118 -0
- package/dist/db/primitives/utils/murmur3.js.map +1 -0
- package/dist/db/primitives/watermark-service.js +136 -0
- package/dist/db/primitives/watermark-service.js.map +1 -0
- package/dist/db/primitives/window-manager.js +764 -0
- package/dist/db/primitives/window-manager.js.map +1 -0
- package/dist/db/relationships.js +66 -0
- package/dist/db/relationships.js.map +1 -0
- package/dist/db/schema-minimal.js +61 -0
- package/dist/db/schema-minimal.js.map +1 -0
- package/dist/db/search.js +28 -0
- package/dist/db/search.js.map +1 -0
- package/dist/db/stores.js +1665 -0
- package/dist/db/stores.js.map +1 -0
- package/dist/db/things.js +297 -0
- package/dist/db/things.js.map +1 -0
- package/dist/db/vault.js +171 -0
- package/dist/db/vault.js.map +1 -0
- package/dist/db/verbs.js +102 -0
- package/dist/db/verbs.js.map +1 -0
- package/dist/do/base.js +48 -0
- package/dist/do/base.js.map +1 -0
- package/dist/do/bash.js +35 -0
- package/dist/do/bash.js.map +1 -0
- package/dist/do/fs.js +25 -0
- package/dist/do/fs.js.map +1 -0
- package/dist/do/full.js +61 -0
- package/dist/do/full.js.map +1 -0
- package/dist/do/git.js +28 -0
- package/dist/do/git.js.map +1 -0
- package/dist/do/index.js +52 -0
- package/dist/do/index.js.map +1 -0
- package/dist/do/tiny.js +31 -0
- package/dist/do/tiny.js.map +1 -0
- package/dist/lib/DOAuth.js +261 -0
- package/dist/lib/DOAuth.js.map +1 -0
- package/dist/lib/DODispatcher.js +72 -0
- package/dist/lib/DODispatcher.js.map +1 -0
- package/dist/lib/Modifier.js +189 -0
- package/dist/lib/Modifier.js.map +1 -0
- package/dist/lib/StateStorage.js +403 -0
- package/dist/lib/StateStorage.js.map +1 -0
- package/dist/lib/TypeRegistry.js +122 -0
- package/dist/lib/TypeRegistry.js.map +1 -0
- package/dist/lib/agent/tools/bash.js +336 -0
- package/dist/lib/agent/tools/bash.js.map +1 -0
- package/dist/lib/agent/tools/edit.js +157 -0
- package/dist/lib/agent/tools/edit.js.map +1 -0
- package/dist/lib/agent/tools/glob.js +137 -0
- package/dist/lib/agent/tools/glob.js.map +1 -0
- package/dist/lib/agent/tools/grep.js +315 -0
- package/dist/lib/agent/tools/grep.js.map +1 -0
- package/dist/lib/agent/tools/index.js +71 -0
- package/dist/lib/agent/tools/index.js.map +1 -0
- package/dist/lib/agent/tools/read.js +212 -0
- package/dist/lib/agent/tools/read.js.map +1 -0
- package/dist/lib/agent/tools/types.js +197 -0
- package/dist/lib/agent/tools/types.js.map +1 -0
- package/dist/lib/agent/tools/write.js +159 -0
- package/dist/lib/agent/tools/write.js.map +1 -0
- package/dist/lib/ai/gateway.js +247 -0
- package/dist/lib/ai/gateway.js.map +1 -0
- package/dist/lib/ai/tool-loop-agent.js +591 -0
- package/dist/lib/ai/tool-loop-agent.js.map +1 -0
- package/dist/lib/auto-wiring.js +439 -0
- package/dist/lib/auto-wiring.js.map +1 -0
- package/dist/lib/browse/browserbase.js +163 -0
- package/dist/lib/browse/browserbase.js.map +1 -0
- package/dist/lib/browse/cloudflare.js +144 -0
- package/dist/lib/browse/cloudflare.js.map +1 -0
- package/dist/lib/browse/index.js +62 -0
- package/dist/lib/browse/index.js.map +1 -0
- package/dist/lib/browse/types.js +13 -0
- package/dist/lib/browse/types.js.map +1 -0
- package/dist/lib/cache/index.js +37 -0
- package/dist/lib/cache/index.js.map +1 -0
- package/dist/lib/cache/visibility.js +638 -0
- package/dist/lib/cache/visibility.js.map +1 -0
- package/dist/lib/capabilities.js +268 -0
- package/dist/lib/capabilities.js.map +1 -0
- package/dist/lib/channels/base.js +106 -0
- package/dist/lib/channels/base.js.map +1 -0
- package/dist/lib/channels/discord.js +94 -0
- package/dist/lib/channels/discord.js.map +1 -0
- package/dist/lib/channels/email.js +204 -0
- package/dist/lib/channels/email.js.map +1 -0
- package/dist/lib/channels/index.js +90 -0
- package/dist/lib/channels/index.js.map +1 -0
- package/dist/lib/channels/mdxui-chat.js +95 -0
- package/dist/lib/channels/mdxui-chat.js.map +1 -0
- package/dist/lib/channels/slack-blockkit.js +121 -0
- package/dist/lib/channels/slack-blockkit.js.map +1 -0
- package/dist/lib/channels/types.js +7 -0
- package/dist/lib/channels/types.js.map +1 -0
- package/dist/lib/cloudflare/ai.js +654 -0
- package/dist/lib/cloudflare/ai.js.map +1 -0
- package/dist/lib/cloudflare/index.js +88 -0
- package/dist/lib/cloudflare/index.js.map +1 -0
- package/dist/lib/cloudflare/kv.js +342 -0
- package/dist/lib/cloudflare/kv.js.map +1 -0
- package/dist/lib/cloudflare/queues.js +434 -0
- package/dist/lib/cloudflare/queues.js.map +1 -0
- package/dist/lib/cloudflare/r2.js +604 -0
- package/dist/lib/cloudflare/r2.js.map +1 -0
- package/dist/lib/cloudflare/vectorize.js +494 -0
- package/dist/lib/cloudflare/vectorize.js.map +1 -0
- package/dist/lib/cloudflare/workflows.js +569 -0
- package/dist/lib/cloudflare/workflows.js.map +1 -0
- package/dist/lib/colo/caching.js +196 -0
- package/dist/lib/colo/caching.js.map +1 -0
- package/dist/lib/colo/detection.js +194 -0
- package/dist/lib/colo/detection.js.map +1 -0
- package/dist/lib/colo/external-data.js +219 -0
- package/dist/lib/colo/external-data.js.map +1 -0
- package/dist/lib/colo/globe-data.js +179 -0
- package/dist/lib/colo/globe-data.js.map +1 -0
- package/dist/lib/colo/index.js +16 -0
- package/dist/lib/colo/index.js.map +1 -0
- package/dist/lib/decorators.js +37 -0
- package/dist/lib/decorators.js.map +1 -0
- package/dist/lib/discovery.js +81 -0
- package/dist/lib/discovery.js.map +1 -0
- package/dist/lib/executors/AgenticFunctionExecutor.js +619 -0
- package/dist/lib/executors/AgenticFunctionExecutor.js.map +1 -0
- package/dist/lib/executors/BaseFunctionExecutor.js +328 -0
- package/dist/lib/executors/BaseFunctionExecutor.js.map +1 -0
- package/dist/lib/executors/CascadeExecutor.js +418 -0
- package/dist/lib/executors/CascadeExecutor.js.map +1 -0
- package/dist/lib/executors/CodeFunctionExecutor.js +904 -0
- package/dist/lib/executors/CodeFunctionExecutor.js.map +1 -0
- package/dist/lib/executors/GenerativeFunctionExecutor.js +904 -0
- package/dist/lib/executors/GenerativeFunctionExecutor.js.map +1 -0
- package/dist/lib/executors/HumanFunctionExecutor.js +884 -0
- package/dist/lib/executors/HumanFunctionExecutor.js.map +1 -0
- package/dist/lib/executors/ParallelStepExecutor.js +308 -0
- package/dist/lib/executors/ParallelStepExecutor.js.map +1 -0
- package/dist/lib/executors/types.js +12 -0
- package/dist/lib/executors/types.js.map +1 -0
- package/dist/lib/experiments.js +89 -0
- package/dist/lib/experiments.js.map +1 -0
- package/dist/lib/flags/store.js +262 -0
- package/dist/lib/flags/store.js.map +1 -0
- package/dist/lib/functions/FunctionComposition.js +467 -0
- package/dist/lib/functions/FunctionComposition.js.map +1 -0
- package/dist/lib/functions/FunctionMiddleware.js +457 -0
- package/dist/lib/functions/FunctionMiddleware.js.map +1 -0
- package/dist/lib/functions/FunctionRegistry.js +426 -0
- package/dist/lib/functions/FunctionRegistry.js.map +1 -0
- package/dist/lib/functions/createFunction.js +1048 -0
- package/dist/lib/functions/createFunction.js.map +1 -0
- package/dist/lib/humans/index.js +68 -0
- package/dist/lib/humans/index.js.map +1 -0
- package/dist/lib/humans/templates.js +117 -0
- package/dist/lib/humans/templates.js.map +1 -0
- package/dist/lib/identity.js +98 -0
- package/dist/lib/identity.js.map +1 -0
- package/dist/lib/index.js +9 -0
- package/dist/lib/index.js.map +1 -0
- package/dist/lib/logging/error-logger.js +163 -0
- package/dist/lib/logging/error-logger.js.map +1 -0
- package/dist/lib/logging/index.js +160 -0
- package/dist/lib/logging/index.js.map +1 -0
- package/dist/lib/mixins/bash.js +825 -0
- package/dist/lib/mixins/bash.js.map +1 -0
- package/dist/lib/mixins/fs.js +648 -0
- package/dist/lib/mixins/fs.js.map +1 -0
- package/dist/lib/mixins/git.js +1011 -0
- package/dist/lib/mixins/git.js.map +1 -0
- package/dist/lib/mixins/index.js +29 -0
- package/dist/lib/mixins/index.js.map +1 -0
- package/dist/lib/mixins/npm.js +662 -0
- package/dist/lib/mixins/npm.js.map +1 -0
- package/dist/lib/noun-id.js +278 -0
- package/dist/lib/noun-id.js.map +1 -0
- package/dist/lib/rate-limit/sliding-window.js +148 -0
- package/dist/lib/rate-limit/sliding-window.js.map +1 -0
- package/dist/lib/rate-limit.js +110 -0
- package/dist/lib/rate-limit.js.map +1 -0
- package/dist/lib/rpc/bindings.js +548 -0
- package/dist/lib/rpc/bindings.js.map +1 -0
- package/dist/lib/rpc/index.js +64 -0
- package/dist/lib/rpc/index.js.map +1 -0
- package/dist/lib/safe-stringify.js +223 -0
- package/dist/lib/safe-stringify.js.map +1 -0
- package/dist/lib/sandbox/miniflare-sandbox.js +1007 -0
- package/dist/lib/sandbox/miniflare-sandbox.js.map +1 -0
- package/dist/lib/sqids.js +110 -0
- package/dist/lib/sqids.js.map +1 -0
- package/dist/lib/sql/adapters/index.js +10 -0
- package/dist/lib/sql/adapters/index.js.map +1 -0
- package/dist/lib/sql/adapters/node-sql-parser.js +552 -0
- package/dist/lib/sql/adapters/node-sql-parser.js.map +1 -0
- package/dist/lib/sql/adapters/pgsql-parser.js +1189 -0
- package/dist/lib/sql/adapters/pgsql-parser.js.map +1 -0
- package/dist/lib/sql/index.js +277 -0
- package/dist/lib/sql/index.js.map +1 -0
- package/dist/lib/sql/types.js +56 -0
- package/dist/lib/sql/types.js.map +1 -0
- package/dist/lib/type-classifier.js +126 -0
- package/dist/lib/type-classifier.js.map +1 -0
- package/dist/lib/utils/html.js +47 -0
- package/dist/lib/utils/html.js.map +1 -0
- package/dist/lib/validation.js +48 -0
- package/dist/lib/validation.js.map +1 -0
- package/dist/lib/vault/store.js +411 -0
- package/dist/lib/vault/store.js.map +1 -0
- package/dist/metrics/hunch.js +739 -0
- package/dist/metrics/hunch.js.map +1 -0
- package/dist/objects/API.js +302 -0
- package/dist/objects/API.js.map +1 -0
- package/dist/objects/Agent.js +179 -0
- package/dist/objects/Agent.js.map +1 -0
- package/dist/objects/AgenticFunctionExecutor.js +8 -0
- package/dist/objects/AgenticFunctionExecutor.js.map +1 -0
- package/dist/objects/App.js +83 -0
- package/dist/objects/App.js.map +1 -0
- package/dist/objects/Browser.js +884 -0
- package/dist/objects/Browser.js.map +1 -0
- package/dist/objects/Business.js +107 -0
- package/dist/objects/Business.js.map +1 -0
- package/dist/objects/CLI.js +221 -0
- package/dist/objects/CLI.js.map +1 -0
- package/dist/objects/CodeFunctionExecutor.js +8 -0
- package/dist/objects/CodeFunctionExecutor.js.map +1 -0
- package/dist/objects/Collection.js +161 -0
- package/dist/objects/Collection.js.map +1 -0
- package/dist/objects/DO.js +41 -0
- package/dist/objects/DO.js.map +1 -0
- package/dist/objects/DOBase.js +2309 -0
- package/dist/objects/DOBase.js.map +1 -0
- package/dist/objects/DOFull.js +1676 -0
- package/dist/objects/DOFull.js.map +1 -0
- package/dist/objects/DOTiny.js +207 -0
- package/dist/objects/DOTiny.js.map +1 -0
- package/dist/objects/Directory.js +199 -0
- package/dist/objects/Directory.js.map +1 -0
- package/dist/objects/Entity.js +413 -0
- package/dist/objects/Entity.js.map +1 -0
- package/dist/objects/Function.js +116 -0
- package/dist/objects/Function.js.map +1 -0
- package/dist/objects/Human.js +231 -0
- package/dist/objects/Human.js.map +1 -0
- package/dist/objects/HumanFunctionExecutor.js +8 -0
- package/dist/objects/HumanFunctionExecutor.js.map +1 -0
- package/dist/objects/IcebergMetadataDO.js +938 -0
- package/dist/objects/IcebergMetadataDO.js.map +1 -0
- package/dist/objects/IntegrationsDO.js +1174 -0
- package/dist/objects/IntegrationsDO.js.map +1 -0
- package/dist/objects/ObservabilityBroadcaster.js +149 -0
- package/dist/objects/ObservabilityBroadcaster.js.map +1 -0
- package/dist/objects/Package.js +154 -0
- package/dist/objects/Package.js.map +1 -0
- package/dist/objects/Product.js +193 -0
- package/dist/objects/Product.js.map +1 -0
- package/dist/objects/SDK.js +152 -0
- package/dist/objects/SDK.js.map +1 -0
- package/dist/objects/SaaS.js +235 -0
- package/dist/objects/SaaS.js.map +1 -0
- package/dist/objects/SandboxDO.js +759 -0
- package/dist/objects/SandboxDO.js.map +1 -0
- package/dist/objects/Service.js +337 -0
- package/dist/objects/Service.js.map +1 -0
- package/dist/objects/Site.js +80 -0
- package/dist/objects/Site.js.map +1 -0
- package/dist/objects/Startup.js +479 -0
- package/dist/objects/Startup.js.map +1 -0
- package/dist/objects/ThingsDO.js +170 -0
- package/dist/objects/ThingsDO.js.map +1 -0
- package/dist/objects/VectorShardDO.js +648 -0
- package/dist/objects/VectorShardDO.js.map +1 -0
- package/dist/objects/Worker.js +144 -0
- package/dist/objects/Worker.js.map +1 -0
- package/dist/objects/Workflow.js +196 -0
- package/dist/objects/Workflow.js.map +1 -0
- package/dist/objects/WorkflowFactory.js +313 -0
- package/dist/objects/WorkflowFactory.js.map +1 -0
- package/dist/objects/WorkflowRuntime.js +863 -0
- package/dist/objects/WorkflowRuntime.js.map +1 -0
- package/dist/objects/circuit-breaker-bulkhead.js +178 -0
- package/dist/objects/circuit-breaker-bulkhead.js.map +1 -0
- package/dist/objects/createFunction.js +934 -0
- package/dist/objects/createFunction.js.map +1 -0
- package/dist/objects/index.js +80 -0
- package/dist/objects/index.js.map +1 -0
- package/dist/objects/lifecycle/Branch.js +275 -0
- package/dist/objects/lifecycle/Branch.js.map +1 -0
- package/dist/objects/lifecycle/Clone.js +1499 -0
- package/dist/objects/lifecycle/Clone.js.map +1 -0
- package/dist/objects/lifecycle/Compact.js +237 -0
- package/dist/objects/lifecycle/Compact.js.map +1 -0
- package/dist/objects/lifecycle/Promote.js +476 -0
- package/dist/objects/lifecycle/Promote.js.map +1 -0
- package/dist/objects/lifecycle/Shard.js +560 -0
- package/dist/objects/lifecycle/Shard.js.map +1 -0
- package/dist/objects/lifecycle/index.js +15 -0
- package/dist/objects/lifecycle/index.js.map +1 -0
- package/dist/objects/lifecycle/types.js +33 -0
- package/dist/objects/lifecycle/types.js.map +1 -0
- package/dist/objects/mixins/infrastructure.js +171 -0
- package/dist/objects/mixins/infrastructure.js.map +1 -0
- package/dist/objects/modules/StoresModule.js +153 -0
- package/dist/objects/modules/StoresModule.js.map +1 -0
- package/dist/objects/persistence/checkpoint-manager.js +606 -0
- package/dist/objects/persistence/checkpoint-manager.js.map +1 -0
- package/dist/objects/persistence/index.js +72 -0
- package/dist/objects/persistence/index.js.map +1 -0
- package/dist/objects/persistence/migration-runner.js +562 -0
- package/dist/objects/persistence/migration-runner.js.map +1 -0
- package/dist/objects/persistence/replication-manager.js +501 -0
- package/dist/objects/persistence/replication-manager.js.map +1 -0
- package/dist/objects/persistence/tiered-storage-manager.js +595 -0
- package/dist/objects/persistence/tiered-storage-manager.js.map +1 -0
- package/dist/objects/persistence/types.js +14 -0
- package/dist/objects/persistence/types.js.map +1 -0
- package/dist/objects/persistence/wal-manager.js +653 -0
- package/dist/objects/persistence/wal-manager.js.map +1 -0
- package/dist/objects/presets/index.js +20 -0
- package/dist/objects/presets/index.js.map +1 -0
- package/dist/objects/presets/primitives.js +188 -0
- package/dist/objects/presets/primitives.js.map +1 -0
- package/dist/objects/primitives/alarm-adapter.js +141 -0
- package/dist/objects/primitives/alarm-adapter.js.map +1 -0
- package/dist/objects/primitives/index.js +337 -0
- package/dist/objects/primitives/index.js.map +1 -0
- package/dist/objects/primitives/storage-adapter.js +182 -0
- package/dist/objects/primitives/storage-adapter.js.map +1 -0
- package/dist/objects/primitives/with-primitives.js +102 -0
- package/dist/objects/primitives/with-primitives.js.map +1 -0
- package/dist/objects/services/StoreManager.js +227 -0
- package/dist/objects/services/StoreManager.js.map +1 -0
- package/dist/objects/services/index.js +13 -0
- package/dist/objects/services/index.js.map +1 -0
- package/dist/objects/transport/auth-layer.js +1451 -0
- package/dist/objects/transport/auth-layer.js.map +1 -0
- package/dist/objects/transport/capnweb-target.js +355 -0
- package/dist/objects/transport/capnweb-target.js.map +1 -0
- package/dist/objects/transport/chain.js +441 -0
- package/dist/objects/transport/chain.js.map +1 -0
- package/dist/objects/transport/handler.js +58 -0
- package/dist/objects/transport/handler.js.map +1 -0
- package/dist/objects/transport/index.js +53 -0
- package/dist/objects/transport/index.js.map +1 -0
- package/dist/objects/transport/mcp-server.js +690 -0
- package/dist/objects/transport/mcp-server.js.map +1 -0
- package/dist/objects/transport/rest-autowire.js +1507 -0
- package/dist/objects/transport/rest-autowire.js.map +1 -0
- package/dist/objects/transport/rest-router.js +440 -0
- package/dist/objects/transport/rest-router.js.map +1 -0
- package/dist/objects/transport/rpc-server.js +1536 -0
- package/dist/objects/transport/rpc-server.js.map +1 -0
- package/dist/objects/transport/shared.js +575 -0
- package/dist/objects/transport/shared.js.map +1 -0
- package/dist/objects/transport/sync-engine.js +291 -0
- package/dist/objects/transport/sync-engine.js.map +1 -0
- package/dist/objects/transport/types.js +8 -0
- package/dist/objects/transport/types.js.map +1 -0
- package/dist/primitives/bashx/src/ast/analyze.js +1472 -0
- package/dist/primitives/bashx/src/ast/analyze.js.map +1 -0
- package/dist/primitives/bashx/src/ast/parser.js +1488 -0
- package/dist/primitives/bashx/src/ast/parser.js.map +1 -0
- package/dist/primitives/bashx/src/do/commands/crypto.js +1954 -0
- package/dist/primitives/bashx/src/do/commands/crypto.js.map +1 -0
- package/dist/primitives/bashx/src/do/commands/data-processing.js +1812 -0
- package/dist/primitives/bashx/src/do/commands/data-processing.js.map +1 -0
- package/dist/primitives/bashx/src/do/commands/extended-utils.js +804 -0
- package/dist/primitives/bashx/src/do/commands/extended-utils.js.map +1 -0
- package/dist/primitives/bashx/src/do/commands/math-control.js +1122 -0
- package/dist/primitives/bashx/src/do/commands/math-control.js.map +1 -0
- package/dist/primitives/bashx/src/do/commands/posix-utils.js +1015 -0
- package/dist/primitives/bashx/src/do/commands/posix-utils.js.map +1 -0
- package/dist/primitives/bashx/src/do/commands/system-utils.js +687 -0
- package/dist/primitives/bashx/src/do/commands/system-utils.js.map +1 -0
- package/dist/primitives/bashx/src/do/commands/test-command.js +523 -0
- package/dist/primitives/bashx/src/do/commands/test-command.js.map +1 -0
- package/dist/primitives/bashx/src/do/commands/text-processing.js +1550 -0
- package/dist/primitives/bashx/src/do/commands/text-processing.js.map +1 -0
- package/dist/primitives/bashx/src/do/container-executor.js +429 -0
- package/dist/primitives/bashx/src/do/container-executor.js.map +1 -0
- package/dist/primitives/bashx/src/do/index.js +668 -0
- package/dist/primitives/bashx/src/do/index.js.map +1 -0
- package/dist/primitives/bashx/src/do/tiered-executor.js +2647 -0
- package/dist/primitives/bashx/src/do/tiered-executor.js.map +1 -0
- package/dist/primitives/bashx/src/do/worker.js +352 -0
- package/dist/primitives/bashx/src/do/worker.js.map +1 -0
- package/dist/primitives/bashx/src/types.js +10 -0
- package/dist/primitives/bashx/src/types.js.map +1 -0
- package/dist/primitives/fsx/core/backend.js +480 -0
- package/dist/primitives/fsx/core/backend.js.map +1 -0
- package/dist/primitives/fsx/core/constants.js +140 -0
- package/dist/primitives/fsx/core/constants.js.map +1 -0
- package/dist/primitives/fsx/core/fsx.js +1184 -0
- package/dist/primitives/fsx/core/fsx.js.map +1 -0
- package/dist/primitives/fsx/core/glob/glob.js +438 -0
- package/dist/primitives/fsx/core/glob/glob.js.map +1 -0
- package/dist/primitives/fsx/core/glob/index.js +8 -0
- package/dist/primitives/fsx/core/glob/index.js.map +1 -0
- package/dist/primitives/fsx/core/glob/match.js +392 -0
- package/dist/primitives/fsx/core/glob/match.js.map +1 -0
- package/dist/primitives/fsx/core/types.js +307 -0
- package/dist/primitives/fsx/core/types.js.map +1 -0
- package/dist/sandbox/index.js +258 -0
- package/dist/sandbox/index.js.map +1 -0
- package/dist/sdk/capnweb-compat.js +42 -0
- package/dist/sdk/capnweb-compat.js.map +1 -0
- package/dist/sdk/client.js +20 -0
- package/dist/sdk/client.js.map +1 -0
- package/dist/sdk/index.js +17 -0
- package/dist/sdk/index.js.map +1 -0
- package/dist/snippets/artifacts-config.js +241 -0
- package/dist/snippets/artifacts-config.js.map +1 -0
- package/dist/snippets/artifacts-ingest.js +832 -0
- package/dist/snippets/artifacts-ingest.js.map +1 -0
- package/dist/snippets/artifacts-serve.js +1035 -0
- package/dist/snippets/artifacts-serve.js.map +1 -0
- package/dist/snippets/artifacts-types.js +161 -0
- package/dist/snippets/artifacts-types.js.map +1 -0
- package/dist/snippets/cache-probe.js +376 -0
- package/dist/snippets/cache-probe.js.map +1 -0
- package/dist/snippets/cache.js +10 -0
- package/dist/snippets/cache.js.map +1 -0
- package/dist/snippets/events.js +469 -0
- package/dist/snippets/events.js.map +1 -0
- package/dist/snippets/index.js +7 -0
- package/dist/snippets/index.js.map +1 -0
- package/dist/snippets/proxy.js +495 -0
- package/dist/snippets/proxy.js.map +1 -0
- package/dist/snippets/search.js +1759 -0
- package/dist/snippets/search.js.map +1 -0
- package/dist/streams/index.js +30 -0
- package/dist/streams/index.js.map +1 -0
- package/dist/streams/observability.js +68 -0
- package/dist/streams/observability.js.map +1 -0
- package/dist/types/AI.js +92 -0
- package/dist/types/AI.js.map +1 -0
- package/dist/types/AIFunction.js +171 -0
- package/dist/types/AIFunction.js.map +1 -0
- package/dist/types/BrowseVerb.js +89 -0
- package/dist/types/BrowseVerb.js.map +1 -0
- package/dist/types/Browser.js +31 -0
- package/dist/types/Browser.js.map +1 -0
- package/dist/types/Chaos.js +15 -0
- package/dist/types/Chaos.js.map +1 -0
- package/dist/types/CloudflareBindings.js +109 -0
- package/dist/types/CloudflareBindings.js.map +1 -0
- package/dist/types/Collection.js +50 -0
- package/dist/types/Collection.js.map +1 -0
- package/dist/types/DO.js +2 -0
- package/dist/types/DO.js.map +1 -0
- package/dist/types/DOLocation.js +63 -0
- package/dist/types/DOLocation.js.map +1 -0
- package/dist/types/EventHandler.js +57 -0
- package/dist/types/EventHandler.js.map +1 -0
- package/dist/types/Experiment.js +33 -0
- package/dist/types/Experiment.js.map +1 -0
- package/dist/types/Flag.js +57 -0
- package/dist/types/Flag.js.map +1 -0
- package/dist/types/Lifecycle.js +13 -0
- package/dist/types/Lifecycle.js.map +1 -0
- package/dist/types/Location.js +169 -0
- package/dist/types/Location.js.map +1 -0
- package/dist/types/Noun.js +66 -0
- package/dist/types/Noun.js.map +1 -0
- package/dist/types/SessionEvent.js +194 -0
- package/dist/types/SessionEvent.js.map +1 -0
- package/dist/types/Thing.js +55 -0
- package/dist/types/Thing.js.map +1 -0
- package/dist/types/ThingDO.js +153 -0
- package/dist/types/ThingDO.js.map +1 -0
- package/dist/types/Things.js +2 -0
- package/dist/types/Things.js.map +1 -0
- package/dist/types/Verb.js +119 -0
- package/dist/types/Verb.js.map +1 -0
- package/dist/types/WorkflowContext.js +70 -0
- package/dist/types/WorkflowContext.js.map +1 -0
- package/dist/types/analytics-api.js +13 -0
- package/dist/types/analytics-api.js.map +1 -0
- package/dist/types/capabilities.js +135 -0
- package/dist/types/capabilities.js.map +1 -0
- package/dist/types/drizzle.js +12 -0
- package/dist/types/drizzle.js.map +1 -0
- package/dist/types/event.js +201 -0
- package/dist/types/event.js.map +1 -0
- package/dist/types/fn.js +12 -0
- package/dist/types/fn.js.map +1 -0
- package/dist/types/iceberg.js +48 -0
- package/dist/types/iceberg.js.map +1 -0
- package/dist/types/ids.js +170 -0
- package/dist/types/ids.js.map +1 -0
- package/dist/types/index.js +41 -0
- package/dist/types/index.js.map +1 -0
- package/dist/types/introspect.js +54 -0
- package/dist/types/introspect.js.map +1 -0
- package/dist/types/observability.js +124 -0
- package/dist/types/observability.js.map +1 -0
- package/dist/types/sync-protocol.js +175 -0
- package/dist/types/sync-protocol.js.map +1 -0
- package/dist/types/vector.js +13 -0
- package/dist/types/vector.js.map +1 -0
- package/dist/workflows/ScheduleManager.js +473 -0
- package/dist/workflows/ScheduleManager.js.map +1 -0
- package/dist/workflows/StepDOBridge.js +149 -0
- package/dist/workflows/StepDOBridge.js.map +1 -0
- package/dist/workflows/StepResultStorage.js +232 -0
- package/dist/workflows/StepResultStorage.js.map +1 -0
- package/dist/workflows/WaitForEventManager.js +461 -0
- package/dist/workflows/WaitForEventManager.js.map +1 -0
- package/dist/workflows/analyzer.js +332 -0
- package/dist/workflows/analyzer.js.map +1 -0
- package/dist/workflows/compat/activity-router.js +484 -0
- package/dist/workflows/compat/activity-router.js.map +1 -0
- package/dist/workflows/compat/backends/cloudflare-workflows.js +431 -0
- package/dist/workflows/compat/backends/cloudflare-workflows.js.map +1 -0
- package/dist/workflows/compat/backends/index.js +14 -0
- package/dist/workflows/compat/backends/index.js.map +1 -0
- package/dist/workflows/compat/errors/index.js +375 -0
- package/dist/workflows/compat/errors/index.js.map +1 -0
- package/dist/workflows/compat/index.js +79 -0
- package/dist/workflows/compat/index.js.map +1 -0
- package/dist/workflows/compat/inngest/index.js +989 -0
- package/dist/workflows/compat/inngest/index.js.map +1 -0
- package/dist/workflows/compat/qstash/index.js +1263 -0
- package/dist/workflows/compat/qstash/index.js.map +1 -0
- package/dist/workflows/compat/temporal/activities.js +739 -0
- package/dist/workflows/compat/temporal/activities.js.map +1 -0
- package/dist/workflows/compat/temporal/child-workflows.js +154 -0
- package/dist/workflows/compat/temporal/child-workflows.js.map +1 -0
- package/dist/workflows/compat/temporal/client.js +381 -0
- package/dist/workflows/compat/temporal/client.js.map +1 -0
- package/dist/workflows/compat/temporal/context.js +309 -0
- package/dist/workflows/compat/temporal/context.js.map +1 -0
- package/dist/workflows/compat/temporal/determinism.js +216 -0
- package/dist/workflows/compat/temporal/determinism.js.map +1 -0
- package/dist/workflows/compat/temporal/errors.js +128 -0
- package/dist/workflows/compat/temporal/errors.js.map +1 -0
- package/dist/workflows/compat/temporal/index.js +2464 -0
- package/dist/workflows/compat/temporal/index.js.map +1 -0
- package/dist/workflows/compat/temporal/saga.js +504 -0
- package/dist/workflows/compat/temporal/saga.js.map +1 -0
- package/dist/workflows/compat/temporal/signals.js +364 -0
- package/dist/workflows/compat/temporal/signals.js.map +1 -0
- package/dist/workflows/compat/temporal/storage.js +271 -0
- package/dist/workflows/compat/temporal/storage.js.map +1 -0
- package/dist/workflows/compat/temporal/timers.js +347 -0
- package/dist/workflows/compat/temporal/timers.js.map +1 -0
- package/dist/workflows/compat/temporal/types.js +7 -0
- package/dist/workflows/compat/temporal/types.js.map +1 -0
- package/dist/workflows/compat/temporal/unified-primitives.js +339 -0
- package/dist/workflows/compat/temporal/unified-primitives.js.map +1 -0
- package/dist/workflows/compat/trigger/index.js +468 -0
- package/dist/workflows/compat/trigger/index.js.map +1 -0
- package/dist/workflows/compat/utils/index.js +69 -0
- package/dist/workflows/compat/utils/index.js.map +1 -0
- package/dist/workflows/context/correlation-capability.js +266 -0
- package/dist/workflows/context/correlation-capability.js.map +1 -0
- package/dist/workflows/context/correlation.js +484 -0
- package/dist/workflows/context/correlation.js.map +1 -0
- package/dist/workflows/context/experiment.js +289 -0
- package/dist/workflows/context/experiment.js.map +1 -0
- package/dist/workflows/context/flag.js +244 -0
- package/dist/workflows/context/flag.js.map +1 -0
- package/dist/workflows/context/foundation.js +648 -0
- package/dist/workflows/context/foundation.js.map +1 -0
- package/dist/workflows/context/human-base.js +106 -0
- package/dist/workflows/context/human-base.js.map +1 -0
- package/dist/workflows/context/human.js +368 -0
- package/dist/workflows/context/human.js.map +1 -0
- package/dist/workflows/context/measure.js +354 -0
- package/dist/workflows/context/measure.js.map +1 -0
- package/dist/workflows/context/rate-limit.js +358 -0
- package/dist/workflows/context/rate-limit.js.map +1 -0
- package/dist/workflows/context/user.js +117 -0
- package/dist/workflows/context/user.js.map +1 -0
- package/dist/workflows/context/vault.js +360 -0
- package/dist/workflows/context/vault.js.map +1 -0
- package/dist/workflows/data/entity-events/entity-events.js +489 -0
- package/dist/workflows/data/entity-events/entity-events.js.map +1 -0
- package/dist/workflows/data/experiment/index.js +599 -0
- package/dist/workflows/data/experiment/index.js.map +1 -0
- package/dist/workflows/data/goal/context.js +558 -0
- package/dist/workflows/data/goal/context.js.map +1 -0
- package/dist/workflows/data/goal/index.js +32 -0
- package/dist/workflows/data/goal/index.js.map +1 -0
- package/dist/workflows/data/measure/index.js +840 -0
- package/dist/workflows/data/measure/index.js.map +1 -0
- package/dist/workflows/data/stream/index.js +1215 -0
- package/dist/workflows/data/stream/index.js.map +1 -0
- package/dist/workflows/data/track/context.js +883 -0
- package/dist/workflows/data/track/context.js.map +1 -0
- package/dist/workflows/data/track/index.js +15 -0
- package/dist/workflows/data/track/index.js.map +1 -0
- package/dist/workflows/data/view/context.js +864 -0
- package/dist/workflows/data/view/context.js.map +1 -0
- package/dist/workflows/domain.js +93 -0
- package/dist/workflows/domain.js.map +1 -0
- package/dist/workflows/flag.js +176 -0
- package/dist/workflows/flag.js.map +1 -0
- package/dist/workflows/flags.js +217 -0
- package/dist/workflows/flags.js.map +1 -0
- package/dist/workflows/hash.js +209 -0
- package/dist/workflows/hash.js.map +1 -0
- package/dist/workflows/index.js +50 -0
- package/dist/workflows/index.js.map +1 -0
- package/dist/workflows/on.js +378 -0
- package/dist/workflows/on.js.map +1 -0
- package/dist/workflows/pipeline-promise.js +481 -0
- package/dist/workflows/pipeline-promise.js.map +1 -0
- package/dist/workflows/pipeline-types.js +20 -0
- package/dist/workflows/pipeline-types.js.map +1 -0
- package/dist/workflows/proxy.js +76 -0
- package/dist/workflows/proxy.js.map +1 -0
- package/dist/workflows/runtime.js +310 -0
- package/dist/workflows/runtime.js.map +1 -0
- package/dist/workflows/schedule-builder.js +327 -0
- package/dist/workflows/schedule-builder.js.map +1 -0
- package/dist/workflows/visibility/index.js +148 -0
- package/dist/workflows/visibility/index.js.map +1 -0
- package/dist/workflows/visibility/query-parser.js +150 -0
- package/dist/workflows/visibility/query-parser.js.map +1 -0
- package/dist/workflows/visibility/store.js +223 -0
- package/dist/workflows/visibility/store.js.map +1 -0
- package/dist/workflows/visibility/types.js +30 -0
- package/dist/workflows/visibility/types.js.map +1 -0
- package/dist/workflows/workflow.js +53 -0
- package/dist/workflows/workflow.js.map +1 -0
- package/package.json +279 -46
|
@@ -0,0 +1,862 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Compact Binary Inverted Index Format
|
|
3
|
+
*
|
|
4
|
+
* A memory-efficient inverted index format designed for Cloudflare Snippets
|
|
5
|
+
* (2MB memory, <5ms CPU). Enables full-text search via term -> posting list lookups.
|
|
6
|
+
*
|
|
7
|
+
* Binary Format:
|
|
8
|
+
* ```
|
|
9
|
+
* Header (16 bytes):
|
|
10
|
+
* magic: 4 bytes "INVI"
|
|
11
|
+
* version: 2 bytes (little-endian, currently 1)
|
|
12
|
+
* term_count: 4 bytes (little-endian, number of unique terms)
|
|
13
|
+
* flags: 2 bytes (bit flags for features)
|
|
14
|
+
* reserved: 4 bytes (future use, must be 0)
|
|
15
|
+
*
|
|
16
|
+
* Term Index (variable length):
|
|
17
|
+
* For each term (sorted lexicographically):
|
|
18
|
+
*     term_length: 1 byte (max 255 UTF-8 bytes)
|
|
19
|
+
* term: term_length bytes (UTF-8)
|
|
20
|
+
* offset: 4 bytes (little-endian, byte offset to posting list)
|
|
21
|
+
* length: 4 bytes (little-endian, byte length of posting list)
|
|
22
|
+
*
|
|
23
|
+
* Term Index End Marker:
|
|
24
|
+
* 0x00 (single null byte indicating end of term index)
|
|
25
|
+
*
|
|
26
|
+
* Posting Lists (variable length):
|
|
27
|
+
* For each posting list:
|
|
28
|
+
* doc_count: varint (number of document IDs)
|
|
29
|
+
* doc_ids: varint-encoded, delta-compressed document IDs
|
|
30
|
+
* ```
|
|
31
|
+
*
|
|
32
|
+
* Key Design Decisions:
|
|
33
|
+
* - Fixed 16-byte header for fast validation
|
|
34
|
+
* - Terms sorted for binary search (O(log n) lookup)
|
|
35
|
+
* - Term index separate from posting lists for range requests
|
|
36
|
+
* - Varint encoding for posting lists (1-5 bytes per ID)
|
|
37
|
+
* - Delta encoding for sorted doc IDs (smaller deltas = smaller varints)
|
|
38
|
+
*
|
|
39
|
+
* Memory Budget Analysis (from issue):
|
|
40
|
+
* - ~56K terms vocabulary-only per 1MB
|
|
41
|
+
* - With posting lists: depends on average list size
|
|
42
|
+
* - Typical: 10K terms + 100K postings in ~500KB
|
|
43
|
+
*
|
|
44
|
+
* @example
|
|
45
|
+
* ```typescript
|
|
46
|
+
* // Building an index
|
|
47
|
+
* const writer = new InvertedIndexWriter()
|
|
48
|
+
* writer.addPosting('hello', 1)
|
|
49
|
+
* writer.addPosting('hello', 5)
|
|
50
|
+
* writer.addPosting('world', 2)
|
|
51
|
+
* const bytes = writer.serialize()
|
|
52
|
+
*
|
|
53
|
+
* // Querying an index
|
|
54
|
+
* const reader = InvertedIndexReader.deserialize(bytes)
|
|
55
|
+
* const postings = reader.getPostings('hello') // [1, 5]
|
|
56
|
+
* ```
|
|
57
|
+
*
|
|
58
|
+
* @see https://en.wikipedia.org/wiki/Inverted_index
|
|
59
|
+
* @module db/iceberg/inverted-index
|
|
60
|
+
*/
|
|
61
|
+
// ============================================================================
|
|
62
|
+
// Constants
|
|
63
|
+
// ============================================================================
|
|
64
|
+
/** Magic bytes that open every serialized index: ASCII "INVI" */
export const INVERTED_INDEX_MAGIC = Uint8Array.of(0x49, 0x4e, 0x56, 0x49);
/** Format version number stored in the header */
export const INVERTED_INDEX_VERSION = 1;
/** Size of the fixed header, in bytes */
export const HEADER_SIZE = 16;
/** Largest term length that fits the single-byte length prefix */
export const MAX_TERM_LENGTH = 255;
|
|
72
|
+
/**
 * Feature flags for the index
 *
 * Enum-style object: each name maps to its numeric value and each numeric
 * value maps back to its name.
 */
export var IndexFlags;
(function (target) {
    const members = [
        // No special features
        ['NONE', 0],
        // Term frequencies stored (not yet implemented)
        ['HAS_FREQUENCIES', 1],
        // Position data stored (not yet implemented)
        ['HAS_POSITIONS', 2],
    ];
    for (const [name, value] of members) {
        target[name] = value;
        target[value] = name;
    }
})(IndexFlags || (IndexFlags = {}));
|
|
84
|
+
// ============================================================================
|
|
85
|
+
// Varint Encoding/Decoding
|
|
86
|
+
// ============================================================================
|
|
87
|
+
/**
 * Encode a non-negative integer as a varint
 *
 * Uses LEB128 encoding: 7 bits per byte, high bit indicates continuation.
 * Values 0-127 use 1 byte, 128-16383 use 2 bytes, etc.
 *
 * The encoder relies on 32-bit bitwise operators, so only unsigned 32-bit
 * integers can be represented. Anything else is rejected instead of being
 * silently truncated to a different number.
 *
 * @param value - Integer in the range 0..0xFFFFFFFF
 * @returns Varint-encoded bytes (1 to 5 bytes)
 * @throws Error if value is negative, not an integer, or above 0xFFFFFFFF
 */
export function encodeVarint(value) {
    if (!Number.isInteger(value) || value < 0) {
        throw new Error('Varint encoding requires non-negative integers');
    }
    if (value > 0xffffffff) {
        // `&` and `>>>` would wrap the value modulo 2^32 and corrupt the output.
        throw new Error(`Varint value exceeds 32-bit range: ${value}`);
    }
    const bytes = [];
    let remaining = value;
    do {
        let byte = remaining & 0x7f;
        remaining >>>= 7;
        if (remaining !== 0) {
            byte |= 0x80; // Set continuation bit
        }
        bytes.push(byte);
    } while (remaining !== 0);
    return new Uint8Array(bytes);
}
|
|
111
|
+
/**
 * Decode a varint from a byte array at the given offset
 *
 * Reads up to 5 LEB128 bytes (7 payload bits each, high bit = continuation)
 * and returns the value as an unsigned 32-bit integer. Payload bits of the
 * fifth byte that fall outside the low 32 bits are discarded by the shift.
 *
 * @param bytes - Byte array containing the varint
 * @param offset - Starting offset in the array
 * @returns Tuple of [decoded value, bytes consumed]
 * @throws Error if the data ends before the varint terminates, or if the
 *   varint is longer than 5 bytes
 */
export function decodeVarint(bytes, offset) {
    let value = 0;
    let shift = 0;
    let bytesRead = 0;
    while (offset + bytesRead < bytes.length) {
        const byte = bytes[offset + bytesRead];
        bytesRead++;
        value |= (byte & 0x7f) << shift;
        shift += 7;
        if ((byte & 0x80) === 0) {
            // Bitwise operators yield signed int32 results; `>>> 0` reinterprets
            // the accumulator as unsigned so values >= 2^31 are not negative.
            return [value >>> 0, bytesRead];
        }
        // Prevent overflow (max 5 bytes for 32-bit values)
        if (bytesRead >= 5) {
            throw new Error('Varint exceeds maximum size');
        }
    }
    throw new Error('Unexpected end of varint');
}
|
|
137
|
+
/**
 * Calculate the byte size of a varint-encoded value
 *
 * Each varint byte carries 7 payload bits, so the size grows by one byte
 * every time the value reaches the next power of 128, capped at 5 bytes.
 * Negative inputs report the maximum size of 5.
 *
 * @param value - Value to measure
 * @returns Number of bytes needed (1 to 5)
 */
export function varintSize(value) {
    if (value < 0) {
        return 5; // Max size for negative (treated as large positive)
    }
    let size = 1;
    // `!(value < limit)` rather than `value >= limit` so that inputs which
    // compare false against everything (e.g. NaN) end up at the maximum size.
    for (let limit = 128; size < 5 && !(value < limit); limit *= 128) {
        size++;
    }
    return size;
}
|
|
156
|
+
// ============================================================================
|
|
157
|
+
// Posting List Encoding/Decoding
|
|
158
|
+
// ============================================================================
|
|
159
|
+
/**
 * Encode a sorted array of document IDs as a delta-compressed posting list
 *
 * Layout of the result:
 * - doc_count: varint holding the number of IDs
 * - doc_ids: one varint per ID, each storing the gap to the previous ID
 *   (the first gap is measured from 0)
 *
 * An empty input encodes as the single varint 0.
 *
 * @param docIds - Sorted array of document IDs
 * @returns Encoded posting list bytes
 */
export function encodePostingList(docIds) {
    const count = docIds.length;
    if (count === 0) {
        return encodeVarint(0);
    }
    // Gaps between consecutive IDs, in input order.
    const gaps = [];
    let previous = 0;
    for (const id of docIds) {
        gaps.push(id - previous);
        previous = id;
    }
    // Size the output buffer up front: count prefix plus every gap.
    const byteLength = gaps.reduce((sum, gap) => sum + varintSize(gap), varintSize(count));
    const out = new Uint8Array(byteLength);
    // Emit the count followed by each gap, back to back.
    let cursor = 0;
    for (const number of [count, ...gaps]) {
        const encoded = encodeVarint(number);
        out.set(encoded, cursor);
        cursor += encoded.length;
    }
    return out;
}
|
|
199
|
+
/**
 * Decode a delta-compressed posting list
 *
 * Reads the leading varint count, then that many varint gaps, and rebuilds
 * each document ID as the running sum of the gaps.
 *
 * @param bytes - Encoded posting list bytes
 * @returns Array of document IDs (empty for empty input or a zero count)
 */
export function decodePostingList(bytes) {
    if (bytes.length === 0) {
        return [];
    }
    const [count, headerSize] = decodeVarint(bytes, 0);
    const ids = [];
    let cursor = headerSize;
    let current = 0;
    while (ids.length < count) {
        const [gap, consumed] = decodeVarint(bytes, cursor);
        cursor += consumed;
        current += gap;
        ids.push(current);
    }
    return ids;
}
|
|
228
|
+
// ============================================================================
|
|
229
|
+
// InvertedIndexWriter
|
|
230
|
+
// ============================================================================
|
|
231
|
+
/**
 * Builder for creating inverted index files
 *
 * Collects postings (term -> doc ID mappings) and serializes them into
 * the compact binary format for efficient storage and lookup.
 *
 * Terms are stored exactly as given: no case folding or other normalization
 * is applied, so callers should normalize terms before adding them.
 *
 * @example
 * ```typescript
 * const writer = new InvertedIndexWriter()
 *
 * // Add postings from documents
 * for (const [docId, doc] of documents.entries()) {
 *   for (const term of tokenize(doc.text)) {
 *     writer.addPosting(term, docId)
 *   }
 * }
 *
 * // Serialize to bytes
 * const bytes = writer.serialize()
 *
 * // Or get size estimate first
 * console.log(`Estimated size: ${writer.estimateSize()} bytes`)
 * ```
 */
export class InvertedIndexWriter {
    /** Map from term to set of document IDs */
    postings = new Map();
    /** Feature flags */
    flags = IndexFlags.NONE;
    /**
     * Add a posting (term -> document ID mapping)
     *
     * Terms that are empty or whose UTF-8 encoding is longer than
     * MAX_TERM_LENGTH bytes are silently skipped, because the format stores
     * the term's byte length in a single byte.
     *
     * @param term - The term, stored as-is (not normalized)
     * @param docId - Document ID containing this term (integer in 0..0xFFFFFFFF)
     * @throws Error if docId is negative, not an integer, or above 0xFFFFFFFF
     */
    addPosting(term, docId) {
        if (term.length === 0 || term.length > MAX_TERM_LENGTH) {
            return; // Skip invalid terms
        }
        // The limit applies to UTF-8 bytes, not UTF-16 code units. One code unit
        // encodes to at most 3 bytes, so only terms that could overflow are measured.
        if (term.length * 3 > MAX_TERM_LENGTH &&
            new TextEncoder().encode(term).length > MAX_TERM_LENGTH) {
            return; // Skip terms whose encoded form does not fit the length byte
        }
        // Posting lists use varints of at most 5 bytes, which cover unsigned 32-bit values only.
        if (!Number.isInteger(docId) || docId < 0 || docId > 0xffffffff) {
            throw new Error(`Invalid document ID: ${docId}`);
        }
        let docIds = this.postings.get(term);
        if (!docIds) {
            docIds = new Set();
            this.postings.set(term, docIds);
        }
        docIds.add(docId);
    }
    /**
     * Add multiple postings at once
     *
     * @param postings - Array of posting entries ({ term, docId })
     */
    addPostings(postings) {
        for (const { term, docId } of postings) {
            this.addPosting(term, docId);
        }
    }
    /**
     * Add all terms from a document
     *
     * @param docId - Document ID
     * @param terms - Array of terms in the document
     */
    addDocument(docId, terms) {
        for (const term of terms) {
            this.addPosting(term, docId);
        }
    }
    /**
     * Get the number of unique terms
     */
    get termCount() {
        return this.postings.size;
    }
    /**
     * Get the total number of postings (term-doc pairs)
     */
    get postingCount() {
        let count = 0;
        for (const docIds of this.postings.values()) {
            count += docIds.size;
        }
        return count;
    }
    /**
     * Estimate the serialized size in bytes
     *
     * Useful for checking memory constraints before serializing.
     *
     * @returns Estimated byte size
     */
    estimateSize() {
        const encoder = new TextEncoder();
        let size = HEADER_SIZE + 1; // Header + term index end marker
        for (const [term, docIds] of this.postings) {
            // Term index entry: term_length (1) + term + offset (4) + length (4)
            size += 1 + encoder.encode(term).length + 4 + 4;
            // Posting list: count varint + one varint per delta
            const sortedIds = Array.from(docIds).sort((a, b) => a - b);
            size += varintSize(sortedIds.length);
            let prevId = 0;
            for (const id of sortedIds) {
                size += varintSize(id - prevId);
                prevId = id;
            }
        }
        return size;
    }
    /**
     * Check if the index fits within memory constraints
     *
     * @param maxBytes - Maximum allowed size (default: 2MB for Snippets)
     * @returns true if within constraints
     */
    fitsInMemory(maxBytes = 2 * 1024 * 1024) {
        return this.estimateSize() <= maxBytes;
    }
    /**
     * Serialize the index to binary format
     *
     * Layout: 16-byte header, term index (sorted terms with the absolute byte
     * offset and length of each posting list), a 0x00 end marker, then the
     * posting lists in the same order as the terms.
     *
     * @returns Binary inverted index data
     * @throws Error if any term exceeds MAX_TERM_LENGTH
     */
    serialize() {
        const encoder = new TextEncoder();
        // Sort terms lexicographically for binary search
        const sortedTerms = Array.from(this.postings.keys()).sort();
        // First pass: encode every term and posting list once, accumulating section sizes
        const encodedEntries = [];
        let termIndexSize = 1; // End marker
        let totalPostingListSize = 0;
        for (const term of sortedTerms) {
            const termBytes = encoder.encode(term);
            if (termBytes.length > MAX_TERM_LENGTH) {
                throw new Error(`Term exceeds maximum length: ${term.substring(0, 50)}...`);
            }
            const docIds = Array.from(this.postings.get(term)).sort((a, b) => a - b);
            const postingBytes = encodePostingList(docIds);
            encodedEntries.push({ termBytes, postingBytes });
            termIndexSize += 1 + termBytes.length + 4 + 4; // length + term + offset + length
            totalPostingListSize += postingBytes.length;
        }
        // Posting lists start right after the header and term index
        const postingListsOffset = HEADER_SIZE + termIndexSize;
        // Allocate result buffer
        const result = new Uint8Array(postingListsOffset + totalPostingListSize);
        const view = new DataView(result.buffer);
        // Write header
        result.set(INVERTED_INDEX_MAGIC, 0);
        view.setUint16(4, INVERTED_INDEX_VERSION, true);
        view.setUint32(6, sortedTerms.length, true);
        view.setUint16(10, this.flags, true);
        view.setUint32(12, 0, true); // reserved
        // Second pass: write each term index entry and its posting list
        let indexCursor = HEADER_SIZE;
        let postingCursor = postingListsOffset;
        for (const { termBytes, postingBytes } of encodedEntries) {
            result[indexCursor++] = termBytes.length;
            result.set(termBytes, indexCursor);
            indexCursor += termBytes.length;
            // Offsets are absolute (from the start of the file)
            view.setUint32(indexCursor, postingCursor, true);
            indexCursor += 4;
            view.setUint32(indexCursor, postingBytes.length, true);
            indexCursor += 4;
            result.set(postingBytes, postingCursor);
            postingCursor += postingBytes.length;
        }
        // Write end marker
        result[indexCursor] = 0;
        return result;
    }
    /**
     * Clear all postings
     */
    clear() {
        this.postings.clear();
    }
}
|
|
431
|
+
// ============================================================================
|
|
432
|
+
// InvertedIndexReader
|
|
433
|
+
// ============================================================================
|
|
434
|
+
/**
 * Reader for querying inverted index files
 *
 * Supports both full in-memory loading and range-addressable access
 * for partial loading (fetch term index first, then specific posting lists).
 *
 * @example
 * ```typescript
 * // Full loading
 * const reader = InvertedIndexReader.deserialize(bytes)
 * const postings = reader.getPostings('hello')
 *
 * // Range-addressable access (two requests)
 * const header = InvertedIndexReader.parseHeader(headerBytes)
 * const termIndex = InvertedIndexReader.parseTermIndex(termIndexBytes, header.termCount)
 * const entry = InvertedIndexReader.findTerm(termIndex, 'hello')
 * // Fetch bytes[entry.offset:entry.offset+entry.length]
 * const postings = InvertedIndexReader.parsePostingList(postingBytes)
 * ```
 */
export class InvertedIndexReader {
    metadata;
    termIndex;
    postingListsData;
    postingListsOffset;
    /**
     * @param metadata - Parsed header ({ version, termCount, flags })
     * @param termIndex - Sorted array of term entries ({ term, offset, length })
     * @param postingListsData - Bytes of the posting lists section
     * @param postingListsOffset - Absolute file offset at which postingListsData starts
     */
    constructor(metadata, termIndex, postingListsData, postingListsOffset) {
        this.metadata = metadata;
        this.termIndex = termIndex;
        this.postingListsData = postingListsData;
        this.postingListsOffset = postingListsOffset;
    }
    /**
     * Parse the header from raw bytes
     *
     * Validates the magic bytes and the version. The reserved bytes are not inspected.
     *
     * @param bytes - At least HEADER_SIZE bytes
     * @returns Parsed metadata ({ version, termCount, flags })
     * @throws Error if the input is too short, the magic does not match,
     *   or the version is unsupported
     */
    static parseHeader(bytes) {
        if (bytes.length < HEADER_SIZE) {
            throw new Error(`Header too short: expected ${HEADER_SIZE} bytes, got ${bytes.length}`);
        }
        // Validate magic
        for (let i = 0; i < INVERTED_INDEX_MAGIC.length; i++) {
            if (bytes[i] !== INVERTED_INDEX_MAGIC[i]) {
                throw new Error('Invalid inverted index: magic mismatch');
            }
        }
        const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
        const version = view.getUint16(4, true);
        const termCount = view.getUint32(6, true);
        const flags = view.getUint16(10, true);
        if (version !== INVERTED_INDEX_VERSION) {
            throw new Error(`Unsupported version: ${version}`);
        }
        return { version, termCount, flags };
    }
    /**
     * Parse the term index from raw bytes (starting after header)
     *
     * @param bytes - Bytes containing the term index (starting at offset 0)
     * @param termCount - Number of terms to parse
     * @returns Array of term entries ({ term, offset, length })
     * @throws Error if the data ends, or an end marker appears, before
     *   termCount entries have been read
     */
    static parseTermIndex(bytes, termCount) {
        return InvertedIndexReader.#readTermIndex(bytes, termCount).entries;
    }
    /**
     * Parse term index entries and report how many bytes they occupied.
     *
     * @param bytes - Bytes containing the term index (starting at offset 0)
     * @param termCount - Number of terms to parse
     * @returns { entries, byteLength } where byteLength excludes the end marker
     */
    static #readTermIndex(bytes, termCount) {
        const decoder = new TextDecoder();
        const entries = [];
        let offset = 0;
        for (let i = 0; i < termCount; i++) {
            if (offset >= bytes.length) {
                throw new Error('Unexpected end of term index');
            }
            const termLength = bytes[offset++];
            // The writer never emits empty terms, so a zero length byte is always
            // the end marker, which means the header promised more terms than exist.
            if (termLength === 0) {
                throw new Error('Unexpected end of term index');
            }
            // The entry needs the term bytes plus 4-byte offset and 4-byte length.
            if (offset + termLength + 8 > bytes.length) {
                throw new Error('Unexpected end of term index');
            }
            // subarray avoids copying; the decoder only reads the bytes.
            const term = decoder.decode(bytes.subarray(offset, offset + termLength));
            offset += termLength;
            const view = new DataView(bytes.buffer, bytes.byteOffset + offset, 8);
            entries.push({
                term,
                offset: view.getUint32(0, true),
                length: view.getUint32(4, true),
            });
            offset += 8;
        }
        return { entries, byteLength: offset };
    }
    /**
     * Parse a posting list from raw bytes
     *
     * @param bytes - Encoded posting list
     * @returns Array of document IDs
     */
    static parsePostingList(bytes) {
        return decodePostingList(bytes);
    }
    /**
     * Find a term in the sorted term index using binary search
     *
     * @param termIndex - Sorted array of term entries
     * @param term - Term to find
     * @returns Term entry or null if not found
     */
    static findTerm(termIndex, term) {
        let left = 0;
        let right = termIndex.length - 1;
        while (left <= right) {
            const mid = (left + right) >>> 1;
            const midTerm = termIndex[mid].term;
            // Use simple lexicographic comparison (matches default sort order)
            if (term === midTerm) {
                return termIndex[mid];
            }
            else if (term < midTerm) {
                right = mid - 1;
            }
            else {
                left = mid + 1;
            }
        }
        return null;
    }
    /**
     * Deserialize a complete inverted index from bytes
     *
     * @param bytes - Full inverted index data
     * @returns Reader instance
     * @throws Error if the header is invalid, the term index is truncated,
     *   or the term index end marker is missing
     */
    static deserialize(bytes) {
        const metadata = InvertedIndexReader.parseHeader(bytes);
        // Parse the term index through a view (no copy of the remaining file)
        const { entries, byteLength } = InvertedIndexReader.#readTermIndex(bytes.subarray(HEADER_SIZE), metadata.termCount);
        // The term index is terminated by a single 0x00 byte
        const endMarkerOffset = HEADER_SIZE + byteLength;
        if (bytes[endMarkerOffset] !== 0) {
            throw new Error('Invalid inverted index: missing term index end marker');
        }
        // Posting lists start right after the end marker. The offset is derived from
        // the raw bytes consumed, not from re-encoding the decoded terms.
        const postingListsOffset = endMarkerOffset + 1;
        const postingListsData = bytes.slice(postingListsOffset);
        return new InvertedIndexReader(metadata, entries, postingListsData, postingListsOffset);
    }
    /**
     * Get metadata about the index
     *
     * @returns Shallow copy of the parsed header metadata
     */
    getMetadata() {
        return { ...this.metadata };
    }
    /**
     * Get the number of unique terms
     */
    get termCount() {
        return this.metadata.termCount;
    }
    /**
     * Get all terms in the index
     *
     * @returns Terms in index order
     */
    getTerms() {
        return this.termIndex.map((e) => e.term);
    }
    /**
     * Check if a term exists in the index
     *
     * @param term - Term to check
     * @returns true if the term exists
     */
    hasTerm(term) {
        return InvertedIndexReader.findTerm(this.termIndex, term) !== null;
    }
    /**
     * Locate the encoded posting list bytes for a term.
     *
     * @param term - Term to look up
     * @returns View over the term's posting list bytes, or null if term not found
     * @throws Error if the entry points outside the loaded posting list data
     */
    #postingBytes(term) {
        const entry = InvertedIndexReader.findTerm(this.termIndex, term);
        if (!entry) {
            return null;
        }
        // Entry offsets are absolute; convert to an offset within postingListsData
        const start = entry.offset - this.postingListsOffset;
        const end = start + entry.length;
        if (start < 0 || end > this.postingListsData.length) {
            throw new Error(`Posting list for term "${term}" is out of bounds`);
        }
        return this.postingListsData.subarray(start, end);
    }
    /**
     * Get the posting list for a term
     *
     * @param term - Term to look up
     * @returns Array of document IDs, or empty array if term not found
     * @throws Error if the term's entry points outside the posting list data
     */
    getPostings(term) {
        const postingBytes = this.#postingBytes(term);
        if (!postingBytes) {
            return [];
        }
        return InvertedIndexReader.parsePostingList(postingBytes);
    }
    /**
     * Get document frequency (number of docs containing term)
     *
     * Reads only the count prefix of the posting list instead of decoding every ID.
     *
     * @param term - Term to check
     * @returns Number of documents, or 0 if term not found
     */
    getDocumentFrequency(term) {
        const postingBytes = this.#postingBytes(term);
        if (!postingBytes || postingBytes.length === 0) {
            return 0;
        }
        const [count] = decodeVarint(postingBytes, 0);
        return count;
    }
    /**
     * Get the range request for a term's posting list
     *
     * Useful for range-addressable access without loading full index.
     *
     * @param term - Term to look up
     * @returns Range request ({ start, end }, end exclusive) or null if term not found
     */
    getPostingListRange(term) {
        const entry = InvertedIndexReader.findTerm(this.termIndex, term);
        if (!entry) {
            return null;
        }
        return {
            start: entry.offset,
            end: entry.offset + entry.length,
        };
    }
    /**
     * Get range request for the term index portion
     *
     * @returns Range request for term index
     */
    getTermIndexRange() {
        // First posting list offset tells us where term index ends
        if (this.termIndex.length === 0) {
            return { start: HEADER_SIZE, end: HEADER_SIZE + 1 }; // Just end marker
        }
        return {
            start: HEADER_SIZE,
            end: this.termIndex[0].offset,
        };
    }
    /**
     * Search for terms matching a prefix
     *
     * @param prefix - Prefix to match
     * @param limit - Maximum results (default: 100)
     * @returns Array of matching term entries
     */
    searchPrefix(prefix, limit = 100) {
        const results = [];
        // Binary search to find first term >= prefix
        let left = 0;
        let right = this.termIndex.length - 1;
        let startIdx = this.termIndex.length;
        while (left <= right) {
            const mid = (left + right) >>> 1;
            const midTerm = this.termIndex[mid].term;
            if (midTerm >= prefix) {
                startIdx = mid;
                right = mid - 1;
            }
            else {
                left = mid + 1;
            }
        }
        // Collect matching terms; they are contiguous because the index is sorted
        for (let i = startIdx; i < this.termIndex.length && results.length < limit; i++) {
            const entry = this.termIndex[i];
            if (!entry.term.startsWith(prefix)) {
                break;
            }
            results.push(entry);
        }
        return results;
    }
    /**
     * Intersect posting lists for multiple terms (AND query)
     *
     * @param terms - Terms to intersect
     * @returns Document IDs that contain ALL terms
     */
    intersect(terms) {
        if (terms.length === 0) {
            return [];
        }
        // Get all posting lists
        const postingLists = terms.map((t) => this.getPostings(t));
        // If any term is missing, result is empty
        if (postingLists.some((p) => p.length === 0)) {
            return [];
        }
        // Start with smallest list for efficiency
        const sorted = postingLists.slice().sort((a, b) => a.length - b.length);
        let result = sorted[0];
        for (let i = 1; i < sorted.length && result.length > 0; i++) {
            result = intersectSorted(result, sorted[i]);
        }
        return result;
    }
    /**
     * Union posting lists for multiple terms (OR query)
     *
     * @param terms - Terms to union
     * @returns Document IDs that contain ANY term
     */
    union(terms) {
        if (terms.length === 0) {
            return [];
        }
        const postingLists = terms.map((t) => this.getPostings(t)).filter((p) => p.length > 0);
        if (postingLists.length === 0) {
            return [];
        }
        if (postingLists.length === 1) {
            return postingLists[0];
        }
        // Merge all lists
        let result = postingLists[0];
        for (let i = 1; i < postingLists.length; i++) {
            result = unionSorted(result, postingLists[i]);
        }
        return result;
    }
}
|
|
747
|
+
// ============================================================================
|
|
748
|
+
// Helper Functions
|
|
749
|
+
// ============================================================================
|
|
750
|
+
/**
 * Compute the intersection of two ascending-sorted number arrays with a
 * single two-pointer pass.
 *
 * @param a - First sorted array
 * @param b - Second sorted array
 * @returns New array holding the elements found in both inputs, in order
 */
function intersectSorted(a, b) {
    const common = [];
    let ai = 0;
    let bi = 0;
    while (ai < a.length && bi < b.length) {
        const left = a[ai];
        const right = b[bi];
        if (left !== right) {
            // Advance whichever side is behind.
            if (left < right) {
                ai += 1;
            }
            else {
                bi += 1;
            }
            continue;
        }
        // Present in both inputs: keep it and move past it on both sides.
        common.push(left);
        ai += 1;
        bi += 1;
    }
    return common;
}
|
|
776
|
+
/**
 * Merge two ascending-sorted number arrays into one sorted array,
 * emitting a value that appears in both inputs only once.
 *
 * @param a - First sorted array
 * @param b - Second sorted array
 * @returns New sorted array with the elements of both inputs
 */
function unionSorted(a, b) {
    const merged = [];
    let ai = 0;
    let bi = 0;
    while (ai < a.length && bi < b.length) {
        const left = a[ai];
        const right = b[bi];
        const same = left === right;
        if (same || left < right) {
            merged.push(left);
            ai += 1;
            // A shared value is emitted once, so skip its twin in `b`.
            if (same) {
                bi += 1;
            }
        }
        else {
            merged.push(right);
            bi += 1;
        }
    }
    // At most one of the inputs still has elements; append them unchanged.
    for (; ai < a.length; ai += 1) {
        merged.push(a[ai]);
    }
    for (; bi < b.length; bi += 1) {
        merged.push(b[bi]);
    }
    return merged;
}
|
|
811
|
+
/**
 * Estimate the serialized size of an inverted index for given parameters
 *
 * The estimate is the sum of the header, the term index (one length byte,
 * the term bytes, and 8 bytes of offset/length per term), a one-byte end
 * marker, and the posting lists (one count byte plus the varint-encoded
 * doc IDs per term).
 *
 * @param termCount - Number of unique terms
 * @param avgTermLength - Average term length in bytes (default: 8)
 * @param avgPostingsPerTerm - Average documents per term (default: 10)
 * @param avgDocIdBits - Average bits per doc ID delta (default: 10)
 * @returns Estimated size in bytes
 */
export function estimateInvertedIndexSize(termCount, avgTermLength = 8, avgPostingsPerTerm = 10, avgDocIdBits = 10) {
    // Header
    let size = HEADER_SIZE;
    // Term index: 1 (length) + avgTermLength + 4 (offset) + 4 (length) per term
    size += termCount * (1 + avgTermLength + 8);
    size += 1; // End marker
    // Posting lists: count varint + doc_ids varints per term.
    // A varint carries 7 payload bits per byte and always occupies at least
    // one byte, so clamp to 1: without the clamp avgDocIdBits <= 0 would
    // price every doc ID at zero bytes and under-estimate the index.
    const avgVarintSize = Math.max(1, Math.ceil(avgDocIdBits / 7));
    size += termCount * (1 + avgPostingsPerTerm * avgVarintSize);
    return size;
}
|
|
832
|
+
/**
 * Simple tokenizer for testing
 *
 * Lowercases the text and returns every maximal run of the characters
 * a-z and 0-9; all other characters act as separators and are dropped.
 *
 * @param text - Text to tokenize
 * @returns Array of tokens (empty when the text has no a-z/0-9 characters)
 */
export function simpleTokenize(text) {
    const lowered = text.toLowerCase();
    // `match` with a global pattern yields null when nothing matches.
    const tokens = lowered.match(/[a-z0-9]+/g);
    return tokens === null ? [] : tokens;
}
|
|
846
|
+
/**
 * Build a serialized inverted index from a set of documents.
 *
 * Every document's text is run through the tokenizer and the resulting
 * terms are added to a fresh InvertedIndexWriter under the document's ID;
 * the writer's serialized output is returned.
 *
 * @param documents - Map of doc ID -> text, or an iterable of [docId, text] pairs
 * @param tokenizer - Function to tokenize text (default: simpleTokenize)
 * @returns Serialized inverted index bytes
 */
export function createInvertedIndex(documents, tokenizer = simpleTokenize) {
    const indexWriter = new InvertedIndexWriter();
    let pairs = documents;
    if (documents instanceof Map) {
        pairs = documents.entries();
    }
    for (const [docId, text] of pairs) {
        indexWriter.addDocument(docId, tokenizer(text));
    }
    return indexWriter.serialize();
}
|
|
862
|
+
//# sourceMappingURL=inverted-index.js.map
|