dotdo 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (667) hide show
  1. package/LICENSE +1 -1
  2. package/README.md +446 -315
  3. package/cli/README.md +238 -0
  4. package/cli/agent.ts +72 -0
  5. package/cli/bin.js +44 -0
  6. package/cli/bin.ts +38 -0
  7. package/cli/build.ts +157 -0
  8. package/cli/commands/auth/login.ts +14 -0
  9. package/cli/commands/auth/logout.ts +6 -0
  10. package/cli/commands/auth/whoami.ts +16 -0
  11. package/cli/commands/deploy-multi.ts +245 -0
  12. package/cli/commands/dev/deploy.ts +100 -0
  13. package/cli/commands/dev/dev.ts +95 -0
  14. package/cli/commands/dev/logs.ts +91 -0
  15. package/cli/commands/dev-local.ts +88 -0
  16. package/cli/commands/do-ops.ts +314 -0
  17. package/cli/commands/index.ts +100 -0
  18. package/cli/commands/init.ts +247 -0
  19. package/cli/commands/introspect/emitter.ts +315 -0
  20. package/cli/commands/introspect/index.ts +193 -0
  21. package/cli/commands/link.ts +598 -0
  22. package/cli/commands/snippets.ts +415 -0
  23. package/cli/commands/tunnel.ts +239 -0
  24. package/cli/device-auth.ts +289 -0
  25. package/cli/fallback.ts +12 -0
  26. package/cli/index.ts +121 -0
  27. package/cli/main.ts +246 -0
  28. package/cli/mcp-stdio.ts +790 -0
  29. package/cli/package.json +62 -0
  30. package/cli/runtime/do-registry.ts +193 -0
  31. package/cli/runtime/embedded-db.ts +344 -0
  32. package/cli/runtime/index.ts +9 -0
  33. package/cli/runtime/miniflare-adapter.ts +162 -0
  34. package/cli/sandbox.ts +82 -0
  35. package/cli/src/args.ts +174 -0
  36. package/cli/src/auth.ts +55 -0
  37. package/cli/src/commands/call.ts +84 -0
  38. package/cli/src/commands/charge.ts +96 -0
  39. package/cli/src/commands/config.ts +115 -0
  40. package/cli/src/commands/email.ts +112 -0
  41. package/cli/src/commands/llm.ts +115 -0
  42. package/cli/src/commands/queue.ts +134 -0
  43. package/cli/src/commands/text.ts +86 -0
  44. package/cli/src/config.ts +185 -0
  45. package/cli/src/output.ts +246 -0
  46. package/cli/src/rpc.ts +192 -0
  47. package/cli/utils/config.ts +282 -0
  48. package/cli/utils/detect.ts +73 -0
  49. package/cli/utils/index.ts +15 -0
  50. package/cli/utils/logger.ts +232 -0
  51. package/dist/ai/index.js +19 -0
  52. package/dist/ai/index.js.map +1 -0
  53. package/dist/ai/template-literals.js +852 -0
  54. package/dist/ai/template-literals.js.map +1 -0
  55. package/dist/api/middleware/auth-federation.js +573 -0
  56. package/dist/api/middleware/auth-federation.js.map +1 -0
  57. package/dist/api/middleware/auth.js +545 -0
  58. package/dist/api/middleware/auth.js.map +1 -0
  59. package/dist/db/actions.js +212 -0
  60. package/dist/db/actions.js.map +1 -0
  61. package/dist/db/auth.js +506 -0
  62. package/dist/db/auth.js.map +1 -0
  63. package/dist/db/branches.js +65 -0
  64. package/dist/db/branches.js.map +1 -0
  65. package/dist/db/clickhouse.js +1074 -0
  66. package/dist/db/clickhouse.js.map +1 -0
  67. package/dist/db/dlq.js +39 -0
  68. package/dist/db/dlq.js.map +1 -0
  69. package/dist/db/events.js +28 -0
  70. package/dist/db/events.js.map +1 -0
  71. package/dist/db/exec.js +64 -0
  72. package/dist/db/exec.js.map +1 -0
  73. package/dist/db/files.js +85 -0
  74. package/dist/db/files.js.map +1 -0
  75. package/dist/db/flags.js +24 -0
  76. package/dist/db/flags.js.map +1 -0
  77. package/dist/db/git.js +116 -0
  78. package/dist/db/git.js.map +1 -0
  79. package/dist/db/iceberg/inverted-index.js +862 -0
  80. package/dist/db/iceberg/inverted-index.js.map +1 -0
  81. package/dist/db/iceberg/puffin.js +878 -0
  82. package/dist/db/iceberg/puffin.js.map +1 -0
  83. package/dist/db/iceberg/search-manifest.js +422 -0
  84. package/dist/db/iceberg/search-manifest.js.map +1 -0
  85. package/dist/db/iceberg/types.js +8 -0
  86. package/dist/db/iceberg/types.js.map +1 -0
  87. package/dist/db/index.js +121 -0
  88. package/dist/db/index.js.map +1 -0
  89. package/dist/db/integrations.js +368 -0
  90. package/dist/db/integrations.js.map +1 -0
  91. package/dist/db/json-indexes.js +332 -0
  92. package/dist/db/json-indexes.js.map +1 -0
  93. package/dist/db/linked-accounts.js +287 -0
  94. package/dist/db/linked-accounts.js.map +1 -0
  95. package/dist/db/nouns.js +183 -0
  96. package/dist/db/nouns.js.map +1 -0
  97. package/dist/db/objects.js +170 -0
  98. package/dist/db/objects.js.map +1 -0
  99. package/dist/db/primitives/dag-scheduler/index.js +869 -0
  100. package/dist/db/primitives/dag-scheduler/index.js.map +1 -0
  101. package/dist/db/primitives/exactly-once-context.js +237 -0
  102. package/dist/db/primitives/exactly-once-context.js.map +1 -0
  103. package/dist/db/primitives/index.js +62 -0
  104. package/dist/db/primitives/index.js.map +1 -0
  105. package/dist/db/primitives/keyed-router.js +145 -0
  106. package/dist/db/primitives/keyed-router.js.map +1 -0
  107. package/dist/db/primitives/observability.js +162 -0
  108. package/dist/db/primitives/observability.js.map +1 -0
  109. package/dist/db/primitives/schema-evolution.js +643 -0
  110. package/dist/db/primitives/schema-evolution.js.map +1 -0
  111. package/dist/db/primitives/stateful-operator/index.js +770 -0
  112. package/dist/db/primitives/stateful-operator/index.js.map +1 -0
  113. package/dist/db/primitives/temporal-store.js +306 -0
  114. package/dist/db/primitives/temporal-store.js.map +1 -0
  115. package/dist/db/primitives/typed-column-store.js +1229 -0
  116. package/dist/db/primitives/typed-column-store.js.map +1 -0
  117. package/dist/db/primitives/utils/duration.js +162 -0
  118. package/dist/db/primitives/utils/duration.js.map +1 -0
  119. package/dist/db/primitives/utils/murmur3.js +116 -0
  120. package/dist/db/primitives/utils/murmur3.js.map +1 -0
  121. package/dist/db/primitives/watermark-service.js +136 -0
  122. package/dist/db/primitives/watermark-service.js.map +1 -0
  123. package/dist/db/primitives/window-manager.js +764 -0
  124. package/dist/db/primitives/window-manager.js.map +1 -0
  125. package/dist/db/relationships.js +66 -0
  126. package/dist/db/relationships.js.map +1 -0
  127. package/dist/db/schema-minimal.js +61 -0
  128. package/dist/db/schema-minimal.js.map +1 -0
  129. package/dist/db/search.js +28 -0
  130. package/dist/db/search.js.map +1 -0
  131. package/dist/db/stores.js +1665 -0
  132. package/dist/db/stores.js.map +1 -0
  133. package/dist/db/things.js +297 -0
  134. package/dist/db/things.js.map +1 -0
  135. package/dist/db/vault.js +171 -0
  136. package/dist/db/vault.js.map +1 -0
  137. package/dist/db/verbs.js +102 -0
  138. package/dist/db/verbs.js.map +1 -0
  139. package/dist/do/base.js +48 -0
  140. package/dist/do/base.js.map +1 -0
  141. package/dist/do/tiny.js +31 -0
  142. package/dist/do/tiny.js.map +1 -0
  143. package/dist/lib/DOAuth.js +261 -0
  144. package/dist/lib/DOAuth.js.map +1 -0
  145. package/dist/lib/DODispatcher.js +72 -0
  146. package/dist/lib/DODispatcher.js.map +1 -0
  147. package/dist/lib/Modifier.js +189 -0
  148. package/dist/lib/Modifier.js.map +1 -0
  149. package/dist/lib/StateStorage.js +403 -0
  150. package/dist/lib/StateStorage.js.map +1 -0
  151. package/dist/lib/TypeRegistry.js +122 -0
  152. package/dist/lib/TypeRegistry.js.map +1 -0
  153. package/dist/lib/ai/gateway.js +247 -0
  154. package/dist/lib/ai/gateway.js.map +1 -0
  155. package/dist/lib/ai/tool-loop-agent.js +591 -0
  156. package/dist/lib/ai/tool-loop-agent.js.map +1 -0
  157. package/dist/lib/auto-wiring.js +439 -0
  158. package/dist/lib/auto-wiring.js.map +1 -0
  159. package/dist/lib/browse/browserbase.js +163 -0
  160. package/dist/lib/browse/browserbase.js.map +1 -0
  161. package/dist/lib/browse/cloudflare.js +144 -0
  162. package/dist/lib/browse/cloudflare.js.map +1 -0
  163. package/dist/lib/browse/index.js +62 -0
  164. package/dist/lib/browse/index.js.map +1 -0
  165. package/dist/lib/browse/types.js +13 -0
  166. package/dist/lib/browse/types.js.map +1 -0
  167. package/dist/lib/cache/index.js +37 -0
  168. package/dist/lib/cache/index.js.map +1 -0
  169. package/dist/lib/cache/visibility.js +638 -0
  170. package/dist/lib/cache/visibility.js.map +1 -0
  171. package/dist/lib/capabilities.js +268 -0
  172. package/dist/lib/capabilities.js.map +1 -0
  173. package/dist/lib/channels/base.js +106 -0
  174. package/dist/lib/channels/base.js.map +1 -0
  175. package/dist/lib/channels/discord.js +94 -0
  176. package/dist/lib/channels/discord.js.map +1 -0
  177. package/dist/lib/channels/email.js +204 -0
  178. package/dist/lib/channels/email.js.map +1 -0
  179. package/dist/lib/channels/index.js +90 -0
  180. package/dist/lib/channels/index.js.map +1 -0
  181. package/dist/lib/channels/mdxui-chat.js +95 -0
  182. package/dist/lib/channels/mdxui-chat.js.map +1 -0
  183. package/dist/lib/channels/slack-blockkit.js +121 -0
  184. package/dist/lib/channels/slack-blockkit.js.map +1 -0
  185. package/dist/lib/channels/types.js +7 -0
  186. package/dist/lib/channels/types.js.map +1 -0
  187. package/dist/lib/cloudflare/ai.js +654 -0
  188. package/dist/lib/cloudflare/ai.js.map +1 -0
  189. package/dist/lib/cloudflare/index.js +88 -0
  190. package/dist/lib/cloudflare/index.js.map +1 -0
  191. package/dist/lib/cloudflare/kv.js +342 -0
  192. package/dist/lib/cloudflare/kv.js.map +1 -0
  193. package/dist/lib/cloudflare/queues.js +434 -0
  194. package/dist/lib/cloudflare/queues.js.map +1 -0
  195. package/dist/lib/cloudflare/r2.js +604 -0
  196. package/dist/lib/cloudflare/r2.js.map +1 -0
  197. package/dist/lib/cloudflare/vectorize.js +494 -0
  198. package/dist/lib/cloudflare/vectorize.js.map +1 -0
  199. package/dist/lib/cloudflare/workflows.js +569 -0
  200. package/dist/lib/cloudflare/workflows.js.map +1 -0
  201. package/dist/lib/colo/caching.js +196 -0
  202. package/dist/lib/colo/caching.js.map +1 -0
  203. package/dist/lib/colo/detection.js +194 -0
  204. package/dist/lib/colo/detection.js.map +1 -0
  205. package/dist/lib/colo/external-data.js +219 -0
  206. package/dist/lib/colo/external-data.js.map +1 -0
  207. package/dist/lib/colo/globe-data.js +179 -0
  208. package/dist/lib/colo/globe-data.js.map +1 -0
  209. package/dist/lib/colo/index.js +16 -0
  210. package/dist/lib/colo/index.js.map +1 -0
  211. package/dist/lib/decorators.js +37 -0
  212. package/dist/lib/decorators.js.map +1 -0
  213. package/dist/lib/discovery.js +81 -0
  214. package/dist/lib/discovery.js.map +1 -0
  215. package/dist/lib/executors/AgenticFunctionExecutor.js +619 -0
  216. package/dist/lib/executors/AgenticFunctionExecutor.js.map +1 -0
  217. package/dist/lib/executors/BaseFunctionExecutor.js +328 -0
  218. package/dist/lib/executors/BaseFunctionExecutor.js.map +1 -0
  219. package/dist/lib/executors/CascadeExecutor.js +418 -0
  220. package/dist/lib/executors/CascadeExecutor.js.map +1 -0
  221. package/dist/lib/executors/CodeFunctionExecutor.js +904 -0
  222. package/dist/lib/executors/CodeFunctionExecutor.js.map +1 -0
  223. package/dist/lib/executors/GenerativeFunctionExecutor.js +904 -0
  224. package/dist/lib/executors/GenerativeFunctionExecutor.js.map +1 -0
  225. package/dist/lib/executors/HumanFunctionExecutor.js +884 -0
  226. package/dist/lib/executors/HumanFunctionExecutor.js.map +1 -0
  227. package/dist/lib/executors/ParallelStepExecutor.js +308 -0
  228. package/dist/lib/executors/ParallelStepExecutor.js.map +1 -0
  229. package/dist/lib/executors/types.js +12 -0
  230. package/dist/lib/executors/types.js.map +1 -0
  231. package/dist/lib/experiments.js +89 -0
  232. package/dist/lib/experiments.js.map +1 -0
  233. package/dist/lib/flags/store.js +262 -0
  234. package/dist/lib/flags/store.js.map +1 -0
  235. package/dist/lib/functions/FunctionComposition.js +467 -0
  236. package/dist/lib/functions/FunctionComposition.js.map +1 -0
  237. package/dist/lib/functions/FunctionMiddleware.js +457 -0
  238. package/dist/lib/functions/FunctionMiddleware.js.map +1 -0
  239. package/dist/lib/functions/FunctionRegistry.js +426 -0
  240. package/dist/lib/functions/FunctionRegistry.js.map +1 -0
  241. package/dist/lib/functions/createFunction.js +1048 -0
  242. package/dist/lib/functions/createFunction.js.map +1 -0
  243. package/dist/lib/humans/index.js +68 -0
  244. package/dist/lib/humans/index.js.map +1 -0
  245. package/dist/lib/humans/templates.js +117 -0
  246. package/dist/lib/humans/templates.js.map +1 -0
  247. package/dist/lib/identity.js +98 -0
  248. package/dist/lib/identity.js.map +1 -0
  249. package/dist/lib/index.js +9 -0
  250. package/dist/lib/index.js.map +1 -0
  251. package/dist/lib/logging/error-logger.js +163 -0
  252. package/dist/lib/logging/error-logger.js.map +1 -0
  253. package/dist/lib/logging/index.js +160 -0
  254. package/dist/lib/logging/index.js.map +1 -0
  255. package/dist/lib/mixins/bash.js +753 -0
  256. package/dist/lib/mixins/bash.js.map +1 -0
  257. package/dist/lib/mixins/fs.js +648 -0
  258. package/dist/lib/mixins/fs.js.map +1 -0
  259. package/dist/lib/mixins/git.js +1006 -0
  260. package/dist/lib/mixins/git.js.map +1 -0
  261. package/dist/lib/mixins/npm.js +662 -0
  262. package/dist/lib/mixins/npm.js.map +1 -0
  263. package/dist/lib/noun-id.js +278 -0
  264. package/dist/lib/noun-id.js.map +1 -0
  265. package/dist/lib/rate-limit/sliding-window.js +148 -0
  266. package/dist/lib/rate-limit/sliding-window.js.map +1 -0
  267. package/dist/lib/rate-limit.js +110 -0
  268. package/dist/lib/rate-limit.js.map +1 -0
  269. package/dist/lib/rpc/bindings.js +548 -0
  270. package/dist/lib/rpc/bindings.js.map +1 -0
  271. package/dist/lib/rpc/index.js +64 -0
  272. package/dist/lib/rpc/index.js.map +1 -0
  273. package/dist/lib/safe-stringify.js +223 -0
  274. package/dist/lib/safe-stringify.js.map +1 -0
  275. package/dist/lib/sandbox/miniflare-sandbox.js +1007 -0
  276. package/dist/lib/sandbox/miniflare-sandbox.js.map +1 -0
  277. package/dist/lib/sqids.js +110 -0
  278. package/dist/lib/sqids.js.map +1 -0
  279. package/dist/lib/sql/adapters/index.js +10 -0
  280. package/dist/lib/sql/adapters/index.js.map +1 -0
  281. package/dist/lib/sql/adapters/node-sql-parser.js +552 -0
  282. package/dist/lib/sql/adapters/node-sql-parser.js.map +1 -0
  283. package/dist/lib/sql/adapters/pgsql-parser.js +1190 -0
  284. package/dist/lib/sql/adapters/pgsql-parser.js.map +1 -0
  285. package/dist/lib/sql/index.js +277 -0
  286. package/dist/lib/sql/index.js.map +1 -0
  287. package/dist/lib/sql/types.js +56 -0
  288. package/dist/lib/sql/types.js.map +1 -0
  289. package/dist/lib/type-classifier.js +126 -0
  290. package/dist/lib/type-classifier.js.map +1 -0
  291. package/dist/lib/utils/html.js +47 -0
  292. package/dist/lib/utils/html.js.map +1 -0
  293. package/dist/lib/validation.js +48 -0
  294. package/dist/lib/validation.js.map +1 -0
  295. package/dist/lib/vault/store.js +411 -0
  296. package/dist/lib/vault/store.js.map +1 -0
  297. package/dist/metrics/hunch.js +739 -0
  298. package/dist/metrics/hunch.js.map +1 -0
  299. package/dist/objects/API.js +302 -0
  300. package/dist/objects/API.js.map +1 -0
  301. package/dist/objects/Agent.js +179 -0
  302. package/dist/objects/Agent.js.map +1 -0
  303. package/dist/objects/AgenticFunctionExecutor.js +8 -0
  304. package/dist/objects/AgenticFunctionExecutor.js.map +1 -0
  305. package/dist/objects/App.js +83 -0
  306. package/dist/objects/App.js.map +1 -0
  307. package/dist/objects/Browser.js +884 -0
  308. package/dist/objects/Browser.js.map +1 -0
  309. package/dist/objects/Business.js +107 -0
  310. package/dist/objects/Business.js.map +1 -0
  311. package/dist/objects/CLI.js +221 -0
  312. package/dist/objects/CLI.js.map +1 -0
  313. package/dist/objects/CodeFunctionExecutor.js +8 -0
  314. package/dist/objects/CodeFunctionExecutor.js.map +1 -0
  315. package/dist/objects/Collection.js +161 -0
  316. package/dist/objects/Collection.js.map +1 -0
  317. package/dist/objects/DO.js +41 -0
  318. package/dist/objects/DO.js.map +1 -0
  319. package/dist/objects/DOBase.js +2309 -0
  320. package/dist/objects/DOBase.js.map +1 -0
  321. package/dist/objects/DOCache.js +153 -0
  322. package/dist/objects/DOCache.js.map +1 -0
  323. package/dist/objects/DOFull.js +1676 -0
  324. package/dist/objects/DOFull.js.map +1 -0
  325. package/dist/objects/DOTiny.js +207 -0
  326. package/dist/objects/DOTiny.js.map +1 -0
  327. package/dist/objects/Directory.js +199 -0
  328. package/dist/objects/Directory.js.map +1 -0
  329. package/dist/objects/Entity.js +413 -0
  330. package/dist/objects/Entity.js.map +1 -0
  331. package/dist/objects/Function.js +116 -0
  332. package/dist/objects/Function.js.map +1 -0
  333. package/dist/objects/Human.js +231 -0
  334. package/dist/objects/Human.js.map +1 -0
  335. package/dist/objects/HumanFunctionExecutor.js +8 -0
  336. package/dist/objects/HumanFunctionExecutor.js.map +1 -0
  337. package/dist/objects/IcebergMetadataDO.js +938 -0
  338. package/dist/objects/IcebergMetadataDO.js.map +1 -0
  339. package/dist/objects/IntegrationsDO.js +1174 -0
  340. package/dist/objects/IntegrationsDO.js.map +1 -0
  341. package/dist/objects/ObservabilityBroadcaster.js +149 -0
  342. package/dist/objects/ObservabilityBroadcaster.js.map +1 -0
  343. package/dist/objects/Package.js +154 -0
  344. package/dist/objects/Package.js.map +1 -0
  345. package/dist/objects/Product.js +193 -0
  346. package/dist/objects/Product.js.map +1 -0
  347. package/dist/objects/SDK.js +152 -0
  348. package/dist/objects/SDK.js.map +1 -0
  349. package/dist/objects/SaaS.js +235 -0
  350. package/dist/objects/SaaS.js.map +1 -0
  351. package/dist/objects/SandboxDO.js +759 -0
  352. package/dist/objects/SandboxDO.js.map +1 -0
  353. package/dist/objects/Service.js +337 -0
  354. package/dist/objects/Service.js.map +1 -0
  355. package/dist/objects/Site.js +80 -0
  356. package/dist/objects/Site.js.map +1 -0
  357. package/dist/objects/Startup.js +479 -0
  358. package/dist/objects/Startup.js.map +1 -0
  359. package/dist/objects/ThingsDO.js +170 -0
  360. package/dist/objects/ThingsDO.js.map +1 -0
  361. package/dist/objects/VectorShardDO.js +650 -0
  362. package/dist/objects/VectorShardDO.js.map +1 -0
  363. package/dist/objects/Worker.js +144 -0
  364. package/dist/objects/Worker.js.map +1 -0
  365. package/dist/objects/Workflow.js +196 -0
  366. package/dist/objects/Workflow.js.map +1 -0
  367. package/dist/objects/WorkflowFactory.js +313 -0
  368. package/dist/objects/WorkflowFactory.js.map +1 -0
  369. package/dist/objects/WorkflowRuntime.js +863 -0
  370. package/dist/objects/WorkflowRuntime.js.map +1 -0
  371. package/dist/objects/circuit-breaker-bulkhead.js +178 -0
  372. package/dist/objects/circuit-breaker-bulkhead.js.map +1 -0
  373. package/dist/objects/createFunction.js +934 -0
  374. package/dist/objects/createFunction.js.map +1 -0
  375. package/dist/objects/index.js +80 -0
  376. package/dist/objects/index.js.map +1 -0
  377. package/dist/objects/lifecycle/Branch.js +275 -0
  378. package/dist/objects/lifecycle/Branch.js.map +1 -0
  379. package/dist/objects/lifecycle/Clone.js +1499 -0
  380. package/dist/objects/lifecycle/Clone.js.map +1 -0
  381. package/dist/objects/lifecycle/Compact.js +237 -0
  382. package/dist/objects/lifecycle/Compact.js.map +1 -0
  383. package/dist/objects/lifecycle/Promote.js +476 -0
  384. package/dist/objects/lifecycle/Promote.js.map +1 -0
  385. package/dist/objects/lifecycle/Shard.js +560 -0
  386. package/dist/objects/lifecycle/Shard.js.map +1 -0
  387. package/dist/objects/lifecycle/index.js +15 -0
  388. package/dist/objects/lifecycle/index.js.map +1 -0
  389. package/dist/objects/lifecycle/types.js +33 -0
  390. package/dist/objects/lifecycle/types.js.map +1 -0
  391. package/dist/objects/mixins/infrastructure.js +171 -0
  392. package/dist/objects/mixins/infrastructure.js.map +1 -0
  393. package/dist/objects/modules/StoresModule.js +153 -0
  394. package/dist/objects/modules/StoresModule.js.map +1 -0
  395. package/dist/objects/persistence/checkpoint-manager.js +606 -0
  396. package/dist/objects/persistence/checkpoint-manager.js.map +1 -0
  397. package/dist/objects/persistence/index.js +72 -0
  398. package/dist/objects/persistence/index.js.map +1 -0
  399. package/dist/objects/persistence/migration-runner.js +562 -0
  400. package/dist/objects/persistence/migration-runner.js.map +1 -0
  401. package/dist/objects/persistence/replication-manager.js +501 -0
  402. package/dist/objects/persistence/replication-manager.js.map +1 -0
  403. package/dist/objects/persistence/tiered-storage-manager.js +595 -0
  404. package/dist/objects/persistence/tiered-storage-manager.js.map +1 -0
  405. package/dist/objects/persistence/types.js +14 -0
  406. package/dist/objects/persistence/types.js.map +1 -0
  407. package/dist/objects/persistence/wal-manager.js +653 -0
  408. package/dist/objects/persistence/wal-manager.js.map +1 -0
  409. package/dist/objects/presets/index.js +20 -0
  410. package/dist/objects/presets/index.js.map +1 -0
  411. package/dist/objects/presets/primitives.js +188 -0
  412. package/dist/objects/presets/primitives.js.map +1 -0
  413. package/dist/objects/primitives/alarm-adapter.js +141 -0
  414. package/dist/objects/primitives/alarm-adapter.js.map +1 -0
  415. package/dist/objects/primitives/index.js +337 -0
  416. package/dist/objects/primitives/index.js.map +1 -0
  417. package/dist/objects/primitives/storage-adapter.js +182 -0
  418. package/dist/objects/primitives/storage-adapter.js.map +1 -0
  419. package/dist/objects/primitives/with-primitives.js +102 -0
  420. package/dist/objects/primitives/with-primitives.js.map +1 -0
  421. package/dist/objects/services/StoreManager.js +227 -0
  422. package/dist/objects/services/StoreManager.js.map +1 -0
  423. package/dist/objects/services/index.js +13 -0
  424. package/dist/objects/services/index.js.map +1 -0
  425. package/dist/objects/transport/auth-layer.js +1451 -0
  426. package/dist/objects/transport/auth-layer.js.map +1 -0
  427. package/dist/objects/transport/capnweb-target.js +355 -0
  428. package/dist/objects/transport/capnweb-target.js.map +1 -0
  429. package/dist/objects/transport/chain.js +441 -0
  430. package/dist/objects/transport/chain.js.map +1 -0
  431. package/dist/objects/transport/handler.js +58 -0
  432. package/dist/objects/transport/handler.js.map +1 -0
  433. package/dist/objects/transport/index.js +53 -0
  434. package/dist/objects/transport/index.js.map +1 -0
  435. package/dist/objects/transport/mcp-server.js +691 -0
  436. package/dist/objects/transport/mcp-server.js.map +1 -0
  437. package/dist/objects/transport/rest-autowire.js +1508 -0
  438. package/dist/objects/transport/rest-autowire.js.map +1 -0
  439. package/dist/objects/transport/rest-router.js +440 -0
  440. package/dist/objects/transport/rest-router.js.map +1 -0
  441. package/dist/objects/transport/rpc-server.js +1539 -0
  442. package/dist/objects/transport/rpc-server.js.map +1 -0
  443. package/dist/objects/transport/shared.js +576 -0
  444. package/dist/objects/transport/shared.js.map +1 -0
  445. package/dist/objects/transport/sync-engine.js +291 -0
  446. package/dist/objects/transport/sync-engine.js.map +1 -0
  447. package/dist/objects/transport/types.js +8 -0
  448. package/dist/objects/transport/types.js.map +1 -0
  449. package/dist/sandbox/index.js +258 -0
  450. package/dist/sandbox/index.js.map +1 -0
  451. package/dist/snippets/artifacts-config.js +241 -0
  452. package/dist/snippets/artifacts-config.js.map +1 -0
  453. package/dist/snippets/artifacts-ingest.js +832 -0
  454. package/dist/snippets/artifacts-ingest.js.map +1 -0
  455. package/dist/snippets/artifacts-serve.js +1035 -0
  456. package/dist/snippets/artifacts-serve.js.map +1 -0
  457. package/dist/snippets/artifacts-types.js +161 -0
  458. package/dist/snippets/artifacts-types.js.map +1 -0
  459. package/dist/snippets/cache-probe.js +376 -0
  460. package/dist/snippets/cache-probe.js.map +1 -0
  461. package/dist/snippets/cache.js +10 -0
  462. package/dist/snippets/cache.js.map +1 -0
  463. package/dist/snippets/events.js +469 -0
  464. package/dist/snippets/events.js.map +1 -0
  465. package/dist/snippets/index.js +7 -0
  466. package/dist/snippets/index.js.map +1 -0
  467. package/dist/snippets/proxy.js +495 -0
  468. package/dist/snippets/proxy.js.map +1 -0
  469. package/dist/snippets/search.js +1759 -0
  470. package/dist/snippets/search.js.map +1 -0
  471. package/dist/streams/index.js +30 -0
  472. package/dist/streams/index.js.map +1 -0
  473. package/dist/streams/observability.js +68 -0
  474. package/dist/streams/observability.js.map +1 -0
  475. package/dist/types/AI.js +92 -0
  476. package/dist/types/AI.js.map +1 -0
  477. package/dist/types/AIFunction.js +171 -0
  478. package/dist/types/AIFunction.js.map +1 -0
  479. package/dist/types/BrowseVerb.js +89 -0
  480. package/dist/types/BrowseVerb.js.map +1 -0
  481. package/dist/types/Browser.js +31 -0
  482. package/dist/types/Browser.js.map +1 -0
  483. package/dist/types/Chaos.js +15 -0
  484. package/dist/types/Chaos.js.map +1 -0
  485. package/dist/types/CloudflareBindings.js +109 -0
  486. package/dist/types/CloudflareBindings.js.map +1 -0
  487. package/dist/types/Collection.js +50 -0
  488. package/dist/types/Collection.js.map +1 -0
  489. package/dist/types/DO.js +2 -0
  490. package/dist/types/DO.js.map +1 -0
  491. package/dist/types/DOLocation.js +63 -0
  492. package/dist/types/DOLocation.js.map +1 -0
  493. package/dist/types/EventHandler.js +57 -0
  494. package/dist/types/EventHandler.js.map +1 -0
  495. package/dist/types/Experiment.js +33 -0
  496. package/dist/types/Experiment.js.map +1 -0
  497. package/dist/types/Flag.js +57 -0
  498. package/dist/types/Flag.js.map +1 -0
  499. package/dist/types/Lifecycle.js +13 -0
  500. package/dist/types/Lifecycle.js.map +1 -0
  501. package/dist/types/Location.js +169 -0
  502. package/dist/types/Location.js.map +1 -0
  503. package/dist/types/Noun.js +66 -0
  504. package/dist/types/Noun.js.map +1 -0
  505. package/dist/types/SessionEvent.js +194 -0
  506. package/dist/types/SessionEvent.js.map +1 -0
  507. package/dist/types/Thing.js +55 -0
  508. package/dist/types/Thing.js.map +1 -0
  509. package/dist/types/ThingDO.js +153 -0
  510. package/dist/types/ThingDO.js.map +1 -0
  511. package/dist/types/Things.js +2 -0
  512. package/dist/types/Things.js.map +1 -0
  513. package/dist/types/Verb.js +119 -0
  514. package/dist/types/Verb.js.map +1 -0
  515. package/dist/types/WorkflowContext.js +70 -0
  516. package/dist/types/WorkflowContext.js.map +1 -0
  517. package/dist/types/analytics-api.js +13 -0
  518. package/dist/types/analytics-api.js.map +1 -0
  519. package/dist/types/capabilities.js +135 -0
  520. package/dist/types/capabilities.js.map +1 -0
  521. package/dist/types/drizzle.js +12 -0
  522. package/dist/types/drizzle.js.map +1 -0
  523. package/dist/types/event.js +201 -0
  524. package/dist/types/event.js.map +1 -0
  525. package/dist/types/fn.js +12 -0
  526. package/dist/types/fn.js.map +1 -0
  527. package/dist/types/iceberg.js +48 -0
  528. package/dist/types/iceberg.js.map +1 -0
  529. package/dist/types/ids.js +170 -0
  530. package/dist/types/ids.js.map +1 -0
  531. package/dist/types/index.js +41 -0
  532. package/dist/types/index.js.map +1 -0
  533. package/dist/types/introspect.js +54 -0
  534. package/dist/types/introspect.js.map +1 -0
  535. package/dist/types/observability.js +124 -0
  536. package/dist/types/observability.js.map +1 -0
  537. package/dist/types/sync-protocol.js +175 -0
  538. package/dist/types/sync-protocol.js.map +1 -0
  539. package/dist/types/vector.js +13 -0
  540. package/dist/types/vector.js.map +1 -0
  541. package/dist/workflows/ScheduleManager.js +473 -0
  542. package/dist/workflows/ScheduleManager.js.map +1 -0
  543. package/dist/workflows/StepDOBridge.js +149 -0
  544. package/dist/workflows/StepDOBridge.js.map +1 -0
  545. package/dist/workflows/StepResultStorage.js +232 -0
  546. package/dist/workflows/StepResultStorage.js.map +1 -0
  547. package/dist/workflows/WaitForEventManager.js +461 -0
  548. package/dist/workflows/WaitForEventManager.js.map +1 -0
  549. package/dist/workflows/analyzer.js +332 -0
  550. package/dist/workflows/analyzer.js.map +1 -0
  551. package/dist/workflows/compat/activity-router.js +484 -0
  552. package/dist/workflows/compat/activity-router.js.map +1 -0
  553. package/dist/workflows/compat/backends/cloudflare-workflows.js +431 -0
  554. package/dist/workflows/compat/backends/cloudflare-workflows.js.map +1 -0
  555. package/dist/workflows/compat/backends/index.js +14 -0
  556. package/dist/workflows/compat/backends/index.js.map +1 -0
  557. package/dist/workflows/compat/errors/index.js +375 -0
  558. package/dist/workflows/compat/errors/index.js.map +1 -0
  559. package/dist/workflows/compat/index.js +79 -0
  560. package/dist/workflows/compat/index.js.map +1 -0
  561. package/dist/workflows/compat/inngest/index.js +989 -0
  562. package/dist/workflows/compat/inngest/index.js.map +1 -0
  563. package/dist/workflows/compat/qstash/index.js +1263 -0
  564. package/dist/workflows/compat/qstash/index.js.map +1 -0
  565. package/dist/workflows/compat/temporal/activities.js +739 -0
  566. package/dist/workflows/compat/temporal/activities.js.map +1 -0
  567. package/dist/workflows/compat/temporal/child-workflows.js +154 -0
  568. package/dist/workflows/compat/temporal/child-workflows.js.map +1 -0
  569. package/dist/workflows/compat/temporal/client.js +381 -0
  570. package/dist/workflows/compat/temporal/client.js.map +1 -0
  571. package/dist/workflows/compat/temporal/context.js +309 -0
  572. package/dist/workflows/compat/temporal/context.js.map +1 -0
  573. package/dist/workflows/compat/temporal/determinism.js +216 -0
  574. package/dist/workflows/compat/temporal/determinism.js.map +1 -0
  575. package/dist/workflows/compat/temporal/errors.js +128 -0
  576. package/dist/workflows/compat/temporal/errors.js.map +1 -0
  577. package/dist/workflows/compat/temporal/index.js +2464 -0
  578. package/dist/workflows/compat/temporal/index.js.map +1 -0
  579. package/dist/workflows/compat/temporal/saga.js +504 -0
  580. package/dist/workflows/compat/temporal/saga.js.map +1 -0
  581. package/dist/workflows/compat/temporal/signals.js +364 -0
  582. package/dist/workflows/compat/temporal/signals.js.map +1 -0
  583. package/dist/workflows/compat/temporal/storage.js +271 -0
  584. package/dist/workflows/compat/temporal/storage.js.map +1 -0
  585. package/dist/workflows/compat/temporal/timers.js +347 -0
  586. package/dist/workflows/compat/temporal/timers.js.map +1 -0
  587. package/dist/workflows/compat/temporal/types.js +7 -0
  588. package/dist/workflows/compat/temporal/types.js.map +1 -0
  589. package/dist/workflows/compat/temporal/unified-primitives.js +339 -0
  590. package/dist/workflows/compat/temporal/unified-primitives.js.map +1 -0
  591. package/dist/workflows/compat/trigger/index.js +468 -0
  592. package/dist/workflows/compat/trigger/index.js.map +1 -0
  593. package/dist/workflows/compat/utils/index.js +69 -0
  594. package/dist/workflows/compat/utils/index.js.map +1 -0
  595. package/dist/workflows/context/correlation-capability.js +266 -0
  596. package/dist/workflows/context/correlation-capability.js.map +1 -0
  597. package/dist/workflows/context/correlation.js +484 -0
  598. package/dist/workflows/context/correlation.js.map +1 -0
  599. package/dist/workflows/context/experiment.js +289 -0
  600. package/dist/workflows/context/experiment.js.map +1 -0
  601. package/dist/workflows/context/flag.js +244 -0
  602. package/dist/workflows/context/flag.js.map +1 -0
  603. package/dist/workflows/context/foundation.js +648 -0
  604. package/dist/workflows/context/foundation.js.map +1 -0
  605. package/dist/workflows/context/human-base.js +106 -0
  606. package/dist/workflows/context/human-base.js.map +1 -0
  607. package/dist/workflows/context/human.js +368 -0
  608. package/dist/workflows/context/human.js.map +1 -0
  609. package/dist/workflows/context/measure.js +354 -0
  610. package/dist/workflows/context/measure.js.map +1 -0
  611. package/dist/workflows/context/rate-limit.js +358 -0
  612. package/dist/workflows/context/rate-limit.js.map +1 -0
  613. package/dist/workflows/context/user.js +117 -0
  614. package/dist/workflows/context/user.js.map +1 -0
  615. package/dist/workflows/context/vault.js +360 -0
  616. package/dist/workflows/context/vault.js.map +1 -0
  617. package/dist/workflows/data/entity-events/entity-events.js +489 -0
  618. package/dist/workflows/data/entity-events/entity-events.js.map +1 -0
  619. package/dist/workflows/data/experiment/index.js +599 -0
  620. package/dist/workflows/data/experiment/index.js.map +1 -0
  621. package/dist/workflows/data/goal/context.js +558 -0
  622. package/dist/workflows/data/goal/context.js.map +1 -0
  623. package/dist/workflows/data/goal/index.js +32 -0
  624. package/dist/workflows/data/goal/index.js.map +1 -0
  625. package/dist/workflows/data/measure/index.js +840 -0
  626. package/dist/workflows/data/measure/index.js.map +1 -0
  627. package/dist/workflows/data/stream/index.js +1149 -0
  628. package/dist/workflows/data/stream/index.js.map +1 -0
  629. package/dist/workflows/data/track/context.js +883 -0
  630. package/dist/workflows/data/track/context.js.map +1 -0
  631. package/dist/workflows/data/track/index.js +15 -0
  632. package/dist/workflows/data/track/index.js.map +1 -0
  633. package/dist/workflows/data/view/context.js +864 -0
  634. package/dist/workflows/data/view/context.js.map +1 -0
  635. package/dist/workflows/domain.js +93 -0
  636. package/dist/workflows/domain.js.map +1 -0
  637. package/dist/workflows/flag.js +176 -0
  638. package/dist/workflows/flag.js.map +1 -0
  639. package/dist/workflows/flags.js +217 -0
  640. package/dist/workflows/flags.js.map +1 -0
  641. package/dist/workflows/hash.js +209 -0
  642. package/dist/workflows/hash.js.map +1 -0
  643. package/dist/workflows/index.js +50 -0
  644. package/dist/workflows/index.js.map +1 -0
  645. package/dist/workflows/on.js +378 -0
  646. package/dist/workflows/on.js.map +1 -0
  647. package/dist/workflows/pipeline-promise.js +481 -0
  648. package/dist/workflows/pipeline-promise.js.map +1 -0
  649. package/dist/workflows/pipeline-types.js +20 -0
  650. package/dist/workflows/pipeline-types.js.map +1 -0
  651. package/dist/workflows/proxy.js +76 -0
  652. package/dist/workflows/proxy.js.map +1 -0
  653. package/dist/workflows/runtime.js +310 -0
  654. package/dist/workflows/runtime.js.map +1 -0
  655. package/dist/workflows/schedule-builder.js +327 -0
  656. package/dist/workflows/schedule-builder.js.map +1 -0
  657. package/dist/workflows/visibility/index.js +146 -0
  658. package/dist/workflows/visibility/index.js.map +1 -0
  659. package/dist/workflows/visibility/query-parser.js +150 -0
  660. package/dist/workflows/visibility/query-parser.js.map +1 -0
  661. package/dist/workflows/visibility/store.js +223 -0
  662. package/dist/workflows/visibility/store.js.map +1 -0
  663. package/dist/workflows/visibility/types.js +30 -0
  664. package/dist/workflows/visibility/types.js.map +1 -0
  665. package/dist/workflows/workflow.js +53 -0
  666. package/dist/workflows/workflow.js.map +1 -0
  667. package/package.json +294 -46
@@ -0,0 +1,1759 @@
1
+ /**
2
+ * Search Snippet - Range Query Pruning with Zonemaps
3
+ *
4
+ * This module provides range query pruning using ClickHouse-style marks files.
5
+ * The search snippet fetches marks files from CDN to determine which blocks
6
+ * need to be read for a given range query, minimizing data transfer.
7
+ *
8
+ * Memory Budget:
9
+ * - 65,536 blocks per 1MB marks file
10
+ * - 16 bytes per block entry (int64 min + int64 max)
11
+ *
12
+ * @module snippets/search
13
+ * @see db/iceberg/marks.ts for the full marks file format
14
+ */
15
+ // ============================================================================
16
+ // Range/Zonemap Pruning Implementation
17
+ // ============================================================================
18
+ /** Bytes per block entry: int64 min (8) + int64 max (8) */
19
+ const BYTES_PER_BLOCK = 16;
20
/**
 * Races a promise against a timer and rejects with `Error('Request timeout')`
 * if the timer fires first.
 *
 * The timer is cleared as soon as the race settles, so a promise that settles
 * quickly does not leave a pending timer behind.
 *
 * @param promise - Promise to guard
 * @param ms - Timeout in milliseconds
 * @returns Promise settling with the outcome of `promise`, or rejecting on timeout
 * @internal
 */
function withTimeout(promise, ms) {
    let timer;
    const timeout = new Promise((_, reject) => {
        timer = setTimeout(() => reject(new Error('Request timeout')), ms);
    });
    return Promise.race([promise, timeout]).finally(() => clearTimeout(timer));
}
32
/**
 * Query a marks file and return the byte ranges that need to be fetched.
 *
 * Fetches the marks file, parses it with `parseMarksFile`, prunes it with
 * `pruneBlocks`, and converts every surviving block into a byte range of
 * `metadata.blockByteSize` bytes (64KB when not provided).
 *
 * When the matching blocks form one contiguous run, `coalesced` is set and
 * `rangeHeader` spans the whole run. Otherwise `rangeHeader` covers only the
 * first matching block; `blockRanges` always lists every matching block.
 * NOTE(review): callers that fetch with `rangeHeader` alone will miss the
 * later blocks of a non-contiguous match - confirm whether that is intended.
 *
 * @param cdnUrl - URL to the marks file on CDN
 * @param metadata - Marks file metadata (`columnType`, optional `blockByteSize`)
 * @param condition - Range query condition
 * @returns Promise resolving to `{ blockRanges, rangeHeader, coalesced? }`
 * @throws Error when the fetch rejects (original error attached as `cause`)
 *         or when the response status is not ok
 *
 * @example
 * ```typescript
 * const result = await queryRange(
 *   'https://cdn.example.com.ai/marks/users.marks',
 *   { columnType: 'int64', blockCount: 100, blockSize: 8192 },
 *   { min: 1000n, max: 2000n }
 * )
 *
 * if (result.rangeHeader) {
 *   const response = await fetch(dataUrl, {
 *     headers: { Range: result.rangeHeader }
 *   })
 * }
 * ```
 */
export async function queryRange(cdnUrl, metadata, condition) {
    // Fetch marks file from CDN
    let response;
    try {
        response = await fetch(cdnUrl, {
            headers: {
                Accept: 'application/octet-stream',
                'Cache-Control': 'max-age=3600',
            },
        });
    } catch (error) {
        if (error instanceof Error) {
            // Keep the historical message but attach the underlying failure.
            throw new Error(`Network timeout: ${error.message}`, { cause: error });
        }
        throw error;
    }
    if (!response.ok) {
        throw new Error(`Marks file fetch failed (${response.status}): ${response.statusText || 'Not Found'}`);
    }
    // Parse the marks file and keep only blocks that may satisfy the condition
    const data = new Uint8Array(await response.arrayBuffer());
    const blocks = parseMarksFile(data, metadata.columnType);
    const matchingBlocks = pruneBlocks(blocks, condition);
    // Each block maps to a fixed-size slice of the data file
    const blockByteSize = metadata.blockByteSize ?? 65536; // default 64KB
    const blockRanges = matchingBlocks.map((block) => ({
        blockIndex: block.blockIndex,
        byteOffset: block.blockIndex * blockByteSize,
        byteSize: blockByteSize,
    }));
    const result = {
        blockRanges,
        rangeHeader: null,
    };
    if (blockRanges.length === 0) {
        return result;
    }
    // Adjacent blocks can be coalesced into a single range
    const sortedRanges = [...blockRanges].sort((a, b) => a.blockIndex - b.blockIndex);
    const isContiguous = sortedRanges.every((range, i) => i === 0 || range.blockIndex === sortedRanges[i - 1].blockIndex + 1);
    const firstBlock = sortedRanges[0];
    if (isContiguous) {
        const lastBlock = sortedRanges[sortedRanges.length - 1];
        result.coalesced = {
            byteOffset: firstBlock.byteOffset,
            byteSize: sortedRanges.length * blockByteSize,
        };
        // HTTP Range header uses inclusive byte ranges
        const endByte = lastBlock.byteOffset + lastBlock.byteSize - 1;
        result.rangeHeader = `bytes=${firstBlock.byteOffset}-${endByte}`;
    } else {
        // Non-contiguous: use first matching block range for now
        const endByte = firstBlock.byteOffset + firstBlock.byteSize - 1;
        result.rangeHeader = `bytes=${firstBlock.byteOffset}-${endByte}`;
    }
    return result;
}
126
/**
 * Parse a binary marks file into block ranges.
 *
 * The file is read as consecutive 16-byte entries, each holding a
 * little-endian min followed by a little-endian max. Trailing bytes that do
 * not fill a whole entry are ignored.
 *
 * @param data - Raw marks file data; a typed-array/DataView view or a plain ArrayBuffer
 * @param columnType - Data type of the column; 'float64' is read as doubles,
 *                     every other type is read as signed 64-bit BigInt
 * @param options - Parse options (`expectedBlocks` enables a size check)
 * @returns Array of `{ min, max, blockIndex }` entries in file order
 * @throws Error when `options.expectedBlocks` is set and does not match
 */
export function parseMarksFile(data, columnType, options) {
    // Handle empty data
    if (data.byteLength === 0) {
        return [];
    }
    const blockCount = Math.floor(data.byteLength / BYTES_PER_BLOCK);
    // Validate against expected block count if provided
    if (options?.expectedBlocks !== undefined && blockCount !== options.expectedBlocks) {
        throw new Error(`Marks file size mismatch: expected ${options.expectedBlocks} blocks but got ${blockCount} blocks`);
    }
    // A view carries its own offset/length into the backing buffer; a bare
    // ArrayBuffer has no `.buffer` property and must be wrapped directly.
    const view = ArrayBuffer.isView(data)
        ? new DataView(data.buffer, data.byteOffset, data.byteLength)
        : new DataView(data);
    // int64, timestamp, string - all use BigInt representation
    const readBound = columnType === 'float64'
        ? (offset) => view.getFloat64(offset, true) // little-endian
        : (offset) => view.getBigInt64(offset, true); // little-endian
    const blocks = [];
    for (let i = 0; i < blockCount; i++) {
        const offset = i * BYTES_PER_BLOCK;
        blocks.push({ min: readBound(offset), max: readBound(offset + 8), blockIndex: i });
    }
    return blocks;
}
162
/**
 * Prune blocks based on range condition(s).
 *
 * Supported condition shapes, checked in this order:
 * 1. `{ or: [...] }` / `{ and: [...] }` - nested structure whose leaves name a
 *    `column` and are matched against `block.columns[column]`
 * 2. `Record<columnName, RangeCondition>` - every listed column must overlap;
 *    blocks without `columns` never match
 * 3. `RangeCondition[]` - combined with `options.operator` ('AND' by default)
 * 4. a single `RangeCondition` (`min`, `max`, `minInclusive`, `maxInclusive`, `isNull`)
 *
 * A bound that is `undefined` or `null` is treated as unbounded on that side.
 *
 * @param blocks - Array of block ranges, or `{ buffer, blockCount, columnType }`
 *                 which is parsed with `parseMarksFile` first
 * @param condition - Single condition, array of conditions, per-column record,
 *                    or AND/OR structure
 * @param options - Prune options (`operator`: 'AND' | 'OR' for condition arrays)
 * @returns Array of blocks that may contain matching values
 * @throws Error when a numeric bound is NaN
 */
export function pruneBlocks(blocks, condition, options) {
    // Type guards
    const isBufferInput = (input) => typeof input === 'object' && input !== null && 'buffer' in input && 'blockCount' in input && 'columnType' in input;
    const isComplexCondition = (cond) => typeof cond === 'object' && cond !== null && ('or' in cond || 'and' in cond);
    const isMultiColumnBlock = (block) => 'columns' in block && block.columns !== undefined;
    const isColumnConditions = (cond) => {
        if (typeof cond !== 'object' || cond === null || Array.isArray(cond)) {
            return false;
        }
        if ('min' in cond || 'max' in cond || 'isNull' in cond) {
            return false;
        }
        return !('or' in cond || 'and' in cond);
    };
    const isAllNull = (block) => block.min === null && block.max === null;
    const hasBound = (bound) => bound !== undefined && bound !== null;
    const rejectNaN = (bound) => {
        if (typeof bound === 'number' && Number.isNaN(bound)) {
            throw new Error('Invalid range condition: NaN is not a valid boundary');
        }
    };
    // Check if a single block overlaps with a range condition
    const blockOverlapsCondition = (block, cond) => {
        // Null queries look only at null bookkeeping, never at min/max bounds
        if (cond.isNull !== undefined) {
            return cond.isNull ? (block.nullCount ?? 0) > 0 || isAllNull(block) : !isAllNull(block);
        }
        // Skip blocks that are all nulls for non-null queries
        if (isAllNull(block)) {
            return false;
        }
        rejectNaN(cond.min);
        rejectNaN(cond.max);
        const minInclusive = cond.minInclusive ?? true;
        const maxInclusive = cond.maxInclusive ?? true;
        // The block survives when the query's lower bound does not lie above the
        // block's max AND the query's upper bound does not lie below the block's
        // min. A missing bound places no constraint on its side.
        let lowerOk = true;
        if (hasBound(cond.min)) {
            lowerOk = minInclusive ? cond.min <= block.max : cond.min < block.max;
        }
        let upperOk = true;
        if (hasBound(cond.max)) {
            upperOk = maxInclusive ? cond.max >= block.min : cond.max > block.min;
        }
        return lowerOk && upperOk;
    };
    // Evaluate one member of an `or`/`and` list: either a nested structure or a
    // leaf that names a column. Leaves without a resolvable column never match.
    const evaluateMember = (block, member) => {
        if (isComplexCondition(member)) {
            return evaluateComplexCondition(block, member);
        }
        if (member.column && block.columns) {
            const colRange = block.columns[member.column];
            if (colRange) {
                return blockOverlapsCondition(colRange, member);
            }
        }
        return false;
    };
    // Evaluate a complex AND/OR condition structure (`or` wins when both are set)
    const evaluateComplexCondition = (block, cond) => {
        if (cond.or) {
            return cond.or.some((member) => evaluateMember(block, member));
        }
        if (cond.and) {
            return cond.and.every((member) => evaluateMember(block, member));
        }
        return false;
    };
    // Handle raw buffer input - parse it first
    if (isBufferInput(blocks)) {
        const parsedBlocks = parseMarksFile(blocks.buffer, blocks.columnType);
        return pruneBlocks(parsedBlocks, condition, options);
    }
    const blockArray = blocks;
    // Handle complex AND/OR condition structure
    if (isComplexCondition(condition)) {
        return blockArray.filter((block) => evaluateComplexCondition(block, condition));
    }
    // Handle multi-column conditions (Record<columnName, RangeCondition>)
    if (isColumnConditions(condition)) {
        const columnEntries = Object.entries(condition);
        return blockArray.filter((block) => {
            if (!isMultiColumnBlock(block)) {
                return false;
            }
            return columnEntries.every(([colName, colCondition]) => {
                const colRange = block.columns[colName];
                return colRange ? blockOverlapsCondition(colRange, colCondition) : false;
            });
        });
    }
    // Handle array of conditions (AND/OR based on options)
    if (Array.isArray(condition)) {
        const operator = options?.operator ?? 'AND';
        if (operator === 'OR') {
            return blockArray.filter((block) => condition.some((cond) => blockOverlapsCondition(block, cond)));
        }
        return blockArray.filter((block) => condition.every((cond) => blockOverlapsCondition(block, cond)));
    }
    // Single condition
    return blockArray.filter((block) => blockOverlapsCondition(block, condition));
}
342
+ // ============================================================================
343
+ // Manifest Loading
344
+ // ============================================================================
345
+ import { validateSearchManifest } from '../db/iceberg/search-manifest';
346
+ /**
347
+ * Cache key for storing manifests in Cache API.
348
+ * Must be a valid URL for the Cache API.
349
+ */
350
+ export const MANIFEST_CACHE_KEY = 'https://cache.apis.do/search-manifest';
351
+ /** In-memory cache for manifests (isolate-scoped) */
352
+ const manifestMemoryCache = new Map();
353
+ /** In-flight requests for request deduplication */
354
+ const inFlightRequests = new Map();
355
+ /** Timestamp of when cache was last cleared - used to invalidate Cache API entries */
356
+ let cacheInvalidationTime = 0;
357
/**
 * Drops every in-memory manifest and in-flight manifest request, and moves the
 * invalidation watermark forward so that older Cache API entries are ignored.
 */
export function clearManifestCache() {
    for (const store of [manifestMemoryCache, inFlightRequests]) {
        store.clear();
    }
    // One millisecond past "now" so entries stamped in this same millisecond
    // also fall behind the watermark.
    cacheInvalidationTime = 1 + Date.now();
}
366
+ /**
367
+ * Default TTL for manifest cache (1 hour).
368
+ */
369
+ const DEFAULT_TTL = 3600;
370
/**
 * Builds the CDN manifest URL for a dataset:
 * `https://cdn.apis.do/<dataset>/manifest.json`.
 * The dataset is inserted as-is, without URL encoding.
 */
function buildManifestUrl(dataset) {
    const prefix = 'https://cdn.apis.do/';
    const suffix = '/manifest.json';
    return prefix.concat(dataset, suffix);
}
376
/**
 * Maps a manifest URL to its Cache API key.
 * The key is the URL itself, so each manifest URL gets its own cache entry.
 */
function getCacheKeyForUrl(url) {
    const cacheKey = url;
    return cacheKey;
}
383
/**
 * Loads a search manifest from CDN with caching.
 *
 * Loading flow:
 * 1. Check isolate memory cache (fastest, lives for isolate lifetime)
 * 2. Join an identical request that is already in flight
 * 3. Check Cache API (URL-specific key, then the shared MANIFEST_CACHE_KEY);
 *    an entry is used only if it was stored after the last
 *    `clearManifestCache()` and is younger than its `X-TTL-Seconds`
 * 4. Fetch from CDN, validate, store in memory, and store in the Cache API
 *    via `ctx.waitUntil` together with `X-Cached-At` and `X-TTL-Seconds`
 *
 * @param urlOrOptions - URL string or options object (`url` or `dataset`,
 *                       optional `ttl` in seconds, optional `timeout` in ms)
 * @param ctx - Execution context; `ctx.waitUntil` receives the cache write
 * @returns Promise resolving to the validated SearchManifest
 * @throws Error when neither `url` nor `dataset` is given, on timeout, on
 *         network or HTTP failure, or when the body is empty, invalid JSON, or null
 *
 * @example
 * ```typescript
 * // Using URL string
 * const manifest = await loadManifest('https://cdn.apis.do/wiktionary/v1/manifest.json', ctx)
 *
 * // Using options object
 * const manifest = await loadManifest({ dataset: 'wiktionary', ttl: 3600 }, ctx)
 * ```
 */
export async function loadManifest(urlOrOptions, ctx) {
    // Normalize to options object
    const options = typeof urlOrOptions === 'string' ? { url: urlOrOptions } : urlOrOptions;
    // Determine the manifest URL
    let manifestUrl;
    if (options.url) {
        manifestUrl = options.url;
    } else if (options.dataset) {
        manifestUrl = buildManifestUrl(options.dataset);
    } else {
        throw new Error('Either url or dataset must be provided');
    }
    const ttl = options.ttl ?? DEFAULT_TTL;
    const now = Date.now();
    // 1. Check memory cache first
    const memoryCached = manifestMemoryCache.get(manifestUrl);
    if (memoryCached && memoryCached.expiresAt > now) {
        return memoryCached.manifest;
    }
    // Check for in-flight request (request deduplication)
    const inFlight = inFlightRequests.get(manifestUrl);
    if (inFlight) {
        return inFlight;
    }
    // Create a promise for this request and store it for deduplication
    const loadPromise = (async () => {
        // Handle of the timeout timer, if one was started; cleared when the
        // whole load settles so it never outlives the request.
        let timeoutTimer;
        try {
            // 2. Check Cache API - try URL-specific key first, then fallback to shared key
            const cacheKey = getCacheKeyForUrl(manifestUrl);
            try {
                let cachedResponse = await caches.default.match(cacheKey);
                // Fallback to shared MANIFEST_CACHE_KEY for backward compatibility
                if (!cachedResponse && cacheKey !== MANIFEST_CACHE_KEY) {
                    cachedResponse = await caches.default.match(MANIFEST_CACHE_KEY);
                }
                if (cachedResponse) {
                    // If no X-Cached-At header, treat as always valid (for test compatibility)
                    const cachedAt = cachedResponse.headers.get('X-Cached-At');
                    const cachedTimestamp = cachedAt ? parseInt(cachedAt, 10) : Infinity;
                    // TTL recorded with the entry; entries without it use the default
                    const ttlHeader = cachedResponse.headers.get('X-TTL-Seconds');
                    const cacheTtl = ttlHeader ? parseInt(ttlHeader, 10) : DEFAULT_TTL;
                    const cacheExpiry = cachedTimestamp + cacheTtl * 1000;
                    // Check both invalidation time and TTL expiry
                    if (cachedTimestamp >= cacheInvalidationTime && now < cacheExpiry) {
                        const json = await cachedResponse.text();
                        const manifest = validateSearchManifest(JSON.parse(json));
                        // Populate memory cache
                        manifestMemoryCache.set(manifestUrl, {
                            manifest,
                            expiresAt: now + ttl * 1000,
                        });
                        return manifest;
                    }
                    // Cache entry is stale (invalidated or expired), fall through to fetch
                }
            } catch {
                // Cache API error - fall through to fetch
            }
            // 3. Fetch from CDN
            let response;
            try {
                if (options.timeout) {
                    // Abort the fetch and reject when the timeout elapses
                    const controller = new AbortController();
                    const timeoutPromise = new Promise((_, reject) => {
                        timeoutTimer = setTimeout(() => {
                            controller.abort();
                            reject(new Error('Request timeout'));
                        }, options.timeout);
                    });
                    // Race between fetch and timeout - this ensures timeout works even with mocks
                    response = await Promise.race([
                        fetch(manifestUrl, { signal: controller.signal }),
                        timeoutPromise,
                    ]);
                } else {
                    response = await fetch(manifestUrl);
                }
            } catch (error) {
                if (error instanceof Error) {
                    if (error.name === 'AbortError' || error.message.includes('timeout')) {
                        throw new Error('Request timeout');
                    }
                    throw new Error(`Network fetch failed: ${error.message}`);
                }
                throw new Error('Network fetch failed');
            }
            // Handle HTTP errors
            if (!response.ok) {
                if (response.status === 404) {
                    throw new Error(`Manifest not found (404): ${manifestUrl}`);
                }
                throw new Error(`Failed to fetch manifest: ${response.status} ${response.statusText}`);
            }
            // Parse and validate JSON
            const text = await response.text();
            if (!text) {
                throw new Error('Empty response body');
            }
            let parsed;
            try {
                parsed = JSON.parse(text);
            } catch {
                throw new Error('Invalid JSON in manifest response');
            }
            if (parsed === null) {
                throw new Error('Expected object, got null');
            }
            const manifest = validateSearchManifest(parsed);
            // Cache in memory
            manifestMemoryCache.set(manifestUrl, {
                manifest,
                expiresAt: now + ttl * 1000,
            });
            // Cache in Cache API asynchronously using URL-specific key
            const cacheTimestamp = Date.now();
            ctx.waitUntil((async () => {
                try {
                    const cacheResponse = new Response(JSON.stringify(manifest), {
                        headers: {
                            'Content-Type': 'application/json',
                            'Cache-Control': `public, max-age=${ttl}`,
                            'X-Cached-At': cacheTimestamp.toString(),
                            // Read back above so the entry expires after the TTL it was stored with
                            'X-TTL-Seconds': String(ttl),
                        },
                    });
                    await caches.default.put(cacheKey, cacheResponse);
                } catch {
                    // Ignore cache put errors
                }
            })());
            return manifest;
        } finally {
            if (timeoutTimer !== undefined) {
                clearTimeout(timeoutTimer);
            }
            // Remove from in-flight requests
            inFlightRequests.delete(manifestUrl);
        }
    })();
    // Store in-flight request for deduplication
    inFlightRequests.set(manifestUrl, loadPromise);
    return loadPromise;
}
555
+ // ============================================================================
556
+ // Bloom Filter Types
557
+ // ============================================================================
558
+ import { BloomFilter, PuffinReader } from '../db/iceberg/puffin';
559
/**
 * Outcome of probing a bloom filter for a value.
 *
 * MAYBE: the value may be in the data file, so the file must be scanned.
 * NO: the value is definitely not in the data file, so the file can be skipped.
 */
export var BloomQueryResult = BloomQueryResult || {};
/** Value might be present - cannot prune, must scan */
BloomQueryResult.MAYBE = 'MAYBE';
/** Value is definitely NOT present - can prune/skip */
BloomQueryResult.NO = 'NO';
572
+ /** In-memory cache for bloom filters, keyed by url:fieldId */
573
+ const bloomFilterCache = new Map();
574
+ /** In-memory cache for Puffin files, keyed by url */
575
+ const puffinFileCache = new Map();
576
+ /** In-flight Puffin file fetches for deduplication */
577
+ const inFlightPuffinFetches = new Map();
578
+ /** Total bytes currently used by bloom filter cache */
579
+ let bloomCacheTotalBytes = 0;
580
+ /** Default memory limit for bloom filter cache (1MB) */
581
+ const DEFAULT_MAX_MEMORY_BYTES = 1024 * 1024;
582
/**
 * Builds the bloom filter cache key `<url>:<fieldId>`.
 */
function makeBloomCacheKey(url, fieldId) {
    return ''.concat(url, ':', fieldId);
}
588
/**
 * Frees room in the bloom filter cache for `neededBytes` more bytes.
 *
 * Entries are removed oldest-first (by `cachedAt`) until the tracked total plus
 * `neededBytes` fits within `maxBytes` or no entries remain. For keys of the
 * form `<url>:<digits>`, the cached Puffin file for `<url>` is dropped too.
 */
function evictBloomCacheIfNeeded(maxBytes, neededBytes) {
    const hasRoom = () => bloomCacheTotalBytes + neededBytes <= maxBytes;
    if (hasRoom()) {
        return;
    }
    const oldestFirst = [...bloomFilterCache].sort((left, right) => left[1].cachedAt - right[1].cachedAt);
    for (let i = 0; i < oldestFirst.length && !hasRoom(); i++) {
        const [cacheKey, cached] = oldestFirst[i];
        bloomFilterCache.delete(cacheKey);
        const parsedKey = /^(.+):\d+$/.exec(cacheKey);
        if (parsedKey !== null) {
            puffinFileCache.delete(parsedKey[1]);
        }
        bloomCacheTotalBytes -= cached.sizeBytes;
    }
}
608
/**
 * Fetches a bloom filter from a Puffin file on CDN.
 *
 * Lookup order: bloom filter cache (keyed by url and field id), Puffin file
 * cache (keyed by url), then the network. Concurrent calls for the same url
 * share one in-flight download. A call that finds the filter already cached by
 * a concurrent call once its download completes returns that cached filter
 * instead of inserting a second copy, so the byte accounting stays exact.
 *
 * @param url - URL of the Puffin file
 * @param params - `{ fieldId }` identifying the 'bloom-filter-v1' blob
 * @param options - `fetch` (custom fetch), `maxMemoryBytes`, `timeoutMs`,
 *                  `trackStats`; when `trackStats` is set, `options.stats` is
 *                  created if missing and updated in place
 * @returns The bloom filter, or null when the file, the blob, or a usable
 *          filter is unavailable (including any non-timeout error)
 * @throws Error('Request timeout') when a request exceeds `timeoutMs`
 */
export async function fetchBloomFilter(url, params, options = {}) {
    const { fetch: customFetch = fetch, maxMemoryBytes = DEFAULT_MAX_MEMORY_BYTES, trackStats = false, timeoutMs, } = options;
    const cacheKey = makeBloomCacheKey(url, params.fieldId);
    if (trackStats && !options.stats) {
        options.stats = {
            totalBytes: 0,
            cacheHits: 0,
            cacheMisses: 0,
            entryCount: 0,
        };
    }
    // Copies the current cache totals into the caller-visible stats object.
    const publishCacheTotals = () => {
        if (trackStats && options.stats) {
            options.stats.totalBytes = bloomCacheTotalBytes;
            options.stats.entryCount = bloomFilterCache.size;
        }
    };
    // Applies the optional timeout to an already-started request.
    const guarded = (pending) => (timeoutMs ? withTimeout(pending, timeoutMs) : pending);
    const cachedBloom = bloomFilterCache.get(cacheKey);
    if (cachedBloom) {
        if (trackStats && options.stats) {
            options.stats.cacheHits++;
        }
        publishCacheTotals();
        return cachedBloom.filter;
    }
    if (trackStats && options.stats) {
        options.stats.cacheMisses++;
    }
    try {
        let puffinEntry = puffinFileCache.get(url);
        if (!puffinEntry) {
            let inFlightPromise = inFlightPuffinFetches.get(url);
            if (!inFlightPromise) {
                inFlightPromise = (async () => {
                    try {
                        const fullResponse = await guarded(customFetch(url));
                        if (!fullResponse.ok) {
                            return undefined;
                        }
                        const fullBytes = new Uint8Array(await fullResponse.arrayBuffer());
                        const fileSize = fullBytes.length;
                        const footerSize = Math.min(4096, fileSize);
                        const footerStart = fileSize - footerSize;
                        const footerEnd = fileSize - 1;
                        // NOTE(review): this Range request re-reads the footer of a file
                        // that is already fully downloaded and its body is discarded.
                        // Kept to preserve the existing request sequence - confirm
                        // whether it can be removed.
                        const footerResponse = await guarded(customFetch(url, {
                            headers: { Range: `bytes=${footerStart}-${footerEnd}` },
                        }));
                        await footerResponse.arrayBuffer();
                        let reader;
                        try {
                            reader = PuffinReader.fromBytes(fullBytes);
                        } catch {
                            // Unparseable file: report as unavailable
                            return undefined;
                        }
                        return {
                            reader,
                            fileBytes: fullBytes,
                            sizeBytes: fullBytes.length,
                            cachedAt: Date.now(),
                        };
                    } finally {
                        inFlightPuffinFetches.delete(url);
                    }
                })();
                inFlightPuffinFetches.set(url, inFlightPromise);
            }
            puffinEntry = await inFlightPromise;
            if (puffinEntry) {
                puffinFileCache.set(url, puffinEntry);
            }
        }
        if (!puffinEntry) {
            return null;
        }
        // Another call for the same key may have filled the cache while this
        // one was awaiting the download; reuse its entry rather than counting
        // the same filter's bytes twice.
        const racedEntry = bloomFilterCache.get(cacheKey);
        if (racedEntry) {
            publishCacheTotals();
            return racedEntry.filter;
        }
        const blobMeta = puffinEntry.reader.findBlob('bloom-filter-v1', params.fieldId);
        if (!blobMeta) {
            return null;
        }
        const blob = puffinEntry.reader.extractBlob(blobMeta, puffinEntry.fileBytes);
        if (!(blob instanceof BloomFilter)) {
            return null;
        }
        const sizeBytes = blob.sizeBytes;
        evictBloomCacheIfNeeded(maxMemoryBytes, sizeBytes);
        bloomFilterCache.set(cacheKey, {
            filter: blob,
            sizeBytes,
            cachedAt: Date.now(),
        });
        bloomCacheTotalBytes += sizeBytes;
        publishCacheTotals();
        return blob;
    } catch (error) {
        // Timeouts surface to the caller; every other failure is best-effort
        if (error instanceof Error && error.message === 'Request timeout') {
            throw error;
        }
        return null;
    }
}
725
/**
 * Probes the bloom filter for `query.fieldId` in the Puffin file at `query.url`.
 *
 * Resolves to `BloomQueryResult.NO` only when a filter was obtained and it
 * reports that `query.value` is absent; resolves to `MAYBE` otherwise,
 * including when no filter could be obtained.
 */
export async function queryBloom(query, options = {}) {
    const bloom = await fetchBloomFilter(query.url, { fieldId: query.fieldId }, options);
    const definitelyAbsent = Boolean(bloom) && !bloom.mightContain(query.value);
    return definitelyAbsent ? BloomQueryResult.NO : BloomQueryResult.MAYBE;
}
738
/**
 * Empties the bloom filter cache, the Puffin file cache, and the in-flight
 * Puffin fetch map, and resets the tracked byte total to zero.
 */
export function clearBloomCache() {
    for (const store of [bloomFilterCache, puffinFileCache, inFlightPuffinFetches]) {
        store.clear();
    }
    bloomCacheTotalBytes = 0;
}
747
+ // ============================================================================
748
+ // Vector Search Types and Functions
749
+ // ============================================================================
750
/**
 * Distance metrics accepted by the vector search functions.
 */
export var DistanceMetric = DistanceMetric || {};
DistanceMetric.Cosine = 'cosine';
DistanceMetric.Euclidean = 'euclidean';
DistanceMetric.DotProduct = 'dot_product';
759
+ const centroidCache = new Map();
760
+ let centroidCacheTotalBytes = 0;
761
+ /** Default max memory for centroid cache (2MB) */
762
+ const DEFAULT_CENTROID_CACHE_BYTES = 2 * 1024 * 1024;
763
/**
 * Frees room in the centroid cache for `neededBytes` more bytes by removing
 * entries oldest-first (by `cachedAt`) until the tracked total plus
 * `neededBytes` fits within `maxBytes` or no entries remain.
 */
function evictCentroidCacheIfNeeded(maxBytes, neededBytes) {
    const hasRoom = () => centroidCacheTotalBytes + neededBytes <= maxBytes;
    if (hasRoom()) {
        return;
    }
    const oldestFirst = [...centroidCache].sort((left, right) => left[1].cachedAt - right[1].cachedAt);
    for (let i = 0; i < oldestFirst.length && !hasRoom(); i++) {
        const [cacheKey, cached] = oldestFirst[i];
        centroidCache.delete(cacheKey);
        centroidCacheTotalBytes -= cached.sizeBytes;
    }
}
779
/**
 * Downloads the centroid file at `options.url` using `options.fetch`.
 *
 * @returns Promise resolving to the response body as an ArrayBuffer
 * @throws Error on a 404 response or any other non-ok response
 */
export async function fetchCentroids(options) {
    const response = await options.fetch(options.url);
    let failure = null;
    if (response.status === 404) {
        failure = `Centroids not found: ${options.url}`;
    } else if (!response.ok) {
        failure = `Failed to fetch centroids: ${response.status}`;
    }
    if (failure !== null) {
        throw new Error(failure);
    }
    return response.arrayBuffer();
}
792
/**
 * Views a centroid binary as a Float32Array.
 *
 * `count` and `dims` come from `options`; any that are missing are taken from
 * a filename of the form `centroids-<count>x<dims>.bin` when one is given.
 * When both end up known (and non-zero), the buffer must be exactly
 * `count * dims * 4` bytes.
 *
 * @throws Error when the buffer size does not match the known dimensions
 */
export function deserializeCentroids(buffer, options = {}) {
    const { filename } = options;
    let count = options.count;
    let dims = options.dims;
    // Try to infer from filename
    const shapeKnown = Boolean(count && dims);
    if (filename && !shapeKnown) {
        const parsed = filename.match(/centroids-(\d+)x(\d+)\.bin/);
        if (parsed) {
            const [, countText, dimsText] = parsed;
            count = count ?? parseInt(countText, 10);
            dims = dims ?? parseInt(dimsText, 10);
        }
    }
    // Validate buffer size if dimensions known
    if (count && dims) {
        const expectedBytes = count * dims * 4;
        const actualBytes = buffer.byteLength;
        if (actualBytes !== expectedBytes) {
            throw new Error(`Buffer size mismatch: expected ${expectedBytes} bytes for ${count}x${dims}, got ${buffer.byteLength}`);
        }
    }
    return new Float32Array(buffer);
}
814
/**
 * Computes the distance from `query` to each of `numCentroids` centroids
 * stored back to back in `centroids` (`dims` values per centroid).
 *
 * - Cosine: `1 - cos(query, centroid)`; 1 when either vector has zero norm
 * - Euclidean: square root of the summed squared differences
 * - DotProduct: negated dot product, so smaller means more similar
 *
 * Any other metric leaves the distances at 0.
 *
 * @returns Float32Array with one distance per centroid
 */
export function computeDistances(query, centroids, options) {
    const { numCentroids, dims, metric } = options;
    const isCosine = metric === DistanceMetric.Cosine;
    const isEuclidean = !isCosine && metric === DistanceMetric.Euclidean;
    const isDotProduct = !isCosine && !isEuclidean && metric === DistanceMetric.DotProduct;
    const result = new Float32Array(numCentroids);
    // The query norm is shared by every cosine comparison
    let queryNorm = 0;
    if (isCosine) {
        let querySquares = 0;
        for (let i = 0; i < dims; i++) {
            querySquares += query[i] * query[i];
        }
        queryNorm = Math.sqrt(querySquares);
    }
    for (let c = 0; c < numCentroids; c++) {
        const base = c * dims;
        let dot = 0;
        let centroidSquares = 0;
        let squaredDiff = 0;
        for (let d = 0; d < dims; d++) {
            const q = query[d];
            const v = centroids[base + d];
            dot += q * v;
            if (isCosine) {
                centroidSquares += v * v;
            } else if (isEuclidean) {
                const delta = q - v;
                squaredDiff += delta * delta;
            }
        }
        if (isCosine) {
            const centroidNorm = Math.sqrt(centroidSquares);
            // Max distance if either is zero vector
            const degenerate = queryNorm === 0 || centroidNorm === 0;
            result[c] = degenerate ? 1 : 1 - dot / (queryNorm * centroidNorm);
        } else if (isEuclidean) {
            result[c] = Math.sqrt(squaredDiff);
        } else if (isDotProduct) {
            result[c] = -dot; // Negate so smaller = more similar
        }
    }
    return result;
}
865
/**
 * Returns the `k` centroids closest to `query` as `{ index, distance }`
 * pairs ordered by ascending distance. Returns an empty array when `k <= 0`
 * and at most `numCentroids` pairs otherwise.
 */
export function findTopKCentroids(query, centroids, options) {
    const { numCentroids, dims, k, metric } = options;
    if (k <= 0) {
        return [];
    }
    const distances = computeDistances(query, centroids, { numCentroids, dims, metric });
    // Pair every centroid index with its distance, then rank ascending
    const ranked = Array.from({ length: numCentroids }, (_, index) => ({ index, distance: distances[index] }));
    ranked.sort((left, right) => left.distance - right.distance);
    const limit = Math.min(k, numCentroids);
    return ranked.slice(0, limit);
}
883
/**
 * Main entry point for vector search.
 *
 * Loads the centroids at `centroidsUrl` (cached per url and `numCentroids x dims`
 * shape) and returns the `k` centroids nearest to `query`.
 *
 * If a concurrent call cached the same centroids while this call was
 * fetching, the existing cache entry is reused, so the tracked cache size
 * counts each entry only once.
 *
 * @param options - `fetch`, `centroidsUrl`, `query`, `numCentroids`, `dims`,
 *                  `k`, and optional `metric` (cosine by default)
 * @returns Promise resolving to `{ index, distance }` pairs, nearest first
 * @throws Error when `query.length` differs from `dims`, or when fetching or
 *         deserializing the centroids fails
 */
export async function queryVector(options) {
    const { fetch: fetchFn, centroidsUrl, query, numCentroids, dims, k, metric = DistanceMetric.Cosine, } = options;
    // Validate query dimensions
    if (query.length !== dims) {
        throw new Error(`Query dimension mismatch: expected ${dims}, got ${query.length}`);
    }
    // Check cache
    const cacheKey = `${centroidsUrl}:${numCentroids}x${dims}`;
    let cacheEntry = centroidCache.get(cacheKey);
    if (!cacheEntry) {
        const buffer = await fetchCentroids({ fetch: fetchFn, url: centroidsUrl });
        const centroids = deserializeCentroids(buffer, { count: numCentroids, dims });
        // Re-check after the await: another call may have cached this key meanwhile
        cacheEntry = centroidCache.get(cacheKey);
        if (!cacheEntry) {
            const sizeBytes = centroids.byteLength;
            // Evict if needed before adding
            evictCentroidCacheIfNeeded(DEFAULT_CENTROID_CACHE_BYTES, sizeBytes);
            cacheEntry = { centroids, cachedAt: Date.now(), sizeBytes };
            centroidCache.set(cacheKey, cacheEntry);
            centroidCacheTotalBytes += sizeBytes;
        }
    }
    return findTopKCentroids(query, cacheEntry.centroids, { numCentroids, dims, k, metric });
}
907
/**
 * Drop every cached centroid set and reset the byte accounting to zero.
 */
export function clearCentroidCache() {
    centroidCacheTotalBytes = 0;
    centroidCache.clear();
}
914
+ // ============================================================================
915
+ // Full-Text Search Types
916
+ // ============================================================================
917
+ import { InvertedIndexReader, simpleTokenize } from '../db/iceberg/inverted-index';
918
+ /** In-memory cache of parsed inverted indexes. Keys come from makeCacheKey (the bare URL for the global fetch, `url::fetchId` for a custom fetch); values are `{ reader, sizeBytes, cachedAt }`. */
919
+ const invertedIndexCache = new Map();
920
+ /** Pending fetch promises keyed like invertedIndexCache, so concurrent callers for the same index share one request */
921
+ const inFlightInvertedIndexFetches = new Map();
922
+ /** Running sum of `sizeBytes` over all entries currently held in invertedIndexCache */
923
+ let invertedIndexCacheTotalBytes = 0;
924
+ /** Default memory limit for inverted index cache (2MB for Snippets); used when options.maxMemoryBytes is not given */
925
+ const DEFAULT_INVERTED_INDEX_MAX_MEMORY = 2 * 1024 * 1024;
926
+ /**
927
+ * Cache of posting lists already read from an index, so repeated lookupTerm calls skip the reader.
928
+ * Key: `${indexCacheKey}::${normalizedTerm}`, Value: the result of reader.getPostings (presumably an array of doc IDs)
929
+ */
930
+ const postingListCache = new Map();
931
+ /** WeakMap from custom fetch function to the numeric ID handed out by getFetchId */
932
+ const fetchFunctionIds = new WeakMap();
933
+ /** Next ID to hand out to a custom fetch function; starts at 1 because getFetchId returns 0 for the global fetch */
934
+ let nextFetchId = 1;
935
/**
 * Resolve the numeric identity of a fetch function.
 *
 * A missing function or the global `fetch` maps to 0. Any other function gets
 * the next free ID on first sight and keeps it for subsequent calls.
 */
function getFetchId(fetchFn) {
    const isDefaultFetch = !fetchFn || fetchFn === globalThis.fetch;
    if (isDefaultFetch) {
        return 0;
    }
    if (!fetchFunctionIds.has(fetchFn)) {
        fetchFunctionIds.set(fetchFn, nextFetchId++);
    }
    return fetchFunctionIds.get(fetchFn);
}
950
/**
 * Build the cache key for a URL fetched through a given fetch function.
 *
 * The global fetch (ID 0) uses the bare URL; a custom fetch gets `url::id`
 * so that results obtained through different fetch functions stay separate.
 */
function makeCacheKey(url, fetchFn) {
    const id = getFetchId(fetchFn);
    if (id === 0) {
        return url;
    }
    return `${url}::${id}`;
}
957
/**
 * Free space in the inverted index cache for an incoming entry.
 *
 * Does nothing when `neededBytes` already fits under `maxBytes`; otherwise
 * removes entries in ascending `cachedAt` order until it fits or the cache
 * snapshot is exhausted.
 */
function evictInvertedIndexCacheIfNeeded(maxBytes, neededBytes) {
    const fits = () => invertedIndexCacheTotalBytes + neededBytes <= maxBytes;
    if (fits()) {
        return;
    }
    const oldestFirst = [...invertedIndexCache.entries()].sort((left, right) => left[1].cachedAt - right[1].cachedAt);
    for (const [key, { sizeBytes }] of oldestFirst) {
        if (fits()) {
            break;
        }
        invertedIndexCache.delete(key);
        invertedIndexCacheTotalBytes -= sizeBytes;
    }
}
973
/**
 * Reset all inverted-index state: parsed indexes, pending fetch bookkeeping,
 * cached posting lists and the byte counter.
 */
export function clearInvertedIndexCache() {
    for (const store of [invertedIndexCache, inFlightInvertedIndexFetches, postingListCache]) {
        store.clear();
    }
    invertedIndexCacheTotalBytes = 0;
}
982
+ // ============================================================================
983
+ // Full-Text Search Implementation
984
+ // ============================================================================
985
/**
 * Fetch and parse an inverted index from CDN.
 *
 * Results are cached per (URL, fetch function); concurrent calls for the same
 * key share a single in-flight request.
 *
 * @param url - URL to the inverted index file
 * @param options - Fetch options: `fetch` (defaults to the global fetch),
 *                  `maxMemoryBytes` (cache budget), `timeoutMs` (optional)
 * @returns Parsed InvertedIndexReader, or null on HTTP 404 or when the
 *          payload cannot be deserialized
 * @throws Error 'Request timeout' (re-thrown from withTimeout),
 *         'Network fetch failed…' when the fetch itself fails (original error
 *         attached as `cause`), or 'Failed to fetch inverted index: <status>'
 *         for other non-OK responses
 */
export async function fetchInvertedIndex(url, options = {}) {
    const { fetch: customFetch = fetch, maxMemoryBytes = DEFAULT_INVERTED_INDEX_MAX_MEMORY, timeoutMs, } = options;
    // Cache key includes fetch function identity
    const cacheKey = makeCacheKey(url, customFetch);
    const cached = invertedIndexCache.get(cacheKey);
    if (cached) {
        return cached.reader;
    }
    // Request deduplication: join a fetch that is already running
    const inFlight = inFlightInvertedIndexFetches.get(cacheKey);
    if (inFlight) {
        return inFlight;
    }
    const load = async () => {
        let response;
        try {
            response = timeoutMs
                ? await withTimeout(customFetch(url), timeoutMs)
                : await customFetch(url);
        }
        catch (error) {
            if (error instanceof Error) {
                if (error.message === 'Request timeout') {
                    throw error;
                }
                throw new Error(`Network fetch failed: ${error.message}`, { cause: error });
            }
            throw new Error('Network fetch failed', { cause: error });
        }
        if (response.status === 404) {
            return null;
        }
        if (!response.ok) {
            throw new Error(`Failed to fetch inverted index: ${response.status}`);
        }
        const bytes = new Uint8Array(await response.arrayBuffer());
        let reader;
        try {
            reader = InvertedIndexReader.deserialize(bytes);
        }
        catch {
            // Invalid/corrupt index
            return null;
        }
        const sizeBytes = bytes.length;
        // If this key was cached in the meantime, release its bytes first so
        // overwriting the entry does not count them twice.
        const previous = invertedIndexCache.get(cacheKey);
        if (previous) {
            invertedIndexCacheTotalBytes -= previous.sizeBytes;
            invertedIndexCache.delete(cacheKey);
        }
        evictInvertedIndexCacheIfNeeded(maxMemoryBytes, sizeBytes);
        invertedIndexCache.set(cacheKey, {
            reader,
            sizeBytes,
            cachedAt: Date.now(),
        });
        invertedIndexCacheTotalBytes += sizeBytes;
        return reader;
    };
    const fetchPromise = load();
    inFlightInvertedIndexFetches.set(cacheKey, fetchPromise);
    // Cleanup is attached AFTER registration. An async function runs
    // synchronously up to its first await, so a fetch function that throws
    // synchronously would otherwise finish its cleanup before the promise is
    // registered and leave a rejected promise in the map forever. The identity
    // check keeps us from removing a newer request registered after a cache clear.
    const release = () => {
        if (inFlightInvertedIndexFetches.get(cacheKey) === fetchPromise) {
            inFlightInvertedIndexFetches.delete(cacheKey);
        }
    };
    fetchPromise.then(release, release);
    return fetchPromise;
}
1061
/**
 * Look up a single term in an inverted index.
 *
 * Posting lists are memoized per (index, normalized term), so a repeated
 * lookup is answered without touching the index reader.
 *
 * @param params - `{ url, term }`
 * @param options - Fetch options; `caseSensitive` (default false) controls
 *                  whether the term is lower-cased before lookup
 * @returns `{ docIds, documentFrequency }`; empty when the index is unavailable
 */
export async function lookupTerm(params, options = {}) {
    const { url, term } = params;
    const { caseSensitive = false, fetch: customFetch = fetch } = options;
    const toResult = (ids) => ({ docIds: ids, documentFrequency: ids.length });
    const indexCacheKey = makeCacheKey(url, customFetch);
    const lookupKey = caseSensitive ? term : term.toLowerCase();
    const postingCacheKey = `${indexCacheKey}::${lookupKey}`;
    const memoized = postingListCache.get(postingCacheKey);
    if (memoized !== undefined) {
        return toResult(memoized);
    }
    const reader = await fetchInvertedIndex(url, options);
    if (!reader) {
        return toResult([]);
    }
    const docIds = reader.getPostings(lookupKey);
    postingListCache.set(postingCacheKey, docIds);
    return toResult(docIds);
}
1095
/**
 * Intersect multiple terms (AND query).
 *
 * @param params - `{ url, terms }`
 * @param options - Fetch options; terms are lower-cased unless `caseSensitive` is set
 * @returns `{ docIds, documentFrequency }` for documents containing ALL terms;
 *          empty when no terms are given or the index is unavailable
 */
export async function intersectTerms(params, options = {}) {
    const { url, terms } = params;
    const { caseSensitive = false } = options;
    const emptyPostings = () => ({ docIds: [], documentFrequency: 0 });
    if (terms.length === 0) {
        return emptyPostings();
    }
    const reader = await fetchInvertedIndex(url, options);
    if (!reader) {
        return emptyPostings();
    }
    const lookupTerms = caseSensitive ? terms : terms.map((term) => term.toLowerCase());
    const docIds = reader.intersect(lookupTerms);
    return { docIds, documentFrequency: docIds.length };
}
1120
/**
 * Simple stemming helper that tries common inflections.
 *
 * The original term always comes first, followed by at most one variant:
 * the plural (`term + 's'`) when the term does not end in "s", or the
 * singular (trailing "s" removed) when it does and is longer than two
 * characters.
 *
 * An empty term yields no variants: appending "s" to it would turn an empty
 * query term into a lookup for the unrelated term "s".
 *
 * @param term - Normalized term
 * @returns Array of term variants to try
 */
function getTermVariants(term) {
    if (term.length === 0) {
        return [term];
    }
    if (!term.endsWith('s')) {
        return [term, `${term}s`];
    }
    // Very short words ending in "s" ("as", "is", "s") are left alone
    if (term.length > 2) {
        return [term, term.slice(0, -1)];
    }
    return [term];
}
1136
/**
 * Union multiple terms (OR query).
 *
 * Every term is expanded with its simple singular/plural variants before the
 * lookup, and duplicates are removed.
 *
 * @param params - `{ url, terms }`
 * @param options - Fetch options; terms are lower-cased unless `caseSensitive` is set
 * @returns `{ docIds, documentFrequency }` for documents matching ANY term;
 *          empty when no terms are given or the index is unavailable
 */
export async function unionTerms(params, options = {}) {
    const { url, terms } = params;
    const { caseSensitive = false } = options;
    const emptyPostings = () => ({ docIds: [], documentFrequency: 0 });
    if (terms.length === 0) {
        return emptyPostings();
    }
    const reader = await fetchInvertedIndex(url, options);
    if (!reader) {
        return emptyPostings();
    }
    const lookupTerms = caseSensitive ? terms : terms.map((term) => term.toLowerCase());
    // A Set keeps first-seen order while dropping duplicate variants
    const expanded = new Set(lookupTerms.flatMap((term) => getTermVariants(term)));
    const docIds = reader.union(Array.from(expanded));
    return { docIds, documentFrequency: docIds.length };
}
1167
/**
 * Search for a phrase in an inverted index.
 *
 * The index carries no position data, so the phrase is tokenized and run as
 * an AND query over its terms. This is an approximation: documents containing
 * all the words in any order also match.
 *
 * @param params - `{ url, phrase }`
 * @param options - Fetch options
 * @returns `{ docIds, documentFrequency }`; empty when the phrase has no tokens
 */
export async function phraseSearch(params, options = {}) {
    const { url, phrase } = params;
    const terms = simpleTokenize(phrase);
    return terms.length > 0
        ? intersectTerms({ url, terms }, options)
        : { docIds: [], documentFrequency: 0 };
}
1189
/**
 * Search for terms matching a prefix.
 *
 * @param params - `{ url, prefix, limit }`; `limit` (default 100) is passed to
 *                 the reader's prefix search
 * @param options - Fetch options; the prefix is lower-cased unless `caseSensitive` is set
 * @returns `{ terms, docIds }`: the matching terms and the union of their postings;
 *          both empty when the index is unavailable or nothing matches
 */
export async function prefixSearch(params, options = {}) {
    const { url, prefix, limit = 100 } = params;
    const { caseSensitive = false } = options;
    const reader = await fetchInvertedIndex(url, options);
    if (!reader) {
        return { terms: [], docIds: [] };
    }
    const needle = caseSensitive ? prefix : prefix.toLowerCase();
    const terms = reader.searchPrefix(needle, limit).map((entry) => entry.term);
    if (terms.length === 0) {
        return { terms: [], docIds: [] };
    }
    return { terms, docIds: reader.union(terms) };
}
1218
/**
 * Parse and execute a full-text query.
 *
 * Query syntax:
 * - Single term: `dog`
 * - AND query: `dog AND cat` or `dog cat` (implicit AND)
 * - OR query: `dog OR cat`; each alternative may itself be an AND query
 *   (`dog AND cat OR bird`)
 * - Phrase: `"quick brown fox"` (approximated as AND, no position data)
 * - Prefix wildcard: `qui*`
 *
 * Forms are checked in this order: OR, AND, quoted phrase, prefix wildcard,
 * plain terms.
 *
 * @param params - `{ url, query, offset, limit }`; `offset` defaults to 0 and
 *                 `limit` is unbounded when omitted
 * @param options - Fetch options; `timeoutMs` bounds the whole call
 * @returns `{ hits, totalHits, queryTimeMs }` where `totalHits` counts matches
 *          before pagination
 * @throws Error 'Failed to fetch index' when the index is missing or invalid,
 *         Error 'Request timeout' when `timeoutMs` is exceeded
 */
export async function queryFullText(params, options = {}) {
    const { url, query, offset = 0, limit } = params;
    const { timeoutMs } = options;
    const startTime = performance.now();
    const checkTimeout = () => {
        if (timeoutMs && performance.now() - startTime > timeoutMs) {
            throw new Error('Request timeout');
        }
    };
    // Tokenize an AND-joined expression. Splitting on the operator first keeps
    // the literal word "AND" from being handed to the tokenizer as a term.
    const tokenizeConjunction = (text) => text
        .split(' AND ')
        .map((part) => part.trim())
        .filter(Boolean)
        .flatMap((part) => simpleTokenize(part));
    const trimmedQuery = query.trim();
    if (!trimmedQuery) {
        return {
            hits: [],
            totalHits: 0,
            queryTimeMs: performance.now() - startTime,
        };
    }
    const reader = await fetchInvertedIndex(url, options);
    if (!reader) {
        throw new Error('Failed to fetch index');
    }
    checkTimeout();
    let docIds = [];
    if (trimmedQuery.includes(' OR ')) {
        // Each alternative is an AND of its own terms; the results are unioned
        const alternatives = trimmedQuery.split(' OR ').map((part) => part.trim()).filter(Boolean);
        const allDocs = new Set();
        for (const alternative of alternatives) {
            checkTimeout();
            const alternativeTerms = tokenizeConjunction(alternative);
            if (alternativeTerms.length > 0) {
                for (const id of reader.intersect(alternativeTerms)) {
                    allDocs.add(id);
                }
            }
        }
        docIds = Array.from(allDocs).sort((a, b) => a - b);
    }
    else if (trimmedQuery.includes(' AND ')) {
        const allTerms = tokenizeConjunction(trimmedQuery);
        if (allTerms.length > 0) {
            docIds = reader.intersect(allTerms);
        }
    }
    else if (trimmedQuery.startsWith('"') && trimmedQuery.endsWith('"')) {
        // Quoted phrase: strip the quotes and require every word
        const terms = simpleTokenize(trimmedQuery.slice(1, -1));
        if (terms.length > 0) {
            docIds = reader.intersect(terms);
        }
    }
    else if (trimmedQuery.endsWith('*')) {
        // Prefix wildcard: union the postings of up to 100 matching terms
        const prefix = trimmedQuery.slice(0, -1).toLowerCase();
        const matchingTerms = reader.searchPrefix(prefix, 100).map((entry) => entry.term);
        if (matchingTerms.length > 0) {
            docIds = reader.union(matchingTerms);
        }
    }
    else {
        // Plain terms: single-term lookup or implicit AND
        const terms = simpleTokenize(trimmedQuery);
        if (terms.length === 1) {
            docIds = reader.getPostings(terms[0]);
        }
        else if (terms.length > 1) {
            docIds = reader.intersect(terms);
        }
    }
    checkTimeout();
    // Pagination is applied after counting so totalHits reflects all matches
    const totalHits = docIds.length;
    let hits = docIds;
    if (offset > 0) {
        hits = hits.slice(offset);
    }
    if (limit !== undefined) {
        hits = hits.slice(0, limit);
    }
    return {
        hits,
        totalHits,
        queryTimeMs: performance.now() - startTime,
    };
}
1336
+ // ============================================================================
1337
+ // Combined Query Router Types
1338
+ // ============================================================================
1339
+ import { buildIndexUrl } from '../db/iceberg/search-manifest';
1340
+ // ============================================================================
1341
+ // Combined Query Router Implementation
1342
+ // ============================================================================
1343
+ /** Default maximum number of subrequests per search; executeSearch uses it when options.maxSubrequests is not supplied */
1344
+ const MAX_SUBREQUESTS = 5;
1345
/**
 * Parse a search query from URL parameters.
 *
 * Query format:
 * - bloom=field:value (repeatable; a missing colon yields an empty value)
 * - range=field:op:value (repeatable; op: gt, lt, gte, lte, eq; with fewer
 *   than three parts the operator defaults to `eq`)
 * - vector=field:base64data:k=N (base64 of raw float32 bytes; k defaults to 10)
 * - text=field:query (ignored when it has no colon)
 *
 * @param url - URL containing query parameters
 * @returns Parsed SearchQuery; keys are present only for parameters that were supplied
 */
export function parseSearchQuery(url) {
    const params = url.searchParams;
    const query = {};
    const bloomParams = params.getAll('bloom');
    if (bloomParams.length > 0) {
        query.bloom = bloomParams.map((param) => {
            const separator = param.indexOf(':');
            return separator === -1
                ? { field: param, value: '' }
                : { field: param.slice(0, separator), value: param.slice(separator + 1) };
        });
    }
    const rangeParams = params.getAll('range');
    if (rangeParams.length > 0) {
        query.range = rangeParams.map((param) => {
            const [field, second, ...rest] = param.split(':');
            if (rest.length === 0) {
                // Short form "field:value" (or just "field"): implicit equality
                return { field: field ?? '', op: 'eq', value: second ?? '' };
            }
            // Everything after the operator is the value, colons included
            return { field, op: second, value: rest.join(':') };
        });
    }
    const vectorParam = params.get('vector');
    if (vectorParam) {
        const parts = vectorParam.split(':');
        if (parts.length >= 3) {
            const [field, base64Data, kSpec] = parts;
            const kMatch = kSpec.match(/k=(\d+)/);
            const k = kMatch ? parseInt(kMatch[1], 10) : 10;
            // Copy the decoded bytes into a standalone, exactly-sized buffer
            // before reinterpreting them as float32 values.
            const bytes = new Uint8Array(Buffer.from(base64Data, 'base64'));
            query.vector = { field, query: new Float32Array(bytes.buffer), k };
        }
    }
    const textParam = params.get('text');
    if (textParam) {
        const separator = textParam.indexOf(':');
        if (separator !== -1) {
            query.text = {
                field: textParam.slice(0, separator),
                query: textParam.slice(separator + 1),
            };
        }
    }
    return query;
}
1423
/**
 * Clear all search caches: centroids, inverted indexes, then bloom filters.
 */
export function clearSearchCache() {
    const clearers = [clearCentroidCache, clearInvertedIndexCache, clearBloomCache];
    for (const clear of clearers) {
        clear();
    }
}
1431
+ /**
1432
+ * Execute a combined search query against a manifest, running up to four phases in order: bloom, range, vector, full-text.
1433
+ * Each phase runs only while subrequest budget remains; a bloom NO or an empty range intersection returns early with pruned = true.
1434
+ * @param manifest - Search manifest describing available indexes (read via manifest.indexes.bloom/range/vector/inverted)
1435
+ * @param query - Combined search query with optional bloom, range, vector and text parts
1436
+ * @param ctx - Execution context (not referenced in this function body)
1437
+ * @param options - Execution options: fetch, timeoutMs (default 5000), maxSubrequests (default MAX_SUBREQUESTS)
1438
+ * @returns Combined search result { pruned, blocks, centroids, documents, timing, subrequests }; throws on a vector dimension mismatch and rethrows errors whose message does not contain 'timeout'
1439
+ */
1440
+ export async function executeSearch(manifest, query, ctx, options = {}) {
1441
+ const { fetch: customFetch = fetch, timeoutMs = 5000, maxSubrequests = MAX_SUBREQUESTS, } = options;
1442
+ const startTime = performance.now();
1443
+ const timing = { total_ms: 0 };
1444
+ let subrequests = 0;
1445
+ let budgetRemaining = maxSubrequests;
1446
+ // Track if we should prune (any definitive NO from bloom = prune all)
1447
+ let pruned = false;
1448
+ // Results from different query types; each stays undefined unless its phase produces a result
1449
+ let rangeBlocks;
1450
+ let vectorCentroids;
1451
+ let textDocuments;
1452
+ // Helper to check timeout; only called inside the bloom and range loops
1453
+ const checkTimeout = () => {
1454
+ if (performance.now() - startTime > timeoutMs) {
1455
+ throw new Error('Search timeout');
1456
+ }
1457
+ };
1458
+ // Helper to track subrequests; invoked before each index request is issued
1459
+ const trackSubrequest = () => {
1460
+ subrequests++;
1461
+ budgetRemaining--;
1462
+ };
1463
+ // Empty query - return immediately
1464
+ const hasQuery = (query.bloom && query.bloom.length > 0) ||
1465
+ (query.range && query.range.length > 0) ||
1466
+ query.vector ||
1467
+ query.text;
1468
+ if (!hasQuery) {
1469
+ return {
1470
+ pruned: false,
1471
+ timing: { total_ms: performance.now() - startTime },
1472
+ subrequests: 0,
1473
+ };
1474
+ }
1475
+ // Validate vector query dimensions if present (outside the try block, so these errors always propagate)
1476
+ if (query.vector) {
1477
+ const vectorConfig = manifest.indexes.vector?.[query.vector.field];
1478
+ if (vectorConfig) {
1479
+ if (query.vector.query.length === 0) {
1480
+ throw new Error('Vector query is empty - dimension mismatch');
1481
+ }
1482
+ if (query.vector.query.length !== vectorConfig.dims) {
1483
+ throw new Error(`Vector dimension mismatch: expected ${vectorConfig.dims}, got ${query.vector.query.length}`);
1484
+ }
1485
+ }
1486
+ }
1487
+ try {
1488
+ // =========================================================================
1489
+ // Phase 1: Bloom filter checks (most selective, do first)
1490
+ // =========================================================================
1491
+ if (query.bloom && query.bloom.length > 0 && budgetRemaining > 0) {
1492
+ const bloomStartTime = performance.now();
1493
+ for (const bloomQuery of query.bloom) {
1494
+ if (budgetRemaining <= 0)
1495
+ break;
1496
+ checkTimeout();
1497
+ const bloomConfig = manifest.indexes.bloom?.[bloomQuery.field];
1498
+ if (!bloomConfig) {
1499
+ // Field not in manifest - skip (conservative: don't prune)
1500
+ continue;
1501
+ }
1502
+ const bloomUrl = buildIndexUrl(manifest, 'bloom', bloomQuery.field);
1503
+ if (!bloomUrl)
1504
+ continue;
1505
+ try {
1506
+ trackSubrequest();
1507
+ // Note: fieldId is used to identify which bloom filter in a Puffin file
1508
+ // Since each field has its own Puffin file in this schema, use a default fieldId
1509
+ // The fieldId in Puffin files typically corresponds to Iceberg column IDs
1510
+ const result = await queryBloom({
1511
+ url: bloomUrl,
1512
+ fieldId: 1, // Default column ID
1513
+ value: bloomQuery.value,
1514
+ }, { fetch: customFetch });
1515
+ if (result === BloomQueryResult.NO) {
1516
+ // Definitive NO - prune entire result
1517
+ pruned = true;
1518
+ timing.bloom_ms = performance.now() - bloomStartTime;
1519
+ break;
1520
+ }
1521
+ // Any other result (e.g. MAYBE) means continue checking the remaining bloom queries
1522
+ }
1523
+ catch {
1524
+ // On error, be conservative - don't prune
1525
+ continue;
1526
+ }
1527
+ }
1528
+ timing.bloom_ms = performance.now() - bloomStartTime;
1529
+ }
1530
+ // Short-circuit if already pruned
1531
+ if (pruned) {
1532
+ return {
1533
+ pruned: true,
1534
+ timing: {
1535
+ ...timing,
1536
+ total_ms: performance.now() - startTime,
1537
+ },
1538
+ subrequests,
1539
+ };
1540
+ }
1541
+ // =========================================================================
1542
+ // Phase 2: Range queries (determine blocks to scan)
1543
+ // =========================================================================
1544
+ if (query.range && query.range.length > 0 && budgetRemaining > 0) {
1545
+ const rangeStartTime = performance.now();
1546
+ const allBlocks = new Set();
1547
+ let firstRangeQuery = true;
1548
+ for (const rangeQuery of query.range) {
1549
+ if (budgetRemaining <= 0)
1550
+ break;
1551
+ checkTimeout();
1552
+ const rangeConfig = manifest.indexes.range?.[rangeQuery.field];
1553
+ if (!rangeConfig)
1554
+ continue;
1555
+ const rangeUrl = buildIndexUrl(manifest, 'range', rangeQuery.field);
1556
+ if (!rangeUrl)
1557
+ continue;
1558
+ try {
1559
+ trackSubrequest();
1560
+ // Fetch marks file
1561
+ const response = await customFetch(rangeUrl);
1562
+ if (!response.ok)
1563
+ continue;
1564
+ const buffer = await response.arrayBuffer();
1565
+ // Use int64 type for range queries (covers timestamps and integers)
1566
+ const blocks = parseMarksFile(new Uint8Array(buffer), 'int64');
1567
+ // Find matching blocks based on operator
1568
+ const matchingBlocks = findMatchingBlocks(blocks, rangeQuery.op, rangeQuery.value);
1569
+ if (firstRangeQuery) {
1570
+ for (const block of matchingBlocks) {
1571
+ allBlocks.add(block);
1572
+ }
1573
+ firstRangeQuery = false;
1574
+ }
1575
+ else {
1576
+ // Intersect with previous results (AND semantics)
1577
+ // Use Set for O(1) lookups instead of O(n) array includes
1578
+ const matchingSet = new Set(matchingBlocks);
1579
+ for (const block of allBlocks) {
1580
+ if (!matchingSet.has(block)) {
1581
+ allBlocks.delete(block);
1582
+ }
1583
+ }
1584
+ }
1585
+ // If no blocks match after at least one successful range lookup, we can prune
1586
+ if (allBlocks.size === 0 && !firstRangeQuery) {
1587
+ pruned = true;
1588
+ break;
1589
+ }
1590
+ }
1591
+ catch {
1592
+ // On error, be conservative
1593
+ continue;
1594
+ }
1595
+ }
1596
+ if (allBlocks.size > 0) {
1597
+ rangeBlocks = Array.from(allBlocks).sort((a, b) => a - b);
1598
+ }
1599
+ timing.range_ms = performance.now() - rangeStartTime;
1600
+ }
1601
+ // Short-circuit if pruned by range
1602
+ if (pruned) {
1603
+ return {
1604
+ pruned: true,
1605
+ timing: {
1606
+ ...timing,
1607
+ total_ms: performance.now() - startTime,
1608
+ },
1609
+ subrequests,
1610
+ };
1611
+ }
1612
+ // =========================================================================
1613
+ // Phase 3: Vector query (find nearest centroids)
1614
+ // =========================================================================
1615
+ if (query.vector && budgetRemaining > 0) {
1616
+ const vectorStartTime = performance.now();
1617
+ const vectorConfig = manifest.indexes.vector?.[query.vector.field];
1618
+ if (vectorConfig) {
1619
+ const vectorUrl = buildIndexUrl(manifest, 'vector', query.vector.field);
1620
+ if (vectorUrl) {
1621
+ try {
1622
+ trackSubrequest();
1623
+ const k = Math.min(query.vector.k, vectorConfig.count);
1624
+ // Convert manifest metric string to DistanceMetric enum (unknown strings fall back to cosine)
1625
+ const metricMap = {
1626
+ cosine: DistanceMetric.Cosine,
1627
+ euclidean: DistanceMetric.Euclidean,
1628
+ dot: DistanceMetric.DotProduct,
1629
+ };
1630
+ const metric = metricMap[vectorConfig.metric] ?? DistanceMetric.Cosine;
1631
+ const topK = await queryVector({
1632
+ centroidsUrl: vectorUrl,
1633
+ numCentroids: vectorConfig.count,
1634
+ dims: vectorConfig.dims,
1635
+ query: query.vector.query,
1636
+ k,
1637
+ metric,
1638
+ fetch: customFetch,
1639
+ });
1640
+ vectorCentroids = topK.map((result) => ({
1641
+ index: result.index,
1642
+ distance: result.distance,
1643
+ }));
1644
+ }
1645
+ catch {
1646
+ // On error, leave centroids undefined
1647
+ }
1648
+ }
1649
+ }
1650
+ timing.vector_ms = performance.now() - vectorStartTime;
1651
+ }
1652
+ // =========================================================================
1653
+ // Phase 4: Full-text query
1654
+ // =========================================================================
1655
+ if (query.text && budgetRemaining > 0) {
1656
+ const textStartTime = performance.now();
1657
+ const invertedConfig = manifest.indexes.inverted?.[query.text.field];
1658
+ if (invertedConfig) {
1659
+ const invertedUrl = buildIndexUrl(manifest, 'inverted', query.text.field);
1660
+ if (invertedUrl) {
1661
+ try {
1662
+ trackSubrequest();
1663
+ const result = await queryFullText({ url: invertedUrl, query: query.text.query }, { fetch: customFetch });
1664
+ textDocuments = result.hits;
1665
+ // No documents matched: mark the search as pruned.
1666
+ // There is no early return here, so blocks/centroids gathered above are still returned with documents = [].
1667
+ if (result.hits.length === 0) {
1668
+ pruned = true;
1669
+ }
1670
+ }
1671
+ catch {
1672
+ // On error, leave documents undefined
1673
+ }
1674
+ }
1675
+ }
1676
+ timing.text_ms = performance.now() - textStartTime;
1677
+ }
1678
+ }
1679
+ catch (error) {
1680
+ // Handle timeout: return the timing and subrequest count gathered so far, without blocks/centroids/documents
1681
+ if (error instanceof Error && error.message.includes('timeout')) {
1682
+ return {
1683
+ pruned: false, // Conservative on timeout
1684
+ timing: {
1685
+ ...timing,
1686
+ total_ms: performance.now() - startTime,
1687
+ },
1688
+ subrequests,
1689
+ };
1690
+ }
1691
+ throw error;
1692
+ }
1693
+ timing.total_ms = performance.now() - startTime;
1694
+ return {
1695
+ pruned,
1696
+ blocks: rangeBlocks,
1697
+ centroids: vectorCentroids,
1698
+ documents: textDocuments,
1699
+ timing,
1700
+ subrequests,
1701
+ };
1702
+ }
1703
+ // ============================================================================
1704
+ // Helper Functions for Combined Router
1705
+ // ============================================================================
1706
/**
 * Per-operator test deciding whether a block with bounds [min, max] could
 * contain a value satisfying `<column> <op> target`.
 */
const RANGE_BLOCK_TESTS = new Map([
    ['gt', (min, max, target) => max > target],
    ['gte', (min, max, target) => max >= target],
    ['lt', (min, max, target) => min < target],
    ['lte', (min, max, target) => min <= target],
    ['eq', (min, max, target) => min <= target && max >= target],
]);
/**
 * Parse a range query value into a BigInt.
 *
 * Plain (optionally signed) integers are parsed first, so negative numbers are
 * never mistaken for dates; strings containing "-" with at least 10 characters
 * are then treated as date strings and converted to epoch milliseconds.
 * Returns null when the value is empty or cannot be parsed.
 */
function parseRangeTarget(value) {
    const text = String(value).trim();
    if (text === '') {
        return null;
    }
    try {
        if (/^[+-]?\d+$/.test(text)) {
            return BigInt(text);
        }
        if (text.includes('-') && text.length >= 10) {
            // An invalid date yields NaN, which BigInt rejects
            return BigInt(new Date(text).getTime());
        }
        return BigInt(text);
    }
    catch {
        return null;
    }
}
/**
 * Normalize a block bound (bigint or number) to BigInt, or null when a numeric
 * bound is not finite. `round` decides the rounding direction for fractions.
 */
function toBigIntBound(bound, round) {
    if (typeof bound === 'bigint') {
        return bound;
    }
    return Number.isFinite(bound) ? BigInt(round(bound)) : null;
}
/**
 * Find blocks matching a range query.
 *
 * The result is conservative: whenever the query cannot be evaluated (empty or
 * unparsable value, unknown operator, non-finite block bound) the affected
 * blocks are kept, so a block that might contain matches is never dropped.
 *
 * @param blocks - Block entries with `blockIndex`, `min` and `max`
 * @param op - One of gt, gte, lt, lte, eq
 * @param value - Query value as a string (integer or date)
 * @returns Block indexes that may contain matching values
 */
function findMatchingBlocks(blocks, op, value) {
    const target = parseRangeTarget(value);
    const test = RANGE_BLOCK_TESTS.get(op);
    if (target === null || test === undefined) {
        return blocks.map((b) => b.blockIndex);
    }
    const matchingBlocks = [];
    for (const block of blocks) {
        // Round outwards (min down, max up) so fractional bounds never
        // shrink the block's range.
        const minVal = toBigIntBound(block.min, Math.floor);
        const maxVal = toBigIntBound(block.max, Math.ceil);
        if (minVal === null || maxVal === null || test(minVal, maxVal, target)) {
            matchingBlocks.push(block.blockIndex);
        }
    }
    return matchingBlocks;
}
1759
+ //# sourceMappingURL=search.js.map