ai-database 2.1.1 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (268) hide show
  1. package/CHANGELOG.md +47 -1
  2. package/README.md +1063 -186
  3. package/dist/actions.d.ts +2 -2
  4. package/dist/actions.d.ts.map +1 -1
  5. package/dist/actions.js +1 -1
  6. package/dist/actions.js.map +1 -1
  7. package/dist/ai-promise-db.d.ts +52 -23
  8. package/dist/ai-promise-db.d.ts.map +1 -1
  9. package/dist/ai-promise-db.js +185 -164
  10. package/dist/ai-promise-db.js.map +1 -1
  11. package/dist/authorization.d.ts.map +1 -1
  12. package/dist/authorization.js +38 -30
  13. package/dist/authorization.js.map +1 -1
  14. package/dist/cascade-orchestrator.d.ts +404 -0
  15. package/dist/cascade-orchestrator.d.ts.map +1 -0
  16. package/dist/cascade-orchestrator.js +828 -0
  17. package/dist/cascade-orchestrator.js.map +1 -0
  18. package/dist/cascade-write-strategy.d.ts +584 -0
  19. package/dist/cascade-write-strategy.d.ts.map +1 -0
  20. package/dist/cascade-write-strategy.js +590 -0
  21. package/dist/cascade-write-strategy.js.map +1 -0
  22. package/dist/ch-adapter.d.ts +358 -0
  23. package/dist/ch-adapter.d.ts.map +1 -0
  24. package/dist/ch-adapter.js +929 -0
  25. package/dist/ch-adapter.js.map +1 -0
  26. package/dist/client/index.d.ts +42 -0
  27. package/dist/client/index.d.ts.map +1 -0
  28. package/dist/client/index.js +43 -0
  29. package/dist/client/index.js.map +1 -0
  30. package/dist/client.d.ts +266 -0
  31. package/dist/client.d.ts.map +1 -0
  32. package/dist/client.js +81 -0
  33. package/dist/client.js.map +1 -0
  34. package/dist/constants.d.ts +64 -1
  35. package/dist/constants.d.ts.map +1 -1
  36. package/dist/constants.js +52 -2
  37. package/dist/constants.js.map +1 -1
  38. package/dist/dataloader.d.ts +99 -0
  39. package/dist/dataloader.d.ts.map +1 -0
  40. package/dist/dataloader.js +225 -0
  41. package/dist/dataloader.js.map +1 -0
  42. package/dist/db-provider-port.d.ts +501 -0
  43. package/dist/db-provider-port.d.ts.map +1 -0
  44. package/dist/db-provider-port.js +113 -0
  45. package/dist/db-provider-port.js.map +1 -0
  46. package/dist/digital-objects-provider.d.ts +49 -0
  47. package/dist/digital-objects-provider.d.ts.map +1 -0
  48. package/dist/digital-objects-provider.js +55 -0
  49. package/dist/digital-objects-provider.js.map +1 -0
  50. package/dist/do-sqlite-adapter.d.ts +402 -0
  51. package/dist/do-sqlite-adapter.d.ts.map +1 -0
  52. package/dist/do-sqlite-adapter.js +745 -0
  53. package/dist/do-sqlite-adapter.js.map +1 -0
  54. package/dist/docs-rels/custom-types.d.ts +134 -0
  55. package/dist/docs-rels/custom-types.d.ts.map +1 -0
  56. package/dist/docs-rels/custom-types.js +70 -0
  57. package/dist/docs-rels/custom-types.js.map +1 -0
  58. package/dist/docs-rels/index.d.ts +16 -0
  59. package/dist/docs-rels/index.d.ts.map +1 -0
  60. package/dist/docs-rels/index.js +16 -0
  61. package/dist/docs-rels/index.js.map +1 -0
  62. package/dist/docs-rels/migrations/index.d.ts +30 -0
  63. package/dist/docs-rels/migrations/index.d.ts.map +1 -0
  64. package/dist/docs-rels/migrations/index.js +128 -0
  65. package/dist/docs-rels/migrations/index.js.map +1 -0
  66. package/dist/docs-rels/schema.d.ts +2961 -0
  67. package/dist/docs-rels/schema.d.ts.map +1 -0
  68. package/dist/docs-rels/schema.js +244 -0
  69. package/dist/docs-rels/schema.js.map +1 -0
  70. package/dist/durable-clickhouse.d.ts.map +1 -1
  71. package/dist/durable-clickhouse.js +16 -13
  72. package/dist/durable-clickhouse.js.map +1 -1
  73. package/dist/durable-promise.d.ts.map +1 -1
  74. package/dist/durable-promise.js +34 -15
  75. package/dist/durable-promise.js.map +1 -1
  76. package/dist/errors.d.ts +127 -0
  77. package/dist/errors.d.ts.map +1 -0
  78. package/dist/errors.js +210 -0
  79. package/dist/errors.js.map +1 -0
  80. package/dist/eventbridge.d.ts +117 -0
  81. package/dist/eventbridge.d.ts.map +1 -0
  82. package/dist/eventbridge.js +238 -0
  83. package/dist/eventbridge.js.map +1 -0
  84. package/dist/events.d.ts +2 -2
  85. package/dist/events.d.ts.map +1 -1
  86. package/dist/events.js +1 -1
  87. package/dist/events.js.map +1 -1
  88. package/dist/execution-queue.d.ts.map +1 -1
  89. package/dist/execution-queue.js +4 -5
  90. package/dist/execution-queue.js.map +1 -1
  91. package/dist/index.d.ts +37 -8
  92. package/dist/index.d.ts.map +1 -1
  93. package/dist/index.js +112 -6
  94. package/dist/index.js.map +1 -1
  95. package/dist/linguistic.d.ts +3 -108
  96. package/dist/linguistic.d.ts.map +1 -1
  97. package/dist/linguistic.js +3 -372
  98. package/dist/linguistic.js.map +1 -1
  99. package/dist/logger.d.ts +132 -0
  100. package/dist/logger.d.ts.map +1 -0
  101. package/dist/logger.js +137 -0
  102. package/dist/logger.js.map +1 -0
  103. package/dist/memory-provider.d.ts +129 -0
  104. package/dist/memory-provider.d.ts.map +1 -1
  105. package/dist/memory-provider.js +592 -257
  106. package/dist/memory-provider.js.map +1 -1
  107. package/dist/pg-adapter.d.ts +424 -0
  108. package/dist/pg-adapter.d.ts.map +1 -0
  109. package/dist/pg-adapter.js +921 -0
  110. package/dist/pg-adapter.js.map +1 -0
  111. package/dist/pipelines-iceberg-emitter.d.ts +327 -0
  112. package/dist/pipelines-iceberg-emitter.d.ts.map +1 -0
  113. package/dist/pipelines-iceberg-emitter.js +351 -0
  114. package/dist/pipelines-iceberg-emitter.js.map +1 -0
  115. package/dist/provider-capabilities.d.ts +146 -0
  116. package/dist/provider-capabilities.d.ts.map +1 -0
  117. package/dist/provider-capabilities.js +214 -0
  118. package/dist/provider-capabilities.js.map +1 -0
  119. package/dist/rdb-provider-adapter.d.ts +195 -0
  120. package/dist/rdb-provider-adapter.d.ts.map +1 -0
  121. package/dist/rdb-provider-adapter.js +291 -0
  122. package/dist/rdb-provider-adapter.js.map +1 -0
  123. package/dist/schema/cascade.d.ts +49 -10
  124. package/dist/schema/cascade.d.ts.map +1 -1
  125. package/dist/schema/cascade.js +491 -273
  126. package/dist/schema/cascade.js.map +1 -1
  127. package/dist/schema/definition-caches.d.ts +24 -0
  128. package/dist/schema/definition-caches.d.ts.map +1 -0
  129. package/dist/schema/definition-caches.js +26 -0
  130. package/dist/schema/definition-caches.js.map +1 -0
  131. package/dist/schema/dependency-graph.d.ts +45 -0
  132. package/dist/schema/dependency-graph.d.ts.map +1 -0
  133. package/dist/schema/dependency-graph.js +47 -0
  134. package/dist/schema/dependency-graph.js.map +1 -0
  135. package/dist/schema/diff.d.ts +103 -0
  136. package/dist/schema/diff.d.ts.map +1 -0
  137. package/dist/schema/diff.js +329 -0
  138. package/dist/schema/diff.js.map +1 -0
  139. package/dist/schema/entity-operations.d.ts +99 -0
  140. package/dist/schema/entity-operations.d.ts.map +1 -0
  141. package/dist/schema/entity-operations.js +818 -0
  142. package/dist/schema/entity-operations.js.map +1 -0
  143. package/dist/schema/generation-context.d.ts +202 -0
  144. package/dist/schema/generation-context.d.ts.map +1 -0
  145. package/dist/schema/generation-context.js +393 -0
  146. package/dist/schema/generation-context.js.map +1 -0
  147. package/dist/schema/index.d.ts +32 -34
  148. package/dist/schema/index.d.ts.map +1 -1
  149. package/dist/schema/index.js +462 -519
  150. package/dist/schema/index.js.map +1 -1
  151. package/dist/schema/migration.d.ts +205 -0
  152. package/dist/schema/migration.d.ts.map +1 -0
  153. package/dist/schema/migration.js +327 -0
  154. package/dist/schema/migration.js.map +1 -0
  155. package/dist/schema/nl-query-generator.d.ts +68 -0
  156. package/dist/schema/nl-query-generator.d.ts.map +1 -0
  157. package/dist/schema/nl-query-generator.js +362 -0
  158. package/dist/schema/nl-query-generator.js.map +1 -0
  159. package/dist/schema/nl-query.d.ts +65 -0
  160. package/dist/schema/nl-query.d.ts.map +1 -0
  161. package/dist/schema/nl-query.js +178 -0
  162. package/dist/schema/nl-query.js.map +1 -0
  163. package/dist/schema/parse.d.ts.map +1 -1
  164. package/dist/schema/parse.js +152 -89
  165. package/dist/schema/parse.js.map +1 -1
  166. package/dist/schema/provider.d.ts +38 -0
  167. package/dist/schema/provider.d.ts.map +1 -1
  168. package/dist/schema/provider.js +15 -7
  169. package/dist/schema/provider.js.map +1 -1
  170. package/dist/schema/resolve.d.ts +46 -5
  171. package/dist/schema/resolve.d.ts.map +1 -1
  172. package/dist/schema/resolve.js +334 -117
  173. package/dist/schema/resolve.js.map +1 -1
  174. package/dist/schema/search-utils.d.ts +76 -0
  175. package/dist/schema/search-utils.d.ts.map +1 -0
  176. package/dist/schema/search-utils.js +86 -0
  177. package/dist/schema/search-utils.js.map +1 -0
  178. package/dist/schema/seed.d.ts +53 -0
  179. package/dist/schema/seed.d.ts.map +1 -0
  180. package/dist/schema/seed.js +94 -0
  181. package/dist/schema/seed.js.map +1 -0
  182. package/dist/schema/semantic.d.ts +11 -0
  183. package/dist/schema/semantic.d.ts.map +1 -1
  184. package/dist/schema/semantic.js +262 -68
  185. package/dist/schema/semantic.js.map +1 -1
  186. package/dist/schema/sub-apis.d.ts +52 -0
  187. package/dist/schema/sub-apis.d.ts.map +1 -0
  188. package/dist/schema/sub-apis.js +216 -0
  189. package/dist/schema/sub-apis.js.map +1 -0
  190. package/dist/schema/system-entities.d.ts +42 -0
  191. package/dist/schema/system-entities.d.ts.map +1 -0
  192. package/dist/schema/system-entities.js +101 -0
  193. package/dist/schema/system-entities.js.map +1 -0
  194. package/dist/schema/types.d.ts +91 -9
  195. package/dist/schema/types.d.ts.map +1 -1
  196. package/dist/schema/union-fallback.d.ts +219 -0
  197. package/dist/schema/union-fallback.d.ts.map +1 -0
  198. package/dist/schema/union-fallback.js +331 -0
  199. package/dist/schema/union-fallback.js.map +1 -0
  200. package/dist/schema/value-generators/ai.d.ts +54 -0
  201. package/dist/schema/value-generators/ai.d.ts.map +1 -0
  202. package/dist/schema/value-generators/ai.js +136 -0
  203. package/dist/schema/value-generators/ai.js.map +1 -0
  204. package/dist/schema/value-generators/index.d.ts +126 -0
  205. package/dist/schema/value-generators/index.d.ts.map +1 -0
  206. package/dist/schema/value-generators/index.js +219 -0
  207. package/dist/schema/value-generators/index.js.map +1 -0
  208. package/dist/schema/value-generators/placeholder.d.ts +52 -0
  209. package/dist/schema/value-generators/placeholder.d.ts.map +1 -0
  210. package/dist/schema/value-generators/placeholder.js +328 -0
  211. package/dist/schema/value-generators/placeholder.js.map +1 -0
  212. package/dist/schema/value-generators/types.d.ts +116 -0
  213. package/dist/schema/value-generators/types.d.ts.map +1 -0
  214. package/dist/schema/value-generators/types.js +11 -0
  215. package/dist/schema/value-generators/types.js.map +1 -0
  216. package/dist/schema/verb-derivation.d.ts +167 -0
  217. package/dist/schema/verb-derivation.d.ts.map +1 -0
  218. package/dist/schema/verb-derivation.js +281 -0
  219. package/dist/schema/verb-derivation.js.map +1 -0
  220. package/dist/schema/version.d.ts +111 -0
  221. package/dist/schema/version.d.ts.map +1 -0
  222. package/dist/schema/version.js +190 -0
  223. package/dist/schema/version.js.map +1 -0
  224. package/dist/schema.d.ts +1095 -23
  225. package/dist/schema.d.ts.map +1 -1
  226. package/dist/schema.js +2854 -38
  227. package/dist/schema.js.map +1 -1
  228. package/dist/semantic-vectors.d.ts +39 -0
  229. package/dist/semantic-vectors.d.ts.map +1 -0
  230. package/dist/semantic-vectors.js +334 -0
  231. package/dist/semantic-vectors.js.map +1 -0
  232. package/dist/semantic.d.ts +29 -1
  233. package/dist/semantic.d.ts.map +1 -1
  234. package/dist/semantic.js +26 -16
  235. package/dist/semantic.js.map +1 -1
  236. package/dist/telemetry.d.ts +128 -0
  237. package/dist/telemetry.d.ts.map +1 -0
  238. package/dist/telemetry.js +305 -0
  239. package/dist/telemetry.js.map +1 -0
  240. package/dist/tests.d.ts.map +1 -1
  241. package/dist/tests.js +30 -22
  242. package/dist/tests.js.map +1 -1
  243. package/dist/type-guards.d.ts +212 -0
  244. package/dist/type-guards.d.ts.map +1 -0
  245. package/dist/type-guards.js +318 -0
  246. package/dist/type-guards.js.map +1 -0
  247. package/dist/types.d.ts +33 -245
  248. package/dist/types.d.ts.map +1 -1
  249. package/dist/types.js +62 -72
  250. package/dist/types.js.map +1 -1
  251. package/dist/validation.d.ts +165 -0
  252. package/dist/validation.d.ts.map +1 -0
  253. package/dist/validation.js +639 -0
  254. package/dist/validation.js.map +1 -0
  255. package/dist/worker/db-provider.d.ts +168 -0
  256. package/dist/worker/db-provider.d.ts.map +1 -0
  257. package/dist/worker/db-provider.js +277 -0
  258. package/dist/worker/db-provider.js.map +1 -0
  259. package/dist/worker/index.d.ts +35 -0
  260. package/dist/worker/index.d.ts.map +1 -0
  261. package/dist/worker/index.js +37 -0
  262. package/dist/worker/index.js.map +1 -0
  263. package/dist/worker.d.ts +779 -0
  264. package/dist/worker.d.ts.map +1 -0
  265. package/dist/worker.js +2786 -0
  266. package/dist/worker.js.map +1 -0
  267. package/package.json +38 -8
  268. package/src/docs-rels/migrations/0001-init.sql +125 -0
package/README.md CHANGED
@@ -1,143 +1,389 @@
1
1
  # ai-database
2
2
 
3
- Your data, flowing like conversation.
3
+ ![Stability: Stable](https://img.shields.io/badge/stability-stable-green)
4
+
5
+ **AI hallucinates. Your database shouldn't.**
6
+
7
+ When AI generates a "Software Developer" for your customer profile, does it match your existing O\*NET occupation data? Does "Enterprise SaaS" connect to your NAICS industry codes? Traditional approaches fragment context—AI juggles content creation and referential integrity simultaneously, producing plausible-sounding but disconnected data.
8
+
9
+ **ai-database grounds AI generation against your domain.**
4
10
 
5
11
  ```typescript
6
12
  import { DB } from 'ai-database'
7
13
 
8
14
  const { db } = DB({
9
- Lead: { name: 'string', company: 'Company.leads' },
10
- Company: { name: 'string' }
15
+ IdealCustomerProfile: {
16
+ as: 'Who are they? <~Occupation', // Ground against O*NET occupations
17
+ at: 'Where do they work? <~Industry', // Ground against NAICS industries
18
+ are: 'What are they doing? <~Task', // Ground against O*NET tasks
19
+ },
20
+ Occupation: { title: 'string', description: 'string' },
21
+ Industry: { name: 'string', naicsCode: 'string' },
22
+ Task: { name: 'string' },
11
23
  })
12
24
 
13
- // Chain without await
14
- const leads = db.Lead.list()
15
- const qualified = await leads.filter(l => l.score > 80)
25
+ // Seed reference data from O*NET, NAICS, etc.
26
+ await db.Occupation.create({ title: 'Software Developer', description: 'Develops applications' })
27
+ await db.Industry.create({ name: 'Technology', naicsCode: '5112' })
16
28
 
17
- // Batch relationship loading
18
- const enriched = await leads.map(lead => ({
19
- name: lead.name,
20
- company: lead.company, // Batch loaded!
21
- }))
29
+ // AI generation is grounded against real reference data
30
+ const icp = await db.IdealCustomerProfile.create({
31
+ asHint: 'Engineers who build software', // Matches "Software Developer"
32
+ atHint: 'Tech companies', // Matches "Technology"
33
+ })
34
+
35
+ const occupation = await icp.as
36
+ // => { title: 'Software Developer', ... } — matched via semantic search, not hallucinated
22
37
  ```
23
38
 
24
- ## Promise Pipelining
39
+ ---
25
40
 
26
- Chain database operations without `await`:
41
+ ## The Core Insight
27
42
 
28
- ```typescript
29
- const leads = db.Lead.list()
30
- const topLeads = leads.filter(l => l.score > 80)
31
- const names = topLeads.map(l => l.name)
43
+ Traditional databases require foreign keys at schema time. When generating with AI, this fragments context: the model must juggle content creation and referential integrity simultaneously.
32
44
 
33
- // Only await when you need the result
34
- const result = await names
35
- ```
45
+ ai-database inverts this paradigm. **Relationship operators become workflow instructions**, not schema constraints:
36
46
 
37
- ## Batch Relationship Loading
47
+ 1. **Generate** the entity with full semantic context intact
48
+ 2. **Link** as a post-processing step via insertion or vector search
38
49
 
39
- Eliminate N+1 queries automatically:
50
+ This separation eliminates context fragmentation during generation and produces human-readable relationship labels ("Software Developers") instead of opaque IDs (`occ_1547`).
51
+
52
+ ---
53
+
54
+ ## The Four Operators
55
+
56
+ ai-database provides four relationship operators that control how entities connect. They combine two dimensions:
57
+
58
+ | | **Create New** | **Search Existing** |
59
+ |---|---|---|
60
+ | **Link TO target** | `->` Forward Exact | `~>` Forward Fuzzy |
61
+ | **Link FROM target** | `<-` Backward Exact | `<~` Backward Fuzzy |
62
+
63
+ ### Quick Reference
64
+
65
+ | Operator | Direction | Match Mode | When to Use |
66
+ |----------|-----------|------------|-------------|
67
+ | `->` | forward | exact | Creating child entities (Blog → Posts) |
68
+ | `~>` | forward | fuzzy | Reusing existing entities (Campaign → Audience) |
69
+ | `<-` | backward | exact | Aggregation queries (Blog collects Posts) |
70
+ | `<~` | backward | fuzzy | Grounding against reference data (ICP → Occupation) |
71
+
72
+ ### Understanding the Operators
73
+
74
+ **Direction** determines who owns the relationship:
75
+ - **Forward** (`->`, `~>`): Current entity links TO the target
76
+ - **Backward** (`<-`, `<~`): The target entity links back TO the current entity (the link comes FROM the target)
77
+
78
+ **Match Mode** determines how the target is resolved:
79
+ - **Exact** (`->`, `<-`): Create a new entity, then link to it
80
+ - **Fuzzy** (`~>`, `<~`): Search existing entities via semantic similarity
81
+
82
+ ---
83
+
84
+ ## Example 1: Grounding Against Reference Data (`<~`)
85
+
86
+ The backward fuzzy operator grounds AI-generated content against authoritative reference data. This is the **semantic grounding** pattern.
40
87
 
41
88
  ```typescript
42
- // Old way - N+1 queries
43
- const leads = await db.Lead.list()
44
- for (const lead of leads) {
45
- const company = await db.Company.get(lead.companyId) // N queries!
46
- }
89
+ const { db } = DB({
90
+ // Generative entity that grounds against reference data
91
+ IdealCustomerProfile: {
92
+ as: 'Who are they? (e.g. "Developers") <~Occupation',
93
+ at: 'Where do they work? (e.g. "FinTech startups") <~Industry',
94
+ are: 'What are they doing? (e.g. "building APIs") <~Task',
95
+ using: 'What are they using? (e.g. "Node.js") <~Tool',
96
+ to: 'What is their goal? (e.g. "ship faster") <~Outcome',
97
+ },
47
98
 
48
- // New way - batch loaded
49
- const enriched = await db.Lead.list().map(lead => ({
50
- lead,
51
- company: lead.company, // All companies loaded in ONE query
52
- }))
99
+ // Reference data seeded from O*NET, NAICS, etc.
100
+ Occupation: {
101
+ $seed: 'https://onet.data/occupations.tsv',
102
+ $id: '$.oNETSOCCode',
103
+ title: '$.title',
104
+ description: '$.description',
105
+ },
106
+ Industry: {
107
+ $seed: 'https://naics.data/industries.tsv',
108
+ $id: '$.naicsCode',
109
+ name: '$.title',
110
+ },
111
+ Task: { name: 'string' },
112
+ Tool: { name: 'string' },
113
+ Outcome: { description: 'string' },
114
+ })
53
115
  ```
54
116
 
55
- ## Natural Language Queries
117
+ **How it works:**
118
+
119
+ 1. AI generates ICP with `as: "Engineers who build software"`
120
+ 2. Runtime embeds the text and searches the `Occupation` collection
121
+ 3. Best match found: "Software Developer" (via vector similarity)
122
+ 4. Link created with human-readable label: `"Software Developer"`
123
+
124
+ **Key behaviors:**
125
+ - Uses embedding similarity to find the best match
126
+ - Returns `null` if no semantic match is found (doesn't hallucinate)
127
+ - Grounds generated content against curated reference data
128
+ - Perfect for taxonomies, categories, and standardized values
56
129
 
57
- Ask your database questions:
130
+ ### Union Types for Fallback Search
131
+
132
+ When multiple collections could contain the best match:
58
133
 
59
134
  ```typescript
60
- const results = await db.Lead`who closed deals this month?`
61
- const pending = await db.Order`what's stuck in processing?`
135
+ IdealCustomerProfile: {
136
+ as: '<~Occupation|Role|JobType', // Search Occupation first, then Role, then JobType
137
+ using: '<~Tool|Technology|Product', // Search multiple collections in priority order
138
+ }
62
139
  ```
63
140
 
64
141
  ---
65
142
 
66
- ## Real-World Examples
143
+ ## Example 2: Content Generation with Cascade (`->`, `<-`)
67
144
 
68
- ### Sales Pipeline
145
+ The forward and backward exact operators create hierarchical content. This is the **cascading generation** pattern.
69
146
 
70
147
  ```typescript
71
148
  const { db } = DB({
72
- Lead: {
73
- name: 'string',
74
- email: 'string',
75
- score: 'number',
76
- company: 'Company.leads',
149
+ Blog: {
150
+ title: 'string',
151
+ description: 'string',
152
+ topics: ['List 5 topics covered ->Topic'], // Creates Topic children
153
+ posts: ['<-Post'], // Aggregates Post children
77
154
  },
78
- Company: {
155
+ Topic: {
79
156
  name: 'string',
80
- industry: 'string',
81
- }
157
+ titles: ['List 3 blog post titles ->Post'], // Creates Post children
158
+ },
159
+ Post: {
160
+ title: 'string',
161
+ synopsis: 'string',
162
+ content: 'markdown',
163
+ blog: '->Blog', // Links back to parent Blog
164
+ topic: '->Topic', // Links to Topic
165
+ },
82
166
  })
83
167
 
84
- // Find high-value leads with their companies
85
- const qualified = await db.Lead.list()
86
- .filter(lead => lead.score > 80)
87
- .map(lead => ({
88
- lead,
89
- company: lead.company,
90
- }))
168
+ // One call generates the entire blog structure
169
+ const blog = await db.Blog.create(
170
+ { title: 'AI Engineering', description: 'Building with LLMs' },
171
+ { cascade: true, maxDepth: 3 }
172
+ )
173
+
174
+ // Topics were auto-generated
175
+ const topics = await blog.topics
176
+ // => [{ name: 'Prompt Engineering' }, { name: 'RAG Systems' }, ...]
177
+
178
+ // Posts were auto-generated under each topic
179
+ const posts = await topics[0].titles
180
+ // => [{ title: 'Getting Started with Prompts' }, ...]
181
+
182
+ // Backward refs enable aggregation queries
183
+ const allPosts = await blog.posts
184
+ // => All posts that reference this blog
185
+ ```
186
+
187
+ ### Forward Exact (`->`)
188
+
189
+ Creates child entities that belong to the parent:
190
+
191
+ ```typescript
192
+ Startup: {
193
+ founders: ['Who are the founders? ->Founder'], // Creates Founder entities
194
+ businessModel: 'What is the business model? ->LeanCanvas',
195
+ }
196
+ ```
197
+
198
+ **Key behaviors:**
199
+ - Text before `->` is the AI generation prompt
200
+ - If a value is provided, uses it instead of generating
201
+ - Optional fields (`->Type?`) skip generation when not provided
202
+ - Nested forward fields cascade automatically
203
+
204
+ ### Backward Exact (`<-`)
205
+
206
+ Creates inverse relationships for aggregation:
207
+
208
+ ```typescript
209
+ Blog: {
210
+ posts: ['<-Post'], // All posts that reference this blog
211
+ },
212
+ Post: {
213
+ blog: '->Blog', // Forward reference to parent
214
+ }
215
+ ```
216
+
217
+ **Key behaviors:**
218
+ - Creates inverted edge direction (Post → Blog)
219
+ - Enables reverse lookups and aggregation queries
220
+ - Works with explicit backrefs: `['<-Post.blog']`
221
+ - Handles self-referential trees: `children: ['<-Node.parent']`
222
+
223
+ ### Forward Fuzzy (`~>`)
91
224
 
92
- // Ask questions naturally
93
- const results = await db.Lead`who hasn't responded in 2 weeks?`
225
+ Searches existing entities first, creates if not found:
226
+
227
+ ```typescript
228
+ Campaign: {
229
+ audience: 'Target audience ~>Audience', // Find existing or create new
230
+ }
231
+
232
+ // If "Enterprise" audience exists, reuses it
233
+ const campaign = await db.Campaign.create({
234
+ audienceHint: 'Big companies with 1000+ employees'
235
+ })
236
+ const audience = await campaign.audience
237
+ // => { name: 'Enterprise', ... } — reused existing!
94
238
  ```
95
239
 
96
- ### Customer Success
240
+ **Key behaviors:**
241
+ - Searches via semantic similarity using `${fieldName}Hint`
242
+ - Reuses existing entity if match exceeds threshold
243
+ - Generates new entity if no match found
244
+ - Generated entities marked with `$generated: true`
245
+
246
+ ---
247
+
248
+ ## Example 3: Startup Generator (Mixed Operators)
249
+
250
+ A complete example showing all four operators working together:
97
251
 
98
252
  ```typescript
99
253
  const { db } = DB({
100
- Customer: {
254
+ Startup: {
255
+ $instructions: 'Generate a B2B SaaS startup',
101
256
  name: 'string',
102
- healthScore: 'number',
103
- mrr: 'number',
104
- csm: 'User.customers',
257
+ idea: 'What problem does this solve? <-Idea', // Idea spawns Startup
258
+ founders: ['Who are the founding team? ->Founder'], // Create founders
259
+ customer: 'Who is the target customer? ~>CustomerPersona', // Find existing
260
+ industry: 'What industry? <~Industry', // Ground to NAICS
105
261
  },
106
- User: { name: 'string' }
262
+ Idea: { problem: 'string', solution: 'string' },
263
+ Founder: { name: 'string', role: 'string' },
264
+ CustomerPersona: { title: 'string', painPoints: 'string' },
265
+ Industry: { name: 'string', naicsCode: 'string' },
266
+ })
267
+
268
+ // Pre-populate reference data
269
+ await db.Industry.create({ name: 'Technology', naicsCode: '5112' })
270
+ await db.CustomerPersona.create({
271
+ title: 'VP of Engineering',
272
+ painPoints: 'Managing distributed teams',
107
273
  })
108
274
 
109
- // At-risk customers with their CSMs
110
- const atRisk = await db.Customer.list()
111
- .filter(c => c.healthScore < 50)
112
- .map(c => ({
113
- customer: c,
114
- csm: c.csm,
115
- mrr: c.mrr,
116
- }))
275
+ // Generate complete startup with grounded relationships
276
+ const startup = await db.Startup.create(
277
+ { name: 'DevFlow' },
278
+ { cascade: true, maxDepth: 2 }
279
+ )
280
+
281
+ // Relationships resolved appropriately:
282
+ const idea = await startup.idea // Created new (<-)
283
+ const founders = await startup.founders // Created new ([->])
284
+ const customer = await startup.customer // Matched existing (~>)
285
+ const industry = await startup.industry // Grounded to reference (<~)
286
+ ```
287
+
288
+ ---
289
+
290
+ ## Threshold Syntax
291
+
292
+ For fuzzy operators (`~>` and `<~`), configure the similarity threshold:
293
+
294
+ ### Field-Level Thresholds
295
+
296
+ ```typescript
297
+ Event: {
298
+ venue: 'Where is the event? ~>Venue(0.9)', // High threshold - strict match
299
+ sponsor: 'Event sponsor ~>Company(0.5)', // Low threshold - lenient match
300
+ }
117
301
  ```
118
302
 
119
- ### Order Management
303
+ ### Entity-Level Thresholds
120
304
 
121
305
  ```typescript
122
- const { db } = DB({
123
- Order: {
124
- status: 'string',
125
- total: 'number',
126
- customer: 'Customer.orders',
127
- items: ['OrderItem.order'],
128
- },
129
- OrderItem: { product: 'string', quantity: 'number' },
130
- Customer: { name: 'string' }
131
- })
306
+ Startup: {
307
+ $fuzzyThreshold: 0.85, // Apply to all ~> and <~ fields
308
+ customer: '~>Customer',
309
+ competitor: '~>Company',
310
+ }
311
+ ```
312
+
313
+ **Threshold values:**
314
+ - `0.9` - Very strict: Only near-exact semantic matches
315
+ - `0.7` - Default: Balanced matching
316
+ - `0.5` - Lenient: Accept loosely related matches
317
+
318
+ ---
319
+
320
+ ## Cascade Generation
321
+
322
+ Build complex entity graphs from a single `create()` call:
323
+
324
+ ```typescript
325
+ const company = await db.Company.create(
326
+ { name: 'TechCorp' },
327
+ {
328
+ cascade: true,
329
+ maxDepth: 4,
330
+ onProgress: (p) => console.log(`${p.totalEntitiesCreated} created`),
331
+ }
332
+ )
333
+
334
+ // Entire org chart generated: Company → Departments → Teams → Employees
335
+ ```
336
+
337
+ ### Cascade Options
338
+
339
+ | Option | Type | Default | Description |
340
+ |--------|------|---------|-------------|
341
+ | `cascade` | `boolean` | `false` | Enable cascade generation |
342
+ | `maxDepth` | `number` | `0` | Maximum recursion depth |
343
+ | `cascadeTypes` | `string[]` | - | Only cascade to these types |
344
+ | `onProgress` | `function` | - | Progress callback |
345
+ | `onError` | `function` | - | Error handler |
346
+ | `stopOnError` | `boolean` | `false` | Stop on first error |
347
+
348
+ ---
349
+
350
+ ## Special Variables
351
+
352
+ ### `$instructions`
353
+
354
+ Entity-level prompting that guides AI generation:
355
+
356
+ ```typescript
357
+ Character: {
358
+ $instructions: 'This character is from a medieval fantasy setting',
359
+ name: 'string',
360
+ backstory: 'What is their history?', // Influenced by $instructions
361
+ }
362
+ ```
363
+
364
+ Template variables resolve against entity data:
132
365
 
133
- // Pending orders with all details
134
- const pending = await db.Order
135
- .find({ status: 'pending' })
136
- .map(order => ({
137
- order,
138
- customer: order.customer,
139
- items: order.items,
140
- }))
366
+ ```typescript
367
+ Problem: {
368
+ $instructions: `
369
+ Identify problems for occupation: {task.occupation.title}
370
+ in industry: {task.occupation.industry.name}
371
+ `,
372
+ task: '<-Task',
373
+ description: 'string',
374
+ }
375
+ ```
376
+
377
+ ### `$context`
378
+
379
+ Explicit context dependencies pre-fetched before generation:
380
+
381
+ ```typescript
382
+ Ad: {
383
+ $context: ['Startup', 'ICP'],
384
+ $instructions: 'Generate ad for {startup.name} targeting {icp.as}',
385
+ headline: 'string (30 chars)',
386
+ }
141
387
  ```
142
388
 
143
389
  ---
@@ -147,11 +393,11 @@ const pending = await db.Order
147
393
  Define once, get typed operations everywhere:
148
394
 
149
395
  ```typescript
150
- const { db, events, actions, nouns, verbs } = DB({
396
+ const { db, events, actions } = DB({
151
397
  Post: {
152
398
  title: 'string',
153
399
  content: 'markdown',
154
- author: 'Author.posts', // Creates both directions
400
+ author: 'Author.posts', // Creates bidirectional relationship
155
401
  },
156
402
  Author: {
157
403
  name: 'string',
@@ -185,21 +431,48 @@ Post: { tags: ['Tag.posts'] }
185
431
 
186
432
  ---
187
433
 
188
- ## CRUD Operations
434
+ ## Promise Pipelining
435
+
436
+ Chain database operations without `await`:
437
+
438
+ ```typescript
439
+ const leads = db.Lead.list()
440
+ const topLeads = leads.filter(l => l.score > 80)
441
+ const names = topLeads.map(l => l.name)
442
+
443
+ // Only await when you need the result
444
+ const result = await names
445
+ ```
446
+
447
+ ### Batch Relationship Loading
448
+
449
+ Eliminate N+1 queries automatically:
450
+
451
+ ```typescript
452
+ // All companies loaded in ONE query
453
+ const enriched = await db.Lead.list().map(lead => ({
454
+ lead,
455
+ company: lead.company,
456
+ }))
457
+ ```
458
+
459
+ ---
189
460
 
190
- All operations return `DBPromise` for chaining:
461
+ ## CRUD Operations
191
462
 
192
463
  ```typescript
193
464
  // Read
194
465
  const lead = await db.Lead.get('lead-123')
195
466
  const leads = await db.Lead.list()
196
- const first = await db.Lead.first()
197
467
  const found = await db.Lead.find({ status: 'active' })
198
468
 
199
469
  // Search
200
470
  const results = await db.Lead.search('enterprise SaaS')
201
471
 
202
- // Write (returns regular Promise)
472
+ // Natural language queries
473
+ const pending = await db.Order`what's stuck in processing?`
474
+
475
+ // Write
203
476
  const lead = await db.Lead.create({ name: 'Acme Corp' })
204
477
  await db.Lead.update(lead.$id, { score: 90 })
205
478
  await db.Lead.delete(lead.$id)
@@ -231,36 +504,23 @@ events.on('*.updated', event => {
231
504
  })
232
505
  ```
233
506
 
507
+ ---
508
+
234
509
  ## forEach - Large-Scale Processing
235
510
 
236
- Process thousands of items with concurrency, progress tracking, and error handling:
511
+ Process thousands of items with concurrency and error handling:
237
512
 
238
513
  ```typescript
239
- // Simple iteration
240
- await db.Lead.forEach(lead => {
241
- console.log(lead.name)
242
- })
243
-
244
- // With AI and concurrency
245
514
  const result = await db.Lead.forEach(async lead => {
246
515
  const analysis = await ai`analyze ${lead}`
247
516
  await db.Lead.update(lead.$id, { analysis })
248
517
  }, {
249
518
  concurrency: 10,
250
- onProgress: p => console.log(`${p.completed}/${p.total} (${p.rate.toFixed(1)}/s)`),
251
- })
252
-
253
- // With error handling and retries
254
- await db.Order.forEach(async order => {
255
- await sendInvoice(order)
256
- }, {
257
- concurrency: 5,
258
519
  maxRetries: 3,
259
- retryDelay: attempt => 1000 * Math.pow(2, attempt), // Exponential backoff
260
- onError: (err, order) => err.code === 'RATE_LIMIT' ? 'retry' : 'continue',
520
+ retryDelay: attempt => 1000 * Math.pow(2, attempt),
521
+ onProgress: p => console.log(`${p.completed}/${p.total}`),
522
+ onError: (err, lead) => err.code === 'RATE_LIMIT' ? 'retry' : 'continue',
261
523
  })
262
-
263
- console.log(`Completed: ${result.completed}, Failed: ${result.failed}`)
264
524
  ```
265
525
 
266
526
  ### forEach Options
@@ -271,9 +531,9 @@ console.log(`Completed: ${result.completed}, Failed: ${result.failed}`)
271
531
  | `maxRetries` | `number` | Retries per item (default: 0) |
272
532
  | `retryDelay` | `number \| fn` | Delay between retries |
273
533
  | `onProgress` | `fn` | Progress callback |
274
- | `onError` | `'continue' \| 'retry' \| 'skip' \| 'stop' \| fn` | Error handling |
534
+ | `onError` | `fn` | Error handling |
275
535
  | `timeout` | `number` | Timeout per item in ms |
276
- | `persist` | `boolean \| string` | Enable durability (string = custom action name) |
536
+ | `persist` | `boolean \| string` | Enable durability |
277
537
  | `resume` | `string` | Resume from action ID |
278
538
 
279
539
  ### Durable forEach
@@ -281,28 +541,14 @@ console.log(`Completed: ${result.completed}, Failed: ${result.failed}`)
281
541
  Persist progress to survive crashes:
282
542
 
283
543
  ```typescript
284
- // Enable persistence - auto-names action as "Lead.forEach"
285
544
  const result = await db.Lead.forEach(processLead, {
286
545
  concurrency: 10,
287
- persist: true,
546
+ persist: 'analyze-leads',
288
547
  })
289
548
 
290
- console.log(`Action ID: ${result.actionId}`)
291
- ```
292
-
293
- Custom action name:
294
-
295
- ```typescript
549
+ // Resume after crash
296
550
  await db.Lead.forEach(processLead, {
297
- persist: 'analyze-leads', // Custom action name
298
- })
299
- ```
300
-
301
- Resume after a crash:
302
-
303
- ```typescript
304
- await db.Lead.forEach(processLead, {
305
- resume: 'action-123', // Skips already-processed items
551
+ resume: result.actionId,
306
552
  })
307
553
  ```
308
554
 
@@ -339,85 +585,716 @@ DATABASE_URL=sqlite://./data # SQLite
339
585
  DATABASE_URL=:memory: # in-memory
340
586
  ```
341
587
 
342
- ## Documentation
588
+ ---
589
+
590
+ ## Cloudflare Workers Deployment
591
+
592
+ ai-database provides dedicated exports for Cloudflare Workers deployment and RPC client consumption.
593
+
594
+ ### /worker Export
595
+
596
+ Use the `/worker` export when deploying ai-database as a Cloudflare Worker service:
597
+
598
+ ```typescript
599
+ // worker.ts - the ai-database service
600
+ import { DatabaseWorker, DatabaseDO } from 'ai-database/worker'
601
+
602
+ export { DatabaseDO }
603
+ export default DatabaseWorker
604
+ ```
605
+
606
+ ```jsonc
607
+ // wrangler.jsonc
608
+ {
609
+ "name": "ai-database",
610
+ "main": "src/worker.ts",
611
+ "compatibility_date": "2024-01-01",
612
+ "durable_objects": {
613
+ "bindings": [
614
+ { "name": "DATABASE_DO", "class_name": "DatabaseDO" }
615
+ ]
616
+ }
617
+ }
618
+ ```
619
+
620
+ ### /client Export
621
+
622
+ Use the `/client` export when consuming ai-database from another worker or HTTP client:
623
+
624
+ **With Cloudflare Service Bindings (RPC):**
625
+
626
+ ```typescript
627
+ // consumer-worker.ts
628
+ import type { DatabaseService } from 'ai-database/worker'
629
+
630
+ interface Env {
631
+ AI_DATABASE: Service<DatabaseService>
632
+ }
633
+
634
+ export default {
635
+ async fetch(request: Request, env: Env) {
636
+ // Direct RPC via service binding - no HTTP overhead
637
+ const service = env.AI_DATABASE.connect('my-namespace')
638
+ const post = await service.create('Post', { title: 'Hello' })
639
+ return Response.json(post)
640
+ }
641
+ }
642
+ ```
643
+
644
+ ```jsonc
645
+ // consumer wrangler.jsonc
646
+ {
647
+ "services": [
648
+ { "binding": "AI_DATABASE", "service": "ai-database" }
649
+ ]
650
+ }
651
+ ```
652
+
653
+ **With HTTP Client (rpc.do):**
654
+
655
+ ```typescript
656
+ import { createDatabaseClient, DB } from 'ai-database/client'
657
+
658
+ // Connect to production
659
+ const client = createDatabaseClient('https://ai-database.workers.dev')
660
+ const service = client.connect('my-namespace')
661
+
662
+ // CRUD operations
663
+ const post = await service.create('Post', { title: 'Hello', content: 'World' })
664
+ const posts = await service.list('Post', { limit: 10 })
665
+ const found = await service.get('Post', post.$id)
666
+
667
+ // Search
668
+ const results = await service.search('Post', 'hello')
669
+ const semantic = await service.semanticSearch('Post', 'greeting posts')
670
+
671
+ // Relationships
672
+ await service.relate('Post', post.$id, 'author', 'User', userId)
673
+ const authors = await service.related('Post', post.$id, 'author')
674
+
675
+ // Events
676
+ await service.emit({ event: 'Post.published', actor: userId, object: post.$id })
677
+ const events = await service.listEvents({ event: 'Post.published' })
678
+ ```
679
+
680
+ ### TypeScript Setup for Service Bindings
681
+
682
+ For proper type inference with service bindings, import the worker types:
683
+
684
+ ```typescript
685
+ // types.ts
686
+ import type { DatabaseService } from 'ai-database/worker'
687
+
688
+ export interface Env {
689
+ AI_DATABASE: Service<DatabaseService>
690
+ // ... other bindings
691
+ }
692
+ ```
693
+
694
+ ---
695
+
696
+ ## Common Patterns
697
+
698
+ ### Self-Referential Trees
699
+
700
+ ```typescript
701
+ Node: {
702
+ value: 'string',
703
+ parent: '->Node?',
704
+ children: ['<-Node.parent'],
705
+ }
706
+ ```
707
+
708
+ ### Union Types for Polymorphic References
343
709
 
344
- - [Full Documentation](https://primitives.org.ai/database)
345
- - [CRUD Operations](https://primitives.org.ai/database/create)
346
- - [Schema Types](https://primitives.org.ai/database/schema)
347
- - [Events](https://primitives.org.ai/database/events)
710
+ ```typescript
711
+ Comment: {
712
+ content: 'string',
713
+ target: '->Post|Article|Video',
714
+ }
715
+
716
+ const target = await comment.target
717
+ console.log(target.$matchedType) // 'Post', 'Article', or 'Video'
718
+ ```
719
+
720
+ ### Symmetric Relationships
721
+
722
+ ```typescript
723
+ Team: {
724
+ name: 'string',
725
+ members: ['->Member'],
726
+ },
727
+ Member: {
728
+ name: 'string',
729
+ team: '<-Team',
730
+ }
731
+ ```
732
+
733
+ ---
348
734
 
349
735
  ## Document Database Interface
350
736
 
351
- In addition to the schema-first graph model, `ai-database` also exports environment-agnostic types for document-based storage (MDX files with frontmatter). These types are used by `@mdxdb/*` adapters and work in any JavaScript runtime (Node.js, Bun, Deno, Workers, Browser).
737
+ In addition to the schema-first graph model, `ai-database` exports environment-agnostic types for document-based storage (MDX files with frontmatter):
352
738
 
353
739
  ```typescript
354
740
  import type {
355
741
  DocumentDatabase,
742
+ Document,
356
743
  DocListOptions,
357
744
  DocSearchOptions,
358
- Document,
359
745
  } from 'ai-database'
360
746
 
361
- // The DocumentDatabase interface
362
- interface DocumentDatabase<TData> {
363
- list(options?: DocListOptions): Promise<DocListResult<TData>>
364
- search(options: DocSearchOptions): Promise<DocSearchResult<TData>>
365
- get(id: string, options?: DocGetOptions): Promise<Document<TData> | null>
366
- set(id: string, doc: Document<TData>, options?: DocSetOptions): Promise<DocSetResult>
367
- delete(id: string, options?: DocDeleteOptions): Promise<DocDeleteResult>
368
- close?(): Promise<void>
747
+ // Same interface regardless of backend
748
+ const doc = await db.get('posts/hello-world')
749
+ await db.set('posts/new', { data: { title: 'New Post' }, content: '# Hello' })
750
+ ```
751
+
752
+ ### Usage with @mdxdb adapters
753
+
754
+ ```typescript
755
+ import { createFsDatabase } from '@mdxdb/fs'
756
+ import { createSqliteDatabase } from '@mdxdb/sqlite'
757
+ import { createApiDatabase } from '@mdxdb/api'
758
+
759
+ const db = createFsDatabase({ root: './content' })
760
+ const db = createSqliteDatabase({ path: './data.db' })
761
+ const db = createApiDatabase({ baseUrl: 'https://api.example.com' })
762
+ ```
763
+
764
+ ---
765
+
766
+ ## Provider Capabilities
767
+
768
+ Different database providers support different features. Use `detectCapabilities()` to check what's available at runtime:
769
+
770
+ ```typescript
771
+ import { detectCapabilities, requireCapability, CapabilityNotSupportedError } from 'ai-database'
772
+
773
+ const capabilities = await detectCapabilities(provider)
774
+
775
+ // Check capabilities
776
+ if (capabilities.hasSemanticSearch) {
777
+ const results = await provider.semanticSearch('Post', 'machine learning')
778
+ } else {
779
+ // Fallback to regular search
780
+ const results = await provider.search('Post', 'machine learning')
369
781
  }
782
+
783
+ // Require capabilities (throws if unavailable)
784
+ requireCapability(capabilities, 'hasEvents')
785
+ provider.on('Post.created', handleCreate)
370
786
  ```
371
787
 
372
- ### Document Types
788
+ ### Capability Matrix
373
789
 
374
- | Type | Description |
375
- |------|-------------|
376
- | `Document<TData>` | MDX document with id, type, context, data, and content |
377
- | `DocumentDatabase<TData>` | Interface for document storage adapters |
378
- | `DocListOptions` | Options for listing documents (limit, offset, sortBy, type, prefix) |
379
- | `DocListResult<TData>` | List result with documents, total, hasMore |
380
- | `DocSearchOptions` | Search options (query, fields, semantic) |
381
- | `DocSearchResult<TData>` | Search result with scores |
382
- | `DocGetOptions` | Get options (includeAst, includeCode) |
383
- | `DocSetOptions` | Set options (createOnly, updateOnly, version) |
384
- | `DocSetResult` | Set result (id, version, created) |
385
- | `DocDeleteOptions` | Delete options (soft, version) |
386
- | `DocDeleteResult` | Delete result (id, deleted) |
387
-
388
- ### View Types
389
-
390
- For bi-directional relationship rendering:
790
+ | Capability | MemoryProvider | RDB | DigitalObjects |
791
+ |------------|----------------|-----|----------------|
792
+ | **Semantic Search** | Yes | No | No |
793
+ | **Events API** | Yes | No | No |
794
+ | **Actions API** | Yes | No | No |
795
+ | **Artifacts** | Yes | No | No |
796
+ | **Batch Operations** | Yes | No | No |
391
797
 
392
- | Type | Description |
393
- |------|-------------|
394
- | `ViewManager` | Interface for managing views |
395
- | `ViewDocument` | View template definition |
396
- | `ViewContext` | Context for rendering a view |
397
- | `ViewRenderResult` | Rendered markdown and entities |
398
- | `ViewSyncResult` | Mutations from extracting edited markdown |
399
- | `DocumentDatabaseWithViews` | Database with view support |
798
+ ### Capabilities
400
799
 
401
- ### Usage with @mdxdb adapters
800
+ | Capability | Description | Methods Required |
801
+ |------------|-------------|------------------|
802
+ | `hasSemanticSearch` | Vector similarity search | `semanticSearch()`, `setEmbeddingsConfig()` |
803
+ | `hasEvents` | Event emission and subscription | `on()`, `emit()`, `listEvents()` |
804
+ | `hasActions` | Durable action tracking | `createAction()`, `getAction()`, `updateAction()` |
805
+ | `hasArtifacts` | Artifact/cache storage | `getArtifact()`, `setArtifact()` |
806
+ | `hasBatchOperations` | Concurrency-controlled batching | `withConcurrency()` or `mapWithConcurrency()` |
807
+
808
+ ### Graceful Degradation
809
+
810
+ When a capability isn't available, use fallbacks:
402
811
 
403
812
  ```typescript
404
- // Filesystem adapter
405
- import { createFsDatabase } from '@mdxdb/fs'
406
- const db = createFsDatabase({ root: './content' })
813
+ import { detectCapabilities, warnIfUnavailable } from 'ai-database'
407
814
 
408
- // API adapter
409
- import { createApiDatabase } from '@mdxdb/api'
410
- const db = createApiDatabase({ baseUrl: 'https://api.example.com' })
815
+ const capabilities = await detectCapabilities(provider)
411
816
 
412
- // SQLite adapter
413
- import { createSqliteDatabase } from '@mdxdb/sqlite'
414
- const db = createSqliteDatabase({ path: './data.db' })
817
+ // Log a warning (once) if semantic search unavailable
818
+ warnIfUnavailable(capabilities, 'hasSemanticSearch', 'semanticSearch')
415
819
 
416
- // Same DocumentDatabase interface regardless of backend
417
- const doc = await db.get('posts/hello-world')
418
- await db.set('posts/new', { data: { title: 'New Post' }, content: '# Hello' })
820
+ // Use capability with fallback
821
+ async function searchPosts(query: string) {
822
+ if (capabilities.hasSemanticSearch) {
823
+ return provider.semanticSearch('Post', query)
824
+ }
825
+ return provider.search('Post', query)
826
+ }
827
+ ```
828
+
829
+ ### Features Requiring Semantic Search
830
+
831
+ When using a provider without semantic search support (e.g., RDB), some features behave differently:
832
+
833
+ | Feature | With Semantic Search | Without Semantic Search |
834
+ |---------|---------------------|------------------------|
835
+ | `~>` Forward Fuzzy | Matches via vector similarity, falls back to generation | Uses text search fallback, then generates if no match |
836
+ | `<~` Backward Fuzzy | Matches via vector similarity | Uses text search fallback |
837
+ | `db.Entity.semanticSearch()` | Vector similarity search | Throws `CapabilityNotSupportedError` |
838
+ | `db.Entity.hybridSearch()` | Combined FTS + vector search | Throws `CapabilityNotSupportedError` |
839
+ | `db.semanticSearch()` | Global vector search | Throws `CapabilityNotSupportedError` |
840
+
841
+ **Fuzzy Operator Fallback**: When semantic search is unavailable, fuzzy operators (`~>` and `<~`) gracefully degrade to basic text search:
842
+
843
+ ```typescript
844
+ // Without semantic search, these operators use text matching instead of embeddings
845
+ const { db } = DB({
846
+ Article: {
847
+ category: '~>Category', // Will use text search fallback
848
+ },
849
+ Category: { name: 'string' }
850
+ })
851
+
852
+ // Forward fuzzy (~>) tries text search first, generates if no match found
853
+ await db.Article.create({ categoryHint: 'Tech' }) // Searches for 'Tech' in categories
854
+
855
+ // Backward fuzzy (<~) uses text search only - never generates
856
+ await db.Article.create({ categoryHint: 'Tech' }) // If category were declared '<~Category': resolves to null when no text match
857
+ ```
858
+
859
+ **Explicit Search Methods**: When you need semantic search but it's unavailable, the methods throw with helpful alternatives:
860
+
861
+ ```typescript
862
+ import { CapabilityNotSupportedError, isCapabilityNotSupportedError } from 'ai-database'
863
+
864
+ try {
865
+ await db.Post.semanticSearch('machine learning')
866
+ } catch (error) {
867
+ if (isCapabilityNotSupportedError(error)) {
868
+ console.log(error.capability) // 'hasSemanticSearch'
869
+ console.log(error.alternative) // 'Use the regular search() method instead...'
870
+ // Fall back to text search
871
+ const results = await db.Post.search('machine learning')
872
+ }
873
+ }
874
+ ```
875
+
876
+ ---
877
+
878
+ ## Integration with RDB
879
+
880
+ [RDB](https://github.com/ai-primitives/rdb) provides a simple relational database backend for ai-database. Use it when you want:
881
+
882
+ - Edge-native storage via Cloudflare Durable Objects or D1
883
+ - Simple two-table schema (`_data` and `_rels`)
884
+ - Graph traversal and relationship queries
885
+
886
+ ### Creating an RDB Provider Adapter
887
+
888
+ ```typescript
889
+ import { setProvider, DB } from 'ai-database'
890
+ import type { DBProvider, ListOptions, SearchOptions } from 'ai-database'
891
+ import { RDB } from '@dotdo/rdb'
892
+
893
+ // Adapter to bridge RDB and ai-database interfaces
894
+ class RDBProviderAdapter implements DBProvider {
895
+ private rdb: RDB
896
+
897
+ constructor(sqlStorage: SqlStorage) {
898
+ this.rdb = new RDB(sqlStorage)
899
+ }
900
+
901
+ async get(type: string, id: string) {
902
+ const entity = await this.rdb.get(type, id)
903
+ if (!entity) return null
904
+ return { $id: entity.id, $type: entity.type, ...entity }
905
+ }
906
+
907
+ async list(type: string, options?: ListOptions) {
908
+ const entities = await this.rdb.list(type, options)
909
+ return entities.map(e => ({ $id: e.id, $type: e.type, ...e }))
910
+ }
911
+
912
+ async search(type: string, query: string, options?: SearchOptions) {
913
+ // RDB uses filter-based search; perform text matching
914
+ const all = await this.rdb.list(type, options)
915
+ return all
916
+ .filter(e => JSON.stringify(e).toLowerCase().includes(query.toLowerCase()))
917
+ .map(e => ({ $id: e.id, $type: e.type, ...e }))
918
+ }
919
+
920
+ async create(type: string, id: string | undefined, data: Record<string, unknown>) {
921
+ const entity = await this.rdb.create(type, data, id)
922
+ return { $id: entity.id, $type: entity.type, ...entity }
923
+ }
924
+
925
+ async update(type: string, id: string, data: Record<string, unknown>) {
926
+ const entity = await this.rdb.update(type, id, data)
927
+ return { $id: entity.id, $type: entity.type, ...entity }
928
+ }
929
+
930
+ async delete(type: string, id: string): Promise<boolean> {
931
+ const exists = await this.rdb.get(type, id)
932
+ if (!exists) return false
933
+ await this.rdb.delete(type, id)
934
+ return true
935
+ }
936
+
937
+ async related(type: string, id: string, relation: string) {
938
+ const entities = await this.rdb.related(type, id, relation)
939
+ return entities.map(e => ({ $id: e.id, $type: e.type, ...e }))
940
+ }
941
+
942
+ async relate(fromType: string, fromId: string, relation: string, toType: string, toId: string, metadata?: object) {
943
+ await this.rdb.relate(fromType, fromId, relation, toType, toId, metadata)
944
+ }
945
+
946
+ async unrelate(fromType: string, fromId: string, relation: string, toType: string, toId: string) {
947
+ await this.rdb.unrelate(fromType, fromId, relation, toType, toId)
948
+ }
949
+ }
950
+
951
+ // Usage in a Durable Object
952
+ export class MyDO extends DurableObject {
953
+ constructor(ctx: DurableObjectState, env: Env) {
954
+ super(ctx, env)
955
+ setProvider(new RDBProviderAdapter(ctx.storage.sql))
956
+ }
957
+ }
958
+
959
+ // Now use ai-database schema with RDB backend
960
+ const { db } = DB({
961
+ Post: { title: 'string', author: '->Author.posts' },
962
+ Author: { name: 'string' },
963
+ })
964
+
965
+ const author = await db.Author.create({ name: 'Alice' })
966
+ const post = await db.Post.create({ title: 'Hello', author: author.$id })
419
967
  ```
420
968
 
969
+ ### Limitations with RDB
970
+
971
+ When using RDB as a provider:
972
+
973
+ - **No semantic search**: RDB has no vector embeddings, so fuzzy operators (`~>`, `<~`) fall back to text matching. Use exact operators (`->`, `<-`) for deterministic links, or use MemoryProvider for semantic matching.
974
+ - **No events/actions API**: RDB focuses on core CRUD and relationships.
975
+ - **Text search only**: The `search()` method performs text matching, not semantic similarity.
976
+
977
+ ---
978
+
979
+ ## AI Integration
980
+
981
+ ai-database integrates with AI providers for two core capabilities:
982
+
983
+ 1. **Entity Generation** - AI-powered content generation for schema fields using `ai-functions`
984
+ 2. **Semantic Search** - Vector embeddings for fuzzy matching (`~>`, `<~` operators) and similarity search
985
+
986
+ ### Supported AI Providers
987
+
988
+ #### For Entity Generation
989
+
990
+ Entity generation uses [ai-functions](https://github.com/ai-primitives/ai-primitives/tree/main/packages/ai-functions) which supports:
991
+
992
+ | Provider | Models | Configuration |
993
+ |----------|--------|---------------|
994
+ | **Anthropic** | claude-3-5-sonnet, claude-3-opus, claude-3-haiku | `ANTHROPIC_API_KEY` |
995
+ | **OpenAI** | gpt-4o, gpt-4-turbo, gpt-3.5-turbo | `OPENAI_API_KEY` |
996
+ | **Google** | gemini-1.5-pro, gemini-1.5-flash | `GOOGLE_API_KEY` |
997
+ | **Local Models** | Ollama, LM Studio, llama.cpp | `AI_BASE_URL` |
998
+
999
+ ```typescript
1000
+ import { DB, configureAIGeneration } from 'ai-database'
1001
+
1002
+ // Configure the AI model for entity generation
1003
+ configureAIGeneration({
1004
+ model: 'sonnet', // Model alias (see ai-functions for full list)
1005
+ enabled: true, // Enable AI generation (default: true)
1006
+ onGenerate: (details) => {
1007
+ // Track generation calls for monitoring
1008
+ console.log(`Generated ${details.entityType} in ${details.latencyMs}ms`)
1009
+ if (details.error) console.error('Generation failed:', details.error)
1010
+ }
1011
+ })
1012
+
1013
+ const { db } = DB({
1014
+ BlogPost: {
1015
+ title: 'string',
1016
+ content: 'Write a detailed blog post about this topic', // AI generates this
1017
+ summary: 'Summarize the content in 2 sentences',
1018
+ }
1019
+ })
1020
+ ```
1021
+
1022
+ #### For Embeddings/Semantic Search
1023
+
1024
+ Embedding generation for semantic search can use any provider that produces vector embeddings:
1025
+
1026
+ | Provider | Models | Dimensions | Configuration |
1027
+ |----------|--------|------------|---------------|
1028
+ | **OpenAI** | text-embedding-3-small | 1536 | `OPENAI_API_KEY` |
1029
+ | **OpenAI** | text-embedding-3-large | 3072 | `OPENAI_API_KEY` |
1030
+ | **Cohere** | embed-english-v3.0 | 1024 | `COHERE_API_KEY` |
1031
+ | **Voyage AI** | voyage-large-2 | 1024-4096 | `VOYAGE_API_KEY` |
1032
+ | **Local** | sentence-transformers | 384 | Self-hosted |
1033
+
1034
+ ```typescript
1035
+ import { createMemoryProvider, setProvider } from 'ai-database'
1036
+
1037
+ // Configure embedding dimensions to match your provider
1038
+ const provider = createMemoryProvider({
1039
+ embeddingDimensions: 1536, // Match OpenAI text-embedding-3-small
1040
+ })
1041
+ setProvider(provider)
1042
+ ```
1043
+
1044
+ ---
1045
+
1046
+ ### Configuring Embedding Generation
1047
+
1048
+ Control which fields are embedded for semantic search:
1049
+
1050
+ ```typescript
1051
+ import { DB } from 'ai-database'
1052
+
1053
+ const { db } = DB({
1054
+ Article: {
1055
+ title: 'string',
1056
+ content: 'markdown',
1057
+ authorId: 'string', // Won't be embedded (not text content)
1058
+ },
1059
+ InternalNote: {
1060
+ text: 'string',
1061
+ }
1062
+ }, {
1063
+ embeddings: {
1064
+ // Specify which fields to embed for Article
1065
+ Article: { fields: ['title', 'content'] },
1066
+
1067
+ // Disable embeddings for InternalNote (won't appear in semantic search)
1068
+ InternalNote: false,
1069
+ }
1070
+ })
1071
+ ```
1072
+
1073
+ #### Embedding Configuration Options
1074
+
1075
+ | Option | Type | Description |
1076
+ |--------|------|-------------|
1077
+ | `fields` | `string[]` | Fields to include in embedding (default: auto-detect text fields) |
1078
+ | `false` | literal `false` | Pass `false` in place of the config object to disable embeddings for this entity type |
1079
+
1080
+ #### Auto-Detection
1081
+
1082
+ If no `embeddings` config is provided, ai-database automatically embeds:
1083
+ - All `string` fields (except those ending in `Id`, `At`, or starting with `$`/`_`)
1084
+ - All `markdown` fields
1085
+ - String arrays (concatenated)
1086
+
1087
+ ---
1088
+
1089
+ ### Cost and Token Implications
1090
+
1091
+ Understanding token usage is critical for production deployments. Here's what triggers AI API calls:
1092
+
1093
+ #### Entity Generation Costs
1094
+
1095
+ | Operation | AI Calls | When |
1096
+ |-----------|----------|------|
1097
+ | `create()` with prompt fields | 1 per entity | Fields like `'Write a description'` |
1098
+ | `create(data, { cascade: true })` | 1 per cascaded entity | Each `->` forward relation |
1099
+ | `create()` with `~>` fuzzy | 1 embedding + search | If no semantic match found, may generate |
1100
+
1101
+ **Example: Cascade Cost Estimation**
1102
+
1103
+ ```typescript
1104
+ const { db } = DB({
1105
+ Blog: {
1106
+ title: 'string',
1107
+ topics: ['Generate 5 topics ->Topic'], // Creates 5 Topic entities
1108
+ },
1109
+ Topic: {
1110
+ name: 'string',
1111
+ posts: ['Generate 3 posts ->Post'], // Creates 3 Post entities per Topic
1112
+ },
1113
+ Post: {
1114
+ title: 'string',
1115
+ content: 'Write a 500-word blog post', // AI generates ~500 words
1116
+ }
1117
+ })
1118
+
1119
+ // This single call generates:
1120
+ // - 1 Blog (1 generation call)
1121
+ // - 5 Topics (5 generation calls)
1122
+ // - 15 Posts (15 generation calls, each ~500 words)
1123
+ // Total: 21 AI generation calls
1124
+ const blog = await db.Blog.create(
1125
+ { title: 'My Tech Blog' },
1126
+ { cascade: true, maxDepth: 3 }
1127
+ )
1128
+ ```
1129
+
1130
+ **Cost Control Strategies:**
1131
+
1132
+ ```typescript
1133
+ // 1. Limit cascade depth to control entity count
1134
+ await db.Blog.create(data, { cascade: true, maxDepth: 1 }) // Only creates immediate children
1135
+
1136
+ // 2. Filter which types cascade
1137
+ await db.Blog.create(data, {
1138
+ cascade: true,
1139
+ cascadeTypes: ['Topic'] // Only cascade to Topic, not Post
1140
+ })
1141
+
1142
+ // 3. Track costs with onGenerate callback
1143
+ let totalTokens = 0
1144
+ configureAIGeneration({
1145
+ model: 'sonnet',
1146
+ onGenerate: (details) => {
1147
+ if (details.result) {
1148
+ // Estimate tokens (actual count depends on provider)
1149
+ const inputTokens = details.prompt.length / 4
1150
+ const outputTokens = JSON.stringify(details.result).length / 4
1151
+ totalTokens += inputTokens + outputTokens
1152
+ console.log(`Running total: ~${totalTokens} tokens`)
1153
+ }
1154
+ }
1155
+ })
1156
+
1157
+ // 4. Use draft() to preview without committing
1158
+ const draft = await db.Blog.draft({ title: 'Test' })
1159
+ // Review draft before creating
1160
+ const entity = await draft.resolve()
1161
+ ```
1162
+
1163
+ #### Embedding Costs
1164
+
1165
+ | Operation | Embedding Calls | Notes |
1166
+ |-----------|-----------------|-------|
1167
+ | `create()` | 1 per entity | Embeds text fields on creation |
1168
+ | `update()` | 1 if text changed | Re-embeds when text fields update |
1169
+ | `semanticSearch()` | 1 for query | Embeds query string |
1170
+ | `hybridSearch()` | 1 for query | Embeds query string |
1171
+ | `~>` or `<~` resolution | 1 per hint | Embeds the hint text for matching |
1172
+
1173
+ ---
1174
+
1175
+ ### Rate Limiting Best Practices
1176
+
1177
+ ai-database provides built-in concurrency control to prevent API rate limit errors:
1178
+
1179
+ #### Using forEach with Concurrency
1180
+
1181
+ ```typescript
1182
+ // Process entities with controlled concurrency
1183
+ const result = await db.Lead.forEach(async (lead) => {
1184
+ const analysis = await generateAnalysis(lead)
1185
+ await db.Lead.update(lead.$id, { analysis })
1186
+ }, {
1187
+ concurrency: 10, // Max 10 parallel operations
1188
+ maxRetries: 3, // Retry failed items up to 3 times
1189
+ retryDelay: (attempt) => 1000 * Math.pow(2, attempt), // Exponential backoff
1190
+
1191
+ // Handle rate limit errors specifically
1192
+ onError: (error, lead) => {
1193
+ if (error.message.includes('rate_limit') || error.message.includes('429')) {
1194
+ return 'retry' // Retry with exponential backoff
1195
+ }
1196
+ return 'continue' // Skip this item and continue
1197
+ },
1198
+
1199
+ onProgress: (progress) => {
1200
+ console.log(`${progress.completed}/${progress.total} (${progress.failed} failed)`)
1201
+ }
1202
+ })
1203
+ ```
1204
+
1205
+ #### Provider-Level Concurrency
1206
+
1207
+ ```typescript
1208
+ import { createMemoryProvider } from 'ai-database'
1209
+
1210
+ // Configure concurrency at the provider level
1211
+ const provider = createMemoryProvider({
1212
+ concurrency: 10, // Global limit on parallel operations
1213
+ })
1214
+ ```
1215
+
1216
+ #### Execution Queue for Batch Operations
1217
+
1218
+ For large-scale operations with different priority levels:
1219
+
1220
+ ```typescript
1221
+ import { ExecutionQueue } from 'ai-database'
1222
+
1223
+ const queue = new ExecutionQueue({
1224
+ concurrency: {
1225
+ priority: 50, // High-priority operations
1226
+ standard: 20, // Normal operations
1227
+ flex: 10, // Low-priority background operations
1228
+ batch: 1000, // Batch window operations
1229
+ }
1230
+ })
1231
+
1232
+ // Submit operations with priority
1233
+ await queue.submit(
1234
+ () => db.Lead.create({ name: 'Important Lead' }),
1235
+ { priority: 'priority' } // Runs with higher concurrency
1236
+ )
1237
+ ```
1238
+
1239
+ #### Rate Limit Patterns by Provider
1240
+
1241
+ | Provider | Rate Limits | Recommended Concurrency |
1242
+ |----------|-------------|------------------------|
1243
+ | **OpenAI** | 60-10000 RPM (varies by tier) | 5-50 |
1244
+ | **Anthropic** | 60-4000 RPM (varies by tier) | 5-40 |
1245
+ | **Cohere** | 100 RPM (trial), 10000 RPM (prod) | 5-100 |
1246
+ | **Local (Ollama)** | Limited by hardware | 1-4 |
1247
+
1248
+ **Recommended Configuration by Use Case:**
1249
+
1250
+ ```typescript
1251
+ // Development/Testing - Low concurrency, fail fast
1252
+ configureAIGeneration({ model: 'sonnet', enabled: true })
1253
+ await db.Entity.forEach(fn, { concurrency: 2, maxRetries: 1 })
1254
+
1255
+ // Production - Moderate concurrency with retries
1256
+ await db.Entity.forEach(fn, {
1257
+ concurrency: 10,
1258
+ maxRetries: 3,
1259
+ retryDelay: attempt => 1000 * Math.pow(2, attempt)
1260
+ })
1261
+
1262
+ // Batch Processing - Low concurrency, high retry tolerance
1263
+ await db.Entity.forEach(fn, {
1264
+ concurrency: 5,
1265
+ maxRetries: 5,
1266
+ retryDelay: attempt => 2000 * Math.pow(2, attempt),
1267
+ timeout: 60000, // 60 second timeout per item
1268
+ persist: 'batch-job-123', // Resume on crash
1269
+ })
1270
+ ```
1271
+
1272
+ ---
1273
+
1274
+ ### Disabling AI Generation
1275
+
1276
+ For testing or when you want placeholder values instead of AI generation:
1277
+
1278
+ ```typescript
1279
+ import { configureAIGeneration } from 'ai-database'
1280
+
1281
+ // Disable AI globally - uses deterministic placeholder values
1282
+ configureAIGeneration({ enabled: false })
1283
+
1284
+ // Or per-DB instance
1285
+ const { db } = DB(schema, {
1286
+ aiGeneration: { enabled: false }
1287
+ })
1288
+ ```
1289
+
1290
+ When AI is disabled:
1291
+ - Prompt fields generate deterministic placeholder text
1292
+ - Fuzzy operators (`~>`, `<~`) fall back to text search
1293
+ - No API calls are made to AI providers
1294
+ - Tests run faster and don't require API keys
1295
+
1296
+ ---
1297
+
421
1298
  ## Related
422
1299
 
423
1300
  - [ai-functions](https://github.com/ai-primitives/ai-primitives/tree/main/packages/ai-functions) - AI-powered functions