ai-database 2.1.3 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (260) hide show
  1. package/CHANGELOG.md +35 -1
  2. package/README.md +880 -669
  3. package/dist/actions.d.ts +2 -2
  4. package/dist/actions.d.ts.map +1 -1
  5. package/dist/actions.js +1 -1
  6. package/dist/actions.js.map +1 -1
  7. package/dist/ai-promise-db.d.ts +49 -23
  8. package/dist/ai-promise-db.d.ts.map +1 -1
  9. package/dist/ai-promise-db.js +91 -63
  10. package/dist/ai-promise-db.js.map +1 -1
  11. package/dist/authorization.d.ts.map +1 -1
  12. package/dist/authorization.js +38 -30
  13. package/dist/authorization.js.map +1 -1
  14. package/dist/cascade-orchestrator.d.ts +404 -0
  15. package/dist/cascade-orchestrator.d.ts.map +1 -0
  16. package/dist/cascade-orchestrator.js +828 -0
  17. package/dist/cascade-orchestrator.js.map +1 -0
  18. package/dist/cascade-write-strategy.d.ts +584 -0
  19. package/dist/cascade-write-strategy.d.ts.map +1 -0
  20. package/dist/cascade-write-strategy.js +590 -0
  21. package/dist/cascade-write-strategy.js.map +1 -0
  22. package/dist/ch-adapter.d.ts +358 -0
  23. package/dist/ch-adapter.d.ts.map +1 -0
  24. package/dist/ch-adapter.js +929 -0
  25. package/dist/ch-adapter.js.map +1 -0
  26. package/dist/client/index.d.ts +42 -0
  27. package/dist/client/index.d.ts.map +1 -0
  28. package/dist/client/index.js +43 -0
  29. package/dist/client/index.js.map +1 -0
  30. package/dist/client.d.ts +266 -0
  31. package/dist/client.d.ts.map +1 -0
  32. package/dist/client.js +81 -0
  33. package/dist/client.js.map +1 -0
  34. package/dist/constants.d.ts +64 -1
  35. package/dist/constants.d.ts.map +1 -1
  36. package/dist/constants.js +52 -2
  37. package/dist/constants.js.map +1 -1
  38. package/dist/dataloader.d.ts +99 -0
  39. package/dist/dataloader.d.ts.map +1 -0
  40. package/dist/dataloader.js +225 -0
  41. package/dist/dataloader.js.map +1 -0
  42. package/dist/db-provider-port.d.ts +501 -0
  43. package/dist/db-provider-port.d.ts.map +1 -0
  44. package/dist/db-provider-port.js +113 -0
  45. package/dist/db-provider-port.js.map +1 -0
  46. package/dist/digital-objects-provider.d.ts +49 -0
  47. package/dist/digital-objects-provider.d.ts.map +1 -0
  48. package/dist/digital-objects-provider.js +55 -0
  49. package/dist/digital-objects-provider.js.map +1 -0
  50. package/dist/do-sqlite-adapter.d.ts +402 -0
  51. package/dist/do-sqlite-adapter.d.ts.map +1 -0
  52. package/dist/do-sqlite-adapter.js +745 -0
  53. package/dist/do-sqlite-adapter.js.map +1 -0
  54. package/dist/docs-rels/custom-types.d.ts +134 -0
  55. package/dist/docs-rels/custom-types.d.ts.map +1 -0
  56. package/dist/docs-rels/custom-types.js +70 -0
  57. package/dist/docs-rels/custom-types.js.map +1 -0
  58. package/dist/docs-rels/index.d.ts +16 -0
  59. package/dist/docs-rels/index.d.ts.map +1 -0
  60. package/dist/docs-rels/index.js +16 -0
  61. package/dist/docs-rels/index.js.map +1 -0
  62. package/dist/docs-rels/migrations/index.d.ts +30 -0
  63. package/dist/docs-rels/migrations/index.d.ts.map +1 -0
  64. package/dist/docs-rels/migrations/index.js +128 -0
  65. package/dist/docs-rels/migrations/index.js.map +1 -0
  66. package/dist/docs-rels/schema.d.ts +2961 -0
  67. package/dist/docs-rels/schema.d.ts.map +1 -0
  68. package/dist/docs-rels/schema.js +244 -0
  69. package/dist/docs-rels/schema.js.map +1 -0
  70. package/dist/durable-clickhouse.d.ts.map +1 -1
  71. package/dist/durable-clickhouse.js +16 -13
  72. package/dist/durable-clickhouse.js.map +1 -1
  73. package/dist/durable-promise.d.ts.map +1 -1
  74. package/dist/durable-promise.js +34 -15
  75. package/dist/durable-promise.js.map +1 -1
  76. package/dist/errors.d.ts +127 -0
  77. package/dist/errors.d.ts.map +1 -0
  78. package/dist/errors.js +210 -0
  79. package/dist/errors.js.map +1 -0
  80. package/dist/eventbridge.d.ts +117 -0
  81. package/dist/eventbridge.d.ts.map +1 -0
  82. package/dist/eventbridge.js +238 -0
  83. package/dist/eventbridge.js.map +1 -0
  84. package/dist/events.d.ts +2 -2
  85. package/dist/events.d.ts.map +1 -1
  86. package/dist/events.js +1 -1
  87. package/dist/events.js.map +1 -1
  88. package/dist/execution-queue.d.ts.map +1 -1
  89. package/dist/execution-queue.js +4 -5
  90. package/dist/execution-queue.js.map +1 -1
  91. package/dist/index.d.ts +35 -8
  92. package/dist/index.d.ts.map +1 -1
  93. package/dist/index.js +106 -6
  94. package/dist/index.js.map +1 -1
  95. package/dist/linguistic.d.ts +3 -108
  96. package/dist/linguistic.d.ts.map +1 -1
  97. package/dist/linguistic.js +3 -372
  98. package/dist/linguistic.js.map +1 -1
  99. package/dist/logger.d.ts +132 -0
  100. package/dist/logger.d.ts.map +1 -0
  101. package/dist/logger.js +137 -0
  102. package/dist/logger.js.map +1 -0
  103. package/dist/memory-provider.d.ts +128 -0
  104. package/dist/memory-provider.d.ts.map +1 -1
  105. package/dist/memory-provider.js +592 -257
  106. package/dist/memory-provider.js.map +1 -1
  107. package/dist/pg-adapter.d.ts +424 -0
  108. package/dist/pg-adapter.d.ts.map +1 -0
  109. package/dist/pg-adapter.js +921 -0
  110. package/dist/pg-adapter.js.map +1 -0
  111. package/dist/pipelines-iceberg-emitter.d.ts +327 -0
  112. package/dist/pipelines-iceberg-emitter.d.ts.map +1 -0
  113. package/dist/pipelines-iceberg-emitter.js +351 -0
  114. package/dist/pipelines-iceberg-emitter.js.map +1 -0
  115. package/dist/provider-capabilities.d.ts +146 -0
  116. package/dist/provider-capabilities.d.ts.map +1 -0
  117. package/dist/provider-capabilities.js +214 -0
  118. package/dist/provider-capabilities.js.map +1 -0
  119. package/dist/rdb-provider-adapter.d.ts +195 -0
  120. package/dist/rdb-provider-adapter.d.ts.map +1 -0
  121. package/dist/rdb-provider-adapter.js +291 -0
  122. package/dist/rdb-provider-adapter.js.map +1 -0
  123. package/dist/schema/cascade.d.ts +48 -17
  124. package/dist/schema/cascade.d.ts.map +1 -1
  125. package/dist/schema/cascade.js +477 -278
  126. package/dist/schema/cascade.js.map +1 -1
  127. package/dist/schema/definition-caches.d.ts +24 -0
  128. package/dist/schema/definition-caches.d.ts.map +1 -0
  129. package/dist/schema/definition-caches.js +26 -0
  130. package/dist/schema/definition-caches.js.map +1 -0
  131. package/dist/schema/dependency-graph.d.ts +21 -109
  132. package/dist/schema/dependency-graph.d.ts.map +1 -1
  133. package/dist/schema/dependency-graph.js +25 -333
  134. package/dist/schema/dependency-graph.js.map +1 -1
  135. package/dist/schema/diff.d.ts +103 -0
  136. package/dist/schema/diff.d.ts.map +1 -0
  137. package/dist/schema/diff.js +329 -0
  138. package/dist/schema/diff.js.map +1 -0
  139. package/dist/schema/entity-operations.d.ts +99 -0
  140. package/dist/schema/entity-operations.d.ts.map +1 -0
  141. package/dist/schema/entity-operations.js +818 -0
  142. package/dist/schema/entity-operations.js.map +1 -0
  143. package/dist/schema/index.d.ts +28 -34
  144. package/dist/schema/index.d.ts.map +1 -1
  145. package/dist/schema/index.js +454 -521
  146. package/dist/schema/index.js.map +1 -1
  147. package/dist/schema/migration.d.ts +205 -0
  148. package/dist/schema/migration.d.ts.map +1 -0
  149. package/dist/schema/migration.js +327 -0
  150. package/dist/schema/migration.js.map +1 -0
  151. package/dist/schema/nl-query-generator.d.ts +68 -0
  152. package/dist/schema/nl-query-generator.d.ts.map +1 -0
  153. package/dist/schema/nl-query-generator.js +362 -0
  154. package/dist/schema/nl-query-generator.js.map +1 -0
  155. package/dist/schema/nl-query.d.ts +65 -0
  156. package/dist/schema/nl-query.d.ts.map +1 -0
  157. package/dist/schema/nl-query.js +178 -0
  158. package/dist/schema/nl-query.js.map +1 -0
  159. package/dist/schema/parse.d.ts.map +1 -1
  160. package/dist/schema/parse.js +144 -89
  161. package/dist/schema/parse.js.map +1 -1
  162. package/dist/schema/provider.d.ts +37 -0
  163. package/dist/schema/provider.d.ts.map +1 -1
  164. package/dist/schema/provider.js +15 -7
  165. package/dist/schema/provider.js.map +1 -1
  166. package/dist/schema/resolve.d.ts +46 -5
  167. package/dist/schema/resolve.d.ts.map +1 -1
  168. package/dist/schema/resolve.js +237 -95
  169. package/dist/schema/resolve.js.map +1 -1
  170. package/dist/schema/search-utils.d.ts +76 -0
  171. package/dist/schema/search-utils.d.ts.map +1 -0
  172. package/dist/schema/search-utils.js +86 -0
  173. package/dist/schema/search-utils.js.map +1 -0
  174. package/dist/schema/seed.d.ts +53 -0
  175. package/dist/schema/seed.d.ts.map +1 -0
  176. package/dist/schema/seed.js +94 -0
  177. package/dist/schema/seed.js.map +1 -0
  178. package/dist/schema/semantic.d.ts +10 -0
  179. package/dist/schema/semantic.d.ts.map +1 -1
  180. package/dist/schema/semantic.js +192 -86
  181. package/dist/schema/semantic.js.map +1 -1
  182. package/dist/schema/sub-apis.d.ts +52 -0
  183. package/dist/schema/sub-apis.d.ts.map +1 -0
  184. package/dist/schema/sub-apis.js +216 -0
  185. package/dist/schema/sub-apis.js.map +1 -0
  186. package/dist/schema/system-entities.d.ts +42 -0
  187. package/dist/schema/system-entities.d.ts.map +1 -0
  188. package/dist/schema/system-entities.js +101 -0
  189. package/dist/schema/system-entities.js.map +1 -0
  190. package/dist/schema/types.d.ts +91 -9
  191. package/dist/schema/types.d.ts.map +1 -1
  192. package/dist/schema/union-fallback.d.ts.map +1 -1
  193. package/dist/schema/union-fallback.js +21 -15
  194. package/dist/schema/union-fallback.js.map +1 -1
  195. package/dist/schema/value-generators/ai.d.ts +54 -0
  196. package/dist/schema/value-generators/ai.d.ts.map +1 -0
  197. package/dist/schema/value-generators/ai.js +136 -0
  198. package/dist/schema/value-generators/ai.js.map +1 -0
  199. package/dist/schema/value-generators/index.d.ts +126 -0
  200. package/dist/schema/value-generators/index.d.ts.map +1 -0
  201. package/dist/schema/value-generators/index.js +219 -0
  202. package/dist/schema/value-generators/index.js.map +1 -0
  203. package/dist/schema/value-generators/placeholder.d.ts +52 -0
  204. package/dist/schema/value-generators/placeholder.d.ts.map +1 -0
  205. package/dist/schema/value-generators/placeholder.js +328 -0
  206. package/dist/schema/value-generators/placeholder.js.map +1 -0
  207. package/dist/schema/value-generators/types.d.ts +116 -0
  208. package/dist/schema/value-generators/types.d.ts.map +1 -0
  209. package/dist/schema/value-generators/types.js +11 -0
  210. package/dist/schema/value-generators/types.js.map +1 -0
  211. package/dist/schema/version.d.ts +111 -0
  212. package/dist/schema/version.d.ts.map +1 -0
  213. package/dist/schema/version.js +190 -0
  214. package/dist/schema/version.js.map +1 -0
  215. package/dist/schema.d.ts +1095 -24
  216. package/dist/schema.d.ts.map +1 -1
  217. package/dist/schema.js +2852 -40
  218. package/dist/schema.js.map +1 -1
  219. package/dist/semantic-vectors.d.ts +39 -0
  220. package/dist/semantic-vectors.d.ts.map +1 -0
  221. package/dist/semantic-vectors.js +334 -0
  222. package/dist/semantic-vectors.js.map +1 -0
  223. package/dist/semantic.d.ts +29 -1
  224. package/dist/semantic.d.ts.map +1 -1
  225. package/dist/semantic.js +26 -16
  226. package/dist/semantic.js.map +1 -1
  227. package/dist/telemetry.d.ts +128 -0
  228. package/dist/telemetry.d.ts.map +1 -0
  229. package/dist/telemetry.js +305 -0
  230. package/dist/telemetry.js.map +1 -0
  231. package/dist/tests.d.ts.map +1 -1
  232. package/dist/tests.js +30 -22
  233. package/dist/tests.js.map +1 -1
  234. package/dist/type-guards.d.ts +50 -5
  235. package/dist/type-guards.d.ts.map +1 -1
  236. package/dist/type-guards.js +87 -16
  237. package/dist/type-guards.js.map +1 -1
  238. package/dist/types.d.ts +33 -245
  239. package/dist/types.d.ts.map +1 -1
  240. package/dist/types.js +62 -72
  241. package/dist/types.js.map +1 -1
  242. package/dist/validation.d.ts +2 -5
  243. package/dist/validation.d.ts.map +1 -1
  244. package/dist/validation.js +65 -93
  245. package/dist/validation.js.map +1 -1
  246. package/dist/worker/db-provider.d.ts +168 -0
  247. package/dist/worker/db-provider.d.ts.map +1 -0
  248. package/dist/worker/db-provider.js +277 -0
  249. package/dist/worker/db-provider.js.map +1 -0
  250. package/dist/worker/index.d.ts +35 -0
  251. package/dist/worker/index.d.ts.map +1 -0
  252. package/dist/worker/index.js +37 -0
  253. package/dist/worker/index.js.map +1 -0
  254. package/dist/worker.d.ts +779 -0
  255. package/dist/worker.d.ts.map +1 -0
  256. package/dist/worker.js +2786 -0
  257. package/dist/worker.js.map +1 -0
  258. package/package.json +46 -16
  259. package/src/docs-rels/migrations/0001-init.sql +125 -0
  260. package/LICENSE +0 -21
package/README.md CHANGED
@@ -1,219 +1,389 @@
1
1
  # ai-database
2
2
 
3
- **AI-generated data shouldn't be disconnected from your schema.**
3
+ ![Stability: Stable](https://img.shields.io/badge/stability-stable-green)
4
4
 
5
- You write one line of AI code. It generates a user. Where does it go? Does it have a company? Does it match your existing customers? Traditional databases don't know. They can't reason about relationships. They can't cascade.
5
+ **AI hallucinates. Your database shouldn't.**
6
6
 
7
- **ai-database can.**
7
+ When AI generates a "Software Developer" for your customer profile, does it match your existing O\*NET occupation data? Does "Enterprise SaaS" connect to your NAICS industry codes? Traditional approaches fragment context—AI juggles content creation and referential integrity simultaneously, producing plausible-sounding but disconnected data.
8
+
9
+ **ai-database grounds AI generation against your domain.**
8
10
 
9
11
  ```typescript
10
12
  import { DB } from 'ai-database'
11
13
 
12
14
  const { db } = DB({
13
- Lead: { name: 'string', company: 'Company.leads', score: 'number' },
14
- Company: { name: 'string', industry: 'string' }
15
+ IdealCustomerProfile: {
16
+ as: 'Who are they? <~Occupation', // Ground against O*NET occupations
17
+ at: 'Where do they work? <~Industry', // Ground against NAICS industries
18
+ are: 'What are they doing? <~Task', // Ground against O*NET tasks
19
+ },
20
+ Occupation: { title: 'string', description: 'string' },
21
+ Industry: { name: 'string', naicsCode: 'string' },
22
+ Task: { name: 'string' },
23
+ })
24
+
25
+ // Seed reference data from O*NET, NAICS, etc.
26
+ await db.Occupation.create({ title: 'Software Developer', description: 'Develops applications' })
27
+ await db.Industry.create({ name: 'Technology', naicsCode: '5112' })
28
+
29
+ // AI generation is grounded against real reference data
30
+ const icp = await db.IdealCustomerProfile.create({
31
+ asHint: 'Engineers who build software', // Matches "Software Developer"
32
+ atHint: 'Tech companies', // Matches "Technology"
15
33
  })
16
34
 
17
- // One call generates Lead + Company + relationships
18
- const lead = await db.Lead.create({ name: 'Acme Corp' }, { cascade: true })
19
- const company = await lead.company // Already exists, fully typed
35
+ const occupation = await icp.as
36
+ // => { title: 'Software Developer', ... } matched via semantic search, not hallucinated
20
37
  ```
21
38
 
22
39
  ---
23
40
 
24
- ## The Problem
41
+ ## The Core Insight
25
42
 
26
- **Before ai-database:** AI generates orphaned data
43
+ Traditional databases require foreign keys at schema time. When generating with AI, this fragments context: the model must juggle content creation and referential integrity simultaneously.
27
44
 
28
- ```typescript
29
- // Generate a lead...
30
- const lead = await ai`generate a sales lead`
31
-
32
- // Now what?
33
- // - Where do you store it?
34
- // - How do you link it to a company?
35
- // - What if the company already exists?
36
- // - How do you query related data?
37
-
38
- // You end up with:
39
- const leadId = await db.insert('leads', lead)
40
- const companyId = await db.insert('companies', { name: lead.company })
41
- await db.insert('lead_companies', { leadId, companyId })
42
- // Manual. Fragile. No type safety. N+1 queries everywhere.
43
- ```
45
+ ai-database inverts this paradigm. **Relationship operators become workflow instructions**, not schema constraints:
44
46
 
45
- **After ai-database:** AI respects your schema
47
+ 1. **Generate** the entity with full semantic context intact
48
+ 2. **Link** as a post-processing step via insertion or vector search
46
49
 
47
- ```typescript
48
- const { db } = DB({
49
- Lead: {
50
- name: 'string',
51
- company: 'Target company ~>Company', // Fuzzy match existing or generate
52
- score: 'number',
53
- },
54
- Company: { name: 'string', industry: 'string' }
55
- })
50
+ This separation eliminates context fragmentation during generation and produces human-readable relationship labels ("Software Developers") instead of opaque IDs (`occ_1547`).
56
51
 
57
- // One line. AI finds existing company or creates one.
58
- const lead = await db.Lead.create({ name: 'John', companyHint: 'enterprise tech' })
52
+ ---
59
53
 
60
- // Relationships just work. Batch loaded. Type safe.
61
- const company = await lead.company
62
- ```
54
+ ## The Four Operators
63
55
 
64
- ---
56
+ ai-database provides four relationship operators that control how entities connect. They combine two dimensions:
57
+
58
+ | | **Create New** | **Search Existing** |
59
+ |---|---|---|
60
+ | **Link TO target** | `->` Forward Exact | `~>` Forward Fuzzy |
61
+ | **Link FROM target** | `<-` Backward Exact | `<~` Backward Fuzzy |
62
+
63
+ ### Quick Reference
64
+
65
+ | Operator | Direction | Match Mode | When to Use |
66
+ |----------|-----------|------------|-------------|
67
+ | `->` | forward | exact | Creating child entities (Blog → Posts) |
68
+ | `~>` | forward | fuzzy | Reusing existing entities (Campaign → Audience) |
69
+ | `<-` | backward | exact | Aggregation queries (Blog collects Posts) |
70
+ | `<~` | backward | fuzzy | Grounding against reference data (ICP → Occupation) |
65
71
 
66
- ## Why ai-database?
72
+ ### Understanding the Operators
67
73
 
68
- | Pain | Solution |
69
- |------|----------|
70
- | N+1 queries loading relationships | **Promise pipelining** - chain without await, batch automatically |
71
- | AI data disconnected from schema | **Relationship operators** (`->`, `~>`, `<-`, `<~`) for AI-native linking |
72
- | No types for AI-generated data | **Type-safe schema inference** - full TypeScript support |
73
- | Manual relationship management | **Cascade generation** - create entity graphs in one call |
74
+ **Direction** determines who owns the relationship:
75
+ - **Forward** (`->`, `~>`): Current entity links TO the target
76
+ - **Backward** (`<-`, `<~`): Target entity links back TO the current entity (the edge originates FROM the target)
77
+
78
+ **Match Mode** determines how the target is resolved:
79
+ - **Exact** (`->`, `<-`): Create a new entity, then link to it
80
+ - **Fuzzy** (`~>`, `<~`): Search existing entities via semantic similarity
74
81
 
75
82
  ---
76
83
 
77
- ## Quick Start
84
+ ## Example 1: Grounding Against Reference Data (`<~`)
78
85
 
79
- ```typescript
80
- import { DB } from 'ai-database'
86
+ The backward fuzzy operator grounds AI-generated content against authoritative reference data. This is the **semantic grounding** pattern.
81
87
 
88
+ ```typescript
82
89
  const { db } = DB({
83
- Lead: { name: 'string', company: 'Company.leads' },
84
- Company: { name: 'string' }
90
+ // Generative entity that grounds against reference data
91
+ IdealCustomerProfile: {
92
+ as: 'Who are they? (e.g. "Developers") <~Occupation',
93
+ at: 'Where do they work? (e.g. "FinTech startups") <~Industry',
94
+ are: 'What are they doing? (e.g. "building APIs") <~Task',
95
+ using: 'What are they using? (e.g. "Node.js") <~Tool',
96
+ to: 'What is their goal? (e.g. "ship faster") <~Outcome',
97
+ },
98
+
99
+ // Reference data seeded from O*NET, NAICS, etc.
100
+ Occupation: {
101
+ $seed: 'https://onet.data/occupations.tsv',
102
+ $id: '$.oNETSOCCode',
103
+ title: '$.title',
104
+ description: '$.description',
105
+ },
106
+ Industry: {
107
+ $seed: 'https://naics.data/industries.tsv',
108
+ $id: '$.naicsCode',
109
+ name: '$.title',
110
+ },
111
+ Task: { name: 'string' },
112
+ Tool: { name: 'string' },
113
+ Outcome: { description: 'string' },
85
114
  })
115
+ ```
86
116
 
87
- // Chain without await
88
- const leads = db.Lead.list()
89
- const qualified = await leads.filter(l => l.score > 80)
117
+ **How it works:**
90
118
 
91
- // Batch relationship loading
92
- const enriched = await leads.map(lead => ({
93
- name: lead.name,
94
- company: lead.company, // Batch loaded!
95
- }))
119
+ 1. AI generates ICP with `as: "Engineers who build software"`
120
+ 2. Runtime embeds the text and searches the `Occupation` collection
121
+ 3. Best match found: "Software Developer" (via vector similarity)
122
+ 4. Link created with human-readable label: `"Software Developer"`
123
+
124
+ **Key behaviors:**
125
+ - Uses embedding similarity to find the best match
126
+ - Returns `null` if no semantic match found (doesn't hallucinate)
127
+ - Grounds generated content against curated reference data
128
+ - Perfect for taxonomies, categories, and standardized values
129
+
130
+ ### Union Types for Fallback Search
131
+
132
+ When multiple collections could contain the best match:
133
+
134
+ ```typescript
135
+ IdealCustomerProfile: {
136
+ as: '<~Occupation|Role|JobType', // Search Occupation first, then Role, then JobType
137
+ using: '<~Tool|Technology|Product', // Search multiple collections in priority order
138
+ }
96
139
  ```
97
140
 
98
141
  ---
99
142
 
100
- ## Promise Pipelining
143
+ ## Example 2: Content Generation with Cascade (`->`, `<-`)
101
144
 
102
- Chain database operations without `await`:
145
+ The forward and backward exact operators create hierarchical content. This is the **cascading generation** pattern.
103
146
 
104
147
  ```typescript
105
- const leads = db.Lead.list()
106
- const topLeads = leads.filter(l => l.score > 80)
107
- const names = topLeads.map(l => l.name)
148
+ const { db } = DB({
149
+ Blog: {
150
+ title: 'string',
151
+ description: 'string',
152
+ topics: ['List 5 topics covered ->Topic'], // Creates Topic children
153
+ posts: ['<-Post'], // Aggregates Post children
154
+ },
155
+ Topic: {
156
+ name: 'string',
157
+ titles: ['List 3 blog post titles ->Post'], // Creates Post children
158
+ },
159
+ Post: {
160
+ title: 'string',
161
+ synopsis: 'string',
162
+ content: 'markdown',
163
+ blog: '->Blog', // Links back to parent Blog
164
+ topic: '->Topic', // Links to Topic
165
+ },
166
+ })
108
167
 
109
- // Only await when you need the result
110
- const result = await names
168
+ // One call generates the entire blog structure
169
+ const blog = await db.Blog.create(
170
+ { title: 'AI Engineering', description: 'Building with LLMs' },
171
+ { cascade: true, maxDepth: 3 }
172
+ )
173
+
174
+ // Topics were auto-generated
175
+ const topics = await blog.topics
176
+ // => [{ name: 'Prompt Engineering' }, { name: 'RAG Systems' }, ...]
177
+
178
+ // Posts were auto-generated under each topic
179
+ const posts = await topics[0].titles
180
+ // => [{ title: 'Getting Started with Prompts' }, ...]
181
+
182
+ // Backward refs enable aggregation queries
183
+ const allPosts = await blog.posts
184
+ // => All posts that reference this blog
111
185
  ```
112
186
 
113
- ## Batch Relationship Loading
187
+ ### Forward Exact (`->`)
114
188
 
115
- Eliminate N+1 queries automatically:
189
+ Creates child entities that belong to the parent:
116
190
 
117
191
  ```typescript
118
- // Old way - N+1 queries
119
- const leads = await db.Lead.list()
120
- for (const lead of leads) {
121
- const company = await db.Company.get(lead.companyId) // N queries!
192
+ Startup: {
193
+ founders: ['Who are the founders? ->Founder'], // Creates Founder entities
194
+ businessModel: 'What is the business model? ->LeanCanvas',
122
195
  }
196
+ ```
123
197
 
124
- // New way - batch loaded
125
- const enriched = await db.Lead.list().map(lead => ({
126
- lead,
127
- company: lead.company, // All companies loaded in ONE query
128
- }))
198
+ **Key behaviors:**
199
+ - Text before `->` is the AI generation prompt
200
+ - If a value is provided, uses it instead of generating
201
+ - Optional fields (`->Type?`) skip generation when not provided
202
+ - Nested forward fields cascade automatically
203
+
204
+ ### Backward Exact (`<-`)
205
+
206
+ Creates inverse relationships for aggregation:
207
+
208
+ ```typescript
209
+ Blog: {
210
+ posts: ['<-Post'], // All posts that reference this blog
211
+ },
212
+ Post: {
213
+ blog: '->Blog', // Forward reference to parent
214
+ }
129
215
  ```
130
216
 
131
- ## Natural Language Queries
217
+ **Key behaviors:**
218
+ - Creates inverted edge direction (Post → Blog)
219
+ - Enables reverse lookups and aggregation queries
220
+ - Works with explicit backrefs: `['<-Post.blog']`
221
+ - Handles self-referential trees: `children: ['<-Node.parent']`
222
+
223
+ ### Forward Fuzzy (`~>`)
132
224
 
133
- Ask your database questions:
225
+ Searches existing entities first, creates if not found:
134
226
 
135
227
  ```typescript
136
- const results = await db.Lead`who closed deals this month?`
137
- const pending = await db.Order`what's stuck in processing?`
228
+ Campaign: {
229
+ audience: 'Target audience ~>Audience', // Find existing or create new
230
+ }
231
+
232
+ // If "Enterprise" audience exists, reuses it
233
+ const campaign = await db.Campaign.create({
234
+ audienceHint: 'Big companies with 1000+ employees'
235
+ })
236
+ const audience = await campaign.audience
237
+ // => { name: 'Enterprise', ... } — reused existing!
138
238
  ```
139
239
 
240
+ **Key behaviors:**
241
+ - Searches via semantic similarity using `${fieldName}Hint`
242
+ - Reuses existing entity if match exceeds threshold
243
+ - Generates new entity if no match found
244
+ - Generated entities marked with `$generated: true`
245
+
140
246
  ---
141
247
 
142
- ## Real-World Examples
248
+ ## Example 3: Startup Generator (Mixed Operators)
143
249
 
144
- ### Sales Pipeline
250
+ A complete example showing all four operators working together:
145
251
 
146
252
  ```typescript
147
253
  const { db } = DB({
148
- Lead: {
254
+ Startup: {
255
+ $instructions: 'Generate a B2B SaaS startup',
149
256
  name: 'string',
150
- email: 'string',
151
- score: 'number',
152
- company: 'Company.leads',
257
+ idea: 'What problem does this solve? <-Idea', // Idea spawns Startup
258
+ founders: ['Who are the founding team? ->Founder'], // Create founders
259
+ customer: 'Who is the target customer? ~>CustomerPersona', // Find existing
260
+ industry: 'What industry? <~Industry', // Ground to NAICS
153
261
  },
154
- Company: {
155
- name: 'string',
156
- industry: 'string',
157
- }
262
+ Idea: { problem: 'string', solution: 'string' },
263
+ Founder: { name: 'string', role: 'string' },
264
+ CustomerPersona: { title: 'string', painPoints: 'string' },
265
+ Industry: { name: 'string', naicsCode: 'string' },
266
+ })
267
+
268
+ // Pre-populate reference data
269
+ await db.Industry.create({ name: 'Technology', naicsCode: '5112' })
270
+ await db.CustomerPersona.create({
271
+ title: 'VP of Engineering',
272
+ painPoints: 'Managing distributed teams',
158
273
  })
159
274
 
160
- // Find high-value leads with their companies
161
- const qualified = await db.Lead.list()
162
- .filter(lead => lead.score > 80)
163
- .map(lead => ({
164
- lead,
165
- company: lead.company,
166
- }))
275
+ // Generate complete startup with grounded relationships
276
+ const startup = await db.Startup.create(
277
+ { name: 'DevFlow' },
278
+ { cascade: true, maxDepth: 2 }
279
+ )
280
+
281
+ // Relationships resolved appropriately:
282
+ const idea = await startup.idea // Created new (<-)
283
+ const founders = await startup.founders // Created new ([->])
284
+ const customer = await startup.customer // Matched existing (~>)
285
+ const industry = await startup.industry // Grounded to reference (<~)
286
+ ```
287
+
288
+ ---
289
+
290
+ ## Threshold Syntax
291
+
292
+ For fuzzy operators (`~>` and `<~`), configure the similarity threshold:
293
+
294
+ ### Field-Level Thresholds
167
295
 
168
- // Ask questions naturally
169
- const results = await db.Lead`who hasn't responded in 2 weeks?`
296
+ ```typescript
297
+ Event: {
298
+ venue: 'Where is the event? ~>Venue(0.9)', // High threshold - strict match
299
+ sponsor: 'Event sponsor ~>Company(0.5)', // Low threshold - lenient match
300
+ }
170
301
  ```
171
302
 
172
- ### Customer Success
303
+ ### Entity-Level Thresholds
173
304
 
174
305
  ```typescript
175
- const { db } = DB({
176
- Customer: {
177
- name: 'string',
178
- healthScore: 'number',
179
- mrr: 'number',
180
- csm: 'User.customers',
181
- },
182
- User: { name: 'string' }
183
- })
306
+ Startup: {
307
+ $fuzzyThreshold: 0.85, // Apply to all ~> and <~ fields
308
+ customer: '~>Customer',
309
+ competitor: '~>Company',
310
+ }
311
+ ```
312
+
313
+ **Threshold values:**
314
+ - `0.9` - Very strict: Only near-exact semantic matches
315
+ - `0.7` - Default: Balanced matching
316
+ - `0.5` - Lenient: Accept loosely related matches
317
+
318
+ ---
319
+
320
+ ## Cascade Generation
321
+
322
+ Build complex entity graphs from a single `create()` call:
323
+
324
+ ```typescript
325
+ const company = await db.Company.create(
326
+ { name: 'TechCorp' },
327
+ {
328
+ cascade: true,
329
+ maxDepth: 4,
330
+ onProgress: (p) => console.log(`${p.totalEntitiesCreated} created`),
331
+ }
332
+ )
184
333
 
185
- // At-risk customers with their CSMs
186
- const atRisk = await db.Customer.list()
187
- .filter(c => c.healthScore < 50)
188
- .map(c => ({
189
- customer: c,
190
- csm: c.csm,
191
- mrr: c.mrr,
192
- }))
334
+ // Entire org chart generated: Company → Departments → Teams → Employees
193
335
  ```
194
336
 
195
- ### Order Management
337
+ ### Cascade Options
338
+
339
+ | Option | Type | Default | Description |
340
+ |--------|------|---------|-------------|
341
+ | `cascade` | `boolean` | `false` | Enable cascade generation |
342
+ | `maxDepth` | `number` | `0` | Maximum recursion depth |
343
+ | `cascadeTypes` | `string[]` | - | Only cascade to these types |
344
+ | `onProgress` | `function` | - | Progress callback |
345
+ | `onError` | `function` | - | Error handler |
346
+ | `stopOnError` | `boolean` | `false` | Stop on first error |
347
+
348
+ ---
349
+
350
+ ## Special Variables
351
+
352
+ ### `$instructions`
353
+
354
+ Entity-level prompting that guides AI generation:
196
355
 
197
356
  ```typescript
198
- const { db } = DB({
199
- Order: {
200
- status: 'string',
201
- total: 'number',
202
- customer: 'Customer.orders',
203
- items: ['OrderItem.order'],
204
- },
205
- OrderItem: { product: 'string', quantity: 'number' },
206
- Customer: { name: 'string' }
207
- })
357
+ Character: {
358
+ $instructions: 'This character is from a medieval fantasy setting',
359
+ name: 'string',
360
+ backstory: 'What is their history?', // Influenced by $instructions
361
+ }
362
+ ```
363
+
364
+ Template variables resolve against entity data:
365
+
366
+ ```typescript
367
+ Problem: {
368
+ $instructions: `
369
+ Identify problems for occupation: {task.occupation.title}
370
+ in industry: {task.occupation.industry.name}
371
+ `,
372
+ task: '<-Task',
373
+ description: 'string',
374
+ }
375
+ ```
376
+
377
+ ### `$context`
208
378
 
209
- // Pending orders with all details
210
- const pending = await db.Order
211
- .find({ status: 'pending' })
212
- .map(order => ({
213
- order,
214
- customer: order.customer,
215
- items: order.items,
216
- }))
379
+ Explicit context dependencies pre-fetched before generation:
380
+
381
+ ```typescript
382
+ Ad: {
383
+ $context: ['Startup', 'ICP'],
384
+ $instructions: 'Generate ad for {startup.name} targeting {icp.as}',
385
+ headline: 'string (30 chars)',
386
+ }
217
387
  ```
218
388
 
219
389
  ---
@@ -223,11 +393,11 @@ const pending = await db.Order
223
393
  Define once, get typed operations everywhere:
224
394
 
225
395
  ```typescript
226
- const { db, events, actions, nouns, verbs } = DB({
396
+ const { db, events, actions } = DB({
227
397
  Post: {
228
398
  title: 'string',
229
399
  content: 'markdown',
230
- author: 'Author.posts', // Creates both directions
400
+ author: 'Author.posts', // Creates bidirectional relationship
231
401
  },
232
402
  Author: {
233
403
  name: 'string',
@@ -261,21 +431,48 @@ Post: { tags: ['Tag.posts'] }
261
431
 
262
432
  ---
263
433
 
264
- ## CRUD Operations
434
+ ## Promise Pipelining
435
+
436
+ Chain database operations without `await`:
437
+
438
+ ```typescript
439
+ const leads = db.Lead.list()
440
+ const topLeads = leads.filter(l => l.score > 80)
441
+ const names = topLeads.map(l => l.name)
442
+
443
+ // Only await when you need the result
444
+ const result = await names
445
+ ```
446
+
447
+ ### Batch Relationship Loading
448
+
449
+ Eliminate N+1 queries automatically:
450
+
451
+ ```typescript
452
+ // All companies loaded in ONE query
453
+ const enriched = await db.Lead.list().map(lead => ({
454
+ lead,
455
+ company: lead.company,
456
+ }))
457
+ ```
458
+
459
+ ---
265
460
 
266
- All operations return `DBPromise` for chaining:
461
+ ## CRUD Operations
267
462
 
268
463
  ```typescript
269
464
  // Read
270
465
  const lead = await db.Lead.get('lead-123')
271
466
  const leads = await db.Lead.list()
272
- const first = await db.Lead.first()
273
467
  const found = await db.Lead.find({ status: 'active' })
274
468
 
275
469
  // Search
276
470
  const results = await db.Lead.search('enterprise SaaS')
277
471
 
278
- // Write (returns regular Promise)
472
+ // Natural language queries
473
+ const pending = await db.Order`what's stuck in processing?`
474
+
475
+ // Write
279
476
  const lead = await db.Lead.create({ name: 'Acme Corp' })
280
477
  await db.Lead.update(lead.$id, { score: 90 })
281
478
  await db.Lead.delete(lead.$id)
@@ -307,36 +504,23 @@ events.on('*.updated', event => {
307
504
  })
308
505
  ```
309
506
 
507
+ ---
508
+
310
509
  ## forEach - Large-Scale Processing
311
510
 
312
- Process thousands of items with concurrency, progress tracking, and error handling:
511
+ Process thousands of items with concurrency and error handling:
313
512
 
314
513
  ```typescript
315
- // Simple iteration
316
- await db.Lead.forEach(lead => {
317
- console.log(lead.name)
318
- })
319
-
320
- // With AI and concurrency
321
514
  const result = await db.Lead.forEach(async lead => {
322
515
  const analysis = await ai`analyze ${lead}`
323
516
  await db.Lead.update(lead.$id, { analysis })
324
517
  }, {
325
518
  concurrency: 10,
326
- onProgress: p => console.log(`${p.completed}/${p.total} (${p.rate.toFixed(1)}/s)`),
327
- })
328
-
329
- // With error handling and retries
330
- await db.Order.forEach(async order => {
331
- await sendInvoice(order)
332
- }, {
333
- concurrency: 5,
334
519
  maxRetries: 3,
335
- retryDelay: attempt => 1000 * Math.pow(2, attempt), // Exponential backoff
336
- onError: (err, order) => err.code === 'RATE_LIMIT' ? 'retry' : 'continue',
520
+ retryDelay: attempt => 1000 * Math.pow(2, attempt),
521
+ onProgress: p => console.log(`${p.completed}/${p.total}`),
522
+ onError: (err, lead) => err.code === 'RATE_LIMIT' ? 'retry' : 'continue',
337
523
  })
338
-
339
- console.log(`Completed: ${result.completed}, Failed: ${result.failed}`)
340
524
  ```
341
525
 
342
526
  ### forEach Options
@@ -347,9 +531,9 @@ console.log(`Completed: ${result.completed}, Failed: ${result.failed}`)
347
531
  | `maxRetries` | `number` | Retries per item (default: 0) |
348
532
  | `retryDelay` | `number \| fn` | Delay between retries |
349
533
  | `onProgress` | `fn` | Progress callback |
350
- | `onError` | `'continue' \| 'retry' \| 'skip' \| 'stop' \| fn` | Error handling |
534
+ | `onError` | `fn` | Error handling |
351
535
  | `timeout` | `number` | Timeout per item in ms |
352
- | `persist` | `boolean \| string` | Enable durability (string = custom action name) |
536
+ | `persist` | `boolean \| string` | Enable durability |
353
537
  | `resume` | `string` | Resume from action ID |
354
538
 
355
539
  ### Durable forEach
@@ -357,28 +541,14 @@ console.log(`Completed: ${result.completed}, Failed: ${result.failed}`)
357
541
  Persist progress to survive crashes:
358
542
 
359
543
  ```typescript
360
- // Enable persistence - auto-names action as "Lead.forEach"
361
544
  const result = await db.Lead.forEach(processLead, {
362
545
  concurrency: 10,
363
- persist: true,
364
- })
365
-
366
- console.log(`Action ID: ${result.actionId}`)
367
- ```
368
-
369
- Custom action name:
370
-
371
- ```typescript
372
- await db.Lead.forEach(processLead, {
373
- persist: 'analyze-leads', // Custom action name
546
+ persist: 'analyze-leads',
374
547
  })
375
- ```
376
-
377
- Resume after a crash:
378
548
 
379
- ```typescript
549
+ // Resume after crash
380
550
  await db.Lead.forEach(processLead, {
381
- resume: 'action-123', // Skips already-processed items
551
+ resume: result.actionId,
382
552
  })
383
553
  ```
384
554
 
@@ -415,673 +585,714 @@ DATABASE_URL=sqlite://./data # SQLite
415
585
  DATABASE_URL=:memory: # in-memory
416
586
  ```
417
587
 
418
- ## Documentation
588
+ ---
589
+
590
+ ## Cloudflare Workers Deployment
419
591
 
420
- - [Full Documentation](https://primitives.org.ai/database)
421
- - [CRUD Operations](https://primitives.org.ai/database/create)
422
- - [Schema Types](https://primitives.org.ai/database/schema)
423
- - [Events](https://primitives.org.ai/database/events)
592
+ ai-database provides dedicated exports for Cloudflare Workers deployment and RPC client consumption.
424
593
 
425
- ## Document Database Interface
594
+ ### /worker Export
426
595
 
427
- In addition to the schema-first graph model, `ai-database` also exports environment-agnostic types for document-based storage (MDX files with frontmatter). These types are used by `@mdxdb/*` adapters and work in any JavaScript runtime (Node.js, Bun, Deno, Workers, Browser).
596
+ Use the `/worker` export when deploying ai-database as a Cloudflare Worker service:
428
597
 
429
598
  ```typescript
430
- import type {
431
- DocumentDatabase,
432
- DocListOptions,
433
- DocSearchOptions,
434
- Document,
435
- } from 'ai-database'
599
+ // worker.ts - the ai-database service
600
+ import { DatabaseWorker, DatabaseDO } from 'ai-database/worker'
436
601
 
437
- // The DocumentDatabase interface
438
- interface DocumentDatabase<TData> {
439
- list(options?: DocListOptions): Promise<DocListResult<TData>>
440
- search(options: DocSearchOptions): Promise<DocSearchResult<TData>>
441
- get(id: string, options?: DocGetOptions): Promise<Document<TData> | null>
442
- set(id: string, doc: Document<TData>, options?: DocSetOptions): Promise<DocSetResult>
443
- delete(id: string, options?: DocDeleteOptions): Promise<DocDeleteResult>
444
- close?(): Promise<void>
445
- }
602
+ export { DatabaseDO }
603
+ export default DatabaseWorker
446
604
  ```
447
605
 
448
- ### Document Types
606
+ ```jsonc
607
+ // wrangler.jsonc
608
+ {
609
+ "name": "ai-database",
610
+ "main": "src/worker.ts",
611
+ "compatibility_date": "2024-01-01",
612
+ "durable_objects": {
613
+ "bindings": [
614
+ { "name": "DATABASE_DO", "class_name": "DatabaseDO" }
615
+ ]
616
+ }
617
+ }
618
+ ```
449
619
 
450
- | Type | Description |
451
- |------|-------------|
452
- | `Document<TData>` | MDX document with id, type, context, data, and content |
453
- | `DocumentDatabase<TData>` | Interface for document storage adapters |
454
- | `DocListOptions` | Options for listing documents (limit, offset, sortBy, type, prefix) |
455
- | `DocListResult<TData>` | List result with documents, total, hasMore |
456
- | `DocSearchOptions` | Search options (query, fields, semantic) |
457
- | `DocSearchResult<TData>` | Search result with scores |
458
- | `DocGetOptions` | Get options (includeAst, includeCode) |
459
- | `DocSetOptions` | Set options (createOnly, updateOnly, version) |
460
- | `DocSetResult` | Set result (id, version, created) |
461
- | `DocDeleteOptions` | Delete options (soft, version) |
462
- | `DocDeleteResult` | Delete result (id, deleted) |
463
-
464
- ### View Types
465
-
466
- For bi-directional relationship rendering:
620
+ ### /client Export
467
621
 
468
- | Type | Description |
469
- |------|-------------|
470
- | `ViewManager` | Interface for managing views |
471
- | `ViewDocument` | View template definition |
472
- | `ViewContext` | Context for rendering a view |
473
- | `ViewRenderResult` | Rendered markdown and entities |
474
- | `ViewSyncResult` | Mutations from extracting edited markdown |
475
- | `DocumentDatabaseWithViews` | Database with view support |
622
+ Use the `/client` export when consuming ai-database from another worker or HTTP client:
476
623
 
477
- ### Usage with @mdxdb adapters
624
+ **With Cloudflare Service Bindings (RPC):**
478
625
 
479
626
  ```typescript
480
- // Filesystem adapter
481
- import { createFsDatabase } from '@mdxdb/fs'
482
- const db = createFsDatabase({ root: './content' })
627
+ // consumer-worker.ts
628
+ import type { DatabaseService } from 'ai-database/worker'
483
629
 
484
- // API adapter
485
- import { createApiDatabase } from '@mdxdb/api'
486
- const db = createApiDatabase({ baseUrl: 'https://api.example.com' })
630
+ interface Env {
631
+ AI_DATABASE: Service<DatabaseService>
632
+ }
487
633
 
488
- // SQLite adapter
489
- import { createSqliteDatabase } from '@mdxdb/sqlite'
490
- const db = createSqliteDatabase({ path: './data.db' })
634
+ export default {
635
+ async fetch(request: Request, env: Env) {
636
+ // Direct RPC via service binding - no HTTP overhead
637
+ const service = env.AI_DATABASE.connect('my-namespace')
638
+ const post = await service.create('Post', { title: 'Hello' })
639
+ return Response.json(post)
640
+ }
641
+ }
642
+ ```
491
643
 
492
- // Same DocumentDatabase interface regardless of backend
493
- const doc = await db.get('posts/hello-world')
494
- await db.set('posts/new', { data: { title: 'New Post' }, content: '# Hello' })
644
+ ```jsonc
645
+ // consumer wrangler.jsonc
646
+ {
647
+ "services": [
648
+ { "binding": "AI_DATABASE", "service": "ai-database" }
649
+ ]
650
+ }
495
651
  ```
496
652
 
497
- ---
653
+ **With HTTP Client (rpc.do):**
498
654
 
499
- ## Relationship Operators
655
+ ```typescript
656
+ import { createDatabaseClient, DB } from 'ai-database/client'
500
657
 
501
- ai-database provides four relationship operators that control how entities are linked and how AI generation flows through your schema. These operators combine two dimensions:
658
+ // Connect to production
659
+ const client = createDatabaseClient('https://ai-database.workers.dev')
660
+ const service = client.connect('my-namespace')
502
661
 
503
- - **Direction**: Forward (`->`, `~>`) vs Backward (`<-`, `<~`)
504
- - **Match Mode**: Exact (strict foreign key) vs Fuzzy (semantic/AI-driven matching)
662
+ // CRUD operations
663
+ const post = await service.create('Post', { title: 'Hello', content: 'World' })
664
+ const posts = await service.list('Post', { limit: 10 })
665
+ const found = await service.get('Post', post.$id)
505
666
 
506
- ### Operator Reference
667
+ // Search
668
+ const results = await service.search('Post', 'hello')
669
+ const semantic = await service.semanticSearch('Post', 'greeting posts')
507
670
 
508
- | Operator | Direction | Match Mode | Description |
509
- |----------|-----------|------------|-------------|
510
- | `->` | forward | exact | Creates and links to a new entity (strict FK) |
511
- | `~>` | forward | fuzzy | Searches existing entities first, generates if no match |
512
- | `<-` | backward | exact | References an existing entity by ID |
513
- | `<~` | backward | fuzzy | Finds existing entities via semantic search |
671
+ // Relationships
672
+ await service.relate('Post', post.$id, 'author', 'User', userId)
673
+ const authors = await service.related('Post', post.$id, 'author')
514
674
 
515
- ### 1. Forward Exact (`->`)
675
+ // Events
676
+ await service.emit({ event: 'Post.published', actor: userId, object: post.$id })
677
+ const events = await service.listEvents({ event: 'Post.published' })
678
+ ```
679
+
680
+ ### TypeScript Setup for Service Bindings
516
681
 
517
- The forward exact operator creates one-to-one or one-to-many relationships where the target entity is auto-generated if not provided.
682
+ For proper type inference with service bindings, import the worker types:
518
683
 
519
684
  ```typescript
520
- const { db } = DB({
521
- Startup: {
522
- name: 'string',
523
- idea: 'What is the core idea? ->Idea', // One-to-one, auto-generated
524
- founders: ['Who are the founders? ->Founder'], // One-to-many, auto-generated
525
- },
526
- Idea: { description: 'string', solution: 'string' },
527
- Founder: { name: 'string', role: 'string' },
528
- })
685
+ // types.ts
686
+ import type { DatabaseService } from 'ai-database/worker'
529
687
 
530
- // Creating a Startup auto-generates the Idea and Founders
531
- const startup = await db.Startup.create({ name: 'Acme' })
688
+ export interface Env {
689
+ AI_DATABASE: Service<DatabaseService>
690
+ // ... other bindings
691
+ }
692
+ ```
532
693
 
533
- const idea = await startup.idea
534
- // => { $id: '...', $type: 'Idea', description: '...', solution: '...' }
694
+ ---
535
695
 
536
- const founders = await startup.founders
537
- // => [{ $id: '...', $type: 'Founder', name: '...', role: '...' }, ...]
538
- ```
696
+ ## Common Patterns
539
697
 
540
- **Key behaviors:**
541
- - Text before `->` is used as the AI generation prompt
542
- - If a value is provided, it's used instead of generating new
543
- - Optional fields (`->Type?`) skip generation when not provided
544
- - Nested forward exact fields cascade automatically
698
+ ### Self-Referential Trees
545
699
 
546
700
  ```typescript
547
- // Skip generation by providing an ID
548
- const existingIdea = await db.Idea.create({ description: 'My idea' })
549
- const startup = await db.Startup.create({
550
- name: 'Acme',
551
- idea: existingIdea.$id // Uses existing, doesn't generate
552
- })
701
+ Node: {
702
+ value: 'string',
703
+ parent: '->Node?',
704
+ children: ['<-Node.parent'],
705
+ }
553
706
  ```
554
707
 
555
- ### 2. Forward Fuzzy (`~>`)
556
-
557
- The forward fuzzy operator first searches for semantically similar existing entities. If a match is found above the similarity threshold, it reuses that entity. Otherwise, it generates a new one.
708
+ ### Union Types for Polymorphic References
558
709
 
559
710
  ```typescript
560
- const { db } = DB({
561
- Campaign: {
562
- name: 'string',
563
- audience: 'Target audience for campaign ~>Audience',
564
- },
565
- Audience: { name: 'string', description: 'string' },
566
- })
711
+ Comment: {
712
+ content: 'string',
713
+ target: '->Post|Article|Video',
714
+ }
567
715
 
568
- // Create some audiences first
569
- await db.Audience.create({
570
- name: 'Enterprise',
571
- description: 'Large corporations with 1000+ employees'
572
- })
573
- await db.Audience.create({
574
- name: 'SMB',
575
- description: 'Small businesses with less than 50 employees'
576
- })
716
+ const target = await comment.target
717
+ console.log(target.$matchedType) // 'Post', 'Article', or 'Video'
718
+ ```
577
719
 
578
- // This will find the Enterprise audience via semantic match
579
- const campaign = await db.Campaign.create({
580
- name: 'Enterprise Sales Push',
581
- audienceHint: 'Big companies with thousands of employees'
582
- })
720
+ ### Symmetric Relationships
583
721
 
584
- const audience = await campaign.audience
585
- // => { $id: '...', name: 'Enterprise', ... } (reused existing!)
722
+ ```typescript
723
+ Team: {
724
+ name: 'string',
725
+ members: ['->Member'],
726
+ },
727
+ Member: {
728
+ name: 'string',
729
+ team: '<-Team',
730
+ }
586
731
  ```
587
732
 
588
- **Key behaviors:**
589
- - Searches existing entities of the target type via semantic similarity
590
- - `${fieldName}Hint` provides context for matching (e.g., `audienceHint`)
591
- - If no match exceeds threshold, generates a new entity
592
- - Generated entities are marked with `$generated: true`
733
+ ---
593
734
 
594
- ### 3. Backward Exact (`<-`)
735
+ ## Document Database Interface
595
736
 
596
- The backward exact operator creates inverse relationships, enabling aggregation queries. The edge direction is inverted - child entities point TO the parent.
737
+ In addition to the schema-first graph model, `ai-database` exports environment-agnostic types for document-based storage (MDX files with frontmatter):
597
738
 
598
739
  ```typescript
599
- const { db } = DB({
600
- Blog: {
601
- name: 'string',
602
- posts: ['<-Post'], // All posts that reference this blog
603
- },
604
- Post: {
605
- title: 'string',
606
- blog: '->Blog', // Forward reference to parent
607
- },
608
- })
740
+ import type {
741
+ DocumentDatabase,
742
+ Document,
743
+ DocListOptions,
744
+ DocSearchOptions,
745
+ } from 'ai-database'
746
+
747
+ // Same interface regardless of backend
748
+ const doc = await db.get('posts/hello-world')
749
+ await db.set('posts/new', { data: { title: 'New Post' }, content: '# Hello' })
750
+ ```
609
751
 
610
- // Create the blog, then posts that reference it
611
- const blog = await db.Blog.create({ name: 'Tech Blog' })
612
- await db.Post.create({ title: 'Hello World', blog: blog.$id })
613
- await db.Post.create({ title: 'AI Guide', blog: blog.$id })
752
+ ### Usage with @mdxdb adapters
614
753
 
615
- // Backward ref enables aggregation queries
616
- const blogPosts = await blog.posts
617
- // => [{ title: 'Hello World', ... }, { title: 'AI Guide', ... }]
754
+ ```typescript
755
+ import { createFsDatabase } from '@mdxdb/fs'
756
+ import { createSqliteDatabase } from '@mdxdb/sqlite'
757
+ import { createApiDatabase } from '@mdxdb/api'
758
+
759
+ const db = createFsDatabase({ root: './content' })
760
+ const db = createSqliteDatabase({ path: './data.db' })
761
+ const db = createApiDatabase({ baseUrl: 'https://api.example.com' })
618
762
  ```
619
763
 
620
- **Key behaviors:**
621
- - Creates inverted edge direction (Post -> Blog, not Blog -> Post)
622
- - Enables reverse lookups and aggregation queries
623
- - Works with explicit backrefs: `['<-Post.blog']`
624
- - Handles self-referential relationships: `children: ['<-Node.parent']`
764
+ ---
625
765
 
626
- ### 4. Backward Fuzzy (`<~`)
766
+ ## Provider Capabilities
627
767
 
628
- The backward fuzzy operator combines semantic matching with inverted edge direction. Perfect for grounding generated content against reference data.
768
+ Different database providers support different features. Use `detectCapabilities()` to check what's available at runtime:
629
769
 
630
770
  ```typescript
631
- const { db } = DB({
632
- ICP: {
633
- as: 'Who are they? <~Occupation', // Ground against occupations
634
- at: 'Where do they work? <~Industry', // Ground against industries
635
- },
636
- Occupation: { title: 'string', description: 'string' },
637
- Industry: { name: 'string', naicsCode: 'string' },
638
- })
771
+ import { detectCapabilities, requireCapability, CapabilityNotSupportedError } from 'ai-database'
639
772
 
640
- // Create reference data
641
- await db.Occupation.create({ title: 'Software Developer', description: 'Writes code' })
642
- await db.Industry.create({ name: 'Technology', naicsCode: '5112' })
773
+ const capabilities = await detectCapabilities(provider)
643
774
 
644
- // ICP grounds against existing reference data
645
- const icp = await db.ICP.create({
646
- asHint: 'Engineers who build software',
647
- atHint: 'Tech companies'
648
- })
775
+ // Check capabilities
776
+ if (capabilities.hasSemanticSearch) {
777
+ const results = await provider.semanticSearch('Post', 'machine learning')
778
+ } else {
779
+ // Fallback to regular search
780
+ const results = await provider.search('Post', 'machine learning')
781
+ }
649
782
 
650
- const occupation = await icp.as
651
- // => { title: 'Software Developer', ... } (matched via semantic search)
783
+ // Require capabilities (throws if unavailable)
784
+ requireCapability(capabilities, 'hasEvents')
785
+ provider.on('Post.created', handleCreate)
652
786
  ```
653
787
 
654
- **Key behaviors:**
655
- - Uses AI/embedding similarity to find best match
656
- - Grounds generated content against curated reference data
657
- - Returns null if no semantic match found (doesn't generate)
658
- - Useful for taxonomies, categories, and standardized values
788
+ ### Capability Matrix
659
789
 
660
- ---
790
+ | Capability | MemoryProvider | RDB | DigitalObjects |
791
+ |------------|----------------|-----|----------------|
792
+ | **Semantic Search** | Yes | No | No |
793
+ | **Events API** | Yes | No | No |
794
+ | **Actions API** | Yes | No | No |
795
+ | **Artifacts** | Yes | No | No |
796
+ | **Batch Operations** | Yes | No | No |
661
797
 
662
- ## Threshold Syntax
798
+ ### Capabilities
663
799
 
664
- For fuzzy operators (`~>` and `<~`), you can configure the similarity threshold that determines when a match is accepted vs when new generation occurs.
800
+ | Capability | Description | Methods Required |
801
+ |------------|-------------|------------------|
802
+ | `hasSemanticSearch` | Vector similarity search | `semanticSearch()`, `setEmbeddingsConfig()` |
803
+ | `hasEvents` | Event emission and subscription | `on()`, `emit()`, `listEvents()` |
804
+ | `hasActions` | Durable action tracking | `createAction()`, `getAction()`, `updateAction()` |
805
+ | `hasArtifacts` | Artifact/cache storage | `getArtifact()`, `setArtifact()` |
806
+ | `hasBatchOperations` | Concurrency-controlled batching | `withConcurrency()` or `mapWithConcurrency()` |
665
807
 
666
- ### Field-Level Thresholds
808
+ ### Graceful Degradation
667
809
 
668
- Append threshold in parentheses after the type name:
810
+ When a capability isn't available, use fallbacks:
669
811
 
670
812
  ```typescript
671
- const { db } = DB({
672
- Event: {
673
- venue: 'Where is the event? ~>Venue(0.9)', // High threshold - strict match
674
- sponsor: 'Event sponsor ~>Company(0.5)', // Low threshold - lenient match
675
- },
676
- Venue: { name: 'string', address: 'string' },
677
- Company: { name: 'string' },
678
- })
813
+ import { detectCapabilities, warnIfUnavailable } from 'ai-database'
814
+
815
+ const capabilities = await detectCapabilities(provider)
816
+
817
+ // Log a warning (once) if semantic search unavailable
818
+ warnIfUnavailable(capabilities, 'hasSemanticSearch', 'semanticSearch')
819
+
820
+ // Use capability with fallback
821
+ async function searchPosts(query: string) {
822
+ if (capabilities.hasSemanticSearch) {
823
+ return provider.semanticSearch('Post', query)
824
+ }
825
+ return provider.search('Post', query)
826
+ }
679
827
  ```
680
828
 
681
- **Threshold values:**
682
- - `0.9` - Very strict: Only near-exact semantic matches
683
- - `0.7` - Default: Balanced matching
684
- - `0.5` - Lenient: Accept loosely related matches
829
+ ### Features Requiring Semantic Search
685
830
 
686
- ### Entity-Level Thresholds
831
+ When using a provider without semantic search support (e.g., RDB), some features behave differently:
832
+
833
+ | Feature | With Semantic Search | Without Semantic Search |
834
+ |---------|---------------------|------------------------|
835
+ | `~>` Forward Fuzzy | Matches via vector similarity, falls back to generation | Uses text search fallback, then generates if no match |
836
+ | `<~` Backward Fuzzy | Matches via vector similarity | Uses text search fallback |
837
+ | `db.Entity.semanticSearch()` | Vector similarity search | Throws `CapabilityNotSupportedError` |
838
+ | `db.Entity.hybridSearch()` | Combined FTS + vector search | Throws `CapabilityNotSupportedError` |
839
+ | `db.semanticSearch()` | Global vector search | Throws `CapabilityNotSupportedError` |
687
840
 
688
- Set a default threshold for all fuzzy fields in an entity:
841
+ **Fuzzy Operator Fallback**: When semantic search is unavailable, fuzzy operators (`~>` and `<~`) gracefully degrade to basic text search:
689
842
 
690
843
  ```typescript
844
+ // Without semantic search, these operators use text matching instead of embeddings
691
845
  const { db } = DB({
692
- Startup: {
693
- $fuzzyThreshold: 0.85, // Apply to all ~> and <~ fields
694
- customer: 'Who is the customer? ~>Customer',
695
- competitor: 'Main competitor ~>Company',
846
+ Article: {
847
+ category: '~>Category', // Will use text search fallback
696
848
  },
697
- Customer: { name: 'string' },
698
- Company: { name: 'string' },
849
+ Category: { name: 'string' }
699
850
  })
851
+
852
+ // Forward fuzzy (~>) tries text search first, generates if no match found
853
+ await db.Article.create({ categoryHint: 'Tech' }) // Searches for 'Tech' in categories
854
+
855
+ // Backward fuzzy (<~) uses text search only - never generates
856
+ await db.Article.create({ categoryHint: 'Tech' }) // With `category: '<~Category'`, resolves to null if no text match
700
857
  ```
701
858
 
702
- **Matching behavior:**
703
- 1. If similarity score >= threshold: Reuse existing entity
704
- 2. If similarity score < threshold: Generate new entity (for `~>`) or return null (for `<~`)
859
+ **Explicit Search Methods**: When semantic search is unavailable, the explicit semantic search methods throw a `CapabilityNotSupportedError` that names the missing capability and suggests an alternative:
860
+
861
+ ```typescript
862
+ import { CapabilityNotSupportedError, isCapabilityNotSupportedError } from 'ai-database'
863
+
864
+ try {
865
+ await db.Post.semanticSearch('machine learning')
866
+ } catch (error) {
867
+ if (isCapabilityNotSupportedError(error)) {
868
+ console.log(error.capability) // 'hasSemanticSearch'
869
+ console.log(error.alternative) // 'Use the regular search() method instead...'
870
+ // Fall back to text search
871
+ const results = await db.Post.search('machine learning')
872
+ }
873
+ }
874
+ ```
705
875
 
706
876
  ---
707
877
 
708
- ## Cascade Generation
878
+ ## Integration with RDB
709
879
 
710
- Cascade generation automatically creates related entities recursively, building complex entity graphs from a single `create()` call.
880
+ [RDB](https://github.com/ai-primitives/rdb) provides a simple relational database backend for ai-database. Use it when you want:
711
881
 
712
- ### Basic Cascade
882
+ - Edge-native storage via Cloudflare Durable Objects or D1
883
+ - Simple two-table schema (`_data` and `_rels`)
884
+ - Graph traversal and relationship queries
713
885
 
714
- Enable cascade with the `cascade` option:
886
+ ### Creating an RDB Provider Adapter
715
887
 
716
888
  ```typescript
717
- const { db } = DB({
718
- Company: {
719
- name: 'string',
720
- departments: ['What departments exist? ->Department'],
721
- },
722
- Department: {
723
- name: 'string',
724
- teams: ['What teams work here? ->Team'],
725
- },
726
- Team: {
727
- name: 'string',
728
- members: ['Who are the team members? ->Employee'],
729
- },
730
- Employee: { name: 'string', role: 'string' },
731
- })
889
+ import { setProvider, DB } from 'ai-database'
890
+ import type { DBProvider, ListOptions, SearchOptions } from 'ai-database'
891
+ import { RDB } from '@dotdo/rdb'
732
892
 
733
- const company = await db.Company.create(
734
- { name: 'TechCorp' },
735
- { cascade: true, maxDepth: 4 }
736
- )
893
+ // Adapter to bridge RDB and ai-database interfaces
894
+ class RDBProviderAdapter implements DBProvider {
895
+ private rdb: RDB
737
896
 
738
- // Entire org chart generated automatically!
739
- const departments = await company.departments
740
- const teams = await departments[0].teams
741
- const members = await teams[0].members
742
- ```
897
+ constructor(sqlStorage: SqlStorage) {
898
+ this.rdb = new RDB(sqlStorage)
899
+ }
743
900
 
744
- ### Cascade Options
901
+ async get(type: string, id: string) {
902
+ const entity = await this.rdb.get(type, id)
903
+ if (!entity) return null
904
+ return { $id: entity.id, $type: entity.type, ...entity }
905
+ }
745
906
 
746
- | Option | Type | Default | Description |
747
- |--------|------|---------|-------------|
748
- | `cascade` | `boolean` | `false` | Enable cascade generation |
749
- | `maxDepth` | `number` | `0` | Maximum depth of recursive generation |
750
- | `onProgress` | `function` | - | Callback for progress tracking |
751
- | `onError` | `function` | - | Error handler callback |
752
- | `stopOnError` | `boolean` | `false` | Stop cascade on first error |
753
- | `cascadeTypes` | `string[]` | - | Only cascade to these types |
907
+ async list(type: string, options?: ListOptions) {
908
+ const entities = await this.rdb.list(type, options)
909
+ return entities.map(e => ({ $id: e.id, $type: e.type, ...e }))
910
+ }
754
911
 
755
- ### Depth Control
912
+ async search(type: string, query: string, options?: SearchOptions) {
913
+ // RDB uses filter-based search; perform text matching
914
+ const all = await this.rdb.list(type, options)
915
+ return all
916
+ .filter(e => JSON.stringify(e).toLowerCase().includes(query.toLowerCase()))
917
+ .map(e => ({ $id: e.id, $type: e.type, ...e }))
918
+ }
756
919
 
757
- Control how deep the cascade goes:
920
+ async create(type: string, id: string | undefined, data: Record<string, unknown>) {
921
+ const entity = await this.rdb.create(type, data, id)
922
+ return { $id: entity.id, $type: entity.type, ...entity }
923
+ }
758
924
 
759
- ```typescript
760
- // maxDepth: 0 - No cascade (default)
761
- const root = await db.Root.create({ name: 'Test' }, { cascade: true, maxDepth: 0 })
762
- // root.items is empty - not generated
925
+ async update(type: string, id: string, data: Record<string, unknown>) {
926
+ const entity = await this.rdb.update(type, id, data)
927
+ return { $id: entity.id, $type: entity.type, ...entity }
928
+ }
763
929
 
764
- // maxDepth: 1 - Only immediate children
765
- const parent = await db.Parent.create({ name: 'P' }, { cascade: true, maxDepth: 1 })
766
- // parent.children generated, but grandchildren are not
930
+ async delete(type: string, id: string): Promise<boolean> {
931
+ const exists = await this.rdb.get(type, id)
932
+ if (!exists) return false
933
+ await this.rdb.delete(type, id)
934
+ return true
935
+ }
767
936
 
768
- // maxDepth: 3 - Three levels deep
769
- const company = await db.Company.create({ name: 'X' }, { cascade: true, maxDepth: 3 })
770
- // company -> departments -> teams -> employees (stops at employees)
771
- ```
937
+ async related(type: string, id: string, relation: string) {
938
+ const entities = await this.rdb.related(type, id, relation)
939
+ return entities.map(e => ({ $id: e.id, $type: e.type, ...e }))
940
+ }
772
941
 
773
- ### Progress Tracking
942
+ async relate(fromType: string, fromId: string, relation: string, toType: string, toId: string, metadata?: object) {
943
+ await this.rdb.relate(fromType, fromId, relation, toType, toId, metadata)
944
+ }
774
945
 
775
- Monitor cascade generation progress:
946
+ async unrelate(fromType: string, fromId: string, relation: string, toType: string, toId: string) {
947
+ await this.rdb.unrelate(fromType, fromId, relation, toType, toId)
948
+ }
949
+ }
776
950
 
777
- ```typescript
778
- const company = await db.Company.create(
779
- { name: 'TechCorp' },
780
- {
781
- cascade: true,
782
- maxDepth: 4,
783
- onProgress: (progress) => {
784
- console.log(`Phase: ${progress.phase}`) // 'generating' or 'complete'
785
- console.log(`Depth: ${progress.depth}`) // Current depth level
786
- console.log(`Type: ${progress.currentType}`) // Type being generated
787
- console.log(`Total: ${progress.totalEntitiesCreated}`)
788
- },
951
+ // Usage in a Durable Object
952
+ export class MyDO extends DurableObject {
953
+ constructor(ctx: DurableObjectState, env: Env) {
954
+ super(ctx, env)
955
+ setProvider(new RDBProviderAdapter(ctx.storage.sql))
789
956
  }
790
- )
957
+ }
958
+
959
+ // Now use ai-database schema with RDB backend
960
+ const { db } = DB({
961
+ Post: { title: 'string', author: '->Author.posts' },
962
+ Author: { name: 'string' },
963
+ })
964
+
965
+ const author = await db.Author.create({ name: 'Alice' })
966
+ const post = await db.Post.create({ title: 'Hello', author: author.$id })
791
967
  ```
792
968
 
793
- ### Selective Cascade
969
+ ### Limitations with RDB
794
970
 
795
- Only cascade to specific types:
971
+ When using RDB as a provider:
796
972
 
797
- ```typescript
798
- const company = await db.Company.create(
799
- { name: 'TechCorp' },
800
- {
801
- cascade: true,
802
- maxDepth: 3,
803
- cascadeTypes: ['Department', 'Team'], // Skip Employee generation
804
- }
805
- )
806
- ```
973
+ - **No semantic search**: Fuzzy operators (`~>`, `<~`) require vector embeddings. Use exact operators (`->`, `<-`) instead, or use MemoryProvider for semantic matching.
974
+ - **No events/actions API**: RDB focuses on core CRUD and relationships.
975
+ - **Text search only**: The `search()` method performs text matching, not semantic similarity.
807
976
 
808
977
  ---
809
978
 
810
- ## Special Variables
979
+ ## AI Integration
811
980
 
812
- ### `$instructions`
981
+ ai-database integrates with AI providers for two core capabilities:
982
+
983
+ 1. **Entity Generation** - AI-powered content generation for schema fields using `ai-functions`
984
+ 2. **Semantic Search** - Vector embeddings for fuzzy matching (`~>`, `<~` operators) and similarity search
985
+
986
+ ### Supported AI Providers
987
+
988
+ #### For Entity Generation
813
989
 
814
- Entity-level prompting that guides AI generation for all fields:
990
+ Entity generation uses [ai-functions](https://github.com/ai-primitives/ai-primitives/tree/main/packages/ai-functions) which supports:
991
+
992
+ | Provider | Models | Configuration |
993
+ |----------|--------|---------------|
994
+ | **Anthropic** | claude-3-5-sonnet, claude-3-opus, claude-3-haiku | `ANTHROPIC_API_KEY` |
995
+ | **OpenAI** | gpt-4o, gpt-4-turbo, gpt-3.5-turbo | `OPENAI_API_KEY` |
996
+ | **Google** | gemini-1.5-pro, gemini-1.5-flash | `GOOGLE_API_KEY` |
997
+ | **Local Models** | Ollama, LM Studio, llama.cpp | `AI_BASE_URL` |
815
998
 
816
999
  ```typescript
817
- const { db } = DB({
818
- Character: {
819
- $instructions: 'This character is from a medieval fantasy setting',
820
- name: 'string',
821
- backstory: 'What is their history?', // Influenced by $instructions
822
- },
1000
+ import { DB, configureAIGeneration } from 'ai-database'
1001
+
1002
+ // Configure the AI model for entity generation
1003
+ configureAIGeneration({
1004
+ model: 'sonnet', // Model alias (see ai-functions for full list)
1005
+ enabled: true, // Enable AI generation (default: true)
1006
+ onGenerate: (details) => {
1007
+ // Track generation calls for monitoring
1008
+ console.log(`Generated ${details.entityType} in ${details.latencyMs}ms`)
1009
+ if (details.error) console.error('Generation failed:', details.error)
1010
+ }
823
1011
  })
824
1012
 
825
- const character = await db.Character.create({ name: 'Sir Aldric' })
826
- // backstory will reference medieval elements (castles, knights, quests)
1013
+ const { db } = DB({
1014
+ BlogPost: {
1015
+ title: 'string',
1016
+ content: 'Write a detailed blog post about this topic', // AI generates this
1017
+ summary: 'Summarize the content in 2 sentences',
1018
+ }
1019
+ })
827
1020
  ```
828
1021
 
829
- **Template variables** resolve against entity data:
1022
+ #### For Embeddings/Semantic Search
1023
+
1024
+ Embedding generation for semantic search can use any provider that produces vector embeddings:
1025
+
1026
+ | Provider | Models | Dimensions | Configuration |
1027
+ |----------|--------|------------|---------------|
1028
+ | **OpenAI** | text-embedding-3-small | 1536 | `OPENAI_API_KEY` |
1029
+ | **OpenAI** | text-embedding-3-large | 3072 | `OPENAI_API_KEY` |
1030
+ | **Cohere** | embed-english-v3.0 | 1024 | `COHERE_API_KEY` |
1031
+ | **Voyage AI** | voyage-large-2 | 1024-4096 | `VOYAGE_API_KEY` |
1032
+ | **Local** | sentence-transformers | 384 | Self-hosted |
830
1033
 
831
1034
  ```typescript
832
- const { db } = DB({
833
- Problem: {
834
- $instructions: `
835
- Identify problems for occupation: {task.occupation.title}
836
- in industry: {task.occupation.industry.name}
837
- `,
838
- task: '<-Task',
839
- description: 'string',
840
- },
841
- Task: { name: 'string', occupation: '->Occupation' },
842
- Occupation: { title: 'string', industry: '->Industry' },
843
- Industry: { name: 'string' },
1035
+ import { createMemoryProvider, setProvider } from 'ai-database'
1036
+
1037
+ // Configure embedding dimensions to match your provider
1038
+ const provider = createMemoryProvider({
1039
+ embeddingDimensions: 1536, // Match OpenAI text-embedding-3-small
844
1040
  })
1041
+ setProvider(provider)
845
1042
  ```
846
1043
 
847
- ### `$context`
1044
+ ---
848
1045
 
849
- Explicit context dependencies that are pre-fetched before generation:
1046
+ ### Configuring Embedding Generation
1047
+
1048
+ Control which fields are embedded for semantic search:
850
1049
 
851
1050
  ```typescript
1051
+ import { DB } from 'ai-database'
1052
+
852
1053
  const { db } = DB({
853
- Ad: {
854
- $context: ['Startup', 'ICP'], // Pre-fetch these for template resolution
855
- $instructions: 'Generate ad for {startup.name} targeting {icp.as}',
856
- startup: '<-Startup',
857
- headline: 'string (30 chars)',
1054
+ Article: {
1055
+ title: 'string',
1056
+ content: 'markdown',
1057
+ authorId: 'string', // Won't be embedded (not text content)
858
1058
  },
859
- Startup: { name: 'string', icp: '->ICP' },
860
- ICP: { as: 'string' },
1059
+ InternalNote: {
1060
+ text: 'string',
1061
+ }
1062
+ }, {
1063
+ embeddings: {
1064
+ // Specify which fields to embed for Article
1065
+ Article: { fields: ['title', 'content'] },
1066
+
1067
+ // Disable embeddings for InternalNote (won't appear in semantic search)
1068
+ InternalNote: false,
1069
+ }
861
1070
  })
1071
+ ```
862
1072
 
863
- const icp = await db.ICP.create({ as: 'Software Engineers' })
864
- const startup = await db.Startup.create({ name: 'CodeHelper', icp: icp.$id })
865
- const ad = await db.Ad.create({ startup: startup.$id })
1073
+ #### Embedding Configuration Options
866
1074
 
867
- // headline will mention CodeHelper and Software Engineers
868
- ```
1075
+ | Option | Type | Description |
1076
+ |--------|------|-------------|
1077
+ | `fields` | `string[]` | Fields to include in embedding (default: auto-detect text fields) |
1078
+ | `false` | `boolean` | Disable embeddings for this entity type |
1079
+
1080
+ #### Auto-Detection
869
1081
 
870
- **Why use `$context`?**
871
- - Explicitly declares what entities are needed for generation
872
- - Enables efficient pre-fetching of related data
873
- - Makes template variable resolution predictable
874
- - Supports multiple levels of relationship traversal
1082
+ If no `embeddings` config is provided, ai-database automatically embeds:
1083
+ - All `string` fields (except those whose names end in `Id` or `At`, or start with `$` or `_`)
1084
+ - All `markdown` fields
1085
+ - String arrays (concatenated)
875
1086
 
876
1087
  ---
877
1088
 
878
- ## Complete Examples
1089
+ ### Cost and Token Implications
1090
+
1091
+ Understanding token usage is critical for production deployments. Here's what triggers AI API calls:
1092
+
1093
+ #### Entity Generation Costs
879
1094
 
880
- ### Example 1: Startup Generator
1095
+ | Operation | AI Calls | When |
1096
+ |-----------|----------|------|
1097
+ | `create()` with prompt fields | 1 per entity | Fields like `'Write a description'` |
1098
+ | `create(data, { cascade: true })` | 1 per cascaded entity | Each `->` forward relation |
1099
+ | `create()` with a `~>` fuzzy reference | 1 embedding + search | May also trigger a generation call if no semantic match is found |
881
1100
 
882
- A complete startup pitch generator with cascading entity creation:
1101
+ **Example: Cascade Cost Estimation**
883
1102
 
884
1103
  ```typescript
885
1104
  const { db } = DB({
886
- Startup: {
887
- $instructions: 'Generate a B2B SaaS startup',
888
- name: 'string',
889
- idea: 'What problem does this solve? ->Idea',
890
- founders: ['Who are the founding team? ->Founder'],
891
- customer: 'Who is the target customer? ~>CustomerPersona',
892
- },
893
- Idea: {
894
- problem: 'string',
895
- solution: 'string',
896
- differentiator: 'What makes this unique?',
1105
+ Blog: {
1106
+ title: 'string',
1107
+ topics: ['Generate 5 topics ->Topic'], // Creates 5 Topic entities
897
1108
  },
898
- Founder: {
1109
+ Topic: {
899
1110
  name: 'string',
900
- role: 'string',
901
- background: 'Previous experience',
1111
+ posts: ['Generate 3 posts ->Post'], // Creates 3 Post entities per Topic
902
1112
  },
903
- CustomerPersona: {
1113
+ Post: {
904
1114
  title: 'string',
905
- painPoints: 'string',
906
- budget: 'string',
907
- },
908
- })
909
-
910
- // Pre-populate customer personas
911
- await db.CustomerPersona.create({
912
- title: 'VP of Engineering',
913
- painPoints: 'Managing distributed teams, code quality',
914
- budget: '$50k-100k annually',
1115
+ content: 'Write a 500-word blog post', // AI generates ~500 words
1116
+ }
915
1117
  })
916
1118
 
917
- // Generate complete startup with one call
918
- const startup = await db.Startup.create(
919
- { name: 'DevFlow' },
920
- { cascade: true, maxDepth: 2 }
1119
+ // This single call generates:
1120
+ // - 1 Blog (1 generation call)
1121
+ // - 5 Topics (5 generation calls)
1122
+ // - 15 Posts (15 generation calls, each ~500 words)
1123
+ // Total: 21 AI generation calls
1124
+ const blog = await db.Blog.create(
1125
+ { title: 'My Tech Blog' },
1126
+ { cascade: true, maxDepth: 3 }
921
1127
  )
922
-
923
- // Access generated entities
924
- const idea = await startup.idea
925
- const founders = await startup.founders
926
- const customer = await startup.customer // May match existing or generate new
927
1128
  ```
928
1129
 
929
- ### Example 2: Content Management with Grounding
930
-
931
- Grounding generated content against reference taxonomies:
1130
+ **Cost Control Strategies:**
932
1131
 
933
1132
  ```typescript
934
- const { db } = DB({
935
- Article: {
936
- $instructions: 'Write a technical blog post',
937
- title: 'string',
938
- content: 'markdown',
939
- category: 'What category? <~Category', // Ground against existing
940
- tags: ['Relevant tags <~Tag'], // Multiple semantic matches
941
- author: '->Author', // Auto-generate author
942
- },
943
- Category: { name: 'string', description: 'string' },
944
- Tag: { name: 'string' },
945
- Author: { name: 'string', bio: 'string' },
946
- })
1133
+ // 1. Limit cascade depth to control entity count
1134
+ await db.Blog.create(data, { cascade: true, maxDepth: 1 }) // Only creates immediate children
947
1135
 
948
- // Set up taxonomy
949
- await db.Category.create({ name: 'Artificial Intelligence', description: 'ML and AI topics' })
950
- await db.Category.create({ name: 'Web Development', description: 'Frontend and backend' })
951
- await db.Tag.create({ name: 'Machine Learning' })
952
- await db.Tag.create({ name: 'Deep Learning' })
953
- await db.Tag.create({ name: 'React' })
954
-
955
- // Create article - content grounded against existing categories
956
- const article = await db.Article.create({
957
- title: 'Introduction to Neural Networks',
958
- categoryHint: 'AI and machine learning topics',
959
- tagsHint: ['neural network concepts', 'ML fundamentals'],
1136
+ // 2. Filter which types cascade
1137
+ await db.Blog.create(data, {
1138
+ cascade: true,
1139
+ cascadeTypes: ['Topic'] // Only cascade to Topic, not Post
960
1140
  })
961
1141
 
962
- const category = await article.category
963
- // => { name: 'Artificial Intelligence', ... } - matched existing!
1142
+ // 3. Track costs with onGenerate callback
1143
+ let totalTokens = 0
1144
+ configureAIGeneration({
1145
+ model: 'sonnet',
1146
+ onGenerate: (details) => {
1147
+ if (details.result) {
1148
+ // Estimate tokens (actual count depends on provider)
1149
+ const inputTokens = details.prompt.length / 4
1150
+ const outputTokens = JSON.stringify(details.result).length / 4
1151
+ totalTokens += inputTokens + outputTokens
1152
+ console.log(`Running total: ~${totalTokens} tokens`)
1153
+ }
1154
+ }
1155
+ })
964
1156
 
965
- const tags = await article.tags
966
- // => [{ name: 'Machine Learning' }, { name: 'Deep Learning' }] - matched existing!
1157
+ // 4. Use draft() to preview an entity without committing it
1158
+ const draft = await db.Blog.draft({ title: 'Test' })
1159
+ // Review draft before creating
1160
+ const entity = await draft.resolve()
967
1161
  ```
968
1162
 
969
- ### Example 3: Hierarchical Organization Chart
1163
+ #### Embedding Costs
1164
+
1165
+ | Operation | Embedding Calls | Notes |
1166
+ |-----------|-----------------|-------|
1167
+ | `create()` | 1 per entity | Embeds text fields on creation |
1168
+ | `update()` | 1 if text changed | Re-embeds when text fields update |
1169
+ | `semanticSearch()` | 1 for query | Embeds query string |
1170
+ | `hybridSearch()` | 1 for query | Embeds query string |
1171
+ | `~>` or `<~` resolution | 1 per hint | Embeds the hint text for matching |
1172
+
1173
+ ---
1174
+
1175
+ ### Rate Limiting Best Practices
1176
+
1177
+ ai-database provides built-in concurrency control to prevent API rate limit errors:
970
1178
 
971
- Building a complete org structure with bidirectional navigation:
1179
+ #### Using forEach with Concurrency
972
1180
 
973
1181
  ```typescript
974
- const { db } = DB({
975
- Company: {
976
- name: 'string',
977
- ceo: '->Person',
978
- departments: ['<-Department'], // Backward ref for aggregation
979
- },
980
- Department: {
981
- name: 'string',
982
- company: '->Company', // Forward ref to parent
983
- head: '->Person',
984
- employees: ['<-Person'], // All people in this department
985
- },
986
- Person: {
987
- name: 'string',
988
- role: 'string',
989
- department: '->Department?', // Optional department
990
- reportsTo: '->Person?', // Self-referential hierarchy
991
- directReports: ['<-Person.reportsTo'],
1182
+ // Process entities with controlled concurrency
1183
+ const result = await db.Lead.forEach(async (lead) => {
1184
+ const analysis = await generateAnalysis(lead)
1185
+ await db.Lead.update(lead.$id, { analysis })
1186
+ }, {
1187
+ concurrency: 10, // Max 10 parallel operations
1188
+ maxRetries: 3, // Retry failed items up to 3 times
1189
+ retryDelay: (attempt) => 1000 * Math.pow(2, attempt), // Exponential backoff
1190
+
1191
+ // Handle rate limit errors specifically
1192
+ onError: (error, lead) => {
1193
+ if (error.message.includes('rate_limit') || error.message.includes('429')) {
1194
+ return 'retry' // Retry with exponential backoff
1195
+ }
1196
+ return 'continue' // Skip this item and continue
992
1197
  },
993
- })
994
-
995
- // Create company structure
996
- const company = await db.Company.create({ name: 'TechCorp' })
997
- const engineering = await db.Department.create({
998
- name: 'Engineering',
999
- company: company.$id,
1000
- })
1001
1198
 
1002
- const cto = await db.Person.create({
1003
- name: 'Alice',
1004
- role: 'CTO',
1005
- department: engineering.$id,
1199
+ onProgress: (progress) => {
1200
+ console.log(`${progress.completed}/${progress.total} (${progress.failed} failed)`)
1201
+ }
1006
1202
  })
1203
+ ```
1007
1204
 
1008
- const engineer = await db.Person.create({
1009
- name: 'Bob',
1010
- role: 'Senior Engineer',
1011
- department: engineering.$id,
1012
- reportsTo: cto.$id,
1013
- })
1205
+ #### Provider-Level Concurrency
1014
1206
 
1015
- // Navigate bidirectionally
1016
- const aliceReports = await cto.directReports
1017
- // => [{ name: 'Bob', ... }]
1207
+ ```typescript
1208
+ import { createMemoryProvider } from 'ai-database'
1018
1209
 
1019
- const engineeringTeam = await engineering.employees
1020
- // => [{ name: 'Alice', ... }, { name: 'Bob', ... }]
1210
+ // Configure concurrency at the provider level
1211
+ const provider = createMemoryProvider({
1212
+ concurrency: 10, // Global limit on parallel operations
1213
+ })
1021
1214
  ```
1022
1215
 
1023
- ---
1216
+ #### Execution Queue for Batch Operations
1024
1217
 
1025
- ## Common Patterns
1026
-
1027
- ### Union Types for Polymorphic References
1218
+ For large-scale operations with different priority levels:
1028
1219
 
1029
1220
  ```typescript
1030
- const { db } = DB({
1031
- Comment: {
1032
- content: 'string',
1033
- target: '->Post|Article|Video', // Can reference any of these types
1034
- },
1035
- Post: { title: 'string' },
1036
- Article: { title: 'string' },
1037
- Video: { title: 'string', url: 'url' },
1221
+ import { ExecutionQueue } from 'ai-database'
1222
+
1223
+ const queue = new ExecutionQueue({
1224
+ concurrency: {
1225
+ priority: 50, // High-priority operations
1226
+ standard: 20, // Normal operations
1227
+ flex: 10, // Low-priority background operations
1228
+ batch: 1000, // Batch window operations
1229
+ }
1038
1230
  })
1039
1231
 
1040
- const target = await comment.target
1041
- console.log(target.$matchedType) // 'Post', 'Article', or 'Video'
1232
+ // Submit operations with priority
1233
+ await queue.submit(
1234
+ () => db.Lead.create({ name: 'Important Lead' }),
1235
+ { priority: 'priority' } // Runs with higher concurrency
1236
+ )
1042
1237
  ```
1043
1238
 
1044
- ### Self-Referential Trees
1239
+ #### Rate Limit Patterns by Provider
1240
+
1241
+ | Provider | Rate Limits | Recommended Concurrency |
1242
+ |----------|-------------|------------------------|
1243
+ | **OpenAI** | 60-10000 RPM (varies by tier) | 5-50 |
1244
+ | **Anthropic** | 60-4000 RPM (varies by tier) | 5-40 |
1245
+ | **Cohere** | 100 RPM (trial), 10000 RPM (prod) | 5-100 |
1246
+ | **Local (Ollama)** | Limited by hardware | 1-4 |
1247
+
1248
+ **Recommended Configuration by Use Case:**
1045
1249
 
1046
1250
  ```typescript
1047
- const { db } = DB({
1048
- Node: {
1049
- value: 'string',
1050
- parent: '->Node?',
1051
- children: ['<-Node.parent'],
1052
- },
1053
- })
1251
+ // Development/Testing - Low concurrency, fail fast
1252
+ configureAIGeneration({ model: 'sonnet', enabled: true })
1253
+ await db.Entity.forEach(fn, { concurrency: 2, maxRetries: 1 })
1054
1254
 
1055
- const root = await db.Node.create({ value: 'Root' })
1056
- const child = await db.Node.create({ value: 'Child', parent: root.$id })
1255
+ // Production - Moderate concurrency with retries
1256
+ await db.Entity.forEach(fn, {
1257
+ concurrency: 10,
1258
+ maxRetries: 3,
1259
+ retryDelay: attempt => 1000 * Math.pow(2, attempt)
1260
+ })
1057
1261
 
1058
- const rootChildren = await root.children
1059
- // => [{ value: 'Child', ... }]
1262
+ // Batch Processing - Low concurrency, high retry tolerance
1263
+ await db.Entity.forEach(fn, {
1264
+ concurrency: 5,
1265
+ maxRetries: 5,
1266
+ retryDelay: attempt => 2000 * Math.pow(2, attempt),
1267
+ timeout: 60000, // 60 second timeout per item
1268
+ persist: 'batch-job-123', // Resume on crash
1269
+ })
1060
1270
  ```
1061
1271
 
1062
- ### Symmetric Relationships
1272
+ ---
1273
+
1274
+ ### Disabling AI Generation
1275
+
1276
+ For testing or when you want placeholder values instead of AI generation:
1063
1277
 
1064
1278
  ```typescript
1065
- const { db } = DB({
1066
- Team: {
1067
- name: 'string',
1068
- members: ['->Member'],
1069
- },
1070
- Member: {
1071
- name: 'string',
1072
- team: '<-Team', // Points back to team
1073
- },
1074
- })
1279
+ import { configureAIGeneration } from 'ai-database'
1075
1280
 
1076
- // Creating team generates members
1077
- const team = await db.Team.create({ name: 'Engineering' }, { cascade: true })
1281
+ // Disable AI globally - uses deterministic placeholder values
1282
+ configureAIGeneration({ enabled: false })
1078
1283
 
1079
- // Each member can navigate back to team
1080
- const member = (await team.members)[0]
1081
- const memberTeam = await member.team
1082
- // memberTeam.$id === team.$id
1284
+ // Or per-DB instance
1285
+ const { db } = DB(schema, {
1286
+ aiGeneration: { enabled: false }
1287
+ })
1083
1288
  ```
1084
1289
 
1290
+ When AI is disabled:
1291
+ - Prompt fields generate deterministic placeholder text
1292
+ - Fuzzy operators (`~>`, `<~`) fall back to text search
1293
+ - No API calls are made to AI providers
1294
+ - Tests run faster and don't require API keys
1295
+
1085
1296
  ---
1086
1297
 
1087
1298
  ## Related