smokin 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. package/CHANGELOG.md +45 -0
  2. package/LICENSE +21 -0
  3. package/README.md +366 -0
  4. package/dist/dataset/dataset.d.ts +43 -0
  5. package/dist/dataset/dataset.d.ts.map +1 -0
  6. package/dist/dataset/dataset.js +63 -0
  7. package/dist/dataset/dataset.js.map +1 -0
  8. package/dist/dataset/relate.d.ts +32 -0
  9. package/dist/dataset/relate.d.ts.map +1 -0
  10. package/dist/dataset/relate.js +46 -0
  11. package/dist/dataset/relate.js.map +1 -0
  12. package/dist/foundation/axes.d.ts +92 -0
  13. package/dist/foundation/axes.d.ts.map +1 -0
  14. package/dist/foundation/axes.js +42 -0
  15. package/dist/foundation/axes.js.map +1 -0
  16. package/dist/foundation/errors.d.ts +30 -0
  17. package/dist/foundation/errors.d.ts.map +1 -0
  18. package/dist/foundation/errors.js +53 -0
  19. package/dist/foundation/errors.js.map +1 -0
  20. package/dist/foundation/hash.d.ts +16 -0
  21. package/dist/foundation/hash.d.ts.map +1 -0
  22. package/dist/foundation/hash.js +26 -0
  23. package/dist/foundation/hash.js.map +1 -0
  24. package/dist/foundation/ir.d.ts +79 -0
  25. package/dist/foundation/ir.d.ts.map +1 -0
  26. package/dist/foundation/ir.js +16 -0
  27. package/dist/foundation/ir.js.map +1 -0
  28. package/dist/foundation/prng.d.ts +27 -0
  29. package/dist/foundation/prng.d.ts.map +1 -0
  30. package/dist/foundation/prng.js +56 -0
  31. package/dist/foundation/prng.js.map +1 -0
  32. package/dist/foundation/types.d.ts +59 -0
  33. package/dist/foundation/types.d.ts.map +1 -0
  34. package/dist/foundation/types.js +99 -0
  35. package/dist/foundation/types.js.map +1 -0
  36. package/dist/foundation/walk.d.ts +43 -0
  37. package/dist/foundation/walk.d.ts.map +1 -0
  38. package/dist/foundation/walk.js +156 -0
  39. package/dist/foundation/walk.js.map +1 -0
  40. package/dist/generator/engine.d.ts +62 -0
  41. package/dist/generator/engine.d.ts.map +1 -0
  42. package/dist/generator/engine.js +369 -0
  43. package/dist/generator/engine.js.map +1 -0
  44. package/dist/generator/replay.d.ts +31 -0
  45. package/dist/generator/replay.d.ts.map +1 -0
  46. package/dist/generator/replay.js +66 -0
  47. package/dist/generator/replay.js.map +1 -0
  48. package/dist/generator/trace.d.ts +50 -0
  49. package/dist/generator/trace.d.ts.map +1 -0
  50. package/dist/generator/trace.js +39 -0
  51. package/dist/generator/trace.js.map +1 -0
  52. package/dist/index.d.ts +32 -0
  53. package/dist/index.d.ts.map +1 -0
  54. package/dist/index.js +37 -0
  55. package/dist/index.js.map +1 -0
  56. package/dist/schema/composites.d.ts +91 -0
  57. package/dist/schema/composites.d.ts.map +1 -0
  58. package/dist/schema/composites.js +94 -0
  59. package/dist/schema/composites.js.map +1 -0
  60. package/dist/schema/conditional.d.ts +22 -0
  61. package/dist/schema/conditional.d.ts.map +1 -0
  62. package/dist/schema/conditional.js +29 -0
  63. package/dist/schema/conditional.js.map +1 -0
  64. package/dist/schema/decimal.d.ts +31 -0
  65. package/dist/schema/decimal.d.ts.map +1 -0
  66. package/dist/schema/decimal.js +39 -0
  67. package/dist/schema/decimal.js.map +1 -0
  68. package/dist/schema/primitives.d.ts +29 -0
  69. package/dist/schema/primitives.d.ts.map +1 -0
  70. package/dist/schema/primitives.js +44 -0
  71. package/dist/schema/primitives.js.map +1 -0
  72. package/dist/validator/parse.d.ts +17 -0
  73. package/dist/validator/parse.d.ts.map +1 -0
  74. package/dist/validator/parse.js +218 -0
  75. package/dist/validator/parse.js.map +1 -0
  76. package/package.json +59 -0
package/CHANGELOG.md ADDED
@@ -0,0 +1,45 @@
1
+ # Changelog
2
+
3
+ All notable changes to `smokin` are documented here.
4
+
5
+ The format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) and
6
+ this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [Unreleased]
9
+
10
+ ## [0.1.0] — 2026-05-25
11
+
12
+ Initial public release.
13
+
14
+ ### Added
15
+
16
+ - Schema DSL: `str`, `num`, `int`, `bool`, `null_`, `decimal(precision, scale)`,
17
+ `obj`, `arr`, `tuple`, `union`, `literal`, `enum_`, `discriminated`.
18
+ - Modifiers: `.nullable()`, `.optional()`, `.default(v)`, `.describe(s)`.
19
+ - Eight data-schema axes:
20
+ 1. `.weighted(pairs)` — non-uniform discrete sampling.
21
+ 2. `.in(values)` / `.in({ kind: 'lookup', ... })` — closed domain.
22
+ 3. `.derivedFrom(ctx => ...)` — computed fields.
23
+ 4. `.invariant(fn)` / `.correlate(fn)` on `obj({...})` — predicates.
24
+ 5. `discriminated(key, map)` — conditional shape.
25
+ 6. `mockDataset({ name, schema, identity, n })` — dataset abstraction.
26
+ 7. Identity caching via `identity` key tuple.
27
+ 8. `.occasionally(value, p)` and `.eventually(every, value)` overrides.
28
+ - Generator: `mock(schema, opts)` with deterministic mulberry32 PRNG seeded
29
+ from a sha256 hash over path + identity tuple.
30
+ - Validator: `parse(schema, value)` (throws `ConformError`) and
31
+ `safeParse(schema, value)` (discriminated result).
32
+ - Cross-dataset FK: `relate(rows, field, opts)` for `.derivedFrom(...)`.
33
+ - Snapshot helpers: `replay(schema, opts)`, `expectStable(schema, opts)`.
34
+ - Debug: `createTrace()` — per-axis decision audit.
35
+ - Extensibility API: `walkSchema`, `fromIR`, `mulberry32`, `rngFromString`,
36
+ `seedFromString`, public IR types (`SchemaNode`, `Modifiers`, `Axes`).
37
+ - TypeScript types throughout; `Infer<typeof schema>` returns the exact shape.
38
+
39
+ ### Requirements
40
+
41
+ - Node.js ≥ 18.17.
42
+ - Zero runtime dependencies (uses only `node:crypto`).
43
+
44
+ [Unreleased]: https://github.com/ochairo/smokin/compare/v0.1.0...HEAD
45
+ [0.1.0]: https://github.com/ochairo/smokin/releases/tag/v0.1.0
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 smokin contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,366 @@
1
+ <!-- markdownlint-disable MD033 MD041 -->
2
+
3
+ <div align="center">
4
+
5
+ # smokin
6
+
7
+ **Library for describing and generating data.**
8
+
9
+ [![npm version](https://img.shields.io/npm/v/smokin.svg)](https://www.npmjs.com/package/smokin)
10
+ [![CI](https://github.com/ochairo/smokin/actions/workflows/test.yml/badge.svg)](https://github.com/ochairo/smokin/actions/workflows/test.yml)
11
+ [![Node.js](https://img.shields.io/badge/node-%E2%89%A518.17-brightgreen)](https://nodejs.org/)
12
+ [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE)
13
+
14
+ </div>
15
+
16
+ `smokin` lets you describe **how data behaves** — distributions, identity,
17
+ derived values, invariants, business domains — not just **what shape** it has.
18
+ One schema acts as your TypeScript type, your runtime validator, and your
19
+ sample-data generator.
20
+
21
+ It is intentionally small. It does one thing — *data schemas* — and tries to
22
+ do it carefully. There is no HTTP server, no CLI, and no framework adapter
23
+ baked in; you wire it into the tools you already use.
24
+
25
+ ## Why you might like it
26
+
27
+ - **Zero runtime dependencies.** Only `node:crypto` from the standard library.
28
+ - **TypeScript-first.** `Infer<typeof schema>` gives you a precise type back.
29
+ - **Deterministic.** Same seed → same output, on every machine, every run.
30
+ - **One schema, three uses.** API mocks, test fixtures, and seed data from a
31
+ single source of truth.
32
+ - **Small surface area.** A pure data-schema DSL — no HTTP layer, no hidden
33
+ global state, no surprise transitive deps.
34
+
35
+ > `v0.1.0` — early but ready for use within its scope. Breaking changes will
36
+ > follow semver and be noted in [CHANGELOG.md](CHANGELOG.md).
37
+
38
+ ## Install
39
+
40
+ ```sh
41
+ npm install smokin
42
+ # or: pnpm add smokin
43
+ # or: yarn add smokin
44
+ ```
45
+
46
+ Requires Node.js ≥ 18.17. No transitive dependencies.
47
+
48
+ ## A 60-second tour
49
+
50
+ ```ts
51
+ import { obj, str, int, decimal, arr, mock, parse, type Infer } from 'smokin'
52
+
53
+ const Product = obj({
54
+ sku: str().pattern(/^[A-Z]{3}-\d{4}$/),
55
+ price: decimal(10, 2)
56
+ .min('0.01').max('9999.99')
57
+ .typically(10, 200) // most prices cluster in this range
58
+ .occasionally('0.99', 0.02), // 2% are loss-leaders
59
+ stock: int().min(0).max(1000),
60
+ })
61
+
62
+ const Catalog = obj({
63
+ region: str().in(['us', 'eu', 'jp']),
64
+ items: arr(Product).length(10),
65
+ })
66
+
67
+ type CatalogT = Infer<typeof Catalog> // inferred TS type
68
+ const sample: CatalogT = mock(Catalog, { seed: 'demo' })
69
+ parse(Catalog, sample) // throws on shape mismatch
70
+ ```
71
+
72
+ Run the same code twice with the same seed and you get the same value back.
73
+
74
+ ## Documentation
75
+
76
+ - [docs/design.md](docs/design.md) — architecture, axis priority, the
77
+ determinism model.
78
+ - [docs/axes.md](docs/axes.md) — full reference for every axis and modifier.
79
+ - [docs/recipes.md](docs/recipes.md) — practical patterns: FK, cadence, lookup
80
+ domains, snapshot tests.
81
+ - [docs/extending.md](docs/extending.md) — plugin author guide.
82
+ - [CHANGELOG.md](CHANGELOG.md) — release notes.
83
+ - [CONTRIBUTING.md](CONTRIBUTING.md) — local development workflow.
84
+ - [SECURITY.md](SECURITY.md) — how to report a vulnerability.
85
+
86
+ ## How smokin fits next to other tools
87
+
88
+ Other libraries in this space are excellent at what they do; `smokin` simply
89
+ covers a different corner.
90
+
91
+ | Tool | Best at | Where `smokin` adds something |
92
+ | --- | --- | --- |
93
+ | `zod` + `zod-fixture` | Type validation | Distributions, identity, and derived values — not only shape |
94
+ | `@faker-js/faker` | Rich value primitives | Values stay linked to your schema and domain rules |
95
+ | `msw` | Request interception | Pair them — `msw` for transport, `smokin` for response bodies |
96
+ | `prism` | OpenAPI-driven mocking | Deterministic, programmable values without a spec file |
97
+
98
+ If your need is "validate this object," reach for `zod`. If it is "give me a
99
+ realistic name," reach for `faker`. If it is "give me a coherent, repeatable
100
+ dataset that obeys my domain rules," `smokin` may be a good fit.
101
+
102
+ ## The 8 axes
103
+
104
+ `smokin` lets you express things a plain table schema cannot:
105
+
106
+ ```ts
107
+ // 1. Distribution — where values concentrate, not just the bounds
108
+ decimal(10, 2).min('0').max('1000').typically(100, 300)
109
+
110
+ // 2. Domain — closed candidate set, optionally keyed off a sibling
111
+ str().in(['us', 'eu', 'jp'])
112
+ str().in({ kind: 'lookup', fromField: 'region',
113
+ map: { us: ['CA', 'NY'], eu: ['DE', 'FR'] } })
114
+
115
+ // 3. Derived — computed from other fields, never sampled
116
+ int().derivedFrom(ctx => (ctx.parent.qty as number) * (ctx.parent.price as number))
117
+
118
+ // 4. Invariants — predicates the value must satisfy (rejection sampling)
119
+ int().min(0).max(100).invariant(v => (v as number) % 2 === 0)
120
+
121
+ // 5. Occasionally — rare overrides stacked before sampling
122
+ int().min(0).max(10).occasionally(-1, 0.01)
123
+
124
+ // 6. Discriminated — conditional shape switched by a literal field
125
+ discriminated('kind', {
126
+ digital: obj({ kind: literal('digital'), downloadUrl: str() }),
127
+ physical: obj({ kind: literal('physical'), weightKg: decimal(6, 2) }),
128
+ })
129
+
130
+ // 7. Identity — same identity tuple → same record across endpoints
131
+ mockDataset({
132
+ name: 'Products',
133
+ schema: Product,
134
+ identity: ['sku'],
135
+ n: 100,
136
+ })
137
+
138
+ // 8. Weighted choice — non-uniform discrete sampling
139
+ enum_(['A', 'B', 'C']).weighted([['A', 0.7], ['B', 0.2], ['C', 0.1]])
140
+ ```
141
+
142
+ When two axes overlap, the priority is well-defined and traceable — see
143
+ [docs/design.md](docs/design.md).
144
+
145
+ ## Going deeper
146
+
147
+ These features build on the 8 axes for cases where they help most —
148
+ determinism, identity, multi-field rules.
149
+
150
+ ### `.eventually(every, value)` — periodic events
151
+
152
+ Sets `value` deterministically every N rows, driven by `ctx.index`. Useful
153
+ for scheduled outages, weekly resets, or monthly billing edges.
154
+
155
+ ```ts
156
+ const dailyTotal = decimal(10, 2).min('0').max('10000')
157
+ .typically(1_000, 3_000)
158
+ .eventually(30, '0') // every 30th day is a zero
159
+ ```
160
+
161
+ `occasionally` is probabilistic and i.i.d.; `eventually` is deterministic and
162
+ periodic. They can be combined.
163
+
164
+ ### `.correlate(fn)` — multi-field invariants
165
+
166
+ Available on `obj({...})`. The predicate receives the fully-assembled object
167
+ with full type information, and the generator rejection-samples until it
168
+ holds.
169
+
170
+ ```ts
171
+ const DateRange = obj({
172
+ start: int().min(0).max(100),
173
+ end: int().min(0).max(100),
174
+ }).correlate(r => r.start <= r.end)
175
+ ```
176
+
177
+ ### `relate(rows, field)` — cross-dataset FK
178
+
179
+ Pick a foreign-key value from a previously-generated dataset. Selection is
180
+ deterministic per seed; the `pickBy` option selects `'random'` (default),
180
+ `'index'` (per-row), or a custom `(ctx) => number` resolver.
182
+
183
+ ```ts
184
+ import { mockDataset, obj, str, int, arr, relate, mock } from 'smokin'
185
+
186
+ const customers = mockDataset({
187
+ name: 'customers',
188
+ schema: obj({ id: str() }),
189
+ identity: ['id'],
190
+ n: 5,
191
+ })
192
+
193
+ const Order = obj({
194
+ orderId: int(),
195
+ customerId: str().derivedFrom(relate(customers, 'id')),
196
+ })
197
+
198
+ mock(arr(Order).length(10), { seed: 's' }) // each order references a real customer
199
+ ```
200
+
201
+ ### `createTrace()` — see which axis fired where
202
+
203
+ Audits every decision the generator made. Helpful when output surprises you,
204
+ or when you want to confirm the intended axis won.
205
+
206
+ ```ts
207
+ import { mock, createTrace } from 'smokin'
208
+
209
+ const trace = createTrace()
210
+ mock(schema, { seed: 'demo', trace })
211
+
212
+ console.log(trace.format())
213
+ // / type
214
+ // /price distribution
215
+ // /customerId derived
216
+ // /flag occasionally
217
+ ```
218
+
219
+ ### `replay()` and `expectStable()`
220
+
221
+ Snapshot-style helpers for tests. `replay` returns a callable bound to a
222
+ fixed `(seed, input)` pair; `expectStable` runs the generator twice and
223
+ throws if results diverge — useful for catching accidental `Date.now()` or
224
+ `Math.random()` calls inside `derivedFrom`.
225
+
226
+ ```ts
227
+ import assert from 'node:assert/strict'
228
+ import { replay, expectStable } from 'smokin'
229
+
230
+ const gen = replay(Schema, { seed: 'fix-1' })
231
+ assert.deepEqual(gen(), gen()) // stable
232
+
233
+ expectStable(Schema, { seed: 'fix-1' }) // throws if non-deterministic
234
+ ```
235
+
236
+ Works with any assertion library — the example above uses Node's built-in
237
+ `node:assert/strict`; substitute `expect(...).toEqual(...)` (Jest/Vitest) or
238
+ `t.deepEqual(...)` (AVA) as needed.
239
+
240
+ ## Wiring into an HTTP framework
241
+
242
+ `smokin` ships no server — you bring the framework. A few lines of glue is
243
+ usually enough. Here is a Fastify example:
244
+
245
+ ```ts
246
+ import Fastify from 'fastify'
247
+ import { obj, int, decimal, mockDataset } from 'smokin'
248
+
249
+ const Item = obj({
250
+ id: int().min(1),
251
+ price: decimal(10, 2).min('0').max('10000'),
252
+ })
253
+
254
+ const app = Fastify()
255
+ app.get('/items', async (req) => {
256
+ const seed = `GET:/items:${JSON.stringify(req.query)}`
257
+ return {
258
+ result: mockDataset({
259
+ name: 'items', schema: Item, identity: ['id'], n: 10, seedPrefix: seed,
260
+ }),
261
+ }
262
+ })
263
+ await app.listen({ port: 8000 })
264
+ ```
265
+
266
+ The same pattern works for Express, Hono, `msw`, or raw `node:http`.
267
+
268
+ ## Determinism guarantee
269
+
270
+ Given the same `seed`, `smokin` produces identical output on every run and
271
+ every machine — which makes tests, replays, diffs, and offline demos
272
+ predictable. For HTTP, a common pattern is `sha256(method + path + sorted
273
+ query string)` so the response is a pure function of the request. You can
274
+ override per call with `mock(schema, { seed: 'explicit' })` or per dataset
275
+ via `seedPrefix`.
276
+
277
+ ## Intentional non-goals
278
+
279
+ Things `smokin` deliberately does *not* try to do, to stay small and
280
+ predictable:
281
+
282
+ - No constraint solver — when axes cannot all be satisfied, you get a
283
+ `SchemaConflictError` rather than silent guessing.
284
+ - No circular field dependencies in `derivedFrom`.
285
+ - No `Math.random()` or `Date.now()` inside generation (they would break
286
+ determinism).
287
+ - No float-equality invariants — use `decimal()` for money.
288
+ - No implicit time-of-day in defaults — pass `ctx.input.now` when you need
289
+ "today".
290
+ - No HTTP server, no CLI, no OpenAPI ingestion — these belong in external
291
+ packages.
292
+
293
+ ## Extending smokin (for library authors)
294
+
295
+ `smokin` exposes the minimum surface needed to build adapters and codegen
296
+ tools on top of it:
297
+
298
+ | Export | Use case |
299
+ | --- | --- |
300
+ | `walkSchema(node, { enter, leave })` | Codegen: OpenAPI, JSON Schema, SQL DDL, docs |
301
+ | `fromIR(node)` | Reconstruct a typed `Schema` builder from raw IR |
302
+ | `SchemaNode`, `Modifiers`, `Axes` (types) | Type-safe IR consumption |
303
+ | `mulberry32`, `rngFromString`, `seedFromString` | Plugins that need their own deterministic RNG branch |
304
+ | `MockOptions.trace` | Hook into per-axis decisions |
305
+
306
+ A minimal codegen plugin:
307
+
308
+ ```ts
309
+ import { walkSchema, type Schema } from 'smokin'
310
+
311
+ export const toJsonSchema = (schema: Schema): unknown => {
312
+ const stack: unknown[] = []
313
+ walkSchema(schema, {
314
+ enter(node) {
315
+ // map a smokin kind → JSON Schema fragment, push onto stack
316
+ },
317
+ leave() {
318
+ // fold children into parent
319
+ },
320
+ })
321
+ return stack[0]
322
+ }
323
+ ```
324
+
325
+ Plugin authors do not subclass `Schema`; they produce IR (or compose existing
326
+ builders) and call `fromIR` to get a typed builder back.
327
+
328
+ ## Project layout
329
+
330
+ ```text
331
+ src/
332
+ foundation/ # IR, types, prng, sha256, errors, axes, walk
333
+ schema/ # primitives, decimal, composites, discriminated
334
+ generator/ # sampling engine, trace, replay
335
+ validator/ # parse / safeParse
336
+ dataset/ # dataset + identity cache + relate
337
+ index.ts # public surface
338
+ test/ # tests run with node:test against tsc-compiled output
339
+ ```
340
+
341
+ ## Contributing
342
+
343
+ Issues, questions, and pull requests are welcome. The library is small on
344
+ purpose, so changes that grow the surface area will be discussed before
345
+ merging — please open an issue first for anything non-trivial.
346
+
347
+ ```sh
348
+ git clone https://github.com/ochairo/smokin.git
349
+ cd smokin
350
+ corepack enable # uses the pnpm version pinned in package.json
351
+ pnpm install
352
+ pnpm test # compiles test/ with tsc, runs node:test
353
+ pnpm typecheck
354
+ pnpm build
355
+ ```
356
+
357
+ The test runner is `node --test` against tsc-compiled output — no `tsx`, no
358
+ `ts-node`, no bundler. Please keep it that way; the lack of build tooling is
359
+ a feature.
360
+
361
+ See [CONTRIBUTING.md](CONTRIBUTING.md) for the full workflow and
362
+ [SECURITY.md](SECURITY.md) for vulnerability reports.
363
+
364
+ ## License
365
+
366
+ MIT © 2026 smokin contributors. See [LICENSE](LICENSE).
@@ -0,0 +1,43 @@
1
+ /**
2
+ * Dataset abstraction (Z_PLAN §0.5.2).
3
+ *
4
+ * A Dataset is a collection of records that share:
5
+ * - a per-record schema `S`
6
+ * - identity keys `K` (same identity → same value, cross-endpoint)
7
+ * - a record count `N`
8
+ * - aggregate invariants `J` (predicates over the full record list)
9
+ *
10
+ * Identity is implemented as a per-(datasetName, identityValues) seed:
11
+ * seed = sha256(datasetName + "|" + sorted_kv_pairs_of_K).slice(0, 16)
12
+ * The same identity tuple always yields the same record, regardless of which
13
+ * endpoint asks for it.
14
+ */
15
+ import type { Infer, Schema } from '../foundation/types.js';
16
+ export type DatasetOptions<S extends Schema> = {
17
+ /** Unique dataset name (participates in the identity seed). */
18
+ readonly name: string;
19
+ /** Record-level schema. */
20
+ readonly schema: S;
21
+ /** Field names whose tuple defines a record's identity. */
22
+ readonly identity: readonly string[];
23
+ /** Number of records to generate per `mockDataset` call. */
24
+ readonly n: number;
25
+ /** Aggregate invariants — applied to the full record list, with bounded retries. */
26
+ readonly invariants?: readonly ((rows: readonly Infer<S>[]) => boolean)[];
27
+ /** Caller-supplied context channel exposed to derived/invariants as `ctx.input`. */
28
+ readonly input?: Readonly<Record<string, unknown>>;
29
+ /** Optional seed prefix; if omitted, the dataset name is used. */
30
+ readonly seedPrefix?: string;
31
+ };
32
+ /**
33
+ * Generate `n` records that conform to the schema and satisfy all aggregate
34
+ * invariants. When a generated record has the same identity values as an
35
+ * earlier one, the earlier record is reused in its place (the result still has `n` entries).
36
+ */
37
+ export declare const mockDataset: <S extends Schema<unknown>>(opts: DatasetOptions<S>) => Infer<S>[];
38
+ /**
39
+ * Produce the deterministic identity key for a record. Two records with the
40
+ * same identity values produce the same key (and thus collapse in the cache).
41
+ */
42
+ export declare const identityFor: (datasetName: string, identityKeys: readonly string[], row: Record<string, unknown>) => string;
43
+ //# sourceMappingURL=dataset.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"dataset.d.ts","sourceRoot":"","sources":["../../src/dataset/dataset.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAKH,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,wBAAwB,CAAA;AAE3D,MAAM,MAAM,cAAc,CAAC,CAAC,SAAS,MAAM,IAAI;IAC7C,+DAA+D;IAC/D,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAA;IACrB,2BAA2B;IAC3B,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAA;IAClB,2DAA2D;IAC3D,QAAQ,CAAC,QAAQ,EAAE,SAAS,MAAM,EAAE,CAAA;IACpC,4DAA4D;IAC5D,QAAQ,CAAC,CAAC,EAAE,MAAM,CAAA;IAClB,oFAAoF;IACpF,QAAQ,CAAC,UAAU,CAAC,EAAE,SAAS,CAAC,CAAC,IAAI,EAAE,SAAS,KAAK,CAAC,CAAC,CAAC,EAAE,KAAK,OAAO,CAAC,EAAE,CAAA;IACzE,oFAAoF;IACpF,QAAQ,CAAC,KAAK,CAAC,EAAE,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAAA;IAClD,kEAAkE;IAClE,QAAQ,CAAC,UAAU,CAAC,EAAE,MAAM,CAAA;CAC7B,CAAA;AAID;;;;GAIG;AACH,eAAO,MAAM,WAAW,oCAA4B,eAAe,CAAC,CAAC,KAAG,MAAM,CAAC,CAAC,EAgC/E,CAAA;AAED;;;GAGG;AACH,eAAO,MAAM,WAAW,gBACT,MAAM,gBACL,SAAS,MAAM,EAAE,OAC1B,OAAO,MAAM,EAAE,OAAO,CAAC,KAC3B,MAIF,CAAA"}
@@ -0,0 +1,63 @@
1
+ /**
2
+ * Dataset abstraction (Z_PLAN §0.5.2).
3
+ *
4
+ * A Dataset is a collection of records that share:
5
+ * - a per-record schema `S`
6
+ * - identity keys `K` (same identity → same value, cross-endpoint)
7
+ * - a record count `N`
8
+ * - aggregate invariants `J` (predicates over the full record list)
9
+ *
10
+ * Identity is implemented as a per-(datasetName, identityValues) seed:
11
+ * seed = sha256(datasetName + "|" + sorted_kv_pairs_of_K).slice(0, 16)
12
+ * The same identity tuple always yields the same record, regardless of which
13
+ * endpoint asks for it.
14
+ */
15
+ import { SchemaConflictError } from '../foundation/errors.js';
16
+ import { identityKey } from '../foundation/hash.js';
17
+ import { mock } from '../generator/engine.js';
18
const MAX_DATASET_ATTEMPTS = 1;
/**
 * Build a dataset of `opts.n` rows from `opts.schema`.
 *
 * Every row is produced by `mock` with a seed assembled from the seed prefix
 * (falling back to the dataset name), the row index and the attempt number.
 * Rows are tracked by identity key for the duration of the call: when a newly
 * generated row has the same key as a row seen earlier, the earlier row is
 * pushed in its place, so the returned list always has one entry per index.
 *
 * After each full pass the aggregate invariants are evaluated against the row
 * list. The first pass that satisfies all of them is returned; otherwise the
 * pass is repeated with the next attempt number, for `MAX_DATASET_ATTEMPTS + 1`
 * passes in total.
 *
 * @param opts Dataset options: `name`, `schema`, `identity`, `n`, plus the
 *   optional `invariants`, `input` and `seedPrefix`.
 * @returns The generated rows.
 * @throws {SchemaConflictError} When no pass satisfies every aggregate invariant.
 */
export const mockDataset = (opts) => {
    // Shared across attempts on purpose: this mirrors the original behaviour,
    // where rows remembered from a rejected pass can still be reused.
    const recordsByIdentity = new Map();
    let attempt = 0;
    while (attempt <= MAX_DATASET_ATTEMPTS) {
        const rows = [];
        for (let index = 0; index < opts.n; index += 1) {
            // The identity key is only known after generation, so each row is
            // seeded by its position and the current attempt.
            const seedBase = opts.seedPrefix ?? opts.name;
            const mockOptions = {
                seed: `${seedBase}:row:${index}:attempt:${attempt}`,
                index,
            };
            if (opts.input !== undefined) {
                mockOptions.input = opts.input;
            }
            const candidate = mock(opts.schema, mockOptions);
            const key = identityFor(opts.name, opts.identity, candidate);
            const remembered = recordsByIdentity.get(key);
            if (remembered === undefined) {
                recordsByIdentity.set(key, candidate);
                rows.push(candidate);
            }
            else {
                // Identity collision: hand back the row generated first.
                rows.push(remembered);
            }
        }
        const violated = (opts.invariants ?? []).some((holds) => !holds(rows));
        if (!violated) {
            return rows;
        }
        attempt += 1;
    }
    throw new SchemaConflictError(`dataset "${opts.name}": aggregate invariants unsatisfied after ${MAX_DATASET_ATTEMPTS + 1} attempts`, [opts.name], 'narrow per-record distribution or relax aggregate invariants');
};
53
/**
 * Compute the identity key of `row` within the dataset `datasetName`.
 *
 * For each name in `identityKeys` the row's value is JSON-serialised, with
 * `null` substituted when the value is `null` or `undefined`. The resulting
 * name → serialised-value map is handed to `identityKey` together with the
 * `'DATASET'` tag and the dataset name.
 *
 * @param datasetName Name of the dataset the row belongs to.
 * @param identityKeys Field names whose values make up the identity.
 * @param row Record to compute the key for.
 * @returns Whatever `identityKey` returns for those inputs (declared as a
 *   string); rows with equal identity values are expected to share a key.
 */
export const identityFor = (datasetName, identityKeys, row) => {
    const serialised = {};
    for (const field of identityKeys) {
        const value = row[field] ?? null;
        serialised[field] = JSON.stringify(value);
    }
    return identityKey('DATASET', datasetName, serialised);
};
63
+ //# sourceMappingURL=dataset.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"dataset.js","sourceRoot":"","sources":["../../src/dataset/dataset.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAEH,OAAO,EAAE,mBAAmB,EAAE,MAAM,yBAAyB,CAAA;AAC7D,OAAO,EAAE,WAAW,EAAE,MAAM,uBAAuB,CAAA;AACnD,OAAO,EAAE,IAAI,EAAE,MAAM,wBAAwB,CAAA;AAoB7C,MAAM,oBAAoB,GAAG,CAAC,CAAA;AAE9B;;;;GAIG;AACH,MAAM,CAAC,MAAM,WAAW,GAAG,CAAmB,IAAuB,EAAc,EAAE;IACnF,MAAM,KAAK,GAAG,IAAI,GAAG,EAAoB,CAAA;IACzC,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,IAAI,oBAAoB,EAAE,OAAO,IAAI,CAAC,EAAE,CAAC;QACpE,MAAM,IAAI,GAAe,EAAE,CAAA;QAC3B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;YACnC,mEAAmE;YACnE,uEAAuE;YACvE,uEAAuE;YACvE,MAAM,OAAO,GAAG,GAAG,IAAI,CAAC,UAAU,IAAI,IAAI,CAAC,IAAI,QAAQ,CAAC,YAAY,OAAO,EAAE,CAAA;YAC7E,MAAM,GAAG,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE;gBAC5B,IAAI,EAAE,OAAO;gBACb,KAAK,EAAE,CAAC;gBACR,GAAG,CAAC,IAAI,CAAC,KAAK,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;aAC3D,CAAC,CAAA;YACF,MAAM,GAAG,GAAG,WAAW,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,CAAC,QAAQ,EAAE,GAA8B,CAAC,CAAA;YACjF,MAAM,QAAQ,GAAG,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAA;YAC/B,IAAI,QAAQ,KAAK,SAAS,EAAE,CAAC;gBAC3B,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAA;gBACnB,SAAQ;YACV,CAAC;YACD,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE,GAAG,CAAC,CAAA;YACnB,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;QAChB,CAAC;QAED,MAAM,UAAU,GAAG,IAAI,CAAC,UAAU,IAAI,EAAE,CAAA;QACxC,IAAI,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;YAAE,OAAO,IAAI,CAAA;IACnD,CAAC;IACD,MAAM,IAAI,mBAAmB,CAC3B,YAAY,IAAI,CAAC,IAAI,6CAA6C,oBAAoB,GAAG,CAAC,WAAW,EACrG,CAAC,IAAI,CAAC,IAAI,CAAC,EACX,8DAA8D,CAC/D,CAAA;AACH,CAAC,CAAA;AAED;;;GAGG;AACH,MAAM,CAAC,MAAM,WAAW,GAAG,CACzB,WAAmB,EACnB,YAA+B,EAC/B,GAA4B,EACpB,EAAE;IACV,MAAM,KAAK,GAA2B,EAAE,CAAA;IACxC,KAAK,MAAM,CAAC,IAAI,YAAY;QAAE,KAAK,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,CAAA;IACvE,OAAO,WAAW,CAAC,SAAS,EAAE,WAAW,EAAE,KAAK,CAAC,CAAA;AACnD,CAAC,CAAA"}
@@ -0,0 +1,32 @@
1
+ /**
2
+ * Cross-dataset FK identity (A1) — declarative foreign-key lookup.
3
+ *
4
+ * `relate(rows, field)` returns a `DerivedFn` suitable for `.derivedFrom(...)`
5
+ * that picks one row from a previously-generated dataset and returns its
6
+ * `field` value. Selection is deterministic, driven by the generator seed,
7
+ * so re-runs with the same seed produce the same FK assignments.
8
+ *
9
+ * ```ts
10
+ * const Bases = mockDataset({ name: 'bases', schema: Base, identity: ['base_code'], n: 5 })
11
+ *
12
+ * const Tank = obj({
13
+ * tank_no: str(),
14
+ * base_code: str().derivedFrom(relate(Bases, 'base_code')), // FK
15
+ * })
16
+ * ```
17
+ *
18
+ * For ad-hoc cross-row selection (e.g. by index or condition), use the
19
+ * `pickBy` option.
20
+ */
21
+ import type { DerivedFn, GenContext } from '../foundation/axes.js';
22
+ export type RelateOptions = {
23
+ /**
24
+ * Strategy to pick a row.
25
+ * - `'random'` (default): seeded uniform sample
26
+ * - `'index'` : `rows[ctx.index % rows.length]`
27
+ * - `(ctx) => number` : custom index resolver
28
+ */
29
+ readonly pickBy?: 'random' | 'index' | ((ctx: GenContext) => number);
30
+ };
31
+ export declare const relate: <R extends Record<string, unknown>, K extends keyof R>(rows: readonly R[], field: K, opts?: RelateOptions) => DerivedFn;
32
+ //# sourceMappingURL=relate.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"relate.d.ts","sourceRoot":"","sources":["../../src/dataset/relate.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAEH,OAAO,KAAK,EAAE,SAAS,EAAE,UAAU,EAAE,MAAM,uBAAuB,CAAA;AAGlE,MAAM,MAAM,aAAa,GAAG;IAC1B;;;;;OAKG;IACH,QAAQ,CAAC,MAAM,CAAC,EAAE,QAAQ,GAAG,OAAO,GAAG,CAAC,CAAC,GAAG,EAAE,UAAU,KAAK,MAAM,CAAC,CAAA;CACrE,CAAA;AAED,eAAO,MAAM,MAAM,+DACX,SAAS,CAAC,EAAE,SACX,CAAC,SACF,aAAa,KAClB,SAoBF,CAAA"}
@@ -0,0 +1,46 @@
1
+ /**
2
+ * Cross-dataset FK identity (A1) — declarative foreign-key lookup.
3
+ *
4
+ * `relate(rows, field)` returns a `DerivedFn` suitable for `.derivedFrom(...)`
5
+ * that picks one row from a previously-generated dataset and returns its
6
+ * `field` value. Selection is deterministic, driven by the generator seed,
7
+ * so re-runs with the same seed produce the same FK assignments.
8
+ *
9
+ * ```ts
10
+ * const Bases = mockDataset({ name: 'bases', schema: Base, identity: ['base_code'], n: 5 })
11
+ *
12
+ * const Tank = obj({
13
+ * tank_no: str(),
14
+ * base_code: str().derivedFrom(relate(Bases, 'base_code')), // FK
15
+ * })
16
+ * ```
17
+ *
18
+ * For ad-hoc cross-row selection (e.g. by index or condition), use the
19
+ * `pickBy` option.
20
+ */
21
+ import { mulberry32, seedFromString } from '../foundation/prng.js';
22
/**
 * Build a derived-value function that returns the `field` value of one row
 * taken from an already generated dataset (a foreign-key lookup).
 *
 * Row selection depends on `opts.pickBy`:
 * - `'random'` (default, and any unrecognised value): an index drawn from a
 *   `mulberry32` generator seeded via `seedFromString` over `ctx.seed`, the
 *   `relate` tag and the field name.
 * - `'index'`: `ctx.index` (0 when absent), wrapped into range.
 * - a function: the resolver's return value, wrapped into range.
 *
 * "Wrapped into range" means floored to an integer and reduced modulo
 * `rows.length`, so negative and fractional positions still select a row
 * instead of silently producing `undefined`. A non-finite position (`NaN`,
 * `±Infinity`) selects no row and the derived value is `undefined`.
 *
 * @param rows Previously generated rows to pick from.
 * @param field Name of the property to read from the picked row.
 * @param opts Optional settings; `pickBy` selects the strategy.
 * @returns A function of the generation context that yields the picked value.
 * @throws {RangeError} If `rows` is empty when `relate` is called.
 */
export const relate = (rows, field, opts = {}) => {
    if (rows.length === 0) {
        throw new RangeError(`relate: empty dataset (cannot pick field "${String(field)}")`);
    }
    const strategy = opts.pickBy ?? 'random';
    // Floor first so fractional positions map to a real slot, then use a
    // double modulo so negative positions count back from the end.
    const wrap = (position) => {
        const size = rows.length;
        return ((Math.floor(position) % size) + size) % size;
    };
    return (ctx) => {
        let idx;
        if (strategy === 'index') {
            idx = wrap(ctx.index ?? 0);
        }
        else if (typeof strategy === 'function') {
            idx = wrap(strategy(ctx));
        }
        else {
            // Deterministic uniform: derive a sub-RNG from the path-bound seed.
            const rng = mulberry32(seedFromString(`${ctx.seed}|relate|${String(field)}`));
            idx = Math.floor(rng.next() * rows.length);
        }
        const row = rows[idx];
        return row === undefined ? undefined : row[field];
    };
};
46
+ //# sourceMappingURL=relate.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"relate.js","sourceRoot":"","sources":["../../src/dataset/relate.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAGH,OAAO,EAAE,UAAU,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAA;AAYlE,MAAM,CAAC,MAAM,MAAM,GAAG,CACpB,IAAkB,EAClB,KAAQ,EACR,OAAsB,EAAE,EACb,EAAE;IACb,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtB,MAAM,IAAI,UAAU,CAAC,6CAA6C,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;IACtF,CAAC;IACD,MAAM,QAAQ,GAAG,IAAI,CAAC,MAAM,IAAI,QAAQ,CAAA;IACxC,OAAO,CAAC,GAAe,EAAW,EAAE;QAClC,IAAI,GAAW,CAAA;QACf,IAAI,QAAQ,KAAK,OAAO,EAAE,CAAC;YACzB,GAAG,GAAG,CAAC,GAAG,CAAC,KAAK,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,MAAM,CAAA;QACtC,CAAC;aAAM,IAAI,OAAO,QAAQ,KAAK,UAAU,EAAE,CAAC;YAC1C,GAAG,GAAG,QAAQ,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,MAAM,CAAA;YACjC,IAAI,GAAG,GAAG,CAAC;gBAAE,GAAG,IAAI,IAAI,CAAC,MAAM,CAAA;QACjC,CAAC;aAAM,CAAC;YACN,oEAAoE;YACpE,MAAM,GAAG,GAAG,UAAU,CAAC,cAAc,CAAC,GAAG,GAAG,CAAC,IAAI,WAAW,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,CAAA;YAC7E,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,EAAE,GAAG,IAAI,CAAC,MAAM,CAAC,CAAA;QAC5C,CAAC;QACD,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,CAAA;QACrB,OAAO,GAAG,KAAK,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,CAAA;IACnD,CAAC,CAAA;AACH,CAAC,CAAA"}