@schmock/schema 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,415 @@
1
+ import { describe, expect, it } from "vitest";
2
+ import { generate, schemas, stats, validators } from "./test-utils";
3
+
4
+ describe("Data Quality and Statistical Properties", () => {
5
+ describe("Randomness and Distribution", () => {
6
it("generates diverse values for string fields", () => {
  // Draw 100 objects whose single property is an unconstrained string.
  const textSchema = schemas.simple.object({ text: schemas.simple.string() });
  const texts = generate
    .samples<any>(textSchema, 100)
    .map((entry) => entry.text);

  // The ratio reported by validators.uniquenessRatio must exceed 0.9.
  expect(validators.uniquenessRatio(texts)).toBeGreaterThan(0.9);

  // The value reported by stats.entropy must exceed 4.
  expect(stats.entropy(texts)).toBeGreaterThan(4);
});
22
+
23
it("generates well-distributed numeric values", () => {
  // 200 numbers constrained to the closed range [0, 100].
  const boundedSchema = schemas.simple.object({
    value: { type: "number", minimum: 0, maximum: 100 },
  });
  const numbers = generate
    .samples<any>(boundedSchema, 200)
    .map((entry) => entry.value);

  const lowest = Math.min(...numbers);
  const highest = Math.max(...numbers);

  // Arithmetic mean, accumulated in sample order.
  let total = 0;
  for (const n of numbers) {
    total = total + n;
  }
  const average = total / numbers.length;

  // Extremes stay inside the declared range; the mean sits in (30, 70).
  expect(lowest).toBeGreaterThanOrEqual(0);
  expect(highest).toBeLessThanOrEqual(100);
  expect(average).toBeGreaterThan(30);
  expect(average).toBeLessThan(70);

  // Population variance (divide by N) must exceed 100.
  let squaredDeviation = 0;
  for (const n of numbers) {
    squaredDeviation = squaredDeviation + (n - average) ** 2;
  }
  const spread = squaredDeviation / numbers.length;
  expect(spread).toBeGreaterThan(100);
});
50
+
51
it("generates diverse enum selections", () => {
  // One string property restricted to four enum members.
  const statusSchema = schemas.simple.object({
    status: {
      type: "string",
      enum: ["active", "pending", "inactive", "archived"],
    },
  });
  const statuses = generate
    .samples<any>(statusSchema, 100)
    .map((entry) => entry.status);
  const tally = stats.distribution(statuses);

  // Every one of the four members must show up at least once.
  expect(tally.size).toBe(4);

  // The most frequent member may be at most 5x (exclusive) the rarest one.
  const occurrences = [...tally.values()];
  const rarest = Math.min(...occurrences);
  const commonest = Math.max(...occurrences);
  expect(commonest / rarest).toBeLessThan(5);
});
73
+
74
it("generates diverse boolean values", () => {
  const flagSchema = schemas.simple.object({ flag: { type: "boolean" } });
  const entries = generate.samples<any>(flagSchema, 100);

  // Count strict `true` and strict `false` separately; anything else is ignored.
  let yesCount = 0;
  let noCount = 0;
  for (const entry of entries) {
    if (entry.flag === true) {
      yesCount += 1;
    } else if (entry.flag === false) {
      noCount += 1;
    }
  }

  // Each outcome must occur more than 20 times and the gap must stay below 60.
  expect(yesCount).toBeGreaterThan(20);
  expect(noCount).toBeGreaterThan(20);
  const gap = Math.abs(yesCount - noCount);
  expect(gap).toBeLessThan(60);
});
88
+ });
89
+
90
+ describe("Format Compliance", () => {
91
it("generates valid email formats consistently", () => {
  const emailSchema = schemas.simple.object({
    email: { type: "string", format: "email" },
  });
  const generated = generate.samples<any>(emailSchema, 50);

  // Simplified RFC 5322 shape: local part, "@", domain, ".", 2+ letter suffix.
  const emailShape = /^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$/;

  for (const entry of generated) {
    const address = entry.email;
    expect(address).toMatch(emailShape);

    // Length must be strictly between 5 and 100.
    expect(address.length).toBeGreaterThan(5);
    expect(address.length).toBeLessThan(100);

    // Splitting on "@" gives exactly two pieces only when there is one "@".
    const pieces = address.split("@");
    expect(pieces.length).toBe(2);
  }

  // The uniqueness ratio across all 50 addresses must exceed 0.8.
  const addresses = generated.map((entry) => entry.email);
  expect(validators.uniquenessRatio(addresses)).toBeGreaterThan(0.8);
});
117
+
118
it("generates valid UUIDs consistently", () => {
  const idSchema = schemas.simple.object({
    id: { type: "string", format: "uuid" },
  });
  const generated = generate.samples<any>(idSchema, 50);

  // Pattern pins the version nibble to 4 and the variant nibble to 8/9/a/b.
  const v4Shape =
    /^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/i;

  for (const entry of generated) {
    expect(entry.id).toMatch(v4Shape);
    expect(entry.id.length).toBe(36);
  }

  // No identifier may repeat: the uniqueness ratio must be exactly 1.
  const ids = generated.map((entry) => entry.id);
  expect(validators.uniquenessRatio(ids)).toBe(1);
});
136
+
137
it("generates valid dates with reasonable ranges", () => {
  const timestampSchema = schemas.simple.object({
    created: { type: "string", format: "date-time" },
  });

  // Only the leading "YYYY-MM-DDTHH:MM:SS" portion is checked; any suffix is allowed.
  const isoPrefix = /^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}/;

  for (const entry of generate.samples<any>(timestampSchema, 50)) {
    const parsed = new Date(entry.created);

    // An unparseable string yields NaN from getTime().
    expect(parsed.getTime()).not.toBeNaN();

    expect(entry.created).toMatch(isoPrefix);

    // Year must fall strictly between 1800 and 2200.
    const year = parsed.getFullYear();
    expect(year).toBeGreaterThan(1800);
    expect(year).toBeLessThan(2200);
  }
});
158
+
159
it("generates valid URLs when specified", () => {
  // A single string property using the "uri" format.
  const schema = schemas.simple.object({
    website: { type: "string", format: "uri" },
  });

  const samples = generate.samples<any>(schema, 20);

  for (const sample of samples) {
    // Assert unconditionally: a missing or empty value must fail the test
    // rather than silently skipping every check for that sample.
    expect(typeof sample.website).toBe("string");

    // Should use an http or https scheme.
    expect(sample.website).toMatch(/^https?:\/\//);

    // Should contain a dot, as a dotted host name would.
    expect(sample.website).toContain(".");
  }
});
175
+ });
176
+
177
+ describe("Constraint Satisfaction", () => {
178
it("respects string length constraints consistently", () => {
  // Strings limited to between 5 and 15 characters inclusive.
  const usernameSchema = schemas.simple.object({
    username: { type: "string", minLength: 5, maxLength: 15 },
  });
  const generated = generate.samples<any>(usernameSchema, 100);

  for (const entry of generated) {
    const size = entry.username.length;
    expect(size).toBeGreaterThanOrEqual(5);
    expect(size).toBeLessThanOrEqual(15);
  }

  // More than three distinct lengths must occur across the samples.
  const distinctSizes = new Set(
    generated.map((entry) => entry.username.length),
  );
  expect(distinctSizes.size).toBeGreaterThan(3);
});
199
+
200
it("respects numeric ranges with good distribution", () => {
  // Integers constrained to the closed range [18, 65].
  const ageSchema = schemas.simple.object({
    age: { type: "integer", minimum: 18, maximum: 65 },
  });
  const ages = generate
    .samples<any>(ageSchema, 100)
    .map((entry) => entry.age);

  // Each value must be an in-range integer.
  for (const candidate of ages) {
    expect(candidate).toBeGreaterThanOrEqual(18);
    expect(candidate).toBeLessThanOrEqual(65);
    expect(Number.isInteger(candidate)).toBe(true);
  }

  // More than 20 distinct ages must appear.
  const distinctAges = new Set(ages);
  expect(distinctAges.size).toBeGreaterThan(20);

  // At least one sample at or below 25 and one at or above 58.
  const reachesLowEnd = ages.some((candidate) => candidate <= 25);
  const reachesHighEnd = ages.some((candidate) => candidate >= 58);
  expect(reachesLowEnd).toBe(true);
  expect(reachesHighEnd).toBe(true);
});
227
+
228
it("maintains array uniqueness when specified", () => {
  // Arrays of exactly five pattern-constrained strings with uniqueItems set.
  const tagSchema = schemas.simple.object({
    tags: {
      type: "array",
      items: { type: "string", pattern: "^tag-\\d{3}$" },
      minItems: 5,
      maxItems: 5,
      uniqueItems: true,
    },
  });

  // Same pattern as the schema's `pattern` keyword, as a RegExp literal.
  const tagShape = /^tag-\d{3}$/;

  for (const entry of generate.samples<any>(tagSchema, 20)) {
    expect(entry.tags).toHaveLength(5);

    // A Set collapses duplicates, so its size equals 5 only if all items differ.
    const distinctTags = new Set(entry.tags);
    expect(distinctTags.size).toBe(5);

    for (const tag of entry.tags) {
      expect(tag).toMatch(tagShape);
    }
  }
});
254
+ });
255
+
256
+ describe("Realistic Data Generation", () => {
257
it("generates realistic person names", () => {
  // Three plain string properties; only the property names hint at their meaning.
  const personSchema = schemas.simple.object({
    firstName: { type: "string" },
    lastName: { type: "string" },
    fullName: { type: "string" },
  });
  const people = generate.samples<any>(personSchema, 50);

  const startsUppercase = /^[A-Z]/;

  for (const person of people) {
    // First name: leading capital, 2 to 14 characters, no space.
    const given = person.firstName;
    expect(given).toMatch(/^[A-Z]/);
    expect(given.length).toBeGreaterThanOrEqual(2);
    expect(given.length).toBeLessThan(15);
    expect(given).not.toContain(" ");

    // Last name: leading capital, 2 to 24 characters.
    const family = person.lastName;
    expect(family).toMatch(startsUppercase);
    expect(family.length).toBeGreaterThanOrEqual(2);
    expect(family.length).toBeLessThan(25);

    // Full name: between two and four space-separated parts.
    const partCount = person.fullName.split(" ").length;
    expect(partCount).toBeGreaterThanOrEqual(2);
    expect(partCount).toBeLessThanOrEqual(4);
  }

  // The uniqueness ratio of the first names must exceed 0.5.
  const givenNames = people.map((person) => person.firstName);
  const diversity = validators.uniquenessRatio(givenNames);
  expect(diversity).toBeGreaterThan(0.5);
});
288
+
289
it("generates realistic addresses", () => {
  // Three plain string properties; only the property names hint at their meaning.
  const addressSchema = schemas.simple.object({
    street: { type: "string" },
    city: { type: "string" },
    zipcode: { type: "string" },
  });

  for (const place of generate.samples<any>(addressSchema, 30)) {
    // Street: contains a digit and an uppercase letter, at least 10 characters.
    const { street } = place;
    expect(street).toMatch(/\d/);
    expect(street).toMatch(/[A-Z]/);
    expect(street.length).toBeGreaterThanOrEqual(10);

    // City: leading capital and no digits anywhere.
    const { city } = place;
    expect(city).toMatch(/^[A-Z]/);
    expect(city).not.toMatch(/\d/);

    // Zip code: five digits, optionally followed by "-" and four digits.
    expect(place.zipcode).toMatch(/^\d{5}(-\d{4})?$/);
  }
});
312
+
313
it("generates consistent related data", () => {
  // Nested object with four string properties.
  const accountSchema = schemas.simple.object({
    user: {
      type: "object",
      properties: {
        email: schemas.simple.string(),
        username: schemas.simple.string(),
        createdAt: schemas.simple.string(),
        updatedAt: schemas.simple.string(),
      },
    },
  });

  // Checked in this order so a failure points at the first offending field.
  const stringFields = ["email", "username", "createdAt", "updatedAt"];

  for (const entry of generate.samples<any>(accountSchema, 20)) {
    for (const field of stringFields) {
      expect(typeof entry.user[field]).toBe("string");
    }

    // email and username must additionally be non-empty.
    expect(entry.user.email.length).toBeGreaterThan(0);
    expect(entry.user.username.length).toBeGreaterThan(0);
  }
});
340
+ });
341
+
342
+ describe("Edge Cases and Boundaries", () => {
343
it("handles empty strings when allowed", () => {
  // minLength of 0 permits, but does not require, the empty string.
  const looseSchema = schemas.simple.object({
    optional: { type: "string", minLength: 0 },
  });
  const generated = generate.samples<any>(looseSchema, 50);

  for (const entry of generated) {
    expect(typeof entry.optional).toBe("string");
    expect(entry.optional.length).toBeGreaterThanOrEqual(0);
  }

  // At least two distinct lengths must occur across the samples.
  const distinctSizes = new Set(
    generated.map((entry) => entry.optional.length),
  );
  expect(distinctSizes.size).toBeGreaterThan(1);
});
364
+
365
it("handles zero and negative numbers appropriately", () => {
  // Number range that straddles zero, so both signs are valid outputs.
  const schema = schemas.simple.object({
    balance: {
      type: "number",
      minimum: -1000,
      maximum: 1000,
    },
  });

  const samples = generate.samples<any>(schema, 100);
  const balances = samples.map((s) => s.balance);

  // Every value must stay inside the declared [-1000, 1000] range.
  balances.forEach((b) => {
    expect(b).toBeGreaterThanOrEqual(-1000);
    expect(b).toBeLessThanOrEqual(1000);
  });

  // Both signs must appear. Asserting them separately (instead of
  // `hasNegative || hasPositive`) catches a generator that never goes below
  // zero, and reports which side is missing.
  const hasNegative = balances.some((b) => b < 0);
  const hasPositive = balances.some((b) => b > 0);
  expect(hasNegative).toBe(true);
  expect(hasPositive).toBe(true);
});
389
+
390
it("generates boundary values occasionally", () => {
  // Integers constrained to the closed range [0, 100].
  const scoreSchema = schemas.simple.object({
    score: { type: "integer", minimum: 0, maximum: 100 },
  });
  const scores = generate
    .samples<any>(scoreSchema, 200)
    .map((entry) => entry.score);

  // No score may fall outside the declared range.
  const allInRange = scores.every((score) => score >= 0 && score <= 100);
  expect(allInRange).toBe(true);

  // Count scores within 5 of either bound; at least one must exist overall.
  let lowEdgeHits = 0;
  let highEdgeHits = 0;
  for (const score of scores) {
    if (score <= 5) {
      lowEdgeHits += 1;
    }
    if (score >= 95) {
      highEdgeHits += 1;
    }
  }
  expect(lowEdgeHits + highEdgeHits).toBeGreaterThan(0);

  // More than 20 distinct scores must appear.
  const distinctScores = new Set(scores);
  expect(distinctScores.size).toBeGreaterThan(20);
});
414
+ });
415
+ });