datacontract-cli 0.10.23__py3-none-any.whl → 0.10.37__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. datacontract/__init__.py +13 -0
  2. datacontract/api.py +12 -5
  3. datacontract/catalog/catalog.py +5 -3
  4. datacontract/cli.py +116 -10
  5. datacontract/data_contract.py +143 -65
  6. datacontract/engines/data_contract_checks.py +366 -60
  7. datacontract/engines/data_contract_test.py +50 -4
  8. datacontract/engines/fastjsonschema/check_jsonschema.py +37 -19
  9. datacontract/engines/fastjsonschema/s3/s3_read_files.py +3 -2
  10. datacontract/engines/soda/check_soda_execute.py +22 -3
  11. datacontract/engines/soda/connections/athena.py +79 -0
  12. datacontract/engines/soda/connections/duckdb_connection.py +65 -6
  13. datacontract/engines/soda/connections/kafka.py +4 -2
  14. datacontract/export/avro_converter.py +20 -3
  15. datacontract/export/bigquery_converter.py +1 -1
  16. datacontract/export/dbt_converter.py +36 -7
  17. datacontract/export/dqx_converter.py +126 -0
  18. datacontract/export/duckdb_type_converter.py +57 -0
  19. datacontract/export/excel_exporter.py +923 -0
  20. datacontract/export/exporter.py +3 -0
  21. datacontract/export/exporter_factory.py +17 -1
  22. datacontract/export/great_expectations_converter.py +55 -5
  23. datacontract/export/{html_export.py → html_exporter.py} +31 -20
  24. datacontract/export/markdown_converter.py +134 -5
  25. datacontract/export/mermaid_exporter.py +110 -0
  26. datacontract/export/odcs_v3_exporter.py +187 -145
  27. datacontract/export/protobuf_converter.py +163 -69
  28. datacontract/export/rdf_converter.py +2 -2
  29. datacontract/export/sodacl_converter.py +9 -1
  30. datacontract/export/spark_converter.py +31 -4
  31. datacontract/export/sql_converter.py +6 -2
  32. datacontract/export/sql_type_converter.py +20 -8
  33. datacontract/imports/avro_importer.py +63 -12
  34. datacontract/imports/csv_importer.py +111 -57
  35. datacontract/imports/excel_importer.py +1111 -0
  36. datacontract/imports/importer.py +16 -3
  37. datacontract/imports/importer_factory.py +17 -0
  38. datacontract/imports/json_importer.py +325 -0
  39. datacontract/imports/odcs_importer.py +2 -2
  40. datacontract/imports/odcs_v3_importer.py +351 -151
  41. datacontract/imports/protobuf_importer.py +264 -0
  42. datacontract/imports/spark_importer.py +117 -13
  43. datacontract/imports/sql_importer.py +32 -16
  44. datacontract/imports/unity_importer.py +84 -38
  45. datacontract/init/init_template.py +1 -1
  46. datacontract/integration/datamesh_manager.py +16 -2
  47. datacontract/lint/resolve.py +112 -23
  48. datacontract/lint/schema.py +24 -15
  49. datacontract/model/data_contract_specification/__init__.py +1 -0
  50. datacontract/model/odcs.py +13 -0
  51. datacontract/model/run.py +3 -0
  52. datacontract/output/junit_test_results.py +3 -3
  53. datacontract/schemas/datacontract-1.1.0.init.yaml +1 -1
  54. datacontract/schemas/datacontract-1.2.0.init.yaml +91 -0
  55. datacontract/schemas/datacontract-1.2.0.schema.json +2029 -0
  56. datacontract/schemas/datacontract-1.2.1.init.yaml +91 -0
  57. datacontract/schemas/datacontract-1.2.1.schema.json +2058 -0
  58. datacontract/schemas/odcs-3.0.2.schema.json +2382 -0
  59. datacontract/templates/datacontract.html +54 -3
  60. datacontract/templates/datacontract_odcs.html +685 -0
  61. datacontract/templates/index.html +5 -2
  62. datacontract/templates/partials/server.html +2 -0
  63. datacontract/templates/style/output.css +319 -145
  64. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.37.dist-info}/METADATA +656 -431
  65. datacontract_cli-0.10.37.dist-info/RECORD +119 -0
  66. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.37.dist-info}/WHEEL +1 -1
  67. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.37.dist-info/licenses}/LICENSE +1 -1
  68. datacontract/export/csv_type_converter.py +0 -36
  69. datacontract/lint/lint.py +0 -142
  70. datacontract/lint/linters/description_linter.py +0 -35
  71. datacontract/lint/linters/field_pattern_linter.py +0 -34
  72. datacontract/lint/linters/field_reference_linter.py +0 -48
  73. datacontract/lint/linters/notice_period_linter.py +0 -55
  74. datacontract/lint/linters/quality_schema_linter.py +0 -52
  75. datacontract/lint/linters/valid_constraints_linter.py +0 -100
  76. datacontract/model/data_contract_specification.py +0 -327
  77. datacontract_cli-0.10.23.dist-info/RECORD +0 -113
  78. /datacontract/{lint/linters → output}/__init__.py +0 -0
  79. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.37.dist-info}/entry_points.txt +0 -0
  80. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.37.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,2029 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-07/schema#",
3
+ "type": "object",
4
+ "title": "DataContractSpecification",
5
+ "properties": {
6
+ "dataContractSpecification": {
7
+ "type": "string",
8
+ "title": "DataContractSpecificationVersion",
9
+ "enum": [
10
+ "1.2.0",
11
+ "1.1.0",
12
+ "0.9.3",
13
+ "0.9.2",
14
+ "0.9.1",
15
+ "0.9.0"
16
+ ],
17
+ "description": "Specifies the Data Contract Specification being used."
18
+ },
19
+ "id": {
20
+ "type": "string",
21
+ "description": "Specifies the identifier of the data contract."
22
+ },
23
+ "info": {
24
+ "type": "object",
25
+ "properties": {
26
+ "title": {
27
+ "type": "string",
28
+ "description": "The title of the data contract."
29
+ },
30
+ "version": {
31
+ "type": "string",
32
+ "description": "The version of the data contract document (which is distinct from the Data Contract Specification version or the Data Product implementation version)."
33
+ },
34
+ "status": {
35
+ "type": "string",
36
+ "description": "The status of the data contract. Can be proposed, in development, active, deprecated, or retired.",
37
+ "examples": [
38
+ "proposed",
39
+ "in development",
40
+ "active",
41
+ "deprecated",
42
+ "retired"
43
+ ]
44
+ },
45
+ "description": {
46
+ "type": "string",
47
+ "description": "A description of the data contract."
48
+ },
49
+ "owner": {
50
+ "type": "string",
51
+ "description": "The owner or team responsible for managing the data contract and providing the data."
52
+ },
53
+ "contact": {
54
+ "type": "object",
55
+ "properties": {
56
+ "name": {
57
+ "type": "string",
58
+ "description": "The identifying name of the contact person/organization."
59
+ },
60
+ "url": {
61
+ "type": "string",
62
+ "format": "uri",
63
+ "description": "The URL pointing to the contact information. This MUST be in the form of a URL."
64
+ },
65
+ "email": {
66
+ "type": "string",
67
+ "format": "email",
68
+ "description": "The email address of the contact person/organization. This MUST be in the form of an email address."
69
+ }
70
+ },
71
+ "description": "Contact information for the data contract.",
72
+ "additionalProperties": true
73
+ }
74
+ },
75
+ "additionalProperties": true,
76
+ "required": [
77
+ "title",
78
+ "version"
79
+ ],
80
+ "description": "Metadata and life cycle information about the data contract."
81
+ },
82
+ "servers": {
83
+ "type": "object",
84
+ "description": "Information about the servers.",
85
+ "additionalProperties": {
86
+ "$ref": "#/$defs/BaseServer",
87
+ "allOf": [
88
+ {
89
+ "if": {
90
+ "properties": {
91
+ "type": {
92
+ "const": "bigquery"
93
+ }
94
+ }
95
+ },
96
+ "then": {
97
+ "$ref": "#/$defs/BigQueryServer"
98
+ }
99
+ },
100
+ {
101
+ "if": {
102
+ "properties": {
103
+ "type": {
104
+ "const": "postgres"
105
+ }
106
+ },
107
+ "required": [
108
+ "type"
109
+ ]
110
+ },
111
+ "then": {
112
+ "$ref": "#/$defs/PostgresServer"
113
+ }
114
+ },
115
+ {
116
+ "if": {
117
+ "properties": {
118
+ "type": {
119
+ "const": "s3"
120
+ }
121
+ },
122
+ "required": [
123
+ "type"
124
+ ]
125
+ },
126
+ "then": {
127
+ "$ref": "#/$defs/S3Server"
128
+ }
129
+ },
130
+ {
131
+ "if": {
132
+ "properties": {
133
+ "type": {
134
+ "const": "sftp"
135
+ }
136
+ },
137
+ "required": [
138
+ "type"
139
+ ]
140
+ },
141
+ "then": {
142
+ "$ref": "#/$defs/SftpServer"
143
+ }
144
+ },
145
+ {
146
+ "if": {
147
+ "properties": {
148
+ "type": {
149
+ "const": "redshift"
150
+ }
151
+ },
152
+ "required": [
153
+ "type"
154
+ ]
155
+ },
156
+ "then": {
157
+ "$ref": "#/$defs/RedshiftServer"
158
+ }
159
+ },
160
+ {
161
+ "if": {
162
+ "properties": {
163
+ "type": {
164
+ "const": "azure"
165
+ }
166
+ },
167
+ "required": [
168
+ "type"
169
+ ]
170
+ },
171
+ "then": {
172
+ "$ref": "#/$defs/AzureServer"
173
+ }
174
+ },
175
+ {
176
+ "if": {
177
+ "properties": {
178
+ "type": {
179
+ "const": "sqlserver"
180
+ }
181
+ },
182
+ "required": [
183
+ "type"
184
+ ]
185
+ },
186
+ "then": {
187
+ "$ref": "#/$defs/SqlserverServer"
188
+ }
189
+ },
190
+ {
191
+ "if": {
192
+ "properties": {
193
+ "type": {
194
+ "const": "snowflake"
195
+ }
196
+ },
197
+ "required": [
198
+ "type"
199
+ ]
200
+ },
201
+ "then": {
202
+ "$ref": "#/$defs/SnowflakeServer"
203
+ }
204
+ },
205
+ {
206
+ "if": {
207
+ "properties": {
208
+ "type": {
209
+ "const": "databricks"
210
+ }
211
+ },
212
+ "required": [
213
+ "type"
214
+ ]
215
+ },
216
+ "then": {
217
+ "$ref": "#/$defs/DatabricksServer"
218
+ }
219
+ },
220
+ {
221
+ "if": {
222
+ "properties": {
223
+ "type": {
224
+ "const": "dataframe"
225
+ }
226
+ },
227
+ "required": [
228
+ "type"
229
+ ]
230
+ },
231
+ "then": {
232
+ "$ref": "#/$defs/DataframeServer"
233
+ }
234
+ },
235
+ {
236
+ "if": {
237
+ "properties": {
238
+ "type": {
239
+ "const": "glue"
240
+ }
241
+ },
242
+ "required": [
243
+ "type"
244
+ ]
245
+ },
246
+ "then": {
247
+ "$ref": "#/$defs/GlueServer"
248
+ }
249
+ },
250
+ {
251
+ "if": {
252
+ "properties": {
253
+ "type": {
254
+ "const": "postgres"
255
+ }
256
+ },
257
+ "required": [
258
+ "type"
259
+ ]
260
+ },
261
+ "then": {
262
+ "$ref": "#/$defs/PostgresServer"
263
+ }
264
+ },
265
+ {
266
+ "if": {
267
+ "properties": {
268
+ "type": {
269
+ "const": "oracle"
270
+ }
271
+ },
272
+ "required": [
273
+ "type"
274
+ ]
275
+ },
276
+ "then": {
277
+ "$ref": "#/$defs/OracleServer"
278
+ }
279
+ },
280
+ {
281
+ "if": {
282
+ "properties": {
283
+ "type": {
284
+ "const": "kafka"
285
+ }
286
+ },
287
+ "required": [
288
+ "type"
289
+ ]
290
+ },
291
+ "then": {
292
+ "$ref": "#/$defs/KafkaServer"
293
+ }
294
+ },
295
+ {
296
+ "if": {
297
+ "properties": {
298
+ "type": {
299
+ "const": "pubsub"
300
+ }
301
+ },
302
+ "required": [
303
+ "type"
304
+ ]
305
+ },
306
+ "then": {
307
+ "$ref": "#/$defs/PubSubServer"
308
+ }
309
+ },
310
+ {
311
+ "if": {
312
+ "properties": {
313
+ "type": {
314
+ "const": "kinesis"
315
+ }
316
+ },
317
+ "required": [
318
+ "type"
319
+ ]
320
+ },
321
+ "then": {
322
+ "$ref": "#/$defs/KinesisDataStreamsServer"
323
+ }
324
+ },
325
+ {
326
+ "if": {
327
+ "properties": {
328
+ "type": {
329
+ "const": "trino"
330
+ }
331
+ },
332
+ "required": [
333
+ "type"
334
+ ]
335
+ },
336
+ "then": {
337
+ "$ref": "#/$defs/TrinoServer"
338
+ }
339
+ },
340
+ {
341
+ "if": {
342
+ "properties": {
343
+ "type": {
344
+ "const": "clickhouse"
345
+ }
346
+ },
347
+ "required": [
348
+ "type"
349
+ ]
350
+ },
351
+ "then": {
352
+ "$ref": "#/$defs/ClickhouseServer"
353
+ }
354
+ },
355
+ {
356
+ "if": {
357
+ "properties": {
358
+ "type": {
359
+ "const": "local"
360
+ }
361
+ },
362
+ "required": [
363
+ "type"
364
+ ]
365
+ },
366
+ "then": {
367
+ "$ref": "#/$defs/LocalServer"
368
+ }
369
+ }
370
+ ]
371
+ }
372
+ },
373
+ "terms": {
374
+ "type": "object",
375
+ "description": "The terms and conditions of the data contract.",
376
+ "properties": {
377
+ "usage": {
378
+ "type": "string",
379
+ "description": "The usage describes the way the data is expected to be used. Can contain business and technical information."
380
+ },
381
+ "limitations": {
382
+ "type": "string",
383
+ "description": "The limitations describe the restrictions on how the data can be used, can be technical or restrictions on what the data may not be used for."
384
+ },
385
+ "policies": {
386
+ "type": "array",
387
+ "items": {
388
+ "type": "object",
389
+ "properties": {
390
+ "type": {
391
+ "type": "string",
392
+ "description": "The type of the policy.",
393
+ "examples": [
394
+ "privacy",
395
+ "security",
396
+ "retention",
397
+ "compliance"
398
+ ]
399
+ },
400
+ "description": {
401
+ "type": "string",
402
+ "description": "A description of the policy."
403
+ },
404
+ "url": {
405
+ "type": "string",
406
+ "format": "uri",
407
+ "description": "A URL to the policy document."
408
+ }
409
+ },
410
+ "additionalProperties": true
411
+ },
412
+ "description": "The policies that apply to the data, such as privacy, security, retention, or compliance policies."
413
+ },
414
+ "billing": {
415
+ "type": "string",
416
+ "description": "The billing describes the pricing model for using the data, such as whether it's free, having a monthly fee, or metered pay-per-use."
417
+ },
418
+ "noticePeriod": {
419
+ "type": "string",
420
+ "description": "The period of time that must be given by either party to terminate or modify a data usage agreement. Uses ISO-8601 period format, e.g., 'P3M' for a period of three months."
421
+ }
422
+ },
423
+ "additionalProperties": true
424
+ },
425
+ "models": {
426
+ "description": "Specifies the logical data model. Use the model's name (e.g., the table name) as the key.",
427
+ "type": "object",
428
+ "minProperties": 1,
429
+ "propertyNames": {
430
+ "pattern": "^[a-zA-Z0-9_-]+$"
431
+ },
432
+ "additionalProperties": {
433
+ "type": "object",
434
+ "title": "Model",
435
+ "properties": {
436
+ "description": {
437
+ "type": "string"
438
+ },
439
+ "type": {
440
+ "description": "The type of the model. Examples: table, view, object. Default: table.",
441
+ "type": "string",
442
+ "title": "ModelType",
443
+ "default": "table",
444
+ "enum": [
445
+ "table",
446
+ "view",
447
+ "object"
448
+ ]
449
+ },
450
+ "title": {
451
+ "type": "string",
452
+ "description": "An optional string providing a human readable name for the model. Especially useful if the model name is cryptic or contains abbreviations.",
453
+ "examples": [
454
+ "Purchase Orders",
455
+ "Air Shipments"
456
+ ]
457
+ },
458
+ "fields": {
459
+ "description": "Specifies a field in the data model. Use the field name (e.g., the column name) as the key.",
460
+ "type": "object",
461
+ "additionalProperties": {
462
+ "type": "object",
463
+ "title": "Field",
464
+ "properties": {
465
+ "description": {
466
+ "type": "string",
467
+ "description": "An optional string describing the semantic of the data in this field."
468
+ },
469
+ "title": {
470
+ "type": "string",
471
+ "description": "An optional string providing a human readable name for the field. Especially useful if the field name is cryptic or contains abbreviations."
472
+ },
473
+ "type": {
474
+ "$ref": "#/$defs/FieldType"
475
+ },
476
+ "required": {
477
+ "type": "boolean",
478
+ "default": false,
479
+ "description": "An indication, if this field must contain a value and may not be null."
480
+ },
481
+ "fields": {
482
+ "description": "The nested fields (e.g. columns) of the object, record, or struct.",
483
+ "type": "object",
484
+ "additionalProperties": {
485
+ "$ref": "#/properties/models/additionalProperties/properties/fields/additionalProperties"
486
+ }
487
+ },
488
+ "items": {
489
+ "$ref": "#/properties/models/additionalProperties/properties/fields/additionalProperties"
490
+ },
491
+ "keys": {
492
+ "$ref": "#/properties/models/additionalProperties/properties/fields/additionalProperties"
493
+ },
494
+ "values": {
495
+ "$ref": "#/properties/models/additionalProperties/properties/fields/additionalProperties"
496
+ },
497
+ "primary": {
498
+ "type": "boolean",
499
+ "deprecationMessage": "Use the primaryKey field instead."
500
+ },
501
+ "primaryKey": {
502
+ "type": "boolean",
503
+ "default": false,
504
+ "description": "If this field is a primary key."
505
+ },
506
+ "references": {
507
+ "type": "string",
508
+ "description": "The reference to a field in another model. E.g. use 'orders.order_id' to reference the order_id field of the model orders. Think of defining a foreign key relationship.",
509
+ "examples": [
510
+ "orders.order_id",
511
+ "model.nested_field.field"
512
+ ]
513
+ },
514
+ "unique": {
515
+ "type": "boolean",
516
+ "default": false,
517
+ "description": "An indication, if the value must be unique within the model."
518
+ },
519
+ "enum": {
520
+ "type": "array",
521
+ "items": {
522
+ "type": "string"
523
+ },
524
+ "uniqueItems": true,
525
+ "description": "A value must be equal to one of the elements in this array value. Only evaluated if the value is not null."
526
+ },
527
+ "minLength": {
528
+ "type": "integer",
529
+ "description": "The length of a value must be greater than, or equal to, the value of this. Only applies to string types."
530
+ },
531
+ "maxLength": {
532
+ "type": "integer",
533
+ "description": "The length of a value must be less than, or equal to, the value of this. Only applies to string types."
534
+ },
535
+ "format": {
536
+ "type": "string",
537
+ "description": "A specific format the value must comply with (e.g., 'email', 'uri', 'uuid').",
538
+ "examples": [
539
+ "email",
540
+ "uri",
541
+ "uuid"
542
+ ]
543
+ },
544
+ "precision": {
545
+ "type": "number",
546
+ "examples": [
547
+ 38
548
+ ],
549
+ "description": "The maximum number of digits in a number. Only applies to numeric values. Defaults to 38."
550
+ },
551
+ "scale": {
552
+ "type": "number",
553
+ "examples": [
554
+ 0
555
+ ],
556
+ "description": "The maximum number of decimal places in a number. Only applies to numeric values. Defaults to 0."
557
+ },
558
+ "pattern": {
559
+ "type": "string",
560
+ "description": "A regular expression the value must match. Only applies to string types.",
561
+ "examples": [
562
+ "^[a-zA-Z0-9_-]+$"
563
+ ]
564
+ },
565
+ "minimum": {
566
+ "type": "number",
567
+ "description": "A value of a number must be greater than, or equal to, the value of this. Only evaluated if the value is not null. Only applies to numeric values."
568
+ },
569
+ "exclusiveMinimum": {
570
+ "type": "number",
571
+ "description": "A value of a number must be greater than the value of this. Only evaluated if the value is not null. Only applies to numeric values."
572
+ },
573
+ "maximum": {
574
+ "type": "number",
575
+ "description": "A value of a number must be less than, or equal to, the value of this. Only evaluated if the value is not null. Only applies to numeric values."
576
+ },
577
+ "exclusiveMaximum": {
578
+ "type": "number",
579
+ "description": "A value of a number must be less than the value of this. Only evaluated if the value is not null. Only applies to numeric values."
580
+ },
581
+ "example": {
582
+ "type": "string",
583
+ "description": "An example value for this field.",
584
+ "deprecationMessage": "Use the examples field instead."
585
+ },
586
+ "examples": {
587
+ "type": "array",
588
+ "description": "A list of example values for this field."
589
+ },
590
+ "pii": {
591
+ "type": "boolean",
592
+ "description": "An indication, if this field contains Personally Identifiable Information (PII)."
593
+ },
594
+ "classification": {
595
+ "type": "string",
596
+ "description": "The data class defining the sensitivity level for this field, according to the organization's classification scheme.",
597
+ "examples": [
598
+ "sensitive",
599
+ "restricted",
600
+ "internal",
601
+ "public"
602
+ ]
603
+ },
604
+ "tags": {
605
+ "type": "array",
606
+ "items": {
607
+ "type": "string"
608
+ },
609
+ "description": "Custom metadata to provide additional context."
610
+ },
611
+ "links": {
612
+ "type": "object",
613
+ "description": "Links to external resources.",
614
+ "minProperties": 1,
615
+ "propertyNames": {
616
+ "pattern": "^[a-zA-Z0-9_-]+$"
617
+ },
618
+ "additionalProperties": {
619
+ "type": "string",
620
+ "title": "Link",
621
+ "description": "A URL to an external resource.",
622
+ "format": "uri",
623
+ "examples": [
624
+ "https://example.com"
625
+ ]
626
+ }
627
+ },
628
+ "$ref": {
629
+ "type": "string",
630
+ "description": "A reference URI to a definition in the specification, internally or externally. Properties will be inherited from the definition."
631
+ },
632
+ "quality": {
633
+ "type": "array",
634
+ "items": {
635
+ "$ref": "#/$defs/Quality"
636
+ }
637
+ },
638
+ "lineage": {
639
+ "$ref": "#/$defs/Lineage"
640
+ },
641
+ "config": {
642
+ "type": "object",
643
+ "description": "Additional metadata for field configuration.",
644
+ "additionalProperties": {
645
+ "type": [
646
+ "string",
647
+ "number",
648
+ "boolean",
649
+ "object",
650
+ "array",
651
+ "null"
652
+ ]
653
+ },
654
+ "properties": {
655
+ "avroType": {
656
+ "type": "string",
657
+ "description": "Specify the field type to use when exporting the data model to Apache Avro."
658
+ },
659
+ "avroLogicalType": {
660
+ "type": "string",
661
+ "description": "Specify the logical field type to use when exporting the data model to Apache Avro."
662
+ },
663
+ "bigqueryType": {
664
+ "type": "string",
665
+ "description": "Specify the physical column type that is used in a BigQuery table, e.g., `NUMERIC(5, 2)`."
666
+ },
667
+ "snowflakeType": {
668
+ "type": "string",
669
+ "description": "Specify the physical column type that is used in a Snowflake table, e.g., `TIMESTAMP_LTZ`."
670
+ },
671
+ "redshiftType": {
672
+ "type": "string",
673
+ "description": "Specify the physical column type that is used in a Redshift table, e.g., `SMALLINT`."
674
+ },
675
+ "sqlserverType": {
676
+ "type": "string",
677
+ "description": "Specify the physical column type that is used in a SQL Server table, e.g., `DATETIME2`."
678
+ },
679
+ "databricksType": {
680
+ "type": "string",
681
+ "description": "Specify the physical column type that is used in a Databricks Unity Catalog table."
682
+ },
683
+ "glueType": {
684
+ "type": "string",
685
+ "description": "Specify the physical column type that is used in an AWS Glue Data Catalog table."
686
+ }
687
+ }
688
+ }
689
+ }
690
+ }
691
+ },
692
+ "primaryKey": {
693
+ "type": "array",
694
+ "items": {
695
+ "type": "string"
696
+ },
697
+ "description": "The compound primary key of the model."
698
+ },
699
+ "quality": {
700
+ "type": "array",
701
+ "items": {
702
+ "$ref": "#/$defs/Quality"
703
+ }
704
+ },
705
+ "examples": {
706
+ "type": "array"
707
+ },
708
+ "additionalFields": {
709
+ "type": "boolean",
710
+ "description": "Specify whether the model can have additional fields that are not defined in the contract.",
711
+ "default": false
712
+ },
713
+ "config": {
714
+ "type": "object",
715
+ "description": "Additional metadata for model configuration.",
716
+ "additionalProperties": {
717
+ "type": [
718
+ "string",
719
+ "number",
720
+ "boolean",
721
+ "object",
722
+ "array",
723
+ "null"
724
+ ]
725
+ },
726
+ "properties": {
727
+ "avroNamespace": {
728
+ "type": "string",
729
+ "description": "The namespace to use when importing and exporting the data model from / to Apache Avro."
730
+ }
731
+ }
732
+ }
733
+ }
734
+ }
735
+ },
736
+ "definitions": {
737
+ "description": "Clear and concise explanations of syntax, semantic, and classification of business objects in a given domain.",
738
+ "type": "object",
739
+ "propertyNames": {
740
+ "pattern": "^[a-zA-Z0-9/_-]+$"
741
+ },
742
+ "additionalProperties": {
743
+ "type": "object",
744
+ "title": "Definition",
745
+ "properties": {
746
+ "domain": {
747
+ "type": "string",
748
+ "description": "The domain in which this definition is valid.",
749
+ "default": "global",
750
+ "deprecationMessage": "This field is deprecated. Encode the domain into the ID using slashes."
751
+ },
752
+ "name": {
753
+ "type": "string",
754
+ "description": "The technical name of this definition.",
755
+ "deprecationMessage": "This field is deprecated. Encode the name into the ID using slashes."
756
+ },
757
+ "title": {
758
+ "type": "string",
759
+ "description": "The business name of this definition."
760
+ },
761
+ "description": {
762
+ "type": "string",
763
+ "description": "Clear and concise explanations related to the domain."
764
+ },
765
+ "type": {
766
+ "$ref": "#/$defs/FieldType"
767
+ },
768
+ "fields": {
769
+ "description": "The nested fields (e.g. columns) of the object, record, or struct.",
770
+ "type": "object",
771
+ "additionalProperties": {
772
+ "$ref": "#/properties/models/additionalProperties/properties/fields/additionalProperties"
773
+ }
774
+ },
775
+ "items": {
776
+ "$ref": "#/properties/models/additionalProperties/properties/fields/additionalProperties"
777
+ },
778
+ "keys": {
779
+ "$ref": "#/properties/models/additionalProperties/properties/fields/additionalProperties"
780
+ },
781
+ "values": {
782
+ "$ref": "#/properties/models/additionalProperties/properties/fields/additionalProperties"
783
+ },
784
+ "minLength": {
785
+ "type": "integer",
786
+ "description": "A value must be greater than or equal to this value. Applies only to string types."
787
+ },
788
+ "maxLength": {
789
+ "type": "integer",
790
+ "description": "A value must be less than or equal to this value. Applies only to string types."
791
+ },
792
+ "format": {
793
+ "type": "string",
794
+ "description": "Specific format requirements for the value (e.g., 'email', 'uri', 'uuid')."
795
+ },
796
+ "precision": {
797
+ "type": "integer",
798
+ "examples": [
799
+ 38
800
+ ],
801
+ "description": "The maximum number of digits in a number. Only applies to numeric values. Defaults to 38."
802
+ },
803
+ "scale": {
804
+ "type": "integer",
805
+ "examples": [
806
+ 0
807
+ ],
808
+ "description": "The maximum number of decimal places in a number. Only applies to numeric values. Defaults to 0."
809
+ },
810
+ "pattern": {
811
+ "type": "string",
812
+ "description": "A regular expression pattern the value must match. Applies only to string types."
813
+ },
814
+ "minimum": {
815
+ "type": "number",
816
+ "description": "A value of a number must be greater than, or equal to, the value of this. Only evaluated if the value is not null. Only applies to numeric values."
817
+ },
818
+ "exclusiveMinimum": {
819
+ "type": "number",
820
+ "description": "A value of a number must be greater than the value of this. Only evaluated if the value is not null. Only applies to numeric values."
821
+ },
822
+ "maximum": {
823
+ "type": "number",
824
+ "description": "A value of a number must be less than, or equal to, the value of this. Only evaluated if the value is not null. Only applies to numeric values."
825
+ },
826
+ "exclusiveMaximum": {
827
+ "type": "number",
828
+ "description": "A value of a number must be less than the value of this. Only evaluated if the value is not null. Only applies to numeric values."
829
+ },
830
+ "example": {
831
+ "type": "string",
832
+ "description": "An example value.",
833
+ "deprecationMessage": "Use the examples field instead."
834
+ },
835
+ "examples": {
836
+ "type": "array",
837
+ "description": "Example value."
838
+ },
839
+ "pii": {
840
+ "type": "boolean",
841
+ "description": "Indicates if the field contains Personally Identifiable Information (PII)."
842
+ },
843
+ "classification": {
844
+ "type": "string",
845
+ "description": "The data class defining the sensitivity level for this field."
846
+ },
847
+ "tags": {
848
+ "type": "array",
849
+ "items": {
850
+ "type": "string"
851
+ },
852
+ "description": "Custom metadata to provide additional context."
853
+ },
854
+ "links": {
855
+ "type": "object",
856
+ "description": "Links to external resources.",
857
+ "minProperties": 1,
858
+ "propertyNames": {
859
+ "pattern": "^[a-zA-Z0-9_-]+$"
860
+ },
861
+ "additionalProperties": {
862
+ "type": "string",
863
+ "title": "Link",
864
+ "description": "A URL to an external resource.",
865
+ "format": "uri",
866
+ "examples": [
867
+ "https://example.com"
868
+ ]
869
+ }
870
+ }
871
+ },
872
+ "required": [
873
+ "type"
874
+ ]
875
+ }
876
+ },
877
+ "servicelevels": {
878
+ "type": "object",
879
+ "description": "Specifies the service level agreements for the provided data, including availability, data retention policies, latency requirements, data freshness, update frequency, support availability, and backup policies.",
880
+ "properties": {
881
+ "availability": {
882
+ "type": "object",
883
+ "description": "Availability refers to the promise or guarantee by the service provider about the uptime of the system that provides the data.",
884
+ "properties": {
885
+ "description": {
886
+ "type": "string",
887
+ "description": "An optional string describing the availability service level.",
888
+ "example": "The server is available during support hours"
889
+ },
890
+ "percentage": {
891
+ "type": "string",
892
+ "description": "An optional string describing the guaranteed uptime in percent (e.g., `99.9%`)",
893
+ "pattern": "^\\d+(\\.\\d+)?%$",
894
+ "example": "99.9%"
895
+ }
896
+ }
897
+ },
898
+ "retention": {
899
+ "type": "object",
900
+ "description": "Retention covers the period how long data will be available.",
901
+ "properties": {
902
+ "description": {
903
+ "type": "string",
904
+ "description": "An optional string describing the retention service level.",
905
+ "example": "Data is retained for one year."
906
+ },
907
+ "period": {
908
+ "type": "string",
909
+ "description": "An optional period of time, how long data is available. Supported formats: Simple duration (e.g., `1 year`, `30d`) and ISO 8601 duration (e.g., `P1Y`).",
910
+ "example": "P1Y"
911
+ },
912
+ "unlimited": {
913
+ "type": "boolean",
914
+ "description": "An optional indicator that data is kept forever.",
915
+ "example": false
916
+ },
917
+ "timestampField": {
918
+ "type": "string",
919
+ "description": "An optional reference to the field that contains the timestamp that the period refers to.",
920
+ "example": "orders.order_timestamp"
921
+ }
922
+ }
923
+ },
924
+ "latency": {
925
+ "type": "object",
926
+ "description": "Latency refers to the maximum amount of time from the source to its destination.",
927
+ "properties": {
928
+ "description": {
929
+ "type": "string",
930
+ "description": "An optional string describing the latency service level.",
931
+ "example": "Data is available within 25 hours after the order was placed."
932
+ },
933
+ "threshold": {
934
+ "type": "string",
935
+ "description": "An optional maximum duration between the source timestamp and the processed timestamp. Supported formats: Simple duration (e.g., `24 hours`, `5s`) and ISO 8601 duration (e.g., `PT24H`).",
936
+ "example": "25h"
937
+ },
938
+ "sourceTimestampField": {
939
+ "type": "string",
940
+ "description": "An optional reference to the field that contains the timestamp when the data was provided at the source.",
941
+ "example": "orders.order_timestamp"
942
+ },
943
+ "processedTimestampField": {
944
+ "type": "string",
945
+ "description": "An optional reference to the field that contains the processing timestamp, which denotes when the data is made available to consumers of this data contract.",
946
+ "example": "orders.processed_timestamp"
947
+ }
948
+ }
949
+ },
950
+ "freshness": {
951
+ "type": "object",
952
+ "description": "The maximum age of the youngest row in a table.",
953
+ "properties": {
954
+ "description": {
955
+ "type": "string",
956
+ "description": "An optional string describing the freshness service level.",
957
+ "example": "The age of the youngest row in a table is within 25 hours."
958
+ },
959
+ "threshold": {
960
+ "type": "string",
961
+ "description": "An optional maximum age of the youngest entry. Supported formats: Simple duration (e.g., `24 hours`, `5s`) and ISO 8601 duration (e.g., `PT24H`).",
962
+ "example": "25h"
963
+ },
964
+ "timestampField": {
965
+ "type": "string",
966
+ "description": "An optional reference to the field that contains the timestamp that the threshold refers to.",
967
+ "example": "orders.order_timestamp"
968
+ }
969
+ }
970
+ },
971
+ "frequency": {
972
+ "type": "object",
973
+ "description": "Frequency describes how often data is updated.",
974
+ "properties": {
975
+ "description": {
976
+ "type": "string",
977
+ "description": "An optional string describing the frequency service level.",
978
+ "example": "Data is delivered once a day."
979
+ },
980
+ "type": {
981
+ "type": "string",
982
+ "enum": [
983
+ "batch",
984
+ "micro-batching",
985
+ "streaming",
986
+ "manual"
987
+ ],
988
+ "description": "The method of data processing.",
989
+ "example": "batch"
990
+ },
991
+ "interval": {
992
+ "type": "string",
993
+ "description": "Optional. Only for batch: How often the pipeline is triggered, e.g., `daily`.",
994
+ "example": "daily"
995
+ },
996
+ "cron": {
997
+ "type": "string",
998
+ "description": "Optional. Only for batch: A cron expression when the pipeline is triggered. E.g., `0 0 * * *`.",
999
+ "example": "0 0 * * *"
1000
+ }
1001
+ }
1002
+ },
1003
+ "support": {
1004
+ "type": "object",
1005
+ "description": "Support describes the times when support will be available for contact.",
1006
+ "properties": {
1007
+ "description": {
1008
+ "type": "string",
1009
+ "description": "An optional string describing the support service level.",
1010
+ "example": "The data is available during typical business hours at headquarters."
1011
+ },
1012
+ "time": {
1013
+ "type": "string",
1014
+ "description": "An optional string describing the times when support will be available for contact such as `24/7` or `business hours only`.",
1015
+ "example": "9am to 5pm in EST on business days"
1016
+ },
1017
+ "responseTime": {
1018
+ "type": "string",
1019
+ "description": "An optional string describing the time it takes for the support team to acknowledge a request. This does not mean the issue will be resolved immediately, but it assures users that their request has been received and will be dealt with.",
1020
+ "example": "24 hours"
1021
+ }
1022
+ }
1023
+ },
1024
+ "backup": {
1025
+ "type": "object",
1026
+ "description": "Backup specifies details about data backup procedures.",
1027
+ "properties": {
1028
+ "description": {
1029
+ "type": "string",
1030
+ "description": "An optional string describing the backup service level.",
1031
+ "example": "Data is backed up once a week, every Sunday at 0:00 UTC."
1032
+ },
1033
+ "interval": {
1034
+ "type": "string",
1035
+ "description": "An optional interval that defines how often data will be backed up, e.g., `daily`.",
1036
+ "example": "weekly"
1037
+ },
1038
+ "cron": {
1039
+ "type": "string",
1040
+ "description": "An optional cron expression when data will be backed up, e.g., `0 0 * * *`.",
1041
+ "example": "0 0 * * 0"
1042
+ },
1043
+ "recoveryTime": {
1044
+ "type": "string",
1045
+ "description": "An optional Recovery Time Objective (RTO) specifies the maximum amount of time allowed to restore data from a backup after a failure or loss event (e.g., 4 hours, 24 hours).",
1046
+ "example": "24 hours"
1047
+ },
1048
+ "recoveryPoint": {
1049
+ "type": "string",
1050
+ "description": "An optional Recovery Point Objective (RPO) defines the maximum acceptable age of files that must be recovered from backup storage for normal operations to resume after a disaster or data loss event. This essentially measures how much data you can afford to lose, measured in time (e.g., 4 hours, 24 hours).",
1051
+ "example": "1 week"
1052
+ }
1053
+ }
1054
+ }
1055
+ }
1056
+ },
1057
+ "links": {
1058
+ "type": "object",
1059
+ "description": "Links to external resources.",
1060
+ "minProperties": 1,
1061
+ "propertyNames": {
1062
+ "pattern": "^[a-zA-Z0-9_-]+$"
1063
+ },
1064
+ "additionalProperties": {
1065
+ "type": "string",
1066
+ "title": "Link",
1067
+ "description": "A URL to an external resource.",
1068
+ "format": "uri",
1069
+ "examples": [
1070
+ "https://example.com"
1071
+ ]
1072
+ }
1073
+ },
1074
+ "tags": {
1075
+ "type": "array",
1076
+ "items": {
1077
+ "type": "string",
1078
+ "description": "Tags to facilitate searching and filtering.",
1079
+ "examples": [
1080
+ "databricks",
1081
+ "pii",
1082
+ "sensitive"
1083
+ ]
1084
+ },
1085
+ "description": "Tags to facilitate searching and filtering."
1086
+ }
1087
+ },
1088
+ "required": [
1089
+ "dataContractSpecification",
1090
+ "id",
1091
+ "info"
1092
+ ],
1093
+ "$defs": {
1094
+ "FieldType": {
1095
+ "type": "string",
1096
+ "title": "FieldType",
1097
+ "description": "The logical data type of the field.",
1098
+ "enum": [
1099
+ "number",
1100
+ "decimal",
1101
+ "numeric",
1102
+ "int",
1103
+ "integer",
1104
+ "long",
1105
+ "bigint",
1106
+ "float",
1107
+ "double",
1108
+ "string",
1109
+ "text",
1110
+ "varchar",
1111
+ "boolean",
1112
+ "timestamp",
1113
+ "timestamp_tz",
1114
+ "timestamp_ntz",
1115
+ "date",
1116
+ "time",
1117
+ "array",
1118
+ "map",
1119
+ "object",
1120
+ "record",
1121
+ "struct",
1122
+ "bytes",
1123
+ "variant",
1124
+ "json",
1125
+ "null"
1126
+ ]
1127
+ },
1128
+ "BaseServer": {
1129
+ "type": "object",
1130
+ "properties": {
1131
+ "description": {
1132
+ "type": "string",
1133
+ "description": "An optional string describing the servers."
1134
+ },
1135
+ "environment": {
1136
+ "type": "string",
1137
+ "description": "The environment in which the servers are running. Examples: prod, sit, stg."
1138
+ },
1139
+ "type": {
1140
+ "type": "string",
1141
+ "description": "The type of the data product technology that implements the data contract.",
1142
+ "examples": [
1143
+ "azure",
1144
+ "bigquery",
1145
+ "BigQuery",
1146
+ "clickhouse",
1147
+ "databricks",
1148
+ "dataframe",
1149
+ "glue",
1150
+ "kafka",
1151
+ "kinesis",
1152
+ "local",
1153
+ "oracle",
1154
+ "postgres",
1155
+ "pubsub",
1156
+ "redshift",
1157
+ "sftp",
1158
+ "sqlserver",
1159
+ "snowflake",
1160
+ "s3",
1161
+ "trino"
1162
+ ]
1163
+ },
1164
+ "roles": {
1165
+ "description": "An optional array of roles that are available and can be requested to access the server for role-based access control. E.g. separate roles for different regions or sensitive data.",
1166
+ "type": "array",
1167
+ "items": {
1168
+ "type": "object",
1169
+ "properties": {
1170
+ "name": {
1171
+ "type": "string",
1172
+ "description": "The name of the role."
1173
+ },
1174
+ "description": {
1175
+ "type": "string",
1176
+ "description": "A description of the role and what access the role provides."
1177
+ }
1178
+ },
1179
+ "required": [
1180
+ "name"
1181
+ ]
1182
+ }
1183
+ }
1184
+ },
1185
+ "additionalProperties": true,
1186
+ "required": [
1187
+ "type"
1188
+ ]
1189
+ },
1190
+ "BigQueryServer": {
1191
+ "type": "object",
1192
+ "title": "BigQueryServer",
1193
+ "properties": {
1194
+ "project": {
1195
+ "type": "string",
1196
+ "description": "The GCP project name."
1197
+ },
1198
+ "dataset": {
1199
+ "type": "string",
1200
+ "description": "The GCP dataset name."
1201
+ }
1202
+ },
1203
+ "required": [
1204
+ "project",
1205
+ "dataset"
1206
+ ]
1207
+ },
1208
+ "S3Server": {
1209
+ "type": "object",
1210
+ "title": "S3Server",
1211
+ "properties": {
1212
+ "location": {
1213
+ "type": "string",
1214
+ "format": "uri",
1215
+ "description": "S3 URL, starting with `s3://`",
1216
+ "examples": [
1217
+ "s3://datacontract-example-orders-latest/data/{model}/*.json"
1218
+ ]
1219
+ },
1220
+ "endpointUrl": {
1221
+ "type": "string",
1222
+ "format": "uri",
1223
+ "description": "The server endpoint for S3-compatible servers.",
1224
+ "examples": [
1225
+ "https://minio.example.com"
1226
+ ]
1227
+ },
1228
+ "format": {
1229
+ "type": "string",
1230
+ "enum": [
1231
+ "parquet",
1232
+ "delta",
1233
+ "json",
1234
+ "csv"
1235
+ ],
1236
+ "description": "File format."
1237
+ },
1238
+ "delimiter": {
1239
+ "type": "string",
1240
+ "enum": [
1241
+ "new_line",
1242
+ "array"
1243
+ ],
1244
+ "description": "Only for format = json. How multiple json documents are delimited within one file"
1245
+ }
1246
+ },
1247
+ "required": [
1248
+ "location"
1249
+ ]
1250
+ },
1251
+ "SftpServer": {
1252
+ "type": "object",
1253
+ "title": "SftpServer",
1254
+ "properties": {
1255
+ "location": {
1256
+ "type": "string",
1257
+ "format": "uri",
1258
+ "pattern": "^sftp://.*",
1259
+ "description": "SFTP URL, starting with `sftp://`",
1260
+ "examples": [
1261
+ "sftp://123.123.12.123/{model}/*.json"
1262
+ ]
1263
+ },
1264
+ "format": {
1265
+ "type": "string",
1266
+ "enum": [
1267
+ "parquet",
1268
+ "delta",
1269
+ "json",
1270
+ "csv"
1271
+ ],
1272
+ "description": "File format."
1273
+ },
1274
+ "delimiter": {
1275
+ "type": "string",
1276
+ "enum": [
1277
+ "new_line",
1278
+ "array"
1279
+ ],
1280
+ "description": "Only for format = json. How multiple json documents are delimited within one file"
1281
+ }
1282
+ },
1283
+ "required": [
1284
+ "location"
1285
+ ]
1286
+ },
1287
+ "RedshiftServer": {
1288
+ "type": "object",
1289
+ "title": "RedshiftServer",
1290
+ "properties": {
1291
+ "account": {
1292
+ "type": "string",
1293
+ "description": "An optional string describing the server."
1294
+ },
1295
+ "host": {
1296
+ "type": "string",
1297
+ "description": "An optional string describing the host name."
1298
+ },
1299
+ "database": {
1300
+ "type": "string",
1301
+ "description": "An optional string describing the server."
1302
+ },
1303
+ "schema": {
1304
+ "type": "string",
1305
+ "description": "An optional string describing the server."
1306
+ },
1307
+ "clusterIdentifier": {
1308
+ "type": "string",
1309
+ "description": "An optional string describing the cluster's identifier.",
1310
+ "examples": [
1311
+ "redshift-prod-eu",
1312
+ "analytics-cluster"
1313
+ ]
1314
+ },
1315
+ "port": {
1316
+ "type": "integer",
1317
+ "description": "An optional integer describing the cluster's port.",
1318
+ "examples": [
1319
+ 5439
1320
+ ]
1321
+ },
1322
+ "endpoint": {
1323
+ "type": "string",
1324
+ "description": "An optional string describing the cluster's endpoint.",
1325
+ "examples": [
1326
+ "analytics-cluster.example.eu-west-1.redshift.amazonaws.com:5439/analytics"
1327
+ ]
1328
+ }
1329
+ },
1330
+ "additionalProperties": true,
1331
+ "required": [
1332
+ "account",
1333
+ "database",
1334
+ "schema"
1335
+ ]
1336
+ },
1337
+ "AzureServer": {
1338
+ "type": "object",
1339
+ "title": "AzureServer",
1340
+ "properties": {
1341
+ "location": {
1342
+ "type": "string",
1343
+ "format": "uri",
1344
+ "description": "Path to Azure Blob Storage or Azure Data Lake Storage (ADLS), supports globs. Recommended pattern is 'abfss://<container_name>/<path>'",
1345
+ "examples": [
1346
+ "abfss://my_container_name/path",
1347
+ "abfss://my_container_name/path/*.json",
1348
+ "az://my_storage_account_name.blob.core.windows.net/my_container/path/*.parquet",
1349
+ "abfss://my_storage_account_name.dfs.core.windows.net/my_container_name/path/*.parquet"
1350
+ ]
1351
+ },
1352
+ "format": {
1353
+ "type": "string",
1354
+ "enum": [
1355
+ "parquet",
1356
+ "delta",
1357
+ "json",
1358
+ "csv"
1359
+ ],
1360
+ "description": "File format."
1361
+ },
1362
+ "delimiter": {
1363
+ "type": "string",
1364
+ "enum": [
1365
+ "new_line",
1366
+ "array"
1367
+ ],
1368
+ "description": "Only for format = json. How multiple json documents are delimited within one file"
1369
+ }
1370
+ },
1371
+ "required": [
1372
+ "location",
1373
+ "format"
1374
+ ]
1375
+ },
1376
+ "SqlserverServer": {
1377
+ "type": "object",
1378
+ "title": "SqlserverServer",
1379
+ "properties": {
1380
+ "host": {
1381
+ "type": "string",
1382
+ "description": "The host to the database server",
1383
+ "examples": [
1384
+ "localhost"
1385
+ ]
1386
+ },
1387
+ "port": {
1388
+ "type": "integer",
1389
+ "description": "The port to the database server.",
1390
+ "default": 1433,
1391
+ "examples": [
1392
+ 1433
1393
+ ]
1394
+ },
1395
+ "database": {
1396
+ "type": "string",
1397
+ "description": "The name of the database.",
1398
+ "examples": [
1399
+ "database"
1400
+ ]
1401
+ },
1402
+ "schema": {
1403
+ "type": "string",
1404
+ "description": "The name of the schema in the database.",
1405
+ "examples": [
1406
+ "dbo"
1407
+ ]
1408
+ }
1409
+ },
1410
+ "required": [
1411
+ "host",
1412
+ "database",
1413
+ "schema"
1414
+ ]
1415
+ },
1416
+ "SnowflakeServer": {
1417
+ "type": "object",
1418
+ "title": "SnowflakeServer",
1419
+ "properties": {
1420
+ "account": {
1421
+ "type": "string",
1422
+ "description": "An optional string describing the server."
1423
+ },
1424
+ "database": {
1425
+ "type": "string",
1426
+ "description": "An optional string describing the server."
1427
+ },
1428
+ "schema": {
1429
+ "type": "string",
1430
+ "description": "An optional string describing the server."
1431
+ }
1432
+ },
1433
+ "required": [
1434
+ "account",
1435
+ "database",
1436
+ "schema"
1437
+ ]
1438
+ },
1439
+ "DatabricksServer": {
1440
+ "type": "object",
1441
+ "title": "DatabricksServer",
1442
+ "properties": {
1443
+ "host": {
1444
+ "type": "string",
1445
+ "description": "The Databricks host",
1446
+ "examples": [
1447
+ "dbc-abcdefgh-1234.cloud.databricks.com"
1448
+ ]
1449
+ },
1450
+ "catalog": {
1451
+ "type": "string",
1452
+ "description": "The name of the Hive or Unity catalog"
1453
+ },
1454
+ "schema": {
1455
+ "type": "string",
1456
+ "description": "The schema name in the catalog"
1457
+ }
1458
+ },
1459
+ "required": [
1460
+ "catalog",
1461
+ "schema"
1462
+ ]
1463
+ },
1464
+ "DataframeServer": {
1465
+ "type": "object",
1466
+ "title": "DataframeServer",
1467
+ "required": [
1468
+ "type"
1469
+ ]
1470
+ },
1471
+ "GlueServer": {
1472
+ "type": "object",
1473
+ "title": "GlueServer",
1474
+ "properties": {
1475
+ "account": {
1476
+ "type": "string",
1477
+ "description": "The AWS Glue account",
1478
+ "examples": [
1479
+ "1234-5678-9012"
1480
+ ]
1481
+ },
1482
+ "database": {
1483
+ "type": "string",
1484
+ "description": "The AWS Glue database name",
1485
+ "examples": [
1486
+ "my_database"
1487
+ ]
1488
+ },
1489
+ "location": {
1490
+ "type": "string",
1491
+ "format": "uri",
1492
+ "description": "The AWS S3 path. Must be in the form of a URL.",
1493
+ "examples": [
1494
+ "s3://datacontract-example-orders-latest/data/{model}"
1495
+ ]
1496
+ },
1497
+ "format": {
1498
+ "type": "string",
1499
+ "description": "The format of the files",
1500
+ "examples": [
1501
+ "parquet",
1502
+ "csv",
1503
+ "json",
1504
+ "delta"
1505
+ ]
1506
+ }
1507
+ },
1508
+ "required": [
1509
+ "account",
1510
+ "database"
1511
+ ]
1512
+ },
1513
+ "PostgresServer": {
1514
+ "type": "object",
1515
+ "title": "PostgresServer",
1516
+ "properties": {
1517
+ "host": {
1518
+ "type": "string",
1519
+ "description": "The host to the database server",
1520
+ "examples": [
1521
+ "localhost"
1522
+ ]
1523
+ },
1524
+ "port": {
1525
+ "type": "integer",
1526
+ "description": "The port to the database server."
1527
+ },
1528
+ "database": {
1529
+ "type": "string",
1530
+ "description": "The name of the database.",
1531
+ "examples": [
1532
+ "postgres"
1533
+ ]
1534
+ },
1535
+ "schema": {
1536
+ "type": "string",
1537
+ "description": "The name of the schema in the database.",
1538
+ "examples": [
1539
+ "public"
1540
+ ]
1541
+ }
1542
+ },
1543
+ "required": [
1544
+ "host",
1545
+ "port",
1546
+ "database",
1547
+ "schema"
1548
+ ]
1549
+ },
1550
+ "OracleServer": {
1551
+ "type": "object",
1552
+ "title": "OracleServer",
1553
+ "properties": {
1554
+ "host": {
1555
+ "type": "string",
1556
+ "description": "The host to the oracle server",
1557
+ "examples": [
1558
+ "localhost"
1559
+ ]
1560
+ },
1561
+ "port": {
1562
+ "type": "integer",
1563
+ "description": "The port to the oracle server.",
1564
+ "examples": [
1565
+ 1523
1566
+ ]
1567
+ },
1568
+ "serviceName": {
1569
+ "type": "string",
1570
+ "description": "The name of the service.",
1571
+ "examples": [
1572
+ "service"
1573
+ ]
1574
+ }
1575
+ },
1576
+ "required": [
1577
+ "host",
1578
+ "port",
1579
+ "serviceName"
1580
+ ]
1581
+ },
1582
+ "KafkaServer": {
1583
+ "type": "object",
1584
+ "title": "KafkaServer",
1585
+ "description": "Kafka Server",
1586
+ "properties": {
1587
+ "host": {
1588
+ "type": "string",
1589
+ "description": "The bootstrap server of the kafka cluster."
1590
+ },
1591
+ "topic": {
1592
+ "type": "string",
1593
+ "description": "The topic name."
1594
+ },
1595
+ "format": {
1596
+ "type": "string",
1597
+ "description": "The format of the message. Examples: json, avro, protobuf.",
1598
+ "default": "json"
1599
+ }
1600
+ },
1601
+ "required": [
1602
+ "host",
1603
+ "topic"
1604
+ ]
1605
+ },
1606
+ "PubSubServer": {
1607
+ "type": "object",
1608
+ "title": "PubSubServer",
1609
+ "properties": {
1610
+ "project": {
1611
+ "type": "string",
1612
+ "description": "The GCP project name."
1613
+ },
1614
+ "topic": {
1615
+ "type": "string",
1616
+ "description": "The topic name."
1617
+ }
1618
+ },
1619
+ "required": [
1620
+ "project",
1621
+ "topic"
1622
+ ]
1623
+ },
1624
+ "KinesisDataStreamsServer": {
1625
+ "type": "object",
1626
+ "title": "KinesisDataStreamsServer",
1627
+ "description": "Kinesis Data Streams Server",
1628
+ "properties": {
1629
+ "stream": {
1630
+ "type": "string",
1631
+ "description": "The name of the Kinesis data stream."
1632
+ },
1633
+ "region": {
1634
+ "type": "string",
1635
+ "description": "AWS region.",
1636
+ "examples": [
1637
+ "eu-west-1"
1638
+ ]
1639
+ },
1640
+ "format": {
1641
+ "type": "string",
1642
+ "description": "The format of the record",
1643
+ "examples": [
1644
+ "json",
1645
+ "avro",
1646
+ "protobuf"
1647
+ ]
1648
+ }
1649
+ },
1650
+ "required": [
1651
+ "stream"
1652
+ ]
1653
+ },
1654
+ "TrinoServer": {
1655
+ "type": "object",
1656
+ "title": "TrinoServer",
1657
+ "properties": {
1658
+ "host": {
1659
+ "type": "string",
1660
+ "description": "The Trino host URL.",
1661
+ "examples": [
1662
+ "localhost"
1663
+ ]
1664
+ },
1665
+ "port": {
1666
+ "type": "integer",
1667
+ "description": "The Trino port."
1668
+ },
1669
+ "catalog": {
1670
+ "type": "string",
1671
+ "description": "The name of the catalog.",
1672
+ "examples": [
1673
+ "hive"
1674
+ ]
1675
+ },
1676
+ "schema": {
1677
+ "type": "string",
1678
+ "description": "The name of the schema in the database.",
1679
+ "examples": [
1680
+ "my_schema"
1681
+ ]
1682
+ }
1683
+ },
1684
+ "required": [
1685
+ "host",
1686
+ "port",
1687
+ "catalog",
1688
+ "schema"
1689
+ ]
1690
+ },
1691
+ "ClickhouseServer": {
1692
+ "type": "object",
1693
+ "title": "ClickhouseServer",
1694
+ "properties": {
1695
+ "host": {
1696
+ "type": "string",
1697
+ "description": "The host to the database server",
1698
+ "examples": [
1699
+ "localhost"
1700
+ ]
1701
+ },
1702
+ "port": {
1703
+ "type": "integer",
1704
+ "description": "The port to the database server."
1705
+ },
1706
+ "database": {
1707
+ "type": "string",
1708
+ "description": "The name of the database.",
1709
+ "examples": [
1710
+ "postgres"
1711
+ ]
1712
+ }
1713
+ },
1714
+ "required": [
1715
+ "host",
1716
+ "port",
1717
+ "database"
1718
+ ]
1719
+ },
1720
+ "LocalServer": {
1721
+ "type": "object",
1722
+ "title": "LocalServer",
1723
+ "properties": {
1724
+ "path": {
1725
+ "type": "string",
1726
+ "description": "The relative or absolute path to the data file(s).",
1727
+ "examples": [
1728
+ "./folder/data.parquet",
1729
+ "./folder/*.parquet"
1730
+ ]
1731
+ },
1732
+ "format": {
1733
+ "type": "string",
1734
+ "description": "The format of the file(s)",
1735
+ "examples": [
1736
+ "json",
1737
+ "parquet",
1738
+ "delta",
1739
+ "csv"
1740
+ ]
1741
+ }
1742
+ },
1743
+ "required": [
1744
+ "path",
1745
+ "format"
1746
+ ]
1747
+ },
1748
+ "Quality": {
1749
+ "allOf": [
1750
+ {
1751
+ "type": "object",
1752
+ "properties": {
1753
+ "type": {
1754
+ "type": "string",
1755
+ "description": "The type of quality check",
1756
+ "enum": [
1757
+ "text",
1758
+ "library",
1759
+ "sql",
1760
+ "custom"
1761
+ ]
1762
+ },
1763
+ "description": {
1764
+ "type": "string",
1765
+ "description": "A plain text describing the quality attribute in natural language."
1766
+ }
1767
+ }
1768
+ },
1769
+ {
1770
+ "if": {
1771
+ "properties": {
1772
+ "type": {
1773
+ "const": "text"
1774
+ }
1775
+ }
1776
+ },
1777
+ "then": {
1778
+ "required": [
1779
+ "description"
1780
+ ]
1781
+ }
1782
+ },
1783
+ {
1784
+ "if": {
1785
+ "properties": {
1786
+ "type": {
1787
+ "const": "sql"
1788
+ }
1789
+ }
1790
+ },
1791
+ "then": {
1792
+ "properties": {
1793
+ "query": {
1794
+ "type": "string",
1795
+ "description": "A SQL query that returns a single number to compare with the threshold."
1796
+ },
1797
+ "dialect": {
1798
+ "type": "string",
1799
+ "description": "The SQL dialect that is used for the query. Should be compatible with the server.type.",
1800
+ "examples": [
1801
+ "athena",
1802
+ "bigquery",
1803
+ "redshift",
1804
+ "snowflake",
1805
+ "trino",
1806
+ "postgres",
1807
+ "oracle"
1808
+ ]
1809
+ },
1810
+ "mustBe": {
1811
+ "type": "number"
1812
+ },
1813
+ "mustNotBe": {
1814
+ "type": "number"
1815
+ },
1816
+ "mustBeGreaterThan": {
1817
+ "type": "number"
1818
+ },
1819
+ "mustBeGreaterThanOrEqualTo": {
1820
+ "type": "number"
1821
+ },
1822
+ "mustBeLessThan": {
1823
+ "type": "number"
1824
+ },
1825
+ "mustBeLessThanOrEqualTo": {
1826
+ "type": "number"
1827
+ },
1828
+ "mustBeBetween": {
1829
+ "type": "array",
1830
+ "items": {
1831
+ "type": "number"
1832
+ },
1833
+ "minItems": 2,
1834
+ "maxItems": 2
1835
+ },
1836
+ "mustNotBeBetween": {
1837
+ "type": "array",
1838
+ "items": {
1839
+ "type": "number"
1840
+ },
1841
+ "minItems": 2,
1842
+ "maxItems": 2
1843
+ }
1844
+ },
1845
+ "required": [
1846
+ "query"
1847
+ ]
1848
+ }
1849
+ },
1850
+ {
1851
+ "if": {
1852
+ "properties": {
1853
+ "type": {
1854
+ "const": "library"
1855
+ }
1856
+ }
1857
+ },
1858
+ "then": {
1859
+ "properties": {
1860
+ "rule": {
1861
+ "type": "string",
1862
+ "description": "Define a data quality check based on the predefined rules as per ODCS.",
1863
+ "examples": [
1864
+ "duplicateCount",
1865
+ "validValues",
1866
+ "rowCount"
1867
+ ]
1868
+ },
1869
+ "mustBe": {
1870
+ "description": "Must be equal to the value to be valid. When using numbers, it is equivalent to '='."
1871
+ },
1872
+ "mustNotBe": {
1873
+ "description": "Must not be equal to the value to be valid. When using numbers, it is equivalent to '!='."
1874
+ },
1875
+ "mustBeGreaterThan": {
1876
+ "type": "number",
1877
+ "description": "Must be greater than the value to be valid. It is equivalent to '>'."
1878
+ },
1879
+ "mustBeGreaterOrEqualTo": {
1880
+ "type": "number",
1881
+ "description": "Must be greater than or equal to the value to be valid. It is equivalent to '>='."
1882
+ },
1883
+ "mustBeLessThan": {
1884
+ "type": "number",
1885
+ "description": "Must be less than the value to be valid. It is equivalent to '<'."
1886
+ },
1887
+ "mustBeLessOrEqualTo": {
1888
+ "type": "number",
1889
+ "description": "Must be less than or equal to the value to be valid. It is equivalent to '<='."
1890
+ },
1891
+ "mustBeBetween": {
1892
+ "type": "array",
1893
+ "description": "Must be between the two numbers to be valid. Smallest number first in the array.",
1894
+ "minItems": 2,
1895
+ "maxItems": 2,
1896
+ "uniqueItems": true,
1897
+ "items": {
1898
+ "type": "number"
1899
+ }
1900
+ },
1901
+ "mustNotBeBetween": {
1902
+ "type": "array",
1903
+ "description": "Must not be between the two numbers to be valid. Smallest number first in the array.",
1904
+ "minItems": 2,
1905
+ "maxItems": 2,
1906
+ "uniqueItems": true,
1907
+ "items": {
1908
+ "type": "number"
1909
+ }
1910
+ }
1911
+ },
1912
+ "required": [
1913
+ "rule"
1914
+ ]
1915
+ }
1916
+ },
1917
+ {
1918
+ "if": {
1919
+ "properties": {
1920
+ "type": {
1921
+ "const": "custom"
1922
+ }
1923
+ }
1924
+ },
1925
+ "then": {
1926
+ "properties": {
1927
+ "description": {
1928
+ "type": "string",
1929
+ "description": "A plain text describing the quality attribute in natural language."
1930
+ },
1931
+ "engine": {
1932
+ "type": "string",
1933
+ "examples": [
1934
+ "soda",
1935
+ "great-expectations"
1936
+ ],
1937
+ "description": "The engine used for custom quality checks."
1938
+ },
1939
+ "implementation": {
1940
+ "type": [
1941
+ "object",
1942
+ "array",
1943
+ "string"
1944
+ ],
1945
+ "description": "Engine-specific quality checks and expectations."
1946
+ }
1947
+ },
1948
+ "required": [
1949
+ "engine"
1950
+ ]
1951
+ }
1952
+ }
1953
+ ]
1954
+ },
1955
+ "Lineage": {
1956
+ "type": "object",
1957
+ "properties": {
1958
+ "inputFields": {
1959
+ "type": "array",
1960
+ "items": {
1961
+ "type": "object",
1962
+ "properties": {
1963
+ "namespace": {
1964
+ "type": "string",
1965
+ "description": "The input dataset namespace"
1966
+ },
1967
+ "name": {
1968
+ "type": "string",
1969
+ "description": "The input dataset name"
1970
+ },
1971
+ "field": {
1972
+ "type": "string",
1973
+ "description": "The input field"
1974
+ },
1975
+ "transformations": {
1976
+ "type": "array",
1977
+ "items": {
1978
+ "type": "object",
1979
+ "properties": {
1980
+ "type": {
1981
+ "description": "The type of the transformation. Allowed values are: DIRECT, INDIRECT",
1982
+ "type": "string"
1983
+ },
1984
+ "subtype": {
1985
+ "type": "string",
1986
+ "description": "The subtype of the transformation"
1987
+ },
1988
+ "description": {
1989
+ "type": "string",
1990
+ "description": "a string representation of the transformation applied"
1991
+ },
1992
+ "masking": {
1993
+ "type": "boolean",
1994
+ "description": "is transformation masking the data or not"
1995
+ }
1996
+ },
1997
+ "required": [
1998
+ "type"
1999
+ ],
2000
+ "additionalProperties": true
2001
+ }
2002
+ }
2003
+ },
2004
+ "additionalProperties": true,
2005
+ "required": [
2006
+ "namespace",
2007
+ "name",
2008
+ "field"
2009
+ ]
2010
+ }
2011
+ },
2012
+ "transformationDescription": {
2013
+ "type": "string",
2014
+ "description": "a string representation of the transformation applied",
2015
+ "deprecated": true
2016
+ },
2017
+ "transformationType": {
2018
+ "type": "string",
2019
+ "description": "IDENTITY|MASKED reflects a clearly defined behavior. IDENTITY: exact same as input; MASKED: no original data available (like a hash of PII for example)",
2020
+ "deprecated": true
2021
+ }
2022
+ },
2023
+ "additionalProperties": true,
2024
+ "required": [
2025
+ "inputFields"
2026
+ ]
2027
+ }
2028
+ }
2029
+ }