datacontract-cli 0.10.33__py3-none-any.whl → 0.10.34__py3-none-any.whl

This diff shows the changes between publicly available package versions as they were released to their public registry. It is provided for informational purposes only.

Potentially problematic release.

@@ -1,4 +1,6 @@
1
+ import re
1
2
  import uuid
3
+ from dataclasses import dataclass
2
4
  from typing import List
3
5
  from venv import logger
4
6
 
@@ -9,6 +11,12 @@ from datacontract.model.data_contract_specification import DataContractSpecifica
9
11
  from datacontract.model.run import Check
10
12
 
11
13
 
14
+ @dataclass
15
+ class QuotingConfig:
16
+ quote_field_name: bool = False
17
+ quote_model_name: bool = False
18
+
19
+
12
20
  def create_checks(data_contract_spec: DataContractSpecification, server: Server) -> List[Check]:
13
21
  checks: List[Check] = []
14
22
  for model_key, model_value in data_contract_spec.models.items():
@@ -26,37 +34,41 @@ def to_model_checks(model_key, model_value, server: Server) -> List[Check]:
26
34
  fields = model_value.fields
27
35
 
28
36
  check_types = is_check_types(server)
29
- quote_field_name = server_type in ["postgres", "sqlserver"]
37
+
38
+ quoting_config = QuotingConfig(
39
+ quote_field_name=server_type in ["postgres", "sqlserver"],
40
+ quote_model_name=server_type in ["postgres", "sqlserver"],
41
+ )
30
42
 
31
43
  for field_name, field in fields.items():
32
- checks.append(check_field_is_present(model_name, field_name, quote_field_name))
44
+ checks.append(check_field_is_present(model_name, field_name, quoting_config))
33
45
  if check_types and field.type is not None:
34
46
  sql_type = convert_to_sql_type(field, server_type)
35
- checks.append(check_field_type(model_name, field_name, sql_type, quote_field_name))
47
+ checks.append(check_field_type(model_name, field_name, sql_type, quoting_config))
36
48
  if field.required:
37
- checks.append(check_field_required(model_name, field_name, quote_field_name))
49
+ checks.append(check_field_required(model_name, field_name, quoting_config))
38
50
  if field.unique:
39
- checks.append(check_field_unique(model_name, field_name, quote_field_name))
51
+ checks.append(check_field_unique(model_name, field_name, quoting_config))
40
52
  if field.minLength is not None:
41
- checks.append(check_field_min_length(model_name, field_name, field.minLength, quote_field_name))
53
+ checks.append(check_field_min_length(model_name, field_name, field.minLength, quoting_config))
42
54
  if field.maxLength is not None:
43
- checks.append(check_field_max_length(model_name, field_name, field.maxLength, quote_field_name))
55
+ checks.append(check_field_max_length(model_name, field_name, field.maxLength, quoting_config))
44
56
  if field.minimum is not None:
45
- checks.append(check_field_minimum(model_name, field_name, field.minimum, quote_field_name))
57
+ checks.append(check_field_minimum(model_name, field_name, field.minimum, quoting_config))
46
58
  if field.maximum is not None:
47
- checks.append(check_field_maximum(model_name, field_name, field.maximum, quote_field_name))
59
+ checks.append(check_field_maximum(model_name, field_name, field.maximum, quoting_config))
48
60
  if field.exclusiveMinimum is not None:
49
- checks.append(check_field_minimum(model_name, field_name, field.exclusiveMinimum, quote_field_name))
50
- checks.append(check_field_not_equal(model_name, field_name, field.exclusiveMinimum, quote_field_name))
61
+ checks.append(check_field_minimum(model_name, field_name, field.exclusiveMinimum, quoting_config))
62
+ checks.append(check_field_not_equal(model_name, field_name, field.exclusiveMinimum, quoting_config))
51
63
  if field.exclusiveMaximum is not None:
52
- checks.append(check_field_maximum(model_name, field_name, field.exclusiveMaximum, quote_field_name))
53
- checks.append(check_field_not_equal(model_name, field_name, field.exclusiveMaximum, quote_field_name))
64
+ checks.append(check_field_maximum(model_name, field_name, field.exclusiveMaximum, quoting_config))
65
+ checks.append(check_field_not_equal(model_name, field_name, field.exclusiveMaximum, quoting_config))
54
66
  if field.pattern is not None:
55
- checks.append(check_field_regex(model_name, field_name, field.pattern, quote_field_name))
67
+ checks.append(check_field_regex(model_name, field_name, field.pattern, quoting_config))
56
68
  if field.enum is not None and len(field.enum) > 0:
57
- checks.append(check_field_enum(model_name, field_name, field.enum, quote_field_name))
69
+ checks.append(check_field_enum(model_name, field_name, field.enum, quoting_config))
58
70
  if field.quality is not None and len(field.quality) > 0:
59
- quality_list = check_quality_list(model_name, field_name, field.quality)
71
+ quality_list = check_quality_list(model_name, field_name, field.quality, quoting_config)
60
72
  if (quality_list is not None) and len(quality_list) > 0:
61
73
  checks.extend(quality_list)
62
74
  # TODO references: str = None
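
Editor's sketch (not part of the diff): how the new QuotingConfig replaces the old quote_field_name boolean when calling a check builder; the model and field names below are made up for illustration.

```python
# Illustrative only, assuming QuotingConfig and check_field_required as introduced above.
cfg = QuotingConfig(quote_field_name=True, quote_model_name=True)  # e.g. postgres / sqlserver
check = check_field_required("my_table", "my column", cfg)
# The generated SodaCL section is keyed 'checks for "my_table"' and uses
# missing_count("my column") = 0, i.e. both the model name and the field name are quoted.
```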
@@ -70,8 +82,8 @@ def to_model_checks(model_key, model_value, server: Server) -> List[Check]:
70
82
  return checks
71
83
 
72
84
 
73
- def checks_for(model_name, quote_field_name):
74
- if quote_field_name:
85
+ def checks_for(model_name, quote_model_name: bool):
86
+ if quote_model_name:
75
87
  return f'checks for "{model_name}"'
76
88
  return f"checks for {model_name}"
77
89
 
@@ -98,11 +110,11 @@ def to_model_name(model_key, model_value, server_type):
98
110
  return model_key
99
111
 
100
112
 
101
- def check_field_is_present(model_name, field_name, quote_field_name: bool) -> Check:
113
+ def check_field_is_present(model_name, field_name, quoting_config: QuotingConfig = QuotingConfig()) -> Check:
102
114
  check_type = "field_is_present"
103
115
  check_key = f"{model_name}__{field_name}__{check_type}"
104
116
  sodacl_check_dict = {
105
- checks_for(model_name, quote_field_name): [
117
+ checks_for(model_name, quoting_config.quote_model_name): [
106
118
  {
107
119
  "schema": {
108
120
  "name": check_key,
@@ -127,11 +139,13 @@ def check_field_is_present(model_name, field_name, quote_field_name: bool) -> Ch
127
139
  )
128
140
 
129
141
 
130
- def check_field_type(model_name: str, field_name: str, expected_type: str, quote_field_name: bool = False):
142
+ def check_field_type(
143
+ model_name: str, field_name: str, expected_type: str, quoting_config: QuotingConfig = QuotingConfig()
144
+ ):
131
145
  check_type = "field_type"
132
146
  check_key = f"{model_name}__{field_name}__{check_type}"
133
147
  sodacl_check_dict = {
134
- checks_for(model_name, quote_field_name): [
148
+ checks_for(model_name, quoting_config.quote_model_name): [
135
149
  {
136
150
  "schema": {
137
151
  "name": check_key,
@@ -158,8 +172,8 @@ def check_field_type(model_name: str, field_name: str, expected_type: str, quote
158
172
  )
159
173
 
160
174
 
161
- def check_field_required(model_name: str, field_name: str, quote_field_name: bool = False):
162
- if quote_field_name:
175
+ def check_field_required(model_name: str, field_name: str, quoting_config: QuotingConfig = QuotingConfig()):
176
+ if quoting_config.quote_field_name:
163
177
  field_name_for_soda = f'"{field_name}"'
164
178
  else:
165
179
  field_name_for_soda = field_name
@@ -167,7 +181,7 @@ def check_field_required(model_name: str, field_name: str, quote_field_name: boo
167
181
  check_type = "field_required"
168
182
  check_key = f"{model_name}__{field_name}__{check_type}"
169
183
  sodacl_check_dict = {
170
- checks_for(model_name, quote_field_name): [
184
+ checks_for(model_name, quoting_config.quote_model_name): [
171
185
  {
172
186
  f"missing_count({field_name_for_soda}) = 0": {
173
187
  "name": check_key,
@@ -189,8 +203,8 @@ def check_field_required(model_name: str, field_name: str, quote_field_name: boo
189
203
  )
190
204
 
191
205
 
192
- def check_field_unique(model_name: str, field_name: str, quote_field_name: bool = False):
193
- if quote_field_name:
206
+ def check_field_unique(model_name: str, field_name: str, quoting_config: QuotingConfig = QuotingConfig()):
207
+ if quoting_config.quote_field_name:
194
208
  field_name_for_soda = f'"{field_name}"'
195
209
  else:
196
210
  field_name_for_soda = field_name
@@ -198,7 +212,7 @@ def check_field_unique(model_name: str, field_name: str, quote_field_name: bool
198
212
  check_type = "field_unique"
199
213
  check_key = f"{model_name}__{field_name}__{check_type}"
200
214
  sodacl_check_dict = {
201
- checks_for(model_name, quote_field_name): [
215
+ checks_for(model_name, quoting_config.quote_model_name): [
202
216
  {
203
217
  f"duplicate_count({field_name_for_soda}) = 0": {
204
218
  "name": check_key,
@@ -220,8 +234,10 @@ def check_field_unique(model_name: str, field_name: str, quote_field_name: bool
220
234
  )
221
235
 
222
236
 
223
- def check_field_min_length(model_name: str, field_name: str, min_length: int, quote_field_name: bool = False):
224
- if quote_field_name:
237
+ def check_field_min_length(
238
+ model_name: str, field_name: str, min_length: int, quoting_config: QuotingConfig = QuotingConfig()
239
+ ):
240
+ if quoting_config.quote_field_name:
225
241
  field_name_for_soda = f'"{field_name}"'
226
242
  else:
227
243
  field_name_for_soda = field_name
@@ -229,7 +245,7 @@ def check_field_min_length(model_name: str, field_name: str, min_length: int, qu
229
245
  check_type = "field_min_length"
230
246
  check_key = f"{model_name}__{field_name}__{check_type}"
231
247
  sodacl_check_dict = {
232
- checks_for(model_name, quote_field_name): [
248
+ checks_for(model_name, quoting_config.quote_model_name): [
233
249
  {
234
250
  f"invalid_count({field_name_for_soda}) = 0": {
235
251
  "name": check_key,
@@ -252,8 +268,10 @@ def check_field_min_length(model_name: str, field_name: str, min_length: int, qu
252
268
  )
253
269
 
254
270
 
255
- def check_field_max_length(model_name: str, field_name: str, max_length: int, quote_field_name: bool = False):
256
- if quote_field_name:
271
+ def check_field_max_length(
272
+ model_name: str, field_name: str, max_length: int, quoting_config: QuotingConfig = QuotingConfig()
273
+ ):
274
+ if quoting_config.quote_field_name:
257
275
  field_name_for_soda = f'"{field_name}"'
258
276
  else:
259
277
  field_name_for_soda = field_name
@@ -261,7 +279,7 @@ def check_field_max_length(model_name: str, field_name: str, max_length: int, qu
261
279
  check_type = "field_max_length"
262
280
  check_key = f"{model_name}__{field_name}__{check_type}"
263
281
  sodacl_check_dict = {
264
- checks_for(model_name, quote_field_name): [
282
+ checks_for(model_name, quoting_config.quote_model_name): [
265
283
  {
266
284
  f"invalid_count({field_name_for_soda}) = 0": {
267
285
  "name": check_key,
@@ -284,8 +302,10 @@ def check_field_max_length(model_name: str, field_name: str, max_length: int, qu
284
302
  )
285
303
 
286
304
 
287
- def check_field_minimum(model_name: str, field_name: str, minimum: int, quote_field_name: bool = False):
288
- if quote_field_name:
305
+ def check_field_minimum(
306
+ model_name: str, field_name: str, minimum: int, quoting_config: QuotingConfig = QuotingConfig()
307
+ ):
308
+ if quoting_config.quote_field_name:
289
309
  field_name_for_soda = f'"{field_name}"'
290
310
  else:
291
311
  field_name_for_soda = field_name
@@ -293,7 +313,7 @@ def check_field_minimum(model_name: str, field_name: str, minimum: int, quote_fi
293
313
  check_type = "field_minimum"
294
314
  check_key = f"{model_name}__{field_name}__{check_type}"
295
315
  sodacl_check_dict = {
296
- checks_for(model_name, quote_field_name): [
316
+ checks_for(model_name, quoting_config.quote_model_name): [
297
317
  {
298
318
  f"invalid_count({field_name_for_soda}) = 0": {
299
319
  "name": check_key,
@@ -316,8 +336,10 @@ def check_field_minimum(model_name: str, field_name: str, minimum: int, quote_fi
316
336
  )
317
337
 
318
338
 
319
- def check_field_maximum(model_name: str, field_name: str, maximum: int, quote_field_name: bool = False):
320
- if quote_field_name:
339
+ def check_field_maximum(
340
+ model_name: str, field_name: str, maximum: int, quoting_config: QuotingConfig = QuotingConfig()
341
+ ):
342
+ if quoting_config.quote_field_name:
321
343
  field_name_for_soda = f'"{field_name}"'
322
344
  else:
323
345
  field_name_for_soda = field_name
@@ -325,7 +347,7 @@ def check_field_maximum(model_name: str, field_name: str, maximum: int, quote_fi
325
347
  check_type = "field_maximum"
326
348
  check_key = f"{model_name}__{field_name}__{check_type}"
327
349
  sodacl_check_dict = {
328
- checks_for(model_name, quote_field_name): [
350
+ checks_for(model_name, quoting_config.quote_model_name): [
329
351
  {
330
352
  f"invalid_count({field_name_for_soda}) = 0": {
331
353
  "name": check_key,
@@ -348,8 +370,10 @@ def check_field_maximum(model_name: str, field_name: str, maximum: int, quote_fi
348
370
  )
349
371
 
350
372
 
351
- def check_field_not_equal(model_name: str, field_name: str, value: int, quote_field_name: bool = False):
352
- if quote_field_name:
373
+ def check_field_not_equal(
374
+ model_name: str, field_name: str, value: int, quoting_config: QuotingConfig = QuotingConfig()
375
+ ):
376
+ if quoting_config.quote_field_name:
353
377
  field_name_for_soda = f'"{field_name}"'
354
378
  else:
355
379
  field_name_for_soda = field_name
@@ -357,7 +381,7 @@ def check_field_not_equal(model_name: str, field_name: str, value: int, quote_fi
357
381
  check_type = "field_not_equal"
358
382
  check_key = f"{model_name}__{field_name}__{check_type}"
359
383
  sodacl_check_dict = {
360
- checks_for(model_name, quote_field_name): [
384
+ checks_for(model_name, quoting_config.quote_model_name): [
361
385
  {
362
386
  f"invalid_count({field_name_for_soda}) = 0": {
363
387
  "name": check_key,
@@ -380,8 +404,8 @@ def check_field_not_equal(model_name: str, field_name: str, value: int, quote_fi
380
404
  )
381
405
 
382
406
 
383
- def check_field_enum(model_name: str, field_name: str, enum: list, quote_field_name: bool = False):
384
- if quote_field_name:
407
+ def check_field_enum(model_name: str, field_name: str, enum: list, quoting_config: QuotingConfig = QuotingConfig()):
408
+ if quoting_config.quote_field_name:
385
409
  field_name_for_soda = f'"{field_name}"'
386
410
  else:
387
411
  field_name_for_soda = field_name
@@ -389,7 +413,7 @@ def check_field_enum(model_name: str, field_name: str, enum: list, quote_field_n
389
413
  check_type = "field_enum"
390
414
  check_key = f"{model_name}__{field_name}__{check_type}"
391
415
  sodacl_check_dict = {
392
- checks_for(model_name, quote_field_name): [
416
+ checks_for(model_name, quoting_config.quote_model_name): [
393
417
  {
394
418
  f"invalid_count({field_name_for_soda}) = 0": {
395
419
  "name": check_key,
@@ -412,8 +436,8 @@ def check_field_enum(model_name: str, field_name: str, enum: list, quote_field_n
412
436
  )
413
437
 
414
438
 
415
- def check_field_regex(model_name: str, field_name: str, pattern: str, quote_field_name: bool = False):
416
- if quote_field_name:
439
+ def check_field_regex(model_name: str, field_name: str, pattern: str, quoting_config: QuotingConfig = QuotingConfig()):
440
+ if quoting_config.quote_field_name:
417
441
  field_name_for_soda = f'"{field_name}"'
418
442
  else:
419
443
  field_name_for_soda = field_name
@@ -421,7 +445,7 @@ def check_field_regex(model_name: str, field_name: str, pattern: str, quote_fiel
421
445
  check_type = "field_regex"
422
446
  check_key = f"{model_name}__{field_name}__{check_type}"
423
447
  sodacl_check_dict = {
424
- checks_for(model_name, quote_field_name): [
448
+ checks_for(model_name, quoting_config.quote_model_name): [
425
449
  {
426
450
  f"invalid_count({field_name_for_soda}) = 0": {
427
451
  "name": check_key,
@@ -444,7 +468,9 @@ def check_field_regex(model_name: str, field_name: str, pattern: str, quote_fiel
444
468
  )
445
469
 
446
470
 
447
- def check_quality_list(model_name, field_name, quality_list: List[Quality]) -> List[Check]:
471
+ def check_quality_list(
472
+ model_name, field_name, quality_list: List[Quality], quoting_config: QuotingConfig = QuotingConfig()
473
+ ) -> List[Check]:
448
474
  checks: List[Check] = []
449
475
 
450
476
  count = 0
@@ -457,15 +483,20 @@ def check_quality_list(model_name, field_name, quality_list: List[Quality]) -> L
457
483
  check_key = f"{model_name}__{field_name}__quality_sql_{count}"
458
484
  check_type = "model_quality_sql"
459
485
  threshold = to_sodacl_threshold(quality)
460
- query = prepare_query(quality, model_name, field_name)
486
+ query = prepare_query(quality, model_name, field_name, quoting_config)
461
487
  if query is None:
462
488
  logger.warning(f"Quality check {check_key} has no query")
463
489
  continue
464
490
  if threshold is None:
465
491
  logger.warning(f"Quality check {check_key} has no valid threshold")
466
492
  continue
493
+
494
+ if quoting_config.quote_model_name:
495
+ model_name_for_soda = f'"{model_name}"'
496
+ else:
497
+ model_name_for_soda = model_name
467
498
  sodacl_check_dict = {
468
- f"checks for {model_name}": [
499
+ f"checks for {model_name_for_soda}": [
469
500
  {
470
501
  f"{check_key} {threshold}": {
471
502
  f"{check_key} query": query,
@@ -493,7 +524,9 @@ def check_quality_list(model_name, field_name, quality_list: List[Quality]) -> L
493
524
  return checks
494
525
 
495
526
 
496
- def prepare_query(quality: Quality, model_name: str, field_name: str = None) -> str | None:
527
+ def prepare_query(
528
+ quality: Quality, model_name: str, field_name: str = None, quoting_config: QuotingConfig = QuotingConfig()
529
+ ) -> str | None:
497
530
  if quality.query is None:
498
531
  return None
499
532
  if quality.query == "":
@@ -501,14 +534,24 @@ def prepare_query(quality: Quality, model_name: str, field_name: str = None) ->
501
534
 
502
535
  query = quality.query
503
536
 
504
- query = query.replace("{model}", model_name)
505
- query = query.replace("{schema}", model_name)
506
- query = query.replace("{table}", model_name)
537
+ if quoting_config.quote_field_name:
538
+ field_name_for_soda = f'"{field_name}"'
539
+ else:
540
+ field_name_for_soda = field_name
541
+
542
+ if quoting_config.quote_model_name:
543
+ model_name_for_soda = f'"{model_name}"'
544
+ else:
545
+ model_name_for_soda = model_name
546
+
547
+ query = re.sub(r'["\']?\{model}["\']?', model_name_for_soda, query)
548
+ query = re.sub(r'["\']?{schema}["\']?', model_name_for_soda, query)
549
+ query = re.sub(r'["\']?{table}["\']?', model_name_for_soda, query)
507
550
 
508
551
  if field_name is not None:
509
- query = query.replace("{field}", field_name)
510
- query = query.replace("{column}", field_name)
511
- query = query.replace("{property}", field_name)
552
+ query = re.sub(r'["\']?{field}["\']?', field_name_for_soda, query)
553
+ query = re.sub(r'["\']?{column}["\']?', field_name_for_soda, query)
554
+ query = re.sub(r'["\']?{property}["\']?', field_name_for_soda, query)
512
555
 
513
556
  return query
514
557
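
Editor's sketch (not part of the diff): the new placeholder substitution in prepare_query absorbs any quotes already wrapped around {model}/{table}/{field}-style placeholders and re-applies quoting from the QuotingConfig. The Quality construction is an assumption for illustration.

```python
# Illustrative only; Quality(type=..., query=...) kwargs are assumed.
quality = Quality(type="sql", query="SELECT COUNT(*) FROM '{model}' WHERE {field} IS NULL")
sql = prepare_query(
    quality, "orders", "customer_id",
    QuotingConfig(quote_field_name=True, quote_model_name=True),
)
# sql == 'SELECT COUNT(*) FROM "orders" WHERE "customer_id" IS NULL'
```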
 
@@ -1,5 +1,9 @@
1
+ import atexit
2
+ import os
3
+ import tempfile
1
4
  import typing
2
5
 
6
+ import requests
3
7
  from duckdb.duckdb import DuckDBPyConnection
4
8
 
5
9
  from datacontract.engines.data_contract_checks import create_checks
@@ -46,6 +50,9 @@ def execute_data_contract_test(
46
50
  run.outputPortId = server.outputPortId
47
51
  run.server = server_name
48
52
 
53
+ if server.type == "api":
54
+ server = process_api_response(run, server)
55
+
49
56
  run.checks.extend(create_checks(data_contract_specification, server))
50
57
 
51
58
  # TODO check server is supported type for nicer error messages
@@ -74,3 +81,33 @@ def get_server(data_contract_specification: DataContractSpecification, server_na
74
81
  server_name = list(data_contract_specification.servers.keys())[0]
75
82
  server = data_contract_specification.servers.get(server_name)
76
83
  return server
84
+
85
+
86
+ def process_api_response(run, server):
87
+ tmp_dir = tempfile.TemporaryDirectory(prefix="datacontract_cli_api_")
88
+ atexit.register(tmp_dir.cleanup)
89
+ headers = {}
90
+ if os.getenv("DATACONTRACT_API_HEADER_AUTHORIZATION") is not None:
91
+ headers["Authorization"] = os.getenv("DATACONTRACT_API_HEADER_AUTHORIZATION")
92
+ try:
93
+ response = requests.get(server.location, headers=headers)
94
+ response.raise_for_status()
95
+ except requests.exceptions.RequestException as e:
96
+ raise DataContractException(
97
+ type="connection",
98
+ name="API server connection error",
99
+ result=ResultEnum.error,
100
+ reason=f"Failed to fetch API response from {server.location}: {e}",
101
+ engine="datacontract",
102
+ )
103
+ with open(f"{tmp_dir.name}/api_response.json", "w") as f:
104
+ f.write(response.text)
105
+ run.log_info(f"Saved API response to {tmp_dir.name}/api_response.json")
106
+ server = Server(
107
+ type="local",
108
+ format="json",
109
+ path=f"{tmp_dir.name}/api_response.json",
110
+ dataProductId=server.dataProductId,
111
+ outputPortId=server.outputPortId,
112
+ )
113
+ return server
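
Editor's sketch (not part of the diff): the effect of the new "api" server type. process_api_response fetches the endpoint, optionally sending an Authorization header from the environment, and rewrites the server to a local JSON file before checks are created. The Server kwargs below are assumptions.

```python
# Illustrative only; 'run' is assumed to be the current Run instance.
import os
os.environ["DATACONTRACT_API_HEADER_AUTHORIZATION"] = "Bearer <token>"  # optional

server = Server(type="api", location="https://api.example.com/path")
local_server = process_api_response(run, server)
# local_server.type == "local", local_server.format == "json",
# local_server.path points at the downloaded api_response.json in a temp directory.
```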
@@ -159,6 +159,14 @@ def process_json_file(run, schema, model_name, validate, file, delimiter):
159
159
 
160
160
  def process_local_file(run, server, schema, model_name, validate):
161
161
  path = server.path
162
+ if not path:
163
+ raise DataContractException(
164
+ type="schema",
165
+ name="Check that JSON has valid schema",
166
+ result=ResultEnum.warning,
167
+ reason="For server with type 'local', a 'path' must be defined.",
168
+ engine="datacontract",
169
+ )
162
170
  if "{model}" in path:
163
171
  path = path.format(model=model_name)
164
172
 
@@ -2,6 +2,8 @@ import logging
2
2
  import typing
3
3
  import uuid
4
4
 
5
+ from datacontract.engines.soda.connections.athena import to_athena_soda_configuration
6
+
5
7
  if typing.TYPE_CHECKING:
6
8
  from pyspark.sql import SparkSession
7
9
 
@@ -106,6 +108,10 @@ def check_soda_execute(
106
108
  soda_configuration_str = to_trino_soda_configuration(server)
107
109
  scan.add_configuration_yaml_str(soda_configuration_str)
108
110
  scan.set_data_source_name(server.type)
111
+ elif server.type == "athena":
112
+ soda_configuration_str = to_athena_soda_configuration(server)
113
+ scan.add_configuration_yaml_str(soda_configuration_str)
114
+ scan.set_data_source_name(server.type)
109
115
 
110
116
  else:
111
117
  run.checks.append(
@@ -0,0 +1,79 @@
1
+ import os
2
+
3
+ import yaml
4
+
5
+ from datacontract.model.exceptions import DataContractException
6
+
7
+
8
+ def to_athena_soda_configuration(server):
9
+ s3_region = os.getenv("DATACONTRACT_S3_REGION")
10
+ s3_access_key_id = os.getenv("DATACONTRACT_S3_ACCESS_KEY_ID")
11
+ s3_secret_access_key = os.getenv("DATACONTRACT_S3_SECRET_ACCESS_KEY")
12
+ s3_session_token = os.getenv("DATACONTRACT_S3_SESSION_TOKEN")
13
+
14
+ # Validate required parameters
15
+ if not s3_access_key_id:
16
+ raise DataContractException(
17
+ type="athena-connection",
18
+ name="missing_access_key_id",
19
+ reason="AWS access key ID is required. Set the DATACONTRACT_S3_ACCESS_KEY_ID environment variable.",
20
+ engine="datacontract",
21
+ )
22
+
23
+ if not s3_secret_access_key:
24
+ raise DataContractException(
25
+ type="athena-connection",
26
+ name="missing_secret_access_key",
27
+ reason="AWS secret access key is required. Set the DATACONTRACT_S3_SECRET_ACCESS_KEY environment variable.",
28
+ engine="datacontract",
29
+ )
30
+
31
+ if not hasattr(server, "schema_") or not server.schema_:
32
+ raise DataContractException(
33
+ type="athena-connection",
34
+ name="missing_schema",
35
+ reason="Schema is required for Athena connection. Specify the schema where your tables exist in the server configuration.",
36
+ engine="datacontract",
37
+ )
38
+
39
+ if not hasattr(server, "stagingDir") or not server.stagingDir:
40
+ raise DataContractException(
41
+ type="athena-connection",
42
+ name="missing_s3_staging_dir",
43
+ reason="S3 staging directory is required for Athena connection. This should be the Amazon S3 Query Result Location (e.g., 's3://my-bucket/athena-results/').",
44
+ engine="datacontract",
45
+ )
46
+
47
+ # Validate S3 staging directory format
48
+ if not server.stagingDir.startswith("s3://"):
49
+ raise DataContractException(
50
+ type="athena-connection",
51
+ name="invalid_s3_staging_dir",
52
+ reason=f"S3 staging directory must start with 's3://'. Got: {server.s3_staging_dir}. Example: 's3://my-bucket/athena-results/'",
53
+ engine="datacontract",
54
+ )
55
+
56
+ data_source = {
57
+ "type": "athena",
58
+ "access_key_id": s3_access_key_id,
59
+ "secret_access_key": s3_secret_access_key,
60
+ "schema": server.schema_,
61
+ "staging_dir": server.stagingDir,
62
+ }
63
+
64
+ if s3_region:
65
+ data_source["region_name"] = s3_region
66
+ elif server.region_name:
67
+ data_source["region_name"] = server.region_name
68
+
69
+ if server.catalog:
70
+ # Optional, Identify the name of the Data Source, also referred to as a Catalog. The default value is `awsdatacatalog`.
71
+ data_source["catalog"] = server.catalog
72
+
73
+ if s3_session_token:
74
+ data_source["aws_session_token"] = s3_session_token
75
+
76
+ soda_configuration = {f"data_source {server.type}": data_source}
77
+
78
+ soda_configuration_str = yaml.dump(soda_configuration)
79
+ return soda_configuration_str
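
Editor's sketch (not part of the diff): exercising the new Athena connection builder. The SimpleNamespace stand-in for the server object and the credential values are assumptions; real runs pass the data contract's Server model.

```python
# Illustrative only.
import os
from types import SimpleNamespace

from datacontract.engines.soda.connections.athena import to_athena_soda_configuration

os.environ["DATACONTRACT_S3_ACCESS_KEY_ID"] = "AKIA..."       # placeholder credentials
os.environ["DATACONTRACT_S3_SECRET_ACCESS_KEY"] = "secret"

server = SimpleNamespace(
    type="athena",
    schema_="icebergdemodb",
    stagingDir="s3://my-bucket/athena-results/",
    catalog="awsdatacatalog",
    region_name="eu-central-1",
)
print(to_athena_soda_configuration(server))
# Yields a YAML block along the lines of:
# data_source athena:
#   access_key_id: AKIA...
#   catalog: awsdatacatalog
#   region_name: eu-central-1
#   schema: icebergdemodb
#   secret_access_key: secret
#   staging_dir: s3://my-bucket/athena-results/
#   type: athena
```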
@@ -71,6 +71,9 @@ def get_duckdb_connection(
71
71
  elif server.format == "delta":
72
72
  con.sql("update extensions;") # Make sure we have the latest delta extension
73
73
  con.sql(f"""CREATE VIEW "{model_name}" AS SELECT * FROM delta_scan('{model_path}');""")
74
+ table_info = con.sql(f"PRAGMA table_info('{model_name}');").fetchdf()
75
+ if table_info is not None and not table_info.empty:
76
+ run.log_info(f"DuckDB Table Info: {table_info.to_string(index=False)}")
74
77
  return con
75
78
 
76
79
 
@@ -44,12 +44,18 @@ def to_avro_field(field, field_name):
44
44
  avro_type = to_avro_type(field, field_name)
45
45
  avro_field["type"] = avro_type if is_required_avro else ["null", avro_type]
46
46
 
47
- if avro_field["type"] == "enum":
48
- avro_field["type"] = {
47
+ # Handle enum types - both required and optional
48
+ if avro_type == "enum" or (isinstance(avro_field["type"], list) and "enum" in avro_field["type"]):
49
+ enum_def = {
49
50
  "type": "enum",
50
51
  "name": field.title,
51
52
  "symbols": field.enum,
52
53
  }
54
+ if is_required_avro:
55
+ avro_field["type"] = enum_def
56
+ else:
57
+ # Replace "enum" with the full enum definition in the union
58
+ avro_field["type"] = ["null", enum_def]
53
59
 
54
60
  if field.config:
55
61
  if "avroDefault" in field.config:
@@ -77,6 +83,10 @@ def to_avro_type(field: Field, field_name: str) -> str | dict:
77
83
  if "avroType" in field.config:
78
84
  return field.config["avroType"]
79
85
 
86
+ # Check for enum fields based on presence of enum list and avroType config
87
+ if field.enum and field.config and field.config.get("avroType") == "enum":
88
+ return "enum"
89
+
80
90
  if field.type is None:
81
91
  return "null"
82
92
  if field.type in ["string", "varchar", "text"]:
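
Editor's sketch (not part of the diff): what the enum handling above now emits for an optional enum field. The Field construction is an assumption based on the attributes the code reads.

```python
# Illustrative expectation only.
# field = Field(title="Status", type="string", required=False,
#               enum=["ACTIVE", "INACTIVE"], config={"avroType": "enum"})
# to_avro_field(field, "status")["type"] would now be the full union:
expected_type = [
    "null",
    {"type": "enum", "name": "Status", "symbols": ["ACTIVE", "INACTIVE"]},
]
```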
@@ -1,4 +1,4 @@
1
- from typing import Dict
1
+ from typing import Dict, List
2
2
 
3
3
  from pydantic import BaseModel
4
4
 
@@ -12,6 +12,9 @@ from datacontract.model.data_contract_specification import (
12
12
  ServiceLevel,
13
13
  )
14
14
 
15
+ TAB = " "
16
+ ARROW = "↳"
17
+
15
18
 
16
19
  class MarkdownExporter(Exporter):
17
20
  """Exporter implementation for converting data contracts to Markdown."""
@@ -70,7 +73,8 @@ def obj_attributes_to_markdown(obj: BaseModel, excluded_fields: set = set(), is_
70
73
  else:
71
74
  bullet_char = "-"
72
75
  newline_char = "\n"
73
- obj_model = obj.model_dump(exclude_unset=True, exclude=excluded_fields)
76
+ model_attributes_to_include = set(obj.__class__.model_fields.keys())
77
+ obj_model = obj.model_dump(exclude_unset=True, include=model_attributes_to_include, exclude=excluded_fields)
74
78
  description_value = obj_model.pop("description", None)
75
79
  attributes = [
76
80
  (f"{bullet_char} `{attr}`" if value is True else f"{bullet_char} **{attr}:** {value}")
@@ -78,7 +82,8 @@ def obj_attributes_to_markdown(obj: BaseModel, excluded_fields: set = set(), is_
78
82
  if value
79
83
  ]
80
84
  description = f"*{description_to_markdown(description_value)}*"
81
- return newline_char.join([description] + attributes)
85
+ extra = [extra_to_markdown(obj)] if obj.model_extra else []
86
+ return newline_char.join([description] + attributes + extra)
82
87
 
83
88
 
84
89
  def servers_to_markdown(servers: Dict[str, Server]) -> str:
@@ -153,8 +158,8 @@ def field_to_markdown(field_name: str, field: Field, level: int = 0) -> str:
153
158
  Returns:
154
159
  str: A Markdown table rows for the field.
155
160
  """
156
- tabs = " " * level
157
- arrow = "↳" if level > 0 else ""
161
+ tabs = TAB * level
162
+ arrow = ARROW if level > 0 else ""
158
163
  column_name = f"{tabs}{arrow} {field_name}"
159
164
 
160
165
  attributes = obj_attributes_to_markdown(field, {"type", "fields", "items", "keys", "values"}, True)
@@ -206,3 +211,108 @@ def service_level_to_markdown(service_level: ServiceLevel | None) -> str:
206
211
 
207
212
  def description_to_markdown(description: str | None) -> str:
208
213
  return (description or "No description.").replace("\n", "<br>")
214
+
215
+
216
+ def array_of_dict_to_markdown(array: List[Dict[str, str]]) -> str:
217
+ """
218
+ Convert a list of dictionaries to a Markdown table.
219
+
220
+ Args:
221
+ array (List[Dict[str, str]]): A list of dictionaries where each dictionary represents a row in the table.
222
+
223
+ Returns:
224
+ str: A Markdown formatted table.
225
+ """
226
+ if not array:
227
+ return ""
228
+
229
+ headers = []
230
+
231
+ for item in array:
232
+ headers += item.keys()
233
+ headers = list(dict.fromkeys(headers)) # Preserve order and remove duplicates
234
+
235
+ markdown_parts = [
236
+ "| " + " | ".join(headers) + " |",
237
+ "| " + " | ".join(["---"] * len(headers)) + " |",
238
+ ]
239
+
240
+ for row in array:
241
+ element = row
242
+ markdown_parts.append(
243
+ "| "
244
+ + " | ".join(
245
+ f"{str(element.get(header, ''))}".replace("\n", "<br>").replace("\t", TAB) for header in headers
246
+ )
247
+ + " |"
248
+ )
249
+
250
+ return "\n".join(markdown_parts) + "\n"
251
+
252
+
253
+ def array_to_markdown(array: List[str]) -> str:
254
+ """
255
+ Convert a list of strings to a Markdown formatted list.
256
+
257
+ Args:
258
+ array (List[str]): A list of strings to convert.
259
+
260
+ Returns:
261
+ str: A Markdown formatted list.
262
+ """
263
+ if not array:
264
+ return ""
265
+ return "\n".join(f"- {item}" for item in array) + "\n"
266
+
267
+
268
+ def dict_to_markdown(dictionary: Dict[str, str]) -> str:
269
+ """
270
+ Convert a dictionary to a Markdown formatted list.
271
+
272
+ Args:
273
+ dictionary (Dict[str, str]): A dictionary where keys are item names and values are item descriptions.
274
+
275
+ Returns:
276
+ str: A Markdown formatted list of items.
277
+ """
278
+ if not dictionary:
279
+ return ""
280
+
281
+ markdown_parts = []
282
+ for key, value in dictionary.items():
283
+ if isinstance(value, dict):
284
+ markdown_parts.append(f"- {key}")
285
+ nested_markdown = dict_to_markdown(value)
286
+ if nested_markdown:
287
+ nested_lines = nested_markdown.split("\n")
288
+ for line in nested_lines:
289
+ if line.strip():
290
+ markdown_parts.append(f" {line}")
291
+ else:
292
+ markdown_parts.append(f"- {key}: {value}")
293
+ return "\n".join(markdown_parts) + "\n"
294
+
295
+
296
+ def extra_to_markdown(obj: BaseModel) -> str:
297
+ """
298
+ Convert the extra attributes of a data contract to Markdown format.
299
+ Args:
300
+ obj (BaseModel): The data contract object containing extra attributes.
301
+ Returns:
302
+ str: A Markdown formatted string representing the extra attributes of the data contract.
303
+ """
304
+ markdown_part = ""
305
+ extra = obj.model_extra
306
+ if extra:
307
+ for key_extra, value_extra in extra.items():
308
+ markdown_part += f"\n### {key_extra.capitalize()}\n"
309
+ if isinstance(value_extra, list) and len(value_extra):
310
+ if isinstance(value_extra[0], dict):
311
+ markdown_part += array_of_dict_to_markdown(value_extra)
312
+ elif isinstance(value_extra[0], str):
313
+ markdown_part += array_to_markdown(value_extra)
314
+ elif isinstance(value_extra, dict):
315
+ markdown_part += dict_to_markdown(value_extra)
316
+ else:
317
+ markdown_part += f"{str(value_extra)}\n"
318
+ return markdown_part
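
Editor's sketch (not part of the diff): what the new extra-attribute rendering produces for a list of dictionaries; the input values are made up.

```python
# Illustrative sketch of array_of_dict_to_markdown's output.
rows = [{"name": "a", "type": "string"}, {"name": "b"}]
print(array_of_dict_to_markdown(rows))
# | name | type |
# | --- | --- |
# | a | string |
# | b |  |
```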
@@ -3,6 +3,9 @@ from datacontract.model.data_contract_specification import Field
3
3
 
4
4
 
5
5
  def convert_to_sql_type(field: Field, server_type: str) -> str:
6
+ if field.config and "physicalType" in field.config:
7
+ return field.config["physicalType"]
8
+
6
9
  if server_type == "snowflake":
7
10
  return convert_to_snowflake(field)
8
11
  elif server_type == "postgres":
@@ -19,6 +22,7 @@ def convert_to_sql_type(field: Field, server_type: str) -> str:
19
22
  return convert_type_to_bigquery(field)
20
23
  elif server_type == "trino":
21
24
  return convert_type_to_trino(field)
25
+
22
26
  return field.type
23
27
 
24
28
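
Editor's sketch (not part of the diff): the new physicalType override. The Field construction is an assumption based on the model's attributes.

```python
# Illustrative only: config.physicalType now short-circuits the per-dialect mapping.
field = Field(type="string", config={"physicalType": "varchar(10)"})
assert convert_to_sql_type(field, "athena") == "varchar(10)"
assert convert_to_sql_type(field, "snowflake") == "varchar(10)"
```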
 
@@ -130,13 +130,23 @@ def import_record_fields(record_fields: List[avro.schema.Field]) -> Dict[str, Fi
130
130
  imported_field.fields = import_record_fields(field.type.fields)
131
131
  elif field.type.type == "union":
132
132
  imported_field.required = False
133
- type = import_type_of_optional_field(field)
134
- imported_field.type = type
135
- if type == "record":
136
- imported_field.fields = import_record_fields(get_record_from_union_field(field).fields)
137
- elif type == "array":
138
- imported_field.type = "array"
139
- imported_field.items = import_avro_array_items(get_array_from_union_field(field))
133
+ # Check for enum in union first, since it needs special handling
134
+ enum_schema = get_enum_from_union_field(field)
135
+ if enum_schema:
136
+ imported_field.type = "string"
137
+ imported_field.enum = enum_schema.symbols
138
+ imported_field.title = enum_schema.name
139
+ if not imported_field.config:
140
+ imported_field.config = {}
141
+ imported_field.config["avroType"] = "enum"
142
+ else:
143
+ type = import_type_of_optional_field(field)
144
+ imported_field.type = type
145
+ if type == "record":
146
+ imported_field.fields = import_record_fields(get_record_from_union_field(field).fields)
147
+ elif type == "array":
148
+ imported_field.type = "array"
149
+ imported_field.items = import_avro_array_items(get_array_from_union_field(field))
140
150
  elif field.type.type == "array":
141
151
  imported_field.type = "array"
142
152
  imported_field.items = import_avro_array_items(field.type)
@@ -277,6 +287,22 @@ def get_array_from_union_field(field: avro.schema.Field) -> avro.schema.ArraySch
277
287
  return None
278
288
 
279
289
 
290
+ def get_enum_from_union_field(field: avro.schema.Field) -> avro.schema.EnumSchema | None:
291
+ """
292
+ Get the enum schema from a union field.
293
+
294
+ Args:
295
+ field: The Avro field with a union type.
296
+
297
+ Returns:
298
+ The enum schema if found, None otherwise.
299
+ """
300
+ for field_type in field.type.schemas:
301
+ if field_type.type == "enum":
302
+ return field_type
303
+ return None
304
+
305
+
280
306
  def map_type_from_avro(avro_type_str: str) -> str:
281
307
  """
282
308
  Map Avro type strings to data contract type strings.
@@ -131,6 +131,7 @@ def import_servers(odcs: OpenDataContractStandard) -> Dict[str, Server] | None:
131
131
  server.host = odcs_server.host
132
132
  server.port = odcs_server.port
133
133
  server.catalog = odcs_server.catalog
134
+ server.stagingDir = odcs_server.stagingDir
134
135
  server.topic = getattr(odcs_server, "topic", None)
135
136
  server.http_path = getattr(odcs_server, "http_path", None)
136
137
  server.token = getattr(odcs_server, "token", None)
@@ -1,4 +1,6 @@
1
+ import atexit
1
2
  import logging
3
+ import tempfile
2
4
 
3
5
  from databricks.sdk import WorkspaceClient
4
6
  from pyspark.sql import DataFrame, SparkSession, types
@@ -54,7 +56,16 @@ def import_spark(
54
56
  Returns:
55
57
  DataContractSpecification: The updated contract spec with imported models.
56
58
  """
57
- spark = SparkSession.builder.getOrCreate()
59
+
60
+ tmp_dir = tempfile.TemporaryDirectory(prefix="datacontract-cli-spark")
61
+ atexit.register(tmp_dir.cleanup)
62
+
63
+ spark = (
64
+ SparkSession.builder.config("spark.sql.warehouse.dir", f"{tmp_dir}/spark-warehouse")
65
+ .config("spark.streaming.stopGracefullyOnShutdown", "true")
66
+ .config("spark.ui.enabled", "false")
67
+ .getOrCreate()
68
+ )
58
69
  data_contract_specification.servers["local"] = Server(type="dataframe")
59
70
 
60
71
  if dataframe is not None:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datacontract-cli
3
- Version: 0.10.33
3
+ Version: 0.10.34
4
4
  Summary: The datacontract CLI is an open source command-line tool for working with Data Contracts. It uses data contract YAML files to lint the data contract, connect to data sources and execute schema and quality tests, detect breaking changes, and export to different formats. The tool is written in Python. It can be used as a standalone CLI tool, in a CI/CD pipeline, or directly as a Python library.
5
5
  Author-email: Jochen Christ <jochen.christ@innoq.com>, Stefan Negele <stefan.negele@innoq.com>, Simon Harrer <simon.harrer@innoq.com>
6
6
  License-Expression: MIT
@@ -20,7 +20,7 @@ Requires-Dist: fastparquet<2025.0.0,>=2024.5.0
20
20
  Requires-Dist: numpy<2.0.0,>=1.26.4
21
21
  Requires-Dist: python-multipart<1.0.0,>=0.0.20
22
22
  Requires-Dist: rich<15.0,>=13.7
23
- Requires-Dist: sqlglot<27.0.0,>=26.6.0
23
+ Requires-Dist: sqlglot<28.0.0,>=26.6.0
24
24
  Requires-Dist: duckdb<2.0.0,>=1.0.0
25
25
  Requires-Dist: soda-core-duckdb<3.6.0,>=3.3.20
26
26
  Requires-Dist: setuptools>=60
@@ -56,10 +56,12 @@ Provides-Extra: s3
56
56
  Requires-Dist: s3fs<2026.0.0,>=2025.2.0; extra == "s3"
57
57
  Requires-Dist: aiobotocore<2.24.0,>=2.17.0; extra == "s3"
58
58
  Provides-Extra: snowflake
59
- Requires-Dist: snowflake-connector-python[pandas]<3.16,>=3.6; extra == "snowflake"
59
+ Requires-Dist: snowflake-connector-python[pandas]<3.17,>=3.6; extra == "snowflake"
60
60
  Requires-Dist: soda-core-snowflake<3.6.0,>=3.3.20; extra == "snowflake"
61
61
  Provides-Extra: sqlserver
62
62
  Requires-Dist: soda-core-sqlserver<3.6.0,>=3.3.20; extra == "sqlserver"
63
+ Provides-Extra: athena
64
+ Requires-Dist: soda-core-athena<3.6.0,>=3.3.20; extra == "athena"
63
65
  Provides-Extra: trino
64
66
  Requires-Dist: soda-core-trino<3.6.0,>=3.3.20; extra == "trino"
65
67
  Provides-Extra: dbt
@@ -76,19 +78,19 @@ Requires-Dist: uvicorn==0.35.0; extra == "api"
76
78
  Provides-Extra: protobuf
77
79
  Requires-Dist: grpcio-tools>=1.53; extra == "protobuf"
78
80
  Provides-Extra: all
79
- Requires-Dist: datacontract-cli[api,bigquery,csv,databricks,dbml,dbt,excel,iceberg,kafka,parquet,postgres,protobuf,rdf,s3,snowflake,sqlserver,trino]; extra == "all"
81
+ Requires-Dist: datacontract-cli[api,athena,bigquery,csv,databricks,dbml,dbt,excel,iceberg,kafka,parquet,postgres,protobuf,rdf,s3,snowflake,sqlserver,trino]; extra == "all"
80
82
  Provides-Extra: dev
81
83
  Requires-Dist: datacontract-cli[all]; extra == "dev"
82
84
  Requires-Dist: httpx==0.28.1; extra == "dev"
83
85
  Requires-Dist: kafka-python; extra == "dev"
84
- Requires-Dist: moto==5.1.6; extra == "dev"
86
+ Requires-Dist: moto==5.1.8; extra == "dev"
85
87
  Requires-Dist: pandas>=2.1.0; extra == "dev"
86
88
  Requires-Dist: pre-commit<4.3.0,>=3.7.1; extra == "dev"
87
89
  Requires-Dist: pytest; extra == "dev"
88
90
  Requires-Dist: pytest-xdist; extra == "dev"
89
91
  Requires-Dist: pymssql==2.3.7; extra == "dev"
90
92
  Requires-Dist: ruff; extra == "dev"
91
- Requires-Dist: testcontainers[kafka,minio,mssql,postgres]==4.10.0; extra == "dev"
93
+ Requires-Dist: testcontainers[kafka,minio,mssql,postgres]==4.12.0; extra == "dev"
92
94
  Requires-Dist: trino==0.335.0; extra == "dev"
93
95
  Dynamic: license-file
94
96
 
@@ -316,6 +318,7 @@ A list of available extras:
316
318
 
317
319
  | Dependency | Installation Command |
318
320
  |-------------------------|--------------------------------------------|
321
+ | Amazon Athena | `pip install datacontract-cli[athena]` |
319
322
  | Avro Support | `pip install datacontract-cli[avro]` |
320
323
  | Google BigQuery | `pip install datacontract-cli[bigquery]` |
321
324
  | Databricks Integration | `pip install datacontract-cli[databricks]` |
@@ -460,6 +463,7 @@ Credentials are provided with environment variables.
460
463
  Supported server types:
461
464
 
462
465
  - [s3](#S3)
466
+ - [athena](#athena)
463
467
  - [bigquery](#bigquery)
464
468
  - [azure](#azure)
465
469
  - [sqlserver](#sqlserver)
@@ -470,6 +474,7 @@ Supported server types:
470
474
  - [kafka](#kafka)
471
475
  - [postgres](#postgres)
472
476
  - [trino](#trino)
477
+ - [api](#api)
473
478
  - [local](#local)
474
479
 
475
480
  Supported formats:
@@ -529,6 +534,41 @@ servers:
529
534
  | `DATACONTRACT_S3_SESSION_TOKEN` | `AQoDYXdzEJr...` | AWS temporary session token (optional) |
530
535
 
531
536
 
537
+ #### Athena
538
+
539
+ Data Contract CLI can test data in AWS Athena stored in S3.
540
+ Supports different file formats, such as Iceberg, Parquet, JSON, CSV...
541
+
542
+ ##### Example
543
+
544
+ datacontract.yaml
545
+ ```yaml
546
+ servers:
547
+ athena:
548
+ type: athena
549
+ catalog: awsdatacatalog # awsdatacatalog is the default setting
550
+ schema: icebergdemodb # in Athena, this is called "database"
551
+ regionName: eu-central-1
552
+ stagingDir: s3://my-bucket/athena-results/
553
+ models:
554
+ my_table: # corresponds to a table or view name
555
+ type: table
556
+ fields:
557
+ my_column_1: # corresponds to a column
558
+ type: string
559
+ config:
560
+ physicalType: varchar
561
+ ```
562
+
563
+ ##### Environment Variables
564
+
565
+ | Environment Variable | Example | Description |
566
+ |-------------------------------------|---------------------------------|----------------------------------------|
567
+ | `DATACONTRACT_S3_REGION` | `eu-central-1` | Region of Athena service |
568
+ | `DATACONTRACT_S3_ACCESS_KEY_ID` | `AKIAXV5Q5QABCDEFGH` | AWS Access Key ID |
569
+ | `DATACONTRACT_S3_SECRET_ACCESS_KEY` | `93S7LRrJcqLaaaa/XXXXXXXXXXXXX` | AWS Secret Access Key |
570
+ | `DATACONTRACT_S3_SESSION_TOKEN` | `AQoDYXdzEJr...` | AWS temporary session token (optional) |
571
+
532
572
 
533
573
  #### Google Cloud Storage (GCS)
534
574
 
@@ -896,6 +936,38 @@ models:
896
936
  | `DATACONTRACT_TRINO_PASSWORD` | `mysecretpassword` | Password |
897
937
 
898
938
 
939
+ #### API
940
+
941
+ Data Contract CLI can test APIs that return data in JSON format.
942
+ Currently, only GET requests are supported.
943
+
944
+ ##### Example
945
+
946
+ datacontract.yaml
947
+ ```yaml
948
+ servers:
949
+ api:
950
+ type: "api"
951
+ location: "https://api.example.com/path"
952
+ delimiter: none # new_line, array, or none (default)
953
+
954
+ models:
955
+ my_object: # corresponds to the root element of the JSON response
956
+ type: object
957
+ fields:
958
+ field1:
959
+ type: string
960
+ fields2:
961
+ type: number
962
+ ```
963
+
964
+ ##### Environment Variables
965
+
966
+ | Environment Variable | Example | Description |
967
+ |-----------------------------------------|------------------|---------------------------------------------------|
968
+ | `DATACONTRACT_API_HEADER_AUTHORIZATION` | `Bearer <token>` | The value for the `authorization` header. Optional. |
969
+
970
+
899
971
  #### Local
900
972
 
901
973
  Data Contract CLI can test local files in parquet, json, csv, or delta format.
@@ -959,8 +1031,10 @@ models:
959
1031
  │ --engine TEXT [engine] The engine used for great │
960
1032
  │ expection run. │
961
1033
  │ [default: None] │
962
- │ --template PATH [custom] The file path of Jinja
963
- template.
1034
+ │ --template PATH The file path or URL of a template.
1035
+ For Excel format: path/URL to custom
1036
+ │ Excel template. For custom format: │
1037
+ │ path to Jinja template. │
964
1038
  │ [default: None] │
965
1039
  │ --help Show this message and exit. │
966
1040
  ╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
@@ -2102,6 +2176,7 @@ We are happy to receive your contributions. Propose your change in an issue or d
2102
2176
 
2103
2177
  ## Companies using this tool
2104
2178
 
2179
+ - [Entropy Data](https://www.entropy-data.com)
2105
2180
  - [INNOQ](https://innoq.com)
2106
2181
  - [Data Catering](https://data.catering/)
2107
2182
  - [Oliver Wyman](https://www.oliverwyman.com/)
@@ -2120,7 +2195,7 @@ We are happy to receive your contributions. Propose your change in an issue or d
2120
2195
 
2121
2196
  ## Credits
2122
2197
 
2123
- Created by [Stefan Negele](https://www.linkedin.com/in/stefan-negele-573153112/) and [Jochen Christ](https://www.linkedin.com/in/jochenchrist/).
2198
+ Created by [Stefan Negele](https://www.linkedin.com/in/stefan-negele-573153112/), [Jochen Christ](https://www.linkedin.com/in/jochenchrist/), and [Simon Harrer]().
2124
2199
 
2125
2200
 
2126
2201
 
@@ -8,24 +8,25 @@ datacontract/breaking/breaking_change.py,sha256=BIDEUo1U2CQLVT2-I5PyFttxAj6zQPI1
8
8
  datacontract/breaking/breaking_rules.py,sha256=M9IdzVJSA7oOr1fvLQl0y9MoBKeItPz42Db2U2cjH2Y,4063
9
9
  datacontract/catalog/catalog.py,sha256=HyFmUPsN3pWJ2WTdbs0DmOf0qmwAzc2-ijWse9_dhBc,2729
10
10
  datacontract/engines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
- datacontract/engines/data_contract_checks.py,sha256=2WSxqslt7hxr0CzCir-8I3RihCbFAAa8LmF3fxORMQk,26728
12
- datacontract/engines/data_contract_test.py,sha256=NAnO_744H30tkQh43PSUSsAs3TC9DaNIWZe9jFKzA8Q,3242
11
+ datacontract/engines/data_contract_checks.py,sha256=NbYz7p9ljnia2XiF6PeWR4UNiZVpCAj2ufKgpir-Ve4,28234
12
+ datacontract/engines/data_contract_test.py,sha256=8qg0SkwtTmayfzNL2U_0xgx5Hi_DUePaMt2q_JiCqX8,4543
13
13
  datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py,sha256=zrDn-_EJJ5kv0kZWAA-toeEPuBd3YQ0-U7Jb8euNUS8,1558
14
14
  datacontract/engines/datacontract/check_that_datacontract_file_exists.py,sha256=Vw-7U0GmQT2127tybxggZfpRFiZVgoIh6ndkTGM0FP4,665
15
- datacontract/engines/fastjsonschema/check_jsonschema.py,sha256=-knTZ-NsHpBWCoR7r1JP5iYSWx697mugijmqUPx0pEY,10307
15
+ datacontract/engines/fastjsonschema/check_jsonschema.py,sha256=zjTEHNyPdh1hhStvpImjqKXjsz97AAkbNzoWF-pWgJE,10603
16
16
  datacontract/engines/fastjsonschema/s3/s3_read_files.py,sha256=0sTDWvuu0AzSgn7fKWJxGaTmPww00TFYyDK-X0s5T3c,1193
17
17
  datacontract/engines/soda/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
- datacontract/engines/soda/check_soda_execute.py,sha256=SYJdPpkozOA62yTM7s6cfwLfgfxgGJptO5U-cShrtPk,8600
18
+ datacontract/engines/soda/check_soda_execute.py,sha256=qc56ZNKyHIoSFgoXzBRioOhnhgsFSJ6L-nyBU7d0fW8,8902
19
+ datacontract/engines/soda/connections/athena.py,sha256=wjrJA9CHhl6FbSW0HulWcYlkT2_nY1s19Y2MFe4lbCU,3028
19
20
  datacontract/engines/soda/connections/bigquery.py,sha256=C-8kxmzpYe88bJp80ObHFLMh4rpnIjnUQ7XOj0Ke7lk,903
20
21
  datacontract/engines/soda/connections/databricks.py,sha256=cMRasuO0MrSKVgHPB-9uFTGTZPFg6z9Kpk3tJ0SdR0s,943
21
- datacontract/engines/soda/connections/duckdb_connection.py,sha256=UY6BElfLUs_LPZ4EA5SNZKhaq6EsPe9Mb79zhoSv4sg,9110
22
+ datacontract/engines/soda/connections/duckdb_connection.py,sha256=wGiB6EKr-OZosEFvT2gkutFgAzAxFMKicfpjbIJUZwQ,9332
22
23
  datacontract/engines/soda/connections/kafka.py,sha256=lnj_-3-CnJ6stetGqm6HOzN1Qatlw7xoCQU2zKBIXxU,8725
23
24
  datacontract/engines/soda/connections/postgres.py,sha256=9GTF4Es3M5vb7ocSGqAxXmslvkS5CjsPQGIuo020CFc,626
24
25
  datacontract/engines/soda/connections/snowflake.py,sha256=rfG2ysuqNM6TkvyqQKcGHFsTGJ6AROmud5VleUDRrb0,749
25
26
  datacontract/engines/soda/connections/sqlserver.py,sha256=RzGLbCUdRyfmDcqtM_AB9WZ-Xk-XYX91nkXpVNpYbvc,1440
26
27
  datacontract/engines/soda/connections/trino.py,sha256=JvKUP9aFg_n095oWE0-bGmfbETSWEOURGEZdQuG8txA,718
27
28
  datacontract/export/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
28
- datacontract/export/avro_converter.py,sha256=0_MOAwJYOMxQpmgYKSQ5TJjOTeuqVWfm4kTxmaSVH3A,4882
29
+ datacontract/export/avro_converter.py,sha256=MnfeW2x-Eko9dK6_fpdQYWtEzLkFWXfKABAUSJqiDpo,5381
29
30
  datacontract/export/avro_idl_converter.py,sha256=SGO7JfI9UGXLYFR5wMGNUH1qf6kt9lF6dUU9roVqnFo,9878
30
31
  datacontract/export/bigquery_converter.py,sha256=9mm-XP3klu1C5J87L9EL5ZyMCJhLBwsixo3aAw9QmRI,4738
31
32
  datacontract/export/custom_converter.py,sha256=xb8KbkRRgHmT4ewwC7XxtnKpe_ZMSJWBjYOaKjmO_KQ,1216
@@ -42,7 +43,7 @@ datacontract/export/great_expectations_converter.py,sha256=Wx0mESRy4xAf8y7HjESsG
42
43
  datacontract/export/html_exporter.py,sha256=EyTMj25_Df3irZiYw1hxVZeLYWp6YSG6z3IuFUviP14,3066
43
44
  datacontract/export/iceberg_converter.py,sha256=ArcQ_Y3z_W4_kGDU_8jPRx2-pHpP3Nhx1zYoETOL3c4,6804
44
45
  datacontract/export/jsonschema_converter.py,sha256=2MT82MurcQQbrVDRj1kFsxnmFd9scNSfYI1upQSecl4,5631
45
- datacontract/export/markdown_converter.py,sha256=ioTFe3mHKLYwpo7wasOr-4TBlHIjowDHSonDgXV4uOQ,6468
46
+ datacontract/export/markdown_converter.py,sha256=sV74JBGEfvhteNYPwBR-78ucq0Avp9oaPdFpu3Ckd0w,9935
46
47
  datacontract/export/mermaid_exporter.py,sha256=wBFqRKgy3Y5FZnzD0RCR1UAlT9ZqYW7dkXWSJa6eHiQ,3465
47
48
  datacontract/export/odcs_v3_exporter.py,sha256=b__AiPAnCUuFQE5DPHsvXBrMeEl1t_mJ1vzTx84TMlI,13931
48
49
  datacontract/export/pandas_type_converter.py,sha256=464pQ3JQKFQa1TO0HBNcEoZvQye_yUbY6jQtiBaphSc,1117
@@ -52,10 +53,10 @@ datacontract/export/rdf_converter.py,sha256=1aTe_fwBRBnYUrJZzhEQ8eVnl0mQ1hcF45aK
52
53
  datacontract/export/sodacl_converter.py,sha256=75vQ2TnoLfjiDtWT2x8opumvotXVRs1YaIu1NLYz05M,1473
53
54
  datacontract/export/spark_converter.py,sha256=c4C_QZFRQLMW6Cz35WYdHqYGKyHSF7bBVmJua2YF0Cs,7290
54
55
  datacontract/export/sql_converter.py,sha256=vyLbDqzt_J3LRXpPv2W2HqUIyAtQx_S-jviBiSxh14A,5087
55
- datacontract/export/sql_type_converter.py,sha256=4r8S7nb4z2ODgXTwHxdtU158HWE8uxXHydiZ_y4-N_E,13592
56
+ datacontract/export/sql_type_converter.py,sha256=6-FKC4GTTSftXntIesiptQ51WVtS-mYgJpKrzhVDi1M,13694
56
57
  datacontract/export/sqlalchemy_converter.py,sha256=0DMncvA811lTtd5q4ZORREQ9YH1vQm1lJeqMWsFvloE,6463
57
58
  datacontract/export/terraform_converter.py,sha256=ExFoEvErVk-gBnWJiqC38SxDUmUEydpACWc917l5RyM,2163
58
- datacontract/imports/avro_importer.py,sha256=ryu4iUCSPJEV1uaE3AKdxD7fUxmRJ-ta936xurbgtHc,10922
59
+ datacontract/imports/avro_importer.py,sha256=isfAnMq9bk-Yo5zSyTnqMegu7JIujn_sTGSTOYAc8-0,11847
59
60
  datacontract/imports/bigquery_importer.py,sha256=7TcP9FDsIas5LwJZ-HrOPXZ-NuR056sxLfDDh3vjo8E,8419
60
61
  datacontract/imports/csv_importer.py,sha256=mBsmyTvfB8q64Z3NYqv4zTDUOvoXG896hZvp3oLt5YM,5330
61
62
  datacontract/imports/dbml_importer.py,sha256=o0IOgvXN34lU1FICDHm_QUTv0DKsgwbHPHUDxQhIapE,3872
@@ -68,10 +69,10 @@ datacontract/imports/importer_factory.py,sha256=RS7uwkkT7rIKGeMKgPmZhE3GVC9IfZxZ
68
69
  datacontract/imports/json_importer.py,sha256=JeGbqAC_wAO0u8HeMA5H-KJBfs6gpp1oGIpxt6nxSZI,12641
69
70
  datacontract/imports/jsonschema_importer.py,sha256=67H__XLugV4vguHrIqzW02dtx27zYTWnOms4D1ma3bk,4961
70
71
  datacontract/imports/odcs_importer.py,sha256=ZP2u3kJsgULANTbbqkP3joOlU9cUneZOPy6Ak3oTMgs,2140
71
- datacontract/imports/odcs_v3_importer.py,sha256=smtFSoywNojUCdB8sDu0e_3yHN4CmUw5YS3qF-AU0qY,16873
72
+ datacontract/imports/odcs_v3_importer.py,sha256=cDfAq5AdIS_DuHUhMWlAdTf3YLvc6fQhIElJyznRQBE,16924
72
73
  datacontract/imports/parquet_importer.py,sha256=W_0_16mX4stwDUt4GM2L7dnGmTpAySab5k13-OlTCCc,3095
73
74
  datacontract/imports/protobuf_importer.py,sha256=rlUIskv9PNi5rFQ4Hobt9zlnKpahGsb4dy5G5UJoVAw,10840
74
- datacontract/imports/spark_importer.py,sha256=vqGM3kisQGSiHS1AvuC6gp0unj2-6TT3R-ejG96j-Tw,8618
75
+ datacontract/imports/spark_importer.py,sha256=OxX9hJhi8e1o1pZGOKh5zWsK96SX13r0WV04kKDD61M,8964
75
76
  datacontract/imports/sql_importer.py,sha256=CfHap1zpyy-NVol_i21bDai3l4PD8OLuJQwaVlihbqg,9543
76
77
  datacontract/imports/unity_importer.py,sha256=iW4CJNbHNoQR5fqVlBJfdHnbbDasPgkwHHmyV9FKGkI,8687
77
78
  datacontract/init/init_template.py,sha256=nc-B2ZkwDQ3GNFqxNmSDcStQMDbBTxysgTZDLw15izo,721
@@ -113,9 +114,9 @@ datacontract/templates/partials/model_field.html,sha256=2YBF95ypNCPFYuYKoeilRnDG
113
114
  datacontract/templates/partials/quality.html,sha256=ynEDWRn8I90Uje-xhGYgFcfwOgKI1R-CDki-EvTsauQ,1785
114
115
  datacontract/templates/partials/server.html,sha256=dHFJtonMjhiUHtT69RUgTpkoRwmNdTRzkCdH0LtGg_4,6279
115
116
  datacontract/templates/style/output.css,sha256=ioIo1f96VW7LHhDifj6QI8QbRChJl-LlQ59EwM8MEmA,28692
116
- datacontract_cli-0.10.33.dist-info/licenses/LICENSE,sha256=23h64qnSeIZ0DKeziWAKC-zBCt328iSbRbWBrXoYRb4,2210
117
- datacontract_cli-0.10.33.dist-info/METADATA,sha256=gqEgdS3X0NnbsbAuATDP2YmwNVcqQMgXEJmVayqQbVA,111469
118
- datacontract_cli-0.10.33.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
119
- datacontract_cli-0.10.33.dist-info/entry_points.txt,sha256=D3Eqy4q_Z6bHauGd4ppIyQglwbrm1AJnLau4Ppbw9Is,54
120
- datacontract_cli-0.10.33.dist-info/top_level.txt,sha256=VIRjd8EIUrBYWjEXJJjtdUgc0UAJdPZjmLiOR8BRBYM,13
121
- datacontract_cli-0.10.33.dist-info/RECORD,,
117
+ datacontract_cli-0.10.34.dist-info/licenses/LICENSE,sha256=0hcS8X51AL0UvEsx1ZM6WQcxiy9d0j5iOfzdPYM6ONU,2205
118
+ datacontract_cli-0.10.34.dist-info/METADATA,sha256=vMywE_iShiMd1OEND9UggCJ_s9swhY7Ls_6eaMgvC-k,114165
119
+ datacontract_cli-0.10.34.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
120
+ datacontract_cli-0.10.34.dist-info/entry_points.txt,sha256=D3Eqy4q_Z6bHauGd4ppIyQglwbrm1AJnLau4Ppbw9Is,54
121
+ datacontract_cli-0.10.34.dist-info/top_level.txt,sha256=VIRjd8EIUrBYWjEXJJjtdUgc0UAJdPZjmLiOR8BRBYM,13
122
+ datacontract_cli-0.10.34.dist-info/RECORD,,
@@ -1,6 +1,6 @@
1
1
  MIT License
2
2
 
3
- Copyright (c) 2023 innoQ Deutschland GmbH
3
+ Copyright (c) 2025 Entropy Data GmbH
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal