datacontract-cli 0.10.33__py3-none-any.whl → 0.10.35__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package as they appear in their respective public registries. The information is provided for informational purposes only.

Potentially problematic release.


This version of datacontract-cli might be problematic; consult the package registry's advisory page for more details.

datacontract/api.py CHANGED
@@ -162,15 +162,22 @@ async def test(
162
162
  server: Annotated[
163
163
  str | None,
164
164
  Query(
165
- examples=["production"],
166
165
  description="The server name to test. Optional, if there is only one server.",
166
+ examples=["production"],
167
+ ),
168
+ ] = None,
169
+ publish_url: Annotated[
170
+ str | None,
171
+ Query(
172
+ description="URL to publish test results. Optional, if you want to publish the test results to a Data Mesh Manager or Data Contract Manager. Example: https://api.datamesh-manager.com/api/test-results",
173
+ examples=["https://api.datamesh-manager.com/api/test-results"],
167
174
  ),
168
175
  ] = None,
169
176
  ) -> Run:
170
177
  check_api_key(api_key)
171
178
  logging.info("Testing data contract...")
172
179
  logging.info(body)
173
- return DataContract(data_contract_str=body, server=server).test()
180
+ return DataContract(data_contract_str=body, server=server, publish_url=publish_url).test()
174
181
 
175
182
 
176
183
  @app.post(
datacontract/cli.py CHANGED
@@ -126,8 +126,10 @@ def test(
126
126
  "servers (default)."
127
127
  ),
128
128
  ] = "all",
129
- publish_test_results: Annotated[bool, typer.Option(help="Publish the results after the test")] = False,
130
- publish: Annotated[str, typer.Option(help="DEPRECATED. The url to publish the results after the test.")] = None,
129
+ publish_test_results: Annotated[
130
+ bool, typer.Option(help="Deprecated. Use publish parameter. Publish the results after the test")
131
+ ] = False,
132
+ publish: Annotated[str, typer.Option(help="The url to publish the results after the test.")] = None,
131
133
  output: Annotated[
132
134
  Path,
133
135
  typer.Option(
@@ -1,4 +1,6 @@
1
+ import re
1
2
  import uuid
3
+ from dataclasses import dataclass
2
4
  from typing import List
3
5
  from venv import logger
4
6
 
@@ -9,6 +11,12 @@ from datacontract.model.data_contract_specification import DataContractSpecifica
9
11
  from datacontract.model.run import Check
10
12
 
11
13
 
14
+ @dataclass
15
+ class QuotingConfig:
16
+ quote_field_name: bool = False
17
+ quote_model_name: bool = False
18
+
19
+
12
20
  def create_checks(data_contract_spec: DataContractSpecification, server: Server) -> List[Check]:
13
21
  checks: List[Check] = []
14
22
  for model_key, model_value in data_contract_spec.models.items():
@@ -26,37 +34,41 @@ def to_model_checks(model_key, model_value, server: Server) -> List[Check]:
26
34
  fields = model_value.fields
27
35
 
28
36
  check_types = is_check_types(server)
29
- quote_field_name = server_type in ["postgres", "sqlserver"]
37
+
38
+ quoting_config = QuotingConfig(
39
+ quote_field_name=server_type in ["postgres", "sqlserver"],
40
+ quote_model_name=server_type in ["postgres", "sqlserver"],
41
+ )
30
42
 
31
43
  for field_name, field in fields.items():
32
- checks.append(check_field_is_present(model_name, field_name, quote_field_name))
44
+ checks.append(check_field_is_present(model_name, field_name, quoting_config))
33
45
  if check_types and field.type is not None:
34
46
  sql_type = convert_to_sql_type(field, server_type)
35
- checks.append(check_field_type(model_name, field_name, sql_type, quote_field_name))
47
+ checks.append(check_field_type(model_name, field_name, sql_type, quoting_config))
36
48
  if field.required:
37
- checks.append(check_field_required(model_name, field_name, quote_field_name))
49
+ checks.append(check_field_required(model_name, field_name, quoting_config))
38
50
  if field.unique:
39
- checks.append(check_field_unique(model_name, field_name, quote_field_name))
51
+ checks.append(check_field_unique(model_name, field_name, quoting_config))
40
52
  if field.minLength is not None:
41
- checks.append(check_field_min_length(model_name, field_name, field.minLength, quote_field_name))
53
+ checks.append(check_field_min_length(model_name, field_name, field.minLength, quoting_config))
42
54
  if field.maxLength is not None:
43
- checks.append(check_field_max_length(model_name, field_name, field.maxLength, quote_field_name))
55
+ checks.append(check_field_max_length(model_name, field_name, field.maxLength, quoting_config))
44
56
  if field.minimum is not None:
45
- checks.append(check_field_minimum(model_name, field_name, field.minimum, quote_field_name))
57
+ checks.append(check_field_minimum(model_name, field_name, field.minimum, quoting_config))
46
58
  if field.maximum is not None:
47
- checks.append(check_field_maximum(model_name, field_name, field.maximum, quote_field_name))
59
+ checks.append(check_field_maximum(model_name, field_name, field.maximum, quoting_config))
48
60
  if field.exclusiveMinimum is not None:
49
- checks.append(check_field_minimum(model_name, field_name, field.exclusiveMinimum, quote_field_name))
50
- checks.append(check_field_not_equal(model_name, field_name, field.exclusiveMinimum, quote_field_name))
61
+ checks.append(check_field_minimum(model_name, field_name, field.exclusiveMinimum, quoting_config))
62
+ checks.append(check_field_not_equal(model_name, field_name, field.exclusiveMinimum, quoting_config))
51
63
  if field.exclusiveMaximum is not None:
52
- checks.append(check_field_maximum(model_name, field_name, field.exclusiveMaximum, quote_field_name))
53
- checks.append(check_field_not_equal(model_name, field_name, field.exclusiveMaximum, quote_field_name))
64
+ checks.append(check_field_maximum(model_name, field_name, field.exclusiveMaximum, quoting_config))
65
+ checks.append(check_field_not_equal(model_name, field_name, field.exclusiveMaximum, quoting_config))
54
66
  if field.pattern is not None:
55
- checks.append(check_field_regex(model_name, field_name, field.pattern, quote_field_name))
67
+ checks.append(check_field_regex(model_name, field_name, field.pattern, quoting_config))
56
68
  if field.enum is not None and len(field.enum) > 0:
57
- checks.append(check_field_enum(model_name, field_name, field.enum, quote_field_name))
69
+ checks.append(check_field_enum(model_name, field_name, field.enum, quoting_config))
58
70
  if field.quality is not None and len(field.quality) > 0:
59
- quality_list = check_quality_list(model_name, field_name, field.quality)
71
+ quality_list = check_quality_list(model_name, field_name, field.quality, quoting_config)
60
72
  if (quality_list is not None) and len(quality_list) > 0:
61
73
  checks.extend(quality_list)
62
74
  # TODO references: str = None
@@ -70,8 +82,8 @@ def to_model_checks(model_key, model_value, server: Server) -> List[Check]:
70
82
  return checks
71
83
 
72
84
 
73
- def checks_for(model_name, quote_field_name):
74
- if quote_field_name:
85
+ def checks_for(model_name, quote_model_name: bool):
86
+ if quote_model_name:
75
87
  return f'checks for "{model_name}"'
76
88
  return f"checks for {model_name}"
77
89
 
@@ -98,11 +110,11 @@ def to_model_name(model_key, model_value, server_type):
98
110
  return model_key
99
111
 
100
112
 
101
- def check_field_is_present(model_name, field_name, quote_field_name: bool) -> Check:
113
+ def check_field_is_present(model_name, field_name, quoting_config: QuotingConfig = QuotingConfig()) -> Check:
102
114
  check_type = "field_is_present"
103
115
  check_key = f"{model_name}__{field_name}__{check_type}"
104
116
  sodacl_check_dict = {
105
- checks_for(model_name, quote_field_name): [
117
+ checks_for(model_name, quoting_config.quote_model_name): [
106
118
  {
107
119
  "schema": {
108
120
  "name": check_key,
@@ -127,11 +139,13 @@ def check_field_is_present(model_name, field_name, quote_field_name: bool) -> Ch
127
139
  )
128
140
 
129
141
 
130
- def check_field_type(model_name: str, field_name: str, expected_type: str, quote_field_name: bool = False):
142
+ def check_field_type(
143
+ model_name: str, field_name: str, expected_type: str, quoting_config: QuotingConfig = QuotingConfig()
144
+ ):
131
145
  check_type = "field_type"
132
146
  check_key = f"{model_name}__{field_name}__{check_type}"
133
147
  sodacl_check_dict = {
134
- checks_for(model_name, quote_field_name): [
148
+ checks_for(model_name, quoting_config.quote_model_name): [
135
149
  {
136
150
  "schema": {
137
151
  "name": check_key,
@@ -158,8 +172,8 @@ def check_field_type(model_name: str, field_name: str, expected_type: str, quote
158
172
  )
159
173
 
160
174
 
161
- def check_field_required(model_name: str, field_name: str, quote_field_name: bool = False):
162
- if quote_field_name:
175
+ def check_field_required(model_name: str, field_name: str, quoting_config: QuotingConfig = QuotingConfig()):
176
+ if quoting_config.quote_field_name:
163
177
  field_name_for_soda = f'"{field_name}"'
164
178
  else:
165
179
  field_name_for_soda = field_name
@@ -167,7 +181,7 @@ def check_field_required(model_name: str, field_name: str, quote_field_name: boo
167
181
  check_type = "field_required"
168
182
  check_key = f"{model_name}__{field_name}__{check_type}"
169
183
  sodacl_check_dict = {
170
- checks_for(model_name, quote_field_name): [
184
+ checks_for(model_name, quoting_config.quote_model_name): [
171
185
  {
172
186
  f"missing_count({field_name_for_soda}) = 0": {
173
187
  "name": check_key,
@@ -189,8 +203,8 @@ def check_field_required(model_name: str, field_name: str, quote_field_name: boo
189
203
  )
190
204
 
191
205
 
192
- def check_field_unique(model_name: str, field_name: str, quote_field_name: bool = False):
193
- if quote_field_name:
206
+ def check_field_unique(model_name: str, field_name: str, quoting_config: QuotingConfig = QuotingConfig()):
207
+ if quoting_config.quote_field_name:
194
208
  field_name_for_soda = f'"{field_name}"'
195
209
  else:
196
210
  field_name_for_soda = field_name
@@ -198,7 +212,7 @@ def check_field_unique(model_name: str, field_name: str, quote_field_name: bool
198
212
  check_type = "field_unique"
199
213
  check_key = f"{model_name}__{field_name}__{check_type}"
200
214
  sodacl_check_dict = {
201
- checks_for(model_name, quote_field_name): [
215
+ checks_for(model_name, quoting_config.quote_model_name): [
202
216
  {
203
217
  f"duplicate_count({field_name_for_soda}) = 0": {
204
218
  "name": check_key,
@@ -220,8 +234,10 @@ def check_field_unique(model_name: str, field_name: str, quote_field_name: bool
220
234
  )
221
235
 
222
236
 
223
- def check_field_min_length(model_name: str, field_name: str, min_length: int, quote_field_name: bool = False):
224
- if quote_field_name:
237
+ def check_field_min_length(
238
+ model_name: str, field_name: str, min_length: int, quoting_config: QuotingConfig = QuotingConfig()
239
+ ):
240
+ if quoting_config.quote_field_name:
225
241
  field_name_for_soda = f'"{field_name}"'
226
242
  else:
227
243
  field_name_for_soda = field_name
@@ -229,7 +245,7 @@ def check_field_min_length(model_name: str, field_name: str, min_length: int, qu
229
245
  check_type = "field_min_length"
230
246
  check_key = f"{model_name}__{field_name}__{check_type}"
231
247
  sodacl_check_dict = {
232
- checks_for(model_name, quote_field_name): [
248
+ checks_for(model_name, quoting_config.quote_model_name): [
233
249
  {
234
250
  f"invalid_count({field_name_for_soda}) = 0": {
235
251
  "name": check_key,
@@ -252,8 +268,10 @@ def check_field_min_length(model_name: str, field_name: str, min_length: int, qu
252
268
  )
253
269
 
254
270
 
255
- def check_field_max_length(model_name: str, field_name: str, max_length: int, quote_field_name: bool = False):
256
- if quote_field_name:
271
+ def check_field_max_length(
272
+ model_name: str, field_name: str, max_length: int, quoting_config: QuotingConfig = QuotingConfig()
273
+ ):
274
+ if quoting_config.quote_field_name:
257
275
  field_name_for_soda = f'"{field_name}"'
258
276
  else:
259
277
  field_name_for_soda = field_name
@@ -261,7 +279,7 @@ def check_field_max_length(model_name: str, field_name: str, max_length: int, qu
261
279
  check_type = "field_max_length"
262
280
  check_key = f"{model_name}__{field_name}__{check_type}"
263
281
  sodacl_check_dict = {
264
- checks_for(model_name, quote_field_name): [
282
+ checks_for(model_name, quoting_config.quote_model_name): [
265
283
  {
266
284
  f"invalid_count({field_name_for_soda}) = 0": {
267
285
  "name": check_key,
@@ -284,8 +302,10 @@ def check_field_max_length(model_name: str, field_name: str, max_length: int, qu
284
302
  )
285
303
 
286
304
 
287
- def check_field_minimum(model_name: str, field_name: str, minimum: int, quote_field_name: bool = False):
288
- if quote_field_name:
305
+ def check_field_minimum(
306
+ model_name: str, field_name: str, minimum: int, quoting_config: QuotingConfig = QuotingConfig()
307
+ ):
308
+ if quoting_config.quote_field_name:
289
309
  field_name_for_soda = f'"{field_name}"'
290
310
  else:
291
311
  field_name_for_soda = field_name
@@ -293,7 +313,7 @@ def check_field_minimum(model_name: str, field_name: str, minimum: int, quote_fi
293
313
  check_type = "field_minimum"
294
314
  check_key = f"{model_name}__{field_name}__{check_type}"
295
315
  sodacl_check_dict = {
296
- checks_for(model_name, quote_field_name): [
316
+ checks_for(model_name, quoting_config.quote_model_name): [
297
317
  {
298
318
  f"invalid_count({field_name_for_soda}) = 0": {
299
319
  "name": check_key,
@@ -316,8 +336,10 @@ def check_field_minimum(model_name: str, field_name: str, minimum: int, quote_fi
316
336
  )
317
337
 
318
338
 
319
- def check_field_maximum(model_name: str, field_name: str, maximum: int, quote_field_name: bool = False):
320
- if quote_field_name:
339
+ def check_field_maximum(
340
+ model_name: str, field_name: str, maximum: int, quoting_config: QuotingConfig = QuotingConfig()
341
+ ):
342
+ if quoting_config.quote_field_name:
321
343
  field_name_for_soda = f'"{field_name}"'
322
344
  else:
323
345
  field_name_for_soda = field_name
@@ -325,7 +347,7 @@ def check_field_maximum(model_name: str, field_name: str, maximum: int, quote_fi
325
347
  check_type = "field_maximum"
326
348
  check_key = f"{model_name}__{field_name}__{check_type}"
327
349
  sodacl_check_dict = {
328
- checks_for(model_name, quote_field_name): [
350
+ checks_for(model_name, quoting_config.quote_model_name): [
329
351
  {
330
352
  f"invalid_count({field_name_for_soda}) = 0": {
331
353
  "name": check_key,
@@ -348,8 +370,10 @@ def check_field_maximum(model_name: str, field_name: str, maximum: int, quote_fi
348
370
  )
349
371
 
350
372
 
351
- def check_field_not_equal(model_name: str, field_name: str, value: int, quote_field_name: bool = False):
352
- if quote_field_name:
373
+ def check_field_not_equal(
374
+ model_name: str, field_name: str, value: int, quoting_config: QuotingConfig = QuotingConfig()
375
+ ):
376
+ if quoting_config.quote_field_name:
353
377
  field_name_for_soda = f'"{field_name}"'
354
378
  else:
355
379
  field_name_for_soda = field_name
@@ -357,7 +381,7 @@ def check_field_not_equal(model_name: str, field_name: str, value: int, quote_fi
357
381
  check_type = "field_not_equal"
358
382
  check_key = f"{model_name}__{field_name}__{check_type}"
359
383
  sodacl_check_dict = {
360
- checks_for(model_name, quote_field_name): [
384
+ checks_for(model_name, quoting_config.quote_model_name): [
361
385
  {
362
386
  f"invalid_count({field_name_for_soda}) = 0": {
363
387
  "name": check_key,
@@ -380,8 +404,8 @@ def check_field_not_equal(model_name: str, field_name: str, value: int, quote_fi
380
404
  )
381
405
 
382
406
 
383
- def check_field_enum(model_name: str, field_name: str, enum: list, quote_field_name: bool = False):
384
- if quote_field_name:
407
+ def check_field_enum(model_name: str, field_name: str, enum: list, quoting_config: QuotingConfig = QuotingConfig()):
408
+ if quoting_config.quote_field_name:
385
409
  field_name_for_soda = f'"{field_name}"'
386
410
  else:
387
411
  field_name_for_soda = field_name
@@ -389,7 +413,7 @@ def check_field_enum(model_name: str, field_name: str, enum: list, quote_field_n
389
413
  check_type = "field_enum"
390
414
  check_key = f"{model_name}__{field_name}__{check_type}"
391
415
  sodacl_check_dict = {
392
- checks_for(model_name, quote_field_name): [
416
+ checks_for(model_name, quoting_config.quote_model_name): [
393
417
  {
394
418
  f"invalid_count({field_name_for_soda}) = 0": {
395
419
  "name": check_key,
@@ -412,8 +436,8 @@ def check_field_enum(model_name: str, field_name: str, enum: list, quote_field_n
412
436
  )
413
437
 
414
438
 
415
- def check_field_regex(model_name: str, field_name: str, pattern: str, quote_field_name: bool = False):
416
- if quote_field_name:
439
+ def check_field_regex(model_name: str, field_name: str, pattern: str, quoting_config: QuotingConfig = QuotingConfig()):
440
+ if quoting_config.quote_field_name:
417
441
  field_name_for_soda = f'"{field_name}"'
418
442
  else:
419
443
  field_name_for_soda = field_name
@@ -421,7 +445,7 @@ def check_field_regex(model_name: str, field_name: str, pattern: str, quote_fiel
421
445
  check_type = "field_regex"
422
446
  check_key = f"{model_name}__{field_name}__{check_type}"
423
447
  sodacl_check_dict = {
424
- checks_for(model_name, quote_field_name): [
448
+ checks_for(model_name, quoting_config.quote_model_name): [
425
449
  {
426
450
  f"invalid_count({field_name_for_soda}) = 0": {
427
451
  "name": check_key,
@@ -444,7 +468,9 @@ def check_field_regex(model_name: str, field_name: str, pattern: str, quote_fiel
444
468
  )
445
469
 
446
470
 
447
- def check_quality_list(model_name, field_name, quality_list: List[Quality]) -> List[Check]:
471
+ def check_quality_list(
472
+ model_name, field_name, quality_list: List[Quality], quoting_config: QuotingConfig = QuotingConfig()
473
+ ) -> List[Check]:
448
474
  checks: List[Check] = []
449
475
 
450
476
  count = 0
@@ -457,15 +483,20 @@ def check_quality_list(model_name, field_name, quality_list: List[Quality]) -> L
457
483
  check_key = f"{model_name}__{field_name}__quality_sql_{count}"
458
484
  check_type = "model_quality_sql"
459
485
  threshold = to_sodacl_threshold(quality)
460
- query = prepare_query(quality, model_name, field_name)
486
+ query = prepare_query(quality, model_name, field_name, quoting_config)
461
487
  if query is None:
462
488
  logger.warning(f"Quality check {check_key} has no query")
463
489
  continue
464
490
  if threshold is None:
465
491
  logger.warning(f"Quality check {check_key} has no valid threshold")
466
492
  continue
493
+
494
+ if quoting_config.quote_model_name:
495
+ model_name_for_soda = f'"{model_name}"'
496
+ else:
497
+ model_name_for_soda = model_name
467
498
  sodacl_check_dict = {
468
- f"checks for {model_name}": [
499
+ f"checks for {model_name_for_soda}": [
469
500
  {
470
501
  f"{check_key} {threshold}": {
471
502
  f"{check_key} query": query,
@@ -493,7 +524,9 @@ def check_quality_list(model_name, field_name, quality_list: List[Quality]) -> L
493
524
  return checks
494
525
 
495
526
 
496
- def prepare_query(quality: Quality, model_name: str, field_name: str = None) -> str | None:
527
+ def prepare_query(
528
+ quality: Quality, model_name: str, field_name: str = None, quoting_config: QuotingConfig = QuotingConfig()
529
+ ) -> str | None:
497
530
  if quality.query is None:
498
531
  return None
499
532
  if quality.query == "":
@@ -501,14 +534,24 @@ def prepare_query(quality: Quality, model_name: str, field_name: str = None) ->
501
534
 
502
535
  query = quality.query
503
536
 
504
- query = query.replace("{model}", model_name)
505
- query = query.replace("{schema}", model_name)
506
- query = query.replace("{table}", model_name)
537
+ if quoting_config.quote_field_name:
538
+ field_name_for_soda = f'"{field_name}"'
539
+ else:
540
+ field_name_for_soda = field_name
541
+
542
+ if quoting_config.quote_model_name:
543
+ model_name_for_soda = f'"{model_name}"'
544
+ else:
545
+ model_name_for_soda = model_name
546
+
547
+ query = re.sub(r'["\']?\{model}["\']?', model_name_for_soda, query)
548
+ query = re.sub(r'["\']?{schema}["\']?', model_name_for_soda, query)
549
+ query = re.sub(r'["\']?{table}["\']?', model_name_for_soda, query)
507
550
 
508
551
  if field_name is not None:
509
- query = query.replace("{field}", field_name)
510
- query = query.replace("{column}", field_name)
511
- query = query.replace("{property}", field_name)
552
+ query = re.sub(r'["\']?{field}["\']?', field_name_for_soda, query)
553
+ query = re.sub(r'["\']?{column}["\']?', field_name_for_soda, query)
554
+ query = re.sub(r'["\']?{property}["\']?', field_name_for_soda, query)
512
555
 
513
556
  return query
514
557
 
@@ -1,5 +1,9 @@
1
+ import atexit
2
+ import os
3
+ import tempfile
1
4
  import typing
2
5
 
6
+ import requests
3
7
  from duckdb.duckdb import DuckDBPyConnection
4
8
 
5
9
  from datacontract.engines.data_contract_checks import create_checks
@@ -46,6 +50,9 @@ def execute_data_contract_test(
46
50
  run.outputPortId = server.outputPortId
47
51
  run.server = server_name
48
52
 
53
+ if server.type == "api":
54
+ server = process_api_response(run, server)
55
+
49
56
  run.checks.extend(create_checks(data_contract_specification, server))
50
57
 
51
58
  # TODO check server is supported type for nicer error messages
@@ -74,3 +81,33 @@ def get_server(data_contract_specification: DataContractSpecification, server_na
74
81
  server_name = list(data_contract_specification.servers.keys())[0]
75
82
  server = data_contract_specification.servers.get(server_name)
76
83
  return server
84
+
85
+
86
+ def process_api_response(run, server):
87
+ tmp_dir = tempfile.TemporaryDirectory(prefix="datacontract_cli_api_")
88
+ atexit.register(tmp_dir.cleanup)
89
+ headers = {}
90
+ if os.getenv("DATACONTRACT_API_HEADER_AUTHORIZATION") is not None:
91
+ headers["Authorization"] = os.getenv("DATACONTRACT_API_HEADER_AUTHORIZATION")
92
+ try:
93
+ response = requests.get(server.location, headers=headers)
94
+ response.raise_for_status()
95
+ except requests.exceptions.RequestException as e:
96
+ raise DataContractException(
97
+ type="connection",
98
+ name="API server connection error",
99
+ result=ResultEnum.error,
100
+ reason=f"Failed to fetch API response from {server.location}: {e}",
101
+ engine="datacontract",
102
+ )
103
+ with open(f"{tmp_dir.name}/api_response.json", "w") as f:
104
+ f.write(response.text)
105
+ run.log_info(f"Saved API response to {tmp_dir.name}/api_response.json")
106
+ server = Server(
107
+ type="local",
108
+ format="json",
109
+ path=f"{tmp_dir.name}/api_response.json",
110
+ dataProductId=server.dataProductId,
111
+ outputPortId=server.outputPortId,
112
+ )
113
+ return server
@@ -1,8 +1,9 @@
1
+ import glob
1
2
  import json
2
3
  import logging
3
4
  import os
4
5
  import threading
5
- from typing import List, Optional
6
+ from typing import Any, Callable, Generator, List, Optional
6
7
 
7
8
  import fastjsonschema
8
9
  from fastjsonschema import JsonSchemaValueException
@@ -85,7 +86,7 @@ def process_exceptions(run, exceptions: List[DataContractException]):
85
86
 
86
87
 
87
88
  def validate_json_stream(
88
- schema: dict, model_name: str, validate: callable, json_stream: list[dict]
89
+ schema: dict, model_name: str, validate: Callable, json_stream: Generator[Any, Any, None]
89
90
  ) -> List[DataContractException]:
90
91
  logging.info(f"Validating JSON stream for model: '{model_name}'.")
91
92
  exceptions: List[DataContractException] = []
@@ -99,7 +100,7 @@ def validate_json_stream(
99
100
  DataContractException(
100
101
  type="schema",
101
102
  name="Check that JSON has valid schema",
102
- result="failed",
103
+ result=ResultEnum.failed,
103
104
  reason=f"{f'#{primary_key_value}: ' if primary_key_value is not None else ''}{e.message}",
104
105
  model=model_name,
105
106
  engine="jsonschema",
@@ -159,27 +160,44 @@ def process_json_file(run, schema, model_name, validate, file, delimiter):
159
160
 
160
161
  def process_local_file(run, server, schema, model_name, validate):
161
162
  path = server.path
163
+ if not path:
164
+ raise DataContractException(
165
+ type="schema",
166
+ name="Check that JSON has valid schema",
167
+ result=ResultEnum.warning,
168
+ reason="For server with type 'local', a 'path' must be defined.",
169
+ engine="datacontract",
170
+ )
162
171
  if "{model}" in path:
163
172
  path = path.format(model=model_name)
164
173
 
174
+ all_files = []
165
175
  if os.path.isdir(path):
166
- return process_directory(run, path, server, model_name, validate)
176
+ # Fetch all JSONs in the directory
177
+ for root, _, files in os.walk(path):
178
+ for file in files:
179
+ if file.endswith(".json"):
180
+ all_files.append(os.path.join(root, file))
167
181
  else:
168
- logging.info(f"Processing file {path}")
169
- with open(path, "r") as file:
170
- process_json_file(run, schema, model_name, validate, file, server.delimiter)
182
+ # Use glob to fetch all JSONs
183
+ for file_path in glob.glob(path, recursive=True):
184
+ if os.path.isfile(file_path):
185
+ if file_path.endswith(".json"):
186
+ all_files.append(file_path)
171
187
 
188
+ if not all_files:
189
+ raise DataContractException(
190
+ type="schema",
191
+ name="Check that JSON has valid schema",
192
+ result=ResultEnum.warning,
193
+ reason=f"No files found in '{path}'.",
194
+ engine="datacontract",
195
+ )
172
196
 
173
- def process_directory(run, path, server, model_name, validate):
174
- success = True
175
- for filename in os.listdir(path):
176
- if filename.endswith(".json"): # or make this a parameter
177
- file_path = os.path.join(path, filename)
178
- with open(file_path, "r") as file:
179
- if not process_json_file(run, model_name, validate, file, server.delimiter):
180
- success = False
181
- break
182
- return success
197
+ for file in all_files:
198
+ logging.info(f"Processing file: {file}")
199
+ with open(file, "r") as f:
200
+ process_json_file(run, schema, model_name, validate, f, server.delimiter)
183
201
 
184
202
 
185
203
  def process_s3_file(run, server, schema, model_name, validate):
@@ -201,7 +219,7 @@ def process_s3_file(run, server, schema, model_name, validate):
201
219
  raise DataContractException(
202
220
  type="schema",
203
221
  name="Check that JSON has valid schema",
204
- result="warning",
222
+ result=ResultEnum.warning,
205
223
  reason=f"Cannot find any file in {s3_location}",
206
224
  engine="datacontract",
207
225
  )
@@ -222,7 +240,7 @@ def check_jsonschema(run: Run, data_contract: DataContractSpecification, server:
222
240
  Check(
223
241
  type="schema",
224
242
  name="Check that JSON has valid schema",
225
- result="warning",
243
+ result=ResultEnum.warning,
226
244
  reason="Server format is not 'json'. Skip validating jsonschema.",
227
245
  engine="jsonschema",
228
246
  )
@@ -2,6 +2,8 @@ import logging
2
2
  import typing
3
3
  import uuid
4
4
 
5
+ from datacontract.engines.soda.connections.athena import to_athena_soda_configuration
6
+
5
7
  if typing.TYPE_CHECKING:
6
8
  from pyspark.sql import SparkSession
7
9
 
@@ -106,6 +108,10 @@ def check_soda_execute(
106
108
  soda_configuration_str = to_trino_soda_configuration(server)
107
109
  scan.add_configuration_yaml_str(soda_configuration_str)
108
110
  scan.set_data_source_name(server.type)
111
+ elif server.type == "athena":
112
+ soda_configuration_str = to_athena_soda_configuration(server)
113
+ scan.add_configuration_yaml_str(soda_configuration_str)
114
+ scan.set_data_source_name(server.type)
109
115
 
110
116
  else:
111
117
  run.checks.append(