mustrd 0.2.7a0__tar.gz → 0.3.0.0__tar.gz

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (32)
  1. {mustrd-0.2.7a0 → mustrd-0.3.0.0}/PKG-INFO +1 -1
  2. {mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/anzo_utils.py +8 -5
  3. {mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/logger_setup.py +1 -0
  4. {mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/model/mustrdShapes.ttl +11 -2
  5. {mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/model/ontology.ttl +5 -0
  6. {mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/mustrd.py +88 -30
  7. {mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/mustrdAnzo.py +3 -2
  8. mustrd-0.3.0.0/mustrd/mustrdTestPlugin.py +549 -0
  9. {mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/spec_component.py +14 -13
  10. {mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/steprunner.py +14 -4
  11. {mustrd-0.2.7a0 → mustrd-0.3.0.0}/pyproject.toml +1 -1
  12. mustrd-0.2.7a0/mustrd/mustrdTestPlugin.py +0 -378
  13. {mustrd-0.2.7a0 → mustrd-0.3.0.0}/LICENSE +0 -0
  14. {mustrd-0.2.7a0 → mustrd-0.3.0.0}/README.md +0 -0
  15. {mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/README.adoc +0 -0
  16. {mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/README.md +0 -0
  17. {mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/TestResult.py +0 -0
  18. {mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/__init__.py +0 -0
  19. {mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/model/catalog-v001.xml +0 -0
  20. {mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/model/mustrdTestOntology.ttl +0 -0
  21. {mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/model/mustrdTestShapes.ttl +0 -0
  22. {mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/model/test-resources/resources.ttl +0 -0
  23. {mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/model/triplestoreOntology.ttl +0 -0
  24. {mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/model/triplestoreshapes.ttl +0 -0
  25. {mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/mustrdGraphDb.py +0 -0
  26. {mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/mustrdRdfLib.py +0 -0
  27. {mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/namespace.py +0 -0
  28. {mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/run.py +0 -0
  29. {mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/templates/md_ResultList_leaf_template.jinja +0 -0
  30. {mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/templates/md_ResultList_template.jinja +0 -0
  31. {mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/templates/md_stats_template.jinja +0 -0
  32. {mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: mustrd
3
- Version: 0.2.7a0
3
+ Version: 0.3.0.0
4
4
  Summary: A Spec By Example framework for RDF and SPARQL, Inspired by Cucumber.
5
5
  License: MIT
6
6
  Author: John Placek
@@ -31,16 +31,18 @@ from requests import Response, HTTPError, RequestException
31
31
  from bs4 import BeautifulSoup
32
32
  import logging
33
33
 
34
+ logger = logging.getLogger()
35
+
34
36
 
35
37
  def query_azg(anzo_config: dict, query: str,
36
38
  format: str = "json", is_update: bool = False,
37
39
  data_layers: List[str] = None):
38
40
  params = {
39
- 'skipCache': True,
41
+ 'skipCache': 'true',
40
42
  'format': format,
41
43
  'datasourceURI': anzo_config['gqe_uri'],
42
- 'default-graph-uri': data_layers,
43
- 'named-graph-uri': data_layers
44
+ 'using-graph-uri' if is_update else 'default-graph-uri': data_layers,
45
+ 'using-named-graph-uri' if is_update else 'named-graph-uri': data_layers
44
46
  }
45
47
  url = f"{anzo_config['url']}/sparql"
46
48
  return send_anzo_query(anzo_config, url=url, params=params, query=query, is_update=is_update)
@@ -52,7 +54,7 @@ def query_graphmart(anzo_config: dict,
52
54
  format: str = "json",
53
55
  data_layers: List[str] = None):
54
56
  params = {
55
- 'skipCache': True,
57
+ 'skipCache': 'true',
56
58
  'format': format,
57
59
  'default-graph-uri': data_layers,
58
60
  'named-graph-uri': data_layers
@@ -87,7 +89,8 @@ def manage_anzo_response(response: Response) -> str:
87
89
 
88
90
  def send_anzo_query(anzo_config, url, params, query, is_update=False):
89
91
  headers = {"Content-Type": f"application/sparql-{'update' if is_update else 'query' }"}
90
- return manage_anzo_response(requests.post(url=url, params=params, data=query,
92
+ logger.debug(f"send_anzo_query {url=} {query=} {is_update=}")
93
+ return manage_anzo_response(requests.post(url=url, params=params, data=query.encode('utf-8'),
91
94
  auth=(anzo_config['username'], anzo_config['password']),
92
95
  headers=headers, verify=False))
93
96
 
@@ -35,6 +35,7 @@ def setup_logger(name: str) -> logging.Logger:
35
35
  log = logging.getLogger(name)
36
36
  log.setLevel(LOG_LEVEL)
37
37
 
38
+
38
39
  stderr_handler = logging.StreamHandler(sys.stderr)
39
40
  stderr_handler.setLevel(logging.ERROR)
40
41
  log.addHandler(stderr_handler)
@@ -249,5 +249,14 @@ must:AnzoGraphmartQueryDrivenTemplatedStepSparqlSourceShape
249
249
  sh:minCount 1 ;
250
250
  sh:maxCount 1 ; ] .
251
251
 
252
-
253
-
252
+ must:SpadeEdnGroupSourceShape
253
+ a sh:NodeShape ;
254
+ sh:targetClass must:SpadeEdnGroupSource ;
255
+ sh:property [ sh:path must:file ;
256
+ sh:message "A SpadeEdnGroupSource must have a file property pointing to the spade.edn config." ;
257
+ sh:minCount 1 ;
258
+ sh:maxCount 1 ; ] ;
259
+ sh:property [ sh:path must:groupId ;
260
+ sh:message "A SpadeEdnGroupSource must have a groupId property referencing the group in the EDN file." ;
261
+ sh:minCount 1 ;
262
+ sh:maxCount 1 ; ] .
@@ -461,6 +461,11 @@ sh:order rdf:type owl:DatatypeProperty ;
461
461
  rdfs:isDefinedBy : ;
462
462
  rdfs:label "AnzoGraphmartQueryDrivenTemplatedStepSparqlSource" .
463
463
 
464
+ ### https://mustrd.com/model/SpadeEdnGroupSource
465
+ :SpadeEdnGroupSource rdf:type owl:Class ;
466
+ rdfs:subClassOf :SparqlSource ;
467
+ rdfs:comment "Allows reference to a spade.edn file, and a specific groupid (think Anzo layer), within that" ;
468
+ rdfs:label "SpadeEdnGroupSource" .
464
469
 
465
470
  ### https://mustrd.com/model/Then
466
471
  :Then rdf:type owl:Class ;
@@ -55,8 +55,9 @@ import logging
55
55
  from http.client import HTTPConnection
56
56
  from .steprunner import upload_given, run_when
57
57
  from multimethods import MultiMethod
58
+ import traceback
58
59
 
59
- log = logger_setup.setup_logger(__name__)
60
+ log = logging.getLogger(__name__)
60
61
 
61
62
  requests.packages.urllib3.disable_warnings()
62
63
  requests.packages.urllib3.util.ssl_.DEFAULT_CIPHERS += ':HIGH:!DH:!aNULL'
@@ -184,20 +185,33 @@ class UpdateSparqlQuery(SparqlAction):
184
185
 
185
186
  # https://github.com/Semantic-partners/mustrd/issues/19
186
187
  # Validate the specs found in spec_path
187
- def validate_specs(run_config: dict, triple_stores: List, shacl_graph: Graph, ont_graph: Graph, file_name: str = "*")\
188
+ def validate_specs(run_config: dict,
189
+ triple_stores: List,
190
+ shacl_graph: Graph,
191
+ ont_graph: Graph,
192
+ file_name: str = "*",
193
+ selected_test_files: List[str] = [])\
188
194
  -> Tuple[List, Graph, List]:
189
195
  spec_graph = Graph()
190
196
  subject_uris = set()
191
197
  focus_uris = set()
192
198
  invalid_specs = []
193
- ttl_files = list(run_config['spec_path'].glob(
194
- f'**/{file_name}.mustrd.ttl'))
199
+ ttl_files = []
200
+
201
+ if not selected_test_files:
202
+ ttl_files = list(run_config['spec_path'].glob(
203
+ f'**/{file_name}.mustrd.ttl'))
204
+ log.info(
205
+ f"Found {len(ttl_files)} {file_name}.mustrd.ttl files in {run_config['spec_path']}")
206
+ else:
207
+ ttl_files = selected_test_files
208
+
209
+ log.info(f"Using {ttl_files} for test source")
195
210
  ttl_files.sort()
196
- log.info(
197
- f"Found {len(ttl_files)} {file_name}.mustrd.ttl files in {run_config['spec_path']}")
198
211
 
199
212
  # For each spec file found in spec_path
200
213
  for file in ttl_files:
214
+ # file = file.resolve()
201
215
  error_messages = []
202
216
 
203
217
  log.info(f"Parse: {file}")
@@ -224,7 +238,10 @@ def validate_specs(run_config: dict, triple_stores: List, shacl_graph: Graph, on
224
238
  advanced=True,
225
239
  js=False,
226
240
  debug=False)
227
-
241
+ if str(file.name).endswith("_duplicate"):
242
+ log.debug(f"Validation of {file.name} against SHACL shapes: {conforms}")
243
+ log.debug(f"{results_graph.serialize(format='turtle')}")
244
+ # log.debug(f"SHACL validation results: {results_text}")
228
245
  # Add error message if not conform to spec shapes
229
246
  if not conforms:
230
247
  for msg in results_graph.objects(predicate=SH.resultMessage):
@@ -269,6 +286,10 @@ def add_spec_validation(file_graph: Graph, subject_uris: set, file: Path, triple
269
286
  error_messages: list, invalid_specs: list, spec_graph: Graph):
270
287
 
271
288
  for subject_uri in file_graph.subjects(RDF.type, MUST.TestSpec):
289
+ # Always add file name and source file to the graph for error reporting
290
+ file_graph.add([subject_uri, MUST.specSourceFile, Literal(str(file))])
291
+ file_graph.add([subject_uri, MUST.specFileName, Literal(file.name)])
292
+
272
293
  # If we already collected a URI, then we tag it as duplicate and it won't be executed
273
294
  if subject_uri in subject_uris:
274
295
  log.warning(
@@ -311,8 +332,11 @@ def get_specs(spec_uris: List[URIRef], spec_graph: Graph, triple_stores: List[di
311
332
  specs += [get_spec(spec_uri, spec_graph,
312
333
  run_config, triple_store)]
313
334
  except (ValueError, FileNotFoundError, ConnectionError) as e:
335
+ # Try to get file name/path from the graph, but fallback to "unknown"
336
+ file_name = spec_graph.value(subject=spec_uri, predicate=MUST.specFileName) or "unknown"
337
+ file_path = spec_graph.value(subject=spec_uri, predicate=MUST.specSourceFile) or "unknown"
314
338
  skipped_results += [SpecSkipped(spec_uri, triple_store['type'],
315
- e, get_spec_file(spec_uri, spec_graph))]
339
+ str(e), str(file_name), Path(file_path))]
316
340
 
317
341
  except (BadSyntax, FileNotFoundError) as e:
318
342
  template = "An exception of type {0} occurred when trying to parse the triple store configuration file. " \
@@ -334,7 +358,14 @@ def run_specs(specs) -> List[SpecResult]:
334
358
 
335
359
 
336
360
  def get_spec_file(spec_uri: URIRef, spec_graph: Graph):
337
- return str(spec_graph.value(subject=spec_uri, predicate=MUST.specFileName, default="default.mustrd.ttl"))
361
+ file_name = spec_graph.value(subject=spec_uri, predicate=MUST.specFileName)
362
+ if file_name:
363
+ return str(file_name)
364
+ # fallback: try to get from MUST.specSourceFile
365
+ file_path = spec_graph.value(subject=spec_uri, predicate=MUST.specSourceFile)
366
+ if file_path:
367
+ return str(Path(file_path).name)
368
+ return "default.mustrd.ttl"
338
369
 
339
370
 
340
371
  def get_spec(spec_uri: URIRef, spec_graph: Graph, run_config: dict, mustrd_triple_store: dict = None) -> Specification:
@@ -367,23 +398,42 @@ def get_spec(spec_uri: URIRef, spec_graph: Graph, run_config: dict, mustrd_tripl
367
398
 
368
399
 
369
400
  def check_result(spec: Specification, result: Union[str, Graph]):
401
+
402
+ log.debug(
403
+ f"check_result {spec.spec_uri=}, {spec.triple_store=}, {result=} {type(spec.then)}")
370
404
  if isinstance(spec.then, TableThenSpec):
405
+ log.debug("table_comparison")
371
406
  return table_comparison(result, spec)
372
407
  else:
373
408
  graph_compare = graph_comparison(spec.then.value, result)
374
409
  if isomorphic(result, spec.then.value):
375
- return SpecPassed(spec.spec_uri, spec.triple_store["type"])
410
+ log.debug(f"isomorphic {spec}")
411
+ log.debug(f"{spec.spec_uri}")
412
+ log.debug(f"{spec.triple_store}")
413
+ ret = SpecPassed(spec.spec_uri, spec.triple_store["type"])
414
+
415
+ return ret
376
416
  else:
417
+ log.debug("not isomorphic")
377
418
  if spec.when[0].queryType == MUST.ConstructSparql:
419
+ log.debug("ConstructSpecFailure")
378
420
  return ConstructSpecFailure(spec.spec_uri, spec.triple_store["type"], graph_compare)
379
421
  else:
422
+ log.debug("UpdateSpecFailure")
380
423
  return UpdateSpecFailure(spec.spec_uri, spec.triple_store["type"], graph_compare)
381
424
 
382
425
 
383
426
  def run_spec(spec: Specification) -> SpecResult:
384
427
  spec_uri = spec.spec_uri
385
428
  triple_store = spec.triple_store
386
- # close_connection = True
429
+
430
+ if not isinstance(spec, Specification):
431
+ log.warning(f"check_result called with non-Specification: {type(spec)}")
432
+ return spec
433
+ # return SpecSkipped(getattr(spec, 'spec_uri', None), getattr(spec, 'triple_store', {}), "Spec is not a valid Specification instance")
434
+
435
+ log.debug(
436
+ f"run_spec {spec=}")
387
437
  log.debug(
388
438
  f"run_when {spec_uri=}, {triple_store=}, {spec.given=}, {spec.when=}, {spec.then=}")
389
439
  if spec.given:
@@ -399,10 +449,16 @@ def run_spec(spec: Specification) -> SpecResult:
399
449
  f"Running {when.queryType} spec {spec_uri} on {triple_store['type']}")
400
450
  try:
401
451
  result = run_when(spec_uri, triple_store, when)
452
+ log.info(
453
+ f"run {when.queryType} spec {spec_uri} on {triple_store['type']} {result=}")
402
454
  except ParseException as e:
455
+ log.error(
456
+ f"parseException {e}")
403
457
  return SparqlParseFailure(spec_uri, triple_store["type"], e)
404
458
  except NotImplementedError as ex:
405
- return SpecSkipped(spec_uri, triple_store["type"], ex.args[0])
459
+ log.error(f"NotImplementedError {ex}")
460
+ raise ex
461
+ # return SpecSkipped(spec_uri, triple_store["type"], ex.args[0])
406
462
  return check_result(spec, result)
407
463
  except (ConnectionError, TimeoutError, HTTPError, ConnectTimeout, OSError) as e:
408
464
  # close_connection = False
@@ -413,7 +469,9 @@ def run_spec(spec: Specification) -> SpecResult:
413
469
  except (TypeError, RequestException) as e:
414
470
  log.error(f"{type(e)} {e}")
415
471
  return SparqlExecutionError(spec_uri, triple_store["type"], e)
416
-
472
+ except Exception as e:
473
+ log.error(f"Unexpected error {e}")
474
+ return RuntimeError(spec_uri, triple_store["type"], f"{type(e).__name__}: {e}")
417
475
  # https://github.com/Semantic-partners/mustrd/issues/78
418
476
  # finally:
419
477
  # if type(mustrd_triple_store) == MustrdAnzo and close_connection:
@@ -724,33 +782,33 @@ def get_then_update(spec_uri: URIRef, spec_graph: Graph) -> Graph:
724
782
  return expected_results
725
783
 
726
784
 
727
- def write_result_diff_to_log(res):
785
+ def write_result_diff_to_log(res, info):
728
786
  if isinstance(res, UpdateSpecFailure) or isinstance(res, ConstructSpecFailure):
729
- log.info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
730
- log.info(f"{Fore.BLUE} In Expected Not In Actual:")
731
- log.info(
787
+ info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
788
+ info(f"{Fore.BLUE} In Expected Not In Actual:")
789
+ info(
732
790
  res.graph_comparison.in_expected_not_in_actual.serialize(format="ttl"))
733
- log.info(f"{Fore.RED} in_actual_not_in_expected")
734
- log.info(
791
+ info(f"{Fore.RED} in_actual_not_in_expected")
792
+ info(
735
793
  res.graph_comparison.in_actual_not_in_expected.serialize(format="ttl"))
736
- log.info(f"{Fore.GREEN} in_both")
737
- log.info(res.graph_comparison.in_both.serialize(format="ttl"))
794
+ info(f"{Fore.GREEN} in_both")
795
+ info(res.graph_comparison.in_both.serialize(format="ttl"))
738
796
 
739
797
  if isinstance(res, SelectSpecFailure):
740
- log.info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
741
- log.info(res.message)
742
- log.info(res.table_comparison.to_markdown())
798
+ info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
799
+ info(res.message)
800
+ info(res.table_comparison.to_markdown())
743
801
  if isinstance(res, SpecPassedWithWarning):
744
- log.info(
802
+ info(
745
803
  f"{Fore.YELLOW}Passed with warning {res.spec_uri} {res.triple_store}")
746
- log.info(res.warning)
804
+ info(res.warning)
747
805
  if isinstance(res, TripleStoreConnectionError) or isinstance(res, SparqlExecutionError) or \
748
806
  isinstance(res, SparqlParseFailure):
749
- log.info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
750
- log.info(res.exception)
807
+ info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
808
+ info(res.exception)
751
809
  if isinstance(res, SpecSkipped):
752
- log.info(f"{Fore.YELLOW}Skipped {res.spec_uri} {res.triple_store}")
753
- log.info(res.message)
810
+ info(f"{Fore.YELLOW}Skipped {res.spec_uri} {res.triple_store}")
811
+ info(res.message)
754
812
 
755
813
 
756
814
  def calculate_row_difference(df1: pandas.DataFrame,
@@ -29,6 +29,7 @@ from mustrd.anzo_utils import query_azg, query_graphmart
29
29
  from mustrd.anzo_utils import query_configuration, json_to_dictlist, ttl_to_graph
30
30
 
31
31
 
32
+
32
33
  def execute_select(triple_store: dict, when: str, bindings: dict = None) -> str:
33
34
  try:
34
35
  if bindings:
@@ -39,7 +40,7 @@ def execute_select(triple_store: dict, when: str, bindings: dict = None) -> str
39
40
  f"FROM <{triple_store['input_graph']}>\nFROM <{triple_store['output_graph']}>").replace(
40
41
  "${targetGraph}", f"<{triple_store['output_graph']}>")
41
42
  # TODO: manage results here
42
- return query_azg(anzo_config=triple_store, query=when)
43
+ return query_azg(anzo_config=triple_store, query=when, data_layers=[triple_store['input_graph']])
43
44
  except (ConnectionError, TimeoutError, HTTPError, ConnectTimeout):
44
45
  raise
45
46
 
@@ -58,7 +59,7 @@ USING <{triple_store['output_graph']}>""").replace(
58
59
  "${targetGraph}", f"<{output_graph}>")
59
60
 
60
61
  response = query_azg(anzo_config=triple_store, query=substituted_query, is_update=True,
61
- data_layers=input_graph, format="ttl")
62
+ data_layers=[input_graph, output_graph], format="ttl")
62
63
  logging.debug(f'response {response}')
63
64
  # TODO: deal with error responses
64
65
  new_graph = ttl_to_graph(query_azg(anzo_config=triple_store, query="construct {?s ?p ?o} { ?s ?p ?o }",