mustrd 0.2.7a0__tar.gz → 0.3.1a0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. mustrd-0.3.1a0/PKG-INFO +96 -0
  2. mustrd-0.3.1a0/README.md +54 -0
  3. {mustrd-0.2.7a0 → mustrd-0.3.1a0}/mustrd/README.md +2 -0
  4. {mustrd-0.2.7a0 → mustrd-0.3.1a0}/mustrd/anzo_utils.py +8 -5
  5. {mustrd-0.2.7a0 → mustrd-0.3.1a0}/mustrd/logger_setup.py +3 -0
  6. {mustrd-0.2.7a0 → mustrd-0.3.1a0}/mustrd/model/mustrdShapes.ttl +25 -6
  7. {mustrd-0.2.7a0 → mustrd-0.3.1a0}/mustrd/model/ontology.ttl +6 -2
  8. {mustrd-0.2.7a0 → mustrd-0.3.1a0}/mustrd/mustrd.py +508 -235
  9. {mustrd-0.2.7a0 → mustrd-0.3.1a0}/mustrd/mustrdAnzo.py +3 -2
  10. {mustrd-0.2.7a0 → mustrd-0.3.1a0}/mustrd/mustrdRdfLib.py +8 -1
  11. mustrd-0.3.1a0/mustrd/mustrdTestPlugin.py +549 -0
  12. {mustrd-0.2.7a0 → mustrd-0.3.1a0}/mustrd/namespace.py +10 -1
  13. {mustrd-0.2.7a0 → mustrd-0.3.1a0}/mustrd/spec_component.py +238 -58
  14. {mustrd-0.2.7a0 → mustrd-0.3.1a0}/mustrd/steprunner.py +78 -20
  15. {mustrd-0.2.7a0 → mustrd-0.3.1a0}/pyproject.toml +2 -1
  16. mustrd-0.2.7a0/PKG-INFO +0 -96
  17. mustrd-0.2.7a0/README.md +0 -55
  18. mustrd-0.2.7a0/mustrd/mustrdTestPlugin.py +0 -378
  19. {mustrd-0.2.7a0 → mustrd-0.3.1a0}/LICENSE +0 -0
  20. {mustrd-0.2.7a0 → mustrd-0.3.1a0}/mustrd/README.adoc +0 -0
  21. {mustrd-0.2.7a0 → mustrd-0.3.1a0}/mustrd/TestResult.py +0 -0
  22. {mustrd-0.2.7a0 → mustrd-0.3.1a0}/mustrd/__init__.py +0 -0
  23. {mustrd-0.2.7a0 → mustrd-0.3.1a0}/mustrd/model/catalog-v001.xml +0 -0
  24. {mustrd-0.2.7a0 → mustrd-0.3.1a0}/mustrd/model/mustrdTestOntology.ttl +0 -0
  25. {mustrd-0.2.7a0 → mustrd-0.3.1a0}/mustrd/model/mustrdTestShapes.ttl +0 -0
  26. {mustrd-0.2.7a0 → mustrd-0.3.1a0}/mustrd/model/test-resources/resources.ttl +0 -0
  27. {mustrd-0.2.7a0 → mustrd-0.3.1a0}/mustrd/model/triplestoreOntology.ttl +0 -0
  28. {mustrd-0.2.7a0 → mustrd-0.3.1a0}/mustrd/model/triplestoreshapes.ttl +0 -0
  29. {mustrd-0.2.7a0 → mustrd-0.3.1a0}/mustrd/mustrdGraphDb.py +0 -0
  30. {mustrd-0.2.7a0 → mustrd-0.3.1a0}/mustrd/run.py +0 -0
  31. {mustrd-0.2.7a0 → mustrd-0.3.1a0}/mustrd/templates/md_ResultList_leaf_template.jinja +0 -0
  32. {mustrd-0.2.7a0 → mustrd-0.3.1a0}/mustrd/templates/md_ResultList_template.jinja +0 -0
  33. {mustrd-0.2.7a0 → mustrd-0.3.1a0}/mustrd/templates/md_stats_template.jinja +0 -0
  34. {mustrd-0.2.7a0 → mustrd-0.3.1a0}/mustrd/utils.py +0 -0
@@ -0,0 +1,96 @@
1
+ Metadata-Version: 2.3
2
+ Name: mustrd
3
+ Version: 0.3.1a0
4
+ Summary: A Spec By Example framework for RDF and SPARQL, Inspired by Cucumber.
5
+ License: MIT
6
+ Author: John Placek
7
+ Author-email: john.placek@semanticpartners.com
8
+ Requires-Python: >=3.11,<4.0
9
+ Classifier: Framework :: Pytest
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Natural Language :: English
12
+ Classifier: Programming Language :: Python
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Programming Language :: Python :: 3.13
17
+ Classifier: Topic :: Software Development :: Quality Assurance
18
+ Classifier: Topic :: Software Development :: Testing
19
+ Classifier: Topic :: Utilities
20
+ Requires-Dist: Jinja2 (==3.1.5)
21
+ Requires-Dist: beautifulsoup4 (>=4.11.1,<5.0.0)
22
+ Requires-Dist: colorama (==0.4.6)
23
+ Requires-Dist: colorlog (>=6.7.0,<7.0.0)
24
+ Requires-Dist: coverage (==7.4.3)
25
+ Requires-Dist: edn-format (>=0.7.5,<0.8.0)
26
+ Requires-Dist: flake8 (==7.0.0)
27
+ Requires-Dist: multimethods-py (>=0.5.3,<0.6.0)
28
+ Requires-Dist: numpy (>=1.26.0,<1.27.0)
29
+ Requires-Dist: openpyxl (>=3.1.2,<4.0.0)
30
+ Requires-Dist: pandas (>=2.0,<3.0)
31
+ Requires-Dist: pyshacl (>=0.30.0,<0.31.0)
32
+ Requires-Dist: pytest (>=7.2.0,<8.0.0)
33
+ Requires-Dist: rdflib (>=7.1.3,<8.0.0)
34
+ Requires-Dist: requests (>=2.28.2,<3.0.0)
35
+ Requires-Dist: tabulate (>=0.9.0,<0.10.0)
36
+ Requires-Dist: toml (>=0.10.2,<0.11.0)
37
+ Requires-Dist: tomli (>=2.0.1,<3.0.0)
38
+ Requires-Dist: urllib3 (==1.26.19)
39
+ Project-URL: Repository, https://github.com/Semantic-partners/mustrd
40
+ Description-Content-Type: text/markdown
41
+
42
+ # mustrd
43
+
44
+ **"MustRD: Validate your SPARQL queries and transformations with precision and confidence, using BDD and Given-When-Then principles."**
45
+
46
+ [<img src="https://github.com/Semantic-partners/mustrd/raw/python-coverage-comment-action-data/badge.svg?sanitize=true" alt="coverage badge">](https://github.com/Semantic-partners/mustrd/tree/python-coverage-comment-action-data)
47
+
48
+ ### Why?
49
+
50
+ SPARQL is a powerful query language for RDF data, but how can you ensure your queries and transformations are doing what you intend? Whether you're working on a pipeline or a standalone query, certainty is key.
51
+
52
+ While RDF and SPARQL offer great flexibility, we noticed a gap in tooling to validate their behavior. We missed the robust testing frameworks available in imperative programming languages that help ensure your code works as expected.
53
+
54
+ With MustRD, you can:
55
+
56
+ * Define data scenarios and verify that queries produce the expected results.
57
+ * Test edge cases to ensure your queries remain reliable.
58
+ * Isolate small SPARQL enrichment or transformation steps and confirm you're only inserting what you intend.
59
+
60
+ ### What?
61
+
62
+ MustRD is a Spec-By-Example ontology with a reference Python implementation, inspired by tools like Cucumber. It uses the Given-When-Then approach to define and validate SPARQL queries and transformations.
63
+
64
+ MustRD is designed to be triplestore/SPARQL engine agnostic, leveraging open standards to ensure compatibility across different platforms.
65
+
66
+ ### What it is NOT
67
+
68
+ MustRD is not an alternative to SHACL. While SHACL validates data structures, MustRD focuses on validating data transformations and query results.
69
+
70
+ ### How?
71
+
72
+ You define your specs in Turtle (`.ttl`) or TriG (`.trig`) files using the Given-When-Then approach:
73
+
74
+ * **Given**: Define the starting dataset.
75
+ * **When**: Specify the action (e.g., a SPARQL query).
76
+ * **Then**: Outline the expected results.
77
+
78
+ Depending on the type of SPARQL query (CONSTRUCT, SELECT, INSERT/DELETE), MustRD runs the query and compares the results against the expectations defined in the spec.
79
+
80
+ Expectations can also be defined as:
81
+
82
+ * INSERT queries.
83
+ * SELECT queries.
84
+ * Higher-order expectation languages, similar to those used in various platforms.
85
+
86
+ ### When?
87
+
88
+ MustRD is a work in progress, built to meet the needs of our projects across multiple clients and vendor stacks. While we find it useful, it may not meet your needs out of the box.
89
+
90
+ We invite you to try it, raise issues, or contribute via pull requests. If you need custom features, contact us for consultancy rates, and we may prioritize your request.
91
+
92
+ ## Support
93
+
94
+ Semantic Partners is a specialist consultancy in Semantic Technology. If you need more support, contact us at info@semanticpartners.com or mustrd@semanticpartners.com.
95
+
96
+
@@ -0,0 +1,54 @@
1
+ # mustrd
2
+
3
+ **"MustRD: Validate your SPARQL queries and transformations with precision and confidence, using BDD and Given-When-Then principles."**
4
+
5
+ [<img src="https://github.com/Semantic-partners/mustrd/raw/python-coverage-comment-action-data/badge.svg?sanitize=true" alt="coverage badge">](https://github.com/Semantic-partners/mustrd/tree/python-coverage-comment-action-data)
6
+
7
+ ### Why?
8
+
9
+ SPARQL is a powerful query language for RDF data, but how can you ensure your queries and transformations are doing what you intend? Whether you're working on a pipeline or a standalone query, certainty is key.
10
+
11
+ While RDF and SPARQL offer great flexibility, we noticed a gap in tooling to validate their behavior. We missed the robust testing frameworks available in imperative programming languages that help ensure your code works as expected.
12
+
13
+ With MustRD, you can:
14
+
15
+ * Define data scenarios and verify that queries produce the expected results.
16
+ * Test edge cases to ensure your queries remain reliable.
17
+ * Isolate small SPARQL enrichment or transformation steps and confirm you're only inserting what you intend.
18
+
19
+ ### What?
20
+
21
+ MustRD is a Spec-By-Example ontology with a reference Python implementation, inspired by tools like Cucumber. It uses the Given-When-Then approach to define and validate SPARQL queries and transformations.
22
+
23
+ MustRD is designed to be triplestore/SPARQL engine agnostic, leveraging open standards to ensure compatibility across different platforms.
24
+
25
+ ### What it is NOT
26
+
27
+ MustRD is not an alternative to SHACL. While SHACL validates data structures, MustRD focuses on validating data transformations and query results.
28
+
29
+ ### How?
30
+
31
+ You define your specs in Turtle (`.ttl`) or TriG (`.trig`) files using the Given-When-Then approach:
32
+
33
+ * **Given**: Define the starting dataset.
34
+ * **When**: Specify the action (e.g., a SPARQL query).
35
+ * **Then**: Outline the expected results.
36
+
37
+ Depending on the type of SPARQL query (CONSTRUCT, SELECT, INSERT/DELETE), MustRD runs the query and compares the results against the expectations defined in the spec.
38
+
39
+ Expectations can also be defined as:
40
+
41
+ * INSERT queries.
42
+ * SELECT queries.
43
+ * Higher-order expectation languages, similar to those used in various platforms.
44
+
45
+ ### When?
46
+
47
+ MustRD is a work in progress, built to meet the needs of our projects across multiple clients and vendor stacks. While we find it useful, it may not meet your needs out of the box.
48
+
49
+ We invite you to try it, raise issues, or contribute via pull requests. If you need custom features, contact us for consultancy rates, and we may prioritize your request.
50
+
51
+ ## Support
52
+
53
+ Semantic Partners is a specialist consultancy in Semantic Technology. If you need more support, contact us at info@semanticpartners.com or mustrd@semanticpartners.com.
54
+
@@ -27,3 +27,5 @@ As the project is actually built from the requirements.txt file at the project r
27
27
 
28
28
  `poetry export -f requirements.txt --without-hashes > requirements.txt`
29
29
 
30
+ We also recommend pairing MustRD with the VS Code plugin [faubulous.mentor](https://marketplace.visualstudio.com/items?itemName=faubulous.mentor) to enhance your development experience and streamline working with SPARQL and RDF specifications.
31
+
@@ -31,16 +31,18 @@ from requests import Response, HTTPError, RequestException
31
31
  from bs4 import BeautifulSoup
32
32
  import logging
33
33
 
34
+ logger = logging.getLogger()
35
+
34
36
 
35
37
  def query_azg(anzo_config: dict, query: str,
36
38
  format: str = "json", is_update: bool = False,
37
39
  data_layers: List[str] = None):
38
40
  params = {
39
- 'skipCache': True,
41
+ 'skipCache': 'true',
40
42
  'format': format,
41
43
  'datasourceURI': anzo_config['gqe_uri'],
42
- 'default-graph-uri': data_layers,
43
- 'named-graph-uri': data_layers
44
+ 'using-graph-uri' if is_update else 'default-graph-uri': data_layers,
45
+ 'using-named-graph-uri' if is_update else 'named-graph-uri': data_layers
44
46
  }
45
47
  url = f"{anzo_config['url']}/sparql"
46
48
  return send_anzo_query(anzo_config, url=url, params=params, query=query, is_update=is_update)
@@ -52,7 +54,7 @@ def query_graphmart(anzo_config: dict,
52
54
  format: str = "json",
53
55
  data_layers: List[str] = None):
54
56
  params = {
55
- 'skipCache': True,
57
+ 'skipCache': 'true',
56
58
  'format': format,
57
59
  'default-graph-uri': data_layers,
58
60
  'named-graph-uri': data_layers
@@ -87,7 +89,8 @@ def manage_anzo_response(response: Response) -> str:
87
89
 
88
90
  def send_anzo_query(anzo_config, url, params, query, is_update=False):
89
91
  headers = {"Content-Type": f"application/sparql-{'update' if is_update else 'query' }"}
90
- return manage_anzo_response(requests.post(url=url, params=params, data=query,
92
+ logger.debug(f"send_anzo_query {url=} {query=} {is_update=}")
93
+ return manage_anzo_response(requests.post(url=url, params=params, data=query.encode('utf-8'),
91
94
  auth=(anzo_config['username'], anzo_config['password']),
92
95
  headers=headers, verify=False))
93
96
 
@@ -35,6 +35,7 @@ def setup_logger(name: str) -> logging.Logger:
35
35
  log = logging.getLogger(name)
36
36
  log.setLevel(LOG_LEVEL)
37
37
 
38
+
38
39
  stderr_handler = logging.StreamHandler(sys.stderr)
39
40
  stderr_handler.setLevel(logging.ERROR)
40
41
  log.addHandler(stderr_handler)
@@ -50,3 +51,5 @@ def setup_logger(name: str) -> logging.Logger:
50
51
  def flush():
51
52
  logging.shutdown()
52
53
  sys.stdout.flush()
54
+
55
+ logging.getLogger("edn_format").setLevel(logging.WARNING)
@@ -140,10 +140,20 @@ must:OrderedTableDatasetShape
140
140
  must:FileDatasetShape
141
141
  a sh:NodeShape ;
142
142
  sh:targetClass must:FileDataset ;
143
- sh:property [ sh:path must:file ;
144
- sh:datatype xsd:string ;
145
- sh:minCount 1 ;
146
- sh:maxCount 1 ; ] .
143
+ sh:or (
144
+ [
145
+ sh:path must:file ;
146
+ sh:datatype xsd:string ;
147
+ sh:maxCount 1 ;
148
+ ]
149
+ [
150
+ sh:path must:fileurl ;
151
+ sh:nodeKind sh:IRI ;
152
+ sh:minCount 1 ;
153
+ sh:maxCount 1 ;
154
+ ]
155
+ )
156
+ .
147
157
 
148
158
  must:StatementShape
149
159
  a sh:NodeShape ;
@@ -249,5 +259,14 @@ must:AnzoGraphmartQueryDrivenTemplatedStepSparqlSourceShape
249
259
  sh:minCount 1 ;
250
260
  sh:maxCount 1 ; ] .
251
261
 
252
-
253
-
262
+ must:SpadeEdnGroupSourceShape
263
+ a sh:NodeShape ;
264
+ sh:targetClass must:SpadeEdnGroupSource ;
265
+ sh:property [ sh:path must:fileurl ;
266
+ sh:message "A SpadeEdnGroupSource must have a fileurl property pointing to the spade.edn config." ;
267
+ sh:minCount 1 ;
268
+ sh:maxCount 1 ; ] ;
269
+ sh:property [ sh:path must:groupId ;
270
+ sh:message "A SpadeEdnGroupSource must have a groupId property referencing the group in the EDN file." ;
271
+ sh:minCount 1 ;
272
+ sh:maxCount 1 ; ] .
@@ -146,7 +146,7 @@ sh:order rdf:type owl:DatatypeProperty ;
146
146
 
147
147
  ### https://mustrd.com/model/file
148
148
  :file rdf:type owl:DatatypeProperty ;
149
- rdfs:comment "Relative or absolute path to local file" ;
149
+ rdfs:comment "Relative or absolute path to local file as a string, or a file:// url" ;
150
150
  rdfs:label "file" .
151
151
 
152
152
 
@@ -158,7 +158,6 @@ sh:order rdf:type owl:DatatypeProperty ;
158
158
 
159
159
  ### https://mustrd.com/model/fileurl
160
160
  :fileurl rdf:type owl:DatatypeProperty ;
161
- rdfs:domain :FileSparqlSource ;
162
161
  rdfs:comment "a full or relatively qualified file:// url. Relative to what? We haven't thought that through, yet." ;
163
162
  rdfs:isDefinedBy : ;
164
163
  rdfs:label "fileUrl" .
@@ -461,6 +460,11 @@ sh:order rdf:type owl:DatatypeProperty ;
461
460
  rdfs:isDefinedBy : ;
462
461
  rdfs:label "AnzoGraphmartQueryDrivenTemplatedStepSparqlSource" .
463
462
 
463
+ ### https://mustrd.com/model/SpadeEdnGroupSource
464
+ :SpadeEdnGroupSource rdf:type owl:Class ;
465
+ rdfs:subClassOf :SparqlSource ;
466
+ rdfs:comment "Allows reference to a spade.edn file, and a specific groupid (think Anzo layer), within that" ;
467
+ rdfs:label "SpadeEdnGroupSource" .
464
468
 
465
469
  ### https://mustrd.com/model/Then
466
470
  :Then rdf:type owl:Class ;