mustrd 0.2.0__py3-none-any.whl → 0.2.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mustrd/README.adoc +201 -210
- mustrd/execute_update_spec.py +18 -0
- mustrd/logger_setup.py +48 -48
- mustrd/mustrd.py +842 -787
- mustrd/mustrdAnzo.py +208 -220
- mustrd/mustrdGraphDb.py +128 -125
- mustrd/mustrdRdfLib.py +56 -56
- mustrd/namespace.py +104 -125
- mustrd/run.py +95 -106
- mustrd/spec_component.py +617 -690
- mustrd/triple_store_dispatch.py +115 -0
- mustrd/utils.py +30 -38
- {mustrd-0.2.0.dist-info → mustrd-0.2.0a1.dist-info}/LICENSE +21 -21
- mustrd-0.2.0a1.dist-info/METADATA +24 -0
- mustrd-0.2.0a1.dist-info/RECORD +17 -0
- {mustrd-0.2.0.dist-info → mustrd-0.2.0a1.dist-info}/WHEEL +1 -1
- mustrd/TestResult.py +0 -136
- mustrd/model/catalog-v001.xml +0 -5
- mustrd/model/mustrdShapes.ttl +0 -253
- mustrd/model/mustrdTestOntology.ttl +0 -51
- mustrd/model/mustrdTestShapes.ttl +0 -24
- mustrd/model/ontology.ttl +0 -494
- mustrd/model/test-resources/resources.ttl +0 -60
- mustrd/model/triplestoreOntology.ttl +0 -174
- mustrd/model/triplestoreshapes.ttl +0 -42
- mustrd/mustrdQueryProcessor.py +0 -136
- mustrd/mustrdTestPlugin.py +0 -328
- mustrd/steprunner.py +0 -166
- mustrd/templates/md_ResultList_leaf_template.jinja +0 -19
- mustrd/templates/md_ResultList_template.jinja +0 -9
- mustrd/templates/md_stats_template.jinja +0 -3
- mustrd/test/test_mustrd.py +0 -5
- mustrd-0.2.0.dist-info/METADATA +0 -97
- mustrd-0.2.0.dist-info/RECORD +0 -32
- mustrd-0.2.0.dist-info/entry_points.txt +0 -3
mustrd/mustrdAnzo.py
CHANGED
@@ -1,220 +1,208 @@
|
|
1
|
-
"""
|
2
|
-
MIT License
|
3
|
-
|
4
|
-
Copyright (c) 2023 Semantic Partners Ltd
|
5
|
-
|
6
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
7
|
-
of this software and associated documentation files (the "Software"), to deal
|
8
|
-
in the Software without restriction, including without limitation the rights
|
9
|
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
10
|
-
copies of the Software, and to permit persons to whom the Software is
|
11
|
-
furnished to do so, subject to the following conditions:
|
12
|
-
|
13
|
-
The above copyright notice and this permission notice shall be included in all
|
14
|
-
copies or substantial portions of the Software.
|
15
|
-
|
16
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
17
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
18
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
19
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
20
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
21
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
22
|
-
SOFTWARE.
|
23
|
-
"""
|
24
|
-
|
25
|
-
import requests
|
26
|
-
from pyanzo import AnzoClient
|
27
|
-
from rdflib import Graph, ConjunctiveGraph, Literal, URIRef
|
28
|
-
from requests import ConnectTimeout, Response, HTTPError, RequestException, ConnectionError
|
29
|
-
from bs4 import BeautifulSoup
|
30
|
-
import logging
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
raise
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
"
|
64
|
-
"
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
def
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
}
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
logging.debug(f"upload_given {triple_store} {given}")
|
210
|
-
clear_graph(triple_store, triple_store['input_graph'])
|
211
|
-
clear_graph(triple_store, triple_store['output_graph'])
|
212
|
-
serialized_given = given.serialize(format="nt")
|
213
|
-
|
214
|
-
insert_query = f"INSERT DATA {{graph <{triple_store['input_graph']}>{{{serialized_given}}}}}"
|
215
|
-
execute_sparql(triple_store, True, insert_query, None, None)
|
216
|
-
|
217
|
-
|
218
|
-
def clear_graph(triple_store: dict, graph_uri: str):
    """Issue a SPARQL ``CLEAR GRAPH`` for ``graph_uri`` via ``execute_sparql``.

    Args:
        triple_store: Triple store configuration handed through to
            ``execute_sparql`` unchanged.
        graph_uri: URI of the graph to clear.
    """
    # NOTE(review): the meaning of the positional True / None / None arguments
    # is defined by execute_sparql, which is not visible here - confirm there.
    clear_statement = f"CLEAR GRAPH <{graph_uri}>"
    execute_sparql(triple_store, True, clear_statement, None, None)
|
220
|
-
|
1
|
+
"""
|
2
|
+
MIT License
|
3
|
+
|
4
|
+
Copyright (c) 2023 Semantic Partners Ltd
|
5
|
+
|
6
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
7
|
+
of this software and associated documentation files (the "Software"), to deal
|
8
|
+
in the Software without restriction, including without limitation the rights
|
9
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
10
|
+
copies of the Software, and to permit persons to whom the Software is
|
11
|
+
furnished to do so, subject to the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be included in all
|
14
|
+
copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
17
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
18
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
19
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
20
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
21
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
22
|
+
SOFTWARE.
|
23
|
+
"""
|
24
|
+
|
25
|
+
import requests
|
26
|
+
from pyanzo import AnzoClient
|
27
|
+
from rdflib import Graph, ConjunctiveGraph, Literal, URIRef
|
28
|
+
from requests import ConnectTimeout, Response, HTTPError, RequestException, ConnectionError
|
29
|
+
from bs4 import BeautifulSoup
|
30
|
+
import logging
|
31
|
+
from execute_update_spec import execute_update_spec
|
32
|
+
from namespace import MUST
|
33
|
+
|
34
|
+
|
35
|
+
# https://github.com/Semantic-partners/mustrd/issues/73
|
36
|
+
def manage_anzo_response(response: Response) -> str:
    """Return the decoded body of a successful Anzo response, or raise.

    Args:
        response: HTTP response returned by the Anzo SPARQL endpoint.

    Returns:
        The response body decoded as UTF-8 when the status code is 200.

    Raises:
        HTTPError: On status 403. The message carries the ``<title>`` of the
            HTML error page, or the raw body when the page has no title.
        RequestException: On any other non-200 status. The message carries
            the full response body.
    """
    content_string = response.content.decode("utf-8")
    if response.status_code == 200:
        return content_string
    if response.status_code == 403:
        html = BeautifulSoup(content_string, 'html.parser')
        # A 403 body is not guaranteed to contain a <title>; without this
        # guard an AttributeError would hide the real authentication failure.
        title_tag = html.title.string if html.title is not None else content_string
        raise HTTPError(f"Anzo authentication error, status code: {response.status_code}, content: {title_tag}")
    raise RequestException(f"Anzo error, status code: {response.status_code}, content: {content_string}")
|
46
|
+
|
47
|
+
|
48
|
+
def query_with_bindings(bindings: dict, when: str) -> str:
    """Inject ``bindings`` into a SPARQL query as ``VALUES`` clauses.

    One ``VALUES ?name {term}`` clause per binding is inserted right after
    the opening brace of the first ``WHERE`` clause. The rest of the query is
    left untouched, so case-sensitive IRIs, prefixed names and literals keep
    their original spelling.

    Args:
        bindings: Maps variable names (without the leading ``?``) to terms
            exposing an ``n3()`` method.
        when: The SPARQL query text; it must contain ``WHERE {`` (matched
            case-insensitively, any whitespace allowed before the brace).

    Returns:
        The query with the ``VALUES`` clauses injected.

    Raises:
        ValueError: If the query has no ``WHERE {`` clause to inject into.
    """
    import re  # local import keeps this block self-contained

    values = "".join(f"VALUES ?{key} {{{value.n3()}}} " for key, value in bindings.items())
    # Locate the keyword case-insensitively instead of lower-casing the whole
    # query, which would corrupt every IRI and literal in it.
    where_match = re.search(r"where\s*\{", when, flags=re.IGNORECASE)
    if where_match is None:
        raise ValueError(f"Cannot inject bindings: no 'WHERE {{' clause found in query: {when}")
    head = when[:where_match.start()].strip()
    tail = when[where_match.end():].strip()
    return f"{head} WHERE {{ {values} {tail}"
|
54
|
+
|
55
|
+
|
56
|
+
def execute_select_mustrd_spec_stage(triple_store: dict, given: Graph, when: str, bindings: dict = None) -> str:
    """Upload ``given`` to Anzo, run the SELECT query ``when`` and return the raw result.

    Args:
        triple_store: Anzo connection settings; this function reads the keys
            ``url``, ``port``, ``username``, ``password``, ``gqe_uri`` and
            ``input_graph``.
        given: Graph uploaded to the input graph before the query runs.
        when: SPARQL SELECT query text.
        bindings: Optional variable bindings injected into ``when`` as
            ``VALUES`` clauses.

    Returns:
        The response body as a string; the request asks for
        ``application/sparql-results+json``.

    Raises:
        HTTPError: If Anzo answers with status 403.
        RequestException: If Anzo answers with any other non-200 status, or
            if the HTTP request itself fails.
    """
    # The original wrapped this body in a try/except that only re-raised, and
    # paused on input("Press enter to continue"). The pause was debugging
    # residue that blocks unattended runs, so both are gone.
    upload_given(triple_store, given)
    if bindings:
        when = query_with_bindings(bindings, when)
    data = {'datasourceURI': triple_store['gqe_uri'], 'query': when,
            'default-graph-uri': triple_store['input_graph'], 'skipCache': 'true'}
    url = f"https://{triple_store['url']}:{triple_store['port']}/sparql?format=application/sparql-results+json"
    # NOTE(review): verify=False disables TLS certificate verification; kept
    # because existing deployments may rely on it - confirm before changing.
    return manage_anzo_response(requests.post(url=url,
                                              auth=(triple_store['username'], triple_store['password']),
                                              data=data,
                                              verify=False))
|
71
|
+
|
72
|
+
@execute_update_spec.method(MUST.Anzo)
def execute_update_spec_stage_anzo(triple_store: dict, given: Graph, when: str, bindings: dict = None) -> Graph:
    """Run a SPARQL update against Anzo and return the resulting output graph.

    The ``${usingSources}`` and ``${targetGraph}`` placeholders in ``when``
    are replaced with the configured input and output graphs. The update is
    posted, then the whole output graph is read back with a CONSTRUCT query.

    Args:
        triple_store: Anzo connection settings; this function reads the keys
            ``url``, ``port``, ``username``, ``password``, ``gqe_uri``,
            ``input_graph`` and ``output_graph``.
        given: Graph uploaded to the input graph before the update runs.
        when: SPARQL update text, optionally containing the placeholders.
        bindings: Accepted for signature parity with the other stages; not
            used by this function.

    Returns:
        A graph parsed from the content of the output graph after the update.

    Raises:
        HTTPError: If Anzo answers with status 403.
        RequestException: If Anzo answers with any other non-200 status, or
            if an HTTP request itself fails.
    """
    # Keep the password out of the log; everything else is logged as before.
    loggable_store = {key: value for key, value in triple_store.items() if key != 'password'}
    logging.info(f"updating in anzo! triple_store={loggable_store!r} {given=} {when=}")
    upload_given(triple_store, given)

    input_graph = triple_store['input_graph']
    output_graph = triple_store['output_graph']

    substituted_query = when.replace("${usingSources}", f"USING <{input_graph}>").replace(
        "${targetGraph}", f"<{output_graph}>")

    data = {'datasourceURI': triple_store['gqe_uri'], 'update': substituted_query,
            'default-graph-uri': input_graph, 'skipCache': 'true'}
    url = f"https://{triple_store['url']}:{triple_store['port']}/sparql?format=ttl"
    # NOTE(review): verify=False disables TLS certificate verification; kept
    # because existing deployments may rely on it - confirm before changing.
    response = manage_anzo_response(requests.post(url=url,
                                                  auth=(triple_store['username'],
                                                        triple_store['password']),
                                                  data=data,
                                                  verify=False))
    logging.info(f'response {response}')

    # Read back everything that the update wrote to the output graph.
    check_data = {'datasourceURI': triple_store['gqe_uri'], 'query': "construct {?s ?p ?o} { ?s ?p ?o }",
                  'default-graph-uri': output_graph, 'skipCache': 'true'}
    # manage_anzo_response raises on any non-200 status, so only a successful
    # body reaches the parser below.
    everything_response = manage_anzo_response(requests.post(url=url,
                                                             auth=(triple_store['username'],
                                                                   triple_store['password']),
                                                             data=check_data,
                                                             verify=False))
    new_graph = Graph().parse(data=everything_response)
    logging.info(f"new_graph={new_graph.serialize(format='ttl')}")
    return new_graph
|
104
|
+
|
105
|
+
|
106
|
+
def execute_construct_mustrd_spec_stage(triple_store: dict, given: Graph, when: str, bindings: dict = None) -> Graph:
    """Upload ``given`` to Anzo, run the CONSTRUCT query ``when`` and parse the result.

    Args:
        triple_store: Anzo connection settings; this function reads the keys
            ``url``, ``port``, ``username``, ``password``, ``gqe_uri`` and
            ``input_graph``.
        given: Graph uploaded to the input graph before the query runs.
        when: SPARQL CONSTRUCT query text.
        bindings: Optional variable bindings injected into ``when`` as
            ``VALUES`` clauses.

    Returns:
        A graph parsed from the response body; the request asks for ``ttl``.

    Raises:
        ConnectionError, TimeoutError, HTTPError, ConnectTimeout: Logged at
            error level and then re-raised unchanged.
    """
    try:
        upload_given(triple_store, given)
        construct_query = query_with_bindings(bindings, when) if bindings else when
        payload = {
            'datasourceURI': triple_store['gqe_uri'],
            'query': construct_query,
            'default-graph-uri': triple_store['input_graph'],
            'skipCache': 'true',
        }
        endpoint = f"https://{triple_store['url']}:{triple_store['port']}/sparql?format=ttl"
        credentials = (triple_store['username'], triple_store['password'])
        response = requests.post(url=endpoint, auth=credentials, data=payload, verify=False)
        logging.info(f'response {response}')
        turtle_text = manage_anzo_response(response)
        return Graph().parse(data=turtle_text)
    except (ConnectionError, TimeoutError, HTTPError, ConnectTimeout) as e:
        logging.error(f'response {e}')
        raise
|
124
|
+
|
125
|
+
|
126
|
+
# Get Given or then from the content of a graphmart
|
127
|
+
def get_spec_component_from_graphmart(triple_store: dict, graphmart: URIRef, layer: URIRef = None) -> ConjunctiveGraph:
    """Fetch the content of a graphmart (optionally a single layer) as an rdflib graph.

    Args:
        triple_store: Anzo connection settings; this function reads the keys
            ``url``, ``port``, ``username`` and ``password``.
        graphmart: URI of the graphmart to query.
        layer: Optional data layer passed to the client as ``data_layers``.

    Returns:
        The result of a ``CONSTRUCT {?s ?p ?o} WHERE {?s ?p ?o}`` query,
        converted by the client to an rdflib graph.

    Raises:
        ConnectionError: When the Anzo client raises ``RuntimeError``. The
            name is the one this module imports from ``requests``.
    """
    construct_all = "CONSTRUCT {?s ?p ?o} WHERE {?s ?p ?o}"
    try:
        client = AnzoClient(triple_store['url'], triple_store['port'],
                            triple_store['username'], triple_store['password'])
        query_result = client.query_graphmart(graphmart=graphmart,
                                              data_layers=layer,
                                              query_string=construct_all,
                                              skip_cache=True)
        quad_store = query_result.as_quad_store()
        return quad_store.as_rdflib_graph()
    except RuntimeError as e:
        raise ConnectionError(f"Anzo connection error, {e}")
|
137
|
+
|
138
|
+
|
139
|
+
def get_query_from_querybuilder(triple_store: dict, folder_name: Literal, query_name: Literal) -> str:
    """Look up a saved query by folder title and query title.

    Args:
        triple_store: Anzo connection settings; this function reads the keys
            ``url``, ``port``, ``username`` and ``password``.
        folder_name: Title of the query folder that holds the bookmark.
        query_name: Title of the query bookmark.

    Returns:
        The ``query`` value of the first matching record.

    Raises:
        FileNotFoundError: If no bookmark matches the two titles.
    """
    def _sparql_string(value) -> str:
        # Escape the characters that would end or corrupt a double-quoted
        # SPARQL string literal, so titles with quotes or backslashes still
        # form a valid query and cannot alter its structure.
        return (str(value)
                .replace("\\", "\\\\")
                .replace('"', '\\"')
                .replace("\n", "\\n")
                .replace("\r", "\\r"))

    query = f"""SELECT ?query WHERE {{
        graph ?queryFolder {{
            ?bookmark a <http://www.cambridgesemantics.com/ontologies/QueryPlayground#QueryBookmark>;
                         <http://openanzo.org/ontologies/2008/07/System#query> ?query;
                         <http://purl.org/dc/elements/1.1/title> "{_sparql_string(query_name)}"
            }}
            ?queryFolder a <http://www.cambridgesemantics.com/ontologies/QueryPlayground#QueryFolder>;
                         <http://purl.org/dc/elements/1.1/title> "{_sparql_string(folder_name)}"
    }}"""
    anzo_client = AnzoClient(triple_store['url'], triple_store['port'], triple_store['username'],
                             triple_store['password'])

    result = anzo_client.query_journal(query_string=query).as_table_results().as_record_dictionaries()
    if not result:
        raise FileNotFoundError(f"Query {query_name} not found in folder {folder_name}")
    return result[0].get("query")
|
156
|
+
|
157
|
+
|
158
|
+
# https://github.com/Semantic-partners/mustrd/issues/102
|
159
|
+
def get_query_from_step(triple_store: dict, query_step_uri: URIRef):
    """Fetch the transform query attached to a graphmart step.

    Args:
        triple_store: Anzo connection settings; this function reads the keys
            ``url``, ``port``, ``username`` and ``password``.
        query_step_uri: URI of the step whose ``transformQuery`` is wanted.

    Returns:
        The ``query`` value of the first matching record.

    Raises:
        FileNotFoundError: If no step with that URI carries a transform
            query. This replaces the bare ``IndexError`` the empty result used
            to cause and matches ``get_query_from_querybuilder``.
    """
    query = f"""SELECT ?stepUri ?query WHERE {{
        BIND(<{query_step_uri}> as ?stepUri)
        graph ?g {{
            ?stepUri a <http://cambridgesemantics.com/ontologies/Graphmarts#Step>;
                     <http://cambridgesemantics.com/ontologies/Graphmarts#transformQuery> ?query
        }}
    }}"""
    anzo_client = AnzoClient(triple_store['url'], triple_store['port'], triple_store['username'],
                             triple_store['password'])
    record_dictionaries = anzo_client.query_journal(query_string=query).as_table_results().as_record_dictionaries()

    if not record_dictionaries:
        raise FileNotFoundError(f"No transform query found for step {query_step_uri}")
    return record_dictionaries[0].get("query")
|
177
|
+
|
178
|
+
|
179
|
+
def upload_given(triple_store: dict, given: Graph):
    """Replace the content of the input graph with ``given`` and empty the output graph.

    Nothing is cleared or uploaded when ``given`` is falsy.

    Args:
        triple_store: Anzo connection settings; this function reads the keys
            ``url``, ``port``, ``username``, ``password``, ``gqe_uri``,
            ``input_graph`` and ``output_graph``.
        given: Graph serialized as N-Triples and inserted into the input graph.

    Raises:
        HTTPError: If Anzo answers with status 403.
        RequestException: If Anzo answers with any other non-200 status, or
            if an HTTP request itself fails.
    """
    # Keep the password out of the log; everything else is logged as before.
    loggable_store = {key: value for key, value in triple_store.items() if key != 'password'}
    logging.info(f"upload_given {loggable_store} {given}")
    if not given:
        return

    input_graph = triple_store['input_graph']
    output_graph = triple_store['output_graph']

    # Start from a clean slate so earlier runs cannot leak into this one.
    clear_graph(triple_store, input_graph)
    clear_graph(triple_store, output_graph)

    serialized_given = given.serialize(format="nt")
    insert_query = f"INSERT DATA {{graph <{input_graph}>{{{serialized_given}}}}}"
    data = {'datasourceURI': triple_store['gqe_uri'], 'update': insert_query}
    # NOTE(review): verify=False disables TLS certificate verification; kept
    # because existing deployments may rely on it - confirm before changing.
    response = requests.post(url=f"https://{triple_store['url']}:{triple_store['port']}/sparql",
                             auth=(triple_store['username'], triple_store['password']), data=data, verify=False)
    manage_anzo_response(response)
|
196
|
+
|
197
|
+
|
198
|
+
def clear_graph(triple_store: dict, graph_uri: str):
    """Post a SPARQL ``CLEAR GRAPH`` update for ``graph_uri`` to Anzo.

    Args:
        triple_store: Anzo connection settings; this function reads the keys
            ``url``, ``port``, ``username``, ``password`` and ``gqe_uri``.
        graph_uri: URI of the graph to clear.

    Raises:
        HTTPError: If Anzo answers with status 403.
        RequestException: If Anzo answers with any other non-200 status, or
            if the HTTP request itself fails.
    """
    payload = {'datasourceURI': triple_store['gqe_uri'], 'update': f"CLEAR GRAPH <{graph_uri}>"}
    endpoint = f"https://{triple_store['url']}:{triple_store['port']}/sparql"
    credentials = (triple_store['username'], triple_store['password'])
    # Errors propagate to the caller unchanged, as with the re-raise-only
    # handler this replaces.
    manage_anzo_response(requests.post(url=endpoint, auth=credentials, data=payload, verify=False))
|
208
|
+
|