mustrd 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mustrd/mustrdAnzo.py CHANGED
@@ -1,220 +1,236 @@
1
- """
2
- MIT License
3
-
4
- Copyright (c) 2023 Semantic Partners Ltd
5
-
6
- Permission is hereby granted, free of charge, to any person obtaining a copy
7
- of this software and associated documentation files (the "Software"), to deal
8
- in the Software without restriction, including without limitation the rights
9
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
- copies of the Software, and to permit persons to whom the Software is
11
- furnished to do so, subject to the following conditions:
12
-
13
- The above copyright notice and this permission notice shall be included in all
14
- copies or substantial portions of the Software.
15
-
16
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
- SOFTWARE.
23
- """
24
-
25
- import requests
26
- from pyanzo import AnzoClient
27
- from rdflib import Graph, ConjunctiveGraph, Literal, URIRef
28
- from requests import ConnectTimeout, Response, HTTPError, RequestException, ConnectionError
29
- from bs4 import BeautifulSoup
30
- import logging
31
-
32
-
33
- # https://github.com/Semantic-partners/mustrd/issues/73
34
- def manage_anzo_response(response: Response) -> str:
35
- content_string = response.content.decode("utf-8")
36
- if response.status_code == 200:
37
- return content_string
38
- elif response.status_code == 403:
39
- html = BeautifulSoup(content_string, 'html.parser')
40
- title_tag = html.title.string
41
- raise HTTPError(f"Anzo authentication error, status code: {response.status_code}, content: {title_tag}")
42
- else:
43
- raise RequestException(f"Anzo error, status code: {response.status_code}, content: {content_string}")
44
-
45
- def query_with_bindings(bindings: dict, when: str) -> str:
46
- values = ""
47
- for key, value in bindings.items():
48
- values += f"VALUES ?{key} {{{value.n3()}}} "
49
- split_query = when.lower().split("where {", 1)
50
- return f"{split_query[0].strip()} WHERE {{ {values} {split_query[1].strip()}"
51
-
52
- def execute_select (triple_store: dict, when: str, bindings: dict = None) -> str:
53
- if bindings:
54
- when = query_with_bindings(bindings, when)
55
- # Just remove ${fromSources} if we are executing a query step; the sources are defined using http parameters
56
- when = when.replace("${fromSources}", "")
57
- return execute_sparql(triple_store, False, when, triple_store['input_graph'])
58
-
59
- PARAMS = {
60
- # Update parameters for INSERT / DELETE
61
- True: {
62
- "default-graph-param": "using-graph-uri",
63
- "named-graph-param":"using-named-graph-uri",
64
- "Content-Type": "application/sparql-update"
65
- },
66
- # Query parameters for SELECT / CONSTRUCT
67
- False: {
68
- "default-graph-param": "default-graph-uri",
69
- "named-graph-param":"named-graph-uri",
70
- "Content-Type": "application/sparql-query"
71
- }
72
- }
73
- def execute_sparql(triple_store: dict, is_update: bool, sparql, graph: str, format: str = "application/sparql-results+json"):
74
- params = {
75
- "format" : format,
76
- "datasourceURI" : triple_store['gqe_uri'],
77
- "skipCache": "true",
78
- # Default and named datasets have different query param for query and update
79
- PARAMS[is_update]["default-graph-param"] : graph,
80
- PARAMS[is_update]["named-graph-param"] : graph
81
- }
82
- headers={"Content-Type": PARAMS[is_update]["Content-Type"]}
83
- try:
84
- response = manage_anzo_response(requests.post(url=f"https://{triple_store['url']}:{triple_store['port']}/sparql",
85
- params=params,
86
- auth=(triple_store['username'], triple_store['password']),
87
- headers=headers,
88
- data=sparql,
89
- verify=False))
90
- logging.debug(f'response {response}')
91
- return response
92
- except (ConnectionError, TimeoutError, HTTPError, ConnectTimeout) as e:
93
- logging.error(f'response {e}')
94
- raise
95
-
96
- def execute_update(triple_store: dict, when: str, bindings: dict = None) -> Graph:
97
- logging.debug(f"updating in anzo! {triple_store=} {when=}")
98
-
99
- # FIXME If query doesn't contain ${targetGraph}, then graph should be defined explicitly in the query
100
- substituted_query = when.replace("${usingSources}", "").replace(
101
- "${targetGraph}", f"<{triple_store['output_graph']}>")
102
-
103
- execute_sparql(triple_store, True, substituted_query, triple_store['input_graph'], "ttl")
104
-
105
- new_graph = execute_construct(triple_store, "construct {?s ?p ?o} { ?s ?p ?o }")
106
-
107
- logging.debug(f"new_graph={new_graph.serialize(format='ttl')}")
108
-
109
- return new_graph
110
-
111
-
112
- def execute_construct(triple_store: dict, when: str, bindings: dict = None) -> Graph:
113
- if bindings:
114
- when = query_with_bindings(bindings, when)
115
- response = execute_sparql(triple_store, False, when, triple_store['input_graph'], "ttl")
116
- return Graph().parse(data=response)
117
-
118
-
119
- # Get Given or then from the content of a graphmart
120
- def get_spec_component_from_graphmart(triple_store: dict, graphmart: URIRef, layer: URIRef = None) -> ConjunctiveGraph:
121
- try:
122
- anzo_client = AnzoClient(triple_store['url'], triple_store['port'],
123
- username=triple_store['username'],
124
- password=triple_store['password'])
125
- return anzo_client.query_graphmart(graphmart=graphmart,
126
- data_layers=layer,
127
- query_string="CONSTRUCT {?s ?p ?o} WHERE {?s ?p ?o}",
128
- skip_cache=True).as_quad_store().as_rdflib_graph()
129
- except RuntimeError as e:
130
- raise ConnectionError(f"Anzo connection error, {e}")
131
-
132
-
133
- def get_query_from_querybuilder(triple_store: dict, folder_name: Literal, query_name: Literal) -> str:
134
- query = f"""SELECT ?query WHERE {{
135
- graph ?queryFolder {{
136
- ?bookmark a <http://www.cambridgesemantics.com/ontologies/QueryPlayground#QueryBookmark>;
137
- <http://openanzo.org/ontologies/2008/07/System#query> ?query;
138
- <http://purl.org/dc/elements/1.1/title> "{query_name}"
139
- }}
140
- ?queryFolder a <http://www.cambridgesemantics.com/ontologies/QueryPlayground#QueryFolder>;
141
- <http://purl.org/dc/elements/1.1/title> "{folder_name}"
142
- }}"""
143
- anzo_client = AnzoClient(triple_store['url'], triple_store['port'],
144
- username=triple_store['username'],
145
- password=triple_store['password'])
146
-
147
- result = anzo_client.query_journal(query_string=query).as_table_results().as_record_dictionaries()
148
- if len(result) == 0:
149
- raise FileNotFoundError(f"Query {query_name} not found in folder {folder_name}")
150
- return result[0].get("query")
151
-
152
-
153
- # https://github.com/Semantic-partners/mustrd/issues/102
154
- def get_query_from_step(triple_store: dict, query_step_uri: URIRef) -> str:
155
- query = f"""SELECT ?stepUri ?query WHERE {{
156
- BIND(<{query_step_uri}> as ?stepUri)
157
- ?stepUri a <http://cambridgesemantics.com/ontologies/Graphmarts#Step>;
158
- <http://cambridgesemantics.com/ontologies/Graphmarts#transformQuery> ?query
159
- }}
160
- # """
161
- anzo_client = AnzoClient(triple_store['url'], triple_store['port'],
162
- username=triple_store['username'],
163
- password=triple_store['password'])
164
- record_dictionaries = anzo_client.query_journal(query_string=query).as_table_results().as_record_dictionaries()
165
-
166
- return record_dictionaries[0].get(
167
- "query")
168
-
169
- def get_queries_from_templated_step(triple_store: dict, query_step_uri: URIRef) -> dict:
170
-
171
- query = f"""SELECT ?stepUri ?param_query ?query_template WHERE {{
172
- BIND(<{query_step_uri}> as ?stepUri)
173
- ?stepUri a <http://cambridgesemantics.com/ontologies/Graphmarts#Step> ;
174
- <http://cambridgesemantics.com/ontologies/Graphmarts#parametersTemplate> ?param_query ;
175
- <http://cambridgesemantics.com/ontologies/Graphmarts#template> ?query_template .
176
- }}
177
- """
178
- anzo_client = AnzoClient(triple_store['url'], triple_store['port'],
179
- username=triple_store['username'],
180
- password=triple_store['password'])
181
- record_dictionaries = anzo_client.query_journal(query_string=query).as_table_results().as_record_dictionaries()
182
- return record_dictionaries[0]
183
-
184
-
185
- def get_queries_for_layer(triple_store: dict, graphmart_layer_uri: URIRef):
186
- query = f"""PREFIX graphmarts: <http://cambridgesemantics.com/ontologies/Graphmarts#>
187
- PREFIX anzo: <http://openanzo.org/ontologies/2008/07/Anzo#>
188
- SELECT ?query ?param_query ?query_template
189
- {{ <{graphmart_layer_uri}> graphmarts:step ?step .
190
- ?step anzo:index ?index ;
191
- anzo:orderedValue ?query_step .
192
- ?query_step graphmarts:enabled true ;
193
- OPTIONAL {{ ?query_step
194
- graphmarts:parametersTemplate ?param_query ;
195
- graphmarts:template ?query_template ;
196
- . }}
197
- OPTIONAL {{ ?query_step
198
- graphmarts:transformQuery ?query ;
199
- . }}
200
- }}
201
- ORDER BY ?index"""
202
- anzo_client = AnzoClient(triple_store['url'], triple_store['port'],
203
- username=triple_store['username'],
204
- password=triple_store['password'])
205
- return anzo_client.query_journal(query_string=query).as_table_results().as_record_dictionaries()
206
-
207
-
208
- def upload_given(triple_store: dict, given: Graph):
209
- logging.debug(f"upload_given {triple_store} {given}")
210
- clear_graph(triple_store, triple_store['input_graph'])
211
- clear_graph(triple_store, triple_store['output_graph'])
212
- serialized_given = given.serialize(format="nt")
213
-
214
- insert_query = f"INSERT DATA {{graph <{triple_store['input_graph']}>{{{serialized_given}}}}}"
215
- execute_sparql(triple_store, True, insert_query, None, None)
216
-
217
-
218
- def clear_graph(triple_store: dict, graph_uri: str):
219
- execute_sparql(triple_store, True, f"CLEAR GRAPH <{graph_uri}>", None, None)
220
-
1
+ """
2
+ MIT License
3
+
4
+ Copyright (c) 2023 Semantic Partners Ltd
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in all
14
+ copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
+ SOFTWARE.
23
+ """
24
+
25
+ import requests
26
+ from pyanzo import AnzoClient
27
+ from rdflib import Graph, ConjunctiveGraph, Literal, URIRef
28
+ from requests import ConnectTimeout, Response, HTTPError, RequestException, ConnectionError
29
+ from bs4 import BeautifulSoup
30
+ import logging
31
+ from .namespace import MUST
32
+
33
+
34
# https://github.com/Semantic-partners/mustrd/issues/73
def manage_anzo_response(response: Response) -> str:
    """Return the decoded body of an Anzo HTTP response, or raise on failure.

    Args:
        response: Response object returned by a ``requests`` call.

    Returns:
        The response body decoded as UTF-8 when the status code is 200.

    Raises:
        HTTPError: If the status code is 403. The message carries the HTML
            ``<title>`` of the body when there is one, otherwise the raw body.
        RequestException: For any other status code; the message carries the
            raw body.
    """
    content_string = response.content.decode("utf-8")
    if response.status_code == 200:
        return content_string
    if response.status_code == 403:
        html = BeautifulSoup(content_string, 'html.parser')
        # A 403 body is not guaranteed to contain a <title>; fall back to the
        # raw body instead of failing with AttributeError on ``None.string``.
        title_tag = html.title.string if html.title is not None else content_string
        raise HTTPError(f"Anzo authentication error, status code: {response.status_code}, content: {title_tag}")
    raise RequestException(f"Anzo error, status code: {response.status_code}, content: {content_string}")
45
+
46
+
47
def query_with_bindings(bindings: dict, when: str) -> str:
    """Inject one ``VALUES`` clause per binding at the start of the WHERE body.

    The query is cut at the first ``WHERE {`` (matched case-insensitively,
    with optional whitespace before the brace). The original text on both
    sides of the cut is kept untouched, so IRIs, literals and variable names
    retain their case.

    Args:
        bindings: Maps a variable name (without ``?``) to an object exposing
            ``n3()``, whose result is used as the bound value.
        when: SPARQL query text containing a ``WHERE {`` clause.

    Returns:
        The query with the ``VALUES`` clauses placed right after ``WHERE {``.

    Raises:
        ValueError: If ``when`` contains no ``WHERE {`` clause.
    """
    import re  # function-scope import keeps this block self-contained

    values = "".join(f"VALUES ?{key} {{{value.n3()}}} " for key, value in bindings.items())
    where_clause = re.search(r"\bwhere\s*\{", when, flags=re.IGNORECASE)
    if where_clause is None:
        raise ValueError(f"Cannot apply bindings: no WHERE clause found in query: {when}")
    head = when[:where_clause.start()].strip()
    tail = when[where_clause.end():].strip()
    return f"{head} WHERE {{ {values} {tail}"
53
+
54
def execute_select(triple_store: dict, when: str, bindings: dict = None) -> str:
    """Run a SELECT query against the Anzo SPARQL endpoint.

    ``${fromSources}`` in the query is replaced by ``FROM`` clauses for the
    input and output graphs, and ``${targetGraph}`` by the output graph IRI.

    Args:
        triple_store: Connection settings; the keys read here are ``url``,
            ``port``, ``username``, ``password``, ``gqe_uri``,
            ``input_graph`` and ``output_graph``.
        when: SPARQL query text.
        bindings: Optional variable bindings applied with
            ``query_with_bindings`` before the placeholders are substituted.

    Returns:
        The response body as returned by ``manage_anzo_response``
        (requested in ``application/sparql-results+json`` format).

    Raises:
        Whatever ``requests.post`` or ``manage_anzo_response`` raise;
        nothing is caught here.
    """
    query = query_with_bindings(bindings, when) if bindings else when

    from_clauses = f"FROM <{triple_store['input_graph']}>\nFROM <{triple_store['output_graph']}>"
    target_graph = f"<{triple_store['output_graph']}>"
    query = query.replace("${fromSources}", from_clauses).replace("${targetGraph}", target_graph)

    payload = {
        'datasourceURI': triple_store['gqe_uri'],
        'query': query,
        'skipCache': 'true',
    }
    endpoint = f"https://{triple_store['url']}:{triple_store['port']}/sparql?format=application/sparql-results+json"
    anzo_response = requests.post(
        url=endpoint,
        auth=(triple_store['username'], triple_store['password']),
        data=payload,
        verify=False,
    )
    return manage_anzo_response(anzo_response)
69
+
70
def execute_update(triple_store: dict, when: str, bindings: dict = None) -> Graph:
    """Run a SPARQL update on Anzo and return the resulting output graph.

    ``${usingSources}`` in the update is replaced by ``USING`` clauses for the
    input and output graphs, and ``${targetGraph}`` by the output graph IRI.
    After the update, a ``construct {?s ?p ?o} { ?s ?p ?o }`` query is run
    with the output graph as default graph and its result is parsed.

    Args:
        triple_store: Connection settings; the keys read here are ``url``,
            ``port``, ``username``, ``password``, ``gqe_uri``,
            ``input_graph`` and ``output_graph``.
        when: SPARQL update text.
        bindings: Accepted for signature parity with the other ``execute_*``
            functions; this function does not use it.

    Returns:
        A ``Graph`` parsed from the response to the follow-up construct query.

    Raises:
        Whatever ``requests.post`` or ``manage_anzo_response`` raise;
        nothing is caught here.
    """
    # The settings dict holds the password: mask it before it reaches the log.
    loggable_store = {key: ("***" if key == "password" else value)
                      for key, value in triple_store.items()}
    logging.debug(f"updating in anzo! triple_store={loggable_store} {when=}")
    input_graph = triple_store['input_graph']
    output_graph = triple_store['output_graph']

    substituted_query = when.replace(
        "${usingSources}", f"USING <{input_graph}> \nUSING <{output_graph}>").replace(
        "${targetGraph}", f"<{output_graph}>")

    data = {'datasourceURI': triple_store['gqe_uri'], 'update': substituted_query,
            'default-graph-uri': input_graph, 'skipCache': 'true'}
    url = f"https://{triple_store['url']}:{triple_store['port']}/sparql?format=ttl"
    credentials = (triple_store['username'], triple_store['password'])
    response = manage_anzo_response(requests.post(url=url,
                                                  auth=credentials,
                                                  data=data,
                                                  verify=False))
    logging.debug(f'response {response}')

    # Read back everything in the output graph so the caller can inspect
    # what the update produced.
    check_data = {'datasourceURI': triple_store['gqe_uri'], 'query': "construct {?s ?p ?o} { ?s ?p ?o }",
                  'default-graph-uri': output_graph, 'skipCache': 'true'}
    everything_response = manage_anzo_response(requests.post(url=url,
                                                             auth=credentials,
                                                             data=check_data,
                                                             verify=False))
    # todo deal with error responses
    new_graph = Graph().parse(data=everything_response)
    logging.debug(f"new_graph={new_graph.serialize(format='ttl')}")
    return new_graph
98
+
99
+
100
def execute_construct(triple_store: dict, when: str, bindings: dict = None) -> Graph:
    """Run a CONSTRUCT query on Anzo and parse the result into a graph.

    The query is posted with the input graph as ``default-graph-uri`` and the
    response is requested with ``format=ttl``.

    Args:
        triple_store: Connection settings; the keys read here are ``url``,
            ``port``, ``username``, ``password``, ``gqe_uri`` and
            ``input_graph``.
        when: SPARQL CONSTRUCT query text.
        bindings: Optional variable bindings applied with
            ``query_with_bindings`` before the query is sent.

    Returns:
        A ``Graph`` parsed from the response body.

    Raises:
        ConnectionError, TimeoutError, HTTPError, ConnectTimeout: Logged at
            error level and then re-raised unchanged.
    """
    try:
        query = query_with_bindings(bindings, when) if bindings else when
        payload = {
            'datasourceURI': triple_store['gqe_uri'],
            'query': query,
            'default-graph-uri': triple_store['input_graph'],
            'skipCache': 'true',
        }
        endpoint = f"https://{triple_store['url']}:{triple_store['port']}/sparql?format=ttl"
        credentials = (triple_store['username'], triple_store['password'])
        anzo_response = requests.post(url=endpoint, auth=credentials, data=payload, verify=False)
        logging.debug(f'response {anzo_response}')
        body = manage_anzo_response(anzo_response)
        return Graph().parse(data=body)
    except (ConnectionError, TimeoutError, HTTPError, ConnectTimeout) as error:
        logging.error(f'response {error}')
        raise
117
+
118
+
119
# Get Given or then from the content of a graphmart
def get_spec_component_from_graphmart(triple_store: dict, graphmart: URIRef, layer: URIRef = None) -> ConjunctiveGraph:
    """Fetch the triples of a graphmart (optionally one layer) as an rdflib graph.

    Runs ``CONSTRUCT {?s ?p ?o} WHERE {?s ?p ?o}`` through
    ``AnzoClient.query_graphmart`` with the cache skipped.

    Args:
        triple_store: Connection settings; the keys read here are ``url``,
            ``port``, ``username`` and ``password``.
        graphmart: Graphmart to query.
        layer: Passed to ``query_graphmart`` as ``data_layers``; ``None`` by
            default.

    Returns:
        The query result converted with ``as_quad_store().as_rdflib_graph()``.

    Raises:
        ConnectionError: If the client raises ``RuntimeError``; the original
            error is attached as the cause.
    """
    try:
        anzo_client = AnzoClient(triple_store['url'], triple_store['port'],
                                 username=triple_store['username'],
                                 password=triple_store['password'])
        return anzo_client.query_graphmart(graphmart=graphmart,
                                           data_layers=layer,
                                           query_string="CONSTRUCT {?s ?p ?o} WHERE {?s ?p ?o}",
                                           skip_cache=True).as_quad_store().as_rdflib_graph()
    except RuntimeError as e:
        # Chain explicitly so the traceback marks the RuntimeError as the cause.
        raise ConnectionError(f"Anzo connection error, {e}") from e
131
+
132
+
133
def get_query_from_querybuilder(triple_store: dict, folder_name: Literal, query_name: Literal) -> str:
    """Look up a saved query by folder title and query title.

    Queries the Anzo journal for a ``QueryBookmark`` titled ``query_name``
    stored in a ``QueryFolder`` titled ``folder_name``.

    Args:
        triple_store: Connection settings; the keys read here are ``url``,
            ``port``, ``username`` and ``password``.
        folder_name: Title of the query folder.
        query_name: Title of the bookmarked query.

    Returns:
        The ``query`` value of the first matching record.

    Raises:
        FileNotFoundError: If the journal query returns no record.
    """
    bookmark_lookup = f"""SELECT ?query WHERE {{
graph ?queryFolder {{
?bookmark a <http://www.cambridgesemantics.com/ontologies/QueryPlayground#QueryBookmark>;
<http://openanzo.org/ontologies/2008/07/System#query> ?query;
<http://purl.org/dc/elements/1.1/title> "{query_name}"
}}
?queryFolder a <http://www.cambridgesemantics.com/ontologies/QueryPlayground#QueryFolder>;
<http://purl.org/dc/elements/1.1/title> "{folder_name}"
}}"""
    client = AnzoClient(
        triple_store['url'],
        triple_store['port'],
        username=triple_store['username'],
        password=triple_store['password'],
    )

    journal_result = client.query_journal(query_string=bookmark_lookup)
    records = journal_result.as_table_results().as_record_dictionaries()
    if len(records) == 0:
        raise FileNotFoundError(f"Query {query_name} not found in folder {folder_name}")
    first_record = records[0]
    return first_record.get("query")
151
+
152
+
153
# https://github.com/Semantic-partners/mustrd/issues/102
def get_query_from_step(triple_store: dict, query_step_uri: URIRef) -> str:
    """Return the ``transformQuery`` text of a graphmart step.

    Args:
        triple_store: Connection settings; the keys read here are ``url``,
            ``port``, ``username`` and ``password``.
        query_step_uri: IRI of the step to look up in the Anzo journal.

    Returns:
        The ``query`` value of the first record returned by the journal.

    Raises:
        IndexError: If the journal returns no record for the step.
    """
    query = f"""SELECT ?stepUri ?query WHERE {{
BIND(<{query_step_uri}> as ?stepUri)
?stepUri a <http://cambridgesemantics.com/ontologies/Graphmarts#Step>;
<http://cambridgesemantics.com/ontologies/Graphmarts#transformQuery> ?query
}}
# """
    anzo_client = AnzoClient(triple_store['url'], triple_store['port'],
                             username=triple_store['username'],
                             password=triple_store['password'])
    record_dictionaries = anzo_client.query_journal(query_string=query).as_table_results().as_record_dictionaries()

    if not record_dictionaries:
        # Same exception type as the bare ``[0]`` lookup would raise, but with
        # a message that says which step was missing.
        raise IndexError(f"No transformQuery found for query step {query_step_uri}")
    return record_dictionaries[0].get("query")
168
+
169
def get_queries_from_templated_step(triple_store: dict, query_step_uri: URIRef) -> dict:
    """Return the parameter query and query template of a templated step.

    Args:
        triple_store: Connection settings; the keys read here are ``url``,
            ``port``, ``username`` and ``password``.
        query_step_uri: IRI of the step to look up in the Anzo journal.

    Returns:
        The first record returned by the journal; the query selects
        ``?stepUri``, ``?param_query`` and ``?query_template``.

    Raises:
        IndexError: If the journal returns no record for the step.
    """
    query = f"""SELECT ?stepUri ?param_query ?query_template WHERE {{
BIND(<{query_step_uri}> as ?stepUri)
?stepUri a <http://cambridgesemantics.com/ontologies/Graphmarts#Step> ;
<http://cambridgesemantics.com/ontologies/Graphmarts#parametersTemplate> ?param_query ;
<http://cambridgesemantics.com/ontologies/Graphmarts#template> ?query_template .
}}
"""
    anzo_client = AnzoClient(triple_store['url'], triple_store['port'],
                             username=triple_store['username'],
                             password=triple_store['password'])
    record_dictionaries = anzo_client.query_journal(query_string=query).as_table_results().as_record_dictionaries()
    if not record_dictionaries:
        # Same exception type as the bare ``[0]`` lookup would raise, but with
        # a message that says which step was missing.
        raise IndexError(f"No template queries found for query step {query_step_uri}")
    return record_dictionaries[0]
183
+
184
+
185
def get_queries_for_layer(triple_store: dict, graphmart_layer_uri: URIRef):
    """List the queries of the enabled steps of a graphmart layer, in order.

    For each enabled step the journal query optionally returns a plain
    ``?query`` and/or the pair ``?param_query`` / ``?query_template``;
    rows are ordered by the step's ``anzo:index``.

    Args:
        triple_store: Connection settings; the keys read here are ``url``,
            ``port``, ``username`` and ``password``.
        graphmart_layer_uri: IRI of the layer whose steps are listed.

    Returns:
        The journal result converted with
        ``as_table_results().as_record_dictionaries()``.
    """
    layer_steps_query = f"""PREFIX graphmarts: <http://cambridgesemantics.com/ontologies/Graphmarts#>
PREFIX anzo: <http://openanzo.org/ontologies/2008/07/Anzo#>
SELECT ?query ?param_query ?query_template
{{ <{graphmart_layer_uri}> graphmarts:step ?step .
?step anzo:index ?index ;
anzo:orderedValue ?query_step .
?query_step graphmarts:enabled true ;
OPTIONAL {{ ?query_step
graphmarts:parametersTemplate ?param_query ;
graphmarts:template ?query_template ;
. }}
OPTIONAL {{ ?query_step
graphmarts:transformQuery ?query ;
. }}
}}
ORDER BY ?index"""
    client = AnzoClient(
        triple_store['url'],
        triple_store['port'],
        username=triple_store['username'],
        password=triple_store['password'],
    )
    journal_result = client.query_journal(query_string=layer_steps_query)
    return journal_result.as_table_results().as_record_dictionaries()
206
+
207
+
208
def upload_given(triple_store: dict, given: Graph):
    """Reset the input and output graphs, then load ``given`` into the input graph.

    Both graphs are cleared with ``clear_graph``; ``given`` is serialized as
    N-Triples and sent in a single ``INSERT DATA`` update.

    Args:
        triple_store: Connection settings; the keys read here are ``url``,
            ``port``, ``username``, ``password``, ``gqe_uri``,
            ``input_graph`` and ``output_graph``.
        given: Graph whose triples are uploaded.

    Raises:
        Whatever ``clear_graph``, ``requests.post`` or
        ``manage_anzo_response`` raise; nothing is caught here.
    """
    # The settings dict holds the password: mask it before it reaches the log.
    loggable_store = {key: ("***" if key == "password" else value)
                      for key, value in triple_store.items()}
    logging.debug(f"upload_given {loggable_store} {given}")

    input_graph = triple_store['input_graph']
    output_graph = triple_store['output_graph']
    clear_graph(triple_store, input_graph)
    clear_graph(triple_store, output_graph)

    serialized_given = given.serialize(format="nt")
    insert_query = f"INSERT DATA {{graph <{input_graph}>{{{serialized_given}}}}}"
    data = {'datasourceURI': triple_store['gqe_uri'], 'update': insert_query}
    response = requests.post(url=f"https://{triple_store['url']}:{triple_store['port']}/sparql",
                             auth=(triple_store['username'], triple_store['password']),
                             data=data,
                             verify=False)
    # Called for its side effect: raises when the status code is not 200.
    manage_anzo_response(response)
224
+
225
+
226
def clear_graph(triple_store: dict, graph_uri: str):
    """Send ``CLEAR GRAPH <graph_uri>`` to the Anzo SPARQL endpoint.

    Args:
        triple_store: Connection settings; the keys read here are ``url``,
            ``port``, ``username``, ``password`` and ``gqe_uri``.
        graph_uri: IRI of the graph to clear.

    Raises:
        Whatever ``requests.post`` or ``manage_anzo_response`` raise;
        nothing is caught here.
    """
    payload = {
        'datasourceURI': triple_store['gqe_uri'],
        'update': f"CLEAR GRAPH <{graph_uri}>",
    }
    endpoint = f"https://{triple_store['url']}:{triple_store['port']}/sparql"
    credentials = (triple_store['username'], triple_store['password'])
    anzo_response = requests.post(url=endpoint, auth=credentials, data=payload, verify=False)
    # Called for its side effect: raises when the status code is not 200.
    manage_anzo_response(anzo_response)
236
+