cmem-client 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. cmem_client/__init__.py +13 -0
  2. cmem_client/auth_provider/__init__.py +14 -0
  3. cmem_client/auth_provider/abc.py +124 -0
  4. cmem_client/auth_provider/client_credentials.py +207 -0
  5. cmem_client/auth_provider/password.py +252 -0
  6. cmem_client/auth_provider/prefetched_token.py +153 -0
  7. cmem_client/client.py +485 -0
  8. cmem_client/components/__init__.py +10 -0
  9. cmem_client/components/graph_store.py +316 -0
  10. cmem_client/components/marketplace.py +179 -0
  11. cmem_client/components/sparql_wrapper.py +53 -0
  12. cmem_client/components/workspace.py +194 -0
  13. cmem_client/config.py +364 -0
  14. cmem_client/exceptions.py +82 -0
  15. cmem_client/logging_utils.py +49 -0
  16. cmem_client/models/__init__.py +16 -0
  17. cmem_client/models/access_condition.py +147 -0
  18. cmem_client/models/base.py +30 -0
  19. cmem_client/models/dataset.py +32 -0
  20. cmem_client/models/error.py +67 -0
  21. cmem_client/models/graph.py +26 -0
  22. cmem_client/models/item.py +143 -0
  23. cmem_client/models/logging_config.py +51 -0
  24. cmem_client/models/package.py +35 -0
  25. cmem_client/models/project.py +46 -0
  26. cmem_client/models/python_package.py +26 -0
  27. cmem_client/models/token.py +40 -0
  28. cmem_client/models/url.py +34 -0
  29. cmem_client/models/workflow.py +80 -0
  30. cmem_client/repositories/__init__.py +15 -0
  31. cmem_client/repositories/access_conditions.py +62 -0
  32. cmem_client/repositories/base/__init__.py +12 -0
  33. cmem_client/repositories/base/abc.py +138 -0
  34. cmem_client/repositories/base/paged_list.py +63 -0
  35. cmem_client/repositories/base/plain_list.py +39 -0
  36. cmem_client/repositories/base/task_search.py +70 -0
  37. cmem_client/repositories/datasets.py +36 -0
  38. cmem_client/repositories/graph_imports.py +93 -0
  39. cmem_client/repositories/graphs.py +458 -0
  40. cmem_client/repositories/marketplace_packages.py +486 -0
  41. cmem_client/repositories/projects.py +214 -0
  42. cmem_client/repositories/protocols/__init__.py +15 -0
  43. cmem_client/repositories/protocols/create_item.py +125 -0
  44. cmem_client/repositories/protocols/delete_item.py +95 -0
  45. cmem_client/repositories/protocols/export_item.py +114 -0
  46. cmem_client/repositories/protocols/import_item.py +141 -0
  47. cmem_client/repositories/python_packages.py +58 -0
  48. cmem_client/repositories/workflows.py +143 -0
  49. cmem_client-0.5.0.dist-info/METADATA +64 -0
  50. cmem_client-0.5.0.dist-info/RECORD +52 -0
  51. cmem_client-0.5.0.dist-info/WHEEL +4 -0
  52. cmem_client-0.5.0.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,458 @@
1
+ """Repository for managing named graphs in Corporate Memory.
2
+
3
+ Provides GraphsRepository class for managing RDF named graphs with operations for
4
+ deletion, import and export. Supports multiple RDF formats (Turtle, RDF/XML, JSON-LD, N-Triples)
5
+ with automatic file type detection.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import mimetypes
11
+ import tempfile
12
+ import zipfile
13
+ from copy import copy
14
+ from pathlib import Path
15
+ from tempfile import NamedTemporaryFile
16
+ from typing import TYPE_CHECKING, ClassVar
17
+ from urllib.parse import quote
18
+ from uuid import uuid4
19
+
20
+ from pydantic import Field, TypeAdapter
21
+ from rdflib import Graph as RDFGraph
22
+
23
+ if TYPE_CHECKING:
24
+ from cmem_client.client import Client
25
+
26
+ from cmem_client.exceptions import GraphExportError, GraphImportError, RepositoryModificationError
27
+ from cmem_client.models.base import Model
28
+ from cmem_client.models.graph import Graph
29
+ from cmem_client.models.item import FileImportItem, ImportItem, ZipImportItem
30
+ from cmem_client.repositories.base.abc import RepositoryConfig
31
+ from cmem_client.repositories.base.plain_list import PlainListRepository
32
+ from cmem_client.repositories.protocols.delete_item import DeleteConfig, DeleteItemProtocol
33
+ from cmem_client.repositories.protocols.export_item import ExportConfig, ExportItemProtocol
34
+ from cmem_client.repositories.protocols.import_item import ImportConfig, ImportItemProtocol
35
+
36
+ if TYPE_CHECKING:
37
+ from collections.abc import Generator
38
+
39
+
40
# SPARQL SELECT returning every resource typed as owl:Ontology.
# _import_item runs it against the parsed file to derive the graph key when a
# vocabulary is imported without an explicit key.
GET_ONTOLOGY_IRI_QUERY = """
PREFIX owl: <http://www.w3.org/2002/07/owl#>
SELECT DISTINCT ?iri
WHERE {
?iri a owl:Ontology;
}
"""

# SPARQL SELECT template (str.format placeholder: {ontology_iri}) returning the
# vann:preferredNamespacePrefix / vann:preferredNamespaceUri pair of one ontology.
# Literal braces are doubled because the text goes through str.format.
GET_PREFIX_DECLARATION = """
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX vann: <http://purl.org/vocab/vann/>
SELECT DISTINCT ?prefix ?namespace
WHERE {{
<{ontology_iri}> a owl:Ontology;
vann:preferredNamespacePrefix ?prefix;
vann:preferredNamespaceUri ?namespace.
}}
"""

# SPARQL UPDATE template (str.format placeholders: {iri}, {label}, {language},
# {prefix}, {namespace}) inserting a voaf:Vocabulary entry into the graph
# <https://ns.eccenca.com/example/data/vocabs/>.
# {language} is expected to be either empty or a complete "@tag" suffix.
INSERT_CATALOG_ENTRY = """
PREFIX voaf: <http://purl.org/vocommons/voaf#>
PREFIX vann: <http://purl.org/vocab/vann/>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
WITH <https://ns.eccenca.com/example/data/vocabs/>
INSERT {{
<{iri}> a voaf:Vocabulary ;
skos:prefLabel "{label}"{language} ;
vann:preferredNamespacePrefix "{prefix}" ;
vann:preferredNamespaceUri "{namespace}" ;
dct:description "vocabulary imported with cmem-client" .
}}
WHERE {{}}
"""
74
+
75
+
76
class GraphFileSerialization(Model):
    """Supported graph format description.

    Describes one RDF serialization by its MIME type and the file name
    extensions that identify it.
    """

    # MIME type; sent as the Content-Type header when a graph file is uploaded.
    mime_type: str
    # File name extensions without the leading dot (e.g. "ttl").
    file_extensions: list[str]
    # Content encoding of the file; sent as the Content-Encoding header when set.
    encoding: str | None = None
    # NOTE(review): presumably names of store backends that do not accept this
    # format (e.g. "TENTRIS"); not read anywhere in this module - confirm.
    known_not_supporters: list[str] = Field(default_factory=list)
83
+
84
+
85
class GraphImportConfig(ImportConfig):
    """Graph Import Configuration.

    Options evaluated by the graph import of GraphsRepository.
    """

    # When True, the file is parsed locally, the graph key may be derived from
    # its owl:Ontology resource and the vocabulary caches are reloaded afterwards.
    register_as_vocabulary: bool = False
    # Explicit serialization of the file; when None the format is guessed from
    # the file name.
    serialization: GraphFileSerialization | None = None
90
+
91
+
92
class GraphExportConfig(ExportConfig):
    """Graph Export Configuration.

    NOTE(review): the graph export in this module currently discards the
    configuration object, so neither field influences the export - confirm
    whether that is intended.
    """

    register_as_vocabulary: bool = False
    serialization: GraphFileSerialization | None = None
97
+
98
+
99
class GraphDeleteConfig(DeleteConfig):
    """Graph Delete Configuration.

    Declares no options of its own; the graph deletion in this module does
    not read the configuration object.
    """
101
+
102
+
103
def _extract_vann_metadata(graph: RDFGraph, ontology_iri: str) -> tuple[str, str] | None:
    """Read the preferred vann prefix and namespace of an ontology.

    Runs the GET_PREFIX_DECLARATION query for ``ontology_iri`` on ``graph``.

    Args:
        graph: Parsed RDF graph
        ontology_iri: IRI of the owl:Ontology resource

    Returns:
        Tuple of (prefix, namespace_uri) if exactly one declaration exists,
        None if there is none.

    Raises:
        GraphImportError: If more than one declaration is found.
    """
    declarations = graph.query(GET_PREFIX_DECLARATION.format(ontology_iri=ontology_iri))
    declaration_count = len(declarations)

    if declaration_count > 1:
        raise GraphImportError(f"Multiple vann namespace declarations found for ontology: {ontology_iri}")
    if declaration_count == 0:
        return None

    # Exactly one result row: column 0 is ?prefix, column 1 is ?namespace.
    row = next(iter(declarations))
    return str(row[0]), str(row[1])  # type: ignore[index]
126
+
127
+
128
class GraphsRepository(PlainListRepository, DeleteItemProtocol, ImportItemProtocol, ExportItemProtocol):
    """Repository for graphs.

    This repository manages named graphs which are described with the Graph model.
    Supports both regular graphs and vocabularies through the register_as_vocabulary flag.
    """

    # Client used by the methods of this class for HTTP requests, URL
    # configuration and SPARQL updates.
    _client: Client

    # NOTE(review): presumably the fetched graphs keyed by graph IRI - the
    # attribute is only declared here, confirm against the base repository.
    _dict: dict[str, Graph]
    # Import item types accepted by this repository.
    _allowed_import_items: ClassVar[list[type[ImportItem]]] = [FileImportItem, ZipImportItem]
    # Listing configuration: "/graphs/list" of the "explore" component, parsed
    # as a list of Graph models.
    _config = RepositoryConfig(
        component="explore",
        fetch_data_path="/graphs/list",
        fetch_data_adapter=TypeAdapter(list[Graph]),
    )
    # Known RDF serializations; guess_file_type matches files against them.
    _formats: ClassVar[dict[str, GraphFileSerialization]] = {
        "turtle": GraphFileSerialization(mime_type="text/turtle", file_extensions=["ttl"]),
        "rdf/xml": GraphFileSerialization(
            mime_type="application/rdf+xml", file_extensions=["rdf", "xml"], known_not_supporters=["TENTRIS"]
        ),
        "json-ld": GraphFileSerialization(
            mime_type="application/ld+json", file_extensions=["jsonld"], known_not_supporters=["TENTRIS"]
        ),
        "n-triples": GraphFileSerialization(mime_type="application/n-triples", file_extensions=["nt"]),
    }
154
+
155
def export_to_zip(self, key: str, path: Path | None = None, replace: bool = False) -> Path:
    """Export graph to a ZIP file.

    The graph is exported as Turtle into a temporary directory via
    ``export_item`` and then stored as the single member of a deflated ZIP
    archive. The member name is the last ``/``-separated segment of ``key``
    (trailing ``#`` removed, ``graph`` if nothing is left) with a ``.ttl``
    extension.

    Args:
        key: The URI/identifier of the graph to export.
        path: Optional target path for the ZIP file. If None, creates a temporary file.
        replace: Whether to overwrite an existing file at the target path.

    Returns:
        Path to the created ZIP file.

    Raises:
        GraphExportError: If the file already exists and replace is False, or if
            the exported graph is empty.
    """
    created_temp_zip = path is None
    if path is None:
        # Only the reserved name is needed here; the archive is written below.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".zip") as tmp:
            zip_path = Path(tmp.name)
    else:
        zip_path = path
        if zip_path.exists() and not replace:
            raise GraphExportError(f"File {zip_path} already exists and replace is False")

    try:
        with tempfile.TemporaryDirectory() as tmpdir:
            turtle_path = Path(tmpdir) / "graph.ttl"
            self.export_item(key=key, path=turtle_path, replace=True)

            turtle_content = turtle_path.read_bytes()
            if not turtle_content:
                raise GraphExportError("Exported turtle file is empty")

            safe_name = key.split("/")[-1].rstrip("#") or "graph"
            if not safe_name.endswith(".ttl"):
                safe_name += ".ttl"

            with zipfile.ZipFile(zip_path, mode="w", compression=zipfile.ZIP_DEFLATED) as zipf:
                zipf.writestr(safe_name, turtle_content)
    except Exception:
        # The temporary ZIP was created with delete=False, so nobody else
        # removes it; a caller-supplied path is left untouched.
        if created_temp_zip:
            zip_path.unlink(missing_ok=True)
        raise

    return zip_path
196
+
197
+ def _delete_item(self, key: str, configuration: GraphDeleteConfig | None = None) -> None:
198
+ """Delete a named graph from the repository.
199
+
200
+ Args:
201
+ key: The URI/identifier of the graph to delete.
202
+ configuration: Optional configuration to delete.
203
+
204
+ Raises:
205
+ HTTPError: If the deletion request fails.
206
+ """
207
+ _ = configuration
208
+ url = self._url("/proxy/default/graph")
209
+ params = {"graph": key}
210
+ response = self._client.http.delete(url=url, params=params)
211
+ response.raise_for_status()
212
+
213
def guess_file_type(self, path: Path) -> GraphFileSerialization:
    """Guess the RDF serialization format from a file path.

    The MIME type reported by ``mimetypes.guess_type`` is compared with the
    known formats first. If that gives no match, the file name is compared
    case-insensitively with each format's extensions, either as ``.<ext>``
    or as ``.<ext>.gz``. The first matching format wins.

    Args:
        path: Path to the RDF file to analyze.

    Returns:
        GraphFileSerialization: A copy of the detected format. Its encoding
            is set to the encoding reported by ``mimetypes`` (e.g. ``gzip``),
            if any.

    Raises:
        RepositoryModificationError: If the file type cannot be determined
            from the path or extension.
    """
    content_type, encoding = mimetypes.guess_type(path)
    known_formats = list(self._formats.values())

    # First choice: the MIME type derived by the standard library.
    detected = next(
        (fmt for fmt in known_formats if content_type is not None and fmt.mime_type == content_type),
        None,
    )

    if detected is None:
        # Fallback: real file extensions only. The dot is required so that a
        # name which merely ends with the letters (e.g. "account" vs "nt")
        # is not mistaken for a graph file.
        file_name = path.name.lower()
        detected = next(
            (
                fmt
                for fmt in known_formats
                if any(
                    file_name.endswith((f".{ext.lower()}", f".{ext.lower()}.gz"))
                    for ext in fmt.file_extensions
                )
            ),
            None,
        )

    if detected is None:
        raise RepositoryModificationError(f"Can not guess file type of {path.name}")

    # Copy so that setting the encoding never alters the shared format table.
    guessed_file_type = copy(detected)
    if encoding is not None:
        guessed_file_type.encoding = encoding
    return guessed_file_type
250
+
251
+ @staticmethod
252
+ def byte_generator(file_path: Path, chunk_size: int = 1024) -> Generator[bytes, None, None]:
253
+ """Generate bytes from a file in chunks.
254
+
255
+ Args:
256
+ file_path: Path to the file to read
257
+ chunk_size: Size of each chunk in bytes (default: 1024)
258
+
259
+ Yields:
260
+ bytes: Chunks of data from the file
261
+ """
262
+ with file_path.open("rb") as opened_file:
263
+ while True:
264
+ chunk = opened_file.read(chunk_size)
265
+ if not chunk:
266
+ break
267
+ yield chunk
268
+
269
+ def _import_item( # noqa: C901 PLR0912
270
+ self,
271
+ path: Path | None = Path(),
272
+ replace: bool = False,
273
+ key: str | None = None,
274
+ configuration: GraphImportConfig | None = None,
275
+ ) -> str:
276
+ """Import an RDF graph from a file into the repository.
277
+
278
+ Uploads an RDF file to Corporate Memory, automatically detecting the
279
+ serialization format and setting appropriate HTTP headers. If no key
280
+ is provided, generates a unique URI for the graph.
281
+
282
+ For vocabularies (when register_as_vocabulary=True), extracts vann namespace
283
+ metadata and creates a catalog entry.
284
+
285
+ Args:
286
+ path: Path to the RDF file to import.
287
+ replace: Whether to replace an existing graph with the same key.
288
+ key: Optional URI/identifier for the graph. If None, generates
289
+ a unique UUID-based URI.
290
+ configuration: Optional configuration object for the graph to decide
291
+ weather the graph should be imported as a vocabulary.
292
+
293
+ Returns:
294
+ str: The URI/identifier of the imported graph.
295
+
296
+ Raises:
297
+ RepositoryModificationError: If the file type cannot be detected.
298
+ GraphImportError: If no path is given,
299
+ vocabulary validation fails or vann metadata is missing.
300
+ HTTPError: If the import request fails.
301
+ """
302
+ if path is None:
303
+ raise GraphImportError("Path must be specified.")
304
+
305
+ if configuration is None:
306
+ configuration = GraphImportConfig()
307
+
308
+ if path.is_dir():
309
+ graph_files = [f for f in path.iterdir() if f.is_file()]
310
+ if len(graph_files) == 0:
311
+ raise GraphImportError(f"No graph files found in directory {path}")
312
+ if len(graph_files) > 1:
313
+ raise GraphImportError(f"Multiple graph files found in directory {path}")
314
+ path = graph_files[0]
315
+
316
+ parsed_graph: RDFGraph
317
+ vann_metadata: tuple[str, str] | None = None
318
+
319
+ if configuration.register_as_vocabulary:
320
+ parsed_graph = RDFGraph().parse(path)
321
+
322
+ if key is None:
323
+ ontology_iris = parsed_graph.query(GET_ONTOLOGY_IRI_QUERY)
324
+ if len(ontology_iris) == 0:
325
+ raise GraphImportError("There is no owl:Ontology resource described in the RDF file.")
326
+ if len(ontology_iris) > 1:
327
+ ontology_iris_str = [str(iri[0]) for iri in ontology_iris] # type: ignore[index]
328
+ raise GraphImportError(
329
+ f"There are more than one owl:Ontology resources described in the RDF file: {ontology_iris_str}"
330
+ )
331
+ key = str(next(iter(ontology_iris))[0]) # type: ignore[index]
332
+ vann_metadata = _extract_vann_metadata(parsed_graph, key)
333
+
334
+ if key is None:
335
+ key = str(self._client.config.url_base / f"{uuid4()!s}/")
336
+
337
+ encoded_key = quote(key, safe="")
338
+ url = self._url(f"/proxy/default/graph?graph={encoded_key}&replace={str(replace).lower()}")
339
+
340
+ file_type = configuration.serialization or self.guess_file_type(path)
341
+
342
+ headers = {"Content-Type": file_type.mime_type}
343
+ if file_type.encoding:
344
+ headers["Content-Encoding"] = file_type.encoding
345
+
346
+ self._client.http.post(url=url, headers=headers, content=self.byte_generator(file_path=path)).raise_for_status()
347
+ self.fetch_data()
348
+
349
+ if configuration.register_as_vocabulary:
350
+ if vann_metadata is not None:
351
+ prefix, namespace = vann_metadata
352
+ label, language = self._resolve_label(iri=key, prefix=prefix)
353
+ self._insert_catalog_entry(iri=key, prefix=prefix, namespace=namespace, label=label, language=language)
354
+
355
+ self._reload_vocabularies(key)
356
+
357
+ return key
358
+
359
+ def _export_item(
360
+ self, key: str, path: Path | None, replace: bool = False, configuration: GraphExportConfig | None = None
361
+ ) -> Path:
362
+ """Export a named graph from the repository to a file.
363
+
364
+ Downloads the specified graph from Corporate Memory and saves it to
365
+ the given path as Turtle format. If no path is provided, creates a
366
+ temporary file. OWL imports are not resolved during export.
367
+
368
+ Args:
369
+ key: The URI/identifier of the graph to export.
370
+ path: Optional target path for the exported file. If None,
371
+ creates a temporary file with .ttl extension.
372
+ replace: Whether to overwrite an existing file at the target path.
373
+ configuration: Optional configuration object for the graph to decide
374
+ whether the graph should be exported as a vocabulary.
375
+
376
+ Returns:
377
+ Path: The path where the graph was exported.
378
+
379
+ Raises:
380
+ FileExistsError: If the target file exists and replace is False.
381
+ HTTPError: If the export request fails.
382
+ """
383
+ _ = configuration
384
+ if path and path.exists() and not replace:
385
+ raise FileExistsError(f"File {path.name} already exists and replace is {replace}")
386
+ encoded_key = quote(key, safe="")
387
+ url = self._url(f"/proxy/default/graph?graph={encoded_key}&owlImportsResolution=false")
388
+ with (
389
+ NamedTemporaryFile(delete=False, suffix=".ttl") if path is None else path.open("wb") as opened_file,
390
+ self._client.http.stream(method="GET", url=url) as response,
391
+ ):
392
+ response.raise_for_status()
393
+ for chunk in response.iter_bytes():
394
+ if chunk:
395
+ opened_file.write(chunk)
396
+ opened_file.close()
397
+ return Path(opened_file.name)
398
+
399
+ def _resolve_label(self, iri: str, prefix: str) -> tuple[str, str]:
400
+ """Resolve label for an ontology using the /api/explore/titles endpoint.
401
+
402
+ Calls Corporate Memory's title resolution API to get a human-readable label
403
+ for the ontology IRI. Formats the label with the prefix as "prefix: title".
404
+
405
+ Args:
406
+ iri: The ontology IRI to resolve
407
+ prefix: The namespace prefix to prepend to the label
408
+
409
+ Returns:
410
+ Tuple of (label, language_tag). Language tag is empty if not present.
411
+ """
412
+ url = self._client.config.url_explore_api / "/api/explore/titles"
413
+ headers = {"Content-Type": "application/json", "Accept": "application/json"}
414
+ params = {"contextGraph": iri}
415
+ response = self._client.http.post(url=url, headers=headers, params=params, json=[iri])
416
+ response.raise_for_status()
417
+ results = response.json()
418
+
419
+ resolved = results.get(iri, {})
420
+ title = resolved.get("title", iri)
421
+ lang = resolved.get("lang", "")
422
+
423
+ label = f"{prefix}: {title}" if not title.startswith(f"{prefix}:") else title
424
+
425
+ return label, lang
426
+
427
def _insert_catalog_entry(self, iri: str, prefix: str, namespace: str, label: str, language: str) -> None:
    """Insert vocabulary catalog entry with vann namespace metadata.

    Fills the INSERT_CATALOG_ENTRY template and sends it as a SPARQL update
    through the client's store. The values placed inside quoted string
    literals (label, prefix, namespace) are escaped first, so that a quote,
    backslash or line break in them cannot terminate the literal and
    corrupt or extend the update.

    Args:
        iri: The ontology IRI
        prefix: The vann:preferredNamespacePrefix
        namespace: The vann:preferredNamespaceUri
        label: The human-readable label for the vocabulary
        language: Optional language tag (e.g., "en", "de")
    """
    # Escape sequences defined for SPARQL string literals.
    escapes = str.maketrans(
        {
            "\\": "\\\\",
            '"': '\\"',
            "\n": "\\n",
            "\r": "\\r",
            "\t": "\\t",
        }
    )

    language_tag = f"@{language}" if language else ""

    # NOTE(review): iri and language are inserted verbatim (inside <...> and
    # as @tag); they are assumed to be syntactically valid - confirm.
    query = INSERT_CATALOG_ENTRY.format(
        iri=iri,
        prefix=prefix.translate(escapes),
        namespace=namespace.translate(escapes),
        label=label.translate(escapes),
        language=language_tag,
    )
    self._client.store.sparql.update(query)
450
+
451
+ def _reload_vocabularies(self, iri: str) -> None:
452
+ """Reload the caches and prefixes for vocabularies."""
453
+ reload_prefix_url = self._client.config.url_build_api / "/workspace/reloadPrefixes"
454
+ self._client.http.post(url=reload_prefix_url)
455
+ update_cache_url = self._client.config.url_build_api / "/workspace/updateGlobalVocabularyCache"
456
+ update_cache_data = {"iri": iri}
457
+ update_cache_headers = {"Content-Type": "application/json"}
458
+ self._client.http.post(url=update_cache_url, json=update_cache_data, headers=update_cache_headers)