rdfsolve 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rdfsolve/__init__.py +26 -0
- rdfsolve/__main__.py +12 -0
- rdfsolve/api.py +515 -0
- rdfsolve/cli.py +657 -0
- rdfsolve/models.py +138 -0
- rdfsolve/parser.py +4126 -0
- rdfsolve/py.typed +1 -0
- rdfsolve/schema_utils.py +326 -0
- rdfsolve/sparql_helper.py +846 -0
- rdfsolve/tools/utils.py +49 -0
- rdfsolve/utils.py +159 -0
- rdfsolve/version.py +42 -0
- rdfsolve-0.0.1.dist-info/METADATA +307 -0
- rdfsolve-0.0.1.dist-info/RECORD +18 -0
- rdfsolve-0.0.1.dist-info/WHEEL +5 -0
- rdfsolve-0.0.1.dist-info/entry_points.txt +2 -0
- rdfsolve-0.0.1.dist-info/licenses/LICENSE +21 -0
- rdfsolve-0.0.1.dist-info/top_level.txt +1 -0
rdfsolve/__init__.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"""RDFSolve: A library for RDF schema analysis and VoID generation.
|
|
2
|
+
|
|
3
|
+
Main modules:
|
|
4
|
+
- parser: VoidParser class for parsing VoID descriptions and schemas
|
|
5
|
+
- config: RDF configuration tools for YAML model processing (separate module)
|
|
6
|
+
- utils: Common utility functions for RDF processing
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
# Import utilities, models and the parser
|
|
10
|
+
from . import utils
|
|
11
|
+
from .models import LinkMLSchema, SchemaTriple, VoidSchema
|
|
12
|
+
from .parser import VoidParser
|
|
13
|
+
from .parser import parse_void_file as parse_void_simple
|
|
14
|
+
|
|
15
|
+
# Import version information
|
|
16
|
+
from .version import VERSION
|
|
17
|
+
|
|
18
|
+
__all__ = [
|
|
19
|
+
"VERSION",
|
|
20
|
+
"LinkMLSchema",
|
|
21
|
+
"SchemaTriple",
|
|
22
|
+
"VoidParser",
|
|
23
|
+
"VoidSchema",
|
|
24
|
+
"parse_void_simple",
|
|
25
|
+
"utils",
|
|
26
|
+
]
|
rdfsolve/__main__.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"""Entrypoint module, in case you use `python -m rdfsolve`.
|
|
2
|
+
|
|
3
|
+
Why does this file exist, and why ``__main__``? For more info, read:
|
|
4
|
+
|
|
5
|
+
- https://www.python.org/dev/peps/pep-0338/
|
|
6
|
+
- https://docs.python.org/3/using/cmdline.html#cmdoption-m
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from .cli import main
|
|
10
|
+
|
|
11
|
+
if __name__ == "__main__":
    # Reached when the package is executed as ``python -m rdfsolve``:
    # hand control over to the command-line entry point.
    main()
|
rdfsolve/api.py
ADDED
|
@@ -0,0 +1,515 @@
|
|
|
1
|
+
"""Main RDFSolve functionalities for VoID extraction and conversion."""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Dict, List, Optional, Union
|
|
4
|
+
|
|
5
|
+
import pandas as pd
|
|
6
|
+
from rdflib import Graph
|
|
7
|
+
|
|
8
|
+
from .parser import VoidParser
|
|
9
|
+
|
|
10
|
+
__all__ = [
|
|
11
|
+
"count_instances",
|
|
12
|
+
"count_instances_per_class",
|
|
13
|
+
"discover_void_graphs",
|
|
14
|
+
"extract_partitions_from_void",
|
|
15
|
+
"generate_void_from_endpoint",
|
|
16
|
+
"graph_to_jsonld",
|
|
17
|
+
"graph_to_linkml",
|
|
18
|
+
"graph_to_schema",
|
|
19
|
+
"graph_to_shacl",
|
|
20
|
+
"load_parser_from_file",
|
|
21
|
+
"load_parser_from_graph",
|
|
22
|
+
"retrieve_void_from_graphs",
|
|
23
|
+
"to_jsonld_from_file",
|
|
24
|
+
"to_linkml_from_file",
|
|
25
|
+
"to_rdfconfig_from_file",
|
|
26
|
+
"to_shacl_from_file",
|
|
27
|
+
]
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def load_parser_from_file(
    void_file_path: str,
    graph_uris: Optional[Union[str, List[str]]] = None,
    exclude_graphs: bool = True,
) -> VoidParser:
    """Create a VoidParser whose source is a VoID file on disk.

    Args:
        void_file_path: Path to the VoID Turtle file
        graph_uris: Graph URI (or list of graph URIs) used to filter queries
        exclude_graphs: Whether system graphs are excluded

    Returns:
        VoidParser constructed with ``void_file_path`` as its ``void_source``
    """
    void_parser = VoidParser(
        void_source=void_file_path,
        graph_uris=graph_uris,
        exclude_graphs=exclude_graphs,
    )
    return void_parser
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def load_parser_from_graph(
    graph: Graph,
    graph_uris: Optional[Union[str, List[str]]] = None,
    exclude_graphs: bool = True,
) -> VoidParser:
    """Create a VoidParser whose source is an in-memory VoID graph.

    Args:
        graph: RDFLib Graph holding the VoID data
        graph_uris: Graph URI (or list of graph URIs) used to filter queries
        exclude_graphs: Whether system graphs are excluded

    Returns:
        VoidParser constructed with ``graph`` as its ``void_source``
    """
    void_parser = VoidParser(
        void_source=graph,
        graph_uris=graph_uris,
        exclude_graphs=exclude_graphs,
    )
    return void_parser
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def to_linkml_from_file(
    void_file_path: str,
    filter_void_nodes: bool = True,
    schema_name: Optional[str] = None,
    schema_description: Optional[str] = None,
    schema_base_uri: Optional[str] = None,
) -> str:
    """Produce a LinkML YAML schema from a VoID file.

    The file is loaded through ``load_parser_from_file`` with that helper's
    default graph filtering, and the resulting parser renders the schema.

    Args:
        void_file_path: Path to the VoID file
        filter_void_nodes: Whether VoID-specific nodes are removed
        schema_name: Name given to the schema
        schema_description: Description given to the schema
        schema_base_uri: Base URI given to the schema

    Returns:
        The LinkML schema as a YAML string
    """
    void_parser = load_parser_from_file(void_file_path)
    linkml_yaml = void_parser.to_linkml_yaml(
        filter_void_nodes=filter_void_nodes,
        schema_name=schema_name,
        schema_description=schema_description,
        schema_base_uri=schema_base_uri,
    )
    return linkml_yaml
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def to_shacl_from_file(
    void_file_path: str,
    filter_void_nodes: bool = True,
    schema_name: Optional[str] = None,
    schema_description: Optional[str] = None,
    schema_base_uri: Optional[str] = None,
    closed: bool = True,
    suffix: Optional[str] = None,
    include_annotations: bool = False,
) -> str:
    """Produce SHACL shapes from a VoID file.

    SHACL (Shapes Constraint Language) shapes express constraints on RDF
    data and can be used for validation. The VoID description file is
    loaded through ``load_parser_from_file`` and the resulting parser
    renders the shapes.

    Args:
        void_file_path: Path to the VoID file
        filter_void_nodes: Whether VoID-specific nodes are removed
        schema_name: Name given to the schema
        schema_description: Description given to the schema
        schema_base_uri: Base URI given to the schema
        closed: Generate closed shapes (only defined properties allowed)
        suffix: Optional suffix appended to shape names (e.g., "Shape")
        include_annotations: Whether class/slot annotations are included

    Returns:
        The SHACL shapes as a Turtle/RDF string

    Example:
        >>> from rdfsolve.api import to_shacl_from_file
        >>> shacl_ttl = to_shacl_from_file(
        ...     "dataset_void.ttl", schema_name="my_dataset", closed=True
        ... )
        >>> with open("schema.shacl.ttl", "w") as f:
        ...     f.write(shacl_ttl)
    """
    void_parser = load_parser_from_file(void_file_path)
    shacl_turtle = void_parser.to_shacl(
        filter_void_nodes=filter_void_nodes,
        schema_name=schema_name,
        schema_description=schema_description,
        schema_base_uri=schema_base_uri,
        closed=closed,
        suffix=suffix,
        include_annotations=include_annotations,
    )
    return shacl_turtle
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def to_rdfconfig_from_file(
    void_file_path: str,
    filter_void_nodes: bool = True,
    endpoint_url: Optional[str] = None,
    endpoint_name: Optional[str] = None,
    graph_uri: Optional[str] = None,
) -> Dict[str, str]:
    """Produce RDF-config YAML documents from a VoID file.

    RDF-config describes RDF data models with YAML configuration files.
    Three documents are generated:
    - model.yml: Class and property structure
    - prefix.yml: Namespace prefix definitions
    - endpoint.yml: SPARQL endpoint configuration

    Note: The rdf-config tool expects the files to be named exactly
    model.yml, prefix.yml, and endpoint.yml, inside a directory named
    {dataset}_config. The CLI creates this layout automatically.

    Args:
        void_file_path: Path to the VoID file
        filter_void_nodes: Whether VoID-specific nodes are removed
        endpoint_url: SPARQL endpoint URL (optional)
        endpoint_name: Name for the endpoint (default: "endpoint")
        graph_uri: Named graph URI (optional)

    Returns:
        Dictionary with 'model', 'prefix' and 'endpoint' keys, each
        holding a YAML string

    Example:
        >>> from rdfsolve.api import to_rdfconfig_from_file
        >>> rdfconfig = to_rdfconfig_from_file(
        ...     "dataset_void.ttl",
        ...     endpoint_url="https://example.org/sparql",
        ...     graph_uri="http://example.org/graph",
        ... )
        >>> # Save files
        >>> with open("model.yml", "w") as f:
        ...     f.write(rdfconfig["model"])
        >>> with open("prefix.yml", "w") as f:
        ...     f.write(rdfconfig["prefix"])
        >>> with open("endpoint.yml", "w") as f:
        ...     f.write(rdfconfig["endpoint"])
    """
    void_parser = load_parser_from_file(void_file_path)
    yaml_documents = void_parser.to_rdfconfig(
        filter_void_nodes=filter_void_nodes,
        endpoint_url=endpoint_url,
        endpoint_name=endpoint_name,
        graph_uri=graph_uri,
    )
    return yaml_documents
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def to_jsonld_from_file(
    void_file_path: str, filter_void_admin_nodes: bool = True
) -> Dict[str, Any]:
    """Produce a JSON-LD document from a VoID file.

    Args:
        void_file_path: Path to the VoID file
        filter_void_admin_nodes: Whether VoID and administrative nodes are removed

    Returns:
        JSON-LD with @context and @graph
    """
    void_parser = load_parser_from_file(void_file_path)
    jsonld_document = void_parser.to_jsonld(
        filter_void_admin_nodes=filter_void_admin_nodes,
    )
    return jsonld_document
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
def graph_to_jsonld(
    graph: Graph,
    graph_uris: Optional[Union[str, List[str]]] = None,
    filter_void_admin_nodes: bool = True,
) -> Dict[str, Any]:
    """Produce a JSON-LD document from an in-memory VoID graph.

    Args:
        graph: RDFLib Graph holding the VoID data
        graph_uris: Graph URI (or list of graph URIs) used to filter extraction
        filter_void_admin_nodes: Whether VoID and administrative nodes are removed

    Returns:
        JSON-LD with @context and @graph
    """
    void_parser = load_parser_from_graph(graph, graph_uris=graph_uris)
    jsonld_document = void_parser.to_jsonld(
        filter_void_admin_nodes=filter_void_admin_nodes,
    )
    return jsonld_document
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def graph_to_linkml(
    graph: Graph,
    graph_uris: Optional[Union[str, List[str]]] = None,
    filter_void_nodes: bool = True,
    schema_name: Optional[str] = None,
    schema_description: Optional[str] = None,
    schema_base_uri: Optional[str] = None,
) -> str:
    """Produce a LinkML YAML schema from an in-memory VoID graph.

    Args:
        graph: RDFLib Graph holding the VoID data
        graph_uris: Graph URI (or list of graph URIs) used to filter extraction
        filter_void_nodes: Whether VoID-specific nodes are removed
        schema_name: Name given to the schema
        schema_description: Description given to the schema
        schema_base_uri: Base URI given to the schema

    Returns:
        The LinkML schema as a YAML string
    """
    void_parser = load_parser_from_graph(graph, graph_uris=graph_uris)
    linkml_yaml = void_parser.to_linkml_yaml(
        filter_void_nodes=filter_void_nodes,
        schema_name=schema_name,
        schema_description=schema_description,
        schema_base_uri=schema_base_uri,
    )
    return linkml_yaml
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
def graph_to_shacl(
    graph: Graph,
    graph_uris: Optional[Union[str, List[str]]] = None,
    filter_void_nodes: bool = True,
    schema_name: Optional[str] = None,
    schema_description: Optional[str] = None,
    schema_base_uri: Optional[str] = None,
    closed: bool = True,
    suffix: Optional[str] = None,
    include_annotations: bool = False,
) -> str:
    """Produce SHACL shapes from an in-memory VoID graph.

    SHACL (Shapes Constraint Language) shapes express constraints on RDF
    data and can be used for validation.

    Args:
        graph: RDFLib Graph holding the VoID data
        graph_uris: Graph URI (or list of graph URIs) used to filter extraction
        filter_void_nodes: Whether VoID-specific nodes are removed
        schema_name: Name given to the schema
        schema_description: Description given to the schema
        schema_base_uri: Base URI given to the schema
        closed: Generate closed shapes (only defined properties allowed)
        suffix: Optional suffix appended to shape names (e.g., "Shape")
        include_annotations: Whether class/slot annotations are included

    Returns:
        The SHACL shapes as a Turtle/RDF string

    Example:
        >>> from rdflib import Graph
        >>> from rdfsolve.api import graph_to_shacl
        >>> void_graph = Graph()
        >>> void_graph.parse("dataset_void.ttl", format="turtle")
        >>> shacl_ttl = graph_to_shacl(void_graph, schema_name="my_dataset")
    """
    void_parser = load_parser_from_graph(graph, graph_uris=graph_uris)
    shacl_turtle = void_parser.to_shacl(
        filter_void_nodes=filter_void_nodes,
        schema_name=schema_name,
        schema_description=schema_description,
        schema_base_uri=schema_base_uri,
        closed=closed,
        suffix=suffix,
        include_annotations=include_annotations,
    )
    return shacl_turtle
|
|
312
|
+
|
|
313
|
+
|
|
314
|
+
def discover_void_graphs(
    endpoint_url: str,
    graph_uris: Optional[Union[str, List[str]]] = None,
    exclude_graphs: bool = False,
) -> Dict[str, Any]:
    """Look for VoID graphs exposed by a SPARQL endpoint.

    A VoidParser without a VoID source is configured with the given graph
    filter settings and asked to run discovery against the endpoint.

    Args:
        endpoint_url: SPARQL endpoint URL
        graph_uris: Graph URI (or list of graph URIs) to search
        exclude_graphs: Exclude Virtuoso system graphs; this wrapper
            defaults to False for discovery

    Returns:
        Discovery metadata per graph URI
    """
    discovery_parser = VoidParser(
        graph_uris=graph_uris,
        exclude_graphs=exclude_graphs,
    )
    discovered = discovery_parser.discover_void_graphs(endpoint_url)
    return discovered
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
def count_instances(
    endpoint_url: str,
    sample_limit: Optional[int] = None,
    sample_offset: Optional[int] = None,
    chunk_size: Optional[int] = None,
    offset_limit_steps: Optional[int] = None,
    delay_between_chunks: float = 20.0,
    streaming: bool = False,
) -> Union[Dict[str, int], Any]:
    """Count the instances of each class found in a SPARQL endpoint.

    All options are forwarded unchanged to
    ``VoidParser.count_instances_per_class`` on a default-constructed parser.

    Args:
        endpoint_url: SPARQL endpoint URL
        sample_limit: Max results to return
        sample_offset: Starting offset
        chunk_size: Chunk size for pagination
        offset_limit_steps: Combined LIMIT/OFFSET step
        delay_between_chunks: Seconds between chunks
        streaming: Return a generator if True

    Returns:
        Dict mapping class URI to count, or a generator
    """
    default_parser = VoidParser()
    counts = default_parser.count_instances_per_class(
        endpoint_url,
        sample_limit=sample_limit,
        sample_offset=sample_offset,
        chunk_size=chunk_size,
        offset_limit_steps=offset_limit_steps,
        delay_between_chunks=delay_between_chunks,
        streaming=streaming,
    )
    return counts
|
|
370
|
+
|
|
371
|
+
|
|
372
|
+
def extract_partitions_from_void(
    endpoint_url: str, void_graph_uris: List[str]
) -> List[Dict[str, str]]:
    """Fetch partition records from previously discovered VoID graphs.

    Args:
        endpoint_url: SPARQL endpoint URL
        void_graph_uris: VoID graph URIs that contain partitions

    Returns:
        List of partition records (class-property-object)
    """
    default_parser = VoidParser()
    partition_records = default_parser.retrieve_partitions_from_void(
        endpoint_url, void_graph_uris
    )
    return partition_records
|
|
386
|
+
|
|
387
|
+
|
|
388
|
+
def retrieve_void_from_graphs(
    endpoint_url: str,
    void_graph_uris: List[str],
    graph_uris: Optional[Union[str, List[str]]] = None,
    partitions: Optional[List[Dict[str, str]]] = None,
) -> Graph:
    """Build a VoID graph from partition data found at an endpoint.

    If partition data is supplied (e.g. taken from a ``discover_void_graphs``
    result) the graph is built directly from it and the endpoint is not
    queried. Otherwise discovery is run against ``endpoint_url`` and the
    ``"partitions"`` entry of its result is used.

    Args:
        endpoint_url: SPARQL endpoint URL; only queried when ``partitions``
            is missing or empty
        void_graph_uris: Graph URIs containing VoID. Only the first entry is
            used here, as the ``base_uri`` handed to the graph builder; an
            empty list yields ``base_uri=None``
        graph_uris: Graph URI (or list of graph URIs) used to filter queries
        partitions: Optional partition data from a discover_void_graphs result

    Returns:
        RDF Graph with VoID descriptions built from partition data, or an
        empty Graph when no partitions are available
    """
    parser = VoidParser(graph_uris=graph_uris)

    # No usable partition data from the caller: fall back to discovery.
    if not partitions:
        discovery_result = parser.discover_void_graphs(endpoint_url)
        partitions = discovery_result.get("partitions", [])

    # Still nothing to build from: return an empty graph.
    if not partitions:
        return Graph()

    base_uri = void_graph_uris[0] if void_graph_uris else None
    return parser.build_void_graph_from_partitions(partitions, base_uri=base_uri)
|
|
427
|
+
|
|
428
|
+
|
|
429
|
+
def generate_void_from_endpoint(
    endpoint_url: str,
    graph_uris: Optional[Union[str, List[str]]] = None,
    output_file: Optional[str] = None,
    counts: bool = True,
    offset_limit_steps: Optional[int] = None,
    exclude_graphs: bool = True,
    dataset_uri: Optional[str] = None,
    void_base_uri: Optional[str] = None,
) -> Graph:
    """Generate VoID description from a SPARQL endpoint.

    Thin wrapper around ``VoidParser.generate_void_from_sparql``; every
    argument except ``dataset_uri`` is forwarded unchanged.

    Args:
        endpoint_url: SPARQL endpoint URL
        graph_uris: Graph URI(s) to analyze
        output_file: Path to save Turtle output
        counts: Include instance counts
        offset_limit_steps: Chunk size for pagination
        exclude_graphs: Exclude system graphs
        dataset_uri: Custom URI for the VoID dataset. Kept in the signature
            for backward compatibility, but this wrapper does not pass it to
            the parser, so it has no effect here.
            NOTE(review): confirm whether generate_void_from_sparql should
            receive it.
        void_base_uri: Custom base URI for VoID partition IRIs

    Returns:
        RDF graph with VoID description
    """
    # The previous implementation derived a default ``dataset_uri`` (first
    # graph URI, else the endpoint URL) and then never used it; that dead
    # computation has been removed.
    return VoidParser.generate_void_from_sparql(
        endpoint_url=endpoint_url,
        graph_uris=graph_uris,
        output_file=output_file,
        counts=counts,
        offset_limit_steps=offset_limit_steps,
        exclude_graphs=exclude_graphs,
        void_base_uri=void_base_uri,
    )
|
|
473
|
+
|
|
474
|
+
|
|
475
|
+
def graph_to_schema(
    void_graph: Graph,
    graph_uris: Optional[Union[str, List[str]]] = None,
    filter_void_admin_nodes: bool = True,
) -> pd.DataFrame:
    """Turn an in-memory VoID graph into a schema DataFrame.

    Args:
        void_graph: RDFLib graph holding the VoID data
        graph_uris: Graph URI (or list of graph URIs) to extract
        filter_void_admin_nodes: Whether VoID or administrative nodes are filtered

    Returns:
        DataFrame with schema patterns (subject/property/object URIs)
    """
    void_parser = VoidParser(void_source=void_graph, graph_uris=graph_uris)
    schema_frame = void_parser.to_schema(
        filter_void_admin_nodes=filter_void_admin_nodes,
    )
    return schema_frame
|
|
492
|
+
|
|
493
|
+
|
|
494
|
+
def count_instances_per_class(
    endpoint_url: str,
    graph_uris: Optional[Union[str, List[str]]] = None,
    sample_limit: Optional[int] = None,
    exclude_graphs: bool = True,
) -> Dict[str, int]:
    """Count the instances of each class found in a SPARQL endpoint.

    Args:
        endpoint_url: SPARQL endpoint URL
        graph_uris: Graph URI(s) to query
        sample_limit: Max results to sample
        exclude_graphs: Exclude service/system graphs from counting

    Returns:
        Class URI to instance count mapping
    """
    void_parser = VoidParser(graph_uris=graph_uris, exclude_graphs=exclude_graphs)
    counts = void_parser.count_instances_per_class(
        endpoint_url, sample_limit=sample_limit
    )
    # Anything that is not already a dict (e.g. a generator) is
    # materialised into one before returning.
    return counts if isinstance(counts, dict) else dict(counts)
|