kurra 0.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kurra-0.6.0/LICENSE +11 -0
- kurra-0.6.0/PKG-INFO +73 -0
- kurra-0.6.0/README.md +55 -0
- kurra-0.6.0/kurra/__init__.py +3 -0
- kurra-0.6.0/kurra/cli/__init__.py +8 -0
- kurra-0.6.0/kurra/cli/app.py +3 -0
- kurra-0.6.0/kurra/cli/commands/__init__.py +0 -0
- kurra-0.6.0/kurra/cli/commands/format.py +31 -0
- kurra-0.6.0/kurra/cli/commands/fuseki/__init__.py +0 -0
- kurra-0.6.0/kurra/cli/commands/fuseki/app.py +7 -0
- kurra-0.6.0/kurra/cli/commands/fuseki/clear.py +39 -0
- kurra-0.6.0/kurra/cli/commands/fuseki/dataset.py +70 -0
- kurra-0.6.0/kurra/cli/commands/fuseki/upload.py +60 -0
- kurra-0.6.0/kurra/cli/commands/version.py +31 -0
- kurra-0.6.0/kurra/cli/console.py +3 -0
- kurra-0.6.0/kurra/format.py +194 -0
- kurra-0.6.0/kurra/fuseki.py +171 -0
- kurra-0.6.0/kurra/utils.py +32 -0
- kurra-0.6.0/pyproject.toml +33 -0
kurra-0.6.0/LICENSE
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
Copyright 2023 KurrawongAI
|
|
2
|
+
|
|
3
|
+
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
|
|
4
|
+
|
|
5
|
+
1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
|
|
6
|
+
|
|
7
|
+
2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
|
|
8
|
+
|
|
9
|
+
3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
|
|
10
|
+
|
|
11
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
kurra-0.6.0/PKG-INFO
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: kurra
|
|
3
|
+
Version: 0.6.0
|
|
4
|
+
Summary: A Python library of common code and CLI apps shared across Kurrawong projects and infrastructure.
|
|
5
|
+
Author: Edmond Chuc
|
|
6
|
+
Author-email: edmond@kurrawong.ai
|
|
7
|
+
Requires-Python: >=3.11,<4.0.0
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
12
|
+
Requires-Dist: httpx (>=0.24.1,<1.0.0)
|
|
13
|
+
Requires-Dist: rdflib (>=6.3.2,<8.0.0)
|
|
14
|
+
Requires-Dist: rich (>=13.4.1,<14.0.0)
|
|
15
|
+
Requires-Dist: typer (>=0.9.0,<1.0.0)
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
|
|
18
|
+
# Kurra Python Library
|
|
19
|
+
|
|
20
|
+
A Python library of RDF data manipulation functions.
|
|
21
|
+
|
|
22
|
+
## CLI Features
|
|
23
|
+
|
|
24
|
+
## `kurra format`
|
|
25
|
+
|
|
26
|
+
Format Turtle files using RDFLib's `longturtle` format.
|
|
27
|
+
|
|
28
|
+
### `kurra fuseki`
|
|
29
|
+
|
|
30
|
+
A set of commands to interface with a Fuseki server.
|
|
31
|
+
|
|
32
|
+
#### `kurra fuseki dataset list`
|
|
33
|
+
|
|
34
|
+
Get a list of Fuseki datasets.
|
|
35
|
+
|
|
36
|
+
#### `kurra fuseki dataset create`
|
|
37
|
+
|
|
38
|
+
Create a new Fuseki dataset.
|
|
39
|
+
|
|
40
|
+
#### `kurra fuseki upload`
|
|
41
|
+
|
|
42
|
+
Upload a file or a directory of files with an RDF file extension.
|
|
43
|
+
|
|
44
|
+
#### `kurra fuseki clear`
|
|
45
|
+
|
|
46
|
+
Clear a named graph or clear all graphs.
|
|
47
|
+
|
|
48
|
+
## Installation
|
|
49
|
+
|
|
50
|
+
View the [releases](https://github.com/Kurrawong/kurrawong-python/releases) page and install using the source code (zip) link.
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
pip install https://github.com/Kurrawong/kurra/archive/refs/tags/0.6.0.zip
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
## Development
|
|
57
|
+
|
|
58
|
+
Install the Poetry project and its dependencies.
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
task install
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
Format code.
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
task code
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## License
|
|
71
|
+
|
|
72
|
+
[BSD-3-Clause](https://opensource.org/license/bsd-3-clause/) license. See [LICENSE](LICENSE).
|
|
73
|
+
|
kurra-0.6.0/README.md
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# Kurra Python Library
|
|
2
|
+
|
|
3
|
+
A Python library of RDF data manipulation functions.
|
|
4
|
+
|
|
5
|
+
## CLI Features
|
|
6
|
+
|
|
7
|
+
### `kurra format`
|
|
8
|
+
|
|
9
|
+
Format Turtle files using RDFLib's `longturtle` format.
|
|
10
|
+
|
|
11
|
+
### `kurra fuseki`
|
|
12
|
+
|
|
13
|
+
A set of commands to interface with a Fuseki server.
|
|
14
|
+
|
|
15
|
+
#### `kurra fuseki dataset list`
|
|
16
|
+
|
|
17
|
+
Get a list of Fuseki datasets.
|
|
18
|
+
|
|
19
|
+
#### `kurra fuseki dataset create`
|
|
20
|
+
|
|
21
|
+
Create a new Fuseki dataset.
|
|
22
|
+
|
|
23
|
+
#### `kurra fuseki upload`
|
|
24
|
+
|
|
25
|
+
Upload a file or a directory of files with an RDF file extension.
|
|
26
|
+
|
|
27
|
+
#### `kurra fuseki clear`
|
|
28
|
+
|
|
29
|
+
Clear a named graph or clear all graphs.
|
|
30
|
+
|
|
31
|
+
## Installation
|
|
32
|
+
|
|
33
|
+
View the [releases](https://github.com/Kurrawong/kurra/releases) page and install using the source code (zip) link.
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
pip install https://github.com/Kurrawong/kurra/archive/refs/tags/0.6.0.zip
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
## Development
|
|
40
|
+
|
|
41
|
+
Install the Poetry project and its dependencies.
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
task install
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
Format code.
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
task code
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## License
|
|
54
|
+
|
|
55
|
+
[BSD-3-Clause](https://opensource.org/license/bsd-3-clause/) license. See [LICENSE](LICENSE).
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
from kurra.cli.app import app
|
|
2
|
+
from kurra.cli.commands.format import app as format_app
|
|
3
|
+
from kurra.cli.commands.version import app as version_app
|
|
4
|
+
from kurra.cli.commands.fuseki.app import app as fuseki_app
|
|
5
|
+
|
|
6
|
+
# Attach every command group to the root CLI application, in the same
# order as before: fuseki, format, version.
for _sub_app in (fuseki_app, format_app, version_app):
    app.add_typer(_sub_app)
|
|
File without changes
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
|
|
3
|
+
import typer
|
|
4
|
+
|
|
5
|
+
from kurra.format import FailOnChangeError, format_rdf, RDF_FILE_SUFFIXES
|
|
6
|
+
|
|
7
|
+
app = typer.Typer()
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@app.command(name="format", help="Format Turtle files using the longturtle format.")
def format_command(
    file_or_dir: str = typer.Argument(
        ..., help="The file or directory of RDF files to be formatted"
    ),
    check: bool = typer.Option(
        False, help="Check whether files will be formatted without applying the effect."
    ),
    output_format: str = typer.Option(
        "longturtle",
        help=f"Indicate the output RDF format. Available are {list(RDF_FILE_SUFFIXES)}.",
    ),
    output_filename: str = typer.Option(
        None,
        help="the name of the file you want to write the reformatted content to",
    ),
) -> None:
    """CLI entry point for ``kurra format``.

    Delegates to ``format_rdf``. If it raises ``FailOnChangeError`` the
    message is printed and the process exits with status 1.
    """
    try:
        format_rdf(
            file_or_dir,
            check,
            output_format,
            output_filename,
        )
    except FailOnChangeError as failure:
        print(failure)
        # Non-zero exit status signals the failed check to the caller.
        sys.exit(1)
|
|
File without changes
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import httpx
|
|
2
|
+
import typer
|
|
3
|
+
from typing_extensions import Annotated
|
|
4
|
+
|
|
5
|
+
from kurra.cli.commands.fuseki.app import app
|
|
6
|
+
from kurra.cli.console import console
|
|
7
|
+
from kurra.fuseki import clear_graph
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@app.command(name="clear", help="Clear graph in the Fuseki dataset.")
def clear_command(
    named_graph: str = typer.Argument(
        ..., help="Named graph. If 'all' is supplied, it will remove all named graphs."
    ),
    fuseki_url: str = typer.Argument(
        ..., help="Fuseki base URL. E.g. http://localhost:3030"
    ),
    username: Annotated[
        str, typer.Option("--username", "-u", help="Fuseki username.")
    ] = None,
    password: Annotated[
        str, typer.Option("--password", "-p", help="Fuseki password.")
    ] = None,
    timeout: Annotated[
        int, typer.Option("--timeout", "-t", help="Timeout per request")
    ] = 60,
):
    """CLI entry point for ``kurra fuseki clear``.

    Opens an ``httpx.Client`` (with basic auth only when both username and
    password are given) and calls ``clear_graph``. On any exception an error
    line is printed to the console and the exception is re-raised.
    """
    # Credentials are only used when both halves were supplied.
    credentials = None
    if username is not None and password is not None:
        credentials = (username, password)

    with httpx.Client(auth=credentials, timeout=timeout) as client:
        try:
            clear_graph(fuseki_url, client, named_graph)
        except Exception:
            console.print(
                f"[bold red]ERROR[/bold red] Failed to run clear command with '{named_graph}' at {fuseki_url}."
            )
            raise
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
from typing import Annotated
|
|
2
|
+
|
|
3
|
+
import httpx
|
|
4
|
+
import typer
|
|
5
|
+
|
|
6
|
+
from kurra.cli.console import console
|
|
7
|
+
from kurra.fuseki import dataset_create, dataset_list
|
|
8
|
+
|
|
9
|
+
app = typer.Typer()
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@app.command(name="list", help="Get a list of Fuseki datasets.")
def dataset_list_command(
    fuseki_url: str = typer.Argument(
        ..., help="Fuseki base URL. E.g. http://localhost:3030"
    ),
    username: Annotated[
        str, typer.Option("--username", "-u", help="Fuseki username.")
    ] = None,
    password: Annotated[
        str, typer.Option("--password", "-p", help="Fuseki password.")
    ] = None,
    timeout: Annotated[
        int, typer.Option("--timeout", "-t", help="Timeout per request")
    ] = 60,
) -> None:
    """CLI entry point for ``kurra fuseki dataset list``.

    Calls ``dataset_list`` with a fresh ``httpx.Client`` and prints its
    result to the console. On any exception an error line is printed and the
    exception is re-raised.
    """
    # Credentials are only used when both halves were supplied.
    credentials = None
    if username is not None and password is not None:
        credentials = (username, password)

    with httpx.Client(auth=credentials, timeout=timeout) as client:
        try:
            listing = dataset_list(fuseki_url, client)
            console.print(listing)
        except Exception:
            console.print(
                f"[bold red]ERROR[/bold red] Failed to list Fuseki datasets at {fuseki_url}."
            )
            raise
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@app.command(name="create", help="Create a new Fuseki dataset.")
def dataset_create_command(
    fuseki_url: str = typer.Argument(
        ..., help="Fuseki base URL. E.g. http://localhost:3030"
    ),
    dataset_name: str = typer.Argument(..., help="Fuseki dataset name"),
    username: Annotated[
        str, typer.Option("--username", "-u", help="Fuseki username.")
    ] = None,
    password: Annotated[
        str, typer.Option("--password", "-p", help="Fuseki password.")
    ] = None,
    timeout: Annotated[
        int, typer.Option("--timeout", "-t", help="Timeout per request")
    ] = 60,
) -> None:
    """CLI entry point for ``kurra fuseki dataset create``.

    Calls ``dataset_create`` with a fresh ``httpx.Client`` and prints the
    returned message to the console. On any exception an error line is
    printed and the exception is re-raised.
    """
    # Credentials are only used when both halves were supplied.
    credentials = None
    if username is not None and password is not None:
        credentials = (username, password)

    with httpx.Client(auth=credentials, timeout=timeout) as client:
        try:
            outcome = dataset_create(fuseki_url, client, dataset_name)
            console.print(outcome)
        except Exception:
            console.print(
                f"[bold red]ERROR[/bold red] Failed to create Fuseki dataset {dataset_name} at {fuseki_url}."
            )
            raise
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
import httpx
|
|
4
|
+
import typer
|
|
5
|
+
from rich.progress import track
|
|
6
|
+
from typing_extensions import Annotated
|
|
7
|
+
|
|
8
|
+
from kurra.cli.commands.fuseki.app import app
|
|
9
|
+
from kurra.cli.console import console
|
|
10
|
+
from kurra.fuseki import suffix_map, upload
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@app.command(name="upload", help="Upload files to a Fuseki dataset.")
def upload_command(
    path: Path = typer.Argument(
        ..., help="The path of a file or directory to be uploaded."
    ),
    fuseki_url: str = typer.Argument(
        ..., help="Fuseki dataset URL. E.g. http://localhost:3030/ds"
    ),
    username: Annotated[
        str, typer.Option("--username", "-u", help="Fuseki username.")
    ] = None,
    password: Annotated[
        str, typer.Option("--password", "-p", help="Fuseki password.")
    ] = None,
    timeout: Annotated[
        int, typer.Option("--timeout", "-t", help="Timeout per request")
    ] = 60,
) -> None:
    """Upload a file or a directory of files with an RDF file extension.

    Only files whose suffix is a key of ``kurra.fuseki.suffix_map`` are
    uploaded (e.g. .nt, .nq, .ttl, .trig, .json, .jsonld, .xml). A directory
    is searched recursively.

    Files are uploaded into their own named graph in the format:
    <urn:file:{file.name}>
    E.g. <urn:file:example.ttl>

    On the first failed upload an error line is printed to the console and
    the exception is re-raised, aborting the remaining uploads.
    """
    candidates = [path] if path.is_file() else path.glob("**/*")
    # Keep regular files with a known RDF suffix; a directory that happens to
    # be named like "data.ttl" must not be handed to the uploader.
    files = [f for f in candidates if f.is_file() and f.suffix in suffix_map]

    auth = (
        (username, password) if username is not None and password is not None else None
    )

    with httpx.Client(auth=auth, timeout=timeout) as client:
        for file in track(files, description=f"Uploading {len(files)} files..."):
            try:
                # Keyword arguments are required here: upload()'s positional
                # order is (url, data, graph_name, append, http_client), so
                # passing the client third would make it the graph name and
                # the graph IRI a truthy "append" flag.
                upload(
                    fuseki_url,
                    file,
                    graph_name=f"urn:file:{file.name}",
                    http_client=client,
                )
            except Exception:
                console.print(
                    f"[bold red]ERROR[/bold red] Failed to upload file {file}."
                )
                raise
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import typer
|
|
2
|
+
from typing_extensions import Annotated
|
|
3
|
+
|
|
4
|
+
from kurra.cli import app as main_app
|
|
5
|
+
from kurra import __version__
|
|
6
|
+
from kurra.cli.console import console
|
|
7
|
+
|
|
8
|
+
app = typer.Typer()
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@app.command(name="version", help="Show the version of the kurra CLI app.")
def version_command():
    """Print the installed kurra version to the console."""
    banner = f"kurra version {__version__}"
    console.print(banner)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def version_callback(value: bool):
    """Eager ``--version`` handler: print the version and stop the CLI.

    Does nothing when ``value`` is falsy. Otherwise runs the ``version``
    command and raises ``typer.Exit``.
    """
    if not value:
        return

    from kurra.cli.commands import version as version_module

    version_module.version_command()
    raise typer.Exit()
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@main_app.callback(invoke_without_command=True)
def main(
    version: Annotated[
        bool, typer.Option("--version", callback=version_callback, is_eager=True)
    ] = False,
):
    """Main callback for the CLI app."""
    # Nothing to do here: --version is handled entirely by version_callback.
|
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
from typing import Tuple, Literal, Union, Optional
|
|
3
|
+
|
|
4
|
+
from rdflib import Graph, URIRef, Dataset
|
|
5
|
+
from kurra.utils import guess_format_from_data, load_graph
|
|
6
|
+
|
|
7
|
+
# Names of the RDF serialization formats accepted as an output format.
KNOWN_RDF_FORMATS = Literal[
    "turtle",
    "longturtle",
    "xml",
    "n-triples",
    "json-ld",
]

# File suffix written for each output format; both Turtle variants share ".ttl".
RDF_FILE_SUFFIXES = {
    "turtle": ".ttl",
    "longturtle": ".ttl",
    "xml": ".rdf",
    "n-triples": ".nt",
    "json-ld": ".jsonld",
}
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class FailOnChangeError(Exception):
    """Raised by the format routines when ``check`` is true and formatting
    a file would change its content."""
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def get_topbraid_metadata(content: str) -> str:
    """Return the leading block of ``#`` lines of *content*.

    Lines are taken from the top for as long as each one starts with ``#``.
    The collected lines are joined with newlines and terminated by one; an
    empty string is returned when the first line is not a ``#`` line.
    """
    header = []
    for row in content.split("\n"):
        if not row.startswith("#"):
            # First non-comment line ends the metadata header.
            break
        header.append(row)

    return "\n".join(header) + "\n" if header else ""
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def do_format(
    content: str, output_format: KNOWN_RDF_FORMATS = "longturtle"
) -> Tuple[str, bool]:
    """Re-serialize RDF text and report whether it differs from the input.

    The input is parsed with the format guessed by ``guess_format_from_data``
    and serialized as ``output_format``. Any leading ``#`` header found by
    ``get_topbraid_metadata`` is put back in front of the result.

    Returns:
        ``(new_content, changed)`` where ``changed`` is true when
        ``new_content`` is not equal to ``content``.
    """
    header = get_topbraid_metadata(content)

    parsed = Graph()
    parsed.parse(data=content, format=guess_format_from_data(content))

    formatted = header + parsed.serialize(format=output_format)
    return formatted, formatted != content
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def format_file(
    file: Path,
    check: bool = False,
    output_format: KNOWN_RDF_FORMATS = "longturtle",
    output_filename: Path = None,
) -> bool:
    """Format a single RDF file.

    Args:
        file: Path of the file to format (a string is accepted too).
        check: When true, nothing is written to disk; the function only
            reports whether formatting would change the content.
        output_format: Serialization format of the result.
        output_filename: Where to write the result. Defaults to the input
            path with the suffix registered for ``output_format`` in
            ``RDF_FILE_SUFFIXES``.

    Returns:
        True when the formatted content differs from the file's content.

    Raises:
        ValueError: If ``file`` does not exist or is not a regular file.
        FailOnChangeError: If ``check`` is true and the content would change.
    """
    # Coerce first so plain strings work; is_file() is also False for a
    # missing path, so no separate existence check is needed.
    path = Path(file).resolve()
    if not path.is_file():
        raise ValueError(f"{file} is not a file.")

    if output_filename is None:
        output_filename = path.with_suffix(RDF_FILE_SUFFIXES[output_format])

    with open(path, "r", encoding="utf-8") as fread:
        content = fread.read()

    content, changed = do_format(content, output_format)

    if check:
        # Check mode must not touch the file system: neither create the
        # output file nor rewrite it.
        if changed:
            raise FailOnChangeError(
                f"The file {path} contains changes that can be formatted."
            )
        return changed

    if changed:
        print(f"The file {path} has been formatted.")

    # Always write outside check mode: the output file may differ from the
    # input file, in which case it has to be produced even when unchanged.
    with open(output_filename, "w", encoding="utf-8") as fwrite:
        fwrite.write(content)

    return changed
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def format_rdf(
    path: Path,
    check: bool,
    output_format: KNOWN_RDF_FORMATS = "longturtle",
    output_filename: Path = None,
) -> None:
    """Format one RDF file, or every ``*.ttl`` file below a directory.

    Args:
        path: A file, or a directory that is searched recursively for
            ``*.ttl`` files.
        check: When true, report files that would change instead of
            accepting the change.
        output_format: Serialization format of the result.
        output_filename: Output file for single-file mode; its suffix is
            replaced by the one registered for ``output_format``. Ignored in
            directory mode.

    Raises:
        FailOnChangeError: If ``check`` is true and at least one file would
            change. This now also applies to single-file mode, which used to
            print the error and return normally, so a failed check on a
            single file could not be detected by the caller.
    """
    path = Path(path).resolve()

    if not path.is_dir():
        # Single file reformatting.
        if output_filename and output_format is not None:
            output_filename = (
                Path(output_filename)
                .resolve()
                .with_suffix(RDF_FILE_SUFFIXES[output_format])
            )
        elif not output_filename:
            # Treat "" like "not given" so format_file applies its default.
            output_filename = None

        format_file(
            path,
            check,
            output_format=output_format,
            output_filename=output_filename,
        )
        return

    files = list(path.glob("**/*.ttl"))
    changed_files = []

    for file in files:
        try:
            if format_file(file, check, output_format=output_format):
                changed_files.append(file)
        except FailOnChangeError as err:
            # Report each offending file but keep going so the summary
            # below covers the whole directory.
            print(err)
            changed_files.append(file)

    if check and changed_files:
        raise FailOnChangeError(
            f"{len(changed_files)} out of {len(files)} files will change."
        )

    print(
        f"{len(changed_files)} out of {len(files)} files changed.",
    )
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def make_dataset(
    path_str_or_graph: Union[Path, str, Graph], graph_iri: Union[str, URIRef]
) -> Dataset:
    """Copy the triples of a Graph, RDF string or RDF file into a new
    Dataset, placing all of them in the graph identified by ``graph_iri``."""

    # TODO: make a Dataset from a Graph or Dataset
    #   - override option to replace existing graph
    #   - set default union graph
    #   - set default graph
    target = graph_iri if isinstance(graph_iri, URIRef) else URIRef(graph_iri)

    source = load_graph(path_str_or_graph)

    dataset = Dataset()
    for triple in source:
        # Extend each (s, p, o) triple with the target graph to form a quad.
        dataset.add((*triple, target))

    return dataset
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def export_quads(
    path_str_or_dataset: Union[Path, str, Dataset], destination: Optional[Path] = None
) -> bool | str:
    """Export quads as TriG, either to a string or to a file.

    Args:
        path_str_or_dataset: A ``Path`` to a quads file (parsed by rdflib;
            presumably the format is inferred from the file name — confirm),
            a string of TriG data, or an existing ``Dataset``.
        destination: Optional file to write to. If it already exists as a
            file, its content is parsed and merged with the new quads before
            it is rewritten.

    Returns:
        ``True`` when a destination was written, otherwise the TriG
        serialization as a string.
    """
    if isinstance(path_str_or_dataset, Path):
        d = Dataset()
        # Load the file. The original called Dataset.print() here, which
        # serializes the (empty) dataset instead of reading the file.
        d.parse(str(path_str_or_dataset))
    elif isinstance(path_str_or_dataset, str):
        d = Dataset()
        d.parse(data=path_str_or_dataset, format="trig")
    else:  # Dataset
        d = path_str_or_dataset

    if destination is None:
        return d.serialize(format="trig")

    if Path(destination).is_file():
        # Merge with what is already stored at the destination.
        existing = Dataset()
        existing.parse(destination)
        merged = d + existing
        merged.serialize(format="trig", destination=destination)
    else:
        d.serialize(format="trig", destination=destination)

    return True
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import Union
|
|
4
|
+
from textwrap import dedent
|
|
5
|
+
from rdflib import Graph
|
|
6
|
+
|
|
7
|
+
import httpx
|
|
8
|
+
|
|
9
|
+
from kurra.utils import guess_format_from_data, load_graph
|
|
10
|
+
|
|
11
|
+
# File suffixes treated as RDF, mapped to a media type for that syntax.
# ".rdf" is included because it is the suffix the formatter writes for
# RDF/XML output; without it such files are skipped when uploading.
suffix_map = {
    ".nt": "application/n-triples",
    ".nq": "application/n-quads",
    ".ttl": "text/turtle",
    ".trig": "application/trig",
    ".json": "application/ld+json",
    ".jsonld": "application/ld+json",
    ".xml": "application/rdf+xml",
    ".rdf": "application/rdf+xml",
}
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _guess_query_is_update(query: str) -> bool:
|
|
23
|
+
if any(x in query for x in ["DROP", "INSERT", "DELETE"]):
|
|
24
|
+
return True
|
|
25
|
+
else:
|
|
26
|
+
return False
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _guess_return_type_for_sparql_query(query: str) -> str:
|
|
30
|
+
if any(x in query for x in ["SELECT", "INSERT", "ASK"]):
|
|
31
|
+
return "application/sparql-results+json"
|
|
32
|
+
elif "CONSTRUCT" in query:
|
|
33
|
+
return "text/turtle"
|
|
34
|
+
else:
|
|
35
|
+
return "application/sparql-results+json"
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def upload(
    url: str,
    file_or_str_or_graph: Union[Path, str, Graph],
    graph_name: str = None,
    append: bool = False,
    http_client: httpx.Client = None,
) -> None:
    """Upload RDF to a Fuseki server using the Graph Store Protocol.

    The data is loaded with ``load_graph`` and always sent as Turtle.

    Args:
        url: URL the request is sent to.
        file_or_str_or_graph: A file path, a string of RDF data, or a Graph.
        graph_name: IRI of the target named graph. If not given, the data
            goes to the default graph.
        append: If false (the default) the target graph's content is
            replaced (HTTP PUT); if true the data is added to it (HTTP POST).
        http_client: Optional client to reuse. If omitted, a client is
            created for this call and closed again before returning, also
            when the upload fails.

    Raises:
        RuntimeError: If the server answers with a status other than 200,
            201 or 204.
    """
    owns_client = http_client is None
    if owns_client:
        http_client = httpx.Client()

    try:
        # A bare "default" query string addresses the default graph.
        params = {"graph": graph_name} if graph_name else "default"

        data = load_graph(file_or_str_or_graph).serialize(format="longturtle")
        headers = {"content-type": "text/turtle"}

        send = http_client.post if append else http_client.put
        response = send(url, params=params, headers=headers, content=data)

        status_code = response.status_code
        if status_code not in (200, 201, 204):
            message = (
                str(file_or_str_or_graph)
                if isinstance(file_or_str_or_graph, Path)
                else "content"
            )
            raise RuntimeError(
                f"Received status code {status_code} for file {message} at url {url}. Message: {response.text}"
            )
    finally:
        # Only close a client this call created; a caller-supplied client
        # stays open for reuse.
        if owns_client:
            http_client.close()
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def dataset_list(
    url: str,
    http_client: httpx.Client,
) -> str:
    """Fetch ``{url}/$/datasets`` and return the JSON response pretty-printed.

    Raises:
        RuntimeError: If the response status is not 200.
    """
    response = http_client.get(
        f"{url}/$/datasets", headers={"accept": "application/json"}
    )

    if response.status_code == 200:
        return json.dumps(response.json(), indent=2)

    raise RuntimeError(
        f"Received status code {response.status_code}. Message: {response.text}"
    )
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def dataset_create(
    url: str, http_client: httpx.Client, dataset_name: str, dataset_type: str = "tdb2"
) -> str:
    """POST a new dataset definition to ``{url}/$/datasets``.

    The name and type are sent as the form fields ``dbName`` and ``dbType``.

    Returns:
        A confirmation message naming the dataset and the URL.

    Raises:
        RuntimeError: If the response status is neither 200 nor 201.
    """
    form = {"dbName": dataset_name, "dbType": dataset_type}
    response = http_client.post(f"{url}/$/datasets", data=form)

    if response.status_code not in (200, 201):
        raise RuntimeError(
            f"Received status code {response.status_code}. Message: {response.text}"
        )

    return f"Dataset {dataset_name} created at {url}."
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def clear_graph(url: str, http_client: httpx.Client, named_graph: str):
    """Send a SPARQL ``CLEAR`` update to *url*.

    ``named_graph == "all"`` issues ``CLEAR ALL``; any other value is used
    as the IRI in ``CLEAR GRAPH <...>``.

    Raises:
        RuntimeError: If the response status is not 204.
    """
    if named_graph == "all":
        update = "CLEAR ALL"
    else:
        update = f"CLEAR GRAPH <{named_graph}>"

    response = http_client.post(
        url,
        headers={"content-type": "application/sparql-update"},
        content=update,
    )

    if response.status_code != 204:
        raise RuntimeError(
            f"Received status code {response.status_code}. Message: {response.text}"
        )
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def query(
|
|
131
|
+
sparql_endpoint: str,
|
|
132
|
+
query: str,
|
|
133
|
+
http_client: httpx.Client = None,
|
|
134
|
+
return_python: bool = False,
|
|
135
|
+
return_bindings_only: bool = False,
|
|
136
|
+
):
|
|
137
|
+
"""Poses a SPARQL query to the Fuseki server."""
|
|
138
|
+
|
|
139
|
+
if http_client is None:
|
|
140
|
+
http_client = httpx.Client()
|
|
141
|
+
|
|
142
|
+
if query is None:
|
|
143
|
+
raise ValueError("You must supply a query")
|
|
144
|
+
|
|
145
|
+
if _guess_query_is_update(query):
|
|
146
|
+
headers = {"Content-Type": "application/sparql-update"}
|
|
147
|
+
else:
|
|
148
|
+
headers = {"Content-Type": "application/sparql-query"}
|
|
149
|
+
|
|
150
|
+
headers["Accept"] = _guess_return_type_for_sparql_query(query)
|
|
151
|
+
|
|
152
|
+
response = http_client.post(
|
|
153
|
+
sparql_endpoint,
|
|
154
|
+
headers=headers,
|
|
155
|
+
content=query,
|
|
156
|
+
)
|
|
157
|
+
|
|
158
|
+
status_code = response.status_code
|
|
159
|
+
|
|
160
|
+
if status_code != 200 and status_code != 201 and status_code != 204:
|
|
161
|
+
raise RuntimeError(f"ERROR {status_code}: {response.text}")
|
|
162
|
+
|
|
163
|
+
match (return_python, return_bindings_only):
|
|
164
|
+
case (True, True):
|
|
165
|
+
return response.json()["results"]["bindings"]
|
|
166
|
+
case (True, False):
|
|
167
|
+
return response.json()
|
|
168
|
+
case (False, True):
|
|
169
|
+
return dedent(response.text.split('"bindings": [')[1].split("]")[0])
|
|
170
|
+
case _:
|
|
171
|
+
return response.text
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
from typing import Union
|
|
3
|
+
from rdflib import Graph
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def guess_format_from_data(rdf: str):
    """Guess the RDF media type of a string of RDF data.

    The guess is made from the first line that is neither blank nor a ``#``
    comment, so data with a leading comment header is still recognised by
    its first directive.

    Args:
        rdf: The RDF text, or None.

    Returns:
        ``"text/turtle"`` for data starting with a prefix or base directive
        (``@prefix``, ``@base``, ``PREFIX``, ``BASE``, any letter case),
        ``"application/ld+json"`` for ``{`` or ``[``,
        ``"application/rdf+xml"`` for ``<?xml`` or ``<rdf``,
        ``"application/n-triples"`` for anything else, and None when ``rdf``
        is None.
    """
    if rdf is None:
        return None

    # Find the first meaningful line; stays "" when there is none.
    head = ""
    for line in rdf.splitlines():
        candidate = line.strip()
        if candidate and not candidate.startswith("#"):
            head = candidate
            break

    if head.lower().startswith(("@prefix", "@base", "prefix", "base")):
        return "text/turtle"
    if head.startswith(("{", "[")):
        return "application/ld+json"
    if head.startswith(("<?xml", "<rdf")):
        return "application/rdf+xml"
    return "application/n-triples"
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def load_graph(file_or_str_or_graph: Union[Path, str, Graph]):
    """Return a Graph for the given input.

    A Graph is returned unchanged, a ``Path`` is parsed as a file, and
    anything else is parsed as RDF data using the format guessed by
    ``guess_format_from_data``.
    """
    source = file_or_str_or_graph

    if isinstance(source, Graph):
        return source

    if isinstance(source, Path):
        return Graph().parse(str(source))

    # Remaining case: a string of RDF data.
    return Graph().parse(data=source, format=guess_format_from_data(source))
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
[tool.poetry]
|
|
2
|
+
name = "kurra"
|
|
3
|
+
version = "0.6.0"
|
|
4
|
+
description = "A Python library of common code and CLI apps shared across Kurrawong projects and infrastructure."
|
|
5
|
+
authors = ["Edmond Chuc <edmond@kurrawong.ai>"]
|
|
6
|
+
readme = "README.md"
|
|
7
|
+
|
|
8
|
+
[tool.poetry.dependencies]
|
|
9
|
+
python = ">=3.11,<4.0.0"
|
|
10
|
+
httpx = ">=0.24.1,<1.0.0"
|
|
11
|
+
rdflib = ">=6.3.2,<8.0.0"
|
|
12
|
+
typer = ">=0.9.0,<1.0.0"
|
|
13
|
+
rich = ">=13.4.1,<14.0.0"
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
[tool.poetry.group.dev.dependencies]
|
|
17
|
+
black = "^23.3.0"
|
|
18
|
+
pytest = "^7.3.1"
|
|
19
|
+
ruff = "^0.0.272"
|
|
20
|
+
testcontainers = "^4.9.0"
|
|
21
|
+
|
|
22
|
+
[build-system]
|
|
23
|
+
requires = ["poetry-core"]
|
|
24
|
+
build-backend = "poetry.core.masonry.api"
|
|
25
|
+
|
|
26
|
+
[tool.poetry.scripts]
|
|
27
|
+
kurra = 'kurra.cli:app'
|
|
28
|
+
|
|
29
|
+
[tool.ruff]
|
|
30
|
+
select = [
|
|
31
|
+
"I001",
|
|
32
|
+
"I002"
|
|
33
|
+
]
|