iolanta 2.0.6__py3-none-any.whl → 2.0.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- iolanta/__init__.py +0 -1
- iolanta/facets/facet.py +0 -7
- iolanta/facets/textual_no_facet_found.py +33 -1
- iolanta/iolanta.py +3 -101
- iolanta/models.py +25 -6
- iolanta/namespaces.py +2 -0
- iolanta/parse_quads.py +89 -22
- iolanta/sparqlspace/processor.py +9 -24
- iolanta/widgets/description.py +22 -3
- {iolanta-2.0.6.dist-info → iolanta-2.0.7.dist-info}/METADATA +1 -1
- {iolanta-2.0.6.dist-info → iolanta-2.0.7.dist-info}/RECORD +13 -30
- iolanta/loaders/__init__.py +0 -2
- iolanta/loaders/base.py +0 -124
- iolanta/loaders/data_type_choice.py +0 -66
- iolanta/loaders/dict_loader.py +0 -57
- iolanta/loaders/errors.py +0 -29
- iolanta/loaders/http.py +0 -127
- iolanta/loaders/local_directory.py +0 -148
- iolanta/loaders/local_file.py +0 -107
- iolanta/loaders/scheme_choice.py +0 -72
- iolanta/parsers/__init__.py +0 -0
- iolanta/parsers/base.py +0 -41
- iolanta/parsers/dict_parser.py +0 -171
- iolanta/parsers/errors.py +0 -35
- iolanta/parsers/json.py +0 -35
- iolanta/parsers/markdown.py +0 -58
- iolanta/parsers/yaml.py +0 -46
- iolanta/shortcuts.py +0 -63
- {iolanta-2.0.6.dist-info → iolanta-2.0.7.dist-info}/WHEEL +0 -0
- {iolanta-2.0.6.dist-info → iolanta-2.0.7.dist-info}/entry_points.txt +0 -0
iolanta/loaders/__init__.py
DELETED
iolanta/loaders/base.py
DELETED
|
@@ -1,124 +0,0 @@
|
|
|
1
|
-
from abc import ABC
|
|
2
|
-
from dataclasses import dataclass
|
|
3
|
-
from logging import Logger
|
|
4
|
-
from typing import (
|
|
5
|
-
Any,
|
|
6
|
-
Dict,
|
|
7
|
-
Generic,
|
|
8
|
-
Iterable,
|
|
9
|
-
Optional,
|
|
10
|
-
TextIO,
|
|
11
|
-
TypedDict,
|
|
12
|
-
TypeVar,
|
|
13
|
-
)
|
|
14
|
-
|
|
15
|
-
from rdflib import URIRef
|
|
16
|
-
from yarl import URL
|
|
17
|
-
|
|
18
|
-
from iolanta.conversions import url_to_iri
|
|
19
|
-
from iolanta.ensure_is_context import ensure_is_context
|
|
20
|
-
from iolanta.models import LDContext, LDDocument, Quad
|
|
21
|
-
from iolanta.namespaces import PYTHON
|
|
22
|
-
|
|
23
|
-
SourceType = TypeVar('SourceType')
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
PyLDOptions = Dict[str, Any] # type: ignore
|
|
27
|
-
|
|
28
|
-
class PyLDResponse(TypedDict):
    """Mapping returned by a loader when it is called as a document loader."""

    contentType: str
    contextUrl: Optional[str]
    documentUrl: str
    document: LDDocument
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
def term_for_python_class(cls: type) -> URIRef:
    """Build the `PYTHON` namespace term named after the dotted path of a class."""
    dotted_path = '{0}.{1}'.format(cls.__module__, cls.__qualname__)
    return PYTHON.term(dotted_path)
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
# noinspection TaskProblemsInspection
|
|
44
|
-
@dataclass(frozen=True)
class Loader(ABC, Generic[SourceType]):
    """
    Base class for loaders.

    Loader receives a URL (or a path) to certain location. It is responsible for
    reading data from that location and returning it as a stream of RDF quads.

    Usually, depending on the data format, Loader leverages Parsers for that
    purpose.

    Every hook below raises `NotImplementedError` until a subclass overrides it.
    """

    # Logger handed to the loader at construction time.
    logger: Logger

    @classmethod
    def loader_class_iri(cls) -> URIRef:
        """Import path to the loader class."""
        return term_for_python_class(cls)

    def choose_parser_class(self, source: SourceType):
        """Find which parser class to use for this URL."""
        raise NotImplementedError(
            f'{self}.choose_parser_class() is not implemented.',
        )

    def as_jsonld_document(
        self,
        source: SourceType,
        iri: Optional[URIRef] = None,
    ) -> LDDocument:
        """Represent a file as a JSON-LD document."""
        raise NotImplementedError(
            f'{self}.as_jsonld_document() is not implemented.',
        )

    def as_file(self, source: SourceType) -> TextIO:
        """Construct a file-like object."""
        # Carries a message like the sibling hooks, so the failing loader
        # is identifiable from the traceback alone.
        raise NotImplementedError(
            f'{self}.as_file() is not implemented.',
        )

    def as_quad_stream(
        self,
        source: SourceType,
        iri: Optional[URIRef],
        root_loader: 'Loader[SourceType]',
        context: Optional[LDContext] = None,
    ) -> Iterable[Quad]:
        """Convert data into a stream of RDF quads."""
        raise NotImplementedError(
            f'{self}.as_quad_stream() is not implemented.',
        )

    def find_context(self, source: SourceType) -> LDContext:
        """Find context for the file."""
        raise NotImplementedError(
            f'{self}.find_context() is not implemented.',
        )

    def __call__(self, source: str, options: PyLDOptions) -> PyLDResponse:
        """
        Call the loader to retrieve the document in a PYLD friendly format.

        Used to resolve remote contexts.

        The type of `source` parameter is intentionally `str`: that's the only
        thing which pyld can do.

        :param source: Location of the document, as a string.
        :param options: Accepted for signature compatibility; not used here.
        :return: Mapping with the loaded document and its metadata.
        """
        # Parse the string once and reuse the parsed URL everywhere.
        url = URL(source)

        document = ensure_is_context(
            self.as_jsonld_document(
                source=url,
                iri=url_to_iri(url),
            ),
        )

        return {
            'document': document,
            'contextUrl': None,
            # `PyLDResponse` declares this field as `str`, so hand back text
            # rather than the parsed URL object.
            'documentUrl': str(url),
            'contentType': 'application/ld+json',
        }
|
iolanta/loaders/data_type_choice.py
DELETED
|
@@ -1,66 +0,0 @@
|
|
|
1
|
-
from dataclasses import dataclass
|
|
2
|
-
from typing import Any, Dict, Iterable, Optional, TextIO
|
|
3
|
-
|
|
4
|
-
from rdflib import URIRef
|
|
5
|
-
from yarl import URL
|
|
6
|
-
|
|
7
|
-
from iolanta.loaders.base import Loader, SourceType
|
|
8
|
-
from iolanta.models import LDContext, LDDocument, Quad
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
@dataclass(frozen=True)
class DataTypeChoiceLoader(Loader[Any]):   # type: ignore
    """Delegate loading to the loader registered for the type of the source."""

    # Python type -> loader handling sources of that type. Entries are tried
    # in iteration order and the first `isinstance` match wins.
    loader_by_data_type: Dict[type, Loader[Any]]   # type: ignore

    def choose_parser_class(self, source: SourceType):
        """Not supported by this loader; always raises `ValueError`."""
        raise ValueError('choose_parser_class')

    def as_file(self, source: SourceType) -> TextIO:
        """Not supported by this loader; always raises `ValueError`."""
        raise ValueError('as_file')

    def find_context(self, source: SourceType) -> LDContext:
        """Not supported by this loader; always raises `ValueError`."""
        raise ValueError('find_context')

    def resolve_loader(self, source: Any):   # type: ignore
        """
        Pick the loader whose registered type matches `source`.

        Raises `ValueError` when no registered type matches.
        """
        candidates = (
            candidate
            for expected_type, candidate in self.loader_by_data_type.items()
            if isinstance(source, expected_type)
        )

        try:
            return next(candidates)
        except StopIteration:
            pass

        actual_type = type(source)
        raise ValueError(
            f'Cannot find a loader for source: {source} of type: {actual_type}',
        )

    def as_jsonld_document(
        self,
        source: URL,
        iri: Optional[URIRef] = None,
    ) -> LDDocument:
        """Return the JSON-LD document produced by the matching loader."""
        delegate = self.resolve_loader(source=source)
        return delegate.as_jsonld_document(source=source, iri=iri)

    def as_quad_stream(
        self,
        source: str,
        iri: Optional[URIRef],
        root_loader: Optional[Loader[URL]] = None,
        context: Optional[LDContext] = None,
    ) -> Iterable[Quad]:
        """Return the quad stream produced by the matching loader."""
        delegate = self.resolve_loader(source=source)

        # Without an explicit root loader this instance acts as the root.
        effective_root = root_loader if root_loader else self

        return delegate.as_quad_stream(
            source=source,
            iri=iri,
            root_loader=effective_root,
            context=context,
        )
|
iolanta/loaders/dict_loader.py
DELETED
|
@@ -1,57 +0,0 @@
|
|
|
1
|
-
from dataclasses import dataclass
|
|
2
|
-
from typing import Iterable, Optional, TextIO, Type
|
|
3
|
-
|
|
4
|
-
from rdflib import Literal, URIRef
|
|
5
|
-
|
|
6
|
-
from iolanta.conversions import url_to_iri
|
|
7
|
-
from iolanta.loaders.base import Loader
|
|
8
|
-
from iolanta.loaders.errors import IsAContext, ParserNotFound
|
|
9
|
-
from iolanta.models import LDContext, LDDocument, Quad
|
|
10
|
-
from iolanta.namespaces import IOLANTA
|
|
11
|
-
from iolanta.parsers.base import Parser
|
|
12
|
-
from iolanta.parsers.dict_parser import DictParser
|
|
13
|
-
from iolanta.parsers.json import JSON
|
|
14
|
-
from iolanta.parsers.markdown import Markdown
|
|
15
|
-
from iolanta.parsers.yaml import YAML
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
@dataclass(frozen=True)
class DictLoader(Loader[LDDocument]):
    """
    Retrieve Linked Data from an in-memory document.

    Requires a dict of raw JSON-LD data.
    """

    def find_context(self, source: str) -> LDContext:
        """Not supported for in-memory documents; always raises `ValueError`."""
        raise ValueError(
            'DictLoader.find_context() is not supported: '
            'an in-memory document has no location to look a context up at.',
        )

    def choose_parser_class(self, source: LDDocument) -> Type[Parser]:
        """Return the parser class for in-memory documents."""
        # Return the class itself, as the annotation promises and as the other
        # loaders do; callers instantiate it with `parser_class()`.
        return DictParser

    def as_quad_stream(
        self,
        source: LDDocument,
        root_loader: Loader[LDDocument],
        iri: Optional[URIRef] = None,
        context: Optional[LDContext] = None,
    ) -> Iterable[Quad]:
        """Extract a sequence of quads."""
        yield from DictParser().as_quad_stream(
            raw_data=source,
            iri=iri,
            context=context,
            root_loader=root_loader,
        )

    def as_file(self, source: LDDocument) -> TextIO:
        """Not supported for in-memory documents; always raises `ValueError`."""
        raise ValueError(
            'DictLoader.as_file() is not supported: '
            'an in-memory document is not backed by a file.',
        )

    def as_jsonld_document(
        self,
        source: LDDocument,
        iri: Optional[URIRef] = None,
    ) -> LDDocument:
        """Return `source` unchanged; `iri` is not used."""
        return source
|
iolanta/loaders/errors.py
DELETED
|
@@ -1,29 +0,0 @@
|
|
|
1
|
-
from dataclasses import dataclass
|
|
2
|
-
from pathlib import Path
|
|
3
|
-
|
|
4
|
-
from documented import DocumentedError
|
|
5
|
-
from yarl import URL
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
@dataclass
class IsAContext(DocumentedError):
    """
    The provided file is a context.

    - Path: {self.path}

    This file is not a piece of data and cannot be loaded into the graph.
    """

    # Location of the offending context file, referenced as `{self.path}` in
    # the docstring above. NOTE(review): the docstring presumably doubles as
    # the message template rendered by `DocumentedError` — confirm before
    # rewording it.
    path: URL
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
@dataclass
class ParserNotFound(DocumentedError):
    """
    Parser not found.

    Path: {self.path}
    """

    # Location for which no parser is known, referenced as `{self.path}` in
    # the docstring above. NOTE(review): the docstring presumably doubles as
    # the message template rendered by `DocumentedError` — confirm before
    # rewording it.
    path: Path
|
iolanta/loaders/http.py
DELETED
|
@@ -1,127 +0,0 @@
|
|
|
1
|
-
import json
|
|
2
|
-
import re
|
|
3
|
-
from dataclasses import dataclass, field
|
|
4
|
-
from functools import reduce
|
|
5
|
-
from io import StringIO
|
|
6
|
-
from pathlib import Path
|
|
7
|
-
from typing import Iterable, List, Optional, TextIO, Type, Union
|
|
8
|
-
|
|
9
|
-
from documented import DocumentedError
|
|
10
|
-
from rdflib import URIRef
|
|
11
|
-
from rdflib.parser import URLInputSource
|
|
12
|
-
from requests import Response
|
|
13
|
-
from yarl import URL
|
|
14
|
-
|
|
15
|
-
from iolanta.context import merge
|
|
16
|
-
from iolanta.conversions import url_to_iri, url_to_path
|
|
17
|
-
from iolanta.loaders.base import Loader
|
|
18
|
-
from iolanta.loaders.errors import IsAContext, ParserNotFound
|
|
19
|
-
from iolanta.loaders.local_file import choose_parser_by_extension
|
|
20
|
-
from iolanta.models import LDContext, LDDocument, Quad
|
|
21
|
-
from iolanta.parsers.base import Parser
|
|
22
|
-
from iolanta.parsers.json import JSON
|
|
23
|
-
from iolanta.parsers.markdown import Markdown
|
|
24
|
-
from iolanta.parsers.yaml import YAML
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
@dataclass(frozen=True)
class HTTP(Loader[URL]):
    """Retrieve Linked Data from a file on the Web."""

    # Context passed to the parser by `as_quad_stream` when the caller does
    # not provide one.
    context: LDContext = field(default_factory=dict)

    def choose_parser_class(self, source: URL, response: Response):
        """
        Choose a parser class based on the extension in the URL.

        :raises ValueError: the extension is unknown; the message reports the
            `Content-Type` of the response.
        """
        # FIXME hard code. Make this extensible.
        try:
            return choose_parser_by_extension(source)
        except ParserNotFound as err:
            # `.get()` so a response without the header still produces the
            # intended ValueError instead of a KeyError.
            content_type = response.headers.get('Content-Type')

            raise ValueError(f'Content type: {content_type}') from err

    def extract_alternate_url(
        self,
        source: URL,
        response: Response,
    ) -> URL | None:
        """
        Find the JSON-LD alternate of the document in the `Link` header.

        :return: URL of the alternate representation, or `None` when the
            header is absent or offers no `application/ld+json` alternate.
        """
        link = response.headers.get('Link')

        if link is None:
            return None

        # `search` rather than `match`: the header may carry several links
        # and the JSON-LD alternate does not have to be the first one.
        match = re.search(
            r'<([^>]+)>; rel="alternate"; type="application/ld\+json"',
            link,
        )
        if match is None:
            return None

        # Resolve the target as a URI reference against the requested URL, so
        # that absolute and relative targets are both handled.
        return source.join(URL(match.group(1)))

    def _fetch(self, source: URL) -> Response:
        """Request `source` and return the response, raising on HTTP errors."""
        # NOTE(review): this relies on `source` exposing `.get()`, which yarl's
        # URL does not document — confirm what is actually passed in here.
        response = source.get()
        response.raise_for_status()
        return response

    def as_jsonld_document(
        self,
        source: URL,
        iri: Optional[URIRef] = None,
    ) -> LDDocument:
        """
        Download `source` and represent it as a JSON-LD document.

        If the response advertises a JSON-LD alternate, that one is loaded
        instead, keeping the original IRI.

        :raises ValueError: no parser fits the URL, or parsing failed.
        """
        if iri is None:
            iri = url_to_iri(source)

        response = self._fetch(source)
        alternate_url = self.extract_alternate_url(
            source=source,
            response=response,
        )
        if alternate_url is not None:
            return self.as_jsonld_document(
                source=alternate_url,
                iri=iri,
            )

        # `response.text` doesn't work.
        # Reasoning: https://stackoverflow.com/a/72621231/1245471
        response_as_file = StringIO(response.content.decode('utf-8'))

        parser_class: Type[Parser] = self.choose_parser_class(
            source=source,
            response=response,
        )
        try:
            document = parser_class().as_jsonld_document(response_as_file)
        except Exception as err:
            # Keep the original failure attached as the cause.
            raise ValueError(response) from err

        if isinstance(document, dict):
            document.setdefault('@id', str(iri))

        return document

    def as_file(self, source: URL) -> TextIO:
        """Not supported for remote documents; always raises `ValueError`."""
        raise ValueError(
            'HTTP.as_file() is not supported: '
            'a remote document cannot be opened as a local file.',
        )

    def as_quad_stream(
        self,
        source: URL,
        iri: Optional[URIRef],
        root_loader: 'Loader[URL]',
        context: Optional[LDContext] = None,
    ) -> Iterable[Quad]:
        """
        Download `source` and convert it into a stream of RDF quads.

        :param context: Context for the parser; falls back to `self.context`.
        :return: Quads from the parser, or an empty list when no parser fits.
        """
        response = self._fetch(source)

        try:
            parser_class = self.choose_parser_class(
                source=source,
                response=response,
            )
        except (ParserNotFound, ValueError):
            # `choose_parser_class` reports an unknown format as ValueError.
            return []

        if iri is None:
            iri = url_to_iri(source)

        # Same decoding as in `as_jsonld_document`.
        response_as_file = StringIO(response.content.decode('utf-8'))

        return parser_class().as_quad_stream(
            raw_data=response_as_file,
            iri=iri,
            context=self.context if context is None else context,
            root_loader=root_loader,
        )

    def find_context(self, source: str) -> LDContext:
        """Not supported for remote documents; always raises `ValueError`."""
        raise ValueError(
            'HTTP.find_context() is not supported.',
        )
|
iolanta/loaders/local_directory.py
DELETED
|
@@ -1,148 +0,0 @@
|
|
|
1
|
-
import dataclasses
|
|
2
|
-
from dataclasses import dataclass, field
|
|
3
|
-
from functools import reduce
|
|
4
|
-
from pathlib import Path
|
|
5
|
-
from typing import Iterable, List, Optional, TextIO, Type
|
|
6
|
-
|
|
7
|
-
from rdflib import URIRef
|
|
8
|
-
|
|
9
|
-
from iolanta.context import merge
|
|
10
|
-
from iolanta.conversions import path_to_iri
|
|
11
|
-
from iolanta.ensure_is_context import NotAContext, ensure_is_context
|
|
12
|
-
from iolanta.loaders.base import Loader, SourceType
|
|
13
|
-
from iolanta.loaders.local_file import LocalFile
|
|
14
|
-
from iolanta.models import LDContext, LDDocument, Quad
|
|
15
|
-
from iolanta.namespaces import IOLANTA
|
|
16
|
-
from iolanta.parsers.base import Parser
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
def merge_contexts(*contexts: LDContext) -> LDContext:
    """Fold all truthy contexts, left to right, with `merge`, starting from `{}`."""
    merged = {}
    for candidate in contexts:
        # Falsy entries (for example `None` or an empty context) are skipped.
        if candidate:
            merged = merge(merged, candidate)
    return merged
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
@dataclass(frozen=True)
class LocalDirectory(Loader[Path]):
    """
    Retrieve Linked Data from a directory on local disk.

    The directory is walked recursively. A plain file passed as source is
    delegated to `LocalFile`.
    """

    # File names probed, in this order, for a directory level context.
    context_filenames: List[str] = field(
        default_factory=lambda: [
            'context.yaml',
            'context.yml',
            'context.json',
        ],
    )
    # When False, subdirectories whose name starts with a dot are skipped.
    include_hidden_directories: bool = False

    def find_context(self, source: SourceType) -> LDContext:
        """Not supported; always raises `ValueError`."""
        raise ValueError(
            'LocalDirectory.find_context() is not supported; '
            'use directory_level_context() instead.',
        )

    def directory_level_context(self, path: Path) -> Optional[LDContext]:
        """
        Load the context file stored directly in `path`, if there is one.

        :return: Context from the first candidate file that exists and yields
            a non-empty document; `None` when there is no such file.
        :raises NotAContext: the file content is not a context; the error
            carries the path of that file.
        """
        for file_name in self.context_filenames:
            if (context_path := path / file_name).is_file():
                document = LocalFile(logger=self.logger).as_jsonld_document(
                    source=context_path,
                )

                if document:
                    try:
                        return ensure_is_context(document)
                    except NotAContext as err:
                        raise dataclasses.replace(
                            err,
                            path=context_path,
                        ) from err
        return None

    def choose_parser_class(self, source: Path) -> Type[Parser]:
        """Not applicable to a directory; always raises `ValueError`."""
        raise ValueError('This is a directory')

    def as_quad_stream(
        self,
        source: Path,
        iri: Optional[URIRef],
        root_loader: Loader[Path],
        context: Optional[LDContext] = None,
    ) -> Iterable[Quad]:
        """
        Extract a sequence of quads from a local directory.

        :param source: Directory to walk, or a single file.
        :param iri: IRI of `source`; derived from its absolute path if `None`.
        :param root_loader: Passed through to the loaders of the children.
        :param context: Context inherited from the parent directory, if any.
        """
        if iri is None:
            iri = path_to_iri(source.absolute())

        if not source.is_dir():
            yield from LocalFile(logger=self.logger).as_quad_stream(
                source=source,
                root_loader=root_loader,
                iri=iri,
                context=context,
            )
            return

        context = merge_contexts(
            context,
            self.directory_level_context(source),
        )

        # Child IRIs are built by appending the child name to the directory
        # IRI, so it must end with a slash. Done once, before the loop.
        if not iri.endswith('/'):
            iri = URIRef(f'{iri}/')

        for child in source.iterdir():
            child_iri = URIRef(f'{iri}{child.name}')

            if child.is_dir():
                if (
                    not self.include_hidden_directories
                    and child.name.startswith('.')
                ):
                    self.logger.info(
                        'Skipping a hidden directory: %s',
                        child,
                    )
                    continue

                child_iri = URIRef(f'{child_iri}/')

                # Recurse through this instance, so that `context_filenames`
                # and `include_hidden_directories` also apply to nested
                # directories.
                yield from self.as_quad_stream(
                    source=child,
                    iri=child_iri,
                    root_loader=root_loader,
                    context=context,
                )

            elif child.stem != 'context':
                yield from LocalFile(logger=self.logger).as_quad_stream(
                    source=child,
                    iri=child_iri,
                    root_loader=root_loader,
                    context=context,
                )

            # Every visited child, context files included, is linked to its
            # parent directory.
            yield Quad(
                subject=child_iri,
                predicate=IOLANTA.isChildOf,
                object=iri,
                graph=URIRef(
                    'https://iolanta.tech/loaders/local-directory',
                ),
            )

    def as_file(self, source: Path) -> TextIO:
        """
        Construct a file-like object.

        The handle is returned open; the caller is responsible for closing it.
        """
        return source.open()

    def as_jsonld_document(
        self,
        source: Path,
        iri: Optional[URIRef] = None,
    ) -> LDDocument:
        """Not applicable to a directory; always raises `ValueError`."""
        raise ValueError('This is a directory.')
|
iolanta/loaders/local_file.py
DELETED
|
@@ -1,107 +0,0 @@
|
|
|
1
|
-
import logging
|
|
2
|
-
from dataclasses import dataclass
|
|
3
|
-
from pathlib import Path
|
|
4
|
-
from typing import Iterable, Optional, TextIO, Type
|
|
5
|
-
|
|
6
|
-
from rdflib import Literal, URIRef
|
|
7
|
-
|
|
8
|
-
from iolanta.conversions import url_to_iri
|
|
9
|
-
from iolanta.loaders.base import Loader
|
|
10
|
-
from iolanta.loaders.errors import IsAContext, ParserNotFound
|
|
11
|
-
from iolanta.models import LDContext, LDDocument, Quad
|
|
12
|
-
from iolanta.namespaces import IOLANTA
|
|
13
|
-
from iolanta.parsers.base import Parser
|
|
14
|
-
from iolanta.parsers.json import JSON
|
|
15
|
-
from iolanta.parsers.markdown import Markdown
|
|
16
|
-
from iolanta.parsers.yaml import YAML
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
def choose_parser_by_extension(path: Path) -> Type[Parser]:
    """
    Choose parser class based on file extension.

    FIXME this is currently hard coded; need to change to a more extensible
      mechanism.

    :param path: Location whose `suffix` selects the parser.
    :raises ParserNotFound: no parser is registered for the suffix.
    """
    parser_by_suffix = {
        '.json': JSON,
        '.jsonld': JSON,

        '.yaml': YAML,
        # `context.yml` is among the context file names the directory loader
        # looks for, so this suffix has to resolve to a parser as well.
        '.yml': YAML,
        '.yamlld': YAML,

        '.md': Markdown,
    }

    try:
        return parser_by_suffix[path.suffix]
    except KeyError:
        # The KeyError is an implementation detail of the lookup.
        raise ParserNotFound(path=path) from None
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
@dataclass(frozen=True)
class LocalFile(Loader[Path]):
    """
    Retrieve Linked Data from a file on local disk.

    Requires Path with file:// scheme as input.
    """

    def find_context(self, source: str) -> LDContext:
        """Return an empty context."""
        return {}

    def choose_parser_class(self, source: Path) -> Type[Parser]:
        """Choose parser class based on file extension."""
        return choose_parser_by_extension(source)

    def as_quad_stream(
        self,
        source: Path,
        root_loader: Loader[Path],
        iri: Optional[URIRef] = None,
        context: Optional[LDContext] = None,
    ) -> Iterable[Quad]:
        """
        Extract a sequence of quads from a local file.

        Yields nothing when no parser is known for the file extension.

        :raises IsAContext: the file is named `context.*`.
        """
        if source.stem == 'context':
            raise IsAContext(path=source)

        try:
            parser_class = self.choose_parser_class(source)
        except ParserNotFound:
            # Unknown format: end the stream without producing quads.
            return

        if iri is None:
            # NOTE(review): a Path is handed to `url_to_iri` here, while the
            # directory loader uses `path_to_iri` — confirm this is intended.
            iri = url_to_iri(source)

        self.logger.info('Loading data into graph: %s', source)
        with source.open() as text_io:
            yield from parser_class().as_quad_stream(
                raw_data=text_io,
                iri=iri,
                context=context,
                root_loader=root_loader,
            )

        yield Quad(
            iri,
            IOLANTA.fileName,
            Literal(source.name),
            URIRef('https://iolanta.tech/loaders/local-file'),
        )

    def as_file(self, source: Path) -> TextIO:
        """
        Construct a file-like object.

        The handle is returned open; the caller is responsible for closing it.
        """
        return source.open()

    def as_jsonld_document(
        self,
        source: Path,
        iri: Optional[URIRef] = None,
    ) -> LDDocument:
        """
        Parse the file into a JSON-LD document.

        When `iri` is given and the document is a dict without `@id`, the IRI
        is stored under `@id`.
        """
        parser_class: Type[Parser] = self.choose_parser_class(source)
        with source.open() as text_io:
            document = parser_class().as_jsonld_document(text_io)

        if iri is not None and isinstance(document, dict):
            document.setdefault('@id', str(iri))

        return document
|