iolanta 2.0.6__py3-none-any.whl → 2.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,2 +0,0 @@
1
- from iolanta.loaders.base import Loader
2
- from iolanta.loaders.local_directory import LocalDirectory
iolanta/loaders/base.py DELETED
@@ -1,124 +0,0 @@
1
- from abc import ABC
2
- from dataclasses import dataclass
3
- from logging import Logger
4
- from typing import (
5
- Any,
6
- Dict,
7
- Generic,
8
- Iterable,
9
- Optional,
10
- TextIO,
11
- TypedDict,
12
- TypeVar,
13
- )
14
-
15
- from rdflib import URIRef
16
- from yarl import URL
17
-
18
- from iolanta.conversions import url_to_iri
19
- from iolanta.ensure_is_context import ensure_is_context
20
- from iolanta.models import LDContext, LDDocument, Quad
21
- from iolanta.namespaces import PYTHON
22
-
23
- SourceType = TypeVar('SourceType')
24
-
25
-
26
- PyLDOptions = Dict[str, Any] # type: ignore
27
-
28
- PyLDResponse = TypedDict(
29
- 'PyLDResponse', {
30
- 'contentType': str,
31
- 'contextUrl': Optional[str],
32
- 'documentUrl': str,
33
- 'document': LDDocument,
34
- },
35
- )
36
-
37
-
38
- def term_for_python_class(cls: type) -> URIRef:
39
- """Construct term for Python class."""
40
- return PYTHON.term(f'{cls.__module__}.{cls.__qualname__}')
41
-
42
-
43
- # noinspection TaskProblemsInspection
44
- @dataclass(frozen=True)
45
- class Loader(ABC, Generic[SourceType]):
46
- """
47
- Base class for loaders.
48
-
49
- Loader receives a URL (or a path) to certain location. It is responsible for
50
- reading data from that location and returning it as a stream of RDF quads.
51
-
52
- Usually, depending on the data format, Loader leverages Parsers for that
53
- purpose.
54
- """
55
-
56
- logger: Logger
57
-
58
- @classmethod
59
- def loader_class_iri(cls) -> URIRef:
60
- """Import path to the loader class."""
61
- return term_for_python_class(cls)
62
-
63
- def choose_parser_class(self, source: SourceType):
64
- """Find which parser class to use for this URL."""
65
- raise NotImplementedError(
66
- f'{self}.choose_parser_class() is not implemented.',
67
- )
68
-
69
- def as_jsonld_document(
70
- self,
71
- source: SourceType,
72
- iri: Optional[URIRef] = None,
73
- ) -> LDDocument:
74
- """Represent a file as a JSON-LD document."""
75
- raise NotImplementedError(
76
- f'{self}.as_jsonld_document() is not implemented.',
77
- )
78
-
79
- def as_file(self, source: SourceType) -> TextIO:
80
- """Construct a file-like object."""
81
- raise NotImplementedError()
82
-
83
- def as_quad_stream(
84
- self,
85
- source: SourceType,
86
- iri: Optional[URIRef],
87
- root_loader: 'Loader[SourceType]',
88
- context: Optional[LDContext] = None,
89
- ) -> Iterable[Quad]:
90
- """Convert data into a stream of RDF quads."""
91
- raise NotImplementedError(
92
- f'{self}.as_quad_stream() is not implemented.',
93
- )
94
-
95
- def find_context(self, source: SourceType) -> LDContext:
96
- """Find context for the file."""
97
- raise NotImplementedError(
98
- f'{self}.find_context() is not implemented.',
99
- )
100
-
101
- def __call__(self, source: str, options: PyLDOptions) -> PyLDResponse:
102
- """
103
- Call the loader to retrieve the document in a PYLD friendly format.
104
-
105
- Used to resolve remote contexts.
106
-
107
- The type of `source` parameter is intentionally `str`: that's the only
108
- thing which pyld can do.
109
- """
110
- source = URL(source)
111
-
112
- document = ensure_is_context(
113
- self.as_jsonld_document(
114
- source=URL(source),
115
- iri=url_to_iri(source),
116
- ),
117
- )
118
-
119
- return {
120
- 'document': document,
121
- 'contextUrl': None,
122
- 'documentUrl': source,
123
- 'contentType': 'application/ld+json',
124
- }
@@ -1,66 +0,0 @@
1
- from dataclasses import dataclass
2
- from typing import Any, Dict, Iterable, Optional, TextIO
3
-
4
- from rdflib import URIRef
5
- from yarl import URL
6
-
7
- from iolanta.loaders.base import Loader, SourceType
8
- from iolanta.models import LDContext, LDDocument, Quad
9
-
10
-
11
- @dataclass(frozen=True)
12
- class DataTypeChoiceLoader(Loader[Any]): # type: ignore
13
- """Try to load a file via several loaders."""
14
-
15
- loader_by_data_type: Dict[type, Loader[Any]] # type: ignore
16
-
17
- def choose_parser_class(self, source: SourceType):
18
- raise ValueError('choose_parser_class')
19
-
20
- def as_file(self, source: SourceType) -> TextIO:
21
- raise ValueError('as_file')
22
-
23
- def find_context(self, source: SourceType) -> LDContext:
24
- raise ValueError('find_context')
25
-
26
- def resolve_loader(self, source: Any): # type: ignore
27
- """Find loader instance by URL."""
28
- for source_type, loader in self.loader_by_data_type.items():
29
- if isinstance(source, source_type):
30
- return loader
31
-
32
- source_type = type(source)
33
- raise ValueError(
34
- f'Cannot find a loader for source: {source} '
35
- f'of type: {source_type}',
36
- )
37
-
38
- def as_jsonld_document(
39
- self,
40
- source: URL,
41
- iri: Optional[URIRef] = None,
42
- ) -> LDDocument:
43
- """Represent a file as a JSON-LD document."""
44
- return self.resolve_loader(
45
- source=source,
46
- ).as_jsonld_document(
47
- source=source,
48
- iri=iri,
49
- )
50
-
51
- def as_quad_stream(
52
- self,
53
- source: str,
54
- iri: Optional[URIRef],
55
- root_loader: Optional[Loader[URL]] = None,
56
- context: Optional[LDContext] = None,
57
- ) -> Iterable[Quad]:
58
- """Convert data into a stream of RDF quads."""
59
- return self.resolve_loader(
60
- source=source,
61
- ).as_quad_stream(
62
- source=source,
63
- iri=iri,
64
- root_loader=root_loader or self,
65
- context=context,
66
- )
@@ -1,57 +0,0 @@
1
- from dataclasses import dataclass
2
- from typing import Iterable, Optional, TextIO, Type
3
-
4
- from rdflib import Literal, URIRef
5
-
6
- from iolanta.conversions import url_to_iri
7
- from iolanta.loaders.base import Loader
8
- from iolanta.loaders.errors import IsAContext, ParserNotFound
9
- from iolanta.models import LDContext, LDDocument, Quad
10
- from iolanta.namespaces import IOLANTA
11
- from iolanta.parsers.base import Parser
12
- from iolanta.parsers.dict_parser import DictParser
13
- from iolanta.parsers.json import JSON
14
- from iolanta.parsers.markdown import Markdown
15
- from iolanta.parsers.yaml import YAML
16
-
17
-
18
- @dataclass(frozen=True)
19
- class DictLoader(Loader[LDDocument]):
20
- """
21
- Retrieve Linked Data from a file on local disk.
22
-
23
- Requires a dict of raw JSON-LD data.
24
- """
25
-
26
- def find_context(self, source: str) -> LDContext:
27
- raise ValueError('???WTF?')
28
-
29
- def choose_parser_class(self, source: LDDocument) -> Type[Parser]:
30
- return DictParser(source)
31
-
32
- def as_quad_stream(
33
- self,
34
- source: LDDocument,
35
- root_loader: Loader[LDDocument],
36
- iri: Optional[URIRef] = None,
37
- context: Optional[LDContext] = None,
38
- ) -> Iterable[Quad]:
39
- """Extract a sequence of quads."""
40
- yield from DictParser().as_quad_stream(
41
- raw_data=source,
42
- iri=iri,
43
- context=context,
44
- root_loader=root_loader,
45
- )
46
-
47
- def as_file(self, source: LDDocument) -> TextIO:
48
- """Construct a file-like object."""
49
- raise ValueError('FOO')
50
-
51
- def as_jsonld_document(
52
- self,
53
- source: LDDocument,
54
- iri: Optional[URIRef] = None,
55
- ) -> LDDocument:
56
- """As JSON-LD document."""
57
- return source
iolanta/loaders/errors.py DELETED
@@ -1,29 +0,0 @@
1
- from dataclasses import dataclass
2
- from pathlib import Path
3
-
4
- from documented import DocumentedError
5
- from yarl import URL
6
-
7
-
8
- @dataclass
9
- class IsAContext(DocumentedError):
10
- """
11
- The provided file is a context.
12
-
13
- - Path: {self.path}
14
-
15
- This file is not a piece of data and cannot be loaded into the graph.
16
- """
17
-
18
- path: URL
19
-
20
-
21
- @dataclass
22
- class ParserNotFound(DocumentedError):
23
- """
24
- Parser not found.
25
-
26
- Path: {self.path}
27
- """
28
-
29
- path: Path
iolanta/loaders/http.py DELETED
@@ -1,127 +0,0 @@
1
- import json
2
- import re
3
- from dataclasses import dataclass, field
4
- from functools import reduce
5
- from io import StringIO
6
- from pathlib import Path
7
- from typing import Iterable, List, Optional, TextIO, Type, Union
8
-
9
- from documented import DocumentedError
10
- from rdflib import URIRef
11
- from rdflib.parser import URLInputSource
12
- from requests import Response
13
- from yarl import URL
14
-
15
- from iolanta.context import merge
16
- from iolanta.conversions import url_to_iri, url_to_path
17
- from iolanta.loaders.base import Loader
18
- from iolanta.loaders.errors import IsAContext, ParserNotFound
19
- from iolanta.loaders.local_file import choose_parser_by_extension
20
- from iolanta.models import LDContext, LDDocument, Quad
21
- from iolanta.parsers.base import Parser
22
- from iolanta.parsers.json import JSON
23
- from iolanta.parsers.markdown import Markdown
24
- from iolanta.parsers.yaml import YAML
25
-
26
-
27
- @dataclass(frozen=True)
28
- class HTTP(Loader[URL]):
29
- """
30
- Retrieve Linked Data from a file on the Web.
31
- """
32
-
33
- context: LDContext = field(default_factory=dict)
34
-
35
- def choose_parser_class(self, source: URL, response: Response):
36
- # FIXME hard code. Make this extensible.
37
- try:
38
- return choose_parser_by_extension(source)
39
- except ParserNotFound:
40
- content_type = response.headers['Content-Type']
41
-
42
- raise ValueError(f'Content type: {content_type}')
43
-
44
- def extract_alternate_url(
45
- self,
46
- source: URL,
47
- response: Response,
48
- ) -> URL | None:
49
- link = response.headers.get('Link')
50
-
51
- if link is None:
52
- return None
53
-
54
- match = re.match(
55
- r'<([^>]+)>; rel="alternate"; type="application/ld\+json"',
56
- link,
57
- )
58
- if match is None:
59
- return None
60
-
61
- return source / match.group(1)
62
-
63
- def as_jsonld_document(
64
- self,
65
- source: URL,
66
- iri: Optional[URIRef] = None,
67
- ) -> LDDocument:
68
- if iri is None:
69
- iri = url_to_iri(source)
70
-
71
- response = source.get()
72
- response.raise_for_status()
73
- alternate_url = self.extract_alternate_url(
74
- source=source,
75
- response=response,
76
- )
77
- if alternate_url is not None:
78
- return self.as_jsonld_document(
79
- source=alternate_url,
80
- iri=iri,
81
- )
82
-
83
- # `response.text` doesn't work.
84
- # Reasoning: https://stackoverflow.com/a/72621231/1245471
85
- response_as_file = StringIO(response.content.decode('utf-8'))
86
-
87
- parser_class: Type[Parser] = self.choose_parser_class(
88
- source=source,
89
- response=response,
90
- )
91
- try:
92
- document = parser_class().as_jsonld_document(response_as_file)
93
- except Exception:
94
- raise ValueError(response)
95
-
96
- if iri is not None and isinstance(document, dict):
97
- document.setdefault('@id', str(iri))
98
-
99
- return document
100
-
101
- def as_file(self, source: URL) -> TextIO:
102
- raise ValueError('!!!')
103
-
104
- def as_quad_stream(
105
- self,
106
- source: URL,
107
- iri: Optional[URIRef],
108
- root_loader: 'Loader[URL]',
109
- ) -> Iterable[Quad]:
110
- try:
111
- parser_class = self.choose_parser_class(source)
112
- except ParserNotFound:
113
- return []
114
-
115
- if iri is None:
116
- iri = url_to_iri(source)
117
-
118
- with source.open() as text_io:
119
- return parser_class().as_quad_stream(
120
- raw_data=text_io,
121
- iri=iri,
122
- context=self.context,
123
- root_loader=root_loader,
124
- )
125
-
126
- def find_context(self, source: str) -> LDContext:
127
- raise ValueError('??!!?')
@@ -1,148 +0,0 @@
1
- import dataclasses
2
- from dataclasses import dataclass, field
3
- from functools import reduce
4
- from pathlib import Path
5
- from typing import Iterable, List, Optional, TextIO, Type
6
-
7
- from rdflib import URIRef
8
-
9
- from iolanta.context import merge
10
- from iolanta.conversions import path_to_iri
11
- from iolanta.ensure_is_context import NotAContext, ensure_is_context
12
- from iolanta.loaders.base import Loader, SourceType
13
- from iolanta.loaders.local_file import LocalFile
14
- from iolanta.models import LDContext, LDDocument, Quad
15
- from iolanta.namespaces import IOLANTA
16
- from iolanta.parsers.base import Parser
17
-
18
-
19
- def merge_contexts(*contexts: LDContext) -> LDContext:
20
- return reduce(
21
- merge,
22
- filter(bool, contexts),
23
- {},
24
- )
25
-
26
-
27
- @dataclass(frozen=True)
28
- class LocalDirectory(Loader[Path]):
29
- """
30
- Retrieve Linked Data from a file on local disk.
31
-
32
- Requires Path with file:// scheme as input.
33
- """
34
-
35
- context_filenames: List[str] = field(
36
- default_factory=lambda: [
37
- 'context.yaml',
38
- 'context.yml',
39
- 'context.json',
40
- ],
41
- )
42
- include_hidden_directories: bool = False
43
-
44
- def find_context(self, source: SourceType) -> LDContext:
45
- raise ValueError('?!!!???')
46
-
47
- def directory_level_context(self, path: Path) -> Optional[LDContext]:
48
- for file_name in self.context_filenames:
49
- if (context_path := path / file_name).is_file():
50
- document = LocalFile(logger=self.logger).as_jsonld_document(
51
- source=context_path,
52
- )
53
-
54
- if document:
55
- try:
56
- return ensure_is_context(document)
57
- except NotAContext as err:
58
- raise dataclasses.replace(
59
- err,
60
- path=context_path,
61
- )
62
- return None
63
-
64
- def choose_parser_class(self, source: Path) -> Type[Parser]:
65
- """Choose parser class based on file extension."""
66
- raise ValueError('This is a directory')
67
-
68
- def as_quad_stream(
69
- self,
70
- source: Path,
71
- iri: Optional[URIRef],
72
- root_loader: Loader[Path],
73
- context: Optional[LDContext] = None,
74
- ) -> Iterable[Quad]:
75
- """Extract a sequence of quads from a local file."""
76
- if iri is None:
77
- iri = path_to_iri(source.absolute())
78
-
79
- if not source.is_dir():
80
- yield from LocalFile(logger=self.logger).as_quad_stream(
81
- source=source,
82
- root_loader=root_loader,
83
- iri=iri,
84
- context=context,
85
- )
86
- return
87
-
88
- context = merge_contexts(
89
- context,
90
- self.directory_level_context(source),
91
- )
92
-
93
- for child in source.iterdir():
94
- if not iri.endswith('/'):
95
- iri = URIRef(f'{iri}/')
96
-
97
- child_iri = URIRef(f'{iri}{child.name}')
98
-
99
- if child.is_dir():
100
- if (
101
- not self.include_hidden_directories
102
- and child.name.startswith('.')
103
- ):
104
- self.logger.info(
105
- 'Skipping a hidden directory: %s',
106
- child,
107
- )
108
- continue
109
-
110
- child_iri += '/'
111
-
112
- yield from LocalDirectory(logger=self.logger).as_quad_stream(
113
- source=child,
114
- iri=child_iri,
115
- root_loader=root_loader,
116
- context=context,
117
- )
118
-
119
- elif child.stem != 'context':
120
- yield from LocalFile(logger=self.logger).as_quad_stream(
121
- source=child,
122
- iri=child_iri,
123
- root_loader=root_loader,
124
- context=context,
125
- )
126
-
127
- if iri is not None:
128
- yield Quad(
129
- subject=child_iri,
130
- predicate=IOLANTA.isChildOf,
131
- object=iri,
132
- graph=URIRef(
133
- 'https://iolanta.tech/loaders/local-directory',
134
- ),
135
- )
136
-
137
- def as_file(self, source: Path) -> TextIO:
138
- """Construct a file-like object."""
139
- with source.open() as text_io:
140
- return text_io
141
-
142
- def as_jsonld_document(
143
- self,
144
- source: Path,
145
- iri: Optional[URIRef] = None,
146
- ) -> LDDocument:
147
- """As JSON-LD document."""
148
- raise ValueError('This is a directory.')
@@ -1,107 +0,0 @@
1
- import logging
2
- from dataclasses import dataclass
3
- from pathlib import Path
4
- from typing import Iterable, Optional, TextIO, Type
5
-
6
- from rdflib import Literal, URIRef
7
-
8
- from iolanta.conversions import url_to_iri
9
- from iolanta.loaders.base import Loader
10
- from iolanta.loaders.errors import IsAContext, ParserNotFound
11
- from iolanta.models import LDContext, LDDocument, Quad
12
- from iolanta.namespaces import IOLANTA
13
- from iolanta.parsers.base import Parser
14
- from iolanta.parsers.json import JSON
15
- from iolanta.parsers.markdown import Markdown
16
- from iolanta.parsers.yaml import YAML
17
-
18
-
19
- def choose_parser_by_extension(path: Path) -> Type[Parser]:
20
- """
21
- Choose parser class based on file extension.
22
-
23
- FIXME this is currently hard coded; need to change to a more extensible
24
- mechanism.
25
- """
26
- try:
27
- return {
28
- '.json': JSON,
29
- '.jsonld': JSON,
30
-
31
- '.yaml': YAML,
32
- '.yamlld': YAML,
33
-
34
- '.md': Markdown,
35
- }[path.suffix]
36
- except KeyError:
37
- raise ParserNotFound(path=path)
38
-
39
-
40
- @dataclass(frozen=True)
41
- class LocalFile(Loader[Path]):
42
- """
43
- Retrieve Linked Data from a file on local disk.
44
-
45
- Requires Path with file:// scheme as input.
46
- """
47
-
48
- def find_context(self, source: str) -> LDContext:
49
- return {}
50
-
51
- def choose_parser_class(self, source: Path) -> Type[Parser]:
52
- return choose_parser_by_extension(source)
53
-
54
- def as_quad_stream(
55
- self,
56
- source: Path,
57
- root_loader: Loader[Path],
58
- iri: Optional[URIRef] = None,
59
- context: Optional[LDContext] = None,
60
- ) -> Iterable[Quad]:
61
- """Extract a sequence of quads from a local file."""
62
- if source.stem == 'context':
63
- raise IsAContext(path=source)
64
-
65
- try:
66
- parser_class = self.choose_parser_class(source)
67
- except ParserNotFound:
68
- return []
69
-
70
- if iri is None:
71
- iri = url_to_iri(source)
72
-
73
- self.logger.info('Loading data into graph: %s', source)
74
- with source.open() as text_io:
75
- yield from parser_class().as_quad_stream(
76
- raw_data=text_io,
77
- iri=iri,
78
- context=context,
79
- root_loader=root_loader,
80
- )
81
-
82
- yield Quad(
83
- iri,
84
- IOLANTA.fileName,
85
- Literal(source.name),
86
- URIRef('https://iolanta.tech/loaders/local-file'),
87
- )
88
-
89
- def as_file(self, source: Path) -> TextIO:
90
- """Construct a file-like object."""
91
- with source.open() as text_io:
92
- return text_io
93
-
94
- def as_jsonld_document(
95
- self,
96
- source: Path,
97
- iri: Optional[URIRef] = None,
98
- ) -> LDDocument:
99
- """As JSON-LD document."""
100
- parser_class: Type[Parser] = self.choose_parser_class(source)
101
- with source.open() as text_io:
102
- document = parser_class().as_jsonld_document(text_io)
103
-
104
- if iri is not None and isinstance(document, dict):
105
- document.setdefault('@id', str(iri))
106
-
107
- return document