iolanta 2.0.5__py3-none-any.whl → 2.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,107 +0,0 @@
1
- import logging
2
- from dataclasses import dataclass
3
- from pathlib import Path
4
- from typing import Iterable, Optional, TextIO, Type
5
-
6
- from rdflib import Literal, URIRef
7
-
8
- from iolanta.conversions import url_to_iri
9
- from iolanta.loaders.base import Loader
10
- from iolanta.loaders.errors import IsAContext, ParserNotFound
11
- from iolanta.models import LDContext, LDDocument, Quad
12
- from iolanta.namespaces import IOLANTA
13
- from iolanta.parsers.base import Parser
14
- from iolanta.parsers.json import JSON
15
- from iolanta.parsers.markdown import Markdown
16
- from iolanta.parsers.yaml import YAML
17
-
18
-
19
- def choose_parser_by_extension(path: Path) -> Type[Parser]:
20
- """
21
- Choose parser class based on file extension.
22
-
23
- FIXME this is currently hard coded; need to change to a more extensible
24
- mechanism.
25
- """
26
- try:
27
- return {
28
- '.json': JSON,
29
- '.jsonld': JSON,
30
-
31
- '.yaml': YAML,
32
- '.yamlld': YAML,
33
-
34
- '.md': Markdown,
35
- }[path.suffix]
36
- except KeyError:
37
- raise ParserNotFound(path=path)
38
-
39
-
40
- @dataclass(frozen=True)
41
- class LocalFile(Loader[Path]):
42
- """
43
- Retrieve Linked Data from a file on local disk.
44
-
45
- Requires Path with file:// scheme as input.
46
- """
47
-
48
- def find_context(self, source: str) -> LDContext:
49
- return {}
50
-
51
- def choose_parser_class(self, source: Path) -> Type[Parser]:
52
- return choose_parser_by_extension(source)
53
-
54
- def as_quad_stream(
55
- self,
56
- source: Path,
57
- root_loader: Loader[Path],
58
- iri: Optional[URIRef] = None,
59
- context: Optional[LDContext] = None,
60
- ) -> Iterable[Quad]:
61
- """Extract a sequence of quads from a local file."""
62
- if source.stem == 'context':
63
- raise IsAContext(path=source)
64
-
65
- try:
66
- parser_class = self.choose_parser_class(source)
67
- except ParserNotFound:
68
- return []
69
-
70
- if iri is None:
71
- iri = url_to_iri(source)
72
-
73
- self.logger.info('Loading data into graph: %s', source)
74
- with source.open() as text_io:
75
- yield from parser_class().as_quad_stream(
76
- raw_data=text_io,
77
- iri=iri,
78
- context=context,
79
- root_loader=root_loader,
80
- )
81
-
82
- yield Quad(
83
- iri,
84
- IOLANTA.fileName,
85
- Literal(source.name),
86
- URIRef('https://iolanta.tech/loaders/local-file'),
87
- )
88
-
89
- def as_file(self, source: Path) -> TextIO:
90
- """Construct a file-like object."""
91
- with source.open() as text_io:
92
- return text_io
93
-
94
- def as_jsonld_document(
95
- self,
96
- source: Path,
97
- iri: Optional[URIRef] = None,
98
- ) -> LDDocument:
99
- """As JSON-LD document."""
100
- parser_class: Type[Parser] = self.choose_parser_class(source)
101
- with source.open() as text_io:
102
- document = parser_class().as_jsonld_document(text_io)
103
-
104
- if iri is not None and isinstance(document, dict):
105
- document.setdefault('@id', str(iri))
106
-
107
- return document
@@ -1,72 +0,0 @@
1
- from dataclasses import dataclass
2
- from typing import Dict, Iterable, Optional
3
-
4
- from rdflib import URIRef
5
- from yarl import URL
6
-
7
- from iolanta.conversions import url_to_iri
8
- from iolanta.ensure_is_context import ensure_is_context
9
- from iolanta.loaders.base import Loader, PyLDOptions, PyLDResponse
10
- from iolanta.models import LDContext, LDDocument, Quad
11
-
12
-
13
- @dataclass(frozen=True)
14
- class SchemeChoiceLoader(Loader[URL]):
15
- """Try to load a file via several loaders."""
16
-
17
- loader_by_scheme: Dict[str, Loader[URL]]
18
-
19
- def __call__(self, source: str, options: PyLDOptions) -> PyLDResponse:
20
- """Compile document for PyLD."""
21
- source = URL(source)
22
-
23
- document = ensure_is_context(
24
- self.as_jsonld_document(
25
- source=source,
26
- iri=url_to_iri(source),
27
- ),
28
- )
29
-
30
- return {
31
- 'document': document,
32
- 'contextUrl': None,
33
- 'contentType': 'application/ld+json',
34
- 'documentUrl': str(source),
35
- }
36
-
37
- def resolve_loader_by_url(self, url: URL):
38
- """Find loader instance by URL."""
39
- try:
40
- return self.loader_by_scheme[url.scheme]
41
- except (KeyError, AttributeError):
42
- raise ValueError(f'Cannot find a loader for URL: {url}')
43
-
44
- def as_jsonld_document(
45
- self,
46
- source: URL,
47
- iri: Optional[URIRef] = None,
48
- ) -> LDDocument:
49
- """Represent a file as a JSON-LD document."""
50
- return self.resolve_loader_by_url(
51
- url=source,
52
- ).as_jsonld_document(
53
- source=source,
54
- iri=iri,
55
- )
56
-
57
- def as_quad_stream(
58
- self,
59
- source: str,
60
- iri: Optional[URIRef],
61
- root_loader: Optional[Loader[URL]] = None,
62
- context: Optional[LDContext] = None,
63
- ) -> Iterable[Quad]:
64
- """Convert data into a stream of RDF quads."""
65
- return self.resolve_loader_by_url(
66
- url=source,
67
- ).as_quad_stream(
68
- source=source,
69
- iri=iri,
70
- root_loader=root_loader or self,
71
- context=context,
72
- )
File without changes
iolanta/parsers/base.py DELETED
@@ -1,41 +0,0 @@
1
- from abc import ABC
2
- from dataclasses import dataclass
3
- from typing import Generic, Iterable, Optional, TypeVar
4
-
5
- from rdflib import URIRef
6
-
7
- from iolanta.loaders.base import Loader
8
- from iolanta.models import LDContext, LDDocument, Quad
9
-
10
- RawDataType = TypeVar('RawDataType')
11
-
12
-
13
- @dataclass(frozen=True)
14
- class Parser(ABC, Generic[RawDataType]):
15
- """
16
- Parser reads data from a file-like object and interprets them.
17
-
18
- For interpretation, it is also supplied with a context.
19
- """
20
-
21
- blank_node_prefix: str = ''
22
-
23
- def as_jsonld_document(
24
- self,
25
- raw_data: RawDataType,
26
- ) -> LDDocument:
27
- """Generate a JSON-LD document."""
28
- raise NotImplementedError(
29
- f'{self}.as_json_document() is not implemented.',
30
- )
31
-
32
- def as_quad_stream(
33
- self,
34
- raw_data: RawDataType,
35
- iri: Optional[URIRef],
36
- context: LDContext,
37
- root_loader: Loader,
38
- ) -> Iterable[Quad]:
39
- raise NotImplementedError(
40
- f'{self}.as_quad_stream() is not implemented.',
41
- )
@@ -1,171 +0,0 @@
1
- import dataclasses
2
- import itertools
3
- import json
4
- import uuid
5
- from dataclasses import dataclass
6
- from typing import Any, Iterable, Optional
7
-
8
- from documented import DocumentedError
9
- from pyld.jsonld import _resolved_context_cache # noqa: WPS450
10
- from pyld.jsonld import JsonLdError, expand, flatten, to_rdf # noqa: WPS347
11
- from rdflib import BNode, URIRef
12
- from yarl import URL
13
-
14
- from iolanta.errors import UnresolvedIRI
15
- from iolanta.loaders import Loader
16
- from iolanta.models import LDContext, LDDocument, NotLiteralNode, Quad
17
- from iolanta.namespaces import IOLANTA, LOCAL, RDF
18
- from iolanta.parse_quads import parse_quads
19
- from iolanta.parsers.base import Parser, RawDataType
20
-
21
-
22
- class DictParser(Parser[LDDocument]):
23
- """
24
- Old version of dict parser.
25
-
26
- FIXME: Remove this.
27
- """
28
-
29
- def as_jsonld_document(self, raw_data: LDDocument) -> LDDocument:
30
- """Do nothing."""
31
- return raw_data
32
-
33
- def as_quad_stream(
34
- self,
35
- raw_data: RawDataType,
36
- iri: Optional[NotLiteralNode],
37
- context: LDContext,
38
- root_loader: Loader,
39
- ) -> Iterable[Quad]:
40
- """Do nonsense."""
41
- # This helps avoid weird bugs when loading data.
42
- _resolved_context_cache.clear()
43
-
44
- document = raw_data
45
-
46
- if iri is None:
47
- uid = uuid.uuid4().hex
48
- iri = BNode(f'_:dict:{uid}')
49
-
50
- document = assign_key_if_not_present(
51
- document=document,
52
- key='iolanta:subjectOf',
53
- default_value={
54
- '$id': str(iri),
55
- },
56
- )
57
-
58
- try:
59
- document = expand(
60
- document,
61
- options={
62
- 'expandContext': context,
63
- 'documentLoader': root_loader,
64
-
65
- # Explanation:
66
- # https://github.com/digitalbazaar/pyld/issues/143
67
- 'base': str(LOCAL),
68
- },
69
- )
70
- except (JsonLdError, KeyError, TypeError) as err:
71
- raise ExpandError(
72
- message=str(err),
73
- document=document,
74
- context=context,
75
- iri=iri,
76
- document_loader=root_loader,
77
- ) from err
78
-
79
- document = flatten(document)
80
-
81
- static_quads = [
82
- Quad(iri, RDF.type, IOLANTA.File, iri),
83
- ]
84
-
85
- try:
86
- parsed_quads = list(
87
- parse_quads(
88
- quads_document=to_rdf(document),
89
- # FIXME:
90
- # title: Can iri be None in a parser?
91
- # description: |
92
- # Does it make sense? If not, just change
93
- # the annotation.
94
- graph=iri, # type: ignore
95
- blank_node_prefix=str(iri),
96
- ),
97
- )
98
- except UnresolvedIRI as err:
99
- raise dataclasses.replace(
100
- err,
101
- context=context,
102
- iri=iri,
103
- )
104
-
105
- return list(
106
- itertools.chain(
107
- parsed_quads,
108
- static_quads,
109
- ),
110
- )
111
-
112
-
113
- def assign_key_if_not_present( # type: ignore
114
- document: LDDocument,
115
- key: str,
116
- default_value: Any,
117
- ) -> LDDocument:
118
- """Add key to document if it does not exist yet."""
119
- if isinstance(document, dict):
120
- if document.get(key) is None:
121
- return {
122
- key: default_value,
123
- **document,
124
- }
125
-
126
- return document
127
-
128
- elif isinstance(document, list):
129
- return [
130
- assign_key_if_not_present( # type: ignore
131
- document=sub_document,
132
- key=key,
133
- default_value=default_value,
134
- )
135
- for sub_document in document
136
- ]
137
-
138
- return document
139
-
140
-
141
- @dataclass
142
- class ExpandError(DocumentedError):
143
- """
144
- JSON-LD expand operation failed.
145
-
146
- IRI: {self.iri}
147
-
148
- Context: {self.formatted_context}
149
-
150
- Document: {self.formatted_data}
151
-
152
- Error: {self.message}
153
-
154
- Document Loader: {self.document_loader}
155
- """
156
-
157
- message: str
158
- document: LDDocument
159
- context: LDContext
160
- iri: Optional[URIRef]
161
- document_loader: Loader[URL]
162
-
163
- @property
164
- def formatted_data(self) -> str:
165
- """Format document for printing."""
166
- return json.dumps(self.document, indent=2, ensure_ascii=False)
167
-
168
- @property
169
- def formatted_context(self):
170
- """Format context for printing."""
171
- return json.dumps(self.context, indent=2, ensure_ascii=False)
iolanta/parsers/errors.py DELETED
@@ -1,35 +0,0 @@
1
- from dataclasses import dataclass
2
- from typing import Optional
3
-
4
- from documented import DocumentedError
5
- from rdflib import URIRef
6
-
7
-
8
- @dataclass
9
- class YAMLError(DocumentedError):
10
- """
11
- Invalid YAML.
12
-
13
- File: {self.iri}
14
-
15
- {self.error}
16
- """
17
-
18
- iri: Optional[URIRef]
19
- error: Exception
20
-
21
-
22
- @dataclass
23
- class SpaceInProperty(DocumentedError):
24
- """
25
- Space in property.
26
-
27
- That impedes JSON-LD parsing.
28
-
29
- Please do not use spaces in property names in JSON or YAML data; use `title`
30
- or other methods instead.
31
-
32
- Document IRI: {self.iri}
33
- """
34
-
35
- iri: Optional[URIRef] = None
iolanta/parsers/json.py DELETED
@@ -1,35 +0,0 @@
1
- import json
2
- from typing import Iterable, Optional, TextIO
3
-
4
- from rdflib import URIRef
5
- from yarl import URL
6
-
7
- from iolanta.loaders.base import Loader
8
- from iolanta.models import LDContext, LDDocument, Quad
9
- from iolanta.parsers.base import Parser
10
- from iolanta.parsers.dict_parser import DictParser
11
-
12
-
13
- class JSON(Parser[TextIO]):
14
- """Load JSON data."""
15
-
16
- def as_jsonld_document(self, raw_data: TextIO) -> LDDocument:
17
- """Read JSON content as a JSON-LD document."""
18
- return json.load(raw_data)
19
-
20
- def as_quad_stream(
21
- self,
22
- raw_data: TextIO,
23
- iri: Optional[URIRef],
24
- context: LDContext,
25
- root_loader: Loader[URL],
26
- ) -> Iterable[Quad]:
27
- """Read JSON-LD data into a quad stream."""
28
- document = self.as_jsonld_document(raw_data)
29
-
30
- return DictParser().as_quad_stream(
31
- raw_data=document,
32
- iri=iri,
33
- context=context,
34
- root_loader=root_loader,
35
- )
@@ -1,58 +0,0 @@
1
- import json
2
- from io import StringIO
3
- from typing import Iterable, Optional, TextIO
4
-
5
- import frontmatter
6
- from rdflib import URIRef
7
- from yaml.parser import ParserError
8
- from yaml.scanner import ScannerError
9
-
10
- from iolanta.loaders import Loader
11
- from iolanta.loaders.base import SourceType
12
- from iolanta.models import LDContext, LDDocument, Quad
13
- from iolanta.parsers.errors import YAMLError
14
- from iolanta.parsers.json import JSON
15
- from iolanta.parsers.yaml import YAML
16
-
17
- try: # noqa
18
- from yaml import CSafeLoader as SafeLoader # noqa
19
- except ImportError:
20
- from yaml import SafeLoader # type: ignore # noqa
21
-
22
-
23
- class Markdown(YAML):
24
- """Load YAML data."""
25
-
26
- def as_jsonld_document(self, raw_data: TextIO) -> LDDocument:
27
- """Read YAML content and adapt it to JSON-LD format."""
28
- raw_data.seek(0)
29
- return frontmatter.load(raw_data).metadata
30
-
31
- def as_quad_stream(
32
- self,
33
- raw_data: TextIO,
34
- iri: Optional[URIRef],
35
- context: LDContext,
36
- root_loader: Loader[SourceType],
37
- ) -> Iterable[Quad]:
38
- """Assign iolanta:url and generate quad stream."""
39
- try:
40
- json_data = self.as_jsonld_document(raw_data)
41
- except (ScannerError, ParserError) as err:
42
- raise YAMLError(
43
- iri=iri,
44
- error=err,
45
- ) from err
46
-
47
- return JSON().as_quad_stream(
48
- raw_data=StringIO(
49
- json.dumps(
50
- json_data,
51
- ensure_ascii=False,
52
- default=str,
53
- ),
54
- ),
55
- iri=iri,
56
- context=context,
57
- root_loader=root_loader,
58
- )
iolanta/parsers/yaml.py DELETED
@@ -1,46 +0,0 @@
1
- import json
2
- from io import StringIO
3
- from typing import Iterable, Optional, TextIO
4
-
5
- import yaml
6
- from rdflib import URIRef
7
-
8
- from iolanta.loaders import Loader
9
- from iolanta.models import LDContext, LDDocument, Quad
10
- from iolanta.parsers.json import JSON
11
-
12
- try: # noqa
13
- from yaml import CSafeLoader as SafeLoader # noqa
14
- except ImportError:
15
- from yaml import SafeLoader # type: ignore # noqa
16
-
17
-
18
- class YAML(JSON):
19
- """Load YAML data."""
20
-
21
- def as_jsonld_document(self, raw_data: TextIO) -> LDDocument:
22
- """Read YAML content and adapt it to JSON-LD format."""
23
- return yaml.load(raw_data, Loader=SafeLoader)
24
-
25
- def as_quad_stream(
26
- self,
27
- raw_data: TextIO,
28
- iri: Optional[URIRef],
29
- context: LDContext,
30
- root_loader: Loader,
31
- ) -> Iterable[Quad]:
32
- """Read YAML-LD data into a quad stream."""
33
- json_data = self.as_jsonld_document(raw_data)
34
-
35
- return JSON().as_quad_stream(
36
- raw_data=StringIO(
37
- json.dumps(
38
- json_data,
39
- ensure_ascii=False,
40
- default=str,
41
- ),
42
- ),
43
- iri=iri,
44
- context=context,
45
- root_loader=root_loader,
46
- )
iolanta/shortcuts.py DELETED
@@ -1,63 +0,0 @@
1
- from logging import Logger
2
- from pathlib import Path
3
- from typing import Dict, Iterable, Type
4
-
5
- from rdflib import URIRef
6
- from yarl import URL
7
-
8
- from iolanta.loaders.data_type_choice import DataTypeChoiceLoader
9
- from iolanta.loaders.dict_loader import DictLoader
10
- from iolanta.loaders.http import HTTP
11
- from iolanta.loaders.local_directory import Loader, LocalDirectory
12
- from iolanta.loaders.local_file import LocalFile
13
- from iolanta.loaders.scheme_choice import SchemeChoiceLoader
14
- from iolanta.models import LDContext, LDDocument, Quad
15
-
16
-
17
- def choose_loader_by_url(url: URL) -> Type[Loader[URL]]:
18
- """Find loader by URL scheme."""
19
- return LocalDirectory
20
-
21
-
22
- def as_document(path: Path) -> LDDocument:
23
- """Retrieve the document presented by the specified URL."""
24
- return LocalFile().as_jsonld_document(path)
25
-
26
-
27
- def construct_root_loader(logger: Logger) -> DataTypeChoiceLoader:
28
- # FIXME: Generalize this using endpoints
29
- return DataTypeChoiceLoader(
30
- logger=logger,
31
- loader_by_data_type={
32
- dict: DictLoader(logger=logger),
33
- Path: LocalDirectory(logger=logger),
34
- URL: SchemeChoiceLoader(
35
- logger=logger,
36
- loader_by_scheme={
37
- 'file': LocalDirectory(logger=logger),
38
- 'http': HTTP(logger=logger),
39
- 'https': HTTP(logger=logger),
40
- },
41
- ),
42
- },
43
- )
44
-
45
-
46
- def as_quad_stream(
47
- url: URL,
48
- iri: URIRef,
49
- default_context: LDContext,
50
- root_directory: Path,
51
- named_contexts: Dict[str, LDContext],
52
- ) -> Iterable[Quad]:
53
- """Retrieve the stream presented by the specified URL."""
54
- root_loader = construct_root_loader(
55
- default_context=default_context,
56
- root_directory=root_directory,
57
- )
58
-
59
- return root_loader.as_quad_stream(
60
- source=url,
61
- iri=iri,
62
- context=default_context,
63
- )