PyLD 2.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,215 @@
1
+ """
2
+ Context Resolver for managing remote contexts.
3
+
4
+ .. module:: context_resolver
5
+ :synopsis: Creates a ContextResolver
6
+
7
+ .. moduleauthor:: Dave Longley
8
+ .. moduleauthor:: Gregg Kellogg <gregg@greggkellogg.net>
9
+ """
10
+
11
+ from frozendict import frozendict
12
+ from c14n.Canonicalize import canonicalize
13
+ from pyld import jsonld
14
+ from .resolved_context import ResolvedContext
15
+
16
+ MAX_CONTEXT_URLS = 10
17
+
18
class ContextResolver:
    """
    Resolves and caches remote contexts.

    Two caches are consulted. ``per_op_cache`` is a plain dict owned by this
    resolver instance. ``shared_cache`` is supplied by the caller and maps a
    key (a context URL or a canonicalized context object) to a dict of
    ``tag -> resolved value``.
    """

    def __init__(self, shared_cache, document_loader):
        """
        Creates a ContextResolver.

        :param shared_cache: a mapping supporting ``get`` and item assignment
          that stores resolved contexts by key and tag.
        :param document_loader: the document loader handed to
          ``jsonld.load_document`` when a remote context is fetched.
        """
        # cache scoped to this resolver instance
        self.per_op_cache = {}
        self.shared_cache = shared_cache
        self.document_loader = document_loader

    def resolve(self, active_ctx, context, base, cycles=None):
        """
        Resolve a context.

        :param active_ctx: the current active context.
        :param context: the context to resolve.
        :param base: the absolute URL to use for making url absolute.
        :param cycles: the set of context URLs already fetched during this
          resolve operation; used to detect cycles and to enforce
          MAX_CONTEXT_URLS (default: a new empty set).

        :return: a list with the resolved form of every context entry.
        """
        if cycles is None:
            cycles = set()

        # process `@context`
        if isinstance(context, (dict, frozendict)) and '@context' in context:
            context = context['@context']

        # context is one or more contexts
        if not isinstance(context, list):
            context = [context]

        # resolve each context in the array
        all_resolved = []
        for ctx in context:
            if isinstance(ctx, str):
                resolved = self._get(ctx)
                if not resolved:
                    resolved = self._resolve_remote_context(
                        active_ctx, ctx, base, cycles)

                # a remote context resolves to a list of resolved contexts;
                # flatten it into the output
                if isinstance(resolved, list):
                    all_resolved.extend(resolved)
                else:
                    all_resolved.append(resolved)
            elif ctx is None or ctx is False:
                all_resolved.append(ResolvedContext(False))
            elif not isinstance(ctx, (dict, frozendict)):
                raise jsonld.JsonLdError(
                    'Invalid JSON-LD syntax; @context must be an object.',
                    'jsonld.SyntaxError', {'context': ctx},
                    code='invalid local context')
            else:
                # context is an object, get/create `ResolvedContext` for it;
                # the canonical serialization is used as the cache key
                key = canonicalize(dict(ctx)).decode('UTF-8')
                resolved = self._get(key)
                if not resolved:
                    # create a new static `ResolvedContext` and cache it
                    resolved = ResolvedContext(ctx)
                    self._cache_resolved_context(key, resolved, 'static')
                all_resolved.append(resolved)

        return all_resolved

    def _get(self, key):
        """
        Look up a previously resolved context.

        The per-operation cache is checked first; on a miss the ``'static'``
        entry of the shared cache is used and copied into the per-operation
        cache.

        :param key: the cache key.

        :return: the cached value, or a falsy value when nothing is cached.
        """
        resolved = self.per_op_cache.get(key)
        if not resolved:
            tag_map = self.shared_cache.get(key)
            if tag_map:
                resolved = tag_map.get('static')
                if resolved:
                    self.per_op_cache[key] = resolved
        return resolved

    def _cache_resolved_context(self, key, resolved, tag):
        """
        Store a resolved context.

        The value always goes into the per-operation cache. It is also stored
        in the shared cache under ``tag`` when ``tag`` is truthy.

        :param key: the cache key.
        :param resolved: the value to cache.
        :param tag: the shared-cache tag, or a falsy value to skip the
          shared cache.

        :return: ``resolved``.
        """
        self.per_op_cache[key] = resolved
        if tag:
            tag_map = self.shared_cache.get(key)
            if not tag_map:
                tag_map = {}
                self.shared_cache[key] = tag_map
            tag_map[tag] = resolved
        return resolved

    def _resolve_remote_context(self, active_ctx, url, base, cycles):
        """
        Fetch, resolve and cache the context found at a URL.

        :param active_ctx: the current active context.
        :param url: the (possibly relative) context URL.
        :param base: the base URL used to make ``url`` absolute.
        :param cycles: the set of context URLs already fetched.

        :return: the list returned by ``resolve`` for the fetched context.
        """
        # resolve relative URL and fetch context
        url = jsonld.prepend_base(base, url)
        context, remote_doc = self._fetch_context(active_ctx, url, cycles)

        # update base according to remote document and resolve any relative URLs
        base = remote_doc.get('documentUrl', url)
        self._resolve_context_urls(context, base)

        # resolve, cache, and return context
        resolved = self.resolve(active_ctx, context, base, cycles)
        self._cache_resolved_context(url, resolved, remote_doc.get('tag'))
        return resolved

    def _fetch_context(self, active_ctx, url, cycles):
        """
        Load the document at ``url`` and extract its ``@context``.

        :param active_ctx: the current active context (only its
          ``processingMode`` is read, to pick the error code).
        :param url: the absolute context URL.
        :param cycles: the set of context URLs already fetched; ``url`` is
          added to it.

        :return: a tuple ``(context, remote_doc)`` where ``context`` is a dict
          with a single ``@context`` key.

        :raises jsonld.JsonLdError: when too many URLs were fetched, a cycle
          is detected, loading fails, or the document is not a JSON object.
        """
        # check for max context URLs fetched during a resolve operation
        if len(cycles) > MAX_CONTEXT_URLS:
            raise jsonld.JsonLdError(
                'Maximum number of @context URLs exceeded.',
                'jsonld.ContextUrlError', {'max': MAX_CONTEXT_URLS},
                code=('loading remote context failed'
                      if active_ctx.get('processingMode') == 'json-ld-1.0'
                      else 'context overflow'))

        # check for context URL cycle
        # shortcut to avoid extra work that would eventually hit the max above
        if url in cycles:
            raise jsonld.JsonLdError(
                'Cyclical @context URLs detected.',
                'jsonld.ContextUrlError', {'url': url},
                code=('recursive context inclusion'
                      if active_ctx.get('processingMode') == 'json-ld-1.0'
                      else 'context overflow'))

        # track cycles
        cycles.add(url)

        try:
            remote_doc = jsonld.load_document(
                url,
                {'documentLoader': self.document_loader},
                requestProfile='http://www.w3.org/ns/json-ld#context')
            context = remote_doc.get('document', url)
        except Exception as cause:
            raise jsonld.JsonLdError(
                'Dereferencing a URL did not result in a valid JSON-LD object. ' +
                'Possible causes are an inaccessible URL perhaps due to ' +
                'a same-origin policy (ensure the server uses CORS if you are ' +
                'using client-side JavaScript), too many redirects, a ' +
                'non-JSON response, or more than one HTTP Link Header was ' +
                'provided for a remote context.',
                'jsonld.InvalidUrl',
                {'url': url, 'cause': cause},
                code='loading remote context failed') from cause

        # ensure ctx is an object
        if not isinstance(context, (dict, frozendict)):
            raise jsonld.JsonLdError(
                'Dereferencing a URL did not result in a JSON object. The ' +
                'response was valid JSON, but it was not a JSON object.',
                'jsonld.InvalidUrl',
                {'url': url},
                code='invalid remote context')

        # use empty context if no @context key is present
        if '@context' not in context:
            context = {'@context': {}}
        else:
            context = {'@context': context['@context']}

        # append @context URL to context if given; `.get` tolerates loaders
        # that omit the key, matching how the other fields are read
        context_url = remote_doc.get('contextUrl')
        if context_url:
            existing = context['@context']
            if not isinstance(existing, list):
                existing = [existing]
            # build a new list so the loaded document's own list is not
            # extended in place
            context['@context'] = [*existing, context_url]

        return (context, remote_doc)

    def _resolve_context_urls(self, context, base):
        """
        Resolve all relative `@context` URLs in the given context by inline
        replacing them with absolute URLs.

        :param context: the context.
        :param base: the base IRI to use to resolve relative IRIs.
        """
        if not isinstance(context, (dict, frozendict)):
            return

        ctx = context.get('@context')

        if isinstance(ctx, str):
            context['@context'] = jsonld.prepend_base(base, ctx)
            return

        if isinstance(ctx, list):
            for num, element in enumerate(ctx):
                if isinstance(element, str):
                    ctx[num] = jsonld.prepend_base(base, element)
                elif isinstance(element, (dict, frozendict)):
                    self._resolve_context_urls({'@context': element}, base)
            return

        if not isinstance(ctx, (dict, frozendict)):
            # no @context URLs can be found in non-object
            return

        # ctx is an object, resolve any context URLs in its term definitions;
        # only the values are needed and they are updated in place by the
        # recursive call
        for definition in ctx.values():
            self._resolve_context_urls(definition, base)
File without changes
@@ -0,0 +1,119 @@
1
+ """
2
+ Remote document loader using aiohttp.
3
+
4
+ .. module:: jsonld.documentloader.aiohttp
5
+ :synopsis: Remote document loader using aiohttp
6
+
7
+ .. moduleauthor:: Olaf Conradi <olaf@conradi.org>
8
+ """
9
+
10
+ import string
11
+ import urllib.parse as urllib_parse
12
+
13
+ from pyld.jsonld import (JsonLdError, parse_link_header, LINK_HEADER_REL)
14
+
15
+
16
def aiohttp_document_loader(loop=None, secure=False, **kwargs):
    """
    Create an Asynchronous document loader using aiohttp.

    :param loop: the event loop used for processing HTTP requests.
    :param secure: require all requests to use HTTPS (default: False).
    :param **kwargs: extra keyword args for the aiohttp request get() call.

    :return: the RemoteDocument loader function.
    """
    import asyncio
    import re

    import aiohttp

    # imported locally: the alternate-link handling below needs it and the
    # module-level import only brings in selected names from pyld.jsonld
    from pyld.jsonld import prepend_base

    if loop is None:
        loop = asyncio.get_event_loop()

    async def async_loader(url, headers):
        """
        Retrieves JSON-LD at the given URL asynchronously.

        :param url: the URL to retrieve.
        :param headers: the HTTP request headers to send.

        :return: the RemoteDocument.
        """
        try:
            # validate URL
            pieces = urllib_parse.urlparse(url)
            # NOTE(review): `>` is a strict-superset test, so the last
            # condition only fires when the netloc contains *every* allowed
            # character plus others. It was probably meant to reject any
            # character outside the set; left as is because tightening it
            # would reject URLs that are accepted today.
            if (not all([pieces.scheme, pieces.netloc]) or
                    pieces.scheme not in ['http', 'https'] or
                    set(pieces.netloc) > set(
                        string.ascii_letters + string.digits + '-.:')):
                raise JsonLdError(
                    'URL could not be dereferenced; '
                    'only "http" and "https" URLs are supported.',
                    'jsonld.InvalidUrl', {'url': url},
                    code='loading document failed')
            if secure and pieces.scheme != 'https':
                raise JsonLdError(
                    'URL could not be dereferenced; '
                    'secure mode enabled and '
                    'the URL\'s scheme is not "https".',
                    'jsonld.InvalidUrl', {'url': url},
                    code='loading document failed')
            async with aiohttp.ClientSession(loop=loop) as session:
                async with session.get(url,
                                       headers=headers,
                                       **kwargs) as response:
                    # Allow any content_type in trying to parse json
                    # similar to requests library
                    json_body = await response.json(content_type=None)
                    content_type = response.headers.get('content-type')
                    if not content_type:
                        content_type = 'application/octet-stream'
                    doc = {
                        'contentType': content_type,
                        'contextUrl': None,
                        'documentUrl': response.url.human_repr(),
                        'document': json_body
                    }
                    link_header = response.headers.get('link')
                    if link_header:
                        # parse once and reuse for both link relations
                        links = parse_link_header(link_header)
                        linked_context = links.get(LINK_HEADER_REL)
                        # only 1 related link header permitted
                        if (linked_context and
                                content_type != 'application/ld+json'):
                            if isinstance(linked_context, list):
                                raise JsonLdError(
                                    'URL could not be dereferenced, '
                                    'it has more than one '
                                    'associated HTTP Link Header.',
                                    'jsonld.LoadDocumentError',
                                    {'url': url},
                                    code='multiple context link headers')
                            doc['contextUrl'] = linked_context['target']
                        linked_alternate = links.get('alternate')
                        # if not JSON-LD, alternate may point there
                        if (linked_alternate and
                                linked_alternate.get('type') ==
                                'application/ld+json' and
                                not re.match(r'^application\/(\w*\+)?json$',
                                             content_type)):
                            doc['contentType'] = 'application/ld+json'
                            doc['documentUrl'] = prepend_base(
                                url, linked_alternate['target'])

                    return doc
        except JsonLdError:
            # already in the library's error format; propagate unchanged
            raise
        except Exception as cause:
            raise JsonLdError(
                'Could not retrieve a JSON-LD document from the URL.',
                'jsonld.LoadDocumentError', code='loading document failed',
                cause=cause) from cause

    def loader(url, options=None):
        """
        Retrieves JSON-LD at the given URL.

        :param url: the URL to retrieve.
        :param options: optional dict; its ``headers`` entry, when present,
          is used as the HTTP request headers.

        :return: the RemoteDocument.
        """
        if options is None:
            options = {}
        return loop.run_until_complete(
            async_loader(
                url,
                options.get(
                    'headers',
                    {'Accept': 'application/ld+json, application/json'})))

    return loader
@@ -0,0 +1,105 @@
1
+ """
2
+ Remote document loader using Requests.
3
+
4
+ .. module:: jsonld.documentloader.requests
5
+ :synopsis: Remote document loader using Requests
6
+
7
+ .. moduleauthor:: Dave Longley
8
+ .. moduleauthor:: Mike Johnson
9
+ .. moduleauthor:: Tim McNamara <tim.mcnamara@okfn.org>
10
+ .. moduleauthor:: Olaf Conradi <olaf@conradi.org>
11
+ """
12
+ import string
13
+ import urllib.parse as urllib_parse
14
+
15
+ from pyld.jsonld import (JsonLdError, parse_link_header, LINK_HEADER_REL)
16
+
17
+
18
def requests_document_loader(secure=False, **kwargs):
    """
    Create a Requests document loader.

    Can be used to setup extra Requests args such as verify, cert, timeout,
    or others.

    :param secure: require all requests to use HTTPS (default: False).
    :param **kwargs: extra keyword args for Requests get() call.

    :return: the RemoteDocument loader function.
    """
    import re

    import requests

    # imported locally: the alternate-link handling below needs it and the
    # module-level import only brings in selected names from pyld.jsonld
    from pyld.jsonld import prepend_base

    def loader(url, options=None):
        """
        Retrieves JSON-LD at the given URL.

        :param url: the URL to retrieve.
        :param options: optional dict; its ``headers`` entry, when present
          and not None, is used as the HTTP request headers.

        :return: the RemoteDocument.
        """
        if options is None:
            options = {}
        try:
            # validate URL
            pieces = urllib_parse.urlparse(url)
            # NOTE(review): `>` is a strict-superset test, so the last
            # condition only fires when the netloc contains *every* allowed
            # character plus others. It was probably meant to reject any
            # character outside the set; left as is because tightening it
            # would reject URLs that are accepted today.
            if (not all([pieces.scheme, pieces.netloc]) or
                    pieces.scheme not in ['http', 'https'] or
                    set(pieces.netloc) > set(
                        string.ascii_letters + string.digits + '-.:')):
                raise JsonLdError(
                    'URL could not be dereferenced; only "http" and "https" '
                    'URLs are supported.',
                    'jsonld.InvalidUrl', {'url': url},
                    code='loading document failed')
            if secure and pieces.scheme != 'https':
                raise JsonLdError(
                    'URL could not be dereferenced; secure mode enabled and '
                    'the URL\'s scheme is not "https".',
                    'jsonld.InvalidUrl', {'url': url},
                    code='loading document failed')
            headers = options.get('headers')
            if headers is None:
                headers = {
                    'Accept': 'application/ld+json, application/json'
                }
            response = requests.get(url, headers=headers, **kwargs)

            content_type = response.headers.get('content-type')
            if not content_type:
                content_type = 'application/octet-stream'
            doc = {
                'contentType': content_type,
                'contextUrl': None,
                'documentUrl': response.url,
                'document': response.json()
            }
            link_header = response.headers.get('link')
            if link_header:
                # parse once and reuse for both link relations
                links = parse_link_header(link_header)
                linked_context = links.get(LINK_HEADER_REL)
                # only 1 related link header permitted
                if linked_context and content_type != 'application/ld+json':
                    if isinstance(linked_context, list):
                        raise JsonLdError(
                            'URL could not be dereferenced, '
                            'it has more than one '
                            'associated HTTP Link Header.',
                            'jsonld.LoadDocumentError',
                            {'url': url},
                            code='multiple context link headers')
                    doc['contextUrl'] = linked_context['target']
                linked_alternate = links.get('alternate')
                # if not JSON-LD, alternate may point there
                if (linked_alternate and
                        linked_alternate.get('type') ==
                        'application/ld+json' and
                        not re.match(r'^application\/(\w*\+)?json$',
                                     content_type)):
                    doc['contentType'] = 'application/ld+json'
                    doc['documentUrl'] = prepend_base(
                        url, linked_alternate['target'])
            return doc
        except JsonLdError:
            # already in the library's error format; propagate unchanged
            raise
        except Exception as cause:
            raise JsonLdError(
                'Could not retrieve a JSON-LD document from the URL.',
                'jsonld.LoadDocumentError', code='loading document failed',
                cause=cause) from cause

    return loader