ogc-na 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl
- ogc/na/annotate_schema.py +142 -52
- ogc/na/ingest_json.py +16 -7
- ogc/na/input_filters/__init__.py +25 -0
- ogc/na/input_filters/csv.py +71 -0
- ogc/na/profile.py +1 -1
- ogc/na/util.py +45 -12
- {ogc_na-0.1.2.dist-info → ogc_na-0.1.4.dist-info}/METADATA +1 -1
- ogc_na-0.1.4.dist-info/RECORD +16 -0
- ogc_na-0.1.2.dist-info/RECORD +0 -14
- {ogc_na-0.1.2.dist-info → ogc_na-0.1.4.dist-info}/WHEEL +0 -0
- {ogc_na-0.1.2.dist-info → ogc_na-0.1.4.dist-info}/top_level.txt +0 -0
ogc/na/annotate_schema.py
CHANGED

@@ -116,16 +116,16 @@ The resulting context will be printed to the standard output.
 from __future__ import annotations
 import argparse
 import dataclasses
-import functools
 import json
 import logging
+import re
 import sys
 from pathlib import Path
-from typing import Any, AnyStr
+from typing import Any, AnyStr, Callable
 from urllib.parse import urlparse, urljoin
 import yaml
 import requests
-from ogc.na.util import is_url, merge_dicts, load_yaml
+from ogc.na.util import is_url, merge_dicts, load_yaml, LRUCache

 try:
     from yaml import CLoader as YamlLoader, CDumper as YamlDumper

@@ -137,8 +137,11 @@ logger = logging.getLogger(__name__)
 ANNOTATION_CONTEXT = 'x-jsonld-context'
 ANNOTATION_ID = 'x-jsonld-id'
 ANNOTATION_TYPE = 'x-jsonld-type'
+ANNOTATION_PREFIXES = 'x-jsonld-prefixes'
 REF_ROOT_MARKER = '$_ROOT_/'

+context_term_cache = LRUCache(maxsize=20)
+

 @dataclasses.dataclass
 class AnnotatedSchema:

@@ -218,63 +221,95 @@ def resolve_ref(ref: str, fn_from: str | Path | None = None, url_from: str | Non
     return ref, None


-
-def read_context_terms(file: Path | str = None, url: str = None) -> dict[str, str]:
+def read_context_terms(ctx: Path | str | dict) -> tuple[dict[str, str], dict[str, str], dict[str, str]]:
     """
     Reads all the terms from a JSON-LD context document.

-    :param
-    :param url: URL to load
+    :param ctx: file path (Path), URL (str) or dictionary (dict) to load
     :return: a dictionary with term -> URI mappings
     """
+
+    cached = context_term_cache.get(ctx)
+    if cached:
+        return cached
+
     context: dict[str, Any] | None = None

-    if
-        with open(
+    if isinstance(ctx, Path):
+        with open(ctx) as f:
             context = json.load(f).get('@context')
-    elif
-        r = requests.get(
+    elif isinstance(ctx, str):
+        r = requests.get(ctx)
         r.raise_for_status()
         context = r.json().get('@context')
+    elif ctx:
+        context = ctx.get('@context')

     if not context:
-        return {}
+        return {}, {}, {}

-    result: dict[str, str] = {}
-
+    result: dict[str, str | tuple[str, str]] = {}
+    types: dict[str, str | tuple[str, str]] = {}

     vocab = context.get('@vocab')

+    def expand_uri(uri: str) -> str | tuple[str, str] | None:
+        if not uri:
+            return None
+
+        if ':' in uri:
+            # either URI or prefix:suffix
+            pref, suf = uri.split(':', 1)
+            if suf.startswith('//') or pref == 'urn':
+                # assume full URI
+                return uri
+            else:
+                # prefix:suffix -> add to pending for expansion
+                return pref, suf
+        elif vocab:
+            # append term_val to vocab to get URI
+            return f"{vocab}{term_id}"
+        else:
+            return uri
+
     for term, term_val in context.items():
         if not term.startswith("@"):
            # assume term
+            term_type = None
            if isinstance(term_val, str):
                term_id = term_val
            elif isinstance(term_val, dict):
                term_id = term_val.get('@id')
+                term_type = term_val.get('@type')
            else:
                term_id = None

-
-
-
-
-
-
-                    result[term] = term_id
-                else:
-                    # prefix:suffix -> add to pending for expansion
-                    pending[term] = [pref, suf]
-            elif vocab:
-                # append term_val to vocab to get URI
-                result[term] = f"{vocab}{term_id}"
+            expanded_id = expand_uri(term_id)
+            if expanded_id:
+                result[term] = expanded_id
+            expanded_type = expand_uri(term_type)
+            if expanded_type:
+                types[term] = expanded_type

-
-        pref, suf = term_val
-        if pref in result:
-            result[term] = f"{result[pref]}{suf}"
+    prefixes = {}

-
+    def expand_result(d: dict[str, str | tuple[str, str]]) -> dict[str, str]:
+        r = {}
+        for term, term_val in d.items():
+            if isinstance(term_val, str):
+                r[term] = term_val
+            else:
+                pref, suf = term_val
+                if pref in result:
+                    r[term] = f"{result[pref]}{suf}"
+                    prefixes[pref] = result[pref]
+        return r
+
+    expanded_types = expand_result(types)
+    expanded_terms = expand_result(result)
+
+    context_term_cache[ctx] = expanded_terms, prefixes, expanded_types
+    return expanded_terms, prefixes, expanded_types


 class SchemaAnnotator:

@@ -287,7 +322,8 @@ class SchemaAnnotator:
     """

     def __init__(self, fn: Path | str | None = None, url: str | None = None,
-                 follow_refs: bool = True, ref_root: Path | str | None = None
+                 follow_refs: bool = True, ref_root: Path | str | None = None,
+                 context: str | Path | dict | None = None):
         """
         :param fn: file path to load (root schema)
         :param url: URL to load (root schema)

@@ -297,6 +333,7 @@ class SchemaAnnotator:
         self.bundled_schema = None
         self.ref_root = Path(ref_root) if ref_root else None
         self._follow_refs = follow_refs
+        self._provided_context = context

         self._process_schema(fn, url)

@@ -309,19 +346,29 @@ class SchemaAnnotator:

         base_url = schema.get('$id', base_url)

-
-
-
-
-
-
-
-
+        terms = {}
+        prefixes = {}
+        types = {}
+
+        if context_fn != self._provided_context or not (isinstance(context_fn, Path)
+                                                        and isinstance(self._provided_context, Path)
+                                                        and self._provided_context.resolve() == context_fn.resolve()):
+            # Only load the provided context if it's different from the schema-referenced one
+            terms, prefixes, types = read_context_terms(self._provided_context)
+
+        if context_fn:
+            if base_url:
+                context_fn = urljoin(base_url, str(context_fn))
+            else:
+                context_fn = Path(fn).parent / context_fn
+
+            for e in zip((terms, prefixes, types), read_context_terms(context_fn)):
+                e[0].update(e[1])

         def process_properties(obj: dict):
             properties: dict[str, dict] = obj.get('properties') if obj else None
-            if not properties:
-
+            if not isinstance(properties, dict):
+                raise ValueError('"properties" must be a dictionary')

             empty_properties = []
             for prop, prop_value in properties.items():

@@ -330,7 +377,9 @@ class SchemaAnnotator:
                     continue
                 if prop in terms:
                     prop_value[ANNOTATION_ID] = terms[prop]
-
+                if prop in types:
+                    prop_value[ANNOTATION_TYPE] = types[prop]
+                if '$ref' in prop_value and self._follow_refs:

                     ref_fn, ref_url = resolve_ref(prop_value['$ref'], fn, url, base_url)
                     ref = ref_fn or ref_url

@@ -346,13 +395,28 @@ class SchemaAnnotator:

             properties.update({p: {ANNOTATION_ID: terms[p]} for p in empty_properties if p in terms})

-
+        def process_subschema(subschema):
+
+            schema_type = subschema.get('type')
+            if not schema_type and 'properties' in subschema:
+                schema_type = 'object'
+
+            if schema_type == 'object':
+                process_properties(subschema)
+            elif schema_type == 'array':
+                for k in ('prefixItems', 'items', 'contains'):
+                    process_properties(subschema.get(k))

-
-
-
-
-
+            for defs_prop in ('$defs', 'definitions'):
+                defs_value = subschema.get(defs_prop)
+                if isinstance(defs_value, dict):
+                    for defs_entry in defs_value.values():
+                        process_subschema(defs_entry)
+
+        process_subschema(schema)
+
+        if prefixes:
+            schema[ANNOTATION_PREFIXES] = prefixes

         self.schemas[fn or url] = AnnotatedSchema(
             source=fn or url,

@@ -389,18 +453,43 @@ class ContextBuilder:

         base_url = schema.get('$id', base_url)

+        prefixes = schema.get(ANNOTATION_PREFIXES, {}).items()
+        rev_prefixes = {v: k for k, v in prefixes}
+
+        def compact_uri(uri: str) -> str:
+            if uri.startswith('@'):
+                # JSON-LD keyword
+                return uri
+            parts = urlparse(uri)
+            if parts.fragment:
+                pref, suf = uri.rsplit('#', 1)
+                pref += '#'
+            elif len(parts.path) > 1:
+                pref, suf = uri.rsplit('/', 1)
+                pref += '/'
+            else:
+                return uri
+
+            if pref in rev_prefixes:
+                return f"{rev_prefixes[pref]}:{suf}"
+            else:
+                return uri
+
         own_context = {}

+        if prefixes:
+            own_context.update(prefixes)
+
         def read_properties(where: dict):
             if not isinstance(where, dict):
                 return
             for prop, prop_val in where.get('properties', {}).items():
                 if isinstance(prop_val, dict) and ANNOTATION_ID in prop_val:
                     prop_context = {
-                        '@id': prop_val[ANNOTATION_ID]
+                        '@id': compact_uri(prop_val[ANNOTATION_ID])
                     }
                     if ANNOTATION_TYPE in prop_val:
-                        prop_context['@type'] = prop_val[ANNOTATION_TYPE]
+                        prop_context['@type'] = compact_uri(prop_val[ANNOTATION_TYPE])

                     if '$ref' in prop_val:
                         ref_fn, ref_url = resolve_ref(prop_val['$ref'], fn, url, base_url)

@@ -459,6 +548,7 @@ def dump_annotated_schemas(annotator: SchemaAnnotator, subdir: Path | str = 'ann
     :param annotator: a `SchemaAnnotator` with the annotated schemas to read
     :param subdir: a name for the mirror directory
     :param root_dir: root directory for computing relative paths to schemas
+    :param output_fn_transform: optional callable to transform the output path
     """
     wd = (Path(root_dir) if root_dir else Path()).resolve()
     subdir = subdir if isinstance(subdir, Path) else Path(subdir)
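For reference, a minimal sketch of what the reworked read_context_terms() returns for an in-memory JSON-LD context; the sample context dictionary and the expected values are illustrative only, derived by tracing the logic in the diff above:

from ogc.na.annotate_schema import read_context_terms

# A JSON-LD context passed as a dict (Path and URL inputs are also accepted)
ctx = {
    '@context': {
        'skos': 'http://www.w3.org/2004/02/skos/core#',
        'label': {'@id': 'skos:prefLabel', '@type': '@id'},
    }
}
terms, prefixes, types = read_context_terms(ctx)
# terms    == {'skos': 'http://www.w3.org/2004/02/skos/core#',
#              'label': 'http://www.w3.org/2004/02/skos/core#prefLabel'}
# prefixes == {'skos': 'http://www.w3.org/2004/02/skos/core#'}  # only prefixes actually used in expansions
# types    == {'label': '@id'}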
ogc/na/ingest_json.py
CHANGED

@@ -50,6 +50,7 @@ from rdflib.namespace import Namespace, DefinedNamespace
 from ogc.na import util, profile
 from ogc.na.domain_config import UpliftConfigurationEntry, DomainConfiguration
 from ogc.na.provenance import ProvenanceMetadata, FileProvenanceMetadata, generate_provenance
+from ogc.na.input_filters import apply_input_filter

 logger = logging.getLogger(__name__)

@@ -393,7 +394,8 @@ def process_file(input_fn: str | Path,
         If False, no Turtle output will be generated.
     :param context_fn: YAML context filename. If None, will be autodetected:
         1. From a file with the same name but yml/yaml extension (test.json -> test.yml)
-        2. From
+        2. From the domain_cfg
+        3. From a _json-context.yml/_json-context.yaml file in the same directory
     :param domain_cfg: domain configuration with uplift definition locations
     :param base: base URI for JSON-LD
     :param provenance_base_uri: base URI for provenance resources

@@ -436,8 +438,15 @@ def process_file(input_fn: str | Path,
     if not contexts:
         raise MissingContextException('No context file provided and one could not be discovered automatically')

-
-
+    # Apply input filter of first context only (if any)
+    input_filters = contexts[0].get('input-filter')
+    if input_filters:
+        if not isinstance(input_filters, dict):
+            raise ValueError('input-filter must be an object')
+        input_data = apply_input_filter(input_fn, input_filters)
+    else:
+        with open(input_fn, 'r') as j:
+            input_data = json.load(j)

     provenance_metadata: ProvenanceMetadata | None = None
     if provenance_base_uri is not False:

@@ -635,7 +644,10 @@ def process(input_files: str | Path | Sequence[str | Path],
     logger.info("Input files: %s", input_files)
     remaining_fn: deque = deque()
     for input_file in input_files:
-
+        if isinstance(input_file, str):
+            remaining_fn.extend(input_file.split(','))
+        else:
+            remaining_fn.append(input_file)
     while remaining_fn:
         fn = str(remaining_fn.popleft())

@@ -645,9 +657,6 @@ def process(input_files: str | Path | Sequence[str | Path],
             remaining_fn.extend(filenames_from_context(fn, domain_config=domain_cfg) or [])
             continue

-        if not re.match(r'.*\.json-?(ld)?$', fn):
-            logger.debug('File %s does not match, skipping', fn)
-            continue
         logger.info('File %s matches, processing', fn)
         try:
             result.append(process_file(
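The input-filter handling added above takes the filter name and its configuration straight from the first uplift context definition. A hypothetical sketch of driving the same machinery directly (the file name and configuration values are made up for illustration):

from pathlib import Path

from ogc.na.input_filters import apply_input_filter

# The single-entry dict maps a filter name (resolved to the module
# ogc.na.input_filters.<name>) to that filter's configuration
input_data = apply_input_filter(Path('observations.csv'),
                                {'csv': {'rows': 'dict', 'delimiter': ';'}})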
ogc/na/input_filters/__init__.py
ADDED

@@ -0,0 +1,25 @@
+from __future__ import annotations
+
+from importlib import import_module
+from io import BytesIO
+from pathlib import Path
+from typing import Any, IO, TextIO
+
+
+def apply_input_filter(stream: IO | bytes | str | Path, filters: dict[str, dict]) -> dict[str, Any] | list:
+    filter_name, filter_conf = filters.popitem()
+    try:
+        filter_mod = import_module(f"ogc.na.input_filters.{filter_name}")
+    except ImportError:
+        raise ValueError(f'Cannot find input filter with name "{filter_name}"')
+
+    content: bytes | None = None
+    if isinstance(stream, Path) or isinstance(stream, str):
+        with open(stream, 'rb') as f:
+            content = f.read()
+    elif isinstance(stream, TextIO):
+        content = stream.read().encode('utf-8')
+    else:
+        content = stream.read()
+
+    return filter_mod.apply_filter(content, filter_conf)
ogc/na/input_filters/csv.py
ADDED

@@ -0,0 +1,71 @@
+"""
+CSV Input filter for ingest_json.
+
+Returns CSV rows as a list. Values will always be strings (no type inference or coercion is performed).
+
+Configuration values:
+
+* `rows` (default: `dict`): type of elements in the result list:
+  * `dict`: elements will be dictionaries, with the keys taken from the `header-row`.
+  * `list`: each resulting row will be an array of values.
+* `header-row` (default: `0`): if `rows` is `dict`, the (0-based) index of the header row. All rows before the
+  header row will be skipped.
+* `skip-rows` (default: `0`): number of rows to skip at the beginning of the file (apart from the header and pre-header
+  ones if `rows` is `dict`).
+* `delimiter` (default: `,`): field separator character
+* `quotechar` (default: `"`): char used to quote (enclose) field values
+* `skip-empty-rows` (default: `True`): whether to omit empty rows (i.e., those with no values) from the result
+* `trim-values` (default: `False`): whether to apply `.strip()` to the resulting values
+"""
+from __future__ import annotations
+
+import csv
+from io import BytesIO, TextIOWrapper, StringIO
+from typing import IO, Any
+
+from ogc.na import util
+
+DEFAULT_CONF = {
+    'rows': 'dict',
+    'header-row': 0,
+    'skip-rows': 0,
+    'delimiter': ',',
+    'quotechar': '"',
+    'skip-empty-rows': True,
+    'trim-values': False,
+}
+
+
+def apply_filter(content: bytes, conf: dict[str, Any] | None) -> dict[str, Any] | list:
+    conf = util.deep_update(DEFAULT_CONF, conf) if conf else DEFAULT_CONF
+
+    textio = StringIO(content.decode('utf-8'))
+    reader = csv.reader(textio, delimiter=conf['delimiter'], quotechar=conf['quotechar'])
+
+    headers = None
+    if conf['rows'] == 'dict':
+        header_row = max(conf['header-row'], 0)
+        # Skip to header row
+        for i in range(header_row):
+            next(reader, None)
+        headers = next(reader, [])
+        if not headers:
+            return []
+
+    # Skip requested rows
+    for i in range(conf['skip-rows']):
+        next(reader, None)
+
+    result = []
+    for row in reader:
+        if not row and conf['skip-empty-rows']:
+            # skip empty rows
+            continue
+        if conf['trim-values']:
+            row = [v.strip() for v in row]
+        if conf['rows'] == 'list':
+            result.append(row)
+        else:
+            result.append(dict(zip(headers, row)))
+
+    return result
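A usage sketch for the new CSV filter under the defaults documented in its docstring (the sample bytes are hypothetical):

from ogc.na.input_filters.csv import apply_filter

content = b'id,name\n1, alpha \n2,beta\n'
rows = apply_filter(content, {'trim-values': True})
# rows == [{'id': '1', 'name': 'alpha'}, {'id': '2', 'name': 'beta'}]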
ogc/na/profile.py
CHANGED

@@ -202,7 +202,7 @@ class ProfileRegistry:
         if isinstance(src, str) and src.startswith('sparql:'):
             endpoint = src[len('sparql:'):]
             logger.info("Fetching profiles from SPARQL endpoint %s", endpoint)
-            assert util.
+            assert util.is_url(endpoint, http_only=True)
             s = g.query(PROFILES_QUERY.replace('__SERVICE__', f"SERVICE <{endpoint}>")).graph
             util.copy_triples(s, g)
         else:
ogc/na/util.py
CHANGED

@@ -7,7 +7,8 @@ import os.path
 import shlex
 from glob import glob
 from pathlib import Path
-from
+from time import time
+from typing import Optional, Union, Any, Mapping, Hashable

 import requests
 import rfc3987

@@ -18,6 +19,7 @@ from urllib.parse import urlparse
 from ogc.na.validation import ValidationReport

 import yaml
+
 try:
     from yaml import CLoader as YamlLoader, CSafeLoader as SafeYamlLoader, CDumper as YamlDumper
 except ImportError:

@@ -105,12 +107,12 @@ def validate(g: Graph, shacl_graph: Graph, extra: Optional[Graph] = None) -> Val
                              advanced=True))


-def
+def is_url(url: str, http_only: bool = False) -> bool:
     """
     Checks whether a string is a valid URL.

     :param url: the input string
-    :param http_only: whether to only accept HTTP and HTTPS
+    :param http_only: whether to only accept HTTP and HTTPS URLs as valid
     :return: `True` if this is a valid URL, otherwise `False`
     """
     if not url:

@@ -159,7 +161,7 @@ def dump_yaml(content: Any, filename: str | Path | None = None,

     :param content: content to convert to YAML.
     :param filename: optional filename to dump the content into. If None, string content will be returned.
-    :param kwargs: other args to pass to yaml.dump
+    :param kwargs: other args to pass to `yaml.dump()`
     """
     kwargs.setdefault('sort_keys', False)
     if filename:

@@ -188,14 +190,6 @@ def merge_dicts(src: dict, dst: dict) -> dict:
     return dst


-def is_url(s: str) -> bool:
-    try:
-        url = urlparse(s)
-        return bool(url.scheme and url.netloc)
-    except ValueError:
-        return False
-
-
 def glob_list_split(s: str, exclude_dirs: bool = True, recursive: bool = False) -> list[str]:
     result = []
     for e in shlex.split(s):

@@ -206,3 +200,42 @@ def glob_list_split(s: str, exclude_dirs: bool = True, recursive: bool = False)
         if not exclude_dirs or os.path.isfile(fn):
             result.append(fn)
     return result
+
+
+class LRUCache:
+
+    def __init__(self, maxsize: int = 10):
+        self._cache: dict[Hashable, Any] = {}
+        self._last_access: dict[Hashable, float] = {}
+        self._maxsize = maxsize
+
+    def __contains__(self, item):
+        return item in self._cache
+
+    def __len__(self):
+        return len(self._cache)
+
+    def get(self, key, default=None):
+        if not isinstance(key, Hashable):
+            return default
+        return self._cache.get(key, default)
+
+    def __setitem__(self, key, value):
+        if not isinstance(key, Hashable):
+            return
+        if len(self._cache) >= self._maxsize and key not in self._cache:
+            key_to_remove = min(self._last_access, key=self._last_access.get)
+            del self._cache[key_to_remove]
+            del self._last_access[key_to_remove]
+        self._cache[key] = value
+        self._last_access[key] = time()


+def deep_update(orig_dict: dict, with_dict: dict, replace: bool = False) -> dict:
+    dest = orig_dict if replace else {**orig_dict}
+    for k, v in with_dict.items():
+        if isinstance(v, Mapping):
+            dest[k] = deep_update(orig_dict.get(k, {}), with_dict, replace)
+        else:
+            dest[k] = v
+    return dest
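A small sketch of the new LRUCache in use (keys and values are hypothetical). Since get() does not refresh an entry's timestamp, eviction follows write recency rather than read recency:

from ogc.na.util import LRUCache

cache = LRUCache(maxsize=2)
cache['a'] = 1
cache['b'] = 2
cache['c'] = 3  # cache full: the oldest-written key ('a') is evicted
assert 'a' not in cache and len(cache) == 2 and cache.get('b') == 2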
{ogc_na-0.1.2.dist-info → ogc_na-0.1.4.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ogc-na
-Version: 0.1.2
+Version: 0.1.4
 Summary: OGC Naming Authority tools
 Author-email: Rob Atkinson <ratkinson@ogc.org>, Piotr Zaborowski <pzaborowski@ogc.org>, Alejandro Villar <avillar@ogc.org>
 Project-URL: Homepage, https://github.com/opengeospatial/ogc-na-tools/
ogc_na-0.1.4.dist-info/RECORD
ADDED

@@ -0,0 +1,16 @@
+ogc/na/__init__.py,sha256=uzcNiJ3uKFNJ1HBfKxIwgAy2HMUFsLAe5RkrUg8ncac,464
+ogc/na/annotate_schema.py,sha256=qwbpiZgEoV9P0-kzgAR79_OiCKkJdlVZUCdFJ0JYZq4,21506
+ogc/na/domain_config.py,sha256=bnSyv2XF0x_v1VHF5N4jQkb7j_G2obdkzcSvWSv42Uw,12443
+ogc/na/download.py,sha256=2afrLyl4WsAlxkCgXsl47fs9mNKfDmhVpeT2iwNSoq0,3354
+ogc/na/ingest_json.py,sha256=VaXFHuIduezBjUZuBxiafuxbJy2L90KEPS3UXkUJ3Cg,31236
+ogc/na/profile.py,sha256=AkcWA52uueufK8XTyHTpVjFSp1bFKAZK7Gg3MmdGWdU,14461
+ogc/na/provenance.py,sha256=zm3RwfnjISQfTekYXu1-GlgezGcUJSbmptTkLcitcfs,5446
+ogc/na/update_vocabs.py,sha256=FdHwqpihZpDZiJJfRFywXLZCqZCsgGKmDgxwGGUlbus,17174
+ogc/na/util.py,sha256=88UIxEFeRa92lNmEZHC3Of1qZrkXOzvUrSiSMsPSyic,7331
+ogc/na/validation.py,sha256=FkXx1Pwot4ztg9Vv2LrODfYxpknG9-67BmY3Ep7avd4,3535
+ogc/na/input_filters/__init__.py,sha256=8i7Q9INCDYz5xAWzR6R4t4nWw2XAdrITT_LNML1hmR4,849
+ogc/na/input_filters/csv.py,sha256=teUu0KSB2PLGPGGUvpxy4yug22g3vgpSI77rR5HRa-o,2365
+ogc_na-0.1.4.dist-info/METADATA,sha256=J6CIO-SJYB31Wiw_5dq2yVG8mIlLux6lUiw_Sqy_-e8,3527
+ogc_na-0.1.4.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
+ogc_na-0.1.4.dist-info/top_level.txt,sha256=Kvy3KhzcIhNPT4_nZuJCmS946ptRr_MDyU4IIhZJhCY,4
+ogc_na-0.1.4.dist-info/RECORD,,
ogc_na-0.1.2.dist-info/RECORD
DELETED

@@ -1,14 +0,0 @@
-ogc/na/__init__.py,sha256=uzcNiJ3uKFNJ1HBfKxIwgAy2HMUFsLAe5RkrUg8ncac,464
-ogc/na/annotate_schema.py,sha256=DX0dqgl_ql2PnSh85T_akZHpROOrIH_ZFSHZPg38c3Q,18165
-ogc/na/domain_config.py,sha256=bnSyv2XF0x_v1VHF5N4jQkb7j_G2obdkzcSvWSv42Uw,12443
-ogc/na/download.py,sha256=2afrLyl4WsAlxkCgXsl47fs9mNKfDmhVpeT2iwNSoq0,3354
-ogc/na/ingest_json.py,sha256=MoIQGcWgBLC4WKwWv0QbIWDbXVuEaXy_Oz9YhqQATXs,30863
-ogc/na/profile.py,sha256=34SiZ2H_RJZaQpMKd3GkuyYoGKUfp7BTuVd0lhJY8yw,14444
-ogc/na/provenance.py,sha256=zm3RwfnjISQfTekYXu1-GlgezGcUJSbmptTkLcitcfs,5446
-ogc/na/update_vocabs.py,sha256=FdHwqpihZpDZiJJfRFywXLZCqZCsgGKmDgxwGGUlbus,17174
-ogc/na/util.py,sha256=OSHEBKZfX3ZOwE-VBmAtg9udu4dP1iMhZEiCZZfq4Ac,6216
-ogc/na/validation.py,sha256=FkXx1Pwot4ztg9Vv2LrODfYxpknG9-67BmY3Ep7avd4,3535
-ogc_na-0.1.2.dist-info/METADATA,sha256=SAAD05QiTXEXq4Pyj4SAYF_UpO6NfBHaMJ3heYdwmHA,3527
-ogc_na-0.1.2.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
-ogc_na-0.1.2.dist-info/top_level.txt,sha256=Kvy3KhzcIhNPT4_nZuJCmS946ptRr_MDyU4IIhZJhCY,4
-ogc_na-0.1.2.dist-info/RECORD,,
{ogc_na-0.1.2.dist-info → ogc_na-0.1.4.dist-info}/WHEEL
File without changes

{ogc_na-0.1.2.dist-info → ogc_na-0.1.4.dist-info}/top_level.txt
File without changes