ogc-na 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of ogc-na might be problematic.
- ogc/na/ingest_json.py +16 -7
- ogc/na/input_filters/__init__.py +25 -0
- ogc/na/input_filters/csv.py +71 -0
- ogc/na/util.py +13 -3
- {ogc_na-0.1.3.dist-info → ogc_na-0.1.4.dist-info}/METADATA +1 -1
- {ogc_na-0.1.3.dist-info → ogc_na-0.1.4.dist-info}/RECORD +8 -6
- {ogc_na-0.1.3.dist-info → ogc_na-0.1.4.dist-info}/WHEEL +0 -0
- {ogc_na-0.1.3.dist-info → ogc_na-0.1.4.dist-info}/top_level.txt +0 -0
ogc/na/ingest_json.py
CHANGED
@@ -50,6 +50,7 @@ from rdflib.namespace import Namespace, DefinedNamespace
 from ogc.na import util, profile
 from ogc.na.domain_config import UpliftConfigurationEntry, DomainConfiguration
 from ogc.na.provenance import ProvenanceMetadata, FileProvenanceMetadata, generate_provenance
+from ogc.na.input_filters import apply_input_filter
 
 logger = logging.getLogger(__name__)
 
@@ -393,7 +394,8 @@ def process_file(input_fn: str | Path,
         If False, no Turtle output will be generated.
     :param context_fn: YAML context filename. If None, will be autodetected:
         1. From a file with the same name but yml/yaml extension (test.json -> test.yml)
-        2. From
+        2. From the domain_cfg
+        3. From a _json-context.yml/_json-context.yaml file in the same directory
     :param domain_cfg: domain configuration with uplift definition locations
     :param base: base URI for JSON-LD
     :param provenance_base_uri: base URI for provenance resources
@@ -436,8 +438,15 @@ def process_file(input_fn: str | Path,
     if not contexts:
         raise MissingContextException('No context file provided and one could not be discovered automatically')
 
-
-
+    # Apply input filter of first context only (if any)
+    input_filters = contexts[0].get('input-filter')
+    if input_filters:
+        if not isinstance(input_filters, dict):
+            raise ValueError('input-filter must be an object')
+        input_data = apply_input_filter(input_fn, input_filters)
+    else:
+        with open(input_fn, 'r') as j:
+            input_data = json.load(j)
 
     provenance_metadata: ProvenanceMetadata | None = None
     if provenance_base_uri is not False:
@@ -635,7 +644,10 @@ def process(input_files: str | Path | Sequence[str | Path],
     logger.info("Input files: %s", input_files)
     remaining_fn: deque = deque()
     for input_file in input_files:
-
+        if isinstance(input_file, str):
+            remaining_fn.extend(input_file.split(','))
+        else:
+            remaining_fn.append(input_file)
     while remaining_fn:
         fn = str(remaining_fn.popleft())
 
@@ -645,9 +657,6 @@ def process(input_files: str | Path | Sequence[str | Path],
             remaining_fn.extend(filenames_from_context(fn, domain_config=domain_cfg) or [])
            continue
 
-        if not re.match(r'.*\.json-?(ld)?$', fn):
-            logger.debug('File %s does not match, skipping', fn)
-            continue
         logger.info('File %s matches, processing', fn)
         try:
             result.append(process_file(
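The new input-filter hook means an uplift context can now declare how a non-JSON input should be parsed before the JSON-LD uplift is applied. A minimal, hypothetical sketch of how this could be driven (the file names and the YAML context content are illustrative; only the input_fn and context_fn parameters are taken from the docstring above):

    from ogc.na.ingest_json import process_file

    # Hypothetical uplift context "stations.yml" declaring the new input filter:
    #
    #   input-filter:
    #     csv:
    #       delimiter: ";"
    #       trim-values: true
    #
    # With such a context, process_file() runs the CSV filter on the raw input
    # bytes instead of json.load()-ing the file, and feeds the resulting list
    # of row dictionaries into the usual uplift pipeline.
    result = process_file('stations.csv', context_fn='stations.yml')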
ogc/na/input_filters/__init__.py
ADDED

@@ -0,0 +1,25 @@
+from __future__ import annotations
+
+from importlib import import_module
+from io import BytesIO
+from pathlib import Path
+from typing import Any, IO, TextIO
+
+
+def apply_input_filter(stream: IO | bytes | str | Path, filters: dict[str, dict]) -> dict[str, Any] | list:
+    filter_name, filter_conf = filters.popitem()
+    try:
+        filter_mod = import_module(f"ogc.na.input_filters.{filter_name}")
+    except ImportError:
+        raise ValueError(f'Cannot find input filter with name "{filter_name}"')
+
+    content: bytes | None = None
+    if isinstance(stream, Path) or isinstance(stream, str):
+        with open(stream, 'rb') as f:
+            content = f.read()
+    elif isinstance(stream, TextIO):
+        content = stream.read().encode('utf-8')
+    else:
+        content = stream.read()
+
+    return filter_mod.apply_filter(content, filter_conf)
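As the code above shows, apply_input_filter resolves the filter by name (a module under ogc.na.input_filters), reads the whole input into bytes, and delegates to that module's apply_filter(content, conf) function. A small usage sketch (the file name and configuration values are made up):

    from ogc.na.input_filters import apply_input_filter

    # The single key in the dict selects the filter module
    # (ogc.na.input_filters.csv here); its value is passed through
    # to the filter as its configuration.
    rows = apply_input_filter('observations.csv', {'csv': {'delimiter': ';'}})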
ogc/na/input_filters/csv.py
ADDED

@@ -0,0 +1,71 @@
+"""
+CSV Input filter for ingest_json.
+
+Returns CSV rows as a list. Values will always be strings (no type inference or coercion is performed).
+
+Configuration values:
+
+* `rows` (default: `dict`): type of elements in the result list:
+    * `dict`: elements will be dictionaries, with the keys taken from the `header-row`.
+    * `list`: each resulting row will be an array of values.
+* `header-row` (default: `0`): if `rows` is `dict`, the (0-based) index of the header row. All rows before the
+  header row will be skipped.
+* `skip-rows` (default: `0`): number of rows to skip at the beginning of the file (apart from the header and pre-header
+  ones if `rows` is `dict`).
+* `delimiter` (default: `,`): field separator character
+* `quotechar` (default: `"`): char used to quote (enclose) field values
+* `skip-empty-rows` (default: `True`): whether to omit empty rows (i.e., those with no values) from the result
+* `trim-values` (default: `False`): whether to apply `.strip()` to the resulting values
+"""
+from __future__ import annotations
+
+import csv
+from io import BytesIO, TextIOWrapper, StringIO
+from typing import IO, Any
+
+from ogc.na import util
+
+DEFAULT_CONF = {
+    'rows': 'dict',
+    'header-row': 0,
+    'skip-rows': 0,
+    'delimiter': ',',
+    'quotechar': '"',
+    'skip-empty-rows': True,
+    'trim-values': False,
+}
+
+
+def apply_filter(content: bytes, conf: dict[str, Any] | None) -> dict[str, Any] | list:
+    conf = util.deep_update(DEFAULT_CONF, conf) if conf else DEFAULT_CONF
+
+    textio = StringIO(content.decode('utf-8'))
+    reader = csv.reader(textio, delimiter=conf['delimiter'], quotechar=conf['quotechar'])
+
+    headers = None
+    if conf['rows'] == 'dict':
+        header_row = max(conf['header-row'], 0)
+        # Skip to header row
+        for i in range(header_row):
+            next(reader, None)
+        headers = next(reader, [])
+        if not headers:
+            return []
+
+    # Skip requested rows
+    for i in range(conf['skip-rows']):
+        next(reader, None)
+
+    result = []
+    for row in reader:
+        if not row and conf['skip-empty-rows']:
+            # skip empty rows
+            continue
+        if conf['trim-values']:
+            row = [v.strip() for v in row]
+        if conf['rows'] == 'list':
+            result.append(row)
+        else:
+            result.append(dict(zip(headers, row)))
+
+    return result
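To make the configuration options above concrete, a standalone sketch of the filter's behaviour on made-up data:

    from ogc.na.input_filters.csv import apply_filter

    data = b"station;value\nA001; 3.5 \nB002;4.1\n"

    # Default 'rows': 'dict' uses the first row as the header
    apply_filter(data, {'delimiter': ';', 'trim-values': True})
    # -> [{'station': 'A001', 'value': '3.5'}, {'station': 'B002', 'value': '4.1'}]

    # 'rows': 'list' keeps every row (including the header) as a plain list,
    # and values are left untrimmed by default
    apply_filter(data, {'rows': 'list', 'delimiter': ';'})
    # -> [['station', 'value'], ['A001', ' 3.5 '], ['B002', '4.1']]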
ogc/na/util.py
CHANGED
@@ -8,7 +8,7 @@ import shlex
 from glob import glob
 from pathlib import Path
 from time import time
-from typing import Optional, Union, Any, Hashable
+from typing import Optional, Union, Any, Mapping, Hashable
 
 import requests
 import rfc3987
@@ -112,7 +112,7 @@ def is_url(url: str, http_only: bool = False) -> bool:
     Checks whether a string is a valid URL.
 
     :param url: the input string
-    :param http_only: whether to only accept HTTP and HTTPS
+    :param http_only: whether to only accept HTTP and HTTPS URLs as valid
     :return: `True` if this is a valid URL, otherwise `False`
     """
     if not url:
@@ -161,7 +161,7 @@ def dump_yaml(content: Any, filename: str | Path | None = None,
 
     :param content: content to convert to YAML.
     :param filename: optional filename to dump the content into. If None, string content will be returned.
-    :param kwargs: other args to pass to yaml.dump
+    :param kwargs: other args to pass to `yaml.dump()`
     """
     kwargs.setdefault('sort_keys', False)
     if filename:
@@ -229,3 +229,13 @@ class LRUCache:
         del self._last_access[key_to_remove]
         self._cache[key] = value
         self._last_access[key] = time()
+
+
+def deep_update(orig_dict: dict, with_dict: dict, replace: bool = False) -> dict:
+    dest = orig_dict if replace else {**orig_dict}
+    for k, v in with_dict.items():
+        if isinstance(v, Mapping):
+            dest[k] = deep_update(orig_dict.get(k, {}), v, replace)
+        else:
+            dest[k] = v
+    return dest
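deep_update recursively merges with_dict into orig_dict, returning a new dictionary unless replace=True. A quick sketch of the intended behaviour (made-up data; note that nested mappings are merged key by key rather than overwritten wholesale):

    from ogc.na.util import deep_update

    defaults = {'rows': 'dict', 'csv': {'delimiter': ',', 'quotechar': '"'}}
    merged = deep_update(defaults, {'csv': {'delimiter': ';'}})
    # merged == {'rows': 'dict', 'csv': {'delimiter': ';', 'quotechar': '"'}}
    # defaults is left unchanged because replace defaults to False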
{ogc_na-0.1.3.dist-info → ogc_na-0.1.4.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ogc-na
-Version: 0.1.3
+Version: 0.1.4
 Summary: OGC Naming Authority tools
 Author-email: Rob Atkinson <ratkinson@ogc.org>, Piotr Zaborowski <pzaborowski@ogc.org>, Alejandro Villar <avillar@ogc.org>
 Project-URL: Homepage, https://github.com/opengeospatial/ogc-na-tools/
{ogc_na-0.1.3.dist-info → ogc_na-0.1.4.dist-info}/RECORD
CHANGED

@@ -2,13 +2,15 @@ ogc/na/__init__.py,sha256=uzcNiJ3uKFNJ1HBfKxIwgAy2HMUFsLAe5RkrUg8ncac,464
 ogc/na/annotate_schema.py,sha256=qwbpiZgEoV9P0-kzgAR79_OiCKkJdlVZUCdFJ0JYZq4,21506
 ogc/na/domain_config.py,sha256=bnSyv2XF0x_v1VHF5N4jQkb7j_G2obdkzcSvWSv42Uw,12443
 ogc/na/download.py,sha256=2afrLyl4WsAlxkCgXsl47fs9mNKfDmhVpeT2iwNSoq0,3354
-ogc/na/ingest_json.py,sha256=
+ogc/na/ingest_json.py,sha256=VaXFHuIduezBjUZuBxiafuxbJy2L90KEPS3UXkUJ3Cg,31236
 ogc/na/profile.py,sha256=AkcWA52uueufK8XTyHTpVjFSp1bFKAZK7Gg3MmdGWdU,14461
 ogc/na/provenance.py,sha256=zm3RwfnjISQfTekYXu1-GlgezGcUJSbmptTkLcitcfs,5446
 ogc/na/update_vocabs.py,sha256=FdHwqpihZpDZiJJfRFywXLZCqZCsgGKmDgxwGGUlbus,17174
-ogc/na/util.py,sha256=
+ogc/na/util.py,sha256=88UIxEFeRa92lNmEZHC3Of1qZrkXOzvUrSiSMsPSyic,7331
 ogc/na/validation.py,sha256=FkXx1Pwot4ztg9Vv2LrODfYxpknG9-67BmY3Ep7avd4,3535
-
-
-ogc_na-0.1.
-ogc_na-0.1.
+ogc/na/input_filters/__init__.py,sha256=8i7Q9INCDYz5xAWzR6R4t4nWw2XAdrITT_LNML1hmR4,849
+ogc/na/input_filters/csv.py,sha256=teUu0KSB2PLGPGGUvpxy4yug22g3vgpSI77rR5HRa-o,2365
+ogc_na-0.1.4.dist-info/METADATA,sha256=J6CIO-SJYB31Wiw_5dq2yVG8mIlLux6lUiw_Sqy_-e8,3527
+ogc_na-0.1.4.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
+ogc_na-0.1.4.dist-info/top_level.txt,sha256=Kvy3KhzcIhNPT4_nZuJCmS946ptRr_MDyU4IIhZJhCY,4
+ogc_na-0.1.4.dist-info/RECORD,,
{ogc_na-0.1.3.dist-info → ogc_na-0.1.4.dist-info}/WHEEL
File without changes

{ogc_na-0.1.3.dist-info → ogc_na-0.1.4.dist-info}/top_level.txt
File without changes