ogc-na 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of ogc-na might be problematic.
- ogc/na/ingest_json.py +16 -7
- ogc/na/input_filters/__init__.py +25 -0
- ogc/na/input_filters/csv.py +71 -0
- ogc/na/util.py +13 -3
- {ogc_na-0.1.3.dist-info → ogc_na-0.1.4.dist-info}/METADATA +1 -1
- {ogc_na-0.1.3.dist-info → ogc_na-0.1.4.dist-info}/RECORD +8 -6
- {ogc_na-0.1.3.dist-info → ogc_na-0.1.4.dist-info}/WHEEL +0 -0
- {ogc_na-0.1.3.dist-info → ogc_na-0.1.4.dist-info}/top_level.txt +0 -0
ogc/na/ingest_json.py
CHANGED
@@ -50,6 +50,7 @@ from rdflib.namespace import Namespace, DefinedNamespace
 from ogc.na import util, profile
 from ogc.na.domain_config import UpliftConfigurationEntry, DomainConfiguration
 from ogc.na.provenance import ProvenanceMetadata, FileProvenanceMetadata, generate_provenance
+from ogc.na.input_filters import apply_input_filter
 
 logger = logging.getLogger(__name__)
 
@@ -393,7 +394,8 @@ def process_file(input_fn: str | Path,
         If False, no Turtle output will be generated.
     :param context_fn: YAML context filename. If None, will be autodetected:
         1. From a file with the same name but yml/yaml extension (test.json -> test.yml)
-        2. From
+        2. From the domain_cfg
+        3. From a _json-context.yml/_json-context.yaml file in the same directory
     :param domain_cfg: domain configuration with uplift definition locations
     :param base: base URI for JSON-LD
     :param provenance_base_uri: base URI for provenance resources
@@ -436,8 +438,15 @@ def process_file(input_fn: str | Path,
     if not contexts:
         raise MissingContextException('No context file provided and one could not be discovered automatically')
 
-
-
+    # Apply input filter of first context only (if any)
+    input_filters = contexts[0].get('input-filter')
+    if input_filters:
+        if not isinstance(input_filters, dict):
+            raise ValueError('input-filter must be an object')
+        input_data = apply_input_filter(input_fn, input_filters)
+    else:
+        with open(input_fn, 'r') as j:
+            input_data = json.load(j)
 
     provenance_metadata: ProvenanceMetadata | None = None
     if provenance_base_uri is not False:
@@ -635,7 +644,10 @@ def process(input_files: str | Path | Sequence[str | Path],
     logger.info("Input files: %s", input_files)
     remaining_fn: deque = deque()
     for input_file in input_files:
-
+        if isinstance(input_file, str):
+            remaining_fn.extend(input_file.split(','))
+        else:
+            remaining_fn.append(input_file)
     while remaining_fn:
         fn = str(remaining_fn.popleft())
 
@@ -645,9 +657,6 @@ def process(input_files: str | Path | Sequence[str | Path],
             remaining_fn.extend(filenames_from_context(fn, domain_config=domain_cfg) or [])
            continue
 
-        if not re.match(r'.*\.json-?(ld)?$', fn):
-            logger.debug('File %s does not match, skipping', fn)
-            continue
         logger.info('File %s matches, processing', fn)
         try:
             result.append(process_file(
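The new input-filter hook means an uplift context can now declare how a non-JSON input should be parsed before the JSON-LD uplift is applied. A minimal, hypothetical sketch of how this could be driven (the file names and the YAML context content are illustrative; only the input_fn and context_fn parameters are taken from the docstring above):

    from ogc.na.ingest_json import process_file

    # Hypothetical uplift context "stations.yml" declaring the new input filter:
    #
    #   input-filter:
    #     csv:
    #       delimiter: ";"
    #       trim-values: true
    #
    # With such a context, process_file() runs the CSV filter on the raw input
    # bytes instead of json.load()-ing the file, and feeds the resulting list
    # of row dictionaries into the usual uplift pipeline.
    result = process_file('stations.csv', context_fn='stations.yml')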
ogc/na/input_filters/__init__.py
ADDED

@@ -0,0 +1,25 @@
+from __future__ import annotations
+
+from importlib import import_module
+from io import BytesIO
+from pathlib import Path
+from typing import Any, IO, TextIO
+
+
+def apply_input_filter(stream: IO | bytes | str | Path, filters: dict[str, dict]) -> dict[str, Any] | list:
+    filter_name, filter_conf = filters.popitem()
+    try:
+        filter_mod = import_module(f"ogc.na.input_filters.{filter_name}")
+    except ImportError:
+        raise ValueError(f'Cannot find input filter with name "{filter_name}"')
+
+    content: bytes | None = None
+    if isinstance(stream, Path) or isinstance(stream, str):
+        with open(stream, 'rb') as f:
+            content = f.read()
+    elif isinstance(stream, TextIO):
+        content = stream.read().encode('utf-8')
+    else:
+        content = stream.read()
+
+    return filter_mod.apply_filter(content, filter_conf)
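As the code above shows, apply_input_filter resolves the filter by name (a module under ogc.na.input_filters), reads the whole input into bytes, and delegates to that module's apply_filter(content, conf) function. A small usage sketch (the file name and configuration values are made up):

    from ogc.na.input_filters import apply_input_filter

    # The single key in the dict selects the filter module
    # (ogc.na.input_filters.csv here); its value is passed through
    # to the filter as its configuration.
    rows = apply_input_filter('observations.csv', {'csv': {'delimiter': ';'}})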
ogc/na/input_filters/csv.py
ADDED

@@ -0,0 +1,71 @@
+"""
+CSV Input filter for ingest_json.
+
+Returns CSV rows as a list. Values will always be strings (no type inference or coercion is performed).
+
+Configuration values:
+
+* `rows` (default: `dict`): type of elements in the result list:
+    * `dict`: elements will be dictionaries, with the keys taken from the `header-row`.
+    * `list`: each resulting row will be an array of values.
+* `header-row` (default: `0`): if `rows` is `dict`, the (0-based) index of the header row. All rows before the
+  header row will be skipped.
+* `skip-rows` (default: `0`): number of rows to skip at the beginning of the file (apart from the header and pre-header
+  ones if `rows` is `dict`).
+* `delimiter` (default: `,`): field separator character
+* `quotechar` (default: `"`): char used to quote (enclose) field values
+* `skip-empty-rows` (default: `True`): whether to omit empty rows (i.e., those with no values) from the result
+* `trim-values` (default: `False`): whether to apply `.strip()` to the resulting values
+"""
+from __future__ import annotations
+
+import csv
+from io import BytesIO, TextIOWrapper, StringIO
+from typing import IO, Any
+
+from ogc.na import util
+
+DEFAULT_CONF = {
+    'rows': 'dict',
+    'header-row': 0,
+    'skip-rows': 0,
+    'delimiter': ',',
+    'quotechar': '"',
+    'skip-empty-rows': True,
+    'trim-values': False,
+}
+
+
+def apply_filter(content: bytes, conf: dict[str, Any] | None) -> dict[str, Any] | list:
+    conf = util.deep_update(DEFAULT_CONF, conf) if conf else DEFAULT_CONF
+
+    textio = StringIO(content.decode('utf-8'))
+    reader = csv.reader(textio, delimiter=conf['delimiter'], quotechar=conf['quotechar'])
+
+    headers = None
+    if conf['rows'] == 'dict':
+        header_row = max(conf['header-row'], 0)
+        # Skip to header row
+        for i in range(header_row):
+            next(reader, None)
+        headers = next(reader, [])
+        if not headers:
+            return []
+
+    # Skip requested rows
+    for i in range(conf['skip-rows']):
+        next(reader, None)
+
+    result = []
+    for row in reader:
+        if not row and conf['skip-empty-rows']:
+            # skip empty rows
+            continue
+        if conf['trim-values']:
+            row = [v.strip() for v in row]
+        if conf['rows'] == 'list':
+            result.append(row)
+        else:
+            result.append(dict(zip(headers, row)))
+
+    return result
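To make the configuration options above concrete, a standalone sketch of the filter's behaviour on made-up data:

    from ogc.na.input_filters.csv import apply_filter

    data = b"station;value\nA001; 3.5 \nB002;4.1\n"

    # Default 'rows': 'dict' uses the first row as the header
    apply_filter(data, {'delimiter': ';', 'trim-values': True})
    # -> [{'station': 'A001', 'value': '3.5'}, {'station': 'B002', 'value': '4.1'}]

    # 'rows': 'list' keeps every row (including the header) as a plain list,
    # and values are left untrimmed by default
    apply_filter(data, {'rows': 'list', 'delimiter': ';'})
    # -> [['station', 'value'], ['A001', ' 3.5 '], ['B002', '4.1']]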
ogc/na/util.py
CHANGED
@@ -8,7 +8,7 @@ import shlex
 from glob import glob
 from pathlib import Path
 from time import time
-from typing import Optional, Union, Any, Hashable
+from typing import Optional, Union, Any, Mapping, Hashable
 
 import requests
 import rfc3987
@@ -112,7 +112,7 @@ def is_url(url: str, http_only: bool = False) -> bool:
     Checks whether a string is a valid URL.
 
     :param url: the input string
-    :param http_only: whether to only accept HTTP and HTTPS
+    :param http_only: whether to only accept HTTP and HTTPS URLs as valid
     :return: `True` if this is a valid URL, otherwise `False`
     """
     if not url:
@@ -161,7 +161,7 @@ def dump_yaml(content: Any, filename: str | Path | None = None,
 
     :param content: content to convert to YAML.
     :param filename: optional filename to dump the content into. If None, string content will be returned.
-    :param kwargs: other args to pass to yaml.dump
+    :param kwargs: other args to pass to `yaml.dump()`
     """
     kwargs.setdefault('sort_keys', False)
     if filename:
@@ -229,3 +229,13 @@ class LRUCache:
         del self._last_access[key_to_remove]
         self._cache[key] = value
         self._last_access[key] = time()
+
+
+def deep_update(orig_dict: dict, with_dict: dict, replace: bool = False) -> dict:
+    dest = orig_dict if replace else {**orig_dict}
+    for k, v in with_dict.items():
+        if isinstance(v, Mapping):
+            dest[k] = deep_update(orig_dict.get(k, {}), v, replace)
+        else:
+            dest[k] = v
+    return dest
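deep_update recursively merges with_dict into orig_dict, returning a new dictionary unless replace=True. A quick sketch of the intended behaviour (made-up data; note that nested mappings are merged key by key rather than overwritten wholesale):

    from ogc.na.util import deep_update

    defaults = {'rows': 'dict', 'csv': {'delimiter': ',', 'quotechar': '"'}}
    merged = deep_update(defaults, {'csv': {'delimiter': ';'}})
    # merged == {'rows': 'dict', 'csv': {'delimiter': ';', 'quotechar': '"'}}
    # defaults is left unchanged because replace defaults to False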
{ogc_na-0.1.3.dist-info → ogc_na-0.1.4.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ogc-na
-Version: 0.1.3
+Version: 0.1.4
 Summary: OGC Naming Authority tools
 Author-email: Rob Atkinson <ratkinson@ogc.org>, Piotr Zaborowski <pzaborowski@ogc.org>, Alejandro Villar <avillar@ogc.org>
 Project-URL: Homepage, https://github.com/opengeospatial/ogc-na-tools/
{ogc_na-0.1.3.dist-info → ogc_na-0.1.4.dist-info}/RECORD
CHANGED

@@ -2,13 +2,15 @@ ogc/na/__init__.py,sha256=uzcNiJ3uKFNJ1HBfKxIwgAy2HMUFsLAe5RkrUg8ncac,464
 ogc/na/annotate_schema.py,sha256=qwbpiZgEoV9P0-kzgAR79_OiCKkJdlVZUCdFJ0JYZq4,21506
 ogc/na/domain_config.py,sha256=bnSyv2XF0x_v1VHF5N4jQkb7j_G2obdkzcSvWSv42Uw,12443
 ogc/na/download.py,sha256=2afrLyl4WsAlxkCgXsl47fs9mNKfDmhVpeT2iwNSoq0,3354
-ogc/na/ingest_json.py,sha256=
+ogc/na/ingest_json.py,sha256=VaXFHuIduezBjUZuBxiafuxbJy2L90KEPS3UXkUJ3Cg,31236
 ogc/na/profile.py,sha256=AkcWA52uueufK8XTyHTpVjFSp1bFKAZK7Gg3MmdGWdU,14461
 ogc/na/provenance.py,sha256=zm3RwfnjISQfTekYXu1-GlgezGcUJSbmptTkLcitcfs,5446
 ogc/na/update_vocabs.py,sha256=FdHwqpihZpDZiJJfRFywXLZCqZCsgGKmDgxwGGUlbus,17174
-ogc/na/util.py,sha256=
+ogc/na/util.py,sha256=88UIxEFeRa92lNmEZHC3Of1qZrkXOzvUrSiSMsPSyic,7331
 ogc/na/validation.py,sha256=FkXx1Pwot4ztg9Vv2LrODfYxpknG9-67BmY3Ep7avd4,3535
-
-
-ogc_na-0.1.
-ogc_na-0.1.
+ogc/na/input_filters/__init__.py,sha256=8i7Q9INCDYz5xAWzR6R4t4nWw2XAdrITT_LNML1hmR4,849
+ogc/na/input_filters/csv.py,sha256=teUu0KSB2PLGPGGUvpxy4yug22g3vgpSI77rR5HRa-o,2365
+ogc_na-0.1.4.dist-info/METADATA,sha256=J6CIO-SJYB31Wiw_5dq2yVG8mIlLux6lUiw_Sqy_-e8,3527
+ogc_na-0.1.4.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
+ogc_na-0.1.4.dist-info/top_level.txt,sha256=Kvy3KhzcIhNPT4_nZuJCmS946ptRr_MDyU4IIhZJhCY,4
+ogc_na-0.1.4.dist-info/RECORD,,
{ogc_na-0.1.3.dist-info → ogc_na-0.1.4.dist-info}/WHEEL
File without changes

{ogc_na-0.1.3.dist-info → ogc_na-0.1.4.dist-info}/top_level.txt
File without changes