plastron-utils 4.3.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- plastron_utils-4.3.2/PKG-INFO +18 -0
- plastron_utils-4.3.2/README.md +3 -0
- plastron_utils-4.3.2/VERSION +1 -0
- plastron_utils-4.3.2/pyproject.toml +36 -0
- plastron_utils-4.3.2/setup.cfg +4 -0
- plastron_utils-4.3.2/src/plastron/namespaces/__init__.py +136 -0
- plastron_utils-4.3.2/src/plastron/utils/__init__.py +291 -0
- plastron_utils-4.3.2/src/plastron_utils.egg-info/PKG-INFO +18 -0
- plastron_utils-4.3.2/src/plastron_utils.egg-info/SOURCES.txt +13 -0
- plastron_utils-4.3.2/src/plastron_utils.egg-info/dependency_links.txt +1 -0
- plastron_utils-4.3.2/src/plastron_utils.egg-info/requires.txt +5 -0
- plastron_utils-4.3.2/src/plastron_utils.egg-info/top_level.txt +1 -0
- plastron_utils-4.3.2/tests/test_envsubst.py +28 -0
- plastron_utils-4.3.2/tests/test_util.py +87 -0
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: plastron-utils
|
|
3
|
+
Version: 4.3.2
|
|
4
|
+
Summary: Utility functions and namespace definitions for Plastron
|
|
5
|
+
Author-email: University of Maryland Libraries <lib-ssdr@umd.edu>, Josh Westgard <westgard@umd.edu>, Peter Eichman <peichman@umd.edu>, Mohamed Abdul Rasheed <mohideen@umd.edu>, Ben Wallberg <wallberg@umd.edu>, David Steelman <dsteelma@umd.edu>, Marc Andreu Grillo Aguilar <aguilarm@umd.edu>
|
|
6
|
+
Project-URL: Homepage, https://github.com/umd-lib/plastron
|
|
7
|
+
Project-URL: Bug Reports, https://github.com/umd-lib/plastron/issues
|
|
8
|
+
Project-URL: Source, https://github.com/umd-lib/plastron
|
|
9
|
+
Requires-Python: >=3.8
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
Requires-Dist: rdflib
|
|
12
|
+
Provides-Extra: test
|
|
13
|
+
Requires-Dist: pytest; extra == "test"
|
|
14
|
+
Requires-Dist: pytest-cov; extra == "test"
|
|
15
|
+
|
|
16
|
+
# plastron-utils
|
|
17
|
+
|
|
18
|
+
Namespace definitions and other utilities used by Plastron
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
4.3.2
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = 'plastron-utils'
|
|
3
|
+
description = 'Utility functions and namespace definitions for Plastron'
|
|
4
|
+
authors = [
|
|
5
|
+
{ name='University of Maryland Libraries', email='lib-ssdr@umd.edu' },
|
|
6
|
+
{ name='Josh Westgard', email='westgard@umd.edu' },
|
|
7
|
+
{ name='Peter Eichman', email='peichman@umd.edu' },
|
|
8
|
+
{ name='Mohamed Abdul Rasheed', email='mohideen@umd.edu' },
|
|
9
|
+
{ name='Ben Wallberg', email='wallberg@umd.edu' },
|
|
10
|
+
{ name='David Steelman', email='dsteelma@umd.edu' },
|
|
11
|
+
{ name='Marc Andreu Grillo Aguilar', email='aguilarm@umd.edu' },
|
|
12
|
+
]
|
|
13
|
+
readme = "README.md"
|
|
14
|
+
requires-python = '>=3.8'
|
|
15
|
+
dependencies = [
|
|
16
|
+
"rdflib",
|
|
17
|
+
]
|
|
18
|
+
dynamic = ["version"]
|
|
19
|
+
|
|
20
|
+
[project.optional-dependencies]
|
|
21
|
+
test = [
|
|
22
|
+
"pytest",
|
|
23
|
+
"pytest-cov",
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
[project.urls]
|
|
27
|
+
Homepage = 'https://github.com/umd-lib/plastron'
|
|
28
|
+
'Bug Reports' = 'https://github.com/umd-lib/plastron/issues'
|
|
29
|
+
Source = 'https://github.com/umd-lib/plastron'
|
|
30
|
+
|
|
31
|
+
[build-system]
|
|
32
|
+
requires = ["setuptools>=66.1.0"]
|
|
33
|
+
build-backend = "setuptools.build_meta"
|
|
34
|
+
|
|
35
|
+
[tool.setuptools.dynamic]
|
|
36
|
+
version = { "file" = "VERSION" }
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
"""Useful namespaces for use with `rdflib` code."""
|
|
2
|
+
|
|
3
|
+
import sys
|
|
4
|
+
from typing import Optional
|
|
5
|
+
|
|
6
|
+
from rdflib import Namespace, Graph
|
|
7
|
+
from rdflib.namespace import NamespaceManager
|
|
8
|
+
|
|
9
|
+
# Namespace definitions. get_manager() (defined later in this module) scans
# the module attributes for Namespace objects and binds each one to a prefix
# equal to its attribute name, so the variable names below double as the
# prefixes.
#
# The bare string literal following each assignment describes the vocabulary
# and, in most cases, links to its documentation.
acl = Namespace('http://www.w3.org/ns/auth/acl#')
"""[Web Access Controls (WebAC)](https://solidproject.org/TR/wac)"""

activitystreams = Namespace('https://www.w3.org/ns/activitystreams#')
"""[Activity Streams 2.0](https://www.w3.org/TR/activitystreams-core/)"""

bibo = Namespace('http://purl.org/ontology/bibo/')
"""[Bibliographic Ontology](https://www.dublincore.org/specifications/bibo/bibo/)"""

carriers = Namespace('http://id.loc.gov/vocabulary/carriers/')
"""[Library of Congress Carriers Schema](https://id.loc.gov/vocabulary/carriers.html)"""

dc = Namespace('http://purl.org/dc/elements/1.1/')
"""[Dublin Core Elements 1.1](https://www.dublincore.org/specifications/dublin-core/dcmi-terms/#section-3)"""

dcmitype = Namespace('http://purl.org/dc/dcmitype/')
"""[Dublin Core Type Vocabulary](https://www.dublincore.org/specifications/dublin-core/dcmi-terms/#section-7)"""

dcterms = Namespace('http://purl.org/dc/terms/')
"""[Dublin Core Terms](https://www.dublincore.org/specifications/dublin-core/dcmi-terms/#section-2)"""

ebucore = Namespace('http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#')
"""[European Broadcasting Union (EBU) Core](https://www.ebu.ch/metadata/ontologies/ebucore/)"""

edm = Namespace('http://www.europeana.eu/schemas/edm/')
"""[Europeana Data Model](https://pro.europeana.eu/page/edm-documentation)"""

ex = Namespace('http://www.example.org/terms/')
"""Example Namespace"""

fabio = Namespace('http://purl.org/spar/fabio/')
"""[FRBR-aligned Bibliographic Ontology](https://sparontologies.github.io/fabio/current/fabio.html)"""

fedora = Namespace('http://fedora.info/definitions/v4/repository#')
"""[Fedora Commons Repository Ontology](https://fedora.info/definitions/v4/2016/10/18/repository)"""

foaf = Namespace('http://xmlns.com/foaf/0.1/')
"""[FOAF ("Friend-of-a-friend") Vocabulary](http://xmlns.com/foaf/0.1/)"""

geo = Namespace('http://www.w3.org/2003/01/geo/wgs84_pos#')
"""[WGS84 Geo Positioning](https://www.w3.org/2003/01/geo/wgs84_pos)"""

iana = Namespace('http://www.iana.org/assignments/relation/')
"""IANA Link Relations"""

ldp = Namespace('http://www.w3.org/ns/ldp#')
"""[Linked Data Platform](https://www.w3.org/TR/ldp/)"""

ndnp = Namespace('http://chroniclingamerica.loc.gov/terms/')
"""[National Digital Newspaper Program (NDNP) Vocabulary](https://chroniclingamerica.loc.gov/terms/)

**Note:** This namespace is actually incorrect; it should end with "#" and not "/". Unfortunately,
correction would require modification of many resources in fcrepo."""

oa = Namespace('http://www.w3.org/ns/oa#')
"""[Web Annotations](https://www.w3.org/TR/annotation-vocab/)"""

ore = Namespace('http://www.openarchives.org/ore/terms/')
"""[OAI Object Reuse and Exchange (ORE)](http://openarchives.org/ore/1.0/vocabulary)"""

owl = Namespace('http://www.w3.org/2002/07/owl#')
"""[Web Ontology Language (OWL)](https://www.w3.org/TR/owl2-syntax/)"""

pcdm = Namespace('http://pcdm.org/models#')
"""[Portland Common Data Model (PCDM)](https://pcdm.org/2016/04/18/models)"""

pcdmuse = Namespace('http://pcdm.org/use#')
"""[PCDM Use Extension](https://pcdm.org/2021/04/09/use)"""

premis = Namespace('http://www.loc.gov/premis/rdf/v1#')
"""[Preservation Metadata: Implementation Strategies (PREMIS)](https://id.loc.gov/ontologies/premis-1-0-0.html)"""

prov = Namespace('http://www.w3.org/ns/prov#')
"""[Provenance Ontology (PROV-O)](https://www.w3.org/TR/prov-o/)"""

rdf = Namespace('http://www.w3.org/1999/02/22-rdf-syntax-ns#')
"""[RDF](https://www.w3.org/TR/rdf11-schema/)"""

rdfs = Namespace('http://www.w3.org/2000/01/rdf-schema#')
"""[RDF Schema](https://www.w3.org/TR/rdf11-schema/)"""

rel = Namespace('http://id.loc.gov/vocabulary/relators/')
"""[Library of Congress Relator Terms](https://id.loc.gov/vocabulary/relators.html)"""

sc = Namespace('http://www.shared-canvas.org/ns/')
"""[Shared Canvas Data Model](https://iiif.io/api/model/shared-canvas/1.0/)"""

schema = Namespace('https://schema.org/')
"""[Schema.org](https://schema.org/)"""

skos = Namespace('http://www.w3.org/2004/02/skos/core#')
"""[Simple Knowledge Organization System (SKOS)](https://www.w3.org/TR/skos-reference/)"""

umd = Namespace('http://vocab.lib.umd.edu/model#')
"""[UMD Content Models Vocabulary](http://vocab.lib.umd.edu/model)"""

umdaccess = Namespace('http://vocab.lib.umd.edu/access#')
"""[UMD Access Classes Vocabulary](http://vocab.lib.umd.edu/access)"""

umdform = Namespace('http://vocab.lib.umd.edu/form#')
"""[UMD Genre/Form Vocabulary](http://vocab.lib.umd.edu/form)"""

umdtype = Namespace('http://vocab.lib.umd.edu/datatype#')
"""[UMD Datatypes Vocabulary](http://vocab.lib.umd.edu/datatype)"""

umdact = Namespace('http://vocab.lib.umd.edu/activity#')
"""[UMD Activity Types Vocabulary](http://vocab.lib.umd.edu/activity)"""

webac = Namespace('http://fedora.info/definitions/v4/webac#')
"""[Fedora Commons WebAC Ontology](https://fedora.info/definitions/v4/2015/09/03/webac)"""

xsd = Namespace('http://www.w3.org/2001/XMLSchema#')
"""[XML Schema Datatypes](https://www.w3.org/TR/xmlschema-2/#built-in-datatypes)"""
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def get_manager(graph: Optional[Graph] = None) -> NamespaceManager:
    """Build a `NamespaceManager` that has a prefix bound for every
    `Namespace` object found among this module's attributes.

    :param graph: Graph the manager is created for. When omitted, a new
        empty `Graph` is used.
    :return: The manager, with each `Namespace` attribute bound to a prefix
        equal to its attribute name.
    """
    manager = NamespaceManager(Graph() if graph is None else graph)
    # take a snapshot of the module attributes before binding anything
    module_attrs = list(vars(sys.modules[__name__]).items())
    for name, candidate in module_attrs:
        if isinstance(candidate, Namespace):
            manager.bind(name, candidate)
    return manager
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
# Module-level manager, built once at import time for a new empty Graph
# (get_manager() is called without a graph argument).
namespace_manager = get_manager()
|
|
@@ -0,0 +1,291 @@
|
|
|
1
|
+
import collections.abc
|
|
2
|
+
import csv
|
|
3
|
+
import logging
|
|
4
|
+
import os
|
|
5
|
+
import platform
|
|
6
|
+
import re
|
|
7
|
+
from abc import ABC
|
|
8
|
+
from argparse import ArgumentTypeError
|
|
9
|
+
from datetime import datetime
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Union, Mapping, Sequence, Optional, List
|
|
12
|
+
|
|
13
|
+
from rdflib import URIRef
|
|
14
|
+
from rdflib.term import Node
|
|
15
|
+
from rdflib.util import from_n3
|
|
16
|
+
|
|
17
|
+
from plastron import namespaces
|
|
18
|
+
|
|
19
|
+
# Default logging configuration dictionary.
# NOTE(review): the keys ('version', 'formatters', 'handlers', 'loggers',
# 'root') look like the logging.config.dictConfig() schema; nothing in this
# module applies it, so presumably a caller does -- confirm.
DEFAULT_LOGGING_OPTIONS = {
    'version': 1,
    'formatters': {
        # pipe-delimited record with level, time, thread, logger name, message
        'full': {
            'format': '%(levelname)s|%(asctime)s|%(threadName)s|%(name)s|%(message)s'
        },
        'messageonly': {
            'format': '%(message)s'
        }
    },
    'handlers': {
        # console output: INFO and above, message text only, written to stderr
        'console': {
            'class': 'logging.StreamHandler',
            'level': 'INFO',
            'formatter': 'messageonly',
            'stream': 'ext://sys.stderr'
        },
        # file output: DEBUG and above, using the 'full' format.
        # NOTE(review): no 'filename' is set here; presumably the caller
        # supplies one before this configuration is applied -- TODO confirm
        'file': {
            'class': 'logging.FileHandler',
            'level': 'DEBUG',
            'formatter': 'full'
        }
    },
    'loggers': {
        '__main__': {
            'level': 'DEBUG',
            'handlers': ['console', 'file'],
            'propagate': False
        },
        'plastron': {
            'level': 'DEBUG',
            'handlers': ['console', 'file'],
            'propagate': False
        },
        # suppress logging output from paramiko by default
        'paramiko': {
            'propagate': False
        }
    },
    'root': {
        'level': 'DEBUG'
    }
}
# logger for this module; used by the functions and classes defined below
logger = logging.getLogger(__name__)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def datetimestamp(digits_only: bool = True) -> str:
    """Return the current UTC time as a string with one-second precision.

    By default the result contains only digits (`20231117151827`). To get
    the ISO 8601 representation instead (`2023-11-17T15:18:27`), set
    `digits_only` to `False`. Neither form carries a UTC offset suffix.

    ```pycon
    >>> datetimestamp()
    '20231117152014'

    >>> datetimestamp(digits_only=False)
    '2023-11-17T15:20:57'
    ```

    :param digits_only: When true (the default), strip every non-digit
        character from the ISO 8601 timestamp.
    :return: The timestamp string.
    """
    # imported locally because only the `datetime` class is imported at the
    # top of this module
    from datetime import timezone

    # datetime.utcnow() is deprecated as of Python 3.12. Take an aware UTC
    # time and drop the tzinfo so isoformat() emits no "+00:00" suffix and
    # the output format stays exactly as before.
    now = datetime.now(timezone.utc).replace(tzinfo=None).isoformat(timespec='seconds')
    if digits_only:
        return re.sub(r'[^0-9]', '', now)
    return now
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def envsubst(value: Union[str, list, dict], env: Mapping[str, str] = None) -> Union[str, list, dict]:
    """
    Recursively replace `${VAR_NAME}` placeholders in value with the values of the
    corresponding keys of env. If env is not given, it defaults to the environment
    variables in os.environ.

    Any placeholders that do not have a corresponding key in the env dictionary
    are left as is, and a warning is logged once per missing name. Braces that are
    not part of a `${VAR_NAME}` placeholder (for example, in a JSON snippet) are
    left untouched.

    :param value: String, list, or dictionary to search for `${VAR_NAME}` placeholders.
    :param env: Dictionary of values to use as replacements. If not given, defaults
        to `os.environ`.
    :return: If `value` is a string, returns the result of replacing `${VAR_NAME}` with the
        corresponding value from env. If `value` is a list, returns a new list where each
        item in `value` is replaced with the result of calling `envsubst()` on that item. If
        `value` is a dictionary, returns a new dictionary where each item in `value` is replaced
        with the result of calling `envsubst()` on that item. Any other type of `value` is
        returned unchanged.
    """
    if env is None:
        env = os.environ
    if isinstance(value, str):
        if '${' not in value:
            return value
        # names already reported, so each missing variable is warned about once
        missing = set()

        def substitute(match):
            name = match.group(1)
            try:
                return str(env[name])
            except KeyError:
                if name not in missing:
                    missing.add(name)
                    logger.warning(f'Environment variable ${{{name}}} not found')
                # for a missing key, keep the placeholder without substitution
                return match.group(0)

        # Only "${...}" is a placeholder. Using str.format() here would also
        # treat every other "{...}" in the string as one, and would raise on
        # "{}" or an unbalanced brace.
        return re.sub(r'\$\{([^{}]+)\}', substitute, value)
    if isinstance(value, list):
        return [envsubst(v, env) for v in value]
    if isinstance(value, dict):
        return {k: envsubst(v, env) for k, v in value.items()}
    return value
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def check_python_version():
    """Log a warning if the running interpreter is older than Python 3.8."""
    # Only the major and minor components are needed. The patch component is
    # not always a plain integer (e.g. "0rc1" or "0+" on pre-release or
    # development builds), so converting it with int() could raise ValueError.
    major, minor = (int(v) for v in platform.python_version_tuple()[:2])
    # compare (major, minor) together so the major version is also considered
    if (major, minor) < (3, 8):
        logger.warning(
            f'You appear to be running Python {platform.python_version()}. '
            'Upgrading to Python 3.8+ is STRONGLY recommended.'
        )
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def strtobool(val: str) -> int:
    """Map a string representation of truth to 1 (true) or 0 (false).

    The comparison is case-insensitive. The strings 'y', 'yes', 't', 'true',
    'on', and '1' give 1; the strings 'n', 'no', 'f', 'false', 'off', and '0'
    give 0. Anything else raises ValueError.

    The implementation is taken from distutils/util.py in Python 3.10.4, so
    that the functionality survives the removal of distutils in Python 3.12.
    See also https://peps.python.org/pep-0632/#migration-advice and
    https://docs.python.org/3.10/whatsnew/3.10.html#distutils-deprecated.

    Despite the name `strtobool`, the return value is an integer, exactly
    as in the distutils original.
    """
    truthy = ('y', 'yes', 't', 'true', 'on', '1')
    falsy = ('n', 'no', 'f', 'false', 'off', '0')
    normalized = val.lower()
    if normalized in truthy:
        return 1
    if normalized in falsy:
        return 0
    # the message reports the lower-cased form of the input
    raise ValueError("invalid truth value %r" % (normalized,))
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
class AppendableSequence(collections.abc.Sequence, ABC):
    """Abstract base class for sequences that also accept new items through
    an `append()` method."""

    def append(self, _value):
        """Add `_value` to the sequence.

        The base implementation always raises `NotImplementedError`;
        subclasses are expected to override it.
        """
        raise NotImplementedError
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
class NullLog(AppendableSequence):
    """Stand-in for `ItemLog` that keeps nothing: appended items are thrown
    away, the length is always zero, and every containment check is `False`."""

    def append(self, _value):
        """Discard `_value`; nothing is stored."""
        return None

    def __contains__(self, item):
        # nothing is ever stored, so nothing is ever contained
        return False

    def __len__(self) -> int:
        return 0

    def __getitem__(self, item):
        # there are no items, so every index is out of range
        raise IndexError
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
class ItemLog(AppendableSequence):
    """Log backed by a CSV file that is used to record item information,
    keyed by a particular column, with the ability to check whether a
    given key exists in the log already.

    `ItemLog` objects are iterable (each row is yielded as a dictionary),
    support `key in log` checks against the values of the key column, and
    support indexing by zero-based row position. Note that `len()` counts
    distinct key values, which is smaller than the number of rows if the
    same key has been logged more than once.

    The append-mode file handle is opened lazily on the first write. Call
    `close()`, or use the log as a context manager, to release it.
    """
    def __init__(self, filename: Union[str, Path], fieldnames: Sequence[str], keyfield: str, header: bool = True):
        """
        :param filename: Path of the CSV file backing this log.
        :param fieldnames: Column names, in file order.
        :param keyfield: Name of the column whose values identify items.
        :param header: Whether the file starts with a header row.
        """
        self.filename: Path = Path(filename)
        self.fieldnames: Sequence[str] = fieldnames
        self.keyfield: str = keyfield
        self.write_header: bool = header
        self._item_keys: set = set()
        self._fh = None
        self._writer = None
        if self.exists():
            self._load_keys()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def exists(self) -> bool:
        """Returns `True` if the CSV log file exists."""
        return self.filename.is_file()

    def create(self):
        """Create the CSV log file. This will overwrite an existing file. If
        `write_header` is `True`, it will also write a header row to the file."""
        # newline='' is what the csv module requires of files it reads or writes
        with self.filename.open(mode='w', buffering=1, newline='') as fh:
            writer = csv.DictWriter(fh, fieldnames=self.fieldnames)
            if self.write_header:
                writer.writeheader()

    def close(self):
        """Close the append-mode file handle, if one has been opened. The
        log remains usable; the next write reopens the file."""
        if self._fh is not None:
            self._fh.close()
            self._fh = None
            self._writer = None

    def _load_keys(self):
        """Read the existing file and remember every value of the key column."""
        for row in iter(self):
            self._item_keys.add(row[self.keyfield])

    def __iter__(self):
        """Yield each data row of the CSV file as a dictionary. Yields
        nothing if the file does not exist."""
        try:
            with self.filename.open(mode='r', buffering=1, newline='') as fh:
                if self.write_header:
                    reader = csv.DictReader(fh)
                    # check the validity of the map file data; compare as lists
                    # so that a tuple of expected fieldnames does not produce a
                    # false mismatch against the list that DictReader returns
                    if list(reader.fieldnames or []) != list(self.fieldnames):
                        logger.warning(
                            f'Fieldnames in {self.filename} do not match expected fieldnames; '
                            f'expected: {self.fieldnames}; found: {reader.fieldnames}'
                        )
                else:
                    # the file has no header row, so supply the fieldnames;
                    # otherwise the first data row would be consumed as a header
                    reader = csv.DictReader(fh, fieldnames=list(self.fieldnames))
                # read the data from the existing file
                yield from reader
        except FileNotFoundError:
            # log file not found, so stop the iteration
            return

    @property
    def writer(self) -> csv.DictWriter:
        """CSV dictionary writer. Creates the file first if it does not
        exist, then opens it for appending on first use."""
        if not self.exists():
            self.create()
        if self._fh is None:
            # line-buffered, so each row is written out as soon as it is complete
            self._fh = self.filename.open(mode='a', buffering=1, newline='')
        if self._writer is None:
            self._writer = csv.DictWriter(self._fh, fieldnames=self.fieldnames)
        return self._writer

    def append(self, row):
        """Write this `row` to the log.

        :raises KeyError: if `row` has no value for the key field. The row is
            not written in that case.
        """
        # look the key up first so that a row without a key never reaches the file
        key = row[self.keyfield]
        self.writer.writerow(row)
        self._item_keys.add(key)

    def writerow(self, row):
        """Alias for `append`"""
        self.append(row)

    def __contains__(self, other):
        """Return `True` if `other` is a key that has been logged."""
        return other in self._item_keys

    def __len__(self):
        """Number of distinct keys in the log."""
        return len(self._item_keys)

    def __getitem__(self, item):
        """Return the row at zero-based position `item`, re-reading the file.

        :raises IndexError: if there is no row at that position.
        """
        for n, row in enumerate(self):
            if n == item:
                return row
        raise IndexError(item)
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
class ItemLogError(Exception):
    """Exception class for `ItemLog`-related errors. Nothing in this module
    raises it."""
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
def uri_or_curie(arg: str) -> URIRef:
    """Turn a string into a `URIRef`.

    A string starting with `http://` or `https://` is taken to be an absolute
    HTTP URI and wrapped directly. Anything else is handed to `from_n3()`
    together with the namespace manager from `namespaces.get_manager()`, so
    that a CURIE (e.g., "dcterms:title") is expanded to its full URI.

    :raises ArgumentTypeError: if `from_n3()` raises `KeyError` (reported as
        an unknown prefix), or if it returns anything other than a `URIRef`.
    """
    looks_like_http_uri = bool(arg) and arg.startswith(('http://', 'https://'))
    if looks_like_http_uri:
        return URIRef(arg)
    try:
        term = from_n3(arg, nsm=namespaces.get_manager())
    except KeyError:
        # report everything up to and including the first colon as the prefix
        prefix = arg[:arg.index(":") + 1]
        raise ArgumentTypeError(f'"{prefix}" is not a known prefix')
    if isinstance(term, URIRef):
        return term
    raise ArgumentTypeError(f'"{arg}" must be a URI or CURIE')
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
def parse_predicate_list(string: Optional[str], delimiter: str = ',') -> Optional[List[Node]]:
    """Parse a delimited list of predicates in N3 notation (e.g.,
    "dcterms:title, dcterms:creator") into a list of RDF terms.

    Whitespace around each item is ignored, and empty items (such as the one
    produced by a trailing delimiter) are skipped.

    :param string: Delimited list of predicates, or `None`.
    :param delimiter: String separating the items. Defaults to a comma.
    :return: `None` if `string` is `None`; otherwise the list of terms that
        `from_n3()` returns for the items, in their original order.
    """
    if string is None:
        return None
    manager = namespaces.get_manager()
    # Strip each item: in "a:b, c:d" the leading space would otherwise become
    # part of the second CURIE's prefix, which is then not found.
    items = (p.strip() for p in string.split(delimiter))
    return [from_n3(p, nsm=manager) for p in items if p]
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: plastron-utils
|
|
3
|
+
Version: 4.3.2
|
|
4
|
+
Summary: Utility functions and namespace definitions for Plastron
|
|
5
|
+
Author-email: University of Maryland Libraries <lib-ssdr@umd.edu>, Josh Westgard <westgard@umd.edu>, Peter Eichman <peichman@umd.edu>, Mohamed Abdul Rasheed <mohideen@umd.edu>, Ben Wallberg <wallberg@umd.edu>, David Steelman <dsteelma@umd.edu>, Marc Andreu Grillo Aguilar <aguilarm@umd.edu>
|
|
6
|
+
Project-URL: Homepage, https://github.com/umd-lib/plastron
|
|
7
|
+
Project-URL: Bug Reports, https://github.com/umd-lib/plastron/issues
|
|
8
|
+
Project-URL: Source, https://github.com/umd-lib/plastron
|
|
9
|
+
Requires-Python: >=3.8
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
Requires-Dist: rdflib
|
|
12
|
+
Provides-Extra: test
|
|
13
|
+
Requires-Dist: pytest; extra == "test"
|
|
14
|
+
Requires-Dist: pytest-cov; extra == "test"
|
|
15
|
+
|
|
16
|
+
# plastron-utils
|
|
17
|
+
|
|
18
|
+
Namespace definitions and other utilities used by Plastron
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
VERSION
|
|
3
|
+
pyproject.toml
|
|
4
|
+
../VERSION
|
|
5
|
+
src/plastron/namespaces/__init__.py
|
|
6
|
+
src/plastron/utils/__init__.py
|
|
7
|
+
src/plastron_utils.egg-info/PKG-INFO
|
|
8
|
+
src/plastron_utils.egg-info/SOURCES.txt
|
|
9
|
+
src/plastron_utils.egg-info/dependency_links.txt
|
|
10
|
+
src/plastron_utils.egg-info/requires.txt
|
|
11
|
+
src/plastron_utils.egg-info/top_level.txt
|
|
12
|
+
tests/test_envsubst.py
|
|
13
|
+
tests/test_util.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
plastron
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
from plastron.utils import envsubst
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def test_simple_strings():
    """Known placeholders are replaced, including several in one string."""
    variables = {'FOO': 'a', 'BAR': 'z'}
    cases = [
        ('${FOO}bc', 'abc'),
        ('${FOO}bc${BAR}yx', 'abczyx'),
    ]
    for template, expected in cases:
        assert envsubst(template, variables) == expected
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def test_unknown_variable_name():
    """A placeholder with no matching key is left in the string as is."""
    template = '${FOO}qrs'
    assert envsubst(template, {}) == '${FOO}qrs'
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def test_lists():
    """Each list item is substituted; unknown placeholders are kept."""
    variables = {'FOO': 'a', 'BAR': 'z'}
    all_known = envsubst(['${FOO}', '${BAR}'], variables)
    assert all_known == ['a', 'z']
    one_unknown = envsubst(['${FOO}', '${BAR}', '${BAZ}'], variables)
    assert one_unknown == ['a', 'z', '${BAZ}']
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def test_dicts():
    """Each dictionary value is substituted; unknown placeholders are kept."""
    variables = {'FOO': 'a', 'BAR': 'z'}
    all_known = envsubst({'foo': '${FOO}', 'bar': '${BAR}'}, variables)
    assert all_known == {'foo': 'a', 'bar': 'z'}
    one_unknown = envsubst({'foo': '${FOO}', 'bar': '${BAZ}'}, variables)
    assert one_unknown == {'foo': 'a', 'bar': '${BAZ}'}
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def test_deep_structure():
    """Substitution reaches strings nested inside lists and dictionaries."""
    variables = {'FOO': 'a'}
    nested = [{'foo': ['${FOO}']}]
    assert envsubst(nested, variables) == [{'foo': ['a']}]
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
|
|
3
|
+
from argparse import ArgumentParser, ArgumentTypeError
|
|
4
|
+
|
|
5
|
+
from plastron.namespaces import dcterms, rdf, pcdm
|
|
6
|
+
from plastron.cli import parse_data_property, parse_object_property
|
|
7
|
+
from plastron.utils import uri_or_curie
|
|
8
|
+
from rdflib.term import URIRef, Literal
|
|
9
|
+
|
|
10
|
+
# Inputs used by the tests below that expect uri_or_curie() to reject its
# argument.
INVALID_URI_OR_CURIE_ARGS = [
    # None
    None,
    # empty string
    '',
    # unrecognized namespace
    'not_in_namespace:Foo'
]
# Inputs used by the tests below that expect uri_or_curie() to return
# EXPECTED_TERM; all three spell the same URI in a different form.
VALID_URI_OR_CURIE_ARGS = [
    # CURIE
    'umdaccess:Public',
    # N3-formatted URI
    '<http://vocab.lib.umd.edu/access#Public>',
    # plain string HTTP URI
    'http://vocab.lib.umd.edu/access#Public'
]
EXPECTED_TERM = URIRef('http://vocab.lib.umd.edu/access#Public')
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# Tests for the "uri_or_curie" function
|
|
30
|
+
|
|
31
|
+
@pytest.mark.parametrize('arg_value', INVALID_URI_OR_CURIE_ARGS)
def test_given_invalid_uri_or_curie__raises_error(arg_value):
    """Every invalid input makes `uri_or_curie()` raise `ArgumentTypeError`."""
    with pytest.raises(ArgumentTypeError):
        uri_or_curie(arg_value)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@pytest.mark.parametrize('arg_value', VALID_URI_OR_CURIE_ARGS)
def test_given_valid_uri_or_curie__returns_term(arg_value):
    """Every valid spelling resolves to the same expected term."""
    term = uri_or_curie(arg_value)
    assert term == EXPECTED_TERM
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@pytest.mark.parametrize('arg_value', VALID_URI_OR_CURIE_ARGS)
def test_given_valid_uri_or_curie_type__parse_args_returns_uriref(arg_value):
    """Used as an argparse `type`, `uri_or_curie` yields the expected `URIRef`."""
    cli = ArgumentParser()
    cli.add_argument('--access', type=uri_or_curie)
    parsed = cli.parse_args(('--access', arg_value))
    access = parsed.access
    assert isinstance(access, URIRef)
    assert access == EXPECTED_TERM
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@pytest.mark.parametrize('arg_value', INVALID_URI_OR_CURIE_ARGS)
def test_given_invalid_uri_or_curie_type__parse_args_exits(arg_value):
    """Used as an argparse `type`, an invalid value makes `parse_args()` exit."""
    cli = ArgumentParser()
    cli.add_argument('--access', type=uri_or_curie)
    with pytest.raises(SystemExit):
        cli.parse_args(('--access', arg_value))
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
@pytest.mark.parametrize(
    ('p', 'o', 'expected'),
    [
        # plain string object
        ('dcterms:title', 'Foobar', (dcterms.title, Literal('Foobar'))),
        # language-tagged string object
        ('dcterms:title', '"der Hund"@de', (dcterms.title, Literal('der Hund', lang='de'))),
    ],
)
def test_parse_data_property(p, o, expected):
    """`parse_data_property()` returns the expected (predicate, literal) pair."""
    parsed = parse_data_property(p, o)
    assert parsed == expected
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@pytest.mark.parametrize(
    ('p', 'o', 'expected'),
    [
        # CURIE object
        ('rdf:type', 'pcdm:Object', (rdf.type, pcdm.Object)),
        # plain string HTTP URI object
        ('dcterms:creator', 'https://www.lib.umd.edu/', (dcterms.creator, URIRef('https://www.lib.umd.edu/'))),
        # N3-formatted URI object
        ('dcterms:creator', '<https://www.lib.umd.edu/>', (dcterms.creator, URIRef('https://www.lib.umd.edu/'))),
    ],
)
def test_parse_object_property(p, o, expected):
    """`parse_object_property()` returns the expected (predicate, URI) pair."""
    parsed = parse_object_property(p, o)
    assert parsed == expected
|