plastron-models 4.3.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- plastron_models-4.3.2/PKG-INFO +96 -0
- plastron_models-4.3.2/README.md +73 -0
- plastron_models-4.3.2/VERSION +1 -0
- plastron_models-4.3.2/pyproject.toml +43 -0
- plastron_models-4.3.2/setup.cfg +4 -0
- plastron_models-4.3.2/src/plastron/handles/__init__.py +144 -0
- plastron_models-4.3.2/src/plastron/models/__init__.py +21 -0
- plastron_models-4.3.2/src/plastron/models/annotations.py +99 -0
- plastron_models-4.3.2/src/plastron/models/fedora.py +9 -0
- plastron_models-4.3.2/src/plastron/models/ldp.py +7 -0
- plastron_models-4.3.2/src/plastron/models/letter.py +200 -0
- plastron_models-4.3.2/src/plastron/models/newspaper.py +142 -0
- plastron_models-4.3.2/src/plastron/models/ore.py +18 -0
- plastron_models-4.3.2/src/plastron/models/pcdm.py +30 -0
- plastron_models-4.3.2/src/plastron/models/poster.py +69 -0
- plastron_models-4.3.2/src/plastron/models/umd.py +116 -0
- plastron_models-4.3.2/src/plastron/rdf/__init__.py +0 -0
- plastron_models-4.3.2/src/plastron/rdf/authority.py +42 -0
- plastron_models-4.3.2/src/plastron/rdf/ldp.py +207 -0
- plastron_models-4.3.2/src/plastron/rdf/oa.py +66 -0
- plastron_models-4.3.2/src/plastron/rdf/ocr.py +106 -0
- plastron_models-4.3.2/src/plastron/rdf/ore.py +111 -0
- plastron_models-4.3.2/src/plastron/rdf/pcdm.py +182 -0
- plastron_models-4.3.2/src/plastron/rdf/rdf.py +326 -0
- plastron_models-4.3.2/src/plastron/serializers/__init__.py +38 -0
- plastron_models-4.3.2/src/plastron/serializers/csv.py +455 -0
- plastron_models-4.3.2/src/plastron/serializers/turtle.py +29 -0
- plastron_models-4.3.2/src/plastron/validation/__init__.py +2 -0
- plastron_models-4.3.2/src/plastron/validation/rules.py +37 -0
- plastron_models-4.3.2/src/plastron/validation/vocabularies/__init__.py +49 -0
- plastron_models-4.3.2/src/plastron_models.egg-info/PKG-INFO +96 -0
- plastron_models-4.3.2/src/plastron_models.egg-info/SOURCES.txt +43 -0
- plastron_models-4.3.2/src/plastron_models.egg-info/dependency_links.txt +1 -0
- plastron_models-4.3.2/src/plastron_models.egg-info/requires.txt +16 -0
- plastron_models-4.3.2/src/plastron_models.egg-info/top_level.txt +1 -0
- plastron_models-4.3.2/tests/test_copyright_notice.py +38 -0
- plastron_models-4.3.2/tests/test_handles.py +123 -0
- plastron_models-4.3.2/tests/test_is_from_vocabulary.py +64 -0
- plastron_models-4.3.2/tests/test_issue_model.py +22 -0
- plastron_models-4.3.2/tests/test_letter_model.py +27 -0
- plastron_models-4.3.2/tests/test_poster_model.py +53 -0
- plastron_models-4.3.2/tests/test_presentation_set.py +93 -0
- plastron_models-4.3.2/tests/test_terms_of_use.py +64 -0
- plastron_models-4.3.2/tests/test_umd_model.py +37 -0
- plastron_models-4.3.2/tests/test_validation.py +181 -0
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: plastron-models
|
|
3
|
+
Version: 4.3.2
|
|
4
|
+
Summary: Content modelling built on the Plastron RDF to Python object mapper
|
|
5
|
+
Author-email: University of Maryland Libraries <lib-ssdr@umd.edu>, Josh Westgard <westgard@umd.edu>, Peter Eichman <peichman@umd.edu>, Mohamed Abdul Rasheed <mohideen@umd.edu>, Ben Wallberg <wallberg@umd.edu>, David Steelman <dsteelma@umd.edu>, Marc Andreu Grillo Aguilar <aguilarm@umd.edu>
|
|
6
|
+
Requires-Python: >=3.8
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
Requires-Dist: edtf_validate
|
|
9
|
+
Requires-Dist: iso639
|
|
10
|
+
Requires-Dist: lxml
|
|
11
|
+
Requires-Dist: requests
|
|
12
|
+
Requires-Dist: paramiko
|
|
13
|
+
Requires-Dist: pillow
|
|
14
|
+
Requires-Dist: plastron-client
|
|
15
|
+
Requires-Dist: plastron-rdf
|
|
16
|
+
Requires-Dist: plastron-utils
|
|
17
|
+
Requires-Dist: rdflib>=6.0.0
|
|
18
|
+
Provides-Extra: test
|
|
19
|
+
Requires-Dist: freezegun; extra == "test"
|
|
20
|
+
Requires-Dist: httpretty; extra == "test"
|
|
21
|
+
Requires-Dist: pytest; extra == "test"
|
|
22
|
+
Requires-Dist: pytest-cov; extra == "test"
|
|
23
|
+
|
|
24
|
+
# plastron-models
|
|
25
|
+
|
|
26
|
+
Metadata content models based on RDF
|
|
27
|
+
|
|
28
|
+
## Model Packages
|
|
29
|
+
|
|
30
|
+
* [annotations](src/plastron/models/annotations.py): Auxiliary model
|
|
31
|
+
classes for Web Annotations
|
|
32
|
+
* [letter](src/plastron/models/letter.py): Legacy content model for the
|
|
33
|
+
Katherine Anne Porter correspondence collection
|
|
34
|
+
* [newspaper](src/plastron/models/newspaper.py): Content model for the
|
|
35
|
+
Student Newspapers collection, based on the NDNP data format
|
|
36
|
+
* [poster](src/plastron/models/poster.py): Legacy content model for the
|
|
37
|
+
Prange Posters and Wall Newspapers collection
|
|
38
|
+
* [umd](src/plastron/models/umd.py): Standardized digital object content
|
|
39
|
+
model for current and future collections
|
|
40
|
+
|
|
41
|
+
## Vocabulary Retrieval
|
|
42
|
+
|
|
43
|
+
The `get_vocabulary` method in the
|
|
44
|
+
`plastron-models/src/plastron/validation/vocabularies/__init__.py` module
|
|
45
|
+
initializer controls how vocabularies used for validation are retrieved.
|
|
46
|
+
|
|
47
|
+
Vocabularies used to validate models are retrieved either from the local
|
|
48
|
+
filesystem, or from a vocabulary server on the network.
|
|
49
|
+
|
|
50
|
+
The code uses two variables:
|
|
51
|
+
|
|
52
|
+
* `VOCABULARIES_DIR` - The full filepath to the directory containing the local
|
|
53
|
+
vocabulary files
|
|
54
|
+
* `VOCABULARIES` - A dictionary mapping a URI to the name of the file containing
|
|
55
|
+
the vocabulary.
|
|
56
|
+
|
|
57
|
+
Vocabularies matching URIs in the `VOCABULARIES` dictionary are first looked
|
|
58
|
+
up locally, with the local file being used, if found. If not, a network lookup
|
|
59
|
+
using the URI as the vocabulary location is used.
|
|
60
|
+
|
|
61
|
+
Vocabulary URIs not in the `VOCABULARIES` dictionary are always looked up via
|
|
62
|
+
the network.
|
|
63
|
+
|
|
64
|
+
### Vocabulary Retrieval for Tests
|
|
65
|
+
|
|
66
|
+
In general, unit tests should be run without making calls to the network, as
|
|
67
|
+
making a network call makes the tests slower and less reliable.
|
|
68
|
+
|
|
69
|
+
The retrieval of the vocabularies via the `__init__.py` module initializer is
|
|
70
|
+
problematic for the tests, because the module initialization occurs before a
|
|
71
|
+
test is even run. This makes normal methods of overriding the network calls
|
|
72
|
+
ineffective. For example, trying to intercept the network calls using the
|
|
73
|
+
“httpretty” library doesn’t work, because by the time the
|
|
74
|
+
“@httpretty.activate” decorator is accessed, the module has already been
|
|
75
|
+
initialized. The same is true when attempting to “monkey patch” the module.
|
|
76
|
+
|
|
77
|
+
One method that was found to work was to add a
|
|
78
|
+
`conftest.py` file into the root directory of the project, with a
|
|
79
|
+
`pytest_configure` method. It is necessary to have the `conftest.py` in the
|
|
80
|
+
root directory, so that it will always be used when running pytests in any of
|
|
81
|
+
the Plastron modules (as those tests may use one of the content models with a
|
|
82
|
+
vocabulary). The `pytest_configure` method runs as soon as pytest starts, and
|
|
83
|
+
before any modules are loaded, providing an opportunity to set the
|
|
84
|
+
“VOCABULARIES_DIR” and “VOCABULARIES” variables to values that are suitable for
|
|
85
|
+
testing.
|
|
86
|
+
|
|
87
|
+
Any vocabularies needed for the tests should be added as follows:
|
|
88
|
+
|
|
89
|
+
1) Add a file containing the vocabulary (in "turtle" format) to the
|
|
90
|
+
"plastron-models/tests/data/vocabularies/" directory.
|
|
91
|
+
|
|
92
|
+
2) In the `conftest.py` file in the root directory, add the vocabulary URI and
|
|
93
|
+
filename to the `VOCABULARIES` dictionary.
|
|
94
|
+
|
|
95
|
+
Note that if a vocabulary is not added, a network call will still be attempted,
|
|
96
|
+
due to the fallback behavior of the `get_vocabulary` method.
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
# plastron-models
|
|
2
|
+
|
|
3
|
+
Metadata content models based on RDF
|
|
4
|
+
|
|
5
|
+
## Model Packages
|
|
6
|
+
|
|
7
|
+
* [annotations](src/plastron/models/annotations.py): Auxiliary model
|
|
8
|
+
classes for Web Annotations
|
|
9
|
+
* [letter](src/plastron/models/letter.py): Legacy content model for the
|
|
10
|
+
Katherine Anne Porter correspondence collection
|
|
11
|
+
* [newspaper](src/plastron/models/newspaper.py): Content model for the
|
|
12
|
+
Student Newspapers collection, based on the NDNP data format
|
|
13
|
+
* [poster](src/plastron/models/poster.py): Legacy content model for the
|
|
14
|
+
Prange Posters and Wall Newspapers collection
|
|
15
|
+
* [umd](src/plastron/models/umd.py): Standardized digital object content
|
|
16
|
+
model for current and future collections
|
|
17
|
+
|
|
18
|
+
## Vocabulary Retrieval
|
|
19
|
+
|
|
20
|
+
The `get_vocabulary` method in the
|
|
21
|
+
`plastron-models/src/plastron/validation/vocabularies/__init__.py` module
|
|
22
|
+
initializer controls how vocabularies used for validation are retrieved.
|
|
23
|
+
|
|
24
|
+
Vocabularies used to validate models are retrieved either from the local
|
|
25
|
+
filesystem, or from a vocabulary server on the network.
|
|
26
|
+
|
|
27
|
+
The code uses two variables:
|
|
28
|
+
|
|
29
|
+
* `VOCABULARIES_DIR` - The full filepath to the directory containing the local
|
|
30
|
+
vocabulary files
|
|
31
|
+
* `VOCABULARIES` - A dictionary mapping a URI to the name of the file containing
|
|
32
|
+
the vocabulary.
|
|
33
|
+
|
|
34
|
+
Vocabularies matching URIs in the `VOCABULARIES` dictionary are first looked
|
|
35
|
+
up locally, with the local file being used, if found. If not, a network lookup
|
|
36
|
+
using the URI as the vocabulary location is used.
|
|
37
|
+
|
|
38
|
+
Vocabulary URIs not in the `VOCABULARIES` dictionary are always looked up via
|
|
39
|
+
the network.
|
|
40
|
+
|
|
41
|
+
### Vocabulary Retrieval for Tests
|
|
42
|
+
|
|
43
|
+
In general, unit tests should be run without making calls to the network, as
|
|
44
|
+
making a network call makes the tests slower and less reliable.
|
|
45
|
+
|
|
46
|
+
The retrieval of the vocabularies via the `__init__.py` module initializer is
|
|
47
|
+
problematic for the tests, because the module initialization occurs before a
|
|
48
|
+
test is even run. This makes normal methods of overriding the network calls
|
|
49
|
+
ineffective. For example, trying to intercept the network calls using the
|
|
50
|
+
“httpretty” library doesn’t work, because by the time the
|
|
51
|
+
“@httpretty.activate” decorator is accessed, the module has already been
|
|
52
|
+
initialized. The same is true when attempting to “monkey patch” the module.
|
|
53
|
+
|
|
54
|
+
One method that was found to work was to add a
|
|
55
|
+
`conftest.py` file into the root directory of the project, with a
|
|
56
|
+
`pytest_configure` method. It is necessary to have the `conftest.py` in the
|
|
57
|
+
root directory, so that it will always be used when running pytests in any of
|
|
58
|
+
the Plastron modules (as those tests may use one of the content models with a
|
|
59
|
+
vocabulary). The `pytest_configure` method runs as soon as pytest starts, and
|
|
60
|
+
before any modules are loaded, providing an opportunity to set the
|
|
61
|
+
“VOCABULARIES_DIR” and “VOCABULARIES” variables to values that are suitable for
|
|
62
|
+
testing.
|
|
63
|
+
|
|
64
|
+
Any vocabularies needed for the tests should be added as follows:
|
|
65
|
+
|
|
66
|
+
1) Add a file containing the vocabulary (in "turtle" format) to the
|
|
67
|
+
"plastron-models/tests/data/vocabularies/" directory.
|
|
68
|
+
|
|
69
|
+
2) In the `conftest.py` file in the root directory, add the vocabulary URI and
|
|
70
|
+
filename to the `VOCABULARIES` dictionary.
|
|
71
|
+
|
|
72
|
+
Note that if a vocabulary is not added, a network call will still be attempted,
|
|
73
|
+
due to the fallback behavior of the `get_vocabulary` method.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
4.3.2
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "plastron-models"
|
|
3
|
+
description = "Content modelling built on the Plastron RDF to Python object mapper"
|
|
4
|
+
authors = [
|
|
5
|
+
{ name='University of Maryland Libraries', email='lib-ssdr@umd.edu' },
|
|
6
|
+
{ name='Josh Westgard', email='westgard@umd.edu' },
|
|
7
|
+
{ name='Peter Eichman', email='peichman@umd.edu' },
|
|
8
|
+
{ name='Mohamed Abdul Rasheed', email='mohideen@umd.edu' },
|
|
9
|
+
{ name='Ben Wallberg', email='wallberg@umd.edu' },
|
|
10
|
+
{ name='David Steelman', email='dsteelma@umd.edu' },
|
|
11
|
+
{ name='Marc Andreu Grillo Aguilar', email='aguilarm@umd.edu' },
|
|
12
|
+
]
|
|
13
|
+
readme = "README.md"
|
|
14
|
+
requires-python = ">= 3.8"
|
|
15
|
+
dependencies = [
|
|
16
|
+
"edtf_validate",
|
|
17
|
+
"iso639",
|
|
18
|
+
"lxml",
|
|
19
|
+
"requests",
|
|
20
|
+
"paramiko",
|
|
21
|
+
"pillow",
|
|
22
|
+
"plastron-client",
|
|
23
|
+
"plastron-rdf",
|
|
24
|
+
"plastron-utils",
|
|
25
|
+
# rdflib 6.0.0 fixed the 308 HTTP redirect bug
|
|
26
|
+
"rdflib >= 6.0.0",
|
|
27
|
+
]
|
|
28
|
+
dynamic = ["version"]
|
|
29
|
+
|
|
30
|
+
[project.optional-dependencies]
|
|
31
|
+
test = [
|
|
32
|
+
"freezegun",
|
|
33
|
+
"httpretty",
|
|
34
|
+
"pytest",
|
|
35
|
+
"pytest-cov",
|
|
36
|
+
]
|
|
37
|
+
|
|
38
|
+
[build-system]
|
|
39
|
+
requires = ["setuptools>=66.1.0"]
|
|
40
|
+
build-backend = "setuptools.build_meta"
|
|
41
|
+
|
|
42
|
+
[tool.setuptools.dynamic]
|
|
43
|
+
version = { "file" = "VERSION" }
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
import dataclasses
|
|
2
|
+
import logging
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from typing import List, Dict, Any
|
|
5
|
+
|
|
6
|
+
from requests import Session
|
|
7
|
+
from requests_jwtauth import HTTPBearerAuth
|
|
8
|
+
|
|
9
|
+
from plastron.namespaces import dcterms, umdtype
|
|
10
|
+
from plastron.rdfmapping.descriptors import DataProperty
|
|
11
|
+
from plastron.rdfmapping.resources import RDFResource
|
|
12
|
+
from plastron.validation.rules import is_handle
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def parse_handle_string(handle: str) -> List[str]:
    """Split a handle string into its ``[prefix, suffix]`` parts.

    A leading ``hdl:`` scheme is removed first. Only the first ``/`` is used
    as the separator, so any further slashes stay in the suffix.

    :param handle: handle in ``{prefix}/{suffix}`` or ``hdl:{prefix}/{suffix}`` form
    :return: two-element list ``[prefix, suffix]``
    :raises HandleError: if the string contains no ``/`` separator
    """
    if handle.startswith('hdl:'):
        handle = handle[4:]
    # str.split() does not raise when the separator is missing; it returns a
    # one-element list, so the shape has to be checked explicitly
    parts = handle.split('/', 1)
    if len(parts) != 2:
        raise HandleError(
            'Handle must be a string in the form "{prefix}/{suffix}" or "hdl:{prefix}/{suffix}"'
        )
    return parts
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def parse_result(result: Dict[str, Any]) -> Dict[str, Any]:
    """Flatten a handle service response in place.

    If the response has a ``request`` key, that key is removed and the
    entries of its value are merged into the top level of ``result``
    (overwriting any keys of the same name). The same dictionary object that
    was passed in is returned.
    """
    logger.debug(f'Raw result: {result}')
    if 'request' not in result:
        return result
    # lift the nested request fields up to the top level
    nested_request = result.pop('request')
    result.update(nested_request)
    return result
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@dataclass
class HandleInfo:
    """Record describing a handle; every field except ``exists`` defaults to None."""
    # required; the only field without a default
    exists: bool
    handle_url: str = None
    # prefix and suffix together form the "{prefix}/{suffix}" handle string
    prefix: str = None
    suffix: str = None
    url: str = None
    repo: str = None
    repo_id: str = None

    def __str__(self):
        """Return the handle as `{prefix}/{suffix}`."""
        return '{}/{}'.format(self.prefix, self.suffix)

    @property
    def hdl_uri(self):
        """Return the handle as `hdl:{prefix}/{suffix}`."""
        return 'hdl:' + str(self)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class HandleServiceClient:
    """HTTP client for a handle service.

    All requests go through one ``requests`` session that authenticates with
    the given JWT as a bearer token. Any response whose status is not OK is
    reported as a ``HandleServerError``.
    """

    def __init__(self, endpoint_url: str, jwt_token: str, default_prefix: str = None, default_repo: str = None):
        """
        :param endpoint_url: base URL of the handle service; request paths are appended to it
        :param jwt_token: token used for bearer authentication
        :param default_prefix: prefix used by ``create_handle`` when none is given
        :param default_repo: repo used by ``find_handle`` and ``create_handle`` when none is given
        """
        self.endpoint_url = endpoint_url
        self.default_prefix = default_prefix
        self.default_repo = default_repo
        http_session = Session()
        http_session.auth = HTTPBearerAuth(jwt_token)
        self.session = http_session

    @staticmethod
    def _require_ok(response):
        """Raise ``HandleServerError`` unless the response status is OK."""
        if response.ok:
            return
        raise HandleServerError(str(response))

    def get_info(self, prefix: str, suffix: str):
        """GET ``/handles/info`` for the given prefix and suffix.

        :return: ``HandleInfo`` built from the JSON response
        :raises HandleServerError: if the response status is not OK
        """
        response = self.session.get(
            url=self.endpoint_url + '/handles/info',
            params={'prefix': prefix, 'suffix': suffix},
        )
        self._require_ok(response)
        return HandleInfo(**parse_result(response.json()))

    def find_handle(self, repo_id: str, repo: str = None) -> HandleInfo:
        """GET ``/handles/exists`` for a repository identifier.

        :param repo_id: identifier sent as the ``repo_id`` query parameter
        :param repo: repo name; falls back to ``default_repo`` when falsy
        :return: ``HandleInfo`` built from the JSON response
        :raises HandleServerError: if the response status is not OK
        """
        query = {
            'repo': repo or self.default_repo,
            'repo_id': repo_id,
        }
        response = self.session.get(url=self.endpoint_url + '/handles/exists', params=query)
        self._require_ok(response)
        return HandleInfo(**parse_result(response.json()))

    def create_handle(self, repo_id: str, url: str, prefix: str = None, repo: str = None) -> HandleInfo:
        """POST a new handle to ``/handles``.

        :param prefix: handle prefix; falls back to ``default_prefix`` when falsy
        :param repo: repo name; falls back to ``default_repo`` when falsy
        :return: ``HandleInfo`` with ``exists=True`` plus the fields of the JSON response
        :raises HandleServerError: if the response status is not OK
        """
        payload = {
            'prefix': prefix or self.default_prefix,
            'repo': repo or self.default_repo,
            'repo_id': repo_id,
            'url': url,
        }
        response = self.session.post(f'{self.endpoint_url}/handles', json=payload)
        self._require_ok(response)
        return HandleInfo(exists=True, **parse_result(response.json()))

    def update_handle(self, handle_info: HandleInfo, **fields) -> HandleInfo:
        """PATCH an existing handle with changed field values.

        The request URL is built from the prefix and suffix of the *original*
        ``handle_info``; the request body is the full updated record.

        :param fields: ``HandleInfo`` field values to replace
        :return: a new ``HandleInfo`` with the replacements applied
        :raises HandleServerError: if the response status is not OK
        """
        # build the updated copy before sending, so invalid field names fail
        # before any request is made
        replacement = dataclasses.replace(handle_info, **fields)
        target_url = f'{self.endpoint_url}/handles/{handle_info.prefix}/{handle_info.suffix}'
        response = self.session.patch(target_url, json=dataclasses.asdict(replacement))
        self._require_ok(response)
        return replacement
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
class HandleError(Exception):
    """Base class for the handle exceptions defined in this module."""


class HandleServerError(HandleError):
    """Error reported for a failed exchange with the handle server."""


class HandleNotFoundError(HandleServerError):
    """Handle server error that records the handle concerned.

    The handle is stored on the ``handle`` attribute; any remaining
    positional arguments become the regular exception ``args``.
    """

    def __init__(self, handle, *args):
        self.handle = handle
        super().__init__(*args)
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
class HandleBearingResource(RDFResource):
    """Resource with a single ``handle`` field.

    This class can be used by itself when the handle field is the only one
    needed, or as a mix-in to give a handle field to another full model.
    """
    handle = DataProperty(dcterms.identifier, datatype=umdtype.handle, validate=is_handle)

    @property
    def has_handle(self) -> bool:
        """Whether this object has at least one handle value and that property is valid."""
        if len(self.handle) == 0:
            return False
        return bool(self.handle.is_valid)
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
from typing import Type
|
|
3
|
+
|
|
4
|
+
from plastron.models.letter import Letter
|
|
5
|
+
from plastron.models.newspaper import Issue
|
|
6
|
+
from plastron.models.poster import Poster
|
|
7
|
+
from plastron.models.umd import Item
|
|
8
|
+
from plastron.rdfmapping.resources import RDFResourceBase
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class ModelClassNotFoundError(Exception):
    """Raised by ``get_model_class`` when no model matches the requested name.

    The requested name is kept on the ``model_name`` attribute; any further
    positional arguments become the regular exception ``args``.
    """

    def __init__(self, model_name: str, *args):
        self.model_name = model_name
        super().__init__(*args)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def get_model_class(model_name: str) -> Type[RDFResourceBase]:
    """Look up a content model class by name in this package.

    :param model_name: name of a model class available in this package's
        namespace (for example one of the classes imported at the top of
        this module)
    :return: the matching model class
    :raises ModelClassNotFoundError: if the package has no attribute of that
        name, or the attribute is not an ``RDFResourceBase`` subclass
    """
    try:
        candidate = getattr(sys.modules[__package__], model_name)
    except AttributeError as e:
        raise ModelClassNotFoundError(model_name) from e
    # the package namespace also contains non-model names (e.g. the imported
    # ``sys`` module or ``Type``); those must not be returned as model classes
    if not (isinstance(candidate, type) and issubclass(candidate, RDFResourceBase)):
        raise ModelClassNotFoundError(model_name)
    return candidate
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
from rdflib import RDF, URIRef, Literal
|
|
2
|
+
|
|
3
|
+
from plastron.namespaces import dcterms, oa, prov, sc
|
|
4
|
+
from plastron.rdfmapping.decorators import rdf_type
|
|
5
|
+
from plastron.rdfmapping.descriptors import ObjectProperty, DataProperty
|
|
6
|
+
from plastron.rdfmapping.embed import embedded
|
|
7
|
+
from plastron.rdfmapping.resources import RDFResource
|
|
8
|
+
|
|
9
|
+
# alias the rdflib Namespace
|
|
10
|
+
ns = oa
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# Annotation resources
|
|
14
|
+
@rdf_type(oa.Annotation)
class Annotation(RDFResource):
    """Annotation resource with body, target, and motivation properties."""
    body = ObjectProperty(oa.hasBody, repeatable=True, cls=RDFResource)
    target = ObjectProperty(oa.hasTarget, cls=RDFResource)
    motivation = ObjectProperty(oa.motivatedBy)

    def __str__(self):
        """Return the string forms of all bodies, separated by single spaces."""
        return ' '.join(map(str, self.body))

    def add_body(self, body):
        """Append a body and point its ``annotation`` attribute back at this object."""
        self.body.append(body)
        body.annotation = self

    def add_target(self, target):
        """Append a target and point its ``annotation`` attribute back at this object."""
        self.target.append(target)
        target.annotation = self
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@rdf_type(oa.TextualBody)
class TextualBody(RDFResource):
    """Textual annotation body with a value and a content type."""
    value = DataProperty(RDF.value)
    content_type = DataProperty(dcterms['format'])

    def __str__(self):
        """Return the value as text, shortened for display.

        Text of up to 25 characters is returned unchanged; longer text is cut
        to its first 24 characters followed by an ellipsis character.
        """
        text = str(self.value)
        if len(text) > 25:
            return text[:24] + '…'
        return text
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@rdf_type(oa.SpecificResource)
class SpecificResource(RDFResource):
    """Resource made up of a source and a selector."""
    selector = ObjectProperty(oa.hasSelector, cls=RDFResource)
    source = ObjectProperty(oa.hasSource)

    def __str__(self):
        """Return the string forms of all selectors, separated by single spaces."""
        return ' '.join(map(str, self.selector))

    def add_selector(self, selector):
        """Append a selector and point its ``annotation`` attribute back at this object."""
        self.selector.append(selector)
        selector.annotation = self
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@rdf_type(oa.FragmentSelector)
class FragmentSelector(RDFResource):
    """Selector resource declared with the ``oa.FragmentSelector`` type.

    Its string form is the string form of ``value``.
    """
    # mapped to the RDF.value predicate
    value = DataProperty(RDF.value)
    # mapped to the dcterms.conformsTo predicate
    conforms_to = ObjectProperty(dcterms.conformsTo)

    def __str__(self):
        return str(self.value)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
@rdf_type(oa.XPathSelector)
class XPathSelector(RDFResource):
    """Selector resource declared with the ``oa.XPathSelector`` type.

    Its string form is the string form of ``value``.
    """
    # mapped to the RDF.value predicate
    value = DataProperty(RDF.value)

    def __str__(self):
        return str(self.value)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
class FullTextAnnotation(Annotation):
    """Annotation with an additional ``derived_from`` property."""
    # mapped to the prov.wasDerivedFrom predicate
    derived_from = ObjectProperty(prov.wasDerivedFrom)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class TextblockOnPage(Annotation):
    """Annotation that places the text of one text block on a page region."""
    derived_from = ObjectProperty(prov.wasDerivedFrom, cls=RDFResource)

    @classmethod
    def from_textblock(cls, textblock, page, scale, ocr_file):
        """Build an annotation from a text block.

        :param textblock: object providing ``xywh(scale)``, ``text(scale=...)`` and ``id``
        :param page: object whose ``url`` becomes the source of the target
        :param scale: passed through to ``textblock.xywh`` and ``textblock.text``
        :param ocr_file: object whose ``url`` becomes the source of ``derived_from``
        :return: new instance with embedded body, target, and derived_from
            resources, and ``sc.painting`` as its motivation
        """
        region = ','.join(str(number) for number in textblock.xywh(scale))
        # the three embedded resources are built in the same order as the
        # keyword arguments they are passed as
        text_body = embedded(TextualBody)(
            value=textblock.text(scale=scale),
            content_type='text/plain'
        )
        page_target = embedded(SpecificResource)(
            source=URIRef(page.url),
            selector=embedded(FragmentSelector)(
                value=Literal(f'xywh={region}'),
                conforms_to=URIRef('http://www.w3.org/TR/media-frags/'),
            ),
        )
        ocr_origin = embedded(SpecificResource)(
            source=URIRef(ocr_file.url),
            selector=embedded(XPathSelector)(value=f'//*[@ID="{textblock.id}"]'),
        )
        return cls(
            body=text_body,
            target=page_target,
            derived_from=ocr_origin,
            motivation=sc.painting
        )
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
from plastron.namespaces import fedora, xsd
|
|
2
|
+
from plastron.rdfmapping.descriptors import ObjectProperty, DataProperty
|
|
3
|
+
from plastron.rdfmapping.resources import RDFResource
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class FedoraResource(RDFResource):
    """Resource with properties mapped to the ``fedora`` namespace."""
    # fedora.created, with datatype xsd.dateTime
    created = DataProperty(fedora.created, datatype=xsd.dateTime)
    # fedora.lastModified, with datatype xsd.dateTime
    last_modified = DataProperty(fedora.lastModified, datatype=xsd.dateTime)
    # fedora.hasParent, an object property
    parent = ObjectProperty(fedora.hasParent)
|