gamslib 0.2.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gamslib-0.2.6/.gitignore +19 -0
- gamslib-0.2.6/.gitlab-ci.yml +50 -0
- gamslib-0.2.6/CHANGELOG.md +29 -0
- gamslib-0.2.6/LICENSE +21 -0
- gamslib-0.2.6/PKG-INFO +71 -0
- gamslib-0.2.6/README.md +49 -0
- gamslib-0.2.6/coverage.sh +2 -0
- gamslib-0.2.6/pyproject.toml +53 -0
- gamslib-0.2.6/src/gamslib/__init__.py +6 -0
- gamslib-0.2.6/src/gamslib/objectcsv/__init__.py +57 -0
- gamslib-0.2.6/src/gamslib/objectcsv/create_csv.py +169 -0
- gamslib-0.2.6/src/gamslib/objectcsv/dublincore.py +188 -0
- gamslib-0.2.6/src/gamslib/objectcsv/manage_csv.py +85 -0
- gamslib-0.2.6/src/gamslib/objectcsv/objectcsv.py +280 -0
- gamslib-0.2.6/src/gamslib/objectcsv/utils.py +21 -0
- gamslib-0.2.6/src/gamslib/objectcsv/xlsx.py +53 -0
- gamslib-0.2.6/src/gamslib/projectconfiguration/__init__.py +49 -0
- gamslib-0.2.6/src/gamslib/projectconfiguration/configuration.py +108 -0
- gamslib-0.2.6/src/gamslib/projectconfiguration/resources/project.toml +23 -0
- gamslib-0.2.6/tests/objectcsv/test_create_csv/objects/obj1/DC.xml +14 -0
- gamslib-0.2.6/tests/objectcsv/test_create_csv/objects/obj1/SOURCE.xml +250 -0
- gamslib-0.2.6/tests/objectcsv/test_create_csv/objects/obj2/DC.xml +14 -0
- gamslib-0.2.6/tests/objectcsv/test_create_csv/project.toml +9 -0
- gamslib-0.2.6/tests/objectcsv/test_create_csv.py +116 -0
- gamslib-0.2.6/tests/objectcsv/test_dublincore/DC.xml +15 -0
- gamslib-0.2.6/tests/objectcsv/test_dublincore.py +166 -0
- gamslib-0.2.6/tests/objectcsv/test_manage_csv/collected_csvs/all_datastreams.csv +7 -0
- gamslib-0.2.6/tests/objectcsv/test_manage_csv/collected_csvs/all_objects.csv +3 -0
- gamslib-0.2.6/tests/objectcsv/test_manage_csv/objects/obj1/DC.xml +0 -0
- gamslib-0.2.6/tests/objectcsv/test_manage_csv/objects/obj1/datastreams.csv +4 -0
- gamslib-0.2.6/tests/objectcsv/test_manage_csv/objects/obj1/object.csv +2 -0
- gamslib-0.2.6/tests/objectcsv/test_manage_csv/objects/obj2/DC.xml +0 -0
- gamslib-0.2.6/tests/objectcsv/test_manage_csv/objects/obj2/datastreams.csv +4 -0
- gamslib-0.2.6/tests/objectcsv/test_manage_csv/objects/obj2/object.csv +2 -0
- gamslib-0.2.6/tests/objectcsv/test_manage_csv.py +115 -0
- gamslib-0.2.6/tests/objectcsv/test_objectcsv.py +282 -0
- gamslib-0.2.6/tests/objectcsv/test_utils.py +45 -0
- gamslib-0.2.6/tests/objectcsv/test_xlsx/datastreams.csv +4 -0
- gamslib-0.2.6/tests/objectcsv/test_xlsx/objects.csv +2 -0
- gamslib-0.2.6/tests/objectcsv/test_xlsx/simple.csv +3 -0
- gamslib-0.2.6/tests/objectcsv/test_xlsx.py +50 -0
- gamslib-0.2.6/tests/projectconfiguration/test_configuration/foo/bar/readme.txt +1 -0
- gamslib-0.2.6/tests/projectconfiguration/test_configuration/invalid.toml +1 -0
- gamslib-0.2.6/tests/projectconfiguration/test_configuration/invalid_value.toml +9 -0
- gamslib-0.2.6/tests/projectconfiguration/test_configuration/project.toml +10 -0
- gamslib-0.2.6/tests/projectconfiguration/test_configuration.py +301 -0
- gamslib-0.2.6/tests/projectconfiguration/test_init.py +10 -0
gamslib-0.2.6/.gitignore
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
variables:
|
|
2
|
+
RYE_VERSION: "0.42"
|
|
3
|
+
RYE_IMAGE: "jfxs/rye"
|
|
4
|
+
|
|
5
|
+
stages:
|
|
6
|
+
- test
|
|
7
|
+
- lint
|
|
8
|
+
|
|
9
|
+
# The Docker image (always latest or use the variables?)
|
|
10
|
+
image: jfxs/rye
|
|
11
|
+
|
|
12
|
+
# A hidden job template that can be used by different Python versions
|
|
13
|
+
.test-template:
|
|
14
|
+
stage: test
|
|
15
|
+
script:
|
|
16
|
+
- echo "Rye environment version"
|
|
17
|
+
- rye --version
|
|
18
|
+
- echo "Pinning ${PYTHON_VERSION}"
|
|
19
|
+
- rye pin ${PYTHON_VERSION}
|
|
20
|
+
- rye sync
|
|
21
|
+
- echo "Using python version:"
|
|
22
|
+
- python --version
|
|
23
|
+
- echo "Running tests with Rye"
|
|
24
|
+
- rye test
|
|
25
|
+
|
|
26
|
+
# Specific jobs for Python versions
|
|
27
|
+
test-py_3.11:
|
|
28
|
+
extends: .test-template
|
|
29
|
+
variables:
|
|
30
|
+
PYTHON_VERSION: "3.11"
|
|
31
|
+
|
|
32
|
+
test-py_3.12:
|
|
33
|
+
extends: .test-template
|
|
34
|
+
variables:
|
|
35
|
+
PYTHON_VERSION: "3.12"
|
|
36
|
+
|
|
37
|
+
# test-py_3.13:
|
|
38
|
+
# extends: .test-template
|
|
39
|
+
# variables:
|
|
40
|
+
# PYTHON_VERSION: "3.13"
|
|
41
|
+
|
|
42
|
+
lint:
|
|
43
|
+
stage: lint
|
|
44
|
+
script:
|
|
45
|
+
- echo "Rye environment version"
|
|
46
|
+
- rye --version
|
|
47
|
+
- echo "Using python version:"
|
|
48
|
+
- python --version
|
|
49
|
+
- echo "Running rye lint"
|
|
50
|
+
- rye lint
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
## [Unreleased]
|
|
10
|
+
|
|
11
|
+
### Added
|
|
12
|
+
|
|
13
|
+
### Changed
|
|
14
|
+
|
|
15
|
+
### Removed
|
|
16
|
+
|
|
17
|
+
### Fixed
|
|
18
|
+
|
|
19
|
+
## [0.2.6] - 2024-12-06
|
|
20
|
+
|
|
21
|
+
### Added
|
|
22
|
+
|
|
23
|
+
- CHANGELOG.md
|
|
24
|
+
- More tests
|
|
25
|
+
|
|
26
|
+
### Changed
|
|
27
|
+
|
|
28
|
+
- Extend pyproject.toml
|
|
29
|
+
|
gamslib-0.2.6/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 Fabio Tosques, Gunter Vasold
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
gamslib-0.2.6/PKG-INFO
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: gamslib
|
|
3
|
+
Version: 0.2.6
|
|
4
|
+
Summary: Modules and subpackages used in various GAMS5 related projects
|
|
5
|
+
Project-URL: Homepage, https://github.com/DHGraz/gamslib
|
|
6
|
+
Project-URL: Repository, https://github.com/DHGraz/gamslib
|
|
7
|
+
Project-URL: Issues, https://github.com/DHGraz/gamslib/issues
|
|
8
|
+
Project-URL: Changelog, https://github.com/DHGraz/gamslib/blob/main/CHANGELOG.md
|
|
9
|
+
Author-email: Gunter Vasold <gunter.vasold@uni-graz.at>, Fabio Tosques <fabio.tosques@uni-graz.at>
|
|
10
|
+
Keywords: GAMS
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
17
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
18
|
+
Requires-Python: >=3.11
|
|
19
|
+
Requires-Dist: pydantic>=2.10.1
|
|
20
|
+
Requires-Dist: pylightxl>=1.61
|
|
21
|
+
Description-Content-Type: text/markdown
|
|
22
|
+
|
|
23
|
+
# gamslib
|
|
24
|
+
|
|
25
|
+
gamslib is a collection of GAMS related modules and packages, which are used in multiple other packages.
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
gamslib is available on pypi.org and can be installed via pip:
|
|
29
|
+
|
|
30
|
+
```
|
|
31
|
+
pip install gamslib
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
Currently these subpackages are available (more to come):
|
|
35
|
+
|
|
36
|
+
## objectcsv
|
|
37
|
+
|
|
38
|
+
Handle object and datastream metadata in object csv files.
|
|
39
|
+
|
|
40
|
+
When creating bags for GAMS, we provide some metadata in csv
|
|
41
|
+
files (which are not part of the bag, btw).
|
|
42
|
+
|
|
43
|
+
The objectcsv package provides tools to handle this metadata.
|
|
44
|
+
|
|
45
|
+
* The ObjectCSV class represents the object
|
|
46
|
+
and datastream csv data for a single object. It is created by providing the
|
|
47
|
+
path to the object directory.
|
|
48
|
+
* The manage_csv module can be used to collect csv data from all objects
|
|
49
|
+
into a single file, which makes editing the data more efficient.
|
|
50
|
+
It also has a function to update the csv files in the object directories
|
|
51
|
+
based on the collected data.
|
|
52
|
+
* The xlsx module can be used to convert the csv files to xlsx files
|
|
53
|
+
and vice versa. This is useful for editing the data in a spreadsheet
|
|
54
|
+
without the hassles of importing and exporting the csv files, which
|
|
55
|
+
led to encoding problems in the past.
|
|
56
|
+
|
|
57
|
+
## projectconfiguration
|
|
58
|
+
|
|
59
|
+
This package contains a central class `Configuration` that represents the
|
|
60
|
+
project configuration. To create this object, the function
|
|
61
|
+
`load_configuration(OBJECT_ROOT, [PATH_TO_TOML_FILE])` should be used.
|
|
62
|
+
|
|
63
|
+
The function tries to find the project configuration file, validates
|
|
64
|
+
its content, and creates the central Configuration object with all
|
|
65
|
+
sub-objects (Each TOML inline table is provided as its own sub-object).
|
|
66
|
+
These sub-objects are currently:
|
|
67
|
+
|
|
68
|
+
* general
|
|
69
|
+
* metadata
|
|
70
|
+
|
|
71
|
+
A basic configuration file can be generated via the `create_configuration()` function.
|
gamslib-0.2.6/README.md
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# gamslib
|
|
2
|
+
|
|
3
|
+
gamslib is a collection of GAMS related modules and packages, which are used in multiple other packages.
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
gamslib is available on pypi.org and can be installed via pip:
|
|
7
|
+
|
|
8
|
+
```
|
|
9
|
+
pip install gamslib
|
|
10
|
+
```
|
|
11
|
+
|
|
12
|
+
Currently these subpackages are available (more to come):
|
|
13
|
+
|
|
14
|
+
## objectcsv
|
|
15
|
+
|
|
16
|
+
Handle object and datastream metadata in object csv files.
|
|
17
|
+
|
|
18
|
+
When creating bags for GAMS, we provide some metadata in csv
|
|
19
|
+
files (which are not part of the bag, btw).
|
|
20
|
+
|
|
21
|
+
The objectcsv package provides tools to handle this metadata.
|
|
22
|
+
|
|
23
|
+
* The ObjectCSV class represents the object
|
|
24
|
+
and datastream csv data for a single object. It is created by providing the
|
|
25
|
+
path to the object directory.
|
|
26
|
+
* The manage_csv module can be used to collect csv data from all objects
|
|
27
|
+
into a single file, which makes editing the data more efficient.
|
|
28
|
+
It also has a function to update the csv files in the object directories
|
|
29
|
+
based on the collected data.
|
|
30
|
+
* The xlsx module can be used to convert the csv files to xlsx files
|
|
31
|
+
and vice versa. This is useful for editing the data in a spreadsheet
|
|
32
|
+
without the hassles of importing and exporting the csv files, which
|
|
33
|
+
led to encoding problems in the past.
|
|
34
|
+
|
|
35
|
+
## projectconfiguration
|
|
36
|
+
|
|
37
|
+
This package contains a central class `Configuration` that represents the
|
|
38
|
+
project configuration. To create this object, the function
|
|
39
|
+
`load_configuration(OBJECT_ROOT, [PATH_TO_TOML_FILE])` should be used.
|
|
40
|
+
|
|
41
|
+
The function tries to find the project configuration file, validates
|
|
42
|
+
its content, and creates the central Configuration object with all
|
|
43
|
+
sub-objects (Each TOML inline table is provided as its own sub-object).
|
|
44
|
+
These sub-objects are currently:
|
|
45
|
+
|
|
46
|
+
* general
|
|
47
|
+
* metadata
|
|
48
|
+
|
|
49
|
+
A basic configuration file can be generated via the `create_configuration()` function.
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "gamslib"
|
|
3
|
+
version = "0.2.6"
|
|
4
|
+
description = "Modules and subpackages used in various GAMS5 related projects"
|
|
5
|
+
authors = [
|
|
6
|
+
{ name = "Gunter Vasold", email = "gunter.vasold@uni-graz.at" },
|
|
7
|
+
{ name = "Fabio Tosques", email = "fabio.tosques@uni-graz.at" }
|
|
8
|
+
]
|
|
9
|
+
dependencies = [
|
|
10
|
+
"pylightxl>=1.61",
|
|
11
|
+
"pydantic>=2.10.1",
|
|
12
|
+
]
|
|
13
|
+
readme = "README.md"
|
|
14
|
+
requires-python = ">= 3.11"
|
|
15
|
+
keywords = ["GAMS"]
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Development Status :: 4 - Beta",
|
|
18
|
+
"Intended Audience :: Developers",
|
|
19
|
+
"Topic :: Software Development :: Libraries",
|
|
20
|
+
"License :: OSI Approved :: MIT License",
|
|
21
|
+
"Programming Language :: Python :: 3.11",
|
|
22
|
+
"Programming Language :: Python :: 3.12",
|
|
23
|
+
"Programming Language :: Python :: 3.13",
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
[project.urls]
|
|
27
|
+
Homepage = "https://github.com/DHGraz/gamslib"
|
|
28
|
+
# Documentation = "https://readthedocs.org"
|
|
29
|
+
Repository = "https://github.com/DHGraz/gamslib"
|
|
30
|
+
Issues = "https://github.com/DHGraz/gamslib/issues"
|
|
31
|
+
Changelog = "https://github.com/DHGraz/gamslib/blob/main/CHANGELOG.md"
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
[build-system]
|
|
35
|
+
requires = ["hatchling"]
|
|
36
|
+
build-backend = "hatchling.build"
|
|
37
|
+
|
|
38
|
+
[tool.rye]
|
|
39
|
+
managed = true
|
|
40
|
+
dev-dependencies = [
|
|
41
|
+
"pylint>=3.3.1",
|
|
42
|
+
"pytest>=8.3.3",
|
|
43
|
+
"pytest-datadir>=1.5.0",
|
|
44
|
+
"pytest-cov>=6.0.0",
|
|
45
|
+
"pyright>=1.1.390",
|
|
46
|
+
"toml>=0.10.2",
|
|
47
|
+
]
|
|
48
|
+
|
|
49
|
+
[tool.hatch.metadata]
|
|
50
|
+
allow-direct-references = true
|
|
51
|
+
|
|
52
|
+
[tool.hatch.build.targets.wheel]
|
|
53
|
+
packages = ["src/gamslib"]
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
"""Handle object and datastream metadata in csv files.
|
|
2
|
+
|
|
3
|
+
When creating bags for GAMS, we provide some metadata in csv
|
|
4
|
+
files (which are not part of the bag, btw).
|
|
5
|
+
|
|
6
|
+
The objectcsv package provides tools to handle this metadata.
|
|
7
|
+
|
|
8
|
+
* The ObjectCSV class represents the object and datastream csv data
|
|
9
|
+
for a single object. It is created by providing the path to the
|
|
10
|
+
object directory. It is composed of two classes:
|
|
11
|
+
|
|
12
|
+
* ObjectCSVFile represents the object metadata. It typically holds
|
|
13
|
+
a single ObjectData object, but can hold multiple objects if needed.
|
|
14
|
+
* DatastreamsCSVFile represents the datastream metadata. It holds
|
|
15
|
+
typically multiple DSData objects, one for each datastream.
|
|
16
|
+
* The dublincore module represents the object metadata stored in
|
|
17
|
+
the object's 'DC.xml' file. It provides useful functions for accessing
|
|
18
|
+
DC data, e.g. for preferred languages etc.
|
|
19
|
+
* The create_csv module can be used to initially create the csv files for
|
|
20
|
+
all objects
|
|
21
|
+
* The manage_csv module can be used to collect csv data from all objects
|
|
22
|
+
into a single file, which makes editing the data more efficient.
|
|
23
|
+
It also has a function to update the csv files in the object directories
|
|
24
|
+
based on the collected data.
|
|
25
|
+
* The xlsx module can be used to convert the csv files to xlsx files
|
|
26
|
+
and vice versa. This is useful for editing the data in a spreadsheet
|
|
27
|
+
without the hassles of importing and exporting the csv files, which
|
|
28
|
+
led to encoding problems in the past.
|
|
29
|
+
|
|
30
|
+
The "public" functions and classes from the submodules are directly
|
|
31
|
+
available in the objectcsv package:
|
|
32
|
+
|
|
33
|
+
* ObjectCSV
|
|
34
|
+
* ObjectData
|
|
35
|
+
* DSData
|
|
36
|
+
* create_csv_files
|
|
37
|
+
* collect_csv_data
|
|
38
|
+
* update_csv_files
|
|
39
|
+
* csv_to_xlsx
|
|
40
|
+
* xlsx_to_csv
|
|
41
|
+
"""
|
|
42
|
+
|
|
43
|
+
from .objectcsv import ObjectCSV, ObjectData, DSData
|
|
44
|
+
from .create_csv import create_csv_files
|
|
45
|
+
from .manage_csv import collect_csv_data, update_csv_files
|
|
46
|
+
from .xlsx import csv_to_xlsx, xlsx_to_csv
|
|
47
|
+
|
|
48
|
+
__all__ = [
|
|
49
|
+
"ObjectCSV",
|
|
50
|
+
"ObjectData",
|
|
51
|
+
"DSData",
|
|
52
|
+
"create_csv_files",
|
|
53
|
+
"collect_csv_data",
|
|
54
|
+
"update_csv_files",
|
|
55
|
+
"csv_to_xlsx",
|
|
56
|
+
"xlsx_to_csv",
|
|
57
|
+
]
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
"""Create object.csv and datastreams.csv files.
|
|
2
|
+
|
|
3
|
+
This module creates the object.csv and datastreams.csv files for one or many given
|
|
4
|
+
object folders. It uses data from the DC.xml file and the project configuration
|
|
5
|
+
to fill in the metadata. When not enough information is available, some fields
|
|
6
|
+
will be left blank or filled with default values.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import logging
|
|
10
|
+
import mimetypes
|
|
11
|
+
import re
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
|
|
14
|
+
from gamslib.projectconfiguration import Configuration
|
|
15
|
+
|
|
16
|
+
from .objectcsv import DSData, ObjectCSV, ObjectData
|
|
17
|
+
from .dublincore import DublinCore
|
|
18
|
+
from .utils import find_object_folders
|
|
19
|
+
|
|
20
|
+
# Module-level logger. It is named after the module instead of being the
# root logger, so applications can filter or configure the output of this
# library separately. Records still propagate to the root logger's handlers.
logger = logging.getLogger(__name__)

# Fallback rights statement, used by get_rights() when neither the
# Dublin Core data nor the project configuration provide a value.
DEFAULT_RIGHTS = (
    "Creative Commons Attribution-NonCommercial 4.0 "
    "(https://creativecommons.org/licenses/by-nc/4.0/)"
)
# Values written to the 'source' and 'objectType' fields of newly
# created object metadata (see collect_object_data()).
DEFAULT_SOURCE = "local"
DEFAULT_OBJECT_TYPE = "text"

# Mapping of XML namespace prefixes to namespace URIs.
NAMESPACES = {
    "dc": "http://purl.org/dc/elements/1.1/",
}
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def get_rights(config: Configuration, dc: DublinCore) -> str:
    """Determine the rights statement from various sources.

    The first non-empty value found in this order is returned:

      1. the 'rights' element of the Dublin Core data
      2. the 'rights' value of the project configuration metadata
      3. the module level DEFAULT_RIGHTS
    """
    # An empty value counts as "not set" and falls through to the next source.
    return (
        dc.get_element_as_str("rights", default="")
        or config.metadata.rights
        or DEFAULT_RIGHTS
    )
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def extract_dsid(datastream: Path | str, keep_extension=True) -> str:
    """Extract and validate the datastream id from a datastream path.

    The id is derived from the file name (the last path component) of
    `datastream`.

    Args:
        datastream: Path of the datastream file, given as Path or str.
        keep_extension: If True (the default), the complete file name is
            used as id. If False, the file extension is removed from the id.
            A trailing part is only removed if it is a known extension for
            the guessed mime type or if it looks like an extension.

    Returns:
        The validated id.

    Raises:
        ValueError: If the resulting id does not match the allowed pattern.
    """
    if isinstance(datastream, str):
        datastream = Path(datastream)

    pid = datastream.name

    if not keep_extension:
        # not everything after the last dot is an extension :-(
        mtype = mimetypes.guess_type(datastream)[0]
        if mtype is None:
            known_extensions = []
        else:
            known_extensions = mimetypes.guess_all_extensions(mtype)
        if datastream.suffix in known_extensions:
            pid = pid.removesuffix(datastream.suffix)
            logger.debug("Removed extension '%s' for ID: %s", datastream.suffix, pid)
        else:
            stem, dot, extension = pid.rpartition(".")
            if not dot:
                # A name without any dot has no extension. Without this
                # guard the whole name would be treated as the extension
                # and stripped, leaving an empty (and thus invalid) id.
                logger.debug("'%s' has no extension. Keeping it as ID.", pid)
            elif re.match(r"^[a-zA-Z]+\w?$", extension):
                pid = stem
                logger.debug("Removed extension for ID: %s", pid)
            else:
                logger.warning(
                    "'%s' does not look like an extension. Keeping it in PID.",
                    extension,
                )

    if re.match(r"^[a-zA-Z0-9]+[-.%_a-zA-Z0-9]+[a-zA-Z0-9]+$", pid) is None:
        raise ValueError(f"Invalid PID: '{pid}'")

    logger.debug(
        "Extracted PID: %s from %s (keep_extension=%s)", pid, datastream, keep_extension
    )
    return pid
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def collect_object_data(pid: str, config: Configuration, dc: DublinCore) -> ObjectData:
    """Assemble the object.csv data from the DC data and the configuration.

    Title and description are looked up in the Dublin Core data and joined
    with "; ". The pid is passed as default for the title, an empty string
    as default for the description. Project id and creator come from the
    configuration metadata, the rights from get_rights(); source and object
    type are set to the module defaults.

    If data should be resolved from other sources or in another order,
    this is the function to change.
    """
    # Looked up in this order: title first, then description.
    dc_values = {
        element: "; ".join(dc.get_element(element, default=fallback))
        for element, fallback in (("title", pid), ("description", ""))
    }

    return ObjectData(
        recid=pid,
        title=dc_values["title"],
        project=config.metadata.project_id,
        description=dc_values["description"],
        creator=config.metadata.creator,
        rights=get_rights(config, dc),
        source=DEFAULT_SOURCE,
        objectType=DEFAULT_OBJECT_TYPE,
    )
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def collect_datastream_data(
    ds_file: Path, config: Configuration, dc: DublinCore
) -> DSData:
    """Assemble the datastreams.csv data for a single datastream file.

    Title and description are left empty: they are not derived from the
    object's DC data. Creator and rights are resolved the same way as for
    the object itself.
    """
    dsid = extract_dsid(ds_file, config.general.dsid_keep_extension)

    # Path relative to the directory two levels above the file, which
    # gives "<name of containing directory>/<file name>".
    dspath = str(ds_file.relative_to(ds_file.parents[1]))

    # The guessed type is None for unknown file types; store "" then.
    guessed_type, _encoding = mimetypes.guess_type(ds_file)

    return DSData(
        dspath=dspath,
        dsid=dsid,
        title="",
        description="",
        mimetype=guessed_type or "",
        creator=config.metadata.creator,
        rights=get_rights(config, dc),
    )
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def create_csv(
    object_directory: Path, configuration: Configuration
) -> ObjectCSV | None:
    """Create the csv files with preliminary metadata for a single object.

    Returns the written ObjectCSV object, or None if the object is not new
    (in which case nothing is written).
    """
    obj_csv = ObjectCSV(object_directory)

    # Never replace existing metadata: it might have been edited already.
    if not obj_csv.is_new():
        logger.info(
            "CSV files for object '%s' already exist. Will not be re-created.",
            obj_csv.object_id,
        )
        return None

    dublin_core = DublinCore(object_directory / "DC.xml")
    obj_csv.add_objectdata(
        collect_object_data(obj_csv.object_id, configuration, dublin_core)
    )

    # Every regular file in the object directory is a datastream, except
    # for the two csv files themselves.
    csv_file_names = ("object.csv", "datastreams.csv")
    datastream_files = (
        entry
        for entry in object_directory.glob("*")
        if entry.is_file() and entry.name not in csv_file_names
    )
    for ds_file in datastream_files:
        obj_csv.add_datastream(
            collect_datastream_data(ds_file, configuration, dublin_core)
        )

    obj_csv.write()
    return obj_csv
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def create_csv_files(root_folder: Path, config: Configuration) -> list[ObjectCSV]:
    """Create the CSV files for all object folders found below root_folder.

    Returns the ObjectCSV objects returned by create_csv(); folders for
    which create_csv() returns None are left out of the result.
    """
    return [
        created
        for folder in find_object_folders(root_folder)
        if (created := create_csv(folder, config)) is not None
    ]
|