opex-manifest-generator 1.2.1__tar.gz → 1.2.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opex_manifest_generator-1.2.3/.github/workflows/codeql.yml +71 -0
- opex_manifest_generator-1.2.3/PKG-INFO +527 -0
- opex_manifest_generator-1.2.3/README.md +508 -0
- {opex_manifest_generator-1.2.1 → opex_manifest_generator-1.2.3}/opex_manifest_generator/cli.py +19 -7
- {opex_manifest_generator-1.2.1 → opex_manifest_generator-1.2.3}/opex_manifest_generator/common.py +20 -5
- opex_manifest_generator-1.2.3/opex_manifest_generator/hash.py +65 -0
- {opex_manifest_generator-1.2.1 → opex_manifest_generator-1.2.3}/opex_manifest_generator/opex_manifest.py +143 -67
- opex_manifest_generator-1.2.3/opex_manifest_generator.egg-info/PKG-INFO +527 -0
- {opex_manifest_generator-1.2.1 → opex_manifest_generator-1.2.3}/opex_manifest_generator.egg-info/SOURCES.txt +7 -6
- {opex_manifest_generator-1.2.1 → opex_manifest_generator-1.2.3}/pyproject.toml +2 -1
- opex_manifest_generator-1.2.1/PKG-INFO +0 -16
- opex_manifest_generator-1.2.1/README.md +0 -382
- opex_manifest_generator-1.2.1/opex_manifest_generator/hash.py +0 -34
- opex_manifest_generator-1.2.1/opex_manifest_generator.egg-info/PKG-INFO +0 -16
- {opex_manifest_generator-1.2.1 → opex_manifest_generator-1.2.3}/.gitignore +0 -0
- {opex_manifest_generator-1.2.1 → opex_manifest_generator-1.2.3}/LICENSE.md +0 -0
- {opex_manifest_generator-1.2.1 → opex_manifest_generator-1.2.3}/assets/Column Headers.png +0 -0
- {opex_manifest_generator-1.2.1 → opex_manifest_generator-1.2.3}/assets/FullName Column.png +0 -0
- {opex_manifest_generator-1.2.1 → opex_manifest_generator-1.2.3}/assets/Hash Headers.png +0 -0
- {opex_manifest_generator-1.2.1 → opex_manifest_generator-1.2.3}/assets/Identifiers Headers.png +0 -0
- {opex_manifest_generator-1.2.1 → opex_manifest_generator-1.2.3}/assets/XML Headers.png +0 -0
- {opex_manifest_generator-1.2.1 → opex_manifest_generator-1.2.3}/opex_manifest_generator/__init__.py +0 -0
- {opex_manifest_generator-1.2.1 → opex_manifest_generator-1.2.3}/opex_manifest_generator/metadata/DublinCore Template.xml +0 -0
- {opex_manifest_generator-1.2.1 → opex_manifest_generator-1.2.3}/opex_manifest_generator/metadata/EAD Template.xml +0 -0
- {opex_manifest_generator-1.2.1 → opex_manifest_generator-1.2.3}/opex_manifest_generator/metadata/GDPR Template.xml +0 -0
- {opex_manifest_generator-1.2.1 → opex_manifest_generator-1.2.3}/opex_manifest_generator/metadata/MODS Template.xml +0 -0
- {opex_manifest_generator-1.2.1 → opex_manifest_generator-1.2.3}/opex_manifest_generator/options.properties +0 -0
- {opex_manifest_generator-1.2.1 → opex_manifest_generator-1.2.3}/opex_manifest_generator.egg-info/dependency_links.txt +0 -0
- {opex_manifest_generator-1.2.1 → opex_manifest_generator-1.2.3}/opex_manifest_generator.egg-info/entry_points.txt +0 -0
- {opex_manifest_generator-1.2.1 → opex_manifest_generator-1.2.3}/opex_manifest_generator.egg-info/requires.txt +0 -0
- {opex_manifest_generator-1.2.1 → opex_manifest_generator-1.2.3}/opex_manifest_generator.egg-info/top_level.txt +0 -0
- {opex_manifest_generator-1.2.1/opex_manifest_generator → opex_manifest_generator-1.2.3}/samples/Opex.xml +0 -0
- {opex_manifest_generator-1.2.1/opex_manifest_generator → opex_manifest_generator-1.2.3}/samples/opex_manifest_generator_AutoClass.xlsx +0 -0
- {opex_manifest_generator-1.2.1/opex_manifest_generator → opex_manifest_generator-1.2.3}/samples/spreads/dctemplate.xlsx +0 -0
- {opex_manifest_generator-1.2.1/opex_manifest_generator → opex_manifest_generator-1.2.3}/samples/spreads/eadtemplate.xlsx +0 -0
- {opex_manifest_generator-1.2.1/opex_manifest_generator → opex_manifest_generator-1.2.3}/samples/spreads/gdprtemplate.xlsx +0 -0
- {opex_manifest_generator-1.2.1/opex_manifest_generator → opex_manifest_generator-1.2.3}/samples/spreads/modstemplate.xlsx +0 -0
- {opex_manifest_generator-1.2.1 → opex_manifest_generator-1.2.3}/setup.cfg +0 -0
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# For most projects, this workflow file will not need changing; you simply need
|
|
2
|
+
# to commit it to your repository.
|
|
3
|
+
#
|
|
4
|
+
# You may wish to alter this file to override the set of languages analyzed,
|
|
5
|
+
# or to provide custom queries or build logic.
|
|
6
|
+
#
|
|
7
|
+
# ******** NOTE ********
|
|
8
|
+
# We have attempted to detect the languages in your repository. Please check
|
|
9
|
+
# the `language` matrix defined below to confirm you have the correct set of
|
|
10
|
+
# supported CodeQL languages.
|
|
11
|
+
#
|
|
12
|
+
name: "CodeQL"
|
|
13
|
+
|
|
14
|
+
on:
|
|
15
|
+
push:
|
|
16
|
+
branches: [ master ]
|
|
17
|
+
pull_request:
|
|
18
|
+
# The branches below must be a subset of the branches above
|
|
19
|
+
branches: [ master ]
|
|
20
|
+
schedule:
|
|
21
|
+
- cron: '20 10 * * 6'
|
|
22
|
+
|
|
23
|
+
jobs:
|
|
24
|
+
analyze:
|
|
25
|
+
name: Analyze
|
|
26
|
+
runs-on: ubuntu-latest
|
|
27
|
+
permissions:
|
|
28
|
+
actions: read
|
|
29
|
+
contents: read
|
|
30
|
+
security-events: write
|
|
31
|
+
|
|
32
|
+
strategy:
|
|
33
|
+
fail-fast: false
|
|
34
|
+
matrix:
|
|
35
|
+
language: [ 'python' ]
|
|
36
|
+
# CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ]
|
|
37
|
+
# Learn more:
|
|
38
|
+
# https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed
|
|
39
|
+
|
|
40
|
+
steps:
|
|
41
|
+
- name: Checkout repository
|
|
42
|
+
uses: actions/checkout@v2
|
|
43
|
+
|
|
44
|
+
# Initializes the CodeQL tools for scanning.
|
|
45
|
+
- name: Initialize CodeQL
|
|
46
|
+
uses: github/codeql-action/init@v2
|
|
47
|
+
with:
|
|
48
|
+
languages: ${{ matrix.language }}
|
|
49
|
+
# If you wish to specify custom queries, you can do so here or in a config file.
|
|
50
|
+
# By default, queries listed here will override any specified in a config file.
|
|
51
|
+
# Prefix the list here with "+" to use these queries and those in the config file.
|
|
52
|
+
# queries: ./path/to/local/query, your-org/your-repo/queries@main
|
|
53
|
+
|
|
54
|
+
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
|
|
55
|
+
# If this step fails, then you should remove it and run the build manually (see below)
|
|
56
|
+
- name: Autobuild
|
|
57
|
+
uses: github/codeql-action/autobuild@v2
|
|
58
|
+
|
|
59
|
+
# ℹ️ Command-line programs to run using the OS shell.
|
|
60
|
+
# 📚 https://git.io/JvXDl
|
|
61
|
+
|
|
62
|
+
# ✏️ If the Autobuild fails above, remove it and uncomment the following three lines
|
|
63
|
+
# and modify them (or add more) to build your code if your project
|
|
64
|
+
# uses a compiled language
|
|
65
|
+
|
|
66
|
+
#- run: |
|
|
67
|
+
# make bootstrap
|
|
68
|
+
# make release
|
|
69
|
+
|
|
70
|
+
- name: Perform CodeQL Analysis
|
|
71
|
+
uses: github/codeql-action/analyze@v2
|
|
@@ -0,0 +1,527 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: opex_manifest_generator
|
|
3
|
+
Version: 1.2.3
|
|
4
|
+
Summary: Opex Manifest Generator Tool for use with Opex / Preservica
|
|
5
|
+
Author-email: Christopher Prince <c.pj.prince@gmail.com>
|
|
6
|
+
Project-URL: Homepage, https://github.com/CPJPRINCE/opex_manifest_generator
|
|
7
|
+
Project-URL: Issues, https://github.com/CPJPRINCE/opex_manifest_generator/issues
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Classifier: Topic :: System :: Archiving
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
License-File: LICENSE.md
|
|
14
|
+
Requires-Dist: auto_classification_generator
|
|
15
|
+
Requires-Dist: pandas
|
|
16
|
+
Requires-Dist: openpyxl
|
|
17
|
+
Requires-Dist: lxml
|
|
18
|
+
Dynamic: license-file
|
|
19
|
+
|
|
20
|
+
# Opex Manifest Generator Tool
|
|
21
|
+
|
|
22
|
+
[](https://pypi.org/project/opex_manifest_generator)
|
|
23
|
+
[](https://github.com/CPJPRINCE/opex_manifest_generator/actions/workflows/codeql.yml)
|
|
24
|
+
|
|
25
|
+
The Opex Manifest Generator is a Python programme for generating OPEX files for use with Preservica and systems compatible with the OPEX standard. It will recursively go through a 'root' directory and generate OPEX files for each folder or, depending on specified options, files.
|
|
26
|
+
|
|
27
|
+
## Why use this tool?
|
|
28
|
+
|
|
29
|
+
This tool was primarily intended to allow users to undertake larger uploads safely using bulk ingests via the Opex Ingest Workflow, with Folder Manifests checked to ensure safe transfer. However, it has been tested as functioning with:
|
|
30
|
+
- Bulk / Opex Ingest Workflow
|
|
31
|
+
- PUT Tool / Auto Ingest Workflows
|
|
32
|
+
- Manual Ingest
|
|
33
|
+
- Starter/UX2 Ingest uploads (Both File and Folder)
|
|
34
|
+
|
|
35
|
+
## Features
|
|
36
|
+
|
|
37
|
+
There are a number of features including:
|
|
38
|
+
- Generating Fixities for files, with SHA1, MD5, SHA256, SHA512 (Default is SHA1).
|
|
39
|
+
- Generate Multiple Fixities.
|
|
40
|
+
- Generate PAX fixities.
|
|
41
|
+
- OPEX's can be cleared out, for repeated / ease of use.
|
|
42
|
+
- OPEX's can be zipped with the file, for imports use with Starter/UX2/Manual ingest methods.
|
|
43
|
+
|
|
44
|
+
The Program also makes use of the Auto Classification Generator, allowing for:
|
|
45
|
+
- Reference's can be automatically generated and embedded into the Opex, with assignable prefixes.
|
|
46
|
+
- This can be utilised either in Catalog or Accession modes, or both.
|
|
47
|
+
- Clear and log empty folders.
|
|
48
|
+
- Remove and log Files / Folders.
|
|
49
|
+
- Ignore specific Files / Folders.
|
|
50
|
+
- Sorting!
|
|
51
|
+
- Keyword assignment!
|
|
52
|
+
|
|
53
|
+
A key feature of the program, is that the Auto Class spreadsheet can also act as an input, meaning you can utilise the generated spreadsheet to assign metadata to your files and folders. Currently this allows:
|
|
54
|
+
- Assignment of title, description, and security status fields.
|
|
55
|
+
- Assignment of standard and custom xml metadata templates.
|
|
56
|
+
- These fields are all 'drop-in', so only the fields as they are required need to be added.
|
|
57
|
+
|
|
58
|
+
All these features can be combined to create extensive and robust Opex files for file transfers.
|
|
59
|
+
|
|
60
|
+
## Prerequisites
|
|
61
|
+
|
|
62
|
+
Python Version 3.8+ is recommended; the program is OS independent and works on Windows, MacOS and Linux.
|
|
63
|
+
|
|
64
|
+
The following modules are utilised and installed with the package:
|
|
65
|
+
- auto_classification_generator
|
|
66
|
+
- pandas
|
|
67
|
+
- openpyxl
|
|
68
|
+
- lxml
|
|
69
|
+
|
|
70
|
+
Please ensure that Python is also added to your System's environmental variables.
|
|
71
|
+
|
|
72
|
+
## Installation / Updates
|
|
73
|
+
|
|
74
|
+
To install the package, simply run: `pip install -U opex_manifest_generator`. To update it simply run the same command.
|
|
75
|
+
|
|
76
|
+
## Usage
|
|
77
|
+
|
|
78
|
+
Usage of the program is from a command line interface / terminal program, such as PowerShell on Windows, Terminal on Mac, or one of the many terminal programs on Linux.
|
|
79
|
+
|
|
80
|
+
### Folder Manifest Generation
|
|
81
|
+
|
|
82
|
+
The basic version of the program will generate only folder manifests, this acts recursively, so every folder within that folder will have an Opex generated.
|
|
83
|
+
|
|
84
|
+
To run open up a terminal and run the command:
|
|
85
|
+
|
|
86
|
+
`opex_generate "{path/to/your/folder}"`
|
|
87
|
+
|
|
88
|
+
Replacing `{path/to/your/folder}` with your folder path in quotations; for instance, on Windows this looks like:
|
|
89
|
+
|
|
90
|
+
`opex_generate "C:\Users\Christopher\Downloads"`
|
|
91
|
+
|
|
92
|
+
### Fixity Generation
|
|
93
|
+
|
|
94
|
+
To generate a fixity for each file within a given folder and create an Opex file, use the `-fx` option. This also creates a text document of fixities.
|
|
95
|
+
|
|
96
|
+
`opex_generate "C:\Users\Christopher\Downloads\" -fx`
|
|
97
|
+
|
|
98
|
+
By default this will run with the SHA-1 algorithm. You can also utilise MD5, SHA-1, SHA-256, SHA-512 algorithms. Specify it like so:
|
|
99
|
+
|
|
100
|
+
`opex_generate "C:\Users\Christopher\Downloads\" -fx SHA-256`
|
|
101
|
+
|
|
102
|
+
You can also generate multiple fixities, by comma separation - Shoutout to Andrew Doty for adding this:
|
|
103
|
+
|
|
104
|
+
`opex_generate "C:\Users\Christopher\Downloads\" -fx SHA-256,SHA-1`
|
|
105
|
+
|
|
106
|
+
You can also enable PAX Fixity generation to generate fixity checks for individual files in PAXes. This is done, as detailed [here (see PAX section)](https://developers.preservica.com/documentation/open-preservation-exchange-opex#opex-sections):
|
|
107
|
+
|
|
108
|
+
`opex_generate "C:\Users\Christopher\Downloads\" -fx SHA-256 --pax-fixity`
|
|
109
|
+
|
|
110
|
+
*Sidenote, you can also generate multiple fixities for PAX files*
|
|
111
|
+
|
|
112
|
+
### Continuous Generation
|
|
113
|
+
|
|
114
|
+
If dealing with a large amount of files / large sized files the program is in built with the ability to continue where you left off.
|
|
115
|
+
|
|
116
|
+
By default, the program won't override any previously generated OPEXes. This means you can end the program (using Ctrl + C) and rerun the same (or a different) command and not worry about losing any progress.
|
|
117
|
+
|
|
118
|
+
### Clearing Opex's
|
|
119
|
+
|
|
120
|
+
Of course, if you do make a mistake or wish to start over, you can utilise the clear option, which will remove all existing Opex's in a directory.
|
|
121
|
+
|
|
122
|
+
`opex_generate "C:\Users\Christopher\Downloads\" -clr`
|
|
123
|
+
|
|
124
|
+
Running this command with no additional options will end the program after clearing the Opex's; if other options are enabled it will proceed with a new generation.
|
|
125
|
+
|
|
126
|
+
### Zipping
|
|
127
|
+
|
|
128
|
+
You can also utilise the zip option to bundle the opex and content into a zip file. For use with manual ingests or for Starter / UX2 users.
|
|
129
|
+
|
|
130
|
+
`opex_generate "C:\Users\Christopher\Downloads\" -fx SHA-1 -z`
|
|
131
|
+
|
|
132
|
+
Currently, no files will be removed after the zipping. **Be aware that because of this, running this command multiple times in a row can lead to lots of zips... Ensure you're at an end point before running this, as there's no easy way to undo this!**
|
|
133
|
+
|
|
134
|
+
### Removing Empty Directories
|
|
135
|
+
|
|
136
|
+
You can also clear any empty directories by using the `-rme` or `--remove-empty` option. This will remove any empty directories and generate a simple text document listing the directories that were removed. This process is not reversible and you will be asked to confirm your choice.
|
|
137
|
+
|
|
138
|
+
### Filtering
|
|
139
|
+
|
|
140
|
+
Currently 2 filters are applied across all generations.
|
|
141
|
+
|
|
142
|
+
1) Hidden directories / files, either by the hidden attribute in Windows or by a starting '.' in MacOS / Linux, are not included.
|
|
143
|
+
2) Folder's titled `meta` are not included.
|
|
144
|
+
|
|
145
|
+
Hidden files and directories can be included by utilising the `--hidden` option. `meta` folders currently can not be included except by changing their name.
|
|
146
|
+
|
|
147
|
+
## Note on 'meta' folders
|
|
148
|
+
|
|
149
|
+
Meta folders will be generated automatically when used with the `--fixity` and `-rme` options, as well as with some options from the Auto Classification Generator. You can redirect the path of the generated folder using the `-o` option: `-fx -o {/path/to/meta/output}`. Or you can also disable the generation of the 'meta' folder using the `-dmd` option.
|
|
150
|
+
|
|
151
|
+
## Use with the Auto Classification Generator
|
|
152
|
+
|
|
153
|
+
The Opex Manifest generator becomes much more powerful when utilised with another tool: the Auto Classification Generator, see [here](https://github.com/CPJPRINCE/auto_classification_generator) for further details.
|
|
154
|
+
|
|
155
|
+
This is built-in to the Opex Manifest Generator and can be utilised to embed identifiers and metadata directly to an Opex or through the use of an Excel spreadsheet or CSV file.
|
|
156
|
+
|
|
157
|
+
The Opex Manifest Generator makes use of the auto_class_generator as a module, therefore its behaviour differs a little when compared to utilising the standalone command `auto_class.exe`.
|
|
158
|
+
|
|
159
|
+
### Identifier Generation
|
|
160
|
+
|
|
161
|
+
To generate an auto classification code, call on `-c` option with `catalog` choice. You can also assign a prefix using `-p "ARCH"`:
|
|
162
|
+
|
|
163
|
+
`opex_generate -c catalog -p "ARCH" C:\Users\Christopher\Downloads`
|
|
164
|
+
|
|
165
|
+
This will generate Opex's with an identifier `code` for each of the files / folders. As described in the Auto Class module, the reference codes will take the hierarchy of the directories. You can also use the `-s` option to set a starting reference.
|
|
166
|
+
|
|
167
|
+
You can alternatively utilise the "Accession" / running number mode of generating a code using `-c accession` with the prefix "2024". You can also utilise the `--accession-mode` option to determine whether to have a running number for `file, folder, both`.
|
|
168
|
+
|
|
169
|
+
`opex_generate -c accession -p "2024" C:\Users\Christopher\Downloads --accession-mode file`
|
|
170
|
+
|
|
171
|
+
To note: when using the `catalog` option, the key `code` is set by default, when using `accession` the default key is `accref`. *The default identifier can be set by the options.properties file (accref cannot be changed)*
|
|
172
|
+
|
|
173
|
+
There are also options to generate `both` (Accession and Catalog references); or generate a `generic` set of metadata which will take the XIP metadata for the Title and Description fields, from the basename of the folder/file. It will also set the Security Status to "open": `opex_generate -c generic C:\Users\Christopher\Downloads`
|
|
174
|
+
|
|
175
|
+
You can also combine the generic options, like so: `catalog-generic, accession-generic, both-generic` to generate an identifier alongside generic data: `opex_generate -c catalog-generic C:\Users\Christopher\Downloads`
|
|
176
|
+
|
|
177
|
+
## Use of Input Override option.
|
|
178
|
+
|
|
179
|
+
This program also supports utilising an Auto Class spreadsheet as an 'input override', utilising the data added into said spreadsheet instead of generating them ad hoc like above.
|
|
180
|
+
|
|
181
|
+
Using this method XIP Metadata fields can be set on Ingest, including:
|
|
182
|
+
|
|
183
|
+
- Title
|
|
184
|
+
- Description
|
|
185
|
+
- Security Status
|
|
186
|
+
- Identifiers
|
|
187
|
+
- SourceID
|
|
188
|
+
|
|
189
|
+
XML metadata template data, from both the default templates and custom templates can also be set.
|
|
190
|
+
|
|
191
|
+
<details>
|
|
192
|
+
<summary>
|
|
193
|
+
Click to find out more!
|
|
194
|
+
</summary>
|
|
195
|
+
|
|
196
|
+
### XIP metadata - Title, Description and Security Status
|
|
197
|
+
|
|
198
|
+
To use an input override, we need to first create a spreadsheet with the paths of your files and folders. You can utilise the `auto_class` tool installed alongside the Opex Generator, like so:
|
|
199
|
+
|
|
200
|
+
`auto_class -p "ARCH" "C:\Users\Christopher\Downloads"`
|
|
201
|
+
|
|
202
|
+
In the resultant spreadsheet, add in "Title", "Description", and "Security" as new columns. The column headers are case-sensitive and have to match exactly. These fields would then be filled in with the relevant data.
|
|
203
|
+
|
|
204
|
+

|
|
205
|
+
|
|
206
|
+
Once the cells are filled in with data, run a generation like so: `opex_generate -i "{/path/to/your/spreadsheet.xlsx}" "{/path/to/root/directory}"`
|
|
207
|
+
|
|
208
|
+
Ensure that the root directory matches the original directory of the export. In the above case this would be: `opex_generate -i "C:\Users\Christopher\Downloads\meta\Downloads_AutoClass.xlsx" "C:\Users\Christopher\Downloads"`
|
|
209
|
+
|
|
210
|
+
### Headers Note
|
|
211
|
+
|
|
212
|
+
The column headers are drop-in, drop-out, meaning you can add the columns as and when you need them. You can also leave cells blank if you don't want them to have any data in that field.
|
|
213
|
+
|
|
214
|
+
To note: When assigning the `Security` field, the tag must be a match to an existing tag in your system. This is case-sensitive, so "Closed" will NOT match to a tag called "closed".
|
|
215
|
+
|
|
216
|
+
### Another Important Note
|
|
217
|
+
|
|
218
|
+
If there are any changes to the hierarchy data, such as a file/folder (not including a 'meta' folder) being removed or added after the export of the spreadsheet, the data may not be assigned correctly, or it may be assigned as "ERROR", or the program may simply fail.
|
|
219
|
+
|
|
220
|
+
### XIP Metadata - Identifiers
|
|
221
|
+
|
|
222
|
+
Custom Identifiers can be added by adding the columns: `"Archive_Reference", "Accession_Reference", "Identifier", or "Identifier:Keyname"`.
|
|
223
|
+
|
|
224
|
+

|
|
225
|
+
|
|
226
|
+
`Archive_Reference` or `Identifier` will default to the keyname `code`; `Accession_Reference` will default to `accref`. When using the Auto Classification Generator it will always generate a column called `Archive_Reference`, but you can simply rename or remove this column as necessary.
|
|
227
|
+
|
|
228
|
+
To add a custom identifier import, do so like: `Identifier:{YourIdentifierName}`, without the curly brackets IE: `Identifier:MyCode`. Multiple identifiers can be added as needed.
|
|
229
|
+
|
|
230
|
+
No additional parameters need to be set in the command line when using Identifiers, addition is enabled by default. Leaving a cell blank will not add an identifier.
|
|
231
|
+
|
|
232
|
+
### XIP Metadata - Hashes
|
|
233
|
+
|
|
234
|
+
If you utilise the Auto Classification's tool for generating Hashes; when utilising the `-fx` option in combination with `-i`, if both the columns `Hash` and `Algorithm` are present, the program will read the hashes from the spreadsheet instead of generating them.
|
|
235
|
+
|
|
236
|
+

|
|
237
|
+
|
|
238
|
+
*Be aware that interruption / resuming is not currently supported with the Auto Class Tool; also doesn't support multiple hashes*
|
|
239
|
+
|
|
240
|
+
### XML Metadata - Basic Templates
|
|
241
|
+
|
|
242
|
+
DC, MODS, GDPR, and EAD templates are supported and included with the installation of the package.
|
|
243
|
+
|
|
244
|
+
After exporting an Auto Class spreadsheet, you can add in additional columns to the spreadsheet and fill it out with data for an import. Like the XIP data, all fields are optional, and can be added on a 'drop-in' basis.
|
|
245
|
+
|
|
246
|
+

|
|
247
|
+
|
|
248
|
+
The column header's can be added in either of two ways, what I term: `exactly` or `flatly`. (There are probably better words to describe this).
|
|
249
|
+
|
|
250
|
+
An `exactly` match requires that the full path of the tag in the XML document is added to the column header. With each parent and child separated by a `/`; 'flatly' requires only the matching end tag.
|
|
251
|
+
|
|
252
|
+
To give an example, from the mods template:
|
|
253
|
+
|
|
254
|
+
```
|
|
255
|
+
Exactly:
|
|
256
|
+
mods:recordInfo/mods:recordIdentifier
|
|
257
|
+
|
|
258
|
+
Flatly:
|
|
259
|
+
mods:recordIdentifier
|
|
260
|
+
```
|
|
261
|
+
|
|
262
|
+
Both cases match to the field `recordIdentifier`. Note that the header includes both the namespace and tag, and is also case sensitive.
|
|
263
|
+
|
|
264
|
+
While using the `flatly` method is easier, be aware that if there are non-unique tags, such as `mods:note` in the Mods template, this method will only import to the first match, which might not be its intended destination. Using the `exactly` method resolves this issue.
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
Once you have added in your headers and the necessary data to create the OPEX's simply add the `-m` option, with the chosen method of import `flat|exact`, so:
|
|
268
|
+
`opex_generate -i "{/path/to/your/spreadsheet.xlsx}" "{/path/to/root/directory}" -m flat` or
|
|
269
|
+
`opex_generate -i "{/path/to/your/spreadsheet.xlsx}" "{/path/to/root/directory}" -m exact`
|
|
270
|
+
|
|
271
|
+
### XML Metadata - Quick Note
|
|
272
|
+
|
|
273
|
+
When you have non-unique tags, again, such as `mods:note`, you will need to add an index in square brackets `[0]` to indicate which tag to assign the data to, like: `mods:note[1] mods:note[2] ...` The number of the field will simply be the order in which it appears in the XML.
|
|
274
|
+
|
|
275
|
+
For convenience I've included the full templates for DC, MODS, GDPR and EAD, with the `exact` names in the headers [here](https://github.com/CPJPRINCE/opex_manifest_generator/tree/master/samples/spreads). I also created the `--print-xmls` function to display this info (including square bracket placement).
|
|
276
|
+
|
|
277
|
+
Also be aware that when using `-m` option and column headers for that XML document are present in the spreadsheet, it will add a metadata template to the OPEX, even if all the cells are left blank. As this is a useful function (adding blank templates to your import), I will leave this for now, but may adjust this in the future.
|
|
278
|
+
|
|
279
|
+
### XML Metadata Templates - Custom Templates
|
|
280
|
+
|
|
281
|
+
Any custom XML template, that is functioning in Preservica will work with this method. All XML's in a given `metadata` directory are checked when enabling the `-m` option.
|
|
282
|
+
|
|
283
|
+
The default location will be in the installation path of the program, typically under `/path/to/pythoninstall/Lib/site-packages/opex_manifest_generator/metadata`. However, you can also utilise the `-mdir` option to set this to a specific folder, to have a dedicated section.
|
|
284
|
+
|
|
285
|
+
After the xml is added to that directory, all that's required is to add the matching column headers into your spreadsheet. You can also utilise `--print-xmls` to obtain this.
|
|
286
|
+
|
|
287
|
+
### Additional Information for Auto Classification
|
|
288
|
+
#### SourceID
|
|
289
|
+
|
|
290
|
+
A SourceID can also be set by adding a `SourceID` header. The behaviour of this is not fully tested, likely won't be as I don't really utilise SourceIDs in my work :\).
|
|
291
|
+
|
|
292
|
+
#### Ignore
|
|
293
|
+
|
|
294
|
+
Ignoring Files can also be set by adding an `Ignore` header. When this is set to `TRUE` this will skip the generation of an Opex for the specified File or Folder; when done for folder's, the folder Opex will still include any ignored file's in its manifest.
|
|
295
|
+
|
|
296
|
+
#### Removals
|
|
297
|
+
|
|
298
|
+
Removing Files or Folders is also possible, by adding a `Removals` header. When this is set to `TRUE`, the specified File or Folder will be removed from the system. As a safeguard this must be enabled by adding the parameter `-rm, --remove`, and confirming the deletion when prompted.
|
|
299
|
+
|
|
300
|
+
#### Keywords
|
|
301
|
+
|
|
302
|
+
You can utilise keywords to replace reference numbers with abbreviated characters for instance: `--keywords "Secret Metadata Folder"` will replace the reference number with `"SMF"`. You can also set different modes with `--keywords-mode`. `intialise` will take the initials of each word like in the previous example; `firstletters` will take the first x number of letters. So the above becomes `"SEC"`. You can set multiple keywords by comma separation. If `--keywords` is set without any set strings it will be applied to every word.
|
|
303
|
+
|
|
304
|
+
There are further details in the Options Section.
|
|
305
|
+
|
|
306
|
+
#### Sorting
|
|
307
|
+
|
|
308
|
+
You can also sort utilising `--sort-by`. There are currently two options: `foldersfirst` and `alphabetical`. Folders first sorts folders first, then files (both alphabetically); alphabetically sorts both folders and files alphabetically.
|
|
309
|
+
|
|
310
|
+
#### Options File
|
|
311
|
+
|
|
312
|
+
You can utilise your own option-file to change the default column headers for the Input override method. See the option `--option-file path/to/file`. Defaults are:
|
|
313
|
+
|
|
314
|
+
```[options]
|
|
315
|
+
|
|
316
|
+
INDEX_FIELD = FullName
|
|
317
|
+
TITLE_FIELD = Title
|
|
318
|
+
DESCRIPTION_FIELD = Description
|
|
319
|
+
SECUIRTY_FIELD = Security
|
|
320
|
+
IDENTIFIER_FIELD = Identifier
|
|
321
|
+
IDENTIFIER_DEFAULT = code
|
|
322
|
+
REMOVAL_FIELD = Removals
|
|
323
|
+
IGNORE_FIELD = Ignore
|
|
324
|
+
SOURCEID_FIELD = SourceID
|
|
325
|
+
HASH_FIELD = Hash
|
|
326
|
+
ALGORITHM_FIELD = Algorithm
|
|
327
|
+
```
|
|
328
|
+
#### Custom Spreadsheets - Quick Note
|
|
329
|
+
|
|
330
|
+
You technically don't have to utilise the AutoClass tool at all. Any old spreadsheet will do!
|
|
331
|
+
|
|
332
|
+
The only requirement to use the input override is the presence of the `FullName` column, with an accurate list of paths.
|
|
333
|
+
|
|
334
|
+

|
|
335
|
+
|
|
336
|
+
</details>
|
|
337
|
+
|
|
338
|
+
## Further Options
|
|
339
|
+
|
|
340
|
+
The full options are given below; also see `opex_generate --help`
|
|
341
|
+
|
|
342
|
+
<details>
|
|
343
|
+
<summary>
|
|
344
|
+
Click here
|
|
345
|
+
</summary>
|
|
346
|
+
|
|
347
|
+
```
|
|
348
|
+
Options:
|
|
349
|
+
-h, --help Show Help dialog [boolean flag]
|
|
350
|
+
|
|
351
|
+
-v, --version Display version information [boolean flag]
|
|
352
|
+
|
|
353
|
+
Opex Options:
|
|
354
|
+
|
|
355
|
+
-fx, --fixity Generate a Fixity Check for files. [SHA-1,MD5, SHA-256, SHA-512
|
|
356
|
+
Can set multiple fixities with comma. | boolean flag]
|
|
357
|
+
IE MD5,SHA-1.
|
|
358
|
+
[Defaults to SHA-1 if not specified]
|
|
359
|
+
|
|
360
|
+
--pax-fixity Generates a Fixity Check for PAX files / Folders [boolean flag]
|
|
361
|
+
If not set PAX files / folders will be treated
|
|
362
|
+
as standard.
|
|
363
|
+
|
|
364
|
+
-clr, --clear-opex Will remove all existing Opex folders, [boolean flag]
|
|
365
|
+
When utilised with no other options, will end
|
|
366
|
+
the program.
|
|
367
|
+
|
|
368
|
+
-z, --zip Will zip the Opex's with the file itself to create [boolean flag]
|
|
369
|
+
a zip file. Existing file's are currently not removed.
|
|
370
|
+
***Use with caution, repeating the command multiple
|
|
371
|
+
times in a row, will break the Opex's.
|
|
372
|
+
|
|
373
|
+
--hidden Will generate Opex's for hidden files and directories [boolean flag]
|
|
374
|
+
|
|
375
|
+
-rm, --remove Will enable removals from a spreadsheet import [boolean flag]
|
|
376
|
+
|
|
377
|
+
-opt --options-file Specify an 'options.properties' file to change set [PATH/TO/FILE]
|
|
378
|
+
presets for column headers for input.
|
|
379
|
+
|
|
380
|
+
Auto Classification Options:
|
|
381
|
+
|
|
382
|
+
-c, --autoclass This will utilise the AutoClassification [{catalog, accession,both,
|
|
383
|
+
module to generate an Auto Class spreadsheet. generic, catalog-generic,
|
|
384
|
+
accession-generic,
|
|
385
|
+
There are several options, {catalog} will generate both-generic}]
|
|
386
|
+
an Archival Reference following; {accession}
|
|
387
|
+
will create a running number of files
|
|
388
|
+
(Currently this is not configurable).
|
|
389
|
+
{both} will do Both!
|
|
390
|
+
{generic} will populate the Title and
|
|
391
|
+
Description fields with the folder/file's name,
|
|
392
|
+
if used in conjunction with one of the above options:
|
|
393
|
+
{catalog-generic, accession-generic, both-generic}
|
|
394
|
+
it will do both simultaneously.
|
|
395
|
+
|
|
396
|
+
--accession-mode Sets whether to have the running tally be for {file,folder,both}
|
|
397
|
+
files, folders or both,
|
|
398
|
+
when utilising the Accession option with
|
|
399
|
+
autoclass. Default is file.
|
|
400
|
+
|
|
401
|
+
-p, --prefix Assign a prefix to the Auto Classification, [PREFIX]
|
|
402
|
+
when utilising {both} fill in like:
|
|
403
|
+
"catalog-prefix","accession-prefix".
|
|
404
|
+
|
|
405
|
+
-rme, --remove- Remove and log empty directories in a structure [boolean flag]
|
|
406
|
+
empty Log will be exported to 'meta' / output folder
|
|
407
|
+
|
|
408
|
+
-o, --output Sets the output of the 'meta' folder when [PATH/TO/FOLDER]
|
|
409
|
+
utilising AutoClass.
|
|
410
|
+
|
|
411
|
+
-s, --start-ref Sets the starting Reference in the Auto Class [int]
|
|
412
|
+
process.
|
|
413
|
+
|
|
414
|
+
-i --input Set whether to use an Auto Class spreadsheet as an [PATH/TO/FILE]
|
|
415
|
+
input. The input needs to be the (relative or
|
|
416
|
+
absolute) path of the spreadsheet.
|
|
417
|
+
|
|
418
|
+
This allows for use of the Auto Class spreadsheet
|
|
419
|
+
to customise the XIP metadata (and custom xml
|
|
420
|
+
metadata).
|
|
421
|
+
|
|
422
|
+
The following fields have to be added to the
|
|
423
|
+
spreadsheet and titled exactly as:
|
|
424
|
+
Title, Description, Security.
|
|
425
|
+
|
|
426
|
+
-m --metadata Toggles use of the metadata import method. [{none,flat,exact}
|
|
427
|
+
| boolean flag]
|
|
428
|
+
There are two methods utilised by this:
|
|
429
|
+
'exact',or 'flat'.
|
|
430
|
+
|
|
431
|
+
Exact requires that the column names in the spread
|
|
432
|
+
sheet match exactly to the XML:
|
|
433
|
+
{example:path/example:to/example:thing}
|
|
434
|
+
|
|
435
|
+
Flat only requires the final tag match.
|
|
436
|
+
IE {example:thing}. However, for more complex sets
|
|
437
|
+
of metadata, Flat will not function correctly.
|
|
438
|
+
|
|
439
|
+
Enabled with -m.
|
|
440
|
+
[Defaults to 'exact' method if not
|
|
441
|
+
specified]
|
|
442
|
+
|
|
443
|
+
Use of metadata requires, XML documents to
|
|
444
|
+
be added to the metadata folder, see docs for
|
|
445
|
+
details.
|
|
446
|
+
|
|
447
|
+
-mdir --metadata Specify the metadata directory to pull the XML files [PATH/TO/FOLDER]
|
|
448
|
+
-dir from.
|
|
449
|
+
[Defaults to lib folder if not set]
|
|
450
|
+
|
|
451
|
+
-dmd, --disable Will disable the creation of the 'meta' folder. [boolean flag]
|
|
452
|
+
-meta-dir Can also be enabled with output.
|
|
453
|
+
|
|
454
|
+
-ex --export Set whether to export any Auto Class generation [boolean flag]
|
|
455
|
+
to a spreadsheet
|
|
456
|
+
|
|
457
|
+
-fmt, --format Set whether to export as a CSV or XLSX file. {csv,xlsx}
|
|
458
|
+
[Default is to xlsx].
|
|
459
|
+
|
|
460
|
+
-dlm --delimiter Set to specify the delimiter between References [DELIMITER STRING]
|
|
461
|
+
|
|
462
|
+
-key --keywords Specify which keywords to look for and replace the [KEYWORDS ... | boolean flag]
|
|
463
|
+
generated reference with an abbreviation of the
|
|
464
|
+
word (depending on mode). For instance:
|
|
465
|
+
"A list Strings" will be abbreviated ALS.
|
|
466
|
+
|
|
467
|
+
Has to be an exact match to files / folders
|
|
468
|
+
names. Can set multiple strings to look for with
|
|
469
|
+
commas like so: "My Strings,I wish,to replace"
|
|
470
|
+
|
|
471
|
+
Can also be set without specifying any words to
|
|
472
|
+
apply to everything.
|
|
473
|
+
|
|
474
|
+
--keym --keywords Specify the mode to use for keywords {intialise,firstletters}
|
|
475
|
+
-mode Either 'intialise' taking the first letter of each
|
|
476
|
+
word between spaces IE "Department of Justice" becomes
|
|
477
|
+
"DOJ".
|
|
478
|
+
|
|
479
|
+
'firstletters' takes the first n amount of letters.
|
|
480
|
+
The aforementioned becomes "DEP"
|
|
481
|
+
|
|
482
|
+
--keywords-retain- Specify if you wish to continue or reset reference [boolean flag]
|
|
483
|
+
-order numbering for references not in keywords.
|
|
484
|
+
|
|
485
|
+
IE By default if a keyword is found and replaced,
|
|
486
|
+
where it would normally be reference number '3'.
|
|
487
|
+
The next reference down would be given the number 3.
|
|
488
|
+
|
|
489
|
+
Using this option, the next reference would be given
|
|
490
|
+
4.
|
|
491
|
+
|
|
492
|
+
--keywords-abbreviation Set the number of characters to abbreviate to for [int]
|
|
493
|
+
-number keywords option
|
|
494
|
+
[Default is 3 first letters, -1 for intialise]
|
|
495
|
+
|
|
496
|
+
--sort-by Set the method to sort. Can either utilise {foldersfirst,alphabetical}
|
|
497
|
+
'foldersfirst' to sort folders first then
|
|
498
|
+
alphabetically or 'alphabetical' to sort
|
|
499
|
+
both folders and files alphabetically
|
|
500
|
+
[Default is foldersfirst.]
|
|
501
|
+
```
|
|
502
|
+
</details>
|
|
503
|
+
|
|
504
|
+
## Future Developments
|
|
505
|
+
|
|
506
|
+
- ~~Customisable Filtering~~ *Added!*
|
|
507
|
+
- ~~Adjust Accession so the different modes can be utilised from Opex.~~ *Added!*
|
|
508
|
+
- ~~Add SourceID as option for use with Auto Class Spreadsheets.~~ *Added!*
|
|
509
|
+
- ~~Allow for multiple Identifiers to be added with Auto Class Spreadsheets. Currently only 1 or 2 identifiers can be added at a time, under "Archive_Reference" or "Accession_Reference". These are also tied to be either "code" or "accref". An Option needs to be added to allow custom setting of identifier...~~ *Added!*
|
|
510
|
+
- ~~Add an option / make it a default for Metadata XML's to be located in a specified directory rather than in the package.~~ *Added!*
|
|
511
|
+
- Zipping to conform to PAX - Last on the check list; it technically does...
|
|
512
|
+
- In theory, this tool should be compatible with any system that makes use of the OPEX standard... But in theory Communism works, in theory...
|
|
513
|
+
|
|
514
|
+
## Developers
|
|
515
|
+
|
|
516
|
+
For developers, you can also embed / use the program directly in Python. Though be warned, I haven't tested this functionality much!
|
|
517
|
+
|
|
518
|
+
```
|
|
519
|
+
from opex_manifest_generator import OpexManifestGenerator as OMG
|
|
520
|
+
|
|
521
|
+
OMG(root="/my/directory/path", algorithm = "SHA-256").main()
|
|
522
|
+
|
|
523
|
+
```
|
|
524
|
+
|
|
525
|
+
## Contributing
|
|
526
|
+
|
|
527
|
+
I welcome further contributions and feedback! If there are any issues, please raise them [here](https://github.com/CPJPRINCE/opex_manifest_generator/issues).
|