ocrd 3.6.0__py3-none-any.whl → 3.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. ocrd/cli/__init__.py +2 -4
  2. ocrd/cli/bashlib.py +6 -117
  3. ocrd/cli/network.py +2 -0
  4. ocrd/cli/resmgr.py +29 -65
  5. ocrd/constants.py +0 -2
  6. ocrd/mets_server.py +5 -5
  7. ocrd/processor/base.py +6 -16
  8. ocrd/processor/builtin/dummy/ocrd-tool.json +25 -0
  9. ocrd/processor/builtin/merge_processor.py +131 -0
  10. ocrd/processor/builtin/param_command_header2unordered.json +7 -0
  11. ocrd/processor/builtin/param_command_heading2unordered.json +7 -0
  12. ocrd/processor/builtin/param_command_lines2orientation.json +6 -0
  13. ocrd/processor/builtin/param_command_page-update-version.json +5 -0
  14. ocrd/processor/builtin/param_command_transkribus-to-prima.json +8 -0
  15. ocrd/processor/builtin/shell_processor.py +128 -0
  16. ocrd/resource_manager.py +213 -124
  17. {ocrd-3.6.0.dist-info → ocrd-3.8.0.dist-info}/METADATA +23 -10
  18. {ocrd-3.6.0.dist-info → ocrd-3.8.0.dist-info}/RECORD +40 -34
  19. {ocrd-3.6.0.dist-info → ocrd-3.8.0.dist-info}/entry_points.txt +2 -0
  20. ocrd_models/ocrd_agent.py +3 -3
  21. ocrd_network/__init__.py +1 -0
  22. ocrd_network/cli/__init__.py +2 -0
  23. ocrd_network/cli/resmgr_server.py +23 -0
  24. ocrd_network/constants.py +3 -0
  25. ocrd_network/logging_utils.py +5 -0
  26. ocrd_network/models/job.py +29 -28
  27. ocrd_network/models/messages.py +3 -2
  28. ocrd_network/models/workspace.py +4 -4
  29. ocrd_network/resource_manager_server.py +182 -0
  30. ocrd_network/runtime_data/connection_clients.py +1 -1
  31. ocrd_network/runtime_data/hosts.py +43 -16
  32. ocrd_network/runtime_data/network_agents.py +15 -1
  33. ocrd_utils/__init__.py +5 -1
  34. ocrd_utils/constants.py +5 -0
  35. ocrd_utils/logging.py +3 -0
  36. ocrd_utils/os.py +142 -62
  37. ocrd_validators/ocrd_tool.schema.yml +7 -4
  38. ocrd/cli/log.py +0 -56
  39. ocrd/lib.bash +0 -310
  40. ocrd/resource_list.yml +0 -61
  41. {ocrd-3.6.0.dist-info → ocrd-3.8.0.dist-info}/LICENSE +0 -0
  42. {ocrd-3.6.0.dist-info → ocrd-3.8.0.dist-info}/WHEEL +0 -0
  43. {ocrd-3.6.0.dist-info → ocrd-3.8.0.dist-info}/top_level.txt +0 -0
ocrd/lib.bash DELETED
@@ -1,310 +0,0 @@
1
- ((BASH_VERSINFO<4 || BASH_VERSINFO==4 && BASH_VERSINFO[1]<4)) && \
2
- echo >&2 "bash $BASH_VERSION is too old. Please install bash 4.4 or newer." && \
3
- exit 1
4
-
5
- ## ### `ocrd__raise`
6
- ##
7
- ## Raise an error and exit.
8
- ocrd__raise () {
9
- echo >&2 "ERROR: $1"; exit 127
10
- }
11
-
12
- ## ### `ocrd__log`
13
- ##
14
- ## Delegate logging to `ocrd log`
15
- ocrd__log () {
16
- local log_level="${ocrd__argv[log_level]:-}"
17
- if [[ -n "$log_level" ]];then
18
- ocrd -l "$log_level" log "$@"
19
- else
20
- ocrd log "$@"
21
- fi
22
- }
23
-
24
-
25
- ## ### `ocrd__minversion`
26
- ##
27
- ## Ensure minimum version
28
- # ht https://stackoverflow.com/posts/4025065
29
- ocrd__minversion () {
30
- set -e
31
- local minversion_raw="$1"
32
- local version_raw=$(ocrd --version|sed 's/ocrd, version //')
33
- local version_mmp=$(echo "$version_raw" | grep -Eo '([0-9]+\.?){3}')
34
- local version_prerelease_suffix="${version_raw#$version_mmp}"
35
- if [[ -z $version_prerelease_suffix ]];then
36
- version_prerelease_suffix=0
37
- fi
38
- local minversion_mmp=$(echo "$minversion_raw" | grep -Eo '([0-9]+\.?){3}')
39
- local minversion_prerelease_suffix="${minversion_raw#$minversion_mmp}"
40
- if [[ -z $minversion_prerelease_suffix ]];then
41
- minversion_prerelease_suffix=0
42
- fi
43
- local IFS='.'
44
- version=($version_mmp)
45
- minversion=($minversion_mmp)
46
- # MAJOR > MAJOR
47
- if (( ${version[0]} > ${minversion[0]} ));then
48
- return
49
- # MAJOR == MAJOR
50
- elif (( ${version[0]} == ${minversion[0]} ));then
51
- # MINOR > MINOR
52
- if (( ${version[1]} > ${minversion[1]} ));then
53
- return
54
- # MINOR == MINOR
55
- elif (( ${version[1]} == ${minversion[1]} ));then
56
- # PATCH > PATCH
57
- if (( ${version[2]} > ${minversion[2]} ));then
58
- return
59
- elif (( ${version[2]} == ${minversion[2]}));then
60
- # Match prerelease suffix like a1, b1 alphabetically
61
- if [ "$version_prerelease_suffix" = "$minversion_prerelease_suffix" -o "$version_prerelease_suffix" \> "$minversion_prerelease_suffix" ]; then
62
- return
63
- fi
64
- fi
65
- fi
66
- fi
67
- ocrd__raise "ocrd/core is too old ($version_raw < $minversion_raw). Please update OCR-D/core"
68
- }
69
-
70
- ## ### `ocrd__dumpjson`
71
- ##
72
- ## Output ocrd-tool.json.
73
- ##
74
- ## Requires `$OCRD_TOOL_JSON` and `$OCRD_TOOL_NAME` to be set:
75
- ##
76
- ## ```sh
77
- ## export OCRD_TOOL_JSON=/path/to/ocrd-tool.json
78
- ## export OCRD_TOOL_NAME=ocrd-foo-bar
79
- ## ```
80
- ##
81
- ocrd__dumpjson () {
82
- ocrd ocrd-tool "$OCRD_TOOL_JSON" tool "$OCRD_TOOL_NAME" dump
83
- }
84
-
85
- ##
86
- ## Output file resource path.
87
- ##
88
- ocrd__resolve_resource () {
89
- ocrd ocrd-tool "$OCRD_TOOL_JSON" tool "$OCRD_TOOL_NAME" resolve-resource "$1"
90
- }
91
-
92
- ##
93
- ## Output file resource content.
94
- ##
95
- ocrd__show_resource () {
96
- ocrd ocrd-tool "$OCRD_TOOL_JSON" tool "$OCRD_TOOL_NAME" show-resource "$1"
97
- }
98
-
99
- ##
100
- ## Output file resources names.
101
- ##
102
- ocrd__list_resources () {
103
- ocrd ocrd-tool "$OCRD_TOOL_JSON" tool "$OCRD_TOOL_NAME" list-resources
104
- }
105
-
106
- ## ### `ocrd__usage`
107
- ##
108
- ## Print usage
109
- ##
110
- ocrd__usage () {
111
- declare -a _args=(ocrd-tool "$OCRD_TOOL_JSON" tool "$OCRD_TOOL_NAME" help)
112
- if [ -v ocrd__subcommand ];then
113
- _args+=($ocrd__subcommand)
114
- fi
115
- ocrd ${_args[@]}
116
- }
117
-
118
- ## ### `ocrd__parse_argv`
119
- ##
120
- ## Expects an associative array ("hash"/"dict") `ocrd__argv` to be defined:
121
- ##
122
- ## ```sh
123
- ## declare -A ocrd__argv=()
124
- ## ```
125
- ocrd__parse_argv () {
126
- set -e
127
-
128
- # if [[ -n "$ZSH_VERSION" ]];then
129
- # print -r -- ${+ocrd__argv} ${(t)ocrd__argv}
130
- # fi
131
- if ! declare -p "ocrd__argv" >/dev/null 2>/dev/null ;then
132
- ocrd__raise "Must set \$ocrd__argv (declare -A ocrd__argv)"
133
- fi
134
-
135
- if ! declare -p "params" >/dev/null 2>/dev/null ;then
136
- ocrd__raise "Must set \$params (declare -A params)"
137
- fi
138
-
139
- if ! declare -p "params_json" >/dev/null 2>/dev/null ;then
140
- ocrd__raise "Must set \$params_json (declare params_json)"
141
- fi
142
-
143
- if [[ $# = 0 ]];then
144
- ocrd__usage
145
- exit 1
146
- fi
147
-
148
- ocrd__argv[debug]=false
149
- ocrd__argv[overwrite]=false
150
- ocrd__argv[profile]=false
151
- ocrd__argv[profile_file]=
152
- ocrd__argv[mets_server_url]=
153
- ocrd__argv[mets_file]="$PWD/mets.xml"
154
-
155
- local __parameters=()
156
- local __parameter_overrides=()
157
-
158
- if [[ $1 == 'worker' || $1 == 'server' ]];then
159
- ocrd__subcommand="$1" ; shift ;
160
- fi
161
-
162
- while [[ "${1:-}" = -* ]];do
163
- case "$1" in
164
- -l|--log-level) ocrd__argv[log_level]=$2 ; shift ;;
165
- --log-filename) exec 2> "$2" ; shift ;;
166
- -h|--help|--usage) ocrd__usage; exit ;;
167
- -J|--dump-json) ocrd__dumpjson; exit ;;
168
- -D|--dump-module-dir) echo $(dirname "$OCRD_TOOL_JSON"); exit ;;
169
- -C|--show-resource) ocrd__show_resource "$2"; exit ;;
170
- -L|--list-resources) ocrd__list_resources; exit ;;
171
- -p|--parameter) __parameters+=(-p "$(ocrd__resolve_resource "$2" 2>/dev/null || echo "$2")") ; shift ;;
172
- -P|--parameter-override) __parameter_overrides+=(-P "$2" "$3") ; shift ; shift ;;
173
- -g|--page-id) ocrd__argv[page_id]=$2 ; shift ;;
174
- -O|--output-file-grp) ocrd__argv[output_file_grp]=$2 ; shift ;;
175
- -I|--input-file-grp) ocrd__argv[input_file_grp]=$2 ; shift ;;
176
- -w|--working-dir) ocrd__argv[working_dir]=$(realpath "$2") ; shift ;;
177
- -m|--mets) ocrd__argv[mets_file]=$(realpath "$2") ; shift ;;
178
- -U|--mets-server-url) ocrd__argv[mets_server_url]="$2" ; shift ;;
179
- --debug) ocrd__argv[debug]=true ;;
180
- --overwrite) ocrd__argv[overwrite]=true ;;
181
- --profile) ocrd__argv[profile]=true ;;
182
- --profile-file) ocrd__argv[profile_file]=$(realpath "$2") ; shift ;;
183
- -V|--version) ocrd ocrd-tool "$OCRD_TOOL_JSON" version; exit ;;
184
- --queue) ocrd__worker_queue="$2" ; shift ;;
185
- --database) ocrd__worker_database="$2" ; shift ;;
186
- *) ocrd__raise "Unknown option '$1'" ;;
187
- esac
188
- shift
189
- done
190
-
191
- if [ -v ocrd__worker_queue -o -v ocrd__worker_database -o -v ocrd__subcommand ]; then
192
- if ! [ -v ocrd__subcommand ] ; then
193
- ocrd__raise "Provide subcommand 'worker' for Processing Worker"
194
- elif ! [ -v ocrd__worker_database ]; then
195
- ocrd__raise "For the Processing Worker --database is required"
196
- elif ! [ -v ocrd__worker_queue ]; then
197
- ocrd__raise "For the Processing Worker --queue is required"
198
- fi
199
- if [ ${ocrd__subcommand} = "worker" ]; then
200
- ocrd network processing-worker $OCRD_TOOL_NAME --queue "${ocrd__worker_queue}" --database "${ocrd__worker_database}"
201
- else
202
- ocrd__raise "subcommand must be 'worker' not '${ocrd__subcommand}'"
203
- fi
204
- exit
205
- fi
206
-
207
- if [[ ! -e "${ocrd__argv[mets_file]}" ]]; then
208
- ocrd__raise "METS file '${ocrd__argv[mets_file]}' not found"
209
- fi
210
-
211
- if [[ ! -d "${ocrd__argv[working_dir]:=$(dirname "${ocrd__argv[mets_file]}")}" ]]; then
212
- ocrd__raise "workdir '${ocrd__argv[working_dir]}' not a directory. Use -w/--working-dir to set correctly"
213
- fi
214
-
215
- if [[ ! "${ocrd__argv[log_level]:=INFO}" =~ OFF|ERROR|WARN|INFO|DEBUG|TRACE ]]; then
216
- ocrd__raise "log level '${ocrd__argv[log_level]}' is invalid"
217
- fi
218
-
219
- if [[ -z "${ocrd__argv[input_file_grp]:=}" ]]; then
220
- ocrd__raise "Provide --input-file-grp/-I explicitly!"
221
- fi
222
-
223
- if [[ -z "${ocrd__argv[output_file_grp]:=}" ]]; then
224
- ocrd__raise "Provide --output-file-grp/-O explicitly!"
225
- fi
226
-
227
- # enable profiling (to be extended/acted upon by caller)
228
- if [[ ${ocrd__argv[profile]} = true ]]; then
229
- if [[ -n "${ocrd__argv[profile_file]}" ]]; then
230
- exec 3> "${ocrd__argv[profile_file]}"
231
- else
232
- exec 3>&2
233
- fi
234
- BASH_XTRACEFD=3
235
- # just the builtin tracer (without timing):
236
- #set -x
237
- # our own (including timing):
238
- DEPTH=+++++++++++
239
- shopt -s extdebug
240
- showtime() { date "+${DEPTH:0:$BASH_SUBSHELL+1} %H:%M:%S $BASH_COMMAND" >&3; }
241
- declare +t showtime # no trace here
242
- trap showtime DEBUG
243
- fi
244
-
245
- # check parameters
246
- local params_parsed retval
247
- params_parsed="$(ocrd ocrd-tool "$OCRD_TOOL_JSON" tool $OCRD_TOOL_NAME parse-params "${__parameters[@]}" "${__parameter_overrides[@]}")" || {
248
- retval=$?
249
- ocrd__raise "Failed to parse parameters (retval $retval):
250
- $params_parsed"
251
- }
252
- eval "$params_parsed"
253
- params_json="$(ocrd ocrd-tool "$OCRD_TOOL_JSON" tool $OCRD_TOOL_NAME parse-params --json "${__parameters[@]}" "${__parameter_overrides[@]}")"
254
-
255
- }
256
-
257
- ocrd__wrap () {
258
- set -e
259
-
260
- declare -gx OCRD_TOOL_JSON="$1"
261
- declare -gx OCRD_TOOL_NAME="$2"
262
- shift
263
- shift
264
- declare -Agx params
265
- params=()
266
- declare -g params_json
267
- declare -Agx ocrd__argv
268
- ocrd__argv=()
269
-
270
- if ! which "ocrd" >/dev/null 2>/dev/null;then
271
- ocrd__raise "ocrd not in \$PATH"
272
- fi
273
-
274
- if ! declare -p "OCRD_TOOL_JSON" >/dev/null 2>/dev/null;then
275
- ocrd__raise "Must set \$OCRD_TOOL_JSON"
276
- elif [[ ! -r "$OCRD_TOOL_JSON" ]];then
277
- ocrd__raise "Cannot read \$OCRD_TOOL_JSON: '$OCRD_TOOL_JSON'"
278
- fi
279
-
280
- if [[ -z "$OCRD_TOOL_NAME" ]];then
281
- ocrd__raise "Must set \$OCRD_TOOL_NAME"
282
- elif ! ocrd ocrd-tool "$OCRD_TOOL_JSON" list-tools|grep -q "$OCRD_TOOL_NAME";then
283
- ocrd__raise "No such command \$OCRD_TOOL_NAME: $OCRD_TOOL_NAME"
284
- fi
285
-
286
- ocrd__parse_argv "$@"
287
-
288
- declare -ag ocrd__files
289
- IFS=$'\n'
290
- ocrd__files=( $(ocrd bashlib input-files \
291
- --ocrd-tool $OCRD_TOOL_JSON \
292
- --executable $OCRD_TOOL_NAME \
293
- $(if [[ ${ocrd__argv[debug]} = true ]]; then echo --debug; fi) \
294
- $(if [[ ${ocrd__argv[overwrite]} = true ]]; then echo --overwrite; fi) \
295
- -m "${ocrd__argv[mets_file]}" \
296
- -d "${ocrd__argv[working_dir]}" \
297
- ${ocrd__argv[mets_server_url]:+-U} ${ocrd__argv[mets_server_url]:-} \
298
- -p "$params_json" \
299
- -I "${ocrd__argv[input_file_grp]}" \
300
- -O "${ocrd__argv[output_file_grp]}" \
301
- ${ocrd__argv[page_id]:+-g} ${ocrd__argv[page_id]:-}) )
302
- IFS=$' \t\n'
303
- }
304
-
305
- ## usage: pageId=$(ocrd__input_file 3 pageId)
306
- ocrd__input_file() {
307
- declare -A input_file
308
- eval input_file=( "${ocrd__files[$1]}" )
309
- eval echo "${input_file[$2]}"
310
- }
ocrd/resource_list.yml DELETED
@@ -1,61 +0,0 @@
1
- # List available resources by processor for "ocrd resmgr"
2
- ocrd-calamari-recognize:
3
- # XXX disabled since older ocrd_calamari versions don't support resource resolving
4
- #- url: https://qurator-data.de/calamari-models/GT4HistOCR/2019-07-22T15_49+0200/model.tar.xz
5
- # type: archive
6
- # name: qurator-gt4histocr-0.3
7
- # description: Calamari model trained with GT4HistOCR
8
- # size: 116439072
9
- # path_in_archive: '.'
10
- # version_range: '< 1.0.0'
11
- - url: https://qurator-data.de/calamari-models/GT4HistOCR/2019-12-11T11_10+0100/model.tar.xz
12
- type: archive
13
- name: qurator-gt4histocr-1.0
14
- description: Calamari model trained with GT4HistOCR
15
- size: 90275264
16
- path_in_archive: '.'
17
- version_range: '>= 1.0.0'
18
- - url: https://github.com/Calamari-OCR/calamari_models_experimental/releases/download/v0.0.1-pre1/c1_fraktur19-1.tar.gz
19
- type: archive
20
- name: zpd-fraktur19
21
- description: Model trained on 19th century german fraktur
22
- path_in_archive: 'c1_fraktur19-1'
23
- size: 86009886
24
- version_range: '>= 1.0.0'
25
- - url: https://github.com/Calamari-OCR/calamari_models_experimental/releases/download/v0.0.1-pre1/c1_latin-script-hist-3.tar.gz
26
- type: archive
27
- name: zpd-latin-script-hist-3
28
- path_in_archive: 'c1_latin-script-hist-3'
29
- description: Model trained on historical latin-script texts
30
- size: 88416863
31
- version_range: '>= 1.0.0'
32
- ocrd-cis-ocropy-recognize:
33
- - url: https://github.com/zuphilip/ocropy-models/raw/master/en-default.pyrnn.gz
34
- name: en-default.pyrnn.gz
35
- description: Default ocropy model
36
- size: 83826134
37
- - url: https://github.com/zuphilip/ocropy-models/raw/master/fraktur.pyrnn.gz
38
- name: fraktur.pyrnn.gz
39
- description: Default ocropy fraktur model
40
- size: 43882365
41
- - url: https://github.com/jze/ocropus-model_fraktur/raw/master/fraktur.pyrnn.gz
42
- name: fraktur-jze.pyrnn.gz
43
- description: ocropy fraktur model by github.com/jze
44
- size: 2961298
45
- - url: https://github.com/chreul/OCR_Testdata_EarlyPrintedBooks/raw/master/LatinHist-98000.pyrnn.gz
46
- name: LatinHist.pyrnn.gz
47
- description: ocropy historical latin model by github.com/chreul
48
- size: 16989864
49
- ocrd-sbb-binarize:
50
- - url: https://qurator-data.de/sbb_binarization/2021-03-09/models.tar.gz
51
- description: updated default models provided by github.com/qurator-spk
52
- name: default-2021-03-09
53
- type: archive
54
- path_in_archive: models
55
- size: 133363179
56
- - url: https://qurator-data.de/sbb_binarization/models.tar.gz
57
- description: default models provided by github.com/qurator-spk
58
- name: default
59
- type: archive
60
- path_in_archive: models
61
- size: 1654623597
File without changes
File without changes