arkindex-base-worker 0.5.2a1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arkindex_base_worker-0.5.2a1/LICENSE +21 -0
- arkindex_base_worker-0.5.2a1/PKG-INFO +49 -0
- arkindex_base_worker-0.5.2a1/README.md +16 -0
- arkindex_base_worker-0.5.2a1/arkindex_base_worker.egg-info/PKG-INFO +49 -0
- arkindex_base_worker-0.5.2a1/arkindex_base_worker.egg-info/SOURCES.txt +64 -0
- arkindex_base_worker-0.5.2a1/arkindex_base_worker.egg-info/dependency_links.txt +1 -0
- arkindex_base_worker-0.5.2a1/arkindex_base_worker.egg-info/requires.txt +11 -0
- arkindex_base_worker-0.5.2a1/arkindex_base_worker.egg-info/top_level.txt +7 -0
- arkindex_base_worker-0.5.2a1/arkindex_worker/__init__.py +10 -0
- arkindex_base_worker-0.5.2a1/arkindex_worker/cache.py +376 -0
- arkindex_base_worker-0.5.2a1/arkindex_worker/image.py +552 -0
- arkindex_base_worker-0.5.2a1/arkindex_worker/models.py +323 -0
- arkindex_base_worker-0.5.2a1/arkindex_worker/utils.py +307 -0
- arkindex_base_worker-0.5.2a1/arkindex_worker/worker/__init__.py +456 -0
- arkindex_base_worker-0.5.2a1/arkindex_worker/worker/base.py +568 -0
- arkindex_base_worker-0.5.2a1/arkindex_worker/worker/classification.py +279 -0
- arkindex_base_worker-0.5.2a1/arkindex_worker/worker/corpus.py +86 -0
- arkindex_base_worker-0.5.2a1/arkindex_worker/worker/dataset.py +173 -0
- arkindex_base_worker-0.5.2a1/arkindex_worker/worker/element.py +1080 -0
- arkindex_base_worker-0.5.2a1/arkindex_worker/worker/entity.py +336 -0
- arkindex_base_worker-0.5.2a1/arkindex_worker/worker/image.py +21 -0
- arkindex_base_worker-0.5.2a1/arkindex_worker/worker/metadata.py +202 -0
- arkindex_base_worker-0.5.2a1/arkindex_worker/worker/process.py +92 -0
- arkindex_base_worker-0.5.2a1/arkindex_worker/worker/task.py +47 -0
- arkindex_base_worker-0.5.2a1/arkindex_worker/worker/training.py +320 -0
- arkindex_base_worker-0.5.2a1/arkindex_worker/worker/transcription.py +506 -0
- arkindex_base_worker-0.5.2a1/examples/standalone/python/worker.py +171 -0
- arkindex_base_worker-0.5.2a1/examples/tooled/python/worker.py +50 -0
- arkindex_base_worker-0.5.2a1/hooks/pre_gen_project.py +3 -0
- arkindex_base_worker-0.5.2a1/pyproject.toml +104 -0
- arkindex_base_worker-0.5.2a1/setup.cfg +4 -0
- arkindex_base_worker-0.5.2a1/tests/__init__.py +8 -0
- arkindex_base_worker-0.5.2a1/tests/conftest.py +742 -0
- arkindex_base_worker-0.5.2a1/tests/test_base_worker.py +930 -0
- arkindex_base_worker-0.5.2a1/tests/test_cache.py +387 -0
- arkindex_base_worker-0.5.2a1/tests/test_dataset_worker.py +709 -0
- arkindex_base_worker-0.5.2a1/tests/test_element.py +478 -0
- arkindex_base_worker-0.5.2a1/tests/test_elements_worker/__init__.py +11 -0
- arkindex_base_worker-0.5.2a1/tests/test_elements_worker/test_classification.py +967 -0
- arkindex_base_worker-0.5.2a1/tests/test_elements_worker/test_cli.py +93 -0
- arkindex_base_worker-0.5.2a1/tests/test_elements_worker/test_corpus.py +168 -0
- arkindex_base_worker-0.5.2a1/tests/test_elements_worker/test_dataset.py +376 -0
- arkindex_base_worker-0.5.2a1/tests/test_elements_worker/test_element.py +601 -0
- arkindex_base_worker-0.5.2a1/tests/test_elements_worker/test_element_create_multiple.py +715 -0
- arkindex_base_worker-0.5.2a1/tests/test_elements_worker/test_element_create_single.py +528 -0
- arkindex_base_worker-0.5.2a1/tests/test_elements_worker/test_element_list_children.py +969 -0
- arkindex_base_worker-0.5.2a1/tests/test_elements_worker/test_element_list_parents.py +530 -0
- arkindex_base_worker-0.5.2a1/tests/test_elements_worker/test_entity.py +910 -0
- arkindex_base_worker-0.5.2a1/tests/test_elements_worker/test_image.py +66 -0
- arkindex_base_worker-0.5.2a1/tests/test_elements_worker/test_metadata.py +607 -0
- arkindex_base_worker-0.5.2a1/tests/test_elements_worker/test_process.py +89 -0
- arkindex_base_worker-0.5.2a1/tests/test_elements_worker/test_task.py +198 -0
- arkindex_base_worker-0.5.2a1/tests/test_elements_worker/test_training.py +269 -0
- arkindex_base_worker-0.5.2a1/tests/test_elements_worker/test_transcription_create.py +873 -0
- arkindex_base_worker-0.5.2a1/tests/test_elements_worker/test_transcription_create_with_elements.py +951 -0
- arkindex_base_worker-0.5.2a1/tests/test_elements_worker/test_transcription_list.py +450 -0
- arkindex_base_worker-0.5.2a1/tests/test_elements_worker/test_worker.py +904 -0
- arkindex_base_worker-0.5.2a1/tests/test_image.py +943 -0
- arkindex_base_worker-0.5.2a1/tests/test_merge.py +224 -0
- arkindex_base_worker-0.5.2a1/tests/test_modern_config.py +81 -0
- arkindex_base_worker-0.5.2a1/tests/test_utils.py +162 -0
- arkindex_base_worker-0.5.2a1/worker-demo/tests/__init__.py +0 -0
- arkindex_base_worker-0.5.2a1/worker-demo/tests/conftest.py +32 -0
- arkindex_base_worker-0.5.2a1/worker-demo/tests/test_worker.py +12 -0
- arkindex_base_worker-0.5.2a1/worker-demo/worker_demo/__init__.py +6 -0
- arkindex_base_worker-0.5.2a1/worker-demo/worker_demo/worker.py +19 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2023 Teklia
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: arkindex-base-worker
|
|
3
|
+
Version: 0.5.2a1
|
|
4
|
+
Summary: Base Worker to easily build Arkindex ML workflows
|
|
5
|
+
Author-email: Teklia <contact@teklia.com>
|
|
6
|
+
Maintainer-email: Teklia <contact@teklia.com>
|
|
7
|
+
Project-URL: Homepage, https://workers.arkindex.org
|
|
8
|
+
Project-URL: Documentation, https://workers.arkindex.org
|
|
9
|
+
Project-URL: Repository, https://gitlab.teklia.com/workers/base-worker
|
|
10
|
+
Project-URL: Bug Tracker, https://gitlab.teklia.com/workers/base-worker/issues
|
|
11
|
+
Project-URL: Authors, https://teklia.com
|
|
12
|
+
Keywords: python
|
|
13
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Requires-Python: >=3.10
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
License-File: LICENSE
|
|
22
|
+
Requires-Dist: humanize==4.15.0
|
|
23
|
+
Requires-Dist: peewee~=3.17
|
|
24
|
+
Requires-Dist: Pillow==11.3.0
|
|
25
|
+
Requires-Dist: python-gnupg==0.5.6
|
|
26
|
+
Requires-Dist: shapely==2.0.6
|
|
27
|
+
Requires-Dist: teklia-toolbox==0.1.12
|
|
28
|
+
Requires-Dist: zstandard==0.25.0
|
|
29
|
+
Provides-Extra: tests
|
|
30
|
+
Requires-Dist: pytest-mock==3.15.1; extra == "tests"
|
|
31
|
+
Requires-Dist: pytest-responses==0.5.1; extra == "tests"
|
|
32
|
+
Dynamic: license-file
|
|
33
|
+
|
|
34
|
+
# Arkindex base Worker
|
|
35
|
+
|
|
36
|
+
An easy-to-use, high-level Python 3 API client for building ML tasks.
|
|
37
|
+
|
|
38
|
+
This is an open-source project, licensed under [the MIT license](https://opensource.org/license/mit/).
|
|
39
|
+
|
|
40
|
+
## Documentation
|
|
41
|
+
|
|
42
|
+
The [documentation](https://workers.arkindex.org/) is made with [Antora](https://antora.org/) and is hosted by [GitLab Pages](https://docs.gitlab.com/ee/user/project/pages/).
|
|
43
|
+
|
|
44
|
+
## Create a new worker using our template
|
|
45
|
+
|
|
46
|
+
```shell
|
|
47
|
+
pip install --user cookiecutter
|
|
48
|
+
cookiecutter git@gitlab.teklia.com:workers/base-worker.git
|
|
49
|
+
```
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# Arkindex base Worker
|
|
2
|
+
|
|
3
|
+
An easy-to-use, high-level Python 3 API client for building ML tasks.
|
|
4
|
+
|
|
5
|
+
This is an open-source project, licensed under [the MIT license](https://opensource.org/license/mit/).
|
|
6
|
+
|
|
7
|
+
## Documentation
|
|
8
|
+
|
|
9
|
+
The [documentation](https://workers.arkindex.org/) is made with [Antora](https://antora.org/) and is hosted by [GitLab Pages](https://docs.gitlab.com/ee/user/project/pages/).
|
|
10
|
+
|
|
11
|
+
## Create a new worker using our template
|
|
12
|
+
|
|
13
|
+
```shell
|
|
14
|
+
pip install --user cookiecutter
|
|
15
|
+
cookiecutter git@gitlab.teklia.com:workers/base-worker.git
|
|
16
|
+
```
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: arkindex-base-worker
|
|
3
|
+
Version: 0.5.2a1
|
|
4
|
+
Summary: Base Worker to easily build Arkindex ML workflows
|
|
5
|
+
Author-email: Teklia <contact@teklia.com>
|
|
6
|
+
Maintainer-email: Teklia <contact@teklia.com>
|
|
7
|
+
Project-URL: Homepage, https://workers.arkindex.org
|
|
8
|
+
Project-URL: Documentation, https://workers.arkindex.org
|
|
9
|
+
Project-URL: Repository, https://gitlab.teklia.com/workers/base-worker
|
|
10
|
+
Project-URL: Bug Tracker, https://gitlab.teklia.com/workers/base-worker/issues
|
|
11
|
+
Project-URL: Authors, https://teklia.com
|
|
12
|
+
Keywords: python
|
|
13
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Requires-Python: >=3.10
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
License-File: LICENSE
|
|
22
|
+
Requires-Dist: humanize==4.15.0
|
|
23
|
+
Requires-Dist: peewee~=3.17
|
|
24
|
+
Requires-Dist: Pillow==11.3.0
|
|
25
|
+
Requires-Dist: python-gnupg==0.5.6
|
|
26
|
+
Requires-Dist: shapely==2.0.6
|
|
27
|
+
Requires-Dist: teklia-toolbox==0.1.12
|
|
28
|
+
Requires-Dist: zstandard==0.25.0
|
|
29
|
+
Provides-Extra: tests
|
|
30
|
+
Requires-Dist: pytest-mock==3.15.1; extra == "tests"
|
|
31
|
+
Requires-Dist: pytest-responses==0.5.1; extra == "tests"
|
|
32
|
+
Dynamic: license-file
|
|
33
|
+
|
|
34
|
+
# Arkindex base Worker
|
|
35
|
+
|
|
36
|
+
An easy-to-use, high-level Python 3 API client for building ML tasks.
|
|
37
|
+
|
|
38
|
+
This is an open-source project, licensed under [the MIT license](https://opensource.org/license/mit/).
|
|
39
|
+
|
|
40
|
+
## Documentation
|
|
41
|
+
|
|
42
|
+
The [documentation](https://workers.arkindex.org/) is made with [Antora](https://antora.org/) and is hosted by [GitLab Pages](https://docs.gitlab.com/ee/user/project/pages/).
|
|
43
|
+
|
|
44
|
+
## Create a new worker using our template
|
|
45
|
+
|
|
46
|
+
```shell
|
|
47
|
+
pip install --user cookiecutter
|
|
48
|
+
cookiecutter git@gitlab.teklia.com:workers/base-worker.git
|
|
49
|
+
```
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
arkindex_base_worker.egg-info/PKG-INFO
|
|
5
|
+
arkindex_base_worker.egg-info/SOURCES.txt
|
|
6
|
+
arkindex_base_worker.egg-info/dependency_links.txt
|
|
7
|
+
arkindex_base_worker.egg-info/requires.txt
|
|
8
|
+
arkindex_base_worker.egg-info/top_level.txt
|
|
9
|
+
arkindex_worker/__init__.py
|
|
10
|
+
arkindex_worker/cache.py
|
|
11
|
+
arkindex_worker/image.py
|
|
12
|
+
arkindex_worker/models.py
|
|
13
|
+
arkindex_worker/utils.py
|
|
14
|
+
arkindex_worker/worker/__init__.py
|
|
15
|
+
arkindex_worker/worker/base.py
|
|
16
|
+
arkindex_worker/worker/classification.py
|
|
17
|
+
arkindex_worker/worker/corpus.py
|
|
18
|
+
arkindex_worker/worker/dataset.py
|
|
19
|
+
arkindex_worker/worker/element.py
|
|
20
|
+
arkindex_worker/worker/entity.py
|
|
21
|
+
arkindex_worker/worker/image.py
|
|
22
|
+
arkindex_worker/worker/metadata.py
|
|
23
|
+
arkindex_worker/worker/process.py
|
|
24
|
+
arkindex_worker/worker/task.py
|
|
25
|
+
arkindex_worker/worker/training.py
|
|
26
|
+
arkindex_worker/worker/transcription.py
|
|
27
|
+
examples/standalone/python/worker.py
|
|
28
|
+
examples/tooled/python/worker.py
|
|
29
|
+
hooks/pre_gen_project.py
|
|
30
|
+
tests/__init__.py
|
|
31
|
+
tests/conftest.py
|
|
32
|
+
tests/test_base_worker.py
|
|
33
|
+
tests/test_cache.py
|
|
34
|
+
tests/test_dataset_worker.py
|
|
35
|
+
tests/test_element.py
|
|
36
|
+
tests/test_image.py
|
|
37
|
+
tests/test_merge.py
|
|
38
|
+
tests/test_modern_config.py
|
|
39
|
+
tests/test_utils.py
|
|
40
|
+
tests/test_elements_worker/__init__.py
|
|
41
|
+
tests/test_elements_worker/test_classification.py
|
|
42
|
+
tests/test_elements_worker/test_cli.py
|
|
43
|
+
tests/test_elements_worker/test_corpus.py
|
|
44
|
+
tests/test_elements_worker/test_dataset.py
|
|
45
|
+
tests/test_elements_worker/test_element.py
|
|
46
|
+
tests/test_elements_worker/test_element_create_multiple.py
|
|
47
|
+
tests/test_elements_worker/test_element_create_single.py
|
|
48
|
+
tests/test_elements_worker/test_element_list_children.py
|
|
49
|
+
tests/test_elements_worker/test_element_list_parents.py
|
|
50
|
+
tests/test_elements_worker/test_entity.py
|
|
51
|
+
tests/test_elements_worker/test_image.py
|
|
52
|
+
tests/test_elements_worker/test_metadata.py
|
|
53
|
+
tests/test_elements_worker/test_process.py
|
|
54
|
+
tests/test_elements_worker/test_task.py
|
|
55
|
+
tests/test_elements_worker/test_training.py
|
|
56
|
+
tests/test_elements_worker/test_transcription_create.py
|
|
57
|
+
tests/test_elements_worker/test_transcription_create_with_elements.py
|
|
58
|
+
tests/test_elements_worker/test_transcription_list.py
|
|
59
|
+
tests/test_elements_worker/test_worker.py
|
|
60
|
+
worker-demo/tests/__init__.py
|
|
61
|
+
worker-demo/tests/conftest.py
|
|
62
|
+
worker-demo/tests/test_worker.py
|
|
63
|
+
worker-demo/worker_demo/__init__.py
|
|
64
|
+
worker-demo/worker_demo/worker.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1,376 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Database mappings and helper methods for the experimental worker caching feature.
|
|
3
|
+
|
|
4
|
+
On methods that support caching, the database will be used for all reads,
|
|
5
|
+
and writes will go both to the Arkindex API and the database,
|
|
6
|
+
reducing network usage.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
import sqlite3
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
from peewee import (
|
|
14
|
+
SQL,
|
|
15
|
+
BooleanField,
|
|
16
|
+
CharField,
|
|
17
|
+
Check,
|
|
18
|
+
CompositeKey,
|
|
19
|
+
Field,
|
|
20
|
+
FloatField,
|
|
21
|
+
ForeignKeyField,
|
|
22
|
+
IntegerField,
|
|
23
|
+
Model,
|
|
24
|
+
OperationalError,
|
|
25
|
+
SqliteDatabase,
|
|
26
|
+
TextField,
|
|
27
|
+
UUIDField,
|
|
28
|
+
)
|
|
29
|
+
from PIL import Image
|
|
30
|
+
|
|
31
|
+
from arkindex_worker import logger
|
|
32
|
+
|
|
33
|
+
# Database handle shared by every cache model. It is created without a path
# and only bound to an actual SQLite file when init_cache_db calls db.init().
db = SqliteDatabase(None)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class JSONField(Field):
    """
    Peewee field persisting a JSON-serializable value in a text column.

    Values are encoded with ``json.dumps`` when written and decoded with
    ``json.loads`` when read. ``None`` is passed through in both directions.
    """

    field_type = "text"

    def db_value(self, value):
        """Encode a Python value as the JSON string stored in the database."""
        return None if value is None else json.dumps(value)

    def python_value(self, value):
        """Decode the stored JSON string back into a Python value."""
        return None if value is None else json.loads(value)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class Version(Model):
    """
    Table storing the version of the cache schema.

    It is used to warn about incompatible cache databases
    when a worker uses an outdated version of ``base-worker``.
    """

    version = IntegerField(primary_key=True)

    class Meta:
        table_name = "version"
        database = db
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
class CachedImage(Model):
    """
    Cache table for the images that cached elements point to.
    """

    id = UUIDField(primary_key=True)
    width = IntegerField()
    height = IntegerField()
    url = TextField()
    # Compared against 3 by CachedElement.open_image to choose the IIIF size keyword
    version = IntegerField(default=2)

    class Meta:
        table_name = "images"
        database = db
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
class CachedElement(Model):
    """
    Cache element table
    """

    id = UUIDField(primary_key=True)
    parent_id = UUIDField(null=True)
    type = CharField(max_length=50)
    image = ForeignKeyField(CachedImage, backref="elements", null=True)
    polygon = JSONField(null=True)
    rotation_angle = IntegerField(default=0)
    mirrored = BooleanField(default=False)
    initial = BooleanField(default=False)
    # Needed to filter elements with cache
    worker_version_id = UUIDField(null=True)
    worker_run_id = UUIDField(null=True)
    confidence = FloatField(null=True)

    class Meta:
        database = db
        table_name = "elements"

    def open_image(
        self,
        *args,
        max_width: int | None = None,
        max_height: int | None = None,
        **kwargs,
    ) -> Image.Image:
        """
        Open this element's image as a Pillow image.
        This does not crop the image to the element's polygon: the image is requested
        for the bounding box of the polygon, or in full when that box has the image's size.
        IIIF servers with maxWidth, maxHeight or maxArea restrictions on image size are not supported.

        Warns:
        ----
           If both, ``max_width`` and ``max_height`` are set, the image ratio is not preserved.
           The maximum size is only applied when the bounding box has the size of the whole image.


        :param *args: Positional arguments passed to [arkindex_worker.image.open_image][]
        :param max_width: The maximum width of the image.
        :param max_height: The maximum height of the image.
        :param **kwargs: Keyword arguments passed to [arkindex_worker.image.open_image][]
        :raises ValueError: When this element does not have an image ID or a polygon.
        :return: A Pillow image.
        """
        # Imported here rather than at module level, as in the original implementation
        from arkindex_worker.image import open_image, polygon_bounding_box

        if not self.image_id or not self.polygon:
            raise ValueError(f"Element {self.id} has no image")

        image = self.image
        bounding_box = polygon_bounding_box(self.polygon)

        # Always fetch the image from the bounding box when size differs from full image
        is_partial = (
            bounding_box.width != image.width or bounding_box.height != image.height
        )
        if is_partial:
            box = f"{bounding_box.x},{bounding_box.y},{bounding_box.width},{bounding_box.height}"
        else:
            box = "full"

        # The image already fits when every provided limit is respected;
        # this is trivially true when no limit is provided at all
        fits = (max_width is None or image.width <= max_width) and (
            max_height is None or image.height <= max_height
        )

        # Do not resize for polygons that do not exactly match the images,
        # as the resize is made directly by the IIIF server using the box parameter
        if is_partial or fits:
            resize = "full"
        else:
            resize = f"{max_width or ''},{max_height or ''}"

        # Use `max` instead of `full` for IIIF 3, since `full` was deprecated in 2.1 then removed in 3.0
        if image.version == 3 and resize == "full":
            resize = "max"

        url = image.url
        if not url.endswith("/"):
            url += "/"

        return open_image(
            f"{url}{box}/{resize}/0/default.jpg",
            *args,
            rotation_angle=self.rotation_angle,
            mirrored=self.mirrored,
            **kwargs,
        )
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
class CachedTranscription(Model):
    """
    Cache table for the transcriptions attached to cached elements.
    """

    id = UUIDField(primary_key=True)
    element = ForeignKeyField(CachedElement, backref="transcriptions")
    text = TextField()
    confidence = FloatField(null=True)
    orientation = CharField(max_length=50)
    # Kept in the cache so that transcriptions can be filtered by their origin
    worker_version_id = UUIDField(null=True)
    worker_run_id = UUIDField(null=True)

    class Meta:
        table_name = "transcriptions"
        database = db
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
class CachedClassification(Model):
    """
    Cache table for the classifications attached to cached elements.
    """

    id = UUIDField(primary_key=True)
    element = ForeignKeyField(CachedElement, backref="classifications")
    class_name = TextField()
    confidence = FloatField()
    state = CharField(max_length=10)
    worker_run_id = UUIDField(null=True)

    class Meta:
        table_name = "classifications"
        database = db
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
class CachedTranscriptionEntity(Model):
    """
    Cache table for the entities located inside cached transcriptions.
    """

    transcription = ForeignKeyField(
        CachedTranscription,
        backref="transcription_entities",
    )
    type = CharField(max_length=50)
    # Position of the entity; the database rejects a negative offset or an empty length
    offset = IntegerField(constraints=[Check("offset >= 0")])
    length = IntegerField(constraints=[Check("length > 0")])
    worker_run_id = UUIDField(null=True)
    confidence = FloatField(null=True)

    class Meta:
        # NOTE(review): with this key, a transcription can hold a single row per
        # entity type - confirm this is the intended uniqueness rule
        primary_key = CompositeKey("transcription", "type")
        table_name = "transcription_entities"
        database = db
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
class CachedDataset(Model):
    """
    Cache table for datasets.
    """

    id = UUIDField(primary_key=True)
    name = CharField()
    # The default value is declared at the SQL level, not on the Python side
    state = CharField(constraints=[SQL("DEFAULT 'open'")])
    sets = TextField()

    class Meta:
        table_name = "datasets"
        database = db
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
class CachedDatasetElement(Model):
    """
    Cache table linking a cached element to a cached dataset under a set name.
    """

    id = UUIDField(primary_key=True)
    element = ForeignKeyField(CachedElement, field="id", column_name="element_id")
    dataset = ForeignKeyField(CachedDataset, field="id", column_name="dataset_id")
    set_name = CharField()

    class Meta:
        table_name = "dataset_elements"
        database = db
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
# Every model managed by the cache must be registered in this list.
# It is used in this module, and also by the unit tests.
MODELS = [
    CachedImage,
    CachedElement,
    CachedTranscription,
    CachedClassification,
    CachedTranscriptionEntity,
    CachedDataset,
    CachedDatasetElement,
]
# Version of the cache schema: written by create_version_table, required by check_version
SQL_VERSION = 5
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
def init_cache_db(path: Path):
    """
    Bind the shared cache database to the given path and connect to it.

    :param path: Where the new database should be created
    """
    # Explicitly ask SQLite to enforce foreign keys and check constraints
    pragmas = {
        "foreign_keys": 1,
        "ignore_check_constraints": 0,
    }
    db.init(path, pragmas=pragmas)
    db.connect()
    logger.info(f"Connected to cache on {path}")
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
def create_tables():
    """
    Create the table of every model listed in ``MODELS``,
    leaving the tables that already exist untouched.
    """
    db.create_tables(MODELS, safe=True)
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
def create_version_table():
    """
    Create the Version table in the cache DB and store ``SQL_VERSION`` in it.

    This is kept apart from the creation of the other tables: the table and
    its one and only Version entry must only be added when the cache is
    created from scratch.
    """
    db.create_tables([Version], safe=True)
    Version.create(version=SQL_VERSION)
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
def check_version(cache_path: str | Path):
    """
    Check that a cache database stores the cache version supported by this module.

    :param cache_path: Path towards a local SQLite database
    :raises AssertionError: When the stored version is missing or differs from ``SQL_VERSION``.
    """
    with SqliteDatabase(cache_path) as provided_db, provided_db.bind_ctx([Version]):
        try:
            # get_or_none also covers a version table that exists but holds no row
            entry = Version.get_or_none()
        except OperationalError:
            # The version could not be queried, e.g. because the table is missing
            entry = None

    version = None if entry is None else entry.version

    # Raised explicitly rather than through an `assert` statement,
    # so that the check is still performed when Python runs with -O
    if version != SQL_VERSION:
        raise AssertionError(
            f"The SQLite database {cache_path} does not have the correct cache version, it should be {SQL_VERSION}"
        )
|
|
320
|
+
|
|
321
|
+
|
|
322
|
+
def merge_parents_cache(paths: list, current_database: Path):
    """
    Merge all the potential parent task's databases into the existing local one

    Rows coming from a parent replace local rows that share the same primary key.

    :param paths: Path to cache databases
    :param current_database: Path to the current database
    :raises AssertionError: When the current database does not exist,
       or when a parent database does not use the expected cache version.
    """
    # Raised explicitly so that the check is kept when Python runs with -O;
    # otherwise sqlite3.connect would silently create an empty database
    if not current_database.exists():
        raise AssertionError(f"The database {current_database} does not exist")

    if not paths:
        logger.info("No parents cache to use")
        return

    # Tables copied from each parent, parents listed before the tables referencing them
    tables = (
        "images",
        "elements",
        "transcriptions",
        "classifications",
        "transcription_entities",
        "datasets",
        "dataset_elements",
    )

    # Open a connection on current database
    connection = sqlite3.connect(current_database)
    try:
        cursor = connection.cursor()

        # Merge each table into the local database
        for idx, path in enumerate(paths):
            # Check that the parent cache uses a compatible version
            check_version(path)

            # Make sure every table exists in the parent before selecting from it
            with SqliteDatabase(path) as source, source.bind_ctx(MODELS):
                source.create_tables(MODELS)

            logger.info(f"Merging parent db {path} into {current_database}")
            cursor.execute("PRAGMA page_size=80000;")
            cursor.execute("PRAGMA synchronous=OFF;")
            # The path is bound as a parameter, so quotes in a file name cannot break the statement
            cursor.execute(f"ATTACH DATABASE ? AS source_{idx};", (str(path),))
            for table in tables:
                cursor.execute(
                    f"REPLACE INTO {table} SELECT * FROM source_{idx}.{table};"
                )
            connection.commit()
    finally:
        # Release the connection (and the attached databases) even when a merge fails
        connection.close()
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
def unsupported_cache(func):
    """
    Decorate an API helper method that does not write to the cache database.

    The helper is called normally and its result returned; when the instance
    has ``use_cache`` enabled, a warning is logged afterwards.

    :param func: The method to decorate. Its first argument must be the instance.
    :return: The decorated method, keeping the name and docstring of ``func``.
    """
    from functools import wraps

    # Preserve the metadata of the helper so that introspection and documentation still work
    @wraps(func)
    def wrapper(self, *args, **kwargs):
        results = func(self, *args, **kwargs)

        if self.use_cache:
            logger.warning(
                f"This API helper `{func.__name__}` did not update the cache database"
            )

        return results

    return wrapper
|