arkindex-base-worker 0.3.6rc4__tar.gz → 0.3.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arkindex_base_worker-0.3.7/LICENSE +21 -0
- arkindex_base_worker-0.3.7/PKG-INFO +77 -0
- arkindex_base_worker-0.3.7/README.md +16 -0
- arkindex_base_worker-0.3.7/arkindex_base_worker.egg-info/PKG-INFO +77 -0
- {arkindex-base-worker-0.3.6rc4 → arkindex_base_worker-0.3.7}/arkindex_base_worker.egg-info/SOURCES.txt +8 -5
- arkindex_base_worker-0.3.7/arkindex_base_worker.egg-info/requires.txt +19 -0
- arkindex_base_worker-0.3.7/arkindex_base_worker.egg-info/top_level.txt +6 -0
- {arkindex-base-worker-0.3.6rc4 → arkindex_base_worker-0.3.7}/arkindex_worker/cache.py +14 -0
- {arkindex-base-worker-0.3.6rc4 → arkindex_base_worker-0.3.7}/arkindex_worker/image.py +29 -19
- {arkindex-base-worker-0.3.6rc4 → arkindex_base_worker-0.3.7}/arkindex_worker/models.py +14 -2
- {arkindex-base-worker-0.3.6rc4 → arkindex_base_worker-0.3.7}/arkindex_worker/utils.py +17 -3
- {arkindex-base-worker-0.3.6rc4 → arkindex_base_worker-0.3.7}/arkindex_worker/worker/__init__.py +122 -125
- {arkindex-base-worker-0.3.6rc4 → arkindex_base_worker-0.3.7}/arkindex_worker/worker/base.py +24 -24
- {arkindex-base-worker-0.3.6rc4 → arkindex_base_worker-0.3.7}/arkindex_worker/worker/classification.py +18 -25
- {arkindex-base-worker-0.3.6rc4 → arkindex_base_worker-0.3.7}/arkindex_worker/worker/dataset.py +24 -18
- {arkindex-base-worker-0.3.6rc4 → arkindex_base_worker-0.3.7}/arkindex_worker/worker/element.py +100 -19
- {arkindex-base-worker-0.3.6rc4 → arkindex_base_worker-0.3.7}/arkindex_worker/worker/entity.py +35 -4
- {arkindex-base-worker-0.3.6rc4 → arkindex_base_worker-0.3.7}/arkindex_worker/worker/metadata.py +21 -11
- {arkindex-base-worker-0.3.6rc4 → arkindex_base_worker-0.3.7}/arkindex_worker/worker/training.py +13 -0
- {arkindex-base-worker-0.3.6rc4 → arkindex_base_worker-0.3.7}/arkindex_worker/worker/transcription.py +45 -5
- {arkindex-base-worker-0.3.6rc4 → arkindex_base_worker-0.3.7}/arkindex_worker/worker/version.py +22 -0
- arkindex_base_worker-0.3.7/hooks/pre_gen_project.py +3 -0
- {arkindex-base-worker-0.3.6rc4 → arkindex_base_worker-0.3.7}/pyproject.toml +36 -8
- arkindex_base_worker-0.3.7/setup.cfg +4 -0
- {arkindex-base-worker-0.3.6rc4 → arkindex_base_worker-0.3.7}/tests/conftest.py +16 -8
- {arkindex-base-worker-0.3.6rc4 → arkindex_base_worker-0.3.7}/tests/test_base_worker.py +0 -6
- arkindex_base_worker-0.3.7/tests/test_dataset_worker.py +728 -0
- {arkindex-base-worker-0.3.6rc4 → arkindex_base_worker-0.3.7}/tests/test_elements_worker/test_classifications.py +365 -539
- {arkindex-base-worker-0.3.6rc4 → arkindex_base_worker-0.3.7}/tests/test_elements_worker/test_cli.py +1 -1
- {arkindex-base-worker-0.3.6rc4 → arkindex_base_worker-0.3.7}/tests/test_elements_worker/test_dataset.py +97 -116
- {arkindex-base-worker-0.3.6rc4 → arkindex_base_worker-0.3.7}/tests/test_elements_worker/test_elements.py +354 -76
- {arkindex-base-worker-0.3.6rc4 → arkindex_base_worker-0.3.7}/tests/test_elements_worker/test_entities.py +22 -2
- {arkindex-base-worker-0.3.6rc4 → arkindex_base_worker-0.3.7}/tests/test_elements_worker/test_metadata.py +53 -27
- {arkindex-base-worker-0.3.6rc4 → arkindex_base_worker-0.3.7}/tests/test_elements_worker/test_training.py +35 -0
- {arkindex-base-worker-0.3.6rc4 → arkindex_base_worker-0.3.7}/tests/test_elements_worker/test_transcriptions.py +149 -16
- {arkindex-base-worker-0.3.6rc4 → arkindex_base_worker-0.3.7}/tests/test_elements_worker/test_worker.py +19 -6
- {arkindex-base-worker-0.3.6rc4 → arkindex_base_worker-0.3.7}/tests/test_image.py +37 -0
- {arkindex-base-worker-0.3.6rc4 → arkindex_base_worker-0.3.7}/tests/test_utils.py +23 -1
- arkindex_base_worker-0.3.7/worker-demo/tests/__init__.py +0 -0
- arkindex_base_worker-0.3.7/worker-demo/tests/conftest.py +32 -0
- arkindex_base_worker-0.3.7/worker-demo/tests/test_worker.py +12 -0
- arkindex_base_worker-0.3.7/worker-demo/worker_demo/__init__.py +6 -0
- arkindex_base_worker-0.3.7/worker-demo/worker_demo/worker.py +19 -0
- arkindex-base-worker-0.3.6rc4/PKG-INFO +0 -47
- arkindex-base-worker-0.3.6rc4/README.md +0 -10
- arkindex-base-worker-0.3.6rc4/arkindex_base_worker.egg-info/PKG-INFO +0 -47
- arkindex-base-worker-0.3.6rc4/arkindex_base_worker.egg-info/requires.txt +0 -17
- arkindex-base-worker-0.3.6rc4/arkindex_base_worker.egg-info/top_level.txt +0 -2
- arkindex-base-worker-0.3.6rc4/docs-requirements.txt +0 -7
- arkindex-base-worker-0.3.6rc4/requirements.txt +0 -8
- arkindex-base-worker-0.3.6rc4/setup.cfg +0 -8
- arkindex-base-worker-0.3.6rc4/setup.py +0 -4
- arkindex-base-worker-0.3.6rc4/tests/test_dataset_worker.py +0 -846
- {arkindex-base-worker-0.3.6rc4 → arkindex_base_worker-0.3.7}/arkindex_base_worker.egg-info/dependency_links.txt +0 -0
- {arkindex-base-worker-0.3.6rc4 → arkindex_base_worker-0.3.7}/arkindex_worker/__init__.py +0 -0
- {arkindex-base-worker-0.3.6rc4 → arkindex_base_worker-0.3.7}/arkindex_worker/worker/task.py +0 -0
- {arkindex-base-worker-0.3.6rc4 → arkindex_base_worker-0.3.7}/tests/__init__.py +0 -0
- {arkindex-base-worker-0.3.6rc4 → arkindex_base_worker-0.3.7}/tests/test_cache.py +0 -0
- {arkindex-base-worker-0.3.6rc4 → arkindex_base_worker-0.3.7}/tests/test_element.py +0 -0
- {arkindex-base-worker-0.3.6rc4 → arkindex_base_worker-0.3.7}/tests/test_elements_worker/__init__.py +0 -0
- {arkindex-base-worker-0.3.6rc4 → arkindex_base_worker-0.3.7}/tests/test_elements_worker/test_task.py +0 -0
- {arkindex-base-worker-0.3.6rc4 → arkindex_base_worker-0.3.7}/tests/test_merge.py +0 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2023 Teklia
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: arkindex-base-worker
|
|
3
|
+
Version: 0.3.7
|
|
4
|
+
Summary: Base Worker to easily build Arkindex ML workflows
|
|
5
|
+
Author-email: Teklia <contact@teklia.com>
|
|
6
|
+
Maintainer-email: Teklia <contact@teklia.com>
|
|
7
|
+
License: MIT License
|
|
8
|
+
|
|
9
|
+
Copyright (c) 2023 Teklia
|
|
10
|
+
|
|
11
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
12
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
13
|
+
in the Software without restriction, including without limitation the rights
|
|
14
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
15
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
16
|
+
furnished to do so, subject to the following conditions:
|
|
17
|
+
|
|
18
|
+
The above copyright notice and this permission notice shall be included in all
|
|
19
|
+
copies or substantial portions of the Software.
|
|
20
|
+
|
|
21
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
22
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
23
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
24
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
25
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
26
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
27
|
+
SOFTWARE.
|
|
28
|
+
|
|
29
|
+
Project-URL: Homepage, https://workers.arkindex.org
|
|
30
|
+
Project-URL: Documentation, https://workers.arkindex.org
|
|
31
|
+
Project-URL: Repository, https://gitlab.teklia.com/workers/base-worker
|
|
32
|
+
Project-URL: Bug Tracker, https://gitlab.teklia.com/workers/base-worker/issues
|
|
33
|
+
Project-URL: Authors, https://teklia.com
|
|
34
|
+
Keywords: python
|
|
35
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
36
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
37
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
38
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
39
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
40
|
+
Classifier: Topic :: Text Processing :: Linguistic
|
|
41
|
+
Requires-Python: >=3.10
|
|
42
|
+
Description-Content-Type: text/markdown
|
|
43
|
+
License-File: LICENSE
|
|
44
|
+
Requires-Dist: peewee==3.17.1
|
|
45
|
+
Requires-Dist: Pillow==10.3.0
|
|
46
|
+
Requires-Dist: pymdown-extensions==10.7.1
|
|
47
|
+
Requires-Dist: python-gnupg==0.5.2
|
|
48
|
+
Requires-Dist: shapely==2.0.3
|
|
49
|
+
Requires-Dist: teklia-toolbox==0.1.4
|
|
50
|
+
Requires-Dist: zstandard==0.22.0
|
|
51
|
+
Provides-Extra: docs
|
|
52
|
+
Requires-Dist: black==24.4.0; extra == "docs"
|
|
53
|
+
Requires-Dist: doc8==1.1.1; extra == "docs"
|
|
54
|
+
Requires-Dist: mkdocs-material==9.5.17; extra == "docs"
|
|
55
|
+
Requires-Dist: mkdocstrings-python==1.9.2; extra == "docs"
|
|
56
|
+
Requires-Dist: recommonmark==0.7.1; extra == "docs"
|
|
57
|
+
Provides-Extra: tests
|
|
58
|
+
Requires-Dist: pytest==8.1.1; extra == "tests"
|
|
59
|
+
Requires-Dist: pytest-mock==3.14.0; extra == "tests"
|
|
60
|
+
Requires-Dist: pytest-responses==0.5.1; extra == "tests"
|
|
61
|
+
|
|
62
|
+
# Arkindex base Worker
|
|
63
|
+
|
|
64
|
+
An easy to use Python 3 high level API client, to build ML tasks.
|
|
65
|
+
|
|
66
|
+
This is an open-source project, licensed using [the MIT license](https://opensource.org/license/mit/).
|
|
67
|
+
|
|
68
|
+
## Documentation
|
|
69
|
+
|
|
70
|
+
The [documentation](https://workers.arkindex.org/) is made with [Material for MkDocs](https://github.com/squidfunk/mkdocs-material) and is hosted by [GitLab Pages](https://docs.gitlab.com/ee/user/project/pages/).
|
|
71
|
+
|
|
72
|
+
## Create a new worker using our template
|
|
73
|
+
|
|
74
|
+
```
|
|
75
|
+
pip install --user cookiecutter
|
|
76
|
+
cookiecutter git@gitlab.teklia.com:workers/base-worker.git
|
|
77
|
+
```
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# Arkindex base Worker
|
|
2
|
+
|
|
3
|
+
An easy to use Python 3 high level API client, to build ML tasks.
|
|
4
|
+
|
|
5
|
+
This is an open-source project, licensed using [the MIT license](https://opensource.org/license/mit/).
|
|
6
|
+
|
|
7
|
+
## Documentation
|
|
8
|
+
|
|
9
|
+
The [documentation](https://workers.arkindex.org/) is made with [Material for MkDocs](https://github.com/squidfunk/mkdocs-material) and is hosted by [GitLab Pages](https://docs.gitlab.com/ee/user/project/pages/).
|
|
10
|
+
|
|
11
|
+
## Create a new worker using our template
|
|
12
|
+
|
|
13
|
+
```
|
|
14
|
+
pip install --user cookiecutter
|
|
15
|
+
cookiecutter git@gitlab.teklia.com:workers/base-worker.git
|
|
16
|
+
```
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: arkindex-base-worker
|
|
3
|
+
Version: 0.3.7
|
|
4
|
+
Summary: Base Worker to easily build Arkindex ML workflows
|
|
5
|
+
Author-email: Teklia <contact@teklia.com>
|
|
6
|
+
Maintainer-email: Teklia <contact@teklia.com>
|
|
7
|
+
License: MIT License
|
|
8
|
+
|
|
9
|
+
Copyright (c) 2023 Teklia
|
|
10
|
+
|
|
11
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
12
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
13
|
+
in the Software without restriction, including without limitation the rights
|
|
14
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
15
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
16
|
+
furnished to do so, subject to the following conditions:
|
|
17
|
+
|
|
18
|
+
The above copyright notice and this permission notice shall be included in all
|
|
19
|
+
copies or substantial portions of the Software.
|
|
20
|
+
|
|
21
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
22
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
23
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
24
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
25
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
26
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
27
|
+
SOFTWARE.
|
|
28
|
+
|
|
29
|
+
Project-URL: Homepage, https://workers.arkindex.org
|
|
30
|
+
Project-URL: Documentation, https://workers.arkindex.org
|
|
31
|
+
Project-URL: Repository, https://gitlab.teklia.com/workers/base-worker
|
|
32
|
+
Project-URL: Bug Tracker, https://gitlab.teklia.com/workers/base-worker/issues
|
|
33
|
+
Project-URL: Authors, https://teklia.com
|
|
34
|
+
Keywords: python
|
|
35
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
36
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
37
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
38
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
39
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
40
|
+
Classifier: Topic :: Text Processing :: Linguistic
|
|
41
|
+
Requires-Python: >=3.10
|
|
42
|
+
Description-Content-Type: text/markdown
|
|
43
|
+
License-File: LICENSE
|
|
44
|
+
Requires-Dist: peewee==3.17.1
|
|
45
|
+
Requires-Dist: Pillow==10.3.0
|
|
46
|
+
Requires-Dist: pymdown-extensions==10.7.1
|
|
47
|
+
Requires-Dist: python-gnupg==0.5.2
|
|
48
|
+
Requires-Dist: shapely==2.0.3
|
|
49
|
+
Requires-Dist: teklia-toolbox==0.1.4
|
|
50
|
+
Requires-Dist: zstandard==0.22.0
|
|
51
|
+
Provides-Extra: docs
|
|
52
|
+
Requires-Dist: black==24.4.0; extra == "docs"
|
|
53
|
+
Requires-Dist: doc8==1.1.1; extra == "docs"
|
|
54
|
+
Requires-Dist: mkdocs-material==9.5.17; extra == "docs"
|
|
55
|
+
Requires-Dist: mkdocstrings-python==1.9.2; extra == "docs"
|
|
56
|
+
Requires-Dist: recommonmark==0.7.1; extra == "docs"
|
|
57
|
+
Provides-Extra: tests
|
|
58
|
+
Requires-Dist: pytest==8.1.1; extra == "tests"
|
|
59
|
+
Requires-Dist: pytest-mock==3.14.0; extra == "tests"
|
|
60
|
+
Requires-Dist: pytest-responses==0.5.1; extra == "tests"
|
|
61
|
+
|
|
62
|
+
# Arkindex base Worker
|
|
63
|
+
|
|
64
|
+
An easy to use Python 3 high level API client, to build ML tasks.
|
|
65
|
+
|
|
66
|
+
This is an open-source project, licensed using [the MIT license](https://opensource.org/license/mit/).
|
|
67
|
+
|
|
68
|
+
## Documentation
|
|
69
|
+
|
|
70
|
+
The [documentation](https://workers.arkindex.org/) is made with [Material for MkDocs](https://github.com/squidfunk/mkdocs-material) and is hosted by [GitLab Pages](https://docs.gitlab.com/ee/user/project/pages/).
|
|
71
|
+
|
|
72
|
+
## Create a new worker using our template
|
|
73
|
+
|
|
74
|
+
```
|
|
75
|
+
pip install --user cookiecutter
|
|
76
|
+
cookiecutter git@gitlab.teklia.com:workers/base-worker.git
|
|
77
|
+
```
|
|
@@ -1,9 +1,6 @@
|
|
|
1
|
+
LICENSE
|
|
1
2
|
README.md
|
|
2
|
-
docs-requirements.txt
|
|
3
3
|
pyproject.toml
|
|
4
|
-
requirements.txt
|
|
5
|
-
setup.cfg
|
|
6
|
-
setup.py
|
|
7
4
|
arkindex_base_worker.egg-info/PKG-INFO
|
|
8
5
|
arkindex_base_worker.egg-info/SOURCES.txt
|
|
9
6
|
arkindex_base_worker.egg-info/dependency_links.txt
|
|
@@ -25,6 +22,7 @@ arkindex_worker/worker/task.py
|
|
|
25
22
|
arkindex_worker/worker/training.py
|
|
26
23
|
arkindex_worker/worker/transcription.py
|
|
27
24
|
arkindex_worker/worker/version.py
|
|
25
|
+
hooks/pre_gen_project.py
|
|
28
26
|
tests/__init__.py
|
|
29
27
|
tests/conftest.py
|
|
30
28
|
tests/test_base_worker.py
|
|
@@ -44,4 +42,9 @@ tests/test_elements_worker/test_metadata.py
|
|
|
44
42
|
tests/test_elements_worker/test_task.py
|
|
45
43
|
tests/test_elements_worker/test_training.py
|
|
46
44
|
tests/test_elements_worker/test_transcriptions.py
|
|
47
|
-
tests/test_elements_worker/test_worker.py
|
|
45
|
+
tests/test_elements_worker/test_worker.py
|
|
46
|
+
worker-demo/tests/__init__.py
|
|
47
|
+
worker-demo/tests/conftest.py
|
|
48
|
+
worker-demo/tests/test_worker.py
|
|
49
|
+
worker-demo/worker_demo/__init__.py
|
|
50
|
+
worker-demo/worker_demo/worker.py
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
peewee==3.17.1
|
|
2
|
+
Pillow==10.3.0
|
|
3
|
+
pymdown-extensions==10.7.1
|
|
4
|
+
python-gnupg==0.5.2
|
|
5
|
+
shapely==2.0.3
|
|
6
|
+
teklia-toolbox==0.1.4
|
|
7
|
+
zstandard==0.22.0
|
|
8
|
+
|
|
9
|
+
[docs]
|
|
10
|
+
black==24.4.0
|
|
11
|
+
doc8==1.1.1
|
|
12
|
+
mkdocs-material==9.5.17
|
|
13
|
+
mkdocstrings-python==1.9.2
|
|
14
|
+
recommonmark==0.7.1
|
|
15
|
+
|
|
16
|
+
[tests]
|
|
17
|
+
pytest==8.1.1
|
|
18
|
+
pytest-mock==3.14.0
|
|
19
|
+
pytest-responses==0.5.1
|
|
@@ -374,3 +374,17 @@ def merge_parents_cache(paths: list, current_database: Path):
|
|
|
374
374
|
for statement in statements:
|
|
375
375
|
cursor.execute(statement)
|
|
376
376
|
connection.commit()
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
def unsupported_cache(func):
|
|
380
|
+
def wrapper(self, *args, **kwargs):
|
|
381
|
+
results = func(self, *args, **kwargs)
|
|
382
|
+
|
|
383
|
+
if not (self.is_read_only or self.use_cache):
|
|
384
|
+
logger.warning(
|
|
385
|
+
f"This API helper `{func.__name__}` did not update the cache database"
|
|
386
|
+
)
|
|
387
|
+
|
|
388
|
+
return results
|
|
389
|
+
|
|
390
|
+
return wrapper
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
"""
|
|
2
2
|
Helper methods to download and open IIIF images, and manage polygons.
|
|
3
3
|
"""
|
|
4
|
+
|
|
4
5
|
import re
|
|
5
6
|
from collections import namedtuple
|
|
6
7
|
from io import BytesIO
|
|
@@ -20,6 +21,7 @@ from tenacity import (
|
|
|
20
21
|
)
|
|
21
22
|
|
|
22
23
|
from arkindex_worker import logger
|
|
24
|
+
from teklia_toolbox.requests import should_verify_cert
|
|
23
25
|
|
|
24
26
|
# Avoid circular imports error when type checking
|
|
25
27
|
if TYPE_CHECKING:
|
|
@@ -114,32 +116,38 @@ def download_image(url: str) -> Image:
|
|
|
114
116
|
)
|
|
115
117
|
else:
|
|
116
118
|
raise e
|
|
117
|
-
except requests.exceptions.SSLError:
|
|
118
|
-
logger.warning(
|
|
119
|
-
"An SSLError occurred during image download, retrying with a weaker and unsafe SSL configuration"
|
|
120
|
-
)
|
|
121
|
-
|
|
122
|
-
# Saving current ciphers
|
|
123
|
-
previous_ciphers = requests.packages.urllib3.util.ssl_.DEFAULT_CIPHERS
|
|
124
|
-
|
|
125
|
-
# Downgrading ciphers to download the image
|
|
126
|
-
requests.packages.urllib3.util.ssl_.DEFAULT_CIPHERS = "ALL:@SECLEVEL=1"
|
|
127
|
-
resp = _retried_request(url)
|
|
128
|
-
|
|
129
|
-
# Restoring previous ciphers
|
|
130
|
-
requests.packages.urllib3.util.ssl_.DEFAULT_CIPHERS = previous_ciphers
|
|
131
119
|
|
|
132
120
|
# Preprocess the image and prepare it for classification
|
|
133
121
|
image = Image.open(BytesIO(resp.content))
|
|
134
122
|
logger.info(
|
|
135
|
-
"Downloaded image {} - size={}x{} in {}"
|
|
136
|
-
url, image.size[0], image.size[1], resp.elapsed
|
|
137
|
-
)
|
|
123
|
+
f"Downloaded image {url} - size={image.size[0]}x{image.size[1]} in {resp.elapsed}"
|
|
138
124
|
)
|
|
139
125
|
|
|
140
126
|
return image
|
|
141
127
|
|
|
142
128
|
|
|
129
|
+
def upload_image(image: Image, url: str) -> requests.Response:
|
|
130
|
+
"""
|
|
131
|
+
Upload a Pillow image to a URL.
|
|
132
|
+
|
|
133
|
+
:param image: Pillow image to upload.
|
|
134
|
+
:param url: Destination URL.
|
|
135
|
+
:returns: The upload response.
|
|
136
|
+
"""
|
|
137
|
+
assert url.startswith("http"), "Destination URL for the image must be HTTP(S)"
|
|
138
|
+
|
|
139
|
+
# Retrieve a binarized version of the image
|
|
140
|
+
image_bytes = BytesIO()
|
|
141
|
+
image.save(image_bytes, format="jpeg")
|
|
142
|
+
image_bytes.seek(0)
|
|
143
|
+
|
|
144
|
+
# Upload the image
|
|
145
|
+
resp = _retried_request(url, method=requests.put, data=image_bytes)
|
|
146
|
+
logger.info(f"Uploaded image to {url} in {resp.elapsed}")
|
|
147
|
+
|
|
148
|
+
return resp
|
|
149
|
+
|
|
150
|
+
|
|
143
151
|
def polygon_bounding_box(polygon: list[list[int | float]]) -> BoundingBox:
|
|
144
152
|
"""
|
|
145
153
|
Compute the rectangle bounding box of a polygon.
|
|
@@ -167,8 +175,10 @@ def _retry_log(retry_state, *args, **kwargs):
|
|
|
167
175
|
before_sleep=_retry_log,
|
|
168
176
|
reraise=True,
|
|
169
177
|
)
|
|
170
|
-
def _retried_request(url):
|
|
171
|
-
resp =
|
|
178
|
+
def _retried_request(url, *args, method=requests.get, **kwargs):
|
|
179
|
+
resp = method(
|
|
180
|
+
url, *args, timeout=DOWNLOAD_TIMEOUT, verify=should_verify_cert(url), **kwargs
|
|
181
|
+
)
|
|
172
182
|
resp.raise_for_status()
|
|
173
183
|
return resp
|
|
174
184
|
|
|
@@ -20,6 +20,8 @@ class MagicDict(dict):
|
|
|
20
20
|
Automagically convert lists and dicts to MagicDicts and lists of MagicDicts
|
|
21
21
|
Allows for nested access: foo.bar.baz
|
|
22
22
|
"""
|
|
23
|
+
if isinstance(item, Dataset):
|
|
24
|
+
return item
|
|
23
25
|
if isinstance(item, list):
|
|
24
26
|
return list(map(self._magify, item))
|
|
25
27
|
if isinstance(item, dict):
|
|
@@ -75,10 +77,10 @@ class Element(MagicDict):
|
|
|
75
77
|
|
|
76
78
|
def image_url(self, size: str = "full") -> str | None:
|
|
77
79
|
"""
|
|
78
|
-
Build
|
|
80
|
+
Build a URL to access the image.
|
|
79
81
|
When possible, will return the S3 URL for images, so an ML worker can bypass IIIF servers.
|
|
80
82
|
:param size: Subresolution of the image, following the syntax of the IIIF resize parameter.
|
|
81
|
-
:returns:
|
|
83
|
+
:returns: A URL to the image, or None if the element does not have an image.
|
|
82
84
|
"""
|
|
83
85
|
if not self.get("zone"):
|
|
84
86
|
return
|
|
@@ -272,6 +274,16 @@ class Dataset(ArkindexModel):
|
|
|
272
274
|
return f"{self.id}.tar.zst"
|
|
273
275
|
|
|
274
276
|
|
|
277
|
+
class Set(MagicDict):
|
|
278
|
+
"""
|
|
279
|
+
Describes an Arkindex dataset set.
|
|
280
|
+
"""
|
|
281
|
+
|
|
282
|
+
def __str__(self):
|
|
283
|
+
# Not using ArkindexModel.__str__ as we do not retrieve the Set ID
|
|
284
|
+
return f"{self.__class__.__name__} ({self.name}) from {self.dataset}"
|
|
285
|
+
|
|
286
|
+
|
|
275
287
|
class Artifact(ArkindexModel):
|
|
276
288
|
"""
|
|
277
289
|
Describes an Arkindex artifact.
|
|
@@ -10,6 +10,19 @@ import zstandard as zstd
|
|
|
10
10
|
|
|
11
11
|
logger = logging.getLogger(__name__)
|
|
12
12
|
|
|
13
|
+
MANUAL_SOURCE = "manual"
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def parse_source_id(value: str) -> bool | str | None:
|
|
17
|
+
"""
|
|
18
|
+
Parse a UUID argument (Worker Version, Worker Run, ...) to use it directly in the API.
|
|
19
|
+
Arkindex API filters generally expect `False` to filter manual sources.
|
|
20
|
+
"""
|
|
21
|
+
if value == MANUAL_SOURCE:
|
|
22
|
+
return False
|
|
23
|
+
return value or None
|
|
24
|
+
|
|
25
|
+
|
|
13
26
|
CHUNK_SIZE = 1024
|
|
14
27
|
"""Chunk Size used for ZSTD compression"""
|
|
15
28
|
|
|
@@ -31,9 +44,10 @@ def decompress_zst_archive(compressed_archive: Path) -> tuple[int, Path]:
|
|
|
31
44
|
|
|
32
45
|
logger.debug(f"Uncompressing file to {archive_path}")
|
|
33
46
|
try:
|
|
34
|
-
with
|
|
35
|
-
"
|
|
36
|
-
|
|
47
|
+
with (
|
|
48
|
+
compressed_archive.open("rb") as compressed,
|
|
49
|
+
archive_path.open("wb") as decompressed,
|
|
50
|
+
):
|
|
37
51
|
dctx.copy_stream(compressed, decompressed)
|
|
38
52
|
logger.debug(f"Successfully uncompressed archive {compressed_archive}")
|
|
39
53
|
except zstandard.ZstdError as e:
|