clarifai 11.3.0rc2__py3-none-any.whl → 11.4.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- clarifai/__init__.py +1 -1
- clarifai/cli/__main__.py +1 -1
- clarifai/cli/base.py +144 -136
- clarifai/cli/compute_cluster.py +45 -31
- clarifai/cli/deployment.py +93 -76
- clarifai/cli/model.py +578 -180
- clarifai/cli/nodepool.py +100 -82
- clarifai/client/__init__.py +12 -2
- clarifai/client/app.py +973 -911
- clarifai/client/auth/helper.py +345 -342
- clarifai/client/auth/register.py +7 -7
- clarifai/client/auth/stub.py +107 -106
- clarifai/client/base.py +185 -178
- clarifai/client/compute_cluster.py +214 -180
- clarifai/client/dataset.py +793 -698
- clarifai/client/deployment.py +55 -50
- clarifai/client/input.py +1223 -1088
- clarifai/client/lister.py +47 -45
- clarifai/client/model.py +1939 -1717
- clarifai/client/model_client.py +525 -502
- clarifai/client/module.py +82 -73
- clarifai/client/nodepool.py +358 -213
- clarifai/client/runner.py +58 -0
- clarifai/client/search.py +342 -309
- clarifai/client/user.py +419 -414
- clarifai/client/workflow.py +294 -274
- clarifai/constants/dataset.py +11 -17
- clarifai/constants/model.py +8 -2
- clarifai/datasets/export/inputs_annotations.py +233 -217
- clarifai/datasets/upload/base.py +63 -51
- clarifai/datasets/upload/features.py +43 -38
- clarifai/datasets/upload/image.py +237 -207
- clarifai/datasets/upload/loaders/coco_captions.py +34 -32
- clarifai/datasets/upload/loaders/coco_detection.py +72 -65
- clarifai/datasets/upload/loaders/imagenet_classification.py +57 -53
- clarifai/datasets/upload/loaders/xview_detection.py +274 -132
- clarifai/datasets/upload/multimodal.py +55 -46
- clarifai/datasets/upload/text.py +55 -47
- clarifai/datasets/upload/utils.py +250 -234
- clarifai/errors.py +51 -50
- clarifai/models/api.py +260 -238
- clarifai/modules/css.py +50 -50
- clarifai/modules/pages.py +33 -33
- clarifai/rag/rag.py +312 -288
- clarifai/rag/utils.py +91 -84
- clarifai/runners/models/model_builder.py +906 -802
- clarifai/runners/models/model_class.py +370 -331
- clarifai/runners/models/model_run_locally.py +459 -419
- clarifai/runners/models/model_runner.py +170 -162
- clarifai/runners/models/model_servicer.py +78 -70
- clarifai/runners/server.py +111 -101
- clarifai/runners/utils/code_script.py +225 -187
- clarifai/runners/utils/const.py +4 -1
- clarifai/runners/utils/data_types/__init__.py +12 -0
- clarifai/runners/utils/data_types/data_types.py +598 -0
- clarifai/runners/utils/data_utils.py +387 -440
- clarifai/runners/utils/loader.py +247 -227
- clarifai/runners/utils/method_signatures.py +411 -386
- clarifai/runners/utils/openai_convertor.py +108 -109
- clarifai/runners/utils/serializers.py +175 -179
- clarifai/runners/utils/url_fetcher.py +35 -35
- clarifai/schema/search.py +56 -63
- clarifai/urls/helper.py +125 -102
- clarifai/utils/cli.py +129 -123
- clarifai/utils/config.py +127 -87
- clarifai/utils/constants.py +49 -0
- clarifai/utils/evaluation/helpers.py +503 -466
- clarifai/utils/evaluation/main.py +431 -393
- clarifai/utils/evaluation/testset_annotation_parser.py +154 -144
- clarifai/utils/logging.py +324 -306
- clarifai/utils/misc.py +60 -56
- clarifai/utils/model_train.py +165 -146
- clarifai/utils/protobuf.py +126 -103
- clarifai/versions.py +3 -1
- clarifai/workflows/export.py +48 -50
- clarifai/workflows/utils.py +39 -36
- clarifai/workflows/validate.py +55 -43
- {clarifai-11.3.0rc2.dist-info → clarifai-11.4.0.dist-info}/METADATA +16 -6
- clarifai-11.4.0.dist-info/RECORD +109 -0
- {clarifai-11.3.0rc2.dist-info → clarifai-11.4.0.dist-info}/WHEEL +1 -1
- clarifai/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/__pycache__/errors.cpython-310.pyc +0 -0
- clarifai/__pycache__/errors.cpython-311.pyc +0 -0
- clarifai/__pycache__/versions.cpython-310.pyc +0 -0
- clarifai/__pycache__/versions.cpython-311.pyc +0 -0
- clarifai/cli/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/cli/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/cli/__pycache__/base.cpython-310.pyc +0 -0
- clarifai/cli/__pycache__/base.cpython-311.pyc +0 -0
- clarifai/cli/__pycache__/base_cli.cpython-310.pyc +0 -0
- clarifai/cli/__pycache__/compute_cluster.cpython-310.pyc +0 -0
- clarifai/cli/__pycache__/compute_cluster.cpython-311.pyc +0 -0
- clarifai/cli/__pycache__/deployment.cpython-310.pyc +0 -0
- clarifai/cli/__pycache__/deployment.cpython-311.pyc +0 -0
- clarifai/cli/__pycache__/model.cpython-310.pyc +0 -0
- clarifai/cli/__pycache__/model.cpython-311.pyc +0 -0
- clarifai/cli/__pycache__/model_cli.cpython-310.pyc +0 -0
- clarifai/cli/__pycache__/nodepool.cpython-310.pyc +0 -0
- clarifai/cli/__pycache__/nodepool.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/client/__pycache__/app.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/app.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/app.cpython-39.pyc +0 -0
- clarifai/client/__pycache__/base.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/base.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/compute_cluster.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/compute_cluster.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/dataset.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/dataset.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/deployment.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/deployment.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/input.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/input.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/lister.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/lister.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/model.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/model.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/module.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/module.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/nodepool.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/nodepool.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/search.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/search.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/user.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/user.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/workflow.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/workflow.cpython-311.pyc +0 -0
- clarifai/client/auth/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/client/auth/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/client/auth/__pycache__/helper.cpython-310.pyc +0 -0
- clarifai/client/auth/__pycache__/helper.cpython-311.pyc +0 -0
- clarifai/client/auth/__pycache__/register.cpython-310.pyc +0 -0
- clarifai/client/auth/__pycache__/register.cpython-311.pyc +0 -0
- clarifai/client/auth/__pycache__/stub.cpython-310.pyc +0 -0
- clarifai/client/auth/__pycache__/stub.cpython-311.pyc +0 -0
- clarifai/client/cli/__init__.py +0 -0
- clarifai/client/cli/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/client/cli/__pycache__/base_cli.cpython-310.pyc +0 -0
- clarifai/client/cli/__pycache__/model_cli.cpython-310.pyc +0 -0
- clarifai/client/cli/base_cli.py +0 -88
- clarifai/client/cli/model_cli.py +0 -29
- clarifai/constants/__pycache__/base.cpython-310.pyc +0 -0
- clarifai/constants/__pycache__/base.cpython-311.pyc +0 -0
- clarifai/constants/__pycache__/dataset.cpython-310.pyc +0 -0
- clarifai/constants/__pycache__/dataset.cpython-311.pyc +0 -0
- clarifai/constants/__pycache__/input.cpython-310.pyc +0 -0
- clarifai/constants/__pycache__/input.cpython-311.pyc +0 -0
- clarifai/constants/__pycache__/model.cpython-310.pyc +0 -0
- clarifai/constants/__pycache__/model.cpython-311.pyc +0 -0
- clarifai/constants/__pycache__/rag.cpython-310.pyc +0 -0
- clarifai/constants/__pycache__/rag.cpython-311.pyc +0 -0
- clarifai/constants/__pycache__/search.cpython-310.pyc +0 -0
- clarifai/constants/__pycache__/search.cpython-311.pyc +0 -0
- clarifai/constants/__pycache__/workflow.cpython-310.pyc +0 -0
- clarifai/constants/__pycache__/workflow.cpython-311.pyc +0 -0
- clarifai/datasets/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/datasets/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/datasets/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/datasets/export/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/datasets/export/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/datasets/export/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/datasets/export/__pycache__/inputs_annotations.cpython-310.pyc +0 -0
- clarifai/datasets/export/__pycache__/inputs_annotations.cpython-311.pyc +0 -0
- clarifai/datasets/upload/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/datasets/upload/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/datasets/upload/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/datasets/upload/__pycache__/base.cpython-310.pyc +0 -0
- clarifai/datasets/upload/__pycache__/base.cpython-311.pyc +0 -0
- clarifai/datasets/upload/__pycache__/features.cpython-310.pyc +0 -0
- clarifai/datasets/upload/__pycache__/features.cpython-311.pyc +0 -0
- clarifai/datasets/upload/__pycache__/image.cpython-310.pyc +0 -0
- clarifai/datasets/upload/__pycache__/image.cpython-311.pyc +0 -0
- clarifai/datasets/upload/__pycache__/multimodal.cpython-310.pyc +0 -0
- clarifai/datasets/upload/__pycache__/multimodal.cpython-311.pyc +0 -0
- clarifai/datasets/upload/__pycache__/text.cpython-310.pyc +0 -0
- clarifai/datasets/upload/__pycache__/text.cpython-311.pyc +0 -0
- clarifai/datasets/upload/__pycache__/utils.cpython-310.pyc +0 -0
- clarifai/datasets/upload/__pycache__/utils.cpython-311.pyc +0 -0
- clarifai/datasets/upload/loaders/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/datasets/upload/loaders/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/datasets/upload/loaders/__pycache__/coco_detection.cpython-311.pyc +0 -0
- clarifai/datasets/upload/loaders/__pycache__/imagenet_classification.cpython-311.pyc +0 -0
- clarifai/models/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/modules/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/rag/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/rag/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/rag/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/rag/__pycache__/rag.cpython-310.pyc +0 -0
- clarifai/rag/__pycache__/rag.cpython-311.pyc +0 -0
- clarifai/rag/__pycache__/rag.cpython-39.pyc +0 -0
- clarifai/rag/__pycache__/utils.cpython-310.pyc +0 -0
- clarifai/rag/__pycache__/utils.cpython-311.pyc +0 -0
- clarifai/runners/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/runners/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/runners/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/runners/dockerfile_template/Dockerfile.cpu.template +0 -31
- clarifai/runners/dockerfile_template/Dockerfile.cuda.template +0 -42
- clarifai/runners/dockerfile_template/Dockerfile.nim +0 -71
- clarifai/runners/models/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/runners/models/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/runners/models/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/runners/models/__pycache__/base_typed_model.cpython-310.pyc +0 -0
- clarifai/runners/models/__pycache__/base_typed_model.cpython-311.pyc +0 -0
- clarifai/runners/models/__pycache__/base_typed_model.cpython-39.pyc +0 -0
- clarifai/runners/models/__pycache__/model_builder.cpython-311.pyc +0 -0
- clarifai/runners/models/__pycache__/model_class.cpython-310.pyc +0 -0
- clarifai/runners/models/__pycache__/model_class.cpython-311.pyc +0 -0
- clarifai/runners/models/__pycache__/model_run_locally.cpython-310-pytest-7.1.2.pyc +0 -0
- clarifai/runners/models/__pycache__/model_run_locally.cpython-310.pyc +0 -0
- clarifai/runners/models/__pycache__/model_run_locally.cpython-311.pyc +0 -0
- clarifai/runners/models/__pycache__/model_runner.cpython-310.pyc +0 -0
- clarifai/runners/models/__pycache__/model_runner.cpython-311.pyc +0 -0
- clarifai/runners/models/__pycache__/model_upload.cpython-310.pyc +0 -0
- clarifai/runners/models/base_typed_model.py +0 -238
- clarifai/runners/models/model_class_refract.py +0 -80
- clarifai/runners/models/model_upload.py +0 -607
- clarifai/runners/models/temp.py +0 -25
- clarifai/runners/utils/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/runners/utils/__pycache__/__init__.cpython-38.pyc +0 -0
- clarifai/runners/utils/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/buffered_stream.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/buffered_stream.cpython-38.pyc +0 -0
- clarifai/runners/utils/__pycache__/buffered_stream.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/const.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/const.cpython-311.pyc +0 -0
- clarifai/runners/utils/__pycache__/constants.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/constants.cpython-38.pyc +0 -0
- clarifai/runners/utils/__pycache__/constants.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/data_handler.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/data_handler.cpython-311.pyc +0 -0
- clarifai/runners/utils/__pycache__/data_handler.cpython-38.pyc +0 -0
- clarifai/runners/utils/__pycache__/data_handler.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/data_utils.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/data_utils.cpython-311.pyc +0 -0
- clarifai/runners/utils/__pycache__/data_utils.cpython-38.pyc +0 -0
- clarifai/runners/utils/__pycache__/data_utils.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/grpc_server.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/grpc_server.cpython-38.pyc +0 -0
- clarifai/runners/utils/__pycache__/grpc_server.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/health.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/health.cpython-38.pyc +0 -0
- clarifai/runners/utils/__pycache__/health.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/loader.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/loader.cpython-311.pyc +0 -0
- clarifai/runners/utils/__pycache__/logging.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/logging.cpython-38.pyc +0 -0
- clarifai/runners/utils/__pycache__/logging.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/stream_source.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/stream_source.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/url_fetcher.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/url_fetcher.cpython-311.pyc +0 -0
- clarifai/runners/utils/__pycache__/url_fetcher.cpython-38.pyc +0 -0
- clarifai/runners/utils/__pycache__/url_fetcher.cpython-39.pyc +0 -0
- clarifai/runners/utils/data_handler.py +0 -231
- clarifai/runners/utils/data_handler_refract.py +0 -213
- clarifai/runners/utils/data_types.py +0 -469
- clarifai/runners/utils/logger.py +0 -0
- clarifai/runners/utils/openai_format.py +0 -87
- clarifai/schema/__pycache__/search.cpython-310.pyc +0 -0
- clarifai/schema/__pycache__/search.cpython-311.pyc +0 -0
- clarifai/urls/__pycache__/helper.cpython-310.pyc +0 -0
- clarifai/urls/__pycache__/helper.cpython-311.pyc +0 -0
- clarifai/utils/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/utils/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/utils/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/utils/__pycache__/cli.cpython-310.pyc +0 -0
- clarifai/utils/__pycache__/cli.cpython-311.pyc +0 -0
- clarifai/utils/__pycache__/config.cpython-311.pyc +0 -0
- clarifai/utils/__pycache__/constants.cpython-310.pyc +0 -0
- clarifai/utils/__pycache__/constants.cpython-311.pyc +0 -0
- clarifai/utils/__pycache__/logging.cpython-310.pyc +0 -0
- clarifai/utils/__pycache__/logging.cpython-311.pyc +0 -0
- clarifai/utils/__pycache__/misc.cpython-310.pyc +0 -0
- clarifai/utils/__pycache__/misc.cpython-311.pyc +0 -0
- clarifai/utils/__pycache__/model_train.cpython-310.pyc +0 -0
- clarifai/utils/__pycache__/model_train.cpython-311.pyc +0 -0
- clarifai/utils/__pycache__/protobuf.cpython-311.pyc +0 -0
- clarifai/utils/evaluation/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/utils/evaluation/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/utils/evaluation/__pycache__/helpers.cpython-311.pyc +0 -0
- clarifai/utils/evaluation/__pycache__/main.cpython-311.pyc +0 -0
- clarifai/utils/evaluation/__pycache__/main.cpython-39.pyc +0 -0
- clarifai/workflows/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/workflows/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/workflows/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/workflows/__pycache__/export.cpython-310.pyc +0 -0
- clarifai/workflows/__pycache__/export.cpython-311.pyc +0 -0
- clarifai/workflows/__pycache__/utils.cpython-310.pyc +0 -0
- clarifai/workflows/__pycache__/utils.cpython-311.pyc +0 -0
- clarifai/workflows/__pycache__/validate.cpython-310.pyc +0 -0
- clarifai/workflows/__pycache__/validate.cpython-311.pyc +0 -0
- clarifai-11.3.0rc2.dist-info/RECORD +0 -322
- {clarifai-11.3.0rc2.dist-info → clarifai-11.4.0.dist-info}/entry_points.txt +0 -0
- {clarifai-11.3.0rc2.dist-info → clarifai-11.4.0.dist-info/licenses}/LICENSE +0 -0
- {clarifai-11.3.0rc2.dist-info → clarifai-11.4.0.dist-info}/top_level.txt +0 -0
```diff
@@ -18,223 +18,239 @@ from clarifai.utils.logging import logger
 
 
 class DatasetExportReader:
+    def __init__(
+        self,
+        session: requests.Session = None,
+        archive_url: Optional[str] = None,
+        local_archive_path: Optional[str] = None,
+    ):
+        """Download/Reads the zipfile archive and yields every api.Input object.
+
+        Args:
+            session: requests.Session object
+            archive_url: URL of the DatasetVersionExport archive
+            local_archive_path: Path to the DatasetVersionExport archive
+        """
+        self.input_count = None
+        self.temp_file = None
+        self.session = session
+        if not self.session:
+            self.session = requests.Session()
+
+        assert archive_url or local_archive_path, UserError(
+            "Either archive_url or local_archive_path must be provided."
+        )
+        assert not (archive_url and local_archive_path), UserError(
+            "Only one of archive_url or local_archive_path must be provided."
+        )
+
+        if archive_url:
+            logger.info('url: %s' % archive_url)
+            self.temp_file = self._download_temp_archive(archive_url)
+            self.archive = zipfile.ZipFile(self.temp_file)
+        else:
+            logger.info("path: %s" % local_archive_path)
+            self.archive = zipfile.ZipFile(local_archive_path)
+
+        self.file_name_list = self.archive.namelist()
+        assert "mimetype" in self.file_name_list, (
+            "Missing mimetype file in the dataset export archive."
+        )
+        assert self.archive.read("mimetype") == b"application/x.clarifai-data+protobuf"
+        self.file_name_list.remove("mimetype")
+
+        logger.info("Obtained file name list. %d entries." % len(self.file_name_list))
+        self.split_dir = (
+            os.path.dirname(self.file_name_list[0]) if len(self.file_name_list) else ""
+        )
+
+    def _download_temp_archive(
+        self, archive_url: str, chunk_size: int = 128
+    ) -> tempfile.TemporaryFile:
+        """Downloads the temp archive of InputBatches."""
+        r = self.session.get(archive_url, stream=True)
+        if r.headers['content-type'] == CONTENT_TYPE['json']:
+            raise Exception("File is a json file :\n {}".format(r.json()))
+        elif r.headers['content-type'] != CONTENT_TYPE['zip']:
+            raise Exception('File is not a zip file')
+        temp_file = tempfile.TemporaryFile()
+        for chunk in r.iter_content(chunk_size=chunk_size):
+            temp_file.write(chunk)
+
+        return temp_file
+
+    def __len__(self) -> int:
+        if self.input_count is None:
+            input_count = 0
+            if self.file_name_list is not None:
+                for filename in self.file_name_list:
+                    input_count += int(filename.split('_n')[-1])
+            self.input_count = input_count
+
+        return self.input_count
+
+    def __iter__(self) -> Iterator[resources_pb2.Input]:
+        """Loops through all InputBatches in the DatasetVersionExport and yields every api.Input object"""
+        if self.file_name_list is not None:
+            for filename in self.file_name_list:
+                db = resources_pb2.InputBatch().FromString(self.archive.read(filename))
+                for db_input in db.inputs:
+                    yield db_input
+
+    def __enter__(self) -> 'DatasetExportReader':
+        return self
+
+    def __exit__(self, *args: Any) -> None:
+        self.close()
+
+    def close(self) -> None:
+        logger.info("closing file objects.")
+        self.archive.close()
+        if self.temp_file:
+            self.temp_file.close()
 
 
 class InputAnnotationDownloader:
+    def __init__(
+        self, session: requests.Session, input_iterator: DatasetExportReader, num_workers: int = 4
+    ):
+        """Downloads the archive from the URL into an archive of inputs, annotations in the directory format
+        {split}/inputs and {split}/annotations.
+
+        Args:
+            session: requests.Session object
+            input_iterator: Iterable of DatasetExportReader object
+            num_workers: Number of threads to use for downloading
+        """
+        self.input_iterator = input_iterator
+        self.num_workers = min(num_workers, 10)  # Max 10 threads
+        self.num_inputs = 0
+        self.num_annotations = 0
+        self.split_prefix = None
+        self.session = session
+        self.input_ext = dict(image=".png", text=".txt", audio=".mp3", video=".mp4")
+        if isinstance(self.input_iterator, DatasetExportReader):
+            self.split_prefix = self.input_iterator.split_dir
+
+    def _save_image_to_archive(
+        self, new_archive: zipfile.ZipFile, hosted_url: str, file_name: str
+    ) -> None:
+        """Use PIL ImageFile to return image parsed from the response bytestring (from requests) and append to zip file."""
+        p = ImageFile.Parser()
+        p.feed(self.session.get(hosted_url).content)
+        image = p.close()
+        image_file = BytesIO()
+        image.save(image_file, 'PNG')
+        new_archive.writestr(file_name, image_file.getvalue())
+
+    def _save_text_to_archive(
+        self, new_archive: zipfile.ZipFile, hosted_url: str, file_name: str
+    ) -> None:
+        """Gets the text response bytestring (from requests) and append to zip file."""
+        text_content = self.session.get(hosted_url).content
+        new_archive.writestr(file_name, text_content)
+
+    def _save_audio_to_archive(
+        self, new_archive: zipfile.ZipFile, hosted_url: str, file_name: str
+    ) -> None:
+        """Gets the audio response bytestring (from requests) as chunks and append to zip file."""
+        audio_response = self.session.get(hosted_url, stream=True)
+        audio_stream = BytesIO()
+        # Retrieve the audio content in chunks and write to the BytesIO object
+        for chunk in audio_response.iter_content(chunk_size=128):
+            audio_stream.write(chunk)
+        new_archive.writestr(file_name, audio_stream.getvalue())
+
+    def _save_video_to_archive(
+        self, new_archive: zipfile.ZipFile, hosted_url: str, file_name: str
+    ) -> None:
+        """Gets the video response bytestring (from requests) as chunks and append to zip file."""
+        video_response = self.session.get(hosted_url)
+        video_stream = BytesIO()
+        # Retrieve the video content in chunks and write to the BytesIO object
+        for chunk in video_response.iter_content(chunk_size=128):
+            video_stream.write(chunk)
+        new_archive.writestr(file_name, video_stream.getvalue())
+
+    def _save_annotation_to_archive(
+        self, new_archive: zipfile.ZipFile, annot_data: List[Dict], file_name: str
+    ) -> None:
+        """Gets the annotation response bytestring (from requests) and append to zip file."""
+        # Fill zero values for missing bounding box keys
+        for annot in annot_data:
+            if annot.get('regionInfo') and annot['regionInfo'].get('boundingBox'):
+                bbox = annot['regionInfo']['boundingBox']
+                bbox.setdefault('topRow', 0)
+                bbox.setdefault('leftCol', 0)
+                bbox.setdefault('bottomRow', 0)
+                bbox.setdefault('rightCol', 0)
+        # Serialize the dictionary to a JSON string
+        json_str = json.dumps(annot_data)
+        # Convert the JSON string to bytes
+        bytes_object = json_str.encode()
+
+        new_archive.writestr(file_name, bytes_object)
+
+    def _write_archive(self, input_, new_archive, split: Optional[str] = None) -> None:
+        """Writes the input, annotation archive into prefix dir."""
+        data_dict = MessageToDict(input_.data)
+        input_type = list(
+            filter(lambda x: x in list(data_dict.keys()), list(self.input_ext.keys()))
+        )[0]
+        hosted = getattr(input_.data, input_type).hosted
+        if hosted.prefix:
+            assert 'orig' in hosted.sizes
+            hosted_url = f"{hosted.prefix}/orig/{hosted.suffix}"
+            file_name = os.path.join(split, "inputs", input_.id + self.input_ext[input_type])
+            if input_type == "image":
+                self._save_image_to_archive(new_archive, hosted_url, file_name)
+            elif input_type == "text":
+                self._save_text_to_archive(new_archive, hosted_url, file_name)
+            elif input_type == "audio":
+                self._save_audio_to_archive(new_archive, hosted_url, file_name)
+            elif input_type == "video":
+                self._save_video_to_archive(new_archive, hosted_url, file_name)
+            self.num_inputs += 1
+
+        if data_dict.get("metadata") or data_dict.get("concepts") or data_dict.get("regions"):
+            file_name = os.path.join(split, "annotations", input_.id + ".json")
+            annot_data = (
+                [{"metadata": data_dict.get("metadata", {})}]
+                + data_dict.get("regions", [])
+                + data_dict.get("concepts", [])
+            )
+
+            self._save_annotation_to_archive(new_archive, annot_data, file_name)
+            self.num_annotations += 1
+
+    def _check_output_archive(self, save_path: str) -> None:
+        try:
+            archive = zipfile.ZipFile(save_path, 'r')
+        except zipfile.BadZipFile as e:
+            raise e
+        assert len(archive.namelist()) == self.num_inputs + self.num_annotations, (
+            "Archive has %d inputs+annotations | expecting %d inputs+annotations"
+            % (len(archive.namelist()), self.num_inputs + self.num_annotations)
+        )
+
+    def download_archive(self, save_path: str, split: Optional[str] = None) -> None:
+        """Downloads the archive from the URL into an archive of inputs, annotations in the directory format
+        {split}/inputs and {split}/annotations.
+        """
+        with zipfile.ZipFile(save_path, "a") as new_archive:
+            with ThreadPoolExecutor(max_workers=self.num_workers) as executor:
+                with tqdm(total=len(self.input_iterator), desc='Downloading Dataset') as progress:
+                    # Submit all jobs to the executor and store the returned futures
+                    futures = [
+                        executor.submit(self._write_archive, input_, new_archive, split)
+                        for input_ in self.input_iterator
+                    ]
+
+                    for _ in as_completed(futures):
+                        progress.update()
+
+        self._check_output_archive(save_path)
+        logger.info(
+            "Downloaded %d inputs and %d annotations to %s"
+            % (self.num_inputs, self.num_annotations, save_path)
+        )
```
clarifai/datasets/upload/base.py
CHANGED
```diff
@@ -4,68 +4,80 @@ from typing import Iterator, List, Tuple, TypeVar, Union
 from clarifai_grpc.grpc.api import resources_pb2
 
 from clarifai.constants.dataset import DATASET_UPLOAD_TASKS
-from clarifai.datasets.upload.features import (
+from clarifai.datasets.upload.features import (
+    MultiModalFeatures,
+    TextFeatures,
+    VisualClassificationFeatures,
+    VisualDetectionFeatures,
+    VisualSegmentationFeatures,
+)
 
 OutputFeaturesType = TypeVar(
     'OutputFeaturesType',
-    bound=Union[
+    bound=Union[
+        TextFeatures,
+        VisualClassificationFeatures,
+        VisualDetectionFeatures,
+        VisualSegmentationFeatures,
+        MultiModalFeatures,
+    ],
+)
 
 
 class ClarifaiDataset:
+    """Clarifai datasets base class."""
+
+    def __init__(
+        self, data_generator: 'ClarifaiDataLoader', dataset_id: str, max_workers: int = 4
+    ) -> None:
+        self.data_generator = data_generator
+        self.dataset_id = dataset_id
+        self.max_workers = max_workers
+        self.all_input_ids = {}
+        self._all_input_protos = {}
+        self._all_annotation_protos = defaultdict(list)
+
+    def __len__(self) -> int:
+        """Get size of all input protos"""
+        return len(self.data_generator)
+
+    def _to_list(self, input_protos: Iterator) -> List:
+        """Parse protos iterator to list."""
+        return list(input_protos)
+
+    def _extract_protos(self) -> None:
+        """Create input image protos for each data generator item."""
+        raise NotImplementedError()
+
+    def get_protos(
+        self, input_ids: List[int]
+    ) -> Tuple[List[resources_pb2.Input], List[resources_pb2.Annotation]]:
+        """Get input and annotation protos based on input_ids.
+        Args:
+            input_ids: List of input IDs to retrieve the protos for.
+        Returns:
+            Input and Annotation proto iterators for the specified input IDs.
+        """
+        input_protos, annotation_protos = self._extract_protos(input_ids)
+
+        return input_protos, annotation_protos
 
 
 class ClarifaiDataLoader:
+    """Clarifai data loader base class."""
 
+    def __init__(self) -> None:
+        pass
 
+    @property
+    def task(self):
+        raise NotImplementedError("Task should be one of {}".format(DATASET_UPLOAD_TASKS))
 
+    def load_data(self) -> None:
+        raise NotImplementedError()
 
+    def __len__(self) -> int:
+        raise NotImplementedError()
 
+    def __getitem__(self, index: int) -> OutputFeaturesType:
+        raise NotImplementedError()
```