clarifai 11.3.0rc2__py3-none-any.whl → 11.4.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- clarifai/__init__.py +1 -1
- clarifai/cli/__main__.py +1 -1
- clarifai/cli/base.py +144 -136
- clarifai/cli/compute_cluster.py +45 -31
- clarifai/cli/deployment.py +93 -76
- clarifai/cli/model.py +578 -180
- clarifai/cli/nodepool.py +100 -82
- clarifai/client/__init__.py +12 -2
- clarifai/client/app.py +973 -911
- clarifai/client/auth/helper.py +345 -342
- clarifai/client/auth/register.py +7 -7
- clarifai/client/auth/stub.py +107 -106
- clarifai/client/base.py +185 -178
- clarifai/client/compute_cluster.py +214 -180
- clarifai/client/dataset.py +793 -698
- clarifai/client/deployment.py +55 -50
- clarifai/client/input.py +1223 -1088
- clarifai/client/lister.py +47 -45
- clarifai/client/model.py +1939 -1717
- clarifai/client/model_client.py +525 -502
- clarifai/client/module.py +82 -73
- clarifai/client/nodepool.py +358 -213
- clarifai/client/runner.py +58 -0
- clarifai/client/search.py +342 -309
- clarifai/client/user.py +419 -414
- clarifai/client/workflow.py +294 -274
- clarifai/constants/dataset.py +11 -17
- clarifai/constants/model.py +8 -2
- clarifai/datasets/export/inputs_annotations.py +233 -217
- clarifai/datasets/upload/base.py +63 -51
- clarifai/datasets/upload/features.py +43 -38
- clarifai/datasets/upload/image.py +237 -207
- clarifai/datasets/upload/loaders/coco_captions.py +34 -32
- clarifai/datasets/upload/loaders/coco_detection.py +72 -65
- clarifai/datasets/upload/loaders/imagenet_classification.py +57 -53
- clarifai/datasets/upload/loaders/xview_detection.py +274 -132
- clarifai/datasets/upload/multimodal.py +55 -46
- clarifai/datasets/upload/text.py +55 -47
- clarifai/datasets/upload/utils.py +250 -234
- clarifai/errors.py +51 -50
- clarifai/models/api.py +260 -238
- clarifai/modules/css.py +50 -50
- clarifai/modules/pages.py +33 -33
- clarifai/rag/rag.py +312 -288
- clarifai/rag/utils.py +91 -84
- clarifai/runners/models/model_builder.py +906 -802
- clarifai/runners/models/model_class.py +370 -331
- clarifai/runners/models/model_run_locally.py +459 -419
- clarifai/runners/models/model_runner.py +170 -162
- clarifai/runners/models/model_servicer.py +78 -70
- clarifai/runners/server.py +111 -101
- clarifai/runners/utils/code_script.py +225 -187
- clarifai/runners/utils/const.py +4 -1
- clarifai/runners/utils/data_types/__init__.py +12 -0
- clarifai/runners/utils/data_types/data_types.py +598 -0
- clarifai/runners/utils/data_utils.py +387 -440
- clarifai/runners/utils/loader.py +247 -227
- clarifai/runners/utils/method_signatures.py +411 -386
- clarifai/runners/utils/openai_convertor.py +108 -109
- clarifai/runners/utils/serializers.py +175 -179
- clarifai/runners/utils/url_fetcher.py +35 -35
- clarifai/schema/search.py +56 -63
- clarifai/urls/helper.py +125 -102
- clarifai/utils/cli.py +129 -123
- clarifai/utils/config.py +127 -87
- clarifai/utils/constants.py +49 -0
- clarifai/utils/evaluation/helpers.py +503 -466
- clarifai/utils/evaluation/main.py +431 -393
- clarifai/utils/evaluation/testset_annotation_parser.py +154 -144
- clarifai/utils/logging.py +324 -306
- clarifai/utils/misc.py +60 -56
- clarifai/utils/model_train.py +165 -146
- clarifai/utils/protobuf.py +126 -103
- clarifai/versions.py +3 -1
- clarifai/workflows/export.py +48 -50
- clarifai/workflows/utils.py +39 -36
- clarifai/workflows/validate.py +55 -43
- {clarifai-11.3.0rc2.dist-info → clarifai-11.4.0.dist-info}/METADATA +16 -6
- clarifai-11.4.0.dist-info/RECORD +109 -0
- {clarifai-11.3.0rc2.dist-info → clarifai-11.4.0.dist-info}/WHEEL +1 -1
- clarifai/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/__pycache__/errors.cpython-310.pyc +0 -0
- clarifai/__pycache__/errors.cpython-311.pyc +0 -0
- clarifai/__pycache__/versions.cpython-310.pyc +0 -0
- clarifai/__pycache__/versions.cpython-311.pyc +0 -0
- clarifai/cli/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/cli/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/cli/__pycache__/base.cpython-310.pyc +0 -0
- clarifai/cli/__pycache__/base.cpython-311.pyc +0 -0
- clarifai/cli/__pycache__/base_cli.cpython-310.pyc +0 -0
- clarifai/cli/__pycache__/compute_cluster.cpython-310.pyc +0 -0
- clarifai/cli/__pycache__/compute_cluster.cpython-311.pyc +0 -0
- clarifai/cli/__pycache__/deployment.cpython-310.pyc +0 -0
- clarifai/cli/__pycache__/deployment.cpython-311.pyc +0 -0
- clarifai/cli/__pycache__/model.cpython-310.pyc +0 -0
- clarifai/cli/__pycache__/model.cpython-311.pyc +0 -0
- clarifai/cli/__pycache__/model_cli.cpython-310.pyc +0 -0
- clarifai/cli/__pycache__/nodepool.cpython-310.pyc +0 -0
- clarifai/cli/__pycache__/nodepool.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/client/__pycache__/app.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/app.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/app.cpython-39.pyc +0 -0
- clarifai/client/__pycache__/base.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/base.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/compute_cluster.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/compute_cluster.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/dataset.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/dataset.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/deployment.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/deployment.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/input.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/input.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/lister.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/lister.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/model.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/model.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/module.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/module.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/nodepool.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/nodepool.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/search.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/search.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/user.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/user.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/workflow.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/workflow.cpython-311.pyc +0 -0
- clarifai/client/auth/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/client/auth/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/client/auth/__pycache__/helper.cpython-310.pyc +0 -0
- clarifai/client/auth/__pycache__/helper.cpython-311.pyc +0 -0
- clarifai/client/auth/__pycache__/register.cpython-310.pyc +0 -0
- clarifai/client/auth/__pycache__/register.cpython-311.pyc +0 -0
- clarifai/client/auth/__pycache__/stub.cpython-310.pyc +0 -0
- clarifai/client/auth/__pycache__/stub.cpython-311.pyc +0 -0
- clarifai/client/cli/__init__.py +0 -0
- clarifai/client/cli/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/client/cli/__pycache__/base_cli.cpython-310.pyc +0 -0
- clarifai/client/cli/__pycache__/model_cli.cpython-310.pyc +0 -0
- clarifai/client/cli/base_cli.py +0 -88
- clarifai/client/cli/model_cli.py +0 -29
- clarifai/constants/__pycache__/base.cpython-310.pyc +0 -0
- clarifai/constants/__pycache__/base.cpython-311.pyc +0 -0
- clarifai/constants/__pycache__/dataset.cpython-310.pyc +0 -0
- clarifai/constants/__pycache__/dataset.cpython-311.pyc +0 -0
- clarifai/constants/__pycache__/input.cpython-310.pyc +0 -0
- clarifai/constants/__pycache__/input.cpython-311.pyc +0 -0
- clarifai/constants/__pycache__/model.cpython-310.pyc +0 -0
- clarifai/constants/__pycache__/model.cpython-311.pyc +0 -0
- clarifai/constants/__pycache__/rag.cpython-310.pyc +0 -0
- clarifai/constants/__pycache__/rag.cpython-311.pyc +0 -0
- clarifai/constants/__pycache__/search.cpython-310.pyc +0 -0
- clarifai/constants/__pycache__/search.cpython-311.pyc +0 -0
- clarifai/constants/__pycache__/workflow.cpython-310.pyc +0 -0
- clarifai/constants/__pycache__/workflow.cpython-311.pyc +0 -0
- clarifai/datasets/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/datasets/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/datasets/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/datasets/export/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/datasets/export/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/datasets/export/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/datasets/export/__pycache__/inputs_annotations.cpython-310.pyc +0 -0
- clarifai/datasets/export/__pycache__/inputs_annotations.cpython-311.pyc +0 -0
- clarifai/datasets/upload/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/datasets/upload/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/datasets/upload/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/datasets/upload/__pycache__/base.cpython-310.pyc +0 -0
- clarifai/datasets/upload/__pycache__/base.cpython-311.pyc +0 -0
- clarifai/datasets/upload/__pycache__/features.cpython-310.pyc +0 -0
- clarifai/datasets/upload/__pycache__/features.cpython-311.pyc +0 -0
- clarifai/datasets/upload/__pycache__/image.cpython-310.pyc +0 -0
- clarifai/datasets/upload/__pycache__/image.cpython-311.pyc +0 -0
- clarifai/datasets/upload/__pycache__/multimodal.cpython-310.pyc +0 -0
- clarifai/datasets/upload/__pycache__/multimodal.cpython-311.pyc +0 -0
- clarifai/datasets/upload/__pycache__/text.cpython-310.pyc +0 -0
- clarifai/datasets/upload/__pycache__/text.cpython-311.pyc +0 -0
- clarifai/datasets/upload/__pycache__/utils.cpython-310.pyc +0 -0
- clarifai/datasets/upload/__pycache__/utils.cpython-311.pyc +0 -0
- clarifai/datasets/upload/loaders/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/datasets/upload/loaders/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/datasets/upload/loaders/__pycache__/coco_detection.cpython-311.pyc +0 -0
- clarifai/datasets/upload/loaders/__pycache__/imagenet_classification.cpython-311.pyc +0 -0
- clarifai/models/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/modules/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/rag/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/rag/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/rag/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/rag/__pycache__/rag.cpython-310.pyc +0 -0
- clarifai/rag/__pycache__/rag.cpython-311.pyc +0 -0
- clarifai/rag/__pycache__/rag.cpython-39.pyc +0 -0
- clarifai/rag/__pycache__/utils.cpython-310.pyc +0 -0
- clarifai/rag/__pycache__/utils.cpython-311.pyc +0 -0
- clarifai/runners/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/runners/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/runners/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/runners/dockerfile_template/Dockerfile.cpu.template +0 -31
- clarifai/runners/dockerfile_template/Dockerfile.cuda.template +0 -42
- clarifai/runners/dockerfile_template/Dockerfile.nim +0 -71
- clarifai/runners/models/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/runners/models/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/runners/models/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/runners/models/__pycache__/base_typed_model.cpython-310.pyc +0 -0
- clarifai/runners/models/__pycache__/base_typed_model.cpython-311.pyc +0 -0
- clarifai/runners/models/__pycache__/base_typed_model.cpython-39.pyc +0 -0
- clarifai/runners/models/__pycache__/model_builder.cpython-311.pyc +0 -0
- clarifai/runners/models/__pycache__/model_class.cpython-310.pyc +0 -0
- clarifai/runners/models/__pycache__/model_class.cpython-311.pyc +0 -0
- clarifai/runners/models/__pycache__/model_run_locally.cpython-310-pytest-7.1.2.pyc +0 -0
- clarifai/runners/models/__pycache__/model_run_locally.cpython-310.pyc +0 -0
- clarifai/runners/models/__pycache__/model_run_locally.cpython-311.pyc +0 -0
- clarifai/runners/models/__pycache__/model_runner.cpython-310.pyc +0 -0
- clarifai/runners/models/__pycache__/model_runner.cpython-311.pyc +0 -0
- clarifai/runners/models/__pycache__/model_upload.cpython-310.pyc +0 -0
- clarifai/runners/models/base_typed_model.py +0 -238
- clarifai/runners/models/model_class_refract.py +0 -80
- clarifai/runners/models/model_upload.py +0 -607
- clarifai/runners/models/temp.py +0 -25
- clarifai/runners/utils/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/runners/utils/__pycache__/__init__.cpython-38.pyc +0 -0
- clarifai/runners/utils/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/buffered_stream.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/buffered_stream.cpython-38.pyc +0 -0
- clarifai/runners/utils/__pycache__/buffered_stream.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/const.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/const.cpython-311.pyc +0 -0
- clarifai/runners/utils/__pycache__/constants.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/constants.cpython-38.pyc +0 -0
- clarifai/runners/utils/__pycache__/constants.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/data_handler.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/data_handler.cpython-311.pyc +0 -0
- clarifai/runners/utils/__pycache__/data_handler.cpython-38.pyc +0 -0
- clarifai/runners/utils/__pycache__/data_handler.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/data_utils.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/data_utils.cpython-311.pyc +0 -0
- clarifai/runners/utils/__pycache__/data_utils.cpython-38.pyc +0 -0
- clarifai/runners/utils/__pycache__/data_utils.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/grpc_server.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/grpc_server.cpython-38.pyc +0 -0
- clarifai/runners/utils/__pycache__/grpc_server.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/health.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/health.cpython-38.pyc +0 -0
- clarifai/runners/utils/__pycache__/health.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/loader.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/loader.cpython-311.pyc +0 -0
- clarifai/runners/utils/__pycache__/logging.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/logging.cpython-38.pyc +0 -0
- clarifai/runners/utils/__pycache__/logging.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/stream_source.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/stream_source.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/url_fetcher.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/url_fetcher.cpython-311.pyc +0 -0
- clarifai/runners/utils/__pycache__/url_fetcher.cpython-38.pyc +0 -0
- clarifai/runners/utils/__pycache__/url_fetcher.cpython-39.pyc +0 -0
- clarifai/runners/utils/data_handler.py +0 -231
- clarifai/runners/utils/data_handler_refract.py +0 -213
- clarifai/runners/utils/data_types.py +0 -469
- clarifai/runners/utils/logger.py +0 -0
- clarifai/runners/utils/openai_format.py +0 -87
- clarifai/schema/__pycache__/search.cpython-310.pyc +0 -0
- clarifai/schema/__pycache__/search.cpython-311.pyc +0 -0
- clarifai/urls/__pycache__/helper.cpython-310.pyc +0 -0
- clarifai/urls/__pycache__/helper.cpython-311.pyc +0 -0
- clarifai/utils/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/utils/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/utils/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/utils/__pycache__/cli.cpython-310.pyc +0 -0
- clarifai/utils/__pycache__/cli.cpython-311.pyc +0 -0
- clarifai/utils/__pycache__/config.cpython-311.pyc +0 -0
- clarifai/utils/__pycache__/constants.cpython-310.pyc +0 -0
- clarifai/utils/__pycache__/constants.cpython-311.pyc +0 -0
- clarifai/utils/__pycache__/logging.cpython-310.pyc +0 -0
- clarifai/utils/__pycache__/logging.cpython-311.pyc +0 -0
- clarifai/utils/__pycache__/misc.cpython-310.pyc +0 -0
- clarifai/utils/__pycache__/misc.cpython-311.pyc +0 -0
- clarifai/utils/__pycache__/model_train.cpython-310.pyc +0 -0
- clarifai/utils/__pycache__/model_train.cpython-311.pyc +0 -0
- clarifai/utils/__pycache__/protobuf.cpython-311.pyc +0 -0
- clarifai/utils/evaluation/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/utils/evaluation/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/utils/evaluation/__pycache__/helpers.cpython-311.pyc +0 -0
- clarifai/utils/evaluation/__pycache__/main.cpython-311.pyc +0 -0
- clarifai/utils/evaluation/__pycache__/main.cpython-39.pyc +0 -0
- clarifai/workflows/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/workflows/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/workflows/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/workflows/__pycache__/export.cpython-310.pyc +0 -0
- clarifai/workflows/__pycache__/export.cpython-311.pyc +0 -0
- clarifai/workflows/__pycache__/utils.cpython-310.pyc +0 -0
- clarifai/workflows/__pycache__/utils.cpython-311.pyc +0 -0
- clarifai/workflows/__pycache__/validate.cpython-310.pyc +0 -0
- clarifai/workflows/__pycache__/validate.cpython-311.pyc +0 -0
- clarifai-11.3.0rc2.dist-info/RECORD +0 -322
- {clarifai-11.3.0rc2.dist-info → clarifai-11.4.0.dist-info}/entry_points.txt +0 -0
- {clarifai-11.3.0rc2.dist-info → clarifai-11.4.0.dist-info/licenses}/LICENSE +0 -0
- {clarifai-11.3.0rc2.dist-info → clarifai-11.4.0.dist-info}/top_level.txt +0 -0
```diff
@@ -18,223 +18,239 @@ from clarifai.utils.logging import logger
 
 
 class DatasetExportReader:
+    def __init__(
+        self,
+        session: requests.Session = None,
+        archive_url: Optional[str] = None,
+        local_archive_path: Optional[str] = None,
+    ):
+        """Download/Reads the zipfile archive and yields every api.Input object.
+
+        Args:
+            session: requests.Session object
+            archive_url: URL of the DatasetVersionExport archive
+            local_archive_path: Path to the DatasetVersionExport archive
+        """
+        self.input_count = None
+        self.temp_file = None
+        self.session = session
+        if not self.session:
+            self.session = requests.Session()
+
+        assert archive_url or local_archive_path, UserError(
+            "Either archive_url or local_archive_path must be provided."
+        )
+        assert not (archive_url and local_archive_path), UserError(
+            "Only one of archive_url or local_archive_path must be provided."
+        )
+
+        if archive_url:
+            logger.info('url: %s' % archive_url)
+            self.temp_file = self._download_temp_archive(archive_url)
+            self.archive = zipfile.ZipFile(self.temp_file)
+        else:
+            logger.info("path: %s" % local_archive_path)
+            self.archive = zipfile.ZipFile(local_archive_path)
+
+        self.file_name_list = self.archive.namelist()
+        assert "mimetype" in self.file_name_list, (
+            "Missing mimetype file in the dataset export archive."
+        )
+        assert self.archive.read("mimetype") == b"application/x.clarifai-data+protobuf"
+        self.file_name_list.remove("mimetype")
+
+        logger.info("Obtained file name list. %d entries." % len(self.file_name_list))
+        self.split_dir = (
+            os.path.dirname(self.file_name_list[0]) if len(self.file_name_list) else ""
+        )
+
+    def _download_temp_archive(
+        self, archive_url: str, chunk_size: int = 128
+    ) -> tempfile.TemporaryFile:
+        """Downloads the temp archive of InputBatches."""
+        r = self.session.get(archive_url, stream=True)
+        if r.headers['content-type'] == CONTENT_TYPE['json']:
+            raise Exception("File is a json file :\n {}".format(r.json()))
+        elif r.headers['content-type'] != CONTENT_TYPE['zip']:
+            raise Exception('File is not a zip file')
+        temp_file = tempfile.TemporaryFile()
+        for chunk in r.iter_content(chunk_size=chunk_size):
+            temp_file.write(chunk)
+
+        return temp_file
+
+    def __len__(self) -> int:
+        if self.input_count is None:
+            input_count = 0
+            if self.file_name_list is not None:
+                for filename in self.file_name_list:
+                    input_count += int(filename.split('_n')[-1])
+            self.input_count = input_count
+
+        return self.input_count
+
+    def __iter__(self) -> Iterator[resources_pb2.Input]:
+        """Loops through all InputBatches in the DatasetVersionExport and yields every api.Input object"""
+        if self.file_name_list is not None:
+            for filename in self.file_name_list:
+                db = resources_pb2.InputBatch().FromString(self.archive.read(filename))
+                for db_input in db.inputs:
+                    yield db_input
+
+    def __enter__(self) -> 'DatasetExportReader':
+        return self
+
+    def __exit__(self, *args: Any) -> None:
+        self.close()
+
+    def close(self) -> None:
+        logger.info("closing file objects.")
+        self.archive.close()
+        if self.temp_file:
+            self.temp_file.close()
 
 
 class InputAnnotationDownloader:
+    def __init__(
+        self, session: requests.Session, input_iterator: DatasetExportReader, num_workers: int = 4
+    ):
+        """Downloads the archive from the URL into an archive of inputs, annotations in the directory format
+        {split}/inputs and {split}/annotations.
+
+        Args:
+            session: requests.Session object
+            input_iterator: Iterable of DatasetExportReader object
+            num_workers: Number of threads to use for downloading
+        """
+        self.input_iterator = input_iterator
+        self.num_workers = min(num_workers, 10)  # Max 10 threads
+        self.num_inputs = 0
+        self.num_annotations = 0
+        self.split_prefix = None
+        self.session = session
+        self.input_ext = dict(image=".png", text=".txt", audio=".mp3", video=".mp4")
+        if isinstance(self.input_iterator, DatasetExportReader):
+            self.split_prefix = self.input_iterator.split_dir
+
+    def _save_image_to_archive(
+        self, new_archive: zipfile.ZipFile, hosted_url: str, file_name: str
+    ) -> None:
+        """Use PIL ImageFile to return image parsed from the response bytestring (from requests) and append to zip file."""
+        p = ImageFile.Parser()
+        p.feed(self.session.get(hosted_url).content)
+        image = p.close()
+        image_file = BytesIO()
+        image.save(image_file, 'PNG')
+        new_archive.writestr(file_name, image_file.getvalue())
+
+    def _save_text_to_archive(
+        self, new_archive: zipfile.ZipFile, hosted_url: str, file_name: str
+    ) -> None:
+        """Gets the text response bytestring (from requests) and append to zip file."""
+        text_content = self.session.get(hosted_url).content
+        new_archive.writestr(file_name, text_content)
+
+    def _save_audio_to_archive(
+        self, new_archive: zipfile.ZipFile, hosted_url: str, file_name: str
+    ) -> None:
+        """Gets the audio response bytestring (from requests) as chunks and append to zip file."""
+        audio_response = self.session.get(hosted_url, stream=True)
+        audio_stream = BytesIO()
+        # Retrieve the audio content in chunks and write to the BytesIO object
+        for chunk in audio_response.iter_content(chunk_size=128):
+            audio_stream.write(chunk)
+        new_archive.writestr(file_name, audio_stream.getvalue())
+
+    def _save_video_to_archive(
+        self, new_archive: zipfile.ZipFile, hosted_url: str, file_name: str
+    ) -> None:
+        """Gets the video response bytestring (from requests) as chunks and append to zip file."""
+        video_response = self.session.get(hosted_url)
+        video_stream = BytesIO()
+        # Retrieve the video content in chunks and write to the BytesIO object
+        for chunk in video_response.iter_content(chunk_size=128):
+            video_stream.write(chunk)
+        new_archive.writestr(file_name, video_stream.getvalue())
+
+    def _save_annotation_to_archive(
+        self, new_archive: zipfile.ZipFile, annot_data: List[Dict], file_name: str
+    ) -> None:
+        """Gets the annotation response bytestring (from requests) and append to zip file."""
+        # Fill zero values for missing bounding box keys
+        for annot in annot_data:
+            if annot.get('regionInfo') and annot['regionInfo'].get('boundingBox'):
+                bbox = annot['regionInfo']['boundingBox']
+                bbox.setdefault('topRow', 0)
+                bbox.setdefault('leftCol', 0)
+                bbox.setdefault('bottomRow', 0)
+                bbox.setdefault('rightCol', 0)
+        # Serialize the dictionary to a JSON string
+        json_str = json.dumps(annot_data)
+        # Convert the JSON string to bytes
+        bytes_object = json_str.encode()
+
+        new_archive.writestr(file_name, bytes_object)
+
+    def _write_archive(self, input_, new_archive, split: Optional[str] = None) -> None:
+        """Writes the input, annotation archive into prefix dir."""
+        data_dict = MessageToDict(input_.data)
+        input_type = list(
+            filter(lambda x: x in list(data_dict.keys()), list(self.input_ext.keys()))
+        )[0]
+        hosted = getattr(input_.data, input_type).hosted
+        if hosted.prefix:
+            assert 'orig' in hosted.sizes
+            hosted_url = f"{hosted.prefix}/orig/{hosted.suffix}"
+            file_name = os.path.join(split, "inputs", input_.id + self.input_ext[input_type])
+            if input_type == "image":
+                self._save_image_to_archive(new_archive, hosted_url, file_name)
+            elif input_type == "text":
+                self._save_text_to_archive(new_archive, hosted_url, file_name)
+            elif input_type == "audio":
+                self._save_audio_to_archive(new_archive, hosted_url, file_name)
+            elif input_type == "video":
+                self._save_video_to_archive(new_archive, hosted_url, file_name)
+            self.num_inputs += 1
+
+        if data_dict.get("metadata") or data_dict.get("concepts") or data_dict.get("regions"):
+            file_name = os.path.join(split, "annotations", input_.id + ".json")
+            annot_data = (
+                [{"metadata": data_dict.get("metadata", {})}]
+                + data_dict.get("regions", [])
+                + data_dict.get("concepts", [])
+            )
+
+            self._save_annotation_to_archive(new_archive, annot_data, file_name)
+            self.num_annotations += 1
+
+    def _check_output_archive(self, save_path: str) -> None:
+        try:
+            archive = zipfile.ZipFile(save_path, 'r')
+        except zipfile.BadZipFile as e:
+            raise e
+        assert len(archive.namelist()) == self.num_inputs + self.num_annotations, (
+            "Archive has %d inputs+annotations | expecting %d inputs+annotations"
+            % (len(archive.namelist()), self.num_inputs + self.num_annotations)
+        )
+
+    def download_archive(self, save_path: str, split: Optional[str] = None) -> None:
+        """Downloads the archive from the URL into an archive of inputs, annotations in the directory format
+        {split}/inputs and {split}/annotations.
+        """
+        with zipfile.ZipFile(save_path, "a") as new_archive:
+            with ThreadPoolExecutor(max_workers=self.num_workers) as executor:
+                with tqdm(total=len(self.input_iterator), desc='Downloading Dataset') as progress:
+                    # Submit all jobs to the executor and store the returned futures
+                    futures = [
+                        executor.submit(self._write_archive, input_, new_archive, split)
+                        for input_ in self.input_iterator
+                    ]
+
+                    for _ in as_completed(futures):
+                        progress.update()
+
+        self._check_output_archive(save_path)
+        logger.info(
+            "Downloaded %d inputs and %d annotations to %s"
+            % (self.num_inputs, self.num_annotations, save_path)
+        )
```
clarifai/datasets/upload/base.py
CHANGED
```diff
@@ -4,68 +4,80 @@ from typing import Iterator, List, Tuple, TypeVar, Union
 from clarifai_grpc.grpc.api import resources_pb2
 
 from clarifai.constants.dataset import DATASET_UPLOAD_TASKS
-from clarifai.datasets.upload.features import (
+from clarifai.datasets.upload.features import (
+    MultiModalFeatures,
+    TextFeatures,
+    VisualClassificationFeatures,
+    VisualDetectionFeatures,
+    VisualSegmentationFeatures,
+)
 
 OutputFeaturesType = TypeVar(
     'OutputFeaturesType',
-    bound=Union[
+    bound=Union[
+        TextFeatures,
+        VisualClassificationFeatures,
+        VisualDetectionFeatures,
+        VisualSegmentationFeatures,
+        MultiModalFeatures,
+    ],
+)
 
 
 class ClarifaiDataset:
+    """Clarifai datasets base class."""
+
+    def __init__(
+        self, data_generator: 'ClarifaiDataLoader', dataset_id: str, max_workers: int = 4
+    ) -> None:
+        self.data_generator = data_generator
+        self.dataset_id = dataset_id
+        self.max_workers = max_workers
+        self.all_input_ids = {}
+        self._all_input_protos = {}
+        self._all_annotation_protos = defaultdict(list)
+
+    def __len__(self) -> int:
+        """Get size of all input protos"""
+        return len(self.data_generator)
+
+    def _to_list(self, input_protos: Iterator) -> List:
+        """Parse protos iterator to list."""
+        return list(input_protos)
+
+    def _extract_protos(self) -> None:
+        """Create input image protos for each data generator item."""
+        raise NotImplementedError()
+
+    def get_protos(
+        self, input_ids: List[int]
+    ) -> Tuple[List[resources_pb2.Input], List[resources_pb2.Annotation]]:
+        """Get input and annotation protos based on input_ids.
+        Args:
+            input_ids: List of input IDs to retrieve the protos for.
+        Returns:
+            Input and Annotation proto iterators for the specified input IDs.
+        """
+        input_protos, annotation_protos = self._extract_protos(input_ids)
+
+        return input_protos, annotation_protos
 
 
 class ClarifaiDataLoader:
+    """Clarifai data loader base class."""
 
+    def __init__(self) -> None:
+        pass
 
+    @property
+    def task(self):
+        raise NotImplementedError("Task should be one of {}".format(DATASET_UPLOAD_TASKS))
 
+    def load_data(self) -> None:
+        raise NotImplementedError()
 
+    def __len__(self) -> int:
+        raise NotImplementedError()
 
+    def __getitem__(self, index: int) -> OutputFeaturesType:
+        raise NotImplementedError()
```