PyPI - code-loader - Versions diffs - 1.0.50__tar.gz → 1.0.52__tar.gz - Mend

code-loader 1.0.50 (tar.gz) → 1.0.52 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29)

{code_loader-1.0.50 → code_loader-1.0.52}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: code-loader
-Version: 1.0.50
+Version: 1.0.52
 Summary:
 Home-page: https://github.com/tensorleap/code-loader
 License: MIT
@@ -13,6 +13,7 @@ Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
+Requires-Dist: matplotlib (>=3.3,<3.4)
 Requires-Dist: numpy (>=1.22.3,<2.0.0)
 Requires-Dist: psutil (>=5.9.5,<6.0.0)
 Requires-Dist: pyyaml (>=6.0.2,<7.0.0)

code_loader-1.0.52/code_loader/code_inegration_processes_manager.py ADDED Viewed

@@ -0,0 +1,83 @@
+# mypy: ignore-errors
+import traceback
+from dataclasses import dataclass
+from typing import List, Tuple, Optional
+from multiprocessing import Process, Queue
+from code_loader.leap_loader_parallelized_base import LeapLoaderParallelizedBase
+from code_loader.leaploader import LeapLoader
+from code_loader.contract.enums import DataStateEnum
+from code_loader.metric_calculator_parallelized import MetricCalculatorParallelized
+from code_loader.samples_generator_parallelized import SamplesGeneratorParallelized
+@dataclass
+class SampleSerializableError:
+    state: DataStateEnum
+    index: int
+    leap_script_trace: str
+    exception_as_str: str
+class CodeIntegrationProcessesManager:
+    def __init__(self, code_path: str, code_entry_name: str, n_workers: Optional[int] = 2,
+                 max_samples_in_queue: int = 128) -> None:
+        self.metric_calculator_parallelized = MetricCalculatorParallelized(code_path, code_entry_name)
+        self.samples_generator_parallelized = SamplesGeneratorParallelized(code_path, code_entry_name)
+    def _create_and_start_process(self) -> Process:
+        process = self.multiprocessing_context.Process(
+            target=CodeIntegrationProcessesManager._process_func,
+            args=(self.code_path, self.code_entry_name, self._inputs_waiting_to_be_process,
+                  self._ready_processed_results))
+        process.daemon = True
+        process.start()
+        return process
+    def _run_and_warm_first_process(self):
+        process = self._create_and_start_process()
+        self.processes = [process]
+        # needed in order to make sure the preprocess func runs once in nonparallel
+        self._start_process_inputs([(DataStateEnum.training, 0)])
+        self._get_next_ready_processed_result()
+    def _operation_decider(self):
+        if self.metric_calculator_parallelized._ready_processed_results.empty() and not \
+            self.metric_calculator_parallelized._inputs_waiting_to_be_process.empty():
+            return 'metric'
+        if self.samples_generator_parallelized._ready_processed_results.empty() and not \
+            self.samples_generator_parallelized._inputs_waiting_to_be_process.empty():
+            return 'dataset'
+    @staticmethod
+    def _process_func(code_path: str, code_entry_name: str,
+                      samples_to_process: Queue, ready_samples: Queue,
+                      metrics_to_process: Queue, ready_metrics: Queue) -> None:
+        import os
+        os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
+        leap_loader = LeapLoader(code_path, code_entry_name)
+        while True:
+            # decide on sample or metric to process
+            state, idx = samples_to_process.get(block=True)
+            leap_loader._preprocess_result()
+            try:
+                sample = leap_loader.get_sample(state, idx)
+            except Exception as e:
+                leap_script_trace = traceback.format_exc().split('File "<string>"')[-1]
+                ready_samples.put(SampleSerializableError(state, idx, leap_script_trace, str(e)))
+                continue
+            ready_samples.put(sample)
+    def generate_samples(self, sample_identities: List[Tuple[DataStateEnum, int]]):
+        return self.start_process_inputs(sample_identities)

{code_loader-1.0.50 → code_loader-1.0.52}/code_loader/contract/datasetclasses.py RENAMED Viewed

@@ -32,21 +32,39 @@ class PreprocessResponse:
         }
         response = PreprocessResponse(length=len(preprocessed_data), data=preprocessed_data)
     """
-    length: int
-    data: Any
-SectionCallableInterface = Callable[[int, PreprocessResponse], npt.NDArray[np.float32]]
+    length: Optional[int] = None  # Deprecated. Please use sample_ids instead
+    data: Any = None
+    sample_ids: Optional[Union[List[str], List[int]]] = None
+    state: Optional[DataStateType] = None
+    sample_id_type: Optional[Union[Type[str], Type[int]]] = None
+    def __post_init__(self) -> None:
+        if self.length is not None and self.sample_ids is None:
+            self.sample_ids = [i for i in range(self.length)]
+            self.sample_id_type = int
+        elif self.length is None and self.sample_ids is not None:
+            self.length = len(self.sample_ids)
+            if self.sample_id_type is None:
+                self.sample_id_type = str
+        else:
+            raise Exception("length is deprecated.")
+    def __len__(self) -> int:
+        assert self.sample_ids is not None
+        return len(self.sample_ids)
+SectionCallableInterface = Callable[[Union[int, str], PreprocessResponse], npt.NDArray[np.float32]]
 MetadataSectionCallableInterface = Union[
-    Callable[[int, PreprocessResponse], int],
-    Callable[[int, PreprocessResponse], Dict[str, int]],
-    Callable[[int, PreprocessResponse], str],
-    Callable[[int, PreprocessResponse], Dict[str, str]],
-    Callable[[int, PreprocessResponse], bool],
-    Callable[[int, PreprocessResponse], Dict[str, bool]],
-    Callable[[int, PreprocessResponse], float],
-    Callable[[int, PreprocessResponse], Dict[str, float]]
+    Callable[[Union[int, str], PreprocessResponse], int],
+    Callable[[Union[int, str], PreprocessResponse], Dict[str, int]],
+    Callable[[Union[int, str], PreprocessResponse], str],
+    Callable[[Union[int, str], PreprocessResponse], Dict[str, str]],
+    Callable[[Union[int, str], PreprocessResponse], bool],
+    Callable[[Union[int, str], PreprocessResponse], Dict[str, bool]],
+    Callable[[Union[int, str], PreprocessResponse], float],
+    Callable[[Union[int, str], PreprocessResponse], Dict[str, float]]
 ]
@@ -181,5 +199,5 @@ class DatasetSample:
     inputs: Dict[str, npt.NDArray[np.float32]]
     gt: Optional[Dict[str, npt.NDArray[np.float32]]]
     metadata: Dict[str, Union[str, int, bool, float]]
-    index: int
+    index: Union[int, str]
     state: DataStateEnum

{code_loader-1.0.50 → code_loader-1.0.52}/code_loader/inner_leap_binder/leapbinder.py RENAMED Viewed

@@ -37,6 +37,8 @@ class LeapBinder:
         self._encoder_names: List[str] = list()
         self._extend_with_default_visualizers()
+        self.batch_size_to_validate: Optional[int] = None
     def _extend_with_default_visualizers(self) -> None:
         self.set_visualizer(function=default_image_visualizer, name=DefaultVisualizer.Image.value,
                             visualizer_type=LeapDataType.Image)
@@ -389,17 +391,36 @@ class LeapBinder:
         if preprocess is None:
             raise Exception("Please make sure you call the leap_binder.set_preprocess method")
         preprocess_results = preprocess.function()
-        preprocess_result_dict = {
-            DataStateEnum(i): preprocess_result
-            for i, preprocess_result in enumerate(preprocess_results)
-        }
+        preprocess_result_dict = {}
+        for i, preprocess_result in enumerate(preprocess_results):
+            if preprocess_result.state is None:
+                state_enum = DataStateEnum(i)
+                preprocess_result.state = DataStateType(state_enum.name)
+            else:
+                state_enum = DataStateEnum[preprocess_result.state.name]
-        unlabeled_preprocess = self.setup_container.unlabeled_data_preprocess
-        if unlabeled_preprocess is not None:
-            preprocess_result_dict[DataStateEnum.unlabeled] = unlabeled_preprocess.function()
+            if state_enum in preprocess_result_dict:
+                raise Exception(f"Duplicate state {state_enum.name} in preprocess results")
+            preprocess_result_dict[state_enum] = preprocess_result
+        if DataStateEnum.unlabeled not in preprocess_result_dict:
+            preprocess_unlabeled_result = self.get_preprocess_unlabeled_result()
+            if preprocess_unlabeled_result is not None:
+                preprocess_result_dict[DataStateEnum.unlabeled] = preprocess_unlabeled_result
+        if DataStateEnum.training not in preprocess_result_dict:
+            raise Exception("Training data is required")
+        if DataStateEnum.validation not in preprocess_result_dict:
+            raise Exception("Validation data is required")
         return preprocess_result_dict
+    def get_preprocess_unlabeled_result(self) -> Optional[PreprocessResponse]:
+        unlabeled_preprocess = self.setup_container.unlabeled_data_preprocess
+        if unlabeled_preprocess is not None:
+            return unlabeled_preprocess.function()
+        return None
     def _get_all_dataset_base_handlers(self) -> List[Union[DatasetBaseHandler, MetadataHandler]]:
         all_dataset_base_handlers: List[Union[DatasetBaseHandler, MetadataHandler]] = []
         all_dataset_base_handlers.extend(self.setup_container.inputs)
@@ -411,7 +432,8 @@ class LeapBinder:
     def check_handler(
             preprocess_response: PreprocessResponse, test_result: List[DatasetTestResultPayload],
             dataset_base_handler: Union[DatasetBaseHandler, MetadataHandler]) -> List[DatasetTestResultPayload]:
-        raw_result = dataset_base_handler.function(0, preprocess_response)
+        assert preprocess_response.sample_ids is not None
+        raw_result = dataset_base_handler.function(preprocess_response.sample_ids[0], preprocess_response)
         handler_type = 'metadata' if isinstance(dataset_base_handler, MetadataHandler) else None
         if isinstance(dataset_base_handler, MetadataHandler) and isinstance(raw_result, dict):
             metadata_test_result_payloads = [
@@ -452,4 +474,9 @@ class LeapBinder:
         self.check_handlers(preprocess_result)
         print("Successful!")
+    def set_batch_size_to_validate(self, batch_size: int) -> None:
+        self.batch_size_to_validate = batch_size

code_loader-1.0.52/code_loader/inner_leap_binder/leapbinder_decorators.py ADDED Viewed

@@ -0,0 +1,380 @@
+# mypy: ignore-errors
+from typing import Optional, Union, Callable, List
+import numpy as np
+import numpy.typing as npt
+from code_loader.contract.datasetclasses import CustomCallableInterfaceMultiArgs, \
+    CustomMultipleReturnCallableInterfaceMultiArgs, ConfusionMatrixCallableInterfaceMultiArgs, CustomCallableInterface, \
+    VisualizerCallableInterface, MetadataSectionCallableInterface, PreprocessResponse, SectionCallableInterface, \
+    ConfusionMatrixElement
+from code_loader.contract.enums import MetricDirection, LeapDataType
+from code_loader import leap_binder
+from code_loader.contract.visualizer_classes import LeapImage, LeapImageMask, LeapTextMask, LeapText, LeapGraph, \
+    LeapHorizontalBar, LeapImageWithBBox, LeapImageWithHeatmap
+def tensorleap_custom_metric(name: str, direction: Optional[MetricDirection] = MetricDirection.Downward):
+    def decorating_function(
+            user_function: Union[CustomCallableInterfaceMultiArgs,
+            CustomMultipleReturnCallableInterfaceMultiArgs,
+            ConfusionMatrixCallableInterfaceMultiArgs]
+    ):
+        for metric_handler in leap_binder.setup_container.metrics:
+            if metric_handler.name == name:
+                raise Exception(f'Metric with name {name} already exists. '
+                                f'Please choose another')
+        leap_binder.add_custom_metric(user_function, name, direction)
+        def _validate_input_args(*args, **kwargs) -> None:
+            for i, arg in enumerate(args):
+                assert isinstance(arg, np.ndarray), (f'tensorleap_custom_metric validation failed: '
+                                                     f'Argument #{i} should be a numpy array. Got {type(arg)}.')
+                if leap_binder.batch_size_to_validate:
+                    assert arg.shape[0] == leap_binder.batch_size_to_validate, \
+                        (f'tensorleap_custom_metric validation failed: Argument #{i} '
+                         f'first dim should be as the batch size. Got {arg.shape[0]} '
+                         f'instead of {leap_binder.batch_size_to_validate}')
+            for _arg_name, arg in kwargs.items():
+                assert isinstance(arg, np.ndarray), (f'tensorleap_custom_metric validation failed: '
+                                                     f'Argument {_arg_name} should be a numpy array. Got {type(arg)}.')
+                if leap_binder.batch_size_to_validate:
+                    assert arg.shape[0] == leap_binder.batch_size_to_validate, \
+                        (f'tensorleap_custom_metric validation failed: Argument {_arg_name} '
+                         f'first dim should be as the batch size. Got {arg.shape[0]} '
+                         f'instead of {leap_binder.batch_size_to_validate}')
+        def _validate_result(result) -> None:
+            supported_types_message = (f'tensorleap_custom_metric validation failed: '
+                                       f'Metric has returned unsupported type. Supported types are List[float], '
+                                       f'List[List[ConfusionMatrixElement]], NDArray[np.float32]. ')
+            if isinstance(result, list):
+                if isinstance(result[0], list):
+                    assert isinstance(result[0][0], ConfusionMatrixElement), \
+                        f'{supported_types_message}Got List[List[{type(result[0][0])}]].'
+                else:
+                    assert isinstance(result[0], float), f'{supported_types_message}Got List[{type(result[0])}].'
+            else:
+                assert isinstance(result, np.ndarray), f'{supported_types_message}Got {type(result)}.'
+                assert len(result.shape) == 1, (f'tensorleap_custom_metric validation failed: '
+                                                f'The return shape should be 1D. Got {len(result.shape)}D.')
+            if leap_binder.batch_size_to_validate:
+                assert len(result) == leap_binder.batch_size_to_validate, \
+                    f'tensorleap_custom_metrix validation failed: The return len should be as the batch size.'
+        def inner(*args, **kwargs):
+            _validate_input_args(*args, **kwargs)
+            result = user_function(*args, **kwargs)
+            _validate_result(result)
+            return result
+        return inner
+    return decorating_function
+def tensorleap_custom_visualizer(name: str, visualizer_type: LeapDataType,
+                                 heatmap_function: Optional[Callable[..., npt.NDArray[np.float32]]] = None):
+    def decorating_function(user_function: VisualizerCallableInterface):
+        for viz_handler in leap_binder.setup_container.visualizers:
+            if viz_handler.name == name:
+                raise Exception(f'Visualizer with name {name} already exists. '
+                                f'Please choose another')
+        leap_binder.set_visualizer(user_function, name, visualizer_type, heatmap_function)
+        def _validate_input_args(*args, **kwargs):
+            for i, arg in enumerate(args):
+                assert isinstance(arg, np.ndarray), (f'tensorleap_custom_visualizer validation failed: '
+                                                     f'Argument #{i} should be a numpy array. Got {type(arg)}.')
+                if leap_binder.batch_size_to_validate:
+                    assert arg.shape[0] == leap_binder.batch_size_to_validate, \
+                        (f'tensorleap_custom_visualizer validation failed: Argument #{i} '
+                         f'first dim should be 1. The visualizers will always run with batch size 1. Got {arg.shape[0]}')
+            for _arg_name, arg in kwargs.items():
+                assert isinstance(arg, np.ndarray), (f'tensorleap_custom_visualizer validation failed: '
+                                                     f'Argument {_arg_name} should be a numpy array. Got {type(arg)}.')
+                if leap_binder.batch_size_to_validate:
+                    assert arg.shape[0] == leap_binder.batch_size_to_validate, \
+                        (f'tensorleap_custom_visualizer validation failed: Argument {_arg_name} '
+                         f'first dim should be 1. The visualizers will always run with batch size 1. Got {arg.shape[0]}')
+        def _validate_result(result):
+            result_type_map = {
+                LeapDataType.Image: LeapImage,
+                LeapDataType.ImageMask: LeapImageMask,
+                LeapDataType.TextMask: LeapTextMask,
+                LeapDataType.Text: LeapText,
+                LeapDataType.Graph: LeapGraph,
+                LeapDataType.HorizontalBar: LeapHorizontalBar,
+                LeapDataType.ImageWithBBox: LeapImageWithBBox,
+                LeapDataType.ImageWithHeatmap: LeapImageWithHeatmap
+            }
+            assert isinstance(result, result_type_map[visualizer_type]), \
+                (f'tensorleap_custom_visualizer validation failed: '
+                 f'The return type should be {result_type_map[visualizer_type]}. Got {type(result)}.')
+        def inner(*args, **kwargs):
+            _validate_input_args(*args, **kwargs)
+            result = user_function(*args, **kwargs)
+            _validate_result(result)
+            return result
+        return inner
+    return decorating_function
+def tensorleap_metadata(name: str):
+    def decorating_function(user_function: MetadataSectionCallableInterface):
+        for metadata_handler in leap_binder.setup_container.metadata:
+            if metadata_handler.name == name:
+                raise Exception(f'Metadata with name {name} already exists. '
+                                f'Please choose another')
+        leap_binder.set_metadata(user_function, name)
+        def _validate_input_args(sample_id: Union[int, str], preprocess_response: PreprocessResponse):
+            assert isinstance(sample_id, (int, str)), \
+                (f'tensorleap_metadata validation failed: '
+                 f'Argument sample_id should be either int or str. Got {type(sample_id)}.')
+            assert isinstance(preprocess_response, PreprocessResponse), \
+                (f'tensorleap_metadata validation failed: '
+                 f'Argument preprocess_response should be a PreprocessResponse. Got {type(preprocess_response)}.')
+            assert type(sample_id) == preprocess_response.sample_id_type, \
+                (f'tensorleap_metadata validation failed: '
+                 f'Argument sample_id should be as the same type as defined in the preprocess response '
+                 f'{preprocess_response.sample_id_type}. Got {type(sample_id)}.')
+        def _validate_result(result):
+            supported_result_types = (int, str, bool, float, dict, np.floating,
+                                      np.bool_, np.unsignedinteger, np.signedinteger, np.integer)
+            assert isinstance(result, supported_result_types), \
+                (f'tensorleap_metadata validation failed: '
+                 f'Unsupported return type. Got {type(result)}. should be any of {str(supported_result_types)}')
+            if isinstance(result, dict):
+                for key, value in result.items():
+                    assert isinstance(key, str), \
+                        (f'tensorleap_metadata validation failed: '
+                         f'Keys in the return dict should be of type str. Got {type(key)}.')
+                    assert isinstance(value, supported_result_types), \
+                        (f'tensorleap_metadata validation failed: '
+                         f'Values in the return dict should be of type {str(supported_result_types)}. Got {type(value)}.')
+        def inner(sample_id, preprocess_response):
+            _validate_input_args(sample_id, preprocess_response)
+            result = user_function(sample_id, preprocess_response)
+            _validate_result(result)
+            return result
+        return inner
+    return decorating_function
+def tensorleap_preprocess():
+    def decorating_function(user_function: Callable[[], List[PreprocessResponse]]):
+        leap_binder.set_preprocess(user_function)
+        def _validate_input_args(*args, **kwargs):
+            assert len(args) == 0 and len(kwargs) == 0, \
+                (f'tensorleap_preprocess validation failed: '
+                 f'The function should not take any arguments. Got {args} and {kwargs}.')
+        def _validate_result(result):
+            assert isinstance(result, list), \
+                (f'tensorleap_preprocess validation failed: '
+                 f'The return type should be a list. Got {type(result)}.')
+            for i, response in enumerate(result):
+                assert isinstance(response, PreprocessResponse), \
+                    (f'tensorleap_preprocess validation failed: '
+                     f'Element #{i} in the return list should be a PreprocessResponse. Got {type(response)}.')
+            assert len(set(result)) == len(result), \
+                (f'tensorleap_preprocess validation failed: '
+                 f'The return list should not contain duplicate PreprocessResponse objects.')
+        def inner(*args, **kwargs):
+            _validate_input_args(*args, **kwargs)
+            result = user_function()
+            _validate_result(result)
+            return result
+        return inner
+    return decorating_function
+def tensorleap_unlabeled_preprocess():
+    def decorating_function(user_function: Callable[[], PreprocessResponse]):
+        leap_binder.set_unlabeled_data_preprocess(user_function)
+        def _validate_input_args(*args, **kwargs):
+            assert len(args) == 0 and len(kwargs) == 0, \
+                (f'tensorleap_unlabeled_preprocess validation failed: '
+                 f'The function should not take any arguments. Got {args} and {kwargs}.')
+        def _validate_result(result):
+            assert isinstance(result, PreprocessResponse), \
+                (f'tensorleap_unlabeled_preprocess validation failed: '
+                 f'The return type should be a PreprocessResponse. Got {type(result)}.')
+        def inner(*args, **kwargs):
+            _validate_input_args(*args, **kwargs)
+            result = user_function()
+            _validate_result(result)
+            return result
+        return inner
+    return decorating_function
+def tensorleap_input_encoder(name: str):
+    def decorating_function(user_function: SectionCallableInterface):
+        for input_handler in leap_binder.setup_container.inputs:
+            if input_handler.name == name:
+                raise Exception(f'Input with name {name} already exists. '
+                                f'Please choose another')
+        leap_binder.set_input(user_function, name)
+        def _validate_input_args(sample_id: Union[int, str], preprocess_response: PreprocessResponse):
+            assert isinstance(sample_id, (int, str)), \
+                (f'tensorleap_input_encoder validation failed: '
+                 f'Argument sample_id should be either int or str. Got {type(sample_id)}.')
+            assert isinstance(preprocess_response, PreprocessResponse), \
+                (f'tensorleap_input_encoder validation failed: '
+                 f'Argument preprocess_response should be a PreprocessResponse. Got {type(preprocess_response)}.')
+            assert type(sample_id) == preprocess_response.sample_id_type, \
+                (f'tensorleap_input_encoder validation failed: '
+                 f'Argument sample_id should be as the same type as defined in the preprocess response '
+                 f'{preprocess_response.sample_id_type}. Got {type(sample_id)}.')
+        def _validate_result(result):
+            assert isinstance(result, np.ndarray), \
+                (f'tensorleap_input_encoder validation failed: '
+                 f'Unsupported return type. Should be a numpy array. Got {type(result)}.')
+            assert result.dtype == np.float32, \
+                (f'tensorleap_input_encoder validation failed: '
+                 f'The return type should be a numpy array of type float32. Got {result.dtype}.')
+        def inner(sample_id, preprocess_response):
+            _validate_input_args(sample_id, preprocess_response)
+            result = user_function(sample_id, preprocess_response)
+            _validate_result(result)
+            return result
+        return inner
+    return decorating_function
+def tensorleap_gt_encoder(name: str):
+    def decorating_function(user_function: SectionCallableInterface):
+        for gt_handler in leap_binder.setup_container.ground_truths:
+            if gt_handler.name == name:
+                raise Exception(f'Input with name {name} already exists. '
+                                f'Please choose another')
+        leap_binder.set_ground_truth(user_function, name)
+        def _validate_input_args(sample_id: Union[int, str], preprocess_response: PreprocessResponse):
+            assert isinstance(sample_id, (int, str)), \
+                (f'tensorleap_gt_encoder validation failed: '
+                 f'Argument sample_id should be either int or str. Got {type(sample_id)}.')
+            assert isinstance(preprocess_response, PreprocessResponse), \
+                (f'tensorleap_gt_encoder validation failed: '
+                 f'Argument preprocess_response should be a PreprocessResponse. Got {type(preprocess_response)}.')
+            assert type(sample_id) == preprocess_response.sample_id_type, \
+                (f'tensorleap_gt_encoder validation failed: '
+                 f'Argument sample_id should be as the same type as defined in the preprocess response '
+                 f'{preprocess_response.sample_id_type}. Got {type(sample_id)}.')
+        def _validate_result(result):
+            assert isinstance(result, np.ndarray), \
+                (f'tensorleap_gt_encoder validation failed: '
+                 f'Unsupported return type. Should be a numpy array. Got {type(result)}.')
+            assert result.dtype == np.float32, \
+                (f'tensorleap_gt_encoder validation failed: '
+                 f'The return type should be a numpy array of type float32. Got {result.dtype}.')
+        def inner(sample_id, preprocess_response):
+            _validate_input_args(sample_id, preprocess_response)
+            result = user_function(sample_id, preprocess_response)
+            _validate_result(result)
+            return result
+        return inner
+    return decorating_function
+def tensorleap_custom_loss(name: str):
+    def decorating_function(user_function: CustomCallableInterface):
+        for loss_handler in leap_binder.setup_container.custom_loss_handlers:
+            if loss_handler.name == name:
+                raise Exception(f'Input with name {name} already exists. '
+                                f'Please choose another')
+        leap_binder.add_custom_loss(user_function, name)
+        def _validate_input_args(*args, **kwargs):
+            try:
+                import tensorflow as tf
+            except ImportError:
+                raise Exception('the input arguments of the custom loss function should be tensorflow tensors')
+            for i, arg in enumerate(args):
+                assert isinstance(arg, tf.Tensor), (f'tensorleap_custom_loss validation failed: '
+                                                    f'Argument #{i} should be a tensorflow tensor. Got {type(arg)}.')
+            for _arg_name, arg in kwargs.items():
+                assert isinstance(arg, tf.Tensor), (f'tensorleap_custom_loss validation failed: '
+                                                    f'Argument {_arg_name} should be a tensorflow tensor. Got {type(arg)}.')
+        def _validate_result(result):
+            try:
+                import tensorflow as tf
+            except ImportError:
+                raise Exception('the input arguments of the custom loss function should be tensorflow tensors')
+            assert isinstance(result, (np.ndarray, tf.Tensor)), \
+                (f'tensorleap_custom_loss validation failed: '
+                 f'The return type should be a numpy array or a tensorflow tensor. Got {type(result)}.')
+        def inner(sample_id, preprocess_response):
+            _validate_input_args(sample_id, preprocess_response)
+            result = user_function(sample_id, preprocess_response)
+            _validate_result(result)
+            return result
+        return inner
+    return decorating_function
+def tensorleap_custom_layer(name: str):
+    def decorating_function(custom_layer):
+        for custom_layer_handler in leap_binder.setup_container.custom_layers.values():
+            if custom_layer_handler.name == name:
+                raise Exception(f'Custom Layer with name {name} already exists. '
+                                f'Please choose another')
+        try:
+            import tensorflow as tf
+        except ImportError:
+            raise Exception('The custom layer should be inherited from tf.keras.layers.Layer')
+        if not issubclass(custom_layer, tf.keras.layers.Layer):
+            raise Exception('The custom layer should be inherited from tf.keras.layers.Layer')
+        leap_binder.set_custom_layer(custom_layer, name)
+        return custom_layer
+    return decorating_function

{code_loader-1.0.50 → code_loader-1.0.52}/code_loader/leaploader.py RENAMED Viewed

@@ -2,10 +2,11 @@
 import importlib.util
 import io
 import sys
+import time
 from contextlib import redirect_stdout
 from functools import lru_cache
 from pathlib import Path
-from typing import Dict, List, Iterable, Union, Any
+from typing import Dict, List, Iterable, Union, Any, Type
 import numpy as np
 import numpy.typing as npt
@@ -27,6 +28,8 @@ class LeapLoader:
         self.code_entry_name = code_entry_name
         self.code_path = code_path
+        self._preprocess_result_cached = None
     @lru_cache()
     def exec_script(self) -> None:
         try:
@@ -103,12 +106,16 @@ class LeapLoader:
             for prediction_type in setup.prediction_types
         }
-    def get_sample(self, state: DataStateEnum, idx: int) -> DatasetSample:
+    def get_sample(self, state: DataStateEnum, sample_id: Union[int, str]) -> DatasetSample:
         self.exec_script()
-        sample = DatasetSample(inputs=self._get_inputs(state, idx),
-                               gt=None if state == DataStateEnum.unlabeled else self._get_gt(state, idx),
-                               metadata=self._get_metadata(state, idx),
-                               index=idx,
+        preprocess_result = self._preprocess_result()
+        if state == DataStateEnum.unlabeled and sample_id not in preprocess_result[state].sample_ids:
+            self._preprocess_result(update_unlabeled_preprocess=True)
+        sample = DatasetSample(inputs=self._get_inputs(state, sample_id),
+                               gt=None if state == DataStateEnum.unlabeled else self._get_gt(state, sample_id),
+                               metadata=self._get_metadata(state, sample_id),
+                               index=sample_id,
                                state=state)
         return sample
@@ -148,6 +155,13 @@ class LeapLoader:
         test_result = DatasetTestResultPayload('preprocess')
         try:
             preprocess_result = self._preprocess_result()
+            if self.get_sample_id_type() is str:
+                max_allowed_item_size = np.dtype('<U256').itemsize
+                for state, preprocess_response in preprocess_result.items():
+                    sample_ids_array = np.array(preprocess_response.sample_ids)
+                    if sample_ids_array.dtype.itemsize > max_allowed_item_size:
+                        raise Exception(f"Sample id are too long. Max allowed length is 256 charecters.")
             global_leap_binder.check_preprocess(preprocess_result)
         except Exception as e:
             line_number, file_name, stacktrace = get_root_exception_file_and_line_number()
@@ -279,27 +293,42 @@ class LeapLoader:
         ]
         return ModelSetup(custom_layer_instances)
-    @lru_cache()
-    def _preprocess_result(self) -> Dict[DataStateEnum, PreprocessResponse]:
+    def _preprocess_result(self, update_unlabeled_preprocess=False) -> Dict[DataStateEnum, PreprocessResponse]:
         self.exec_script()
-        return global_leap_binder.get_preprocess_result()
+        if self._preprocess_result_cached is None:
+            self._preprocess_result_cached = global_leap_binder.get_preprocess_result()
+        if update_unlabeled_preprocess:
+            self._preprocess_result_cached[
+                DataStateEnum.unlabeled] = global_leap_binder.get_preprocess_unlabeled_result()
+        return self._preprocess_result_cached
+    def get_preprocess_sample_ids(self, update_unlabeled_preprocess=False) -> Dict[DataStateEnum, Union[List[int], List[str]]]:
+        preprocess_result = self._preprocess_result(update_unlabeled_preprocess)
+        sample_ids = {}
+        for state, preprocess_response in preprocess_result.items():
+            sample_ids[state] = preprocess_response.sample_ids
+        return sample_ids
     def _get_dataset_handlers(self, handlers: Iterable[DatasetBaseHandler],
-                              state: DataStateEnum, idx: int) -> Dict[str, npt.NDArray[np.float32]]:
+                              state: DataStateEnum, sample_id: Union[int, str]) -> Dict[str, npt.NDArray[np.float32]]:
         result_agg = {}
         preprocess_result = self._preprocess_result()
         preprocess_state = preprocess_result[state]
         for handler in handlers:
-            handler_result = handler.function(idx, preprocess_state)
+            handler_result = handler.function(sample_id, preprocess_state)
             handler_name = handler.name
             result_agg[handler_name] = handler_result
         return result_agg
-    def _get_inputs(self, state: DataStateEnum, idx: int) -> Dict[str, npt.NDArray[np.float32]]:
-        return self._get_dataset_handlers(global_leap_binder.setup_container.inputs, state, idx)
+    def _get_inputs(self, state: DataStateEnum, sample_id: Union[int, str]) -> Dict[str, npt.NDArray[np.float32]]:
+        return self._get_dataset_handlers(global_leap_binder.setup_container.inputs, state, sample_id)
-    def _get_gt(self, state: DataStateEnum, idx: int) -> Dict[str, npt.NDArray[np.float32]]:
-        return self._get_dataset_handlers(global_leap_binder.setup_container.ground_truths, state, idx)
+    def _get_gt(self, state: DataStateEnum, sample_id: Union[int, str]) -> Dict[str, npt.NDArray[np.float32]]:
+        return self._get_dataset_handlers(global_leap_binder.setup_container.ground_truths, state, sample_id)
     @lru_cache()
     def _metadata_name_to_type(self) -> Dict[str, DatasetMetadataType]:
@@ -334,12 +363,12 @@ class LeapLoader:
         return converted_value
-    def _get_metadata(self, state: DataStateEnum, idx: int) -> Dict[str, Union[str, int, bool, float]]:
+    def _get_metadata(self, state: DataStateEnum, sample_id: Union[int, str]) -> Dict[str, Union[str, int, bool, float]]:
         result_agg = {}
         preprocess_result = self._preprocess_result()
         preprocess_state = preprocess_result[state]
         for handler in global_leap_binder.setup_container.metadata:
-            handler_result = handler.function(idx, preprocess_state)
+            handler_result = handler.function(sample_id, preprocess_state)
             if isinstance(handler_result, dict):
                 for single_metadata_name, single_metadata_result in handler_result.items():
                     handler_name = f'{handler.name}_{single_metadata_name}'
@@ -349,3 +378,14 @@ class LeapLoader:
                 result_agg[handler_name] = self._convert_metadata_to_correct_type(handler_name, handler_result)
         return result_agg
+    @lru_cache()
+    def get_sample_id_type(self) -> Type:
+        preprocess_results = list(self._preprocess_result().values())
+        id_type = preprocess_results[0].sample_id_type
+        for preprocess_result in preprocess_results:
+            if preprocess_result.sample_id_type != id_type:
+                raise Exception("Different id types in preprocess results")
+        return id_type

{code_loader-1.0.50 → code_loader-1.0.52}/code_loader/utils.py RENAMED Viewed

@@ -10,7 +10,7 @@ from code_loader.contract.datasetclasses import SectionCallableInterface, Prepro
 def to_numpy_return_wrapper(encoder_function: SectionCallableInterface) -> SectionCallableInterface:
-    def numpy_encoder_function(idx: int, samples: PreprocessResponse) -> npt.NDArray[np.float32]:
+    def numpy_encoder_function(idx: Union[int, str], samples: PreprocessResponse) -> npt.NDArray[np.float32]:
         result = encoder_function(idx, samples)
         numpy_result: npt.NDArray[np.float32] = np.array(result)
         return numpy_result

{code_loader-1.0.50 → code_loader-1.0.52}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "code-loader"
-version = "1.0.50"
+version = "1.0.52"
 description = ""
 authors = ["dorhar <doron.harnoy@tensorleap.ai>"]
 license = "MIT"
@@ -15,6 +15,7 @@ include = [
 python = ">=3.8,<3.12"
 numpy = "^1.22.3"
 psutil = "^5.9.5"
+matplotlib = ">=3.3,<3.4"
 requests = "^2.32.3"
 pyyaml = "^6.0.2"