PyPI - code-loader - Versions diffs - 1.0.59__tar.gz → 1.0.60.dev2__tar.gz - Mend

code-loader 1.0.59 (tar.gz) → 1.0.60.dev2 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29)

{code_loader-1.0.59 → code_loader-1.0.60.dev2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: code-loader
-Version: 1.0.59
+Version: 1.0.60.dev2
 Summary:
 Home-page: https://github.com/tensorleap/code-loader
 License: MIT

{code_loader-1.0.59 → code_loader-1.0.60.dev2}/code_loader/contract/datasetclasses.py RENAMED Viewed

@@ -154,7 +154,7 @@ class DatasetBaseHandler:
 @dataclass
 class InputHandler(DatasetBaseHandler):
     shape: Optional[List[int]] = None
+    channel_dim: Optional[int] = -1
 @dataclass
 class GroundTruthHandler(DatasetBaseHandler):

{code_loader-1.0.59 → code_loader-1.0.60.dev2}/code_loader/contract/responsedataclasses.py RENAMED Viewed

@@ -20,6 +20,7 @@ class DatasetBaseSectionInstance:
 @dataclass
 class DatasetInputInstance(DatasetBaseSectionInstance):
     shape: List[int]
+    channel_dim: Optional[int] = -1
 @dataclass

{code_loader-1.0.59 → code_loader-1.0.60.dev2}/code_loader/inner_leap_binder/leapbinder.py RENAMED Viewed

@@ -181,13 +181,14 @@ class LeapBinder:
         """
         self.setup_container.unlabeled_data_preprocess = UnlabeledDataPreprocessHandler(function)
-    def set_input(self, function: SectionCallableInterface, name: str) -> None:
+    def set_input(self, function: SectionCallableInterface, name: str, channel_dim: int = -1) -> None:
         """
         Set the input handler function.
         Args:
         function (SectionCallableInterface): The input handler function.
         name (str): The name of the input section.
+        channel_dim (int): The dimension of the channels axis
         Example:
             def input_encoder(subset: PreprocessResponse, index: int) -> np.ndarray:
@@ -197,10 +198,10 @@ class LeapBinder:
                 img = normalize(img)
                 return img
-            leap_binder.set_input(input_encoder, name='input_encoder')
+            leap_binder.set_input(input_encoder, name='input_encoder', channel_dim=-1)
         """
         function = to_numpy_return_wrapper(function)
-        self.setup_container.inputs.append(InputHandler(name, function))
+        self.setup_container.inputs.append(InputHandler(name, function, channel_dim=channel_dim))
         self._encoder_names.append(name)

{code_loader-1.0.59 → code_loader-1.0.60.dev2}/code_loader/inner_leap_binder/leapbinder_decorators.py RENAMED Viewed

@@ -245,14 +245,16 @@ def tensorleap_unlabeled_preprocess():
     return decorating_function
-def tensorleap_input_encoder(name: str):
+def tensorleap_input_encoder(name: str, channel_dim=-1):
     def decorating_function(user_function: SectionCallableInterface):
         for input_handler in leap_binder.setup_container.inputs:
             if input_handler.name == name:
                 raise Exception(f'Input with name {name} already exists. '
                                 f'Please choose another')
+        if channel_dim < 0 and channel_dim != -1:
+            raise Exception(f"Channel dim for input {name} is expected to be either -1 or positive")
-        leap_binder.set_input(user_function, name)
+        leap_binder.set_input(user_function, name, channel_dim=channel_dim)
         def _validate_input_args(sample_id: Union[int, str], preprocess_response: PreprocessResponse):
             assert isinstance(sample_id, (int, str)), \
@@ -273,6 +275,8 @@ def tensorleap_input_encoder(name: str):
             assert result.dtype == np.float32, \
                 (f'tensorleap_input_encoder validation failed: '
                  f'The return type should be a numpy array of type float32. Got {result.dtype}.')
+            assert channel_dim - 1 <= len(result.shape), (f'tensorleap_input_encoder validation failed: '
+                 f'The channel_dim ({channel_dim}) should be <= to the rank of the resulting input rank ({len(result.shape)}).')
         def inner(sample_id, preprocess_response):
             _validate_input_args(sample_id, preprocess_response)

{code_loader-1.0.59 → code_loader-1.0.60.dev2}/code_loader/leaploader.py RENAMED Viewed

@@ -221,8 +221,8 @@ class LeapLoader:
         if global_leap_binder.setup_container.unlabeled_data_preprocess:
             unlabeled_length = global_leap_binder.setup_container.unlabeled_data_preprocess.data_length
         dataset_preprocess = DatasetPreprocess(
-            training_length=setup.preprocess.data_length[DataStateType.training],
-            validation_length=setup.preprocess.data_length[DataStateType.validation],
+            training_length=setup.preprocess.data_length.get(DataStateType.training, 0),
+            validation_length=setup.preprocess.data_length.get(DataStateType.validation, 0),
             test_length=setup.preprocess.data_length.get(DataStateType.test),
             unlabeled_length=unlabeled_length
         )
@@ -231,7 +231,7 @@ class LeapLoader:
         for inp in setup.inputs:
             if inp.shape is None:
                 raise Exception(f"cant calculate shape for input, input name:{inp.name}")
-            inputs.append(DatasetInputInstance(name=inp.name, shape=inp.shape))
+            inputs.append(DatasetInputInstance(name=inp.name, shape=inp.shape, channel_dim=inp.channel_dim))
         ground_truths = []
         for gt in setup.ground_truths:

{code_loader-1.0.59 → code_loader-1.0.60.dev2}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "code-loader"
-version = "1.0.59"
+version = "1.0.60.dev2"
 description = ""
 authors = ["dorhar <doron.harnoy@tensorleap.ai>"]
 license = "MIT"

code_loader-1.0.59/code_loader/code_inegration_processes_manager.py DELETED Viewed

@@ -1,83 +0,0 @@
-# mypy: ignore-errors
-import traceback
-from dataclasses import dataclass
-from typing import List, Tuple, Optional
-from multiprocessing import Process, Queue
-from code_loader.leap_loader_parallelized_base import LeapLoaderParallelizedBase
-from code_loader.leaploader import LeapLoader
-from code_loader.contract.enums import DataStateEnum
-from code_loader.metric_calculator_parallelized import MetricCalculatorParallelized
-from code_loader.samples_generator_parallelized import SamplesGeneratorParallelized
-@dataclass
-class SampleSerializableError:
-    state: DataStateEnum
-    index: int
-    leap_script_trace: str
-    exception_as_str: str
-class CodeIntegrationProcessesManager:
-    def __init__(self, code_path: str, code_entry_name: str, n_workers: Optional[int] = 2,
-                 max_samples_in_queue: int = 128) -> None:
-        self.metric_calculator_parallelized = MetricCalculatorParallelized(code_path, code_entry_name)
-        self.samples_generator_parallelized = SamplesGeneratorParallelized(code_path, code_entry_name)
-    def _create_and_start_process(self) -> Process:
-        process = self.multiprocessing_context.Process(
-            target=CodeIntegrationProcessesManager._process_func,
-            args=(self.code_path, self.code_entry_name, self._inputs_waiting_to_be_process,
-                  self._ready_processed_results))
-        process.daemon = True
-        process.start()
-        return process
-    def _run_and_warm_first_process(self):
-        process = self._create_and_start_process()
-        self.processes = [process]
-        # needed in order to make sure the preprocess func runs once in nonparallel
-        self._start_process_inputs([(DataStateEnum.training, 0)])
-        self._get_next_ready_processed_result()
-    def _operation_decider(self):
-        if self.metric_calculator_parallelized._ready_processed_results.empty() and not \
-            self.metric_calculator_parallelized._inputs_waiting_to_be_process.empty():
-            return 'metric'
-        if self.samples_generator_parallelized._ready_processed_results.empty() and not \
-            self.samples_generator_parallelized._inputs_waiting_to_be_process.empty():
-            return 'dataset'
-    @staticmethod
-    def _process_func(code_path: str, code_entry_name: str,
-                      samples_to_process: Queue, ready_samples: Queue,
-                      metrics_to_process: Queue, ready_metrics: Queue) -> None:
-        import os
-        os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
-        leap_loader = LeapLoader(code_path, code_entry_name)
-        while True:
-            # decide on sample or metric to process
-            state, idx = samples_to_process.get(block=True)
-            leap_loader._preprocess_result()
-            try:
-                sample = leap_loader.get_sample(state, idx)
-            except Exception as e:
-                leap_script_trace = traceback.format_exc().split('File "<string>"')[-1]
-                ready_samples.put(SampleSerializableError(state, idx, leap_script_trace, str(e)))
-                continue
-            ready_samples.put(sample)
-    def generate_samples(self, sample_identities: List[Tuple[DataStateEnum, int]]):
-        return self.start_process_inputs(sample_identities)