code-loader 1.0.14.tar.gz → 1.0.15.tar.gz
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- {code_loader-1.0.14 → code_loader-1.0.15}/PKG-INFO +1 -1
- {code_loader-1.0.14 → code_loader-1.0.15}/pyproject.toml +1 -1
- code_loader-1.0.14/code_loader/leap_loader_parallelized_base.py +0 -140
- code_loader-1.0.14/code_loader/metric_calculator_parallelized.py +0 -52
- code_loader-1.0.14/code_loader/samples_generator_parallelized.py +0 -65
- code_loader-1.0.14/code_loader/visualizer_calculator_parallelized.py +0 -61
- {code_loader-1.0.14 → code_loader-1.0.15}/LICENSE +0 -0
- {code_loader-1.0.14 → code_loader-1.0.15}/README.md +0 -0
- {code_loader-1.0.14 → code_loader-1.0.15}/code_loader/__init__.py +0 -0
- {code_loader-1.0.14 → code_loader-1.0.15}/code_loader/contract/__init__.py +0 -0
- {code_loader-1.0.14 → code_loader-1.0.15}/code_loader/contract/datasetclasses.py +0 -0
- {code_loader-1.0.14 → code_loader-1.0.15}/code_loader/contract/enums.py +0 -0
- {code_loader-1.0.14 → code_loader-1.0.15}/code_loader/contract/exceptions.py +0 -0
- {code_loader-1.0.14 → code_loader-1.0.15}/code_loader/contract/responsedataclasses.py +0 -0
- {code_loader-1.0.14 → code_loader-1.0.15}/code_loader/contract/visualizer_classes.py +0 -0
- {code_loader-1.0.14 → code_loader-1.0.15}/code_loader/inner_leap_binder/__init__.py +0 -0
- {code_loader-1.0.14 → code_loader-1.0.15}/code_loader/inner_leap_binder/leapbinder.py +0 -0
- {code_loader-1.0.14 → code_loader-1.0.15}/code_loader/leaploader.py +0 -0
- {code_loader-1.0.14 → code_loader-1.0.15}/code_loader/utils.py +0 -0
- {code_loader-1.0.14 → code_loader-1.0.15}/code_loader/visualizers/__init__.py +0 -0
- {code_loader-1.0.14 → code_loader-1.0.15}/code_loader/visualizers/default_visualizers.py +0 -0
code_loader-1.0.14/code_loader/leap_loader_parallelized_base.py
@@ -1,140 +0,0 @@

```python
# mypy: ignore-errors
import multiprocessing
from abc import ABC, abstractmethod
from functools import lru_cache
from queue import Empty
from threading import Thread
from typing import List, Optional, Any
from multiprocessing import Process, Queue
import psutil


class LeapLoaderParallelizedBase(ABC):
    def __init__(self, code_path: str, code_entry_name: str,
                 n_workers: Optional[int] = 2, max_ready_results_in_queue: int = 128,
                 multiprocessing_context: Optional[str] = None) -> None:
        self.multiprocessing_context = multiprocessing
        if multiprocessing_context is not None:
            self.multiprocessing_context = multiprocessing.get_context(multiprocessing_context)

        self.code_entry_name = code_entry_name
        self.code_path = code_path

        if n_workers is not None and n_workers <= 0:
            raise Exception("need at least one worker")
        self.n_workers = n_workers
        self.max_ready_results_in_queue = max_ready_results_in_queue

        self._n_inputs_waiting_to_be_process = 0
        self._inputs_waiting_to_be_process: Optional[Queue] = None
        self._ready_processed_results: Optional[Queue] = None
        self.processes: Optional[List[Process]] = None
        self._generate_inputs_thread: Optional[Thread] = None
        self._should_stop_thread = False

    def _calculate_n_workers_by_hardware(self) -> int:
        p = psutil.Process(self.processes[0].pid)
        memory_usage_in_bytes = p.memory_info().rss
        total_memory_in_bytes = psutil.virtual_memory().total

        n_workers = min(int(multiprocessing.cpu_count()),
                        int(total_memory_in_bytes * 0.5 / memory_usage_in_bytes))
        n_workers = max(n_workers, 1)
        return n_workers

    @abstractmethod
    def _create_and_start_process(self) -> Process:
        pass

    def _run_and_warm_first_process(self):
        pass

    @lru_cache()
    def start(self) -> None:
        self._inputs_waiting_to_be_process = self.multiprocessing_context.Queue(5000)
        self._ready_processed_results = self.multiprocessing_context.Queue(self.max_ready_results_in_queue)

        self._run_and_warm_first_process()
        n_workers = self.n_workers
        if self.n_workers is None:
            n_workers = self._calculate_n_workers_by_hardware()

        if self.processes is None:
            self.processes = []
        for _ in range(n_workers):
            self.processes.append(self._create_and_start_process())

    def _start_process_inputs(self, inputs: List[Any]):
        assert self._inputs_waiting_to_be_process is not None
        assert self._ready_processed_results is not None

        for _input in inputs:
            if self._should_stop_thread:
                break
            self._n_inputs_waiting_to_be_process += 1
            self._inputs_waiting_to_be_process.put(_input)

    def _clear_queues(self):
        if self._generate_inputs_thread is not None:
            self._should_stop_thread = True
            try:
                self._inputs_waiting_to_be_process.get_nowait()
                self._n_inputs_waiting_to_be_process -= 1
            except Empty:
                pass
            self._generate_inputs_thread.join()
            while not self._inputs_waiting_to_be_process.empty():
                try:
                    self._inputs_waiting_to_be_process.get_nowait()
                    self._n_inputs_waiting_to_be_process -= 1
                except Empty:
                    pass

            for _ in range(self._n_inputs_waiting_to_be_process):
                self._get_next_ready_processed_result()

            self._should_stop_thread = False

    def _get_next_ready_processed_result(self):
        result = self._ready_processed_results.get()
        self._n_inputs_waiting_to_be_process -= 1
        return result

    def start_process_inputs(self, inputs: List[Any]):
        self.start()

        self._clear_queues()

        self._generate_inputs_thread = Thread(target=self._start_process_inputs, args=(inputs,))
        self._generate_inputs_thread.start()
        return self._get_next_ready_processed_result

    @staticmethod
    def _release_queue(queue: Queue):
        assert queue is not None
        queue.close()
        queue.join_thread()

    def release(self) -> None:
        if self.processes is None:
            return
        self._clear_queues()

        self._release_queue(self._inputs_waiting_to_be_process)
        self._release_queue(self._ready_processed_results)

        for process in self.processes:
            process.terminate()
            process.kill()
            process.join()
            process.close()

        self.processes = None

    def __del__(self) -> None:
        self.release()

    def check_processes(self) -> None:
        for process in self.processes:
            if not process.is_alive():
                raise Exception(f'Sub process unexpected failure. Exit code: {process.exitcode}')
```
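For orientation, here is a minimal sketch of the contract this removed base class defined: a subclass supplies `_create_and_start_process`, workers drain the shared input queue and push to the ready-results queue, and `start_process_inputs` returns a callable that yields ready results one at a time. `SquaringWorkerPool` and its inputs are hypothetical, invented for illustration; only the base-class API comes from the file above.

```python
# Hypothetical subclass for illustration only; not part of code-loader.
from multiprocessing import Process, Queue

from code_loader.leap_loader_parallelized_base import LeapLoaderParallelizedBase


class SquaringWorkerPool(LeapLoaderParallelizedBase):
    @staticmethod
    def _worker(inputs: Queue, results: Queue) -> None:
        # Each worker process loops forever, squaring whatever it pulls.
        while True:
            x = inputs.get(block=True)
            results.put(x * x)

    def _create_and_start_process(self) -> Process:
        process = self.multiprocessing_context.Process(
            target=SquaringWorkerPool._worker,
            args=(self._inputs_waiting_to_be_process, self._ready_processed_results))
        process.daemon = True
        process.start()
        return process


if __name__ == '__main__':
    # code_path/code_entry_name are unused by this toy subclass.
    pool = SquaringWorkerPool('unused.py', 'unused', n_workers=2)
    get_result = pool.start_process_inputs([1, 2, 3])
    print(sorted(get_result() for _ in range(3)))  # [1, 4, 9] (workers may finish out of order)
    pool.release()
```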
code_loader-1.0.14/code_loader/metric_calculator_parallelized.py
@@ -1,52 +0,0 @@

```python
# mypy: ignore-errors
from typing import Optional, List, Tuple, Dict
from multiprocessing import Process, Queue
from code_loader.leap_loader_parallelized_base import LeapLoaderParallelizedBase
import traceback
from dataclasses import dataclass
import numpy as np
from code_loader.leaploader import LeapLoader


@dataclass
class MetricSerializableError:
    metric_id: str
    metric_name: str
    leap_script_trace: str
    exception_as_str: str


class MetricCalculatorParallelized(LeapLoaderParallelizedBase):
    def __init__(self, code_path: str, code_entry_name: str, n_workers: Optional[int] = 2,
                 max_samples_in_queue: int = 128) -> None:
        super().__init__(code_path, code_entry_name, n_workers, max_samples_in_queue, "spawn")

    @staticmethod
    def _process_func(code_path: str, code_entry_name: str,
                      metrics_to_process: Queue, ready_samples: Queue) -> None:
        import os
        os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

        leap_loader = LeapLoader(code_path, code_entry_name)
        while True:
            metric_id, metric_name, input_arg_name_to_tensor = metrics_to_process.get(block=True)
            try:
                metric_result = leap_loader.metric_by_name()[metric_name].function(**input_arg_name_to_tensor)
            except Exception as e:
                leap_script_trace = traceback.format_exc().split('File "<string>"')[-1]
                ready_samples.put(MetricSerializableError(metric_id, metric_name, leap_script_trace, str(e)))
                continue

            ready_samples.put((metric_id, metric_result))

    def _create_and_start_process(self) -> Process:
        process = self.multiprocessing_context.Process(
            target=MetricCalculatorParallelized._process_func,
            args=(self.code_path, self.code_entry_name, self._inputs_waiting_to_be_process,
                  self._ready_processed_results))
        process.daemon = True
        process.start()
        return process

    def calculate_metrics(self, input_arg_name_to_tensor_list: List[Tuple[str, str, Dict[str, np.array]]]):
        return self.start_process_inputs(input_arg_name_to_tensor_list)
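As `_process_func` above implies, this class consumed `(metric_id, metric_name, tensors)` triples and handed back one result at a time. A usage sketch; the script path, entry name, metric name, and tensor shapes are invented, and a real run needs a leap script defining the metric:

```python
# Illustrative only; identifiers and shapes below are made up.
import numpy as np

from code_loader.metric_calculator_parallelized import MetricCalculatorParallelized

calculator = MetricCalculatorParallelized('/path/to/leap_script.py', 'leap_binder')
get_result = calculator.calculate_metrics([
    ('metric-0', 'accuracy', {'ground_truth': np.zeros((8, 10)), 'prediction': np.zeros((8, 10))}),
])
result = get_result()  # (metric_id, metric_result) on success, MetricSerializableError on failure
calculator.release()
```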
code_loader-1.0.14/code_loader/samples_generator_parallelized.py
@@ -1,65 +0,0 @@

```python
# mypy: ignore-errors
import traceback
from dataclasses import dataclass

from typing import List, Tuple, Optional

from multiprocessing import Process, Queue

from code_loader.leap_loader_parallelized_base import LeapLoaderParallelizedBase
from code_loader.leaploader import LeapLoader
from code_loader.contract.enums import DataStateEnum


@dataclass
class SampleSerializableError:
    state: DataStateEnum
    index: int
    leap_script_trace: str
    exception_as_str: str


class SamplesGeneratorParallelized(LeapLoaderParallelizedBase):
    def __init__(self, code_path: str, code_entry_name: str, n_workers: Optional[int] = 2,
                 max_samples_in_queue: int = 128) -> None:
        super().__init__(code_path, code_entry_name, n_workers, max_samples_in_queue)

    def _create_and_start_process(self) -> Process:
        process = self.multiprocessing_context.Process(
            target=SamplesGeneratorParallelized._process_func,
            args=(self.code_path, self.code_entry_name, self._inputs_waiting_to_be_process,
                  self._ready_processed_results))
        process.daemon = True
        process.start()
        return process

    def _run_and_warm_first_process(self):
        process = self._create_and_start_process()
        self.processes = [process]

        # needed in order to make sure the preprocess func runs once in nonparallel
        self._start_process_inputs([(DataStateEnum.training, 0)])
        self._get_next_ready_processed_result()

    @staticmethod
    def _process_func(code_path: str, code_entry_name: str,
                      samples_to_process: Queue, ready_samples: Queue) -> None:
        import os
        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
        os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

        leap_loader = LeapLoader(code_path, code_entry_name)
        while True:
            state, idx = samples_to_process.get(block=True)
            try:
                sample = leap_loader.get_sample(state, idx)
            except Exception as e:
                leap_script_trace = traceback.format_exc().split('File "<string>"')[-1]
                ready_samples.put(SampleSerializableError(state, idx, leap_script_trace, str(e)))
                continue

            ready_samples.put(sample)

    def generate_samples(self, sample_identities: List[Tuple[DataStateEnum, int]]):
        return self.start_process_inputs(sample_identities)
```
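Unlike the other two subclasses, this one overrides `_run_and_warm_first_process` so the (potentially expensive) preprocess runs once, non-parallel, before the remaining workers start. Usage followed the same getter pattern; a sketch with an invented path and entry name:

```python
# Illustrative only; a real run needs a leap script at this path.
# Sample identities are (DataStateEnum, index) pairs, as consumed by _process_func.
from code_loader.contract.enums import DataStateEnum
from code_loader.samples_generator_parallelized import SamplesGeneratorParallelized

generator = SamplesGeneratorParallelized('/path/to/leap_script.py', 'leap_binder')
get_sample = generator.generate_samples([(DataStateEnum.training, i) for i in range(4)])
samples = [get_sample() for _ in range(4)]  # each: a sample, or SampleSerializableError
generator.release()
```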
code_loader-1.0.14/code_loader/visualizer_calculator_parallelized.py
@@ -1,61 +0,0 @@

```python
# mypy: ignore-errors
from typing import Optional, List, Tuple, Dict
from multiprocessing import Process, Queue

import numpy as np

from code_loader.leap_loader_parallelized_base import LeapLoaderParallelizedBase
from dataclasses import dataclass
from code_loader.leaploader import LeapLoader


@dataclass
class VisualizerSerializableError:
    visualizer_id: str
    visualizer_name: str
    index_in_batch: int
    exception_as_str: str


class VisualizerCalculatorParallelized(LeapLoaderParallelizedBase):
    def __init__(self, code_path: str, code_entry_name: str, n_workers: Optional[int] = 2,
                 max_samples_in_queue: int = 128) -> None:
        super().__init__(code_path, code_entry_name, n_workers, max_samples_in_queue, "spawn")

    @staticmethod
    def _process_func(code_path: str, code_entry_name: str,
                      visualizers_to_process: Queue, ready_visualizations: Queue) -> None:
        import os
        os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

        leap_loader = LeapLoader(code_path, code_entry_name)

        # running preprocessing to sync preprocessing in main thread (can be valuable when preprocess is filling a
        # global param that visualizer is using)
        leap_loader._preprocess_result()
        leap_loader._preprocess_result.cache_clear()

        while True:
            index_in_batch, visualizer_id, visualizer_name, input_arg_name_to_tensor = \
                visualizers_to_process.get(block=True)
            try:
                visualizer_result = \
                    leap_loader.visualizer_by_name()[visualizer_name].function(**input_arg_name_to_tensor)
            except Exception as e:
                ready_visualizations.put(VisualizerSerializableError(
                    visualizer_id, visualizer_name, index_in_batch, str(e)))
                continue

            ready_visualizations.put((index_in_batch, visualizer_id, visualizer_result))

    def _create_and_start_process(self) -> Process:
        process = self.multiprocessing_context.Process(
            target=VisualizerCalculatorParallelized._process_func,
            args=(self.code_path, self.code_entry_name, self._inputs_waiting_to_be_process,
                  self._ready_processed_results))
        process.daemon = True
        process.start()
        return process

    def calculate_visualizers(self, input_arg_name_to_tensor_list: List[Tuple[int, str, str, Dict[str, np.array]]]):
        return self.start_process_inputs(input_arg_name_to_tensor_list)
```
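The visualizer worker differs from the metric worker in two details: it runs the preprocess once at startup (to populate any globals a visualizer reads) and its inputs are 4-tuples keyed by batch index. A call-pattern sketch; the ids, names, path, and tensor contents are invented:

```python
# Illustrative only; the 4-tuple (index_in_batch, visualizer_id, visualizer_name,
# tensors) mirrors what _process_func above unpacks.
import numpy as np

from code_loader.visualizer_calculator_parallelized import VisualizerCalculatorParallelized

calculator = VisualizerCalculatorParallelized('/path/to/leap_script.py', 'leap_binder')
get_result = calculator.calculate_visualizers([
    (0, 'vis-0', 'image_visualizer', {'image': np.zeros((32, 32, 3))}),
])
result = get_result()  # (index_in_batch, visualizer_id, visualizer_result) or VisualizerSerializableError
calculator.release()
```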
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|