cars-1.0.0rc2-cp312-cp312-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of cars might be problematic.
- cars/__init__.py +86 -0
- cars/applications/__init__.py +40 -0
- cars/applications/application.py +117 -0
- cars/applications/application_constants.py +29 -0
- cars/applications/application_template.py +146 -0
- cars/applications/auxiliary_filling/__init__.py +29 -0
- cars/applications/auxiliary_filling/abstract_auxiliary_filling_app.py +105 -0
- cars/applications/auxiliary_filling/auxiliary_filling_algo.py +475 -0
- cars/applications/auxiliary_filling/auxiliary_filling_from_sensors_app.py +632 -0
- cars/applications/auxiliary_filling/auxiliary_filling_wrappers.py +90 -0
- cars/applications/dem_generation/__init__.py +30 -0
- cars/applications/dem_generation/abstract_dem_generation_app.py +116 -0
- cars/applications/dem_generation/bulldozer_config/base_config.yaml +42 -0
- cars/applications/dem_generation/bulldozer_dem_app.py +641 -0
- cars/applications/dem_generation/bulldozer_memory.py +55 -0
- cars/applications/dem_generation/dem_generation_algo.py +107 -0
- cars/applications/dem_generation/dem_generation_constants.py +32 -0
- cars/applications/dem_generation/dem_generation_wrappers.py +323 -0
- cars/applications/dense_match_filling/__init__.py +30 -0
- cars/applications/dense_match_filling/abstract_dense_match_filling_app.py +242 -0
- cars/applications/dense_match_filling/fill_disp_algo.py +113 -0
- cars/applications/dense_match_filling/fill_disp_constants.py +39 -0
- cars/applications/dense_match_filling/fill_disp_wrappers.py +83 -0
- cars/applications/dense_match_filling/zero_padding_app.py +302 -0
- cars/applications/dense_matching/__init__.py +30 -0
- cars/applications/dense_matching/abstract_dense_matching_app.py +261 -0
- cars/applications/dense_matching/census_mccnn_sgm_app.py +1461 -0
- cars/applications/dense_matching/cpp/__init__.py +0 -0
- cars/applications/dense_matching/cpp/dense_matching_cpp.cp312-win_amd64.dll.a +0 -0
- cars/applications/dense_matching/cpp/dense_matching_cpp.cp312-win_amd64.pyd +0 -0
- cars/applications/dense_matching/cpp/dense_matching_cpp.py +94 -0
- cars/applications/dense_matching/cpp/includes/dense_matching.hpp +58 -0
- cars/applications/dense_matching/cpp/meson.build +9 -0
- cars/applications/dense_matching/cpp/src/bindings.cpp +13 -0
- cars/applications/dense_matching/cpp/src/dense_matching.cpp +207 -0
- cars/applications/dense_matching/dense_matching_algo.py +401 -0
- cars/applications/dense_matching/dense_matching_constants.py +89 -0
- cars/applications/dense_matching/dense_matching_wrappers.py +951 -0
- cars/applications/dense_matching/disparity_grid_algo.py +597 -0
- cars/applications/dense_matching/loaders/__init__.py +23 -0
- cars/applications/dense_matching/loaders/config_census_sgm_default.json +31 -0
- cars/applications/dense_matching/loaders/config_census_sgm_homogeneous.json +30 -0
- cars/applications/dense_matching/loaders/config_census_sgm_mountain_and_vegetation.json +30 -0
- cars/applications/dense_matching/loaders/config_census_sgm_shadow.json +30 -0
- cars/applications/dense_matching/loaders/config_census_sgm_sparse.json +36 -0
- cars/applications/dense_matching/loaders/config_census_sgm_urban.json +30 -0
- cars/applications/dense_matching/loaders/config_mapping.json +13 -0
- cars/applications/dense_matching/loaders/config_mccnn.json +28 -0
- cars/applications/dense_matching/loaders/global_land_cover_map.tif +0 -0
- cars/applications/dense_matching/loaders/pandora_loader.py +593 -0
- cars/applications/dsm_filling/__init__.py +32 -0
- cars/applications/dsm_filling/abstract_dsm_filling_app.py +101 -0
- cars/applications/dsm_filling/border_interpolation_app.py +278 -0
- cars/applications/dsm_filling/bulldozer_config/base_config.yaml +44 -0
- cars/applications/dsm_filling/bulldozer_filling_app.py +288 -0
- cars/applications/dsm_filling/exogenous_filling_app.py +341 -0
- cars/applications/dsm_merging/__init__.py +28 -0
- cars/applications/dsm_merging/abstract_dsm_merging_app.py +101 -0
- cars/applications/dsm_merging/weighted_fusion_app.py +639 -0
- cars/applications/grid_correction/__init__.py +30 -0
- cars/applications/grid_correction/abstract_grid_correction_app.py +103 -0
- cars/applications/grid_correction/grid_correction_app.py +557 -0
- cars/applications/grid_generation/__init__.py +30 -0
- cars/applications/grid_generation/abstract_grid_generation_app.py +142 -0
- cars/applications/grid_generation/epipolar_grid_generation_app.py +327 -0
- cars/applications/grid_generation/grid_generation_algo.py +388 -0
- cars/applications/grid_generation/grid_generation_constants.py +46 -0
- cars/applications/grid_generation/transform_grid.py +88 -0
- cars/applications/ground_truth_reprojection/__init__.py +30 -0
- cars/applications/ground_truth_reprojection/abstract_ground_truth_reprojection_app.py +137 -0
- cars/applications/ground_truth_reprojection/direct_localization_app.py +629 -0
- cars/applications/ground_truth_reprojection/ground_truth_reprojection_algo.py +275 -0
- cars/applications/point_cloud_outlier_removal/__init__.py +30 -0
- cars/applications/point_cloud_outlier_removal/abstract_outlier_removal_app.py +385 -0
- cars/applications/point_cloud_outlier_removal/outlier_removal_algo.py +392 -0
- cars/applications/point_cloud_outlier_removal/outlier_removal_constants.py +43 -0
- cars/applications/point_cloud_outlier_removal/small_components_app.py +522 -0
- cars/applications/point_cloud_outlier_removal/statistical_app.py +528 -0
- cars/applications/rasterization/__init__.py +30 -0
- cars/applications/rasterization/abstract_pc_rasterization_app.py +183 -0
- cars/applications/rasterization/rasterization_algo.py +534 -0
- cars/applications/rasterization/rasterization_constants.py +38 -0
- cars/applications/rasterization/rasterization_wrappers.py +639 -0
- cars/applications/rasterization/simple_gaussian_app.py +1152 -0
- cars/applications/resampling/__init__.py +28 -0
- cars/applications/resampling/abstract_resampling_app.py +187 -0
- cars/applications/resampling/bicubic_resampling_app.py +760 -0
- cars/applications/resampling/resampling_algo.py +590 -0
- cars/applications/resampling/resampling_constants.py +36 -0
- cars/applications/resampling/resampling_wrappers.py +309 -0
- cars/applications/sensors_subsampling/__init__.py +32 -0
- cars/applications/sensors_subsampling/abstract_subsampling_app.py +109 -0
- cars/applications/sensors_subsampling/rasterio_subsampling_app.py +420 -0
- cars/applications/sensors_subsampling/subsampling_algo.py +108 -0
- cars/applications/sparse_matching/__init__.py +30 -0
- cars/applications/sparse_matching/abstract_sparse_matching_app.py +599 -0
- cars/applications/sparse_matching/sift_app.py +724 -0
- cars/applications/sparse_matching/sparse_matching_algo.py +360 -0
- cars/applications/sparse_matching/sparse_matching_constants.py +66 -0
- cars/applications/sparse_matching/sparse_matching_wrappers.py +282 -0
- cars/applications/triangulation/__init__.py +32 -0
- cars/applications/triangulation/abstract_triangulation_app.py +227 -0
- cars/applications/triangulation/line_of_sight_intersection_app.py +1243 -0
- cars/applications/triangulation/pc_transform.py +552 -0
- cars/applications/triangulation/triangulation_algo.py +371 -0
- cars/applications/triangulation/triangulation_constants.py +38 -0
- cars/applications/triangulation/triangulation_wrappers.py +259 -0
- cars/bundleadjustment.py +750 -0
- cars/cars.py +179 -0
- cars/conf/__init__.py +23 -0
- cars/conf/geoid/egm96.grd +0 -0
- cars/conf/geoid/egm96.grd.hdr +15 -0
- cars/conf/input_parameters.py +156 -0
- cars/conf/mask_cst.py +35 -0
- cars/core/__init__.py +23 -0
- cars/core/cars_logging.py +402 -0
- cars/core/constants.py +191 -0
- cars/core/constants_disparity.py +50 -0
- cars/core/datasets.py +140 -0
- cars/core/geometry/__init__.py +27 -0
- cars/core/geometry/abstract_geometry.py +1119 -0
- cars/core/geometry/shareloc_geometry.py +598 -0
- cars/core/inputs.py +568 -0
- cars/core/outputs.py +176 -0
- cars/core/preprocessing.py +722 -0
- cars/core/projection.py +843 -0
- cars/core/roi_tools.py +215 -0
- cars/core/tiling.py +774 -0
- cars/core/utils.py +164 -0
- cars/data_structures/__init__.py +23 -0
- cars/data_structures/cars_dataset.py +1544 -0
- cars/data_structures/cars_dict.py +74 -0
- cars/data_structures/corresponding_tiles_tools.py +186 -0
- cars/data_structures/dataframe_converter.py +185 -0
- cars/data_structures/format_transformation.py +297 -0
- cars/devibrate.py +689 -0
- cars/extractroi.py +264 -0
- cars/orchestrator/__init__.py +23 -0
- cars/orchestrator/achievement_tracker.py +125 -0
- cars/orchestrator/cluster/__init__.py +37 -0
- cars/orchestrator/cluster/abstract_cluster.py +250 -0
- cars/orchestrator/cluster/abstract_dask_cluster.py +381 -0
- cars/orchestrator/cluster/dask_cluster_tools.py +103 -0
- cars/orchestrator/cluster/dask_config/README.md +94 -0
- cars/orchestrator/cluster/dask_config/dask.yaml +21 -0
- cars/orchestrator/cluster/dask_config/distributed.yaml +70 -0
- cars/orchestrator/cluster/dask_config/jobqueue.yaml +26 -0
- cars/orchestrator/cluster/dask_config/reference_confs/dask-schema.yaml +137 -0
- cars/orchestrator/cluster/dask_config/reference_confs/dask.yaml +26 -0
- cars/orchestrator/cluster/dask_config/reference_confs/distributed-schema.yaml +1009 -0
- cars/orchestrator/cluster/dask_config/reference_confs/distributed.yaml +273 -0
- cars/orchestrator/cluster/dask_config/reference_confs/jobqueue.yaml +212 -0
- cars/orchestrator/cluster/dask_jobqueue_utils.py +204 -0
- cars/orchestrator/cluster/local_dask_cluster.py +116 -0
- cars/orchestrator/cluster/log_wrapper.py +728 -0
- cars/orchestrator/cluster/mp_cluster/__init__.py +27 -0
- cars/orchestrator/cluster/mp_cluster/mp_factorizer.py +212 -0
- cars/orchestrator/cluster/mp_cluster/mp_objects.py +535 -0
- cars/orchestrator/cluster/mp_cluster/mp_tools.py +93 -0
- cars/orchestrator/cluster/mp_cluster/mp_wrapper.py +505 -0
- cars/orchestrator/cluster/mp_cluster/multiprocessing_cluster.py +986 -0
- cars/orchestrator/cluster/mp_cluster/multiprocessing_profiler.py +399 -0
- cars/orchestrator/cluster/pbs_dask_cluster.py +207 -0
- cars/orchestrator/cluster/sequential_cluster.py +139 -0
- cars/orchestrator/cluster/slurm_dask_cluster.py +234 -0
- cars/orchestrator/memory_tools.py +47 -0
- cars/orchestrator/orchestrator.py +755 -0
- cars/orchestrator/orchestrator_constants.py +29 -0
- cars/orchestrator/registry/__init__.py +23 -0
- cars/orchestrator/registry/abstract_registry.py +143 -0
- cars/orchestrator/registry/compute_registry.py +106 -0
- cars/orchestrator/registry/id_generator.py +116 -0
- cars/orchestrator/registry/replacer_registry.py +213 -0
- cars/orchestrator/registry/saver_registry.py +363 -0
- cars/orchestrator/registry/unseen_registry.py +118 -0
- cars/orchestrator/tiles_profiler.py +279 -0
- cars/pipelines/__init__.py +26 -0
- cars/pipelines/conf_resolution/conf_final_resolution.yaml +5 -0
- cars/pipelines/conf_resolution/conf_first_resolution.yaml +4 -0
- cars/pipelines/conf_resolution/conf_intermediate_resolution.yaml +2 -0
- cars/pipelines/default/__init__.py +26 -0
- cars/pipelines/default/default_pipeline.py +1088 -0
- cars/pipelines/filling/__init__.py +26 -0
- cars/pipelines/filling/filling.py +981 -0
- cars/pipelines/formatting/__init__.py +26 -0
- cars/pipelines/formatting/formatting.py +186 -0
- cars/pipelines/merging/__init__.py +26 -0
- cars/pipelines/merging/merging.py +439 -0
- cars/pipelines/parameters/__init__.py +0 -0
- cars/pipelines/parameters/advanced_parameters.py +256 -0
- cars/pipelines/parameters/advanced_parameters_constants.py +68 -0
- cars/pipelines/parameters/application_parameters.py +72 -0
- cars/pipelines/parameters/depth_map_inputs.py +0 -0
- cars/pipelines/parameters/dsm_inputs.py +349 -0
- cars/pipelines/parameters/dsm_inputs_constants.py +25 -0
- cars/pipelines/parameters/output_constants.py +52 -0
- cars/pipelines/parameters/output_parameters.py +438 -0
- cars/pipelines/parameters/sensor_inputs.py +859 -0
- cars/pipelines/parameters/sensor_inputs_constants.py +51 -0
- cars/pipelines/parameters/sensor_loaders/__init__.py +29 -0
- cars/pipelines/parameters/sensor_loaders/basic_classif_loader.py +86 -0
- cars/pipelines/parameters/sensor_loaders/basic_image_loader.py +98 -0
- cars/pipelines/parameters/sensor_loaders/pivot_classif_loader.py +90 -0
- cars/pipelines/parameters/sensor_loaders/pivot_image_loader.py +105 -0
- cars/pipelines/parameters/sensor_loaders/sensor_loader.py +93 -0
- cars/pipelines/parameters/sensor_loaders/sensor_loader_template.py +71 -0
- cars/pipelines/parameters/sensor_loaders/slurp_classif_loader.py +86 -0
- cars/pipelines/pipeline.py +119 -0
- cars/pipelines/pipeline_constants.py +38 -0
- cars/pipelines/pipeline_template.py +135 -0
- cars/pipelines/subsampling/__init__.py +26 -0
- cars/pipelines/subsampling/subsampling.py +358 -0
- cars/pipelines/surface_modeling/__init__.py +26 -0
- cars/pipelines/surface_modeling/surface_modeling.py +2098 -0
- cars/pipelines/tie_points/__init__.py +26 -0
- cars/pipelines/tie_points/tie_points.py +536 -0
- cars/starter.py +167 -0
- cars-1.0.0rc2.dist-info/DELVEWHEEL +2 -0
- cars-1.0.0rc2.dist-info/METADATA +289 -0
- cars-1.0.0rc2.dist-info/RECORD +225 -0
- cars-1.0.0rc2.dist-info/WHEEL +4 -0
- cars-1.0.0rc2.dist-info/entry_points.txt +8 -0
- cars.libs/libgcc_s_seh-1-b2494fcbd4d80cf2c98fdd5261f6d850.dll +0 -0
- cars.libs/libstdc++-6-e9b0d12ae0e9555bbae55e8dfd08c3f7.dll +0 -0
- cars.libs/libwinpthread-1-7882d1b093714ccdfaf4e0789a817792.dll +0 -0
cars/orchestrator/cluster/mp_cluster/multiprocessing_cluster.py
@@ -0,0 +1,986 @@
+#!/usr/bin/env python
+# coding: utf8
+#
+# Copyright (c) 2020 Centre National d'Etudes Spatiales (CNES).
+#
+# This file is part of CARS
+# (see https://github.com/CNES/cars).
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+"""
+Contains abstract function for multiprocessing Cluster
+"""
+# pylint: disable=C0302
+
+import copy
+import itertools
+import logging
+import logging.handlers
+
+# Standard imports
+import multiprocessing as mp
+import os
+import platform
+import re
+import shutil
+import signal
+import subprocess
+import threading
+import time
+import traceback
+from functools import wraps
+from multiprocessing import freeze_support
+from queue import Queue
+
+# Third party imports
+from json_checker import And, Checker, Or
+
+from cars.core import cars_logging
+
+# CARS imports
+from cars.orchestrator.cluster import abstract_cluster
+from cars.orchestrator.cluster.log_wrapper import cars_profile
+from cars.orchestrator.cluster.mp_cluster import mp_factorizer, mp_wrapper
+from cars.orchestrator.cluster.mp_cluster.mp_objects import (
+    FactorizedObject,
+    MpDelayed,
+    MpDelayedTask,
+    MpFuture,
+    MpFutureIterator,
+    MpJob,
+)
+from cars.orchestrator.cluster.mp_cluster.mp_tools import replace_data
+from cars.orchestrator.cluster.mp_cluster.multiprocessing_profiler import (
+    MultiprocessingProfiler,
+)
+from cars.orchestrator.memory_tools import get_available_ram, get_total_ram
+
+SYS_PLATFORM = platform.system().lower()
+IS_WIN = "windows" == SYS_PLATFORM
+
+RUN = 0
+TERMINATE = 1
+
+# Refresh time between every iteration, to prevent from freezing
+REFRESH_TIME = 0.05
+
+job_counter = itertools.count()
+
+
+@abstract_cluster.AbstractCluster.register_subclass("mp", "multiprocessing")
+class MultiprocessingCluster(abstract_cluster.AbstractCluster):
+    """
+    MultiprocessingCluster
+    """
+
+    # pylint: disable=too-many-instance-attributes
+    @cars_profile(name="Multiprocessing orchestrator initialization")
+    def __init__(  # pylint: disable=too-many-positional-arguments
+        self,
+        conf_cluster,
+        out_dir,
+        log_dir,
+        launch_worker=True,
+        data_to_propagate=None,
+    ):
+        """
+        Init function of MultiprocessingCluster
+
+        :param conf_cluster: configuration for cluster
+
+        """
+
+        # TODO: remove message
+        if conf_cluster is not None and conf_cluster["mode"] == "mp":
+            message = (
+                " 'mp' keyword has been deprecated, use "
+                "'multiprocessing' instead"
+            )
+            logging.warning(message)
+
+        self.out_dir = out_dir
+        self.log_dir = log_dir
+        # call parent init
+        super().__init__(
+            conf_cluster,
+            out_dir,
+            log_dir,
+            launch_worker=launch_worker,
+            data_to_propagate=data_to_propagate,
+        )
+
+        # retrieve parameters
+        self.nb_workers = self.checked_conf_cluster["nb_workers"]
+        self.mp_mode = self.checked_conf_cluster["mp_mode"]
+        self.task_timeout = self.checked_conf_cluster["task_timeout"]
+        self.max_tasks_per_worker = self.checked_conf_cluster[
+            "max_tasks_per_worker"
+        ]
+        self.dump_to_disk = self.checked_conf_cluster["dump_to_disk"]
+        self.per_job_timeout = self.checked_conf_cluster["per_job_timeout"]
+        self.factorize_tasks = self.checked_conf_cluster["factorize_tasks"]
+        # Set multiprocessing mode
+        self.mp_mode = self.checked_conf_cluster["mp_mode"]
+
+        if IS_WIN:
+            self.mp_mode = "spawn"
+            logging.warning(
+                "{} is not functionnal in windows,"
+                "spawn will be used instead".format(self.mp_mode)
+            )
+
+        self.launch_worker = launch_worker
+
+        self.tmp_dir = None
+
+        # affinity issues caused by numpy
+        if IS_WIN is False:
+            os.system(
+                "taskset -p 0xffffffff %d > /dev/null 2>&1" % os.getpid()
+            )
+
+        if self.launch_worker:
+            # Create wrapper object
+            if self.dump_to_disk:
+                if self.out_dir is None:
+                    raise RuntimeError("Not out_dir provided")
+                if not os.path.exists(self.out_dir):
+                    os.makedirs(self.out_dir)
+                self.tmp_dir = os.path.join(self.out_dir, "tmp_save_disk")
+                if not os.path.exists(self.tmp_dir):
+                    os.makedirs(self.tmp_dir)
+                self.wrapper = mp_wrapper.WrapperDisk(self.tmp_dir)
+            else:
+                self.wrapper = mp_wrapper.WrapperNone(None)
+
+            # Create pool
+            ctx_in_main = mp.get_context(self.mp_mode)
+            # import cars for env variables firts
+            # import cars pipelines for numba compilation
+            ctx_in_main.set_forkserver_preload(["cars", "cars.pipelines"])
+            self.pool = ctx_in_main.Pool(
+                self.nb_workers,
+                initializer=freeze_support,
+                maxtasksperchild=self.max_tasks_per_worker,
+            )
+
+            self.queue = Queue()
+            self.task_cache = {}
+
+            # Variable used for cleaning
+            # Clone of iterator future list
+            self.cl_future_list = []
+
+            # set the exception hook
+            threading.excepthook = log_error_hook
+
+            # Refresh worker
+            self.refresh_worker = threading.Thread(
+                target=MultiprocessingCluster.refresh_task_cache,
+                args=(
+                    self.pool,
+                    self.task_cache,
+                    self.queue,
+                    self.per_job_timeout,
+                    self.cl_future_list,
+                    self.nb_workers,
+                    self.wrapper,
+                ),
+            )
+            self.refresh_worker.daemon = True
+            self.refresh_worker._state = RUN
+            self.refresh_worker.start()
+
+            # Profile pool
+            mp_dataframe = None
+            timer = None
+            if self.data_to_propagate is not None:
+                mp_dataframe = self.data_to_propagate.get("mp_dataframe", None)
+                timer = self.data_to_propagate.get("mp_timer", None)
+
+            self.profiler = MultiprocessingProfiler(
+                self.pool,
+                self.log_dir,
+                self.checked_conf_cluster["max_ram_per_worker"],
+                mp_dataframe=mp_dataframe,
+                timer=timer,
+            )
+
+            self.data_to_propagate = {
+                "mp_dataframe": self.profiler.memory_data,
+                "mp_timer": self.profiler.timer,
+            }
+
+    def check_conf(self, conf):
+        """
+        Check configuration
+
+        :param conf: configuration to check
+        :type conf: dict
+
+        :return: overloaded configuration
+        :rtype: dict
+
+        """
+
+        # init conf
+        if conf is not None:
+            overloaded_conf = conf.copy()
+        else:
+            conf = {}
+            overloaded_conf = {}
+
+        available_cpu = mp.cpu_count()  # TODO returns full node nb cpus
+        # TODO robustify if a partial node is used
+        # One process per cpu for memory usage estimated
+
+        # Modify some env variables for memory usage
+        # TODO
+        # set ITK_GLOBAL_DEFAULT_NUMBER_OF_THREADS = 1
+
+        # Overload conf
+        overloaded_conf["mode"] = conf.get("mode", "multiprocessing")
+        overloaded_conf["mp_mode"] = conf.get("mp_mode", "forkserver")
+        overloaded_conf["max_ram_per_worker"] = conf.get(
+            "max_ram_per_worker", 2000
+        )
+
+        nb_workers = conf.get("nb_workers", "auto")
+        if nb_workers == "auto":
+            logging.info("auto mode : nb_workers will be set automatically")
+            # Compute parameters for auto mode
+            nb_workers = compute_conf_auto_mode(
+                IS_WIN, overloaded_conf["max_ram_per_worker"]
+            )
+
+        overloaded_conf["nb_workers"] = min(available_cpu, nb_workers)
+        overloaded_conf["task_timeout"] = conf.get("task_timeout", 600)
+        overloaded_conf["max_tasks_per_worker"] = conf.get(
+            "max_tasks_per_worker", 10
+        )
+        overloaded_conf["dump_to_disk"] = conf.get("dump_to_disk", True)
+        overloaded_conf["per_job_timeout"] = conf.get("per_job_timeout", 600)
+        overloaded_conf["factorize_tasks"] = conf.get("factorize_tasks", True)
+
+        cluster_schema = {
+            "mode": str,
+            "dump_to_disk": bool,
+            "mp_mode": str,
+            "nb_workers": And(int, lambda x: x > 0),
+            "task_timeout": And(int, lambda x: x > 0),
+            "max_ram_per_worker": And(Or(float, int), lambda x: x > 0),
+            "max_tasks_per_worker": And(int, lambda x: x > 0),
+            "per_job_timeout": Or(float, int),
+            "factorize_tasks": bool,
+        }
+
+        # Check conf
+        checker = Checker(cluster_schema)
+        checker.validate(overloaded_conf)
+
+        return overloaded_conf
+
+    def get_delayed_type(self):
+        """
+        Get delayed type
+        """
+        return MpDelayed
+
+    def cleanup(self, keep_shared_dir=False):
+        """
+        Cleanup cluster
+        :param keep_shared_dir: do not clean directory of shared objects
+        """
+
+        # Save profiling
+        self.profiler.save_plot()
+
+        # clean profiler
+        self.profiler.cleanup()
+
+        # Terminate worker
+        self.refresh_worker._state = TERMINATE  # pylint: disable=W0212
+        while self.refresh_worker.is_alive():
+            time.sleep(0)
+
+        # close pool
+        self.pool.terminate()
+        self.pool.join()
+
+        # clean tmpdir if exists
+        self.wrapper.cleanup(keep_shared_dir=keep_shared_dir)
+
+        if not keep_shared_dir:
+            if self.tmp_dir is not None:
+                shutil.rmtree(self.tmp_dir)
+
+    def scatter(self, data):
+        """
+        Distribute data through workers
+
+        :param data: data to dump
+        """
+        return self.wrapper.scatter_obj(data)
+
+    def create_task_wrapped(self, func, nout=1):
+        """
+        Create task
+
+        :param func: function
+        :param nout: number of outputs
+        """
+
+        @wraps(func)
+        def mp_delayed_builder(*argv, **kwargs):
+            """
+            Create a MPDelayed builder
+
+            :param argv: args of func
+            :param kwargs: kwargs of func
+            """
+            new_kwargs = kwargs
+            new_kwargs["log_dir"] = self.worker_log_dir
+            new_kwargs["log_level"] = self.log_level
+            new_kwargs["log_fun"] = func
+            # create delayed_task
+            delayed_task = MpDelayedTask(
+                cars_logging.logger_func, list(argv), new_kwargs
+            )
+
+            delayed_object_list = []
+            for idx in range(nout):
+                delayed_object_list.append(
+                    MpDelayed(delayed_task, return_index=idx)
+                )
+
+            res = None
+            if len(delayed_object_list) == 1:
+                res = delayed_object_list[0]
+            else:
+                res = (*delayed_object_list,)
+
+            return res
+
+        return mp_delayed_builder
+
+    def start_tasks(self, task_list):
+        """
+        Start all tasks
+
+        :param task_list: task list
+        """
+        memorize = {}
+        # Use a copy of input delayed
+        task_list = copy.deepcopy(task_list)
+        if self.factorize_tasks:
+            mp_factorizer.factorize_delayed(task_list)
+        future_list = [self.rec_start(task, memorize) for task in task_list]
+        # signal that we reached the end of this batch
+        self.queue.put("END_BATCH")
+        return future_list
+
+    def rec_start(self, delayed_object, memorize):
+        """
+        Record task
+
+        :param delayed_object: delayed object to record
+        :type delayed_object: MpDelayed
+        :param memorize: list of MpDelayed already recorded
+
+        """
+        # check if this task is already started
+        if delayed_object in memorize.keys():
+            return memorize[delayed_object]
+
+        can_run = True
+
+        current_delayed_task = delayed_object.delayed_task
+
+        # Modify delayed with wrapper here
+        current_delayed_task.modify_delayed_task(self.wrapper)
+
+        def transform_delayed_to_mp_job(args_or_kawargs):
+            """
+            Replace MpDelayed in list or dict by a MpJob
+
+            :param args_or_kawargs: list or dict of data
+            """
+
+            def transform_mp_delayed_to_jobs(obj):
+                """
+                Replace MpDelayed by MpJob
+
+                :param data: data to replace if necessary
+                """
+
+                new_data = obj
+                if isinstance(obj, MpDelayed):
+                    rec_future = self.rec_start(obj, memorize)
+                    new_data = MpJob(
+                        rec_future.mp_future_task.job_id,
+                        rec_future.return_index,
+                    )
+                return new_data
+
+            # replace data
+            return replace_data(args_or_kawargs, transform_mp_delayed_to_jobs)
+
+        # Transform MpDelayed to MpJob
+
+        filt_args = transform_delayed_to_mp_job(current_delayed_task.args)
+
+        filt_kw = transform_delayed_to_mp_job(current_delayed_task.kw_args)
+
+        # Check if can be run
+        dependencies = compute_dependencies(filt_args, filt_kw)
+        can_run = True
+        if len(dependencies) > 0:
+            can_run = False
+
+        # start current task
+        task_future = MpFutureTask(self)
+
+        self.queue.put(
+            (
+                task_future.job_id,
+                can_run,
+                current_delayed_task.func,
+                filt_args,
+                filt_kw,
+            )
+        )
+
+        # Create future object
+        object_future = MpFuture(task_future, delayed_object.return_index)
+        memorize[delayed_object] = object_future
+
+        # Create other futures associated to this task
+        for other_delayed_obj in current_delayed_task.associated_objects:
+            if other_delayed_obj != delayed_object:
+                memorize[other_delayed_obj] = MpFuture(
+                    task_future, other_delayed_obj.return_index
+                )
+
+        return object_future
+
+    # pylint: disable=too-many-positional-arguments
+    @staticmethod  # noqa: C901
+    def refresh_task_cache(  # noqa: C901
+        pool,
+        task_cache,
+        in_queue,
+        per_job_timeout,
+        cl_future_list,
+        nb_workers,
+        wrapper_obj,
+    ):
+        """
+        Refresh task cache
+
+        :param task_cache: task cache list
+        :param in_queue: queue
+        :param per_job_timeout: per job timeout
+        :param cl_future_list: current future list used in iterator
+        :param nb_workers: number of workers
+        """
+        thread = threading.current_thread()
+
+        # initialize lists
+        wait_list = {}
+        in_progress_list = {}
+        dependencies_list = {}
+        done_task_results = {}
+        job_ids_to_launch_prioritized = []
+        max_nb_tasks_running = 2 * nb_workers
+
+        while thread._state == RUN:  # pylint: disable=W0212
+            # wait before next iteration
+            time.sleep(REFRESH_TIME)
+            # get new task from queue
+            if not in_queue.empty():
+                # get nb_workers task from this batch
+                for job_id, can_run, func, args, kw_args in iter(
+                    in_queue.get, "END_BATCH"
+                ):
+                    wait_list[job_id] = [func, args, kw_args]
+                    if can_run:
+                        job_ids_to_launch_prioritized.append(job_id)
+                        # add to dependencies (-1 to identify initial tasks)
+                        dependencies_list[job_id] = [-1]
+                    else:
+                        # get dependencies
+                        dependencies_list[job_id] = compute_dependencies(
+                            args, kw_args
+                        )
+                        if len(dependencies_list[job_id]) == 0:
+                            dependencies_list[job_id] = [-1]
+
+            # check for ready results
+            done_list = []
+            next_priority_tasks = []
+            for job_id, job_id_progress in in_progress_list.items():
+                if job_id_progress.ready():
+                    try:
+                        res = job_id_progress.get(timeout=per_job_timeout)
+                        success = True
+                    except:  # pylint: disable=W0702 # noqa: B001, E722
+                        res = traceback.format_exc()
+                        success = False
+                        logging.error("Exception in worker: {}".format(res))
+                    done_list.append(job_id)
+                    done_task_results[job_id] = [success, res]
+
+                    # remove from dependance list
+                    dependencies_list.pop(job_id)
+
+                    # search related priority task
+                    for job_id2 in wait_list.keys():  # pylint: disable=C0201
+                        depending_tasks = list(dependencies_list[job_id2])
+                        if job_id in depending_tasks:
+                            next_priority_tasks += depending_tasks
+            # remove duplicate dependance task
+            next_priority_tasks = list(dict.fromkeys(next_priority_tasks))
+            # clean done jobs
+            for job_id in done_list:
+                # delete
+                del in_progress_list[job_id]
+                # copy results to futures
+                # (they remove themselves from task_cache
+                task_cache[job_id].set(done_task_results[job_id])
+
+            (
+                ready_list,
+                failed_list,
+            ) = MultiprocessingCluster.get_ready_failed_tasks(
+                wait_list, dependencies_list, done_task_results
+            )
+
+            # add ready task in next_priority_tasks
+            priority_list = list(
+                filter(lambda job_id: job_id in next_priority_tasks, ready_list)
+            )
+
+            job_ids_to_launch_prioritized = update_job_id_priority(
+                job_ids_to_launch_prioritized, priority_list, ready_list
+            )
+
+            # Deal with failed tasks
+            for job_id in failed_list:
+                done_list.append(job_id)
+                done_task_results[job_id] = [
+                    False,
+                    "Failed depending task",
+                ]
+                # copy results to futures
+                # (they remove themselves from task_cache
+                task_cache[job_id].set(done_task_results[job_id])
+                del wait_list[job_id]
+
+            while (
+                len(in_progress_list) < max_nb_tasks_running
+                and len(job_ids_to_launch_prioritized) > 0
+            ):
+                job_id = job_ids_to_launch_prioritized.pop()
+                func, args, kw_args = wait_list[job_id]
+                # replace jobs by real data
+                new_args = replace_job_by_data(args, done_task_results)
+                new_kw_args = replace_job_by_data(kw_args, done_task_results)
+                # launch task
+                in_progress_list[job_id] = pool.apply_async(
+                    func, args=new_args, kwds=new_kw_args
+                )
+                del wait_list[job_id]
+            # find done jobs that can be cleaned
+            cleanable_jobid = []
+
+            for job_id in done_task_results.keys():  # pylint: disable=C0201
+                # check if needed
+                still_need = False
+                for dependance_task_list in dependencies_list.values():
+                    if job_id in dependance_task_list:
+                        still_need = True
+                if not still_need:
+                    cleanable_jobid.append(job_id)
+
+            # clean unused in the future jobs through wrapper
+            for job_id_to_clean in cleanable_jobid:
+                if job_id_to_clean not in get_job_ids_from_futures(
+                    cl_future_list
+                ):
+                    # not needed by iterator -> can be cleaned
+                    # Cleanup with wrapper
+                    wrapper_obj.cleanup_future_res(
+                        done_task_results[job_id_to_clean][1]
+                    )
+                    # cleanup list
+                    done_task_results.pop(job_id_to_clean)
+
+    @staticmethod
+    def get_ready_failed_tasks(wait_list, dependencies_list, done_task_results):
+        """
+        Return the new ready tasks without constraint
+        and failed tasks
+        """
+        ready_list = []
+        failed_list = []
+        done_task_result_keys = done_task_results.keys()
+        for job_id in wait_list.keys():  # pylint: disable=C0201
+            depending_tasks = dependencies_list[job_id]
+            # check if all tasks are finished
+            can_run = True
+            failed = False
+            for depend in list(filter(lambda dep: dep != -1, depending_tasks)):
+                if depend not in done_task_result_keys:
+                    can_run = False
+                else:
+                    if not done_task_results[depend][0]:
+                        # not a success
+                        can_run = False
+                        failed = True
+            if failed:
+                # Add to done list with failed status
+                failed_list.append(job_id)
+            if can_run:
+                ready_list.append(job_id)
+        return ready_list, failed_list
+
+    @staticmethod
+    def get_tasks_without_deps(dependencies_list, ready_list, nb_ready_task):
+        """
+        Return the list of ready tasks without dependencies
+        and not considered like initial task (dependance = -1)
+        """
+        priority_list = []
+        for _ in range(nb_ready_task):
+            task_id = next(
+                filter(
+                    lambda job_id: len(dependencies_list[job_id]) != 1
+                    and dependencies_list[job_id][0] != -1,
+                    ready_list,
+                ),
+                None,
+            )
+            if task_id:
+                priority_list.append(task_id)
+        return priority_list
+
+    def future_iterator(self, future_list, timeout=None):
+        """
+        Start all tasks
+
+        :param future_list: future_list list
+        """
+
+        return MpFutureIterator(future_list, self, timeout=timeout)
+
+
+def get_job_ids_from_futures(future_list):
+    """
+    Get list of jobs ids in future list
+
+    :param future_list: list of futures
+    :type future_list: MpFuture
+
+    :return: list of job id
+    :rtype: list(int)
+    """
+
+    list_ids = []
+
+    for future in future_list:
+        list_ids.append(future.mp_future_task.job_id)
+
+    return list_ids
+
+
+def replace_job_by_data(args_or_kawargs, done_task_results):
+    """
+    Replace MpJob in list or dict by their real data
+
+    :param args_or_kawargs: list or dict of data
+    :param done_task_results: dict of done tasks
+    """
+
+    def get_data(data, done_task_results):
+        """
+        Replace MpJob in list or dict by their real data
+
+        :param data: data to replace if necessary
+        :param done_task_results: dict of done tasks
+        """
+
+        new_data = data
+        if isinstance(data, MpJob):
+            task_id = data.task_id
+            idx = data.r_idx
+
+            full_res = done_task_results[task_id][1]
+            if not done_task_results[task_id][0]:
+                raise RuntimeError("Current task failed {}".format(full_res))
+
+            if isinstance(full_res, tuple):
+                new_data = full_res[idx]
+            else:
+                if idx > 0:
+                    raise ValueError("Asked for index > 0 in a singleton")
+                new_data = full_res
+
+        return new_data
+
+    # replace data
+    return replace_data(args_or_kawargs, get_data, done_task_results)
+
+
+def compute_dependencies(args, kw_args):
+    """
+    Compute dependencies from args and kw_args
+
+    :param args: arguments
+    :type args: list
+    :param kw_args: key arguments
+    :type kw_args: dict
+
+    :return: dependencies
+    :rtype: list
+    """
+
+    def get_job_id(data):
+        """
+        Get job id from data if is MpJob
+
+        :param data
+
+        :return job id if exists, None if doesnt exist
+        :rtype: int
+        """
+        job_id = None
+
+        if isinstance(data, MpJob):
+            job_id = data.task_id
+
+        return job_id
+
+    def get_ids_rec(list_or_dict):
+        """
+        Compute dependencies from list or dict or simple data
+
+        :param list_or_dict: arguments
+        :type list_or_dict: list or dict
+
+        :return: dependencies
+        :rtype: list
+        """
+
+        list_ids = []
+
+        if isinstance(list_or_dict, (list, tuple)):
+            for arg in list_or_dict:
+                list_ids += get_ids_rec(arg)
+
+        elif isinstance(list_or_dict, dict):
+            for key in list_or_dict:
+                list_ids += get_ids_rec(list_or_dict[key])
+
+        elif isinstance(list_or_dict, FactorizedObject):
+            facto_args = list_or_dict.get_args()
+            for arg in facto_args:
+                list_ids += get_ids_rec(arg)
+            facto_kwargs = list_or_dict.get_kwargs()
+            for key in facto_kwargs:
+                list_ids += get_ids_rec(facto_kwargs[key])
+
+        else:
+            current_id = get_job_id(list_or_dict)
+            if current_id is not None:
+                list_ids.append(current_id)
+
+        return list_ids
+
+    # compute dependencies
+    dependencies = get_ids_rec(args) + get_ids_rec(kw_args)
+
+    return list(dict.fromkeys(dependencies))
+
+
+class MpFutureTask:  # pylint: disable=R0903
+    """
+    multiprocessing version of distributed.future
+    """
+
+    def __init__(self, cluster):
+        """
+        Init function of MpFutureTask
+
+        :param cluster: mp cluster
+
+        """
+        self._cluster = cluster
+        self.result = None
+        self._success = None
+        self.event = threading.Event()
+        self.job_id = next(job_counter)
+
+        self.task_cache = cluster.task_cache
+        self.task_cache[self.job_id] = self
+
+        self.associated_futures = []
+
+    def set(self, obj):
+        """
+        Set result to associated delayed object, and clean cache
+
+        :param obj: result object
+        :type obj: tuple(bool, Union(dataset, dataframe))
+
+        """
+        self._success, self.result = obj
+
+        # set result to all futures
+        for future in self.associated_futures:
+            future.set(self._success, self.result)
+
+        del self.task_cache[self.job_id]
+        self._cluster = None
+        self.event.clear()
+
+
+def log_error_hook(args):
+    """
+    Exception hook for cluster thread
+    """
+    exc = "Cluster MP thread failed: {}".format(args.exc_value)
+    logging.error(exc)
+    # Kill thread
+    os.kill(os.getpid(), signal.SIGKILL)
+    raise RuntimeError(exc)
+
+
+def update_job_id_priority(
+    job_ids_to_launch_prioritized, priority_list, ready_list
+):
+    """
+    Update job to launch list with new priority list and ready list
+
+    :return: updated list
+    """
+
+    res = priority_list + ready_list + job_ids_to_launch_prioritized
+    res = list(dict.fromkeys(res))
+
+    return res
+
+
+def compute_conf_auto_mode(is_windows, max_ram_per_worker):
+    """
+    Compute confuration to use in auto mode
+
+    :param is_windows: True if runs on windows
+    :type is_windows: bool
+    :param max_ram_per_worker: max ram per worker in MB
+    :type max_ram_per_worker: int
+    """
+
+    on_slurm, nb_cpu_slurm, max_ram_slurm = get_slurm_data()
+
+    if on_slurm:
+        available_cpu = nb_cpu_slurm
+    else:
+        available_cpu = (
+            mp.cpu_count() if is_windows else len(os.sched_getaffinity(0))
+        )
+    logging.info("available cpu : {}".format(available_cpu))
+
+    if available_cpu == 1:
+        logging.warning("Only one CPU detected.")
+        available_cpu = 2
+    elif available_cpu == 0:
+        logging.warning("No CPU detected.")
+        available_cpu = 2
+
+    if on_slurm:
+        ram_to_use = max_ram_slurm
+    else:
+        ram_to_use = get_total_ram()
+    logging.info("total ram : {}".format(ram_to_use))
+
+    # use 50% of total ram
+    ram_to_use *= 0.5
+
+    possible_workers = int(ram_to_use // max_ram_per_worker)
+    if possible_workers == 0:
+        logging.warning("Not enough memory available : failure might occur")
+    nb_workers_to_use = max(1, min(possible_workers, available_cpu - 1))
+
+    logging.info("Number of workers : {}".format(nb_workers_to_use))
+    logging.info("Max memory per worker : {} MB".format(max_ram_per_worker))
+
+    # Check with available ram
+    available_ram = get_available_ram()
+    if int(nb_workers_to_use) * int(max_ram_per_worker) > available_ram:
+        logging.warning(
+            "CARS will use 50% of total RAM, "
+            " more than currently available RAM"
+        )
+
+    return int(nb_workers_to_use)


+def get_slurm_data():
+    """
+    Get slurm data
+    """
+
+    def get_data(chain, pattern):
+        """
+        Get data from pattern
+
+        :param chain: chain of character to parse
+        :param pattern: pattern to find
+
+        :return: found data
+        """
+
+        match = re.search(pattern, chain)
+        value = None
+        if match:
+            value = match.group(1)
+        return int(value)
+
+    on_slurm = False
+    slurm_nb_cpu = None
+    slurm_max_ram = None
+    try:
+        sub_res = subprocess.run(
+            "scontrol show job $SLURM_JOB_ID",
+            shell=True,
+            capture_output=True,
+            text=True,
+            check=False,
+        )
+        slurm_infos = sub_res.stdout
+
+        slurm_nb_cpu = get_data(slurm_infos, r"ReqTRES=cpu=(\d+)")
+        slurm_max_ram = get_data(slurm_infos, r"ReqTRES=cpu=.*?mem=(\d+)")
+        # convert to Mb
+        slurm_max_ram *= 1024
+        logging.info("Available CPUs in SLURM : {}".format(slurm_nb_cpu))
+        logging.info("Available RAM in SLURM : {}".format(slurm_max_ram))
+
+    except Exception as exc:
+        logging.debug("Not on Slurm cluster")
+        logging.debug(str(exc))
+    if slurm_nb_cpu is not None and slurm_max_ram is not None:
+        on_slurm = True
+
+    return on_slurm, slurm_nb_cpu, slurm_max_ram
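
Note: the check_conf method in the diff above is the full configuration schema of this multiprocessing cluster. The sketch below restates those keys as a Python dict with the defaults the diff assigns; the key names, defaults, and the behaviors noted in the comments come from the code above, while how the dict reaches the orchestrator is not shown in this release and is only assumed here.

# Minimal sketch of a configuration dict accepted by check_conf
# (hypothetical usage; keys and defaults are taken from the diff above).
conf_cluster = {
    "mode": "multiprocessing",   # "mp" still accepted but logs a deprecation warning
    "mp_mode": "forkserver",     # forced to "spawn" on Windows
    "nb_workers": "auto",        # "auto" derives a count from RAM, CPUs and SLURM limits
    "max_ram_per_worker": 2000,  # in MB; drives the "auto" worker count
    "task_timeout": 600,
    "per_job_timeout": 600,
    "max_tasks_per_worker": 10,  # worker process is recycled after this many tasks
    "dump_to_disk": True,        # results spilled under <out_dir>/tmp_save_disk
    "factorize_tasks": True,
}
# "auto" sizing (compute_conf_auto_mode): with 64 GB of RAM and the 2000 MB
# default, int(0.5 * 65536 // 2000) = 16 workers, capped at available CPUs - 1.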