cars 1.0.0rc1__cp313-cp313-musllinux_1_2_i686.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cars might be problematic. Click here for more details.

Files changed (202) hide show
  1. cars/__init__.py +74 -0
  2. cars/applications/__init__.py +37 -0
  3. cars/applications/application.py +117 -0
  4. cars/applications/application_constants.py +29 -0
  5. cars/applications/application_template.py +146 -0
  6. cars/applications/auxiliary_filling/__init__.py +29 -0
  7. cars/applications/auxiliary_filling/abstract_auxiliary_filling_app.py +104 -0
  8. cars/applications/auxiliary_filling/auxiliary_filling_algo.py +475 -0
  9. cars/applications/auxiliary_filling/auxiliary_filling_from_sensors_app.py +630 -0
  10. cars/applications/auxiliary_filling/auxiliary_filling_wrappers.py +90 -0
  11. cars/applications/dem_generation/__init__.py +30 -0
  12. cars/applications/dem_generation/abstract_dem_generation_app.py +116 -0
  13. cars/applications/dem_generation/bulldozer_config/base_config.yaml +42 -0
  14. cars/applications/dem_generation/bulldozer_dem_app.py +655 -0
  15. cars/applications/dem_generation/bulldozer_memory.py +55 -0
  16. cars/applications/dem_generation/dem_generation_algo.py +107 -0
  17. cars/applications/dem_generation/dem_generation_constants.py +32 -0
  18. cars/applications/dem_generation/dem_generation_wrappers.py +323 -0
  19. cars/applications/dense_match_filling/__init__.py +30 -0
  20. cars/applications/dense_match_filling/abstract_dense_match_filling_app.py +242 -0
  21. cars/applications/dense_match_filling/fill_disp_algo.py +113 -0
  22. cars/applications/dense_match_filling/fill_disp_constants.py +39 -0
  23. cars/applications/dense_match_filling/fill_disp_wrappers.py +83 -0
  24. cars/applications/dense_match_filling/zero_padding_app.py +302 -0
  25. cars/applications/dense_matching/__init__.py +30 -0
  26. cars/applications/dense_matching/abstract_dense_matching_app.py +261 -0
  27. cars/applications/dense_matching/census_mccnn_sgm_app.py +1460 -0
  28. cars/applications/dense_matching/cpp/__init__.py +0 -0
  29. cars/applications/dense_matching/cpp/dense_matching_cpp.cpython-313-i386-linux-musl.so +0 -0
  30. cars/applications/dense_matching/cpp/dense_matching_cpp.py +94 -0
  31. cars/applications/dense_matching/cpp/includes/dense_matching.hpp +58 -0
  32. cars/applications/dense_matching/cpp/meson.build +9 -0
  33. cars/applications/dense_matching/cpp/src/bindings.cpp +13 -0
  34. cars/applications/dense_matching/cpp/src/dense_matching.cpp +207 -0
  35. cars/applications/dense_matching/dense_matching_algo.py +401 -0
  36. cars/applications/dense_matching/dense_matching_constants.py +89 -0
  37. cars/applications/dense_matching/dense_matching_wrappers.py +951 -0
  38. cars/applications/dense_matching/disparity_grid_algo.py +588 -0
  39. cars/applications/dense_matching/loaders/__init__.py +23 -0
  40. cars/applications/dense_matching/loaders/config_census_sgm_default.json +31 -0
  41. cars/applications/dense_matching/loaders/config_census_sgm_homogeneous.json +30 -0
  42. cars/applications/dense_matching/loaders/config_census_sgm_mountain_and_vegetation.json +30 -0
  43. cars/applications/dense_matching/loaders/config_census_sgm_shadow.json +30 -0
  44. cars/applications/dense_matching/loaders/config_census_sgm_sparse.json +36 -0
  45. cars/applications/dense_matching/loaders/config_census_sgm_urban.json +30 -0
  46. cars/applications/dense_matching/loaders/config_mapping.json +13 -0
  47. cars/applications/dense_matching/loaders/config_mccnn.json +28 -0
  48. cars/applications/dense_matching/loaders/global_land_cover_map.tif +0 -0
  49. cars/applications/dense_matching/loaders/pandora_loader.py +593 -0
  50. cars/applications/dsm_filling/__init__.py +32 -0
  51. cars/applications/dsm_filling/abstract_dsm_filling_app.py +101 -0
  52. cars/applications/dsm_filling/border_interpolation_app.py +270 -0
  53. cars/applications/dsm_filling/bulldozer_config/base_config.yaml +44 -0
  54. cars/applications/dsm_filling/bulldozer_filling_app.py +279 -0
  55. cars/applications/dsm_filling/exogenous_filling_app.py +333 -0
  56. cars/applications/grid_generation/__init__.py +30 -0
  57. cars/applications/grid_generation/abstract_grid_generation_app.py +142 -0
  58. cars/applications/grid_generation/epipolar_grid_generation_app.py +327 -0
  59. cars/applications/grid_generation/grid_correction_app.py +496 -0
  60. cars/applications/grid_generation/grid_generation_algo.py +388 -0
  61. cars/applications/grid_generation/grid_generation_constants.py +46 -0
  62. cars/applications/grid_generation/transform_grid.py +88 -0
  63. cars/applications/ground_truth_reprojection/__init__.py +30 -0
  64. cars/applications/ground_truth_reprojection/abstract_ground_truth_reprojection_app.py +137 -0
  65. cars/applications/ground_truth_reprojection/direct_localization_app.py +629 -0
  66. cars/applications/ground_truth_reprojection/ground_truth_reprojection_algo.py +275 -0
  67. cars/applications/point_cloud_outlier_removal/__init__.py +30 -0
  68. cars/applications/point_cloud_outlier_removal/abstract_outlier_removal_app.py +385 -0
  69. cars/applications/point_cloud_outlier_removal/outlier_removal_algo.py +392 -0
  70. cars/applications/point_cloud_outlier_removal/outlier_removal_constants.py +43 -0
  71. cars/applications/point_cloud_outlier_removal/small_components_app.py +527 -0
  72. cars/applications/point_cloud_outlier_removal/statistical_app.py +531 -0
  73. cars/applications/rasterization/__init__.py +30 -0
  74. cars/applications/rasterization/abstract_pc_rasterization_app.py +183 -0
  75. cars/applications/rasterization/rasterization_algo.py +534 -0
  76. cars/applications/rasterization/rasterization_constants.py +38 -0
  77. cars/applications/rasterization/rasterization_wrappers.py +634 -0
  78. cars/applications/rasterization/simple_gaussian_app.py +1152 -0
  79. cars/applications/resampling/__init__.py +28 -0
  80. cars/applications/resampling/abstract_resampling_app.py +187 -0
  81. cars/applications/resampling/bicubic_resampling_app.py +762 -0
  82. cars/applications/resampling/resampling_algo.py +614 -0
  83. cars/applications/resampling/resampling_constants.py +36 -0
  84. cars/applications/resampling/resampling_wrappers.py +309 -0
  85. cars/applications/sparse_matching/__init__.py +30 -0
  86. cars/applications/sparse_matching/abstract_sparse_matching_app.py +498 -0
  87. cars/applications/sparse_matching/sift_app.py +735 -0
  88. cars/applications/sparse_matching/sparse_matching_algo.py +360 -0
  89. cars/applications/sparse_matching/sparse_matching_constants.py +68 -0
  90. cars/applications/sparse_matching/sparse_matching_wrappers.py +238 -0
  91. cars/applications/triangulation/__init__.py +32 -0
  92. cars/applications/triangulation/abstract_triangulation_app.py +227 -0
  93. cars/applications/triangulation/line_of_sight_intersection_app.py +1243 -0
  94. cars/applications/triangulation/pc_transform.py +552 -0
  95. cars/applications/triangulation/triangulation_algo.py +371 -0
  96. cars/applications/triangulation/triangulation_constants.py +38 -0
  97. cars/applications/triangulation/triangulation_wrappers.py +259 -0
  98. cars/bundleadjustment.py +757 -0
  99. cars/cars.py +177 -0
  100. cars/conf/__init__.py +23 -0
  101. cars/conf/geoid/egm96.grd +0 -0
  102. cars/conf/geoid/egm96.grd.hdr +15 -0
  103. cars/conf/input_parameters.py +156 -0
  104. cars/conf/mask_cst.py +35 -0
  105. cars/core/__init__.py +23 -0
  106. cars/core/cars_logging.py +402 -0
  107. cars/core/constants.py +191 -0
  108. cars/core/constants_disparity.py +50 -0
  109. cars/core/datasets.py +140 -0
  110. cars/core/geometry/__init__.py +27 -0
  111. cars/core/geometry/abstract_geometry.py +1119 -0
  112. cars/core/geometry/shareloc_geometry.py +598 -0
  113. cars/core/inputs.py +568 -0
  114. cars/core/outputs.py +176 -0
  115. cars/core/preprocessing.py +722 -0
  116. cars/core/projection.py +843 -0
  117. cars/core/roi_tools.py +215 -0
  118. cars/core/tiling.py +774 -0
  119. cars/core/utils.py +164 -0
  120. cars/data_structures/__init__.py +23 -0
  121. cars/data_structures/cars_dataset.py +1541 -0
  122. cars/data_structures/cars_dict.py +74 -0
  123. cars/data_structures/corresponding_tiles_tools.py +186 -0
  124. cars/data_structures/dataframe_converter.py +185 -0
  125. cars/data_structures/format_transformation.py +297 -0
  126. cars/devibrate.py +689 -0
  127. cars/extractroi.py +264 -0
  128. cars/orchestrator/__init__.py +23 -0
  129. cars/orchestrator/achievement_tracker.py +125 -0
  130. cars/orchestrator/cluster/__init__.py +37 -0
  131. cars/orchestrator/cluster/abstract_cluster.py +244 -0
  132. cars/orchestrator/cluster/abstract_dask_cluster.py +375 -0
  133. cars/orchestrator/cluster/dask_cluster_tools.py +103 -0
  134. cars/orchestrator/cluster/dask_config/README.md +94 -0
  135. cars/orchestrator/cluster/dask_config/dask.yaml +21 -0
  136. cars/orchestrator/cluster/dask_config/distributed.yaml +70 -0
  137. cars/orchestrator/cluster/dask_config/jobqueue.yaml +26 -0
  138. cars/orchestrator/cluster/dask_config/reference_confs/dask-schema.yaml +137 -0
  139. cars/orchestrator/cluster/dask_config/reference_confs/dask.yaml +26 -0
  140. cars/orchestrator/cluster/dask_config/reference_confs/distributed-schema.yaml +1009 -0
  141. cars/orchestrator/cluster/dask_config/reference_confs/distributed.yaml +273 -0
  142. cars/orchestrator/cluster/dask_config/reference_confs/jobqueue.yaml +212 -0
  143. cars/orchestrator/cluster/dask_jobqueue_utils.py +204 -0
  144. cars/orchestrator/cluster/local_dask_cluster.py +116 -0
  145. cars/orchestrator/cluster/log_wrapper.py +1075 -0
  146. cars/orchestrator/cluster/mp_cluster/__init__.py +27 -0
  147. cars/orchestrator/cluster/mp_cluster/mp_factorizer.py +212 -0
  148. cars/orchestrator/cluster/mp_cluster/mp_objects.py +535 -0
  149. cars/orchestrator/cluster/mp_cluster/mp_tools.py +93 -0
  150. cars/orchestrator/cluster/mp_cluster/mp_wrapper.py +505 -0
  151. cars/orchestrator/cluster/mp_cluster/multiprocessing_cluster.py +873 -0
  152. cars/orchestrator/cluster/mp_cluster/multiprocessing_profiler.py +399 -0
  153. cars/orchestrator/cluster/pbs_dask_cluster.py +207 -0
  154. cars/orchestrator/cluster/sequential_cluster.py +139 -0
  155. cars/orchestrator/cluster/slurm_dask_cluster.py +234 -0
  156. cars/orchestrator/orchestrator.py +905 -0
  157. cars/orchestrator/orchestrator_constants.py +29 -0
  158. cars/orchestrator/registry/__init__.py +23 -0
  159. cars/orchestrator/registry/abstract_registry.py +143 -0
  160. cars/orchestrator/registry/compute_registry.py +106 -0
  161. cars/orchestrator/registry/id_generator.py +116 -0
  162. cars/orchestrator/registry/replacer_registry.py +213 -0
  163. cars/orchestrator/registry/saver_registry.py +363 -0
  164. cars/orchestrator/registry/unseen_registry.py +118 -0
  165. cars/orchestrator/tiles_profiler.py +279 -0
  166. cars/pipelines/__init__.py +26 -0
  167. cars/pipelines/conf_resolution/conf_final_resolution.yaml +5 -0
  168. cars/pipelines/conf_resolution/conf_first_resolution.yaml +2 -0
  169. cars/pipelines/conf_resolution/conf_intermediate_resolution.yaml +2 -0
  170. cars/pipelines/default/__init__.py +26 -0
  171. cars/pipelines/default/default_pipeline.py +786 -0
  172. cars/pipelines/parameters/__init__.py +0 -0
  173. cars/pipelines/parameters/advanced_parameters.py +417 -0
  174. cars/pipelines/parameters/advanced_parameters_constants.py +69 -0
  175. cars/pipelines/parameters/application_parameters.py +71 -0
  176. cars/pipelines/parameters/depth_map_inputs.py +0 -0
  177. cars/pipelines/parameters/dsm_inputs.py +918 -0
  178. cars/pipelines/parameters/dsm_inputs_constants.py +25 -0
  179. cars/pipelines/parameters/output_constants.py +52 -0
  180. cars/pipelines/parameters/output_parameters.py +454 -0
  181. cars/pipelines/parameters/sensor_inputs.py +842 -0
  182. cars/pipelines/parameters/sensor_inputs_constants.py +49 -0
  183. cars/pipelines/parameters/sensor_loaders/__init__.py +29 -0
  184. cars/pipelines/parameters/sensor_loaders/basic_classif_loader.py +86 -0
  185. cars/pipelines/parameters/sensor_loaders/basic_image_loader.py +98 -0
  186. cars/pipelines/parameters/sensor_loaders/pivot_classif_loader.py +90 -0
  187. cars/pipelines/parameters/sensor_loaders/pivot_image_loader.py +105 -0
  188. cars/pipelines/parameters/sensor_loaders/sensor_loader.py +93 -0
  189. cars/pipelines/parameters/sensor_loaders/sensor_loader_template.py +71 -0
  190. cars/pipelines/parameters/sensor_loaders/slurp_classif_loader.py +86 -0
  191. cars/pipelines/pipeline.py +119 -0
  192. cars/pipelines/pipeline_constants.py +31 -0
  193. cars/pipelines/pipeline_template.py +139 -0
  194. cars/pipelines/unit/__init__.py +26 -0
  195. cars/pipelines/unit/unit_pipeline.py +2850 -0
  196. cars/starter.py +167 -0
  197. cars-1.0.0rc1.dist-info/METADATA +292 -0
  198. cars-1.0.0rc1.dist-info/RECORD +202 -0
  199. cars-1.0.0rc1.dist-info/WHEEL +5 -0
  200. cars-1.0.0rc1.dist-info/entry_points.txt +8 -0
  201. cars.libs/libgcc_s-1257a076.so.1 +0 -0
  202. cars.libs/libstdc++-0530927c.so.6.0.32 +0 -0
@@ -0,0 +1,873 @@
1
+ #!/usr/bin/env python
2
+ # coding: utf8
3
+ #
4
+ # Copyright (c) 2020 Centre National d'Etudes Spatiales (CNES).
5
+ #
6
+ # This file is part of CARS
7
+ # (see https://github.com/CNES/cars).
8
+ #
9
+ # Licensed under the Apache License, Version 2.0 (the "License");
10
+ # you may not use this file except in compliance with the License.
11
+ # You may obtain a copy of the License at
12
+ #
13
+ # http://www.apache.org/licenses/LICENSE-2.0
14
+ #
15
+ # Unless required by applicable law or agreed to in writing, software
16
+ # distributed under the License is distributed on an "AS IS" BASIS,
17
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18
+ # See the License for the specific language governing permissions and
19
+ # limitations under the License.
20
+ #
21
+ """
22
+ Contains abstract function for multiprocessing Cluster
23
+ """
24
+ # pylint: disable=C0302
25
+
26
+ import copy
27
+ import itertools
28
+ import logging
29
+ import logging.handlers
30
+
31
+ # Standard imports
32
+ import multiprocessing as mp
33
+ import os
34
+ import platform
35
+ import shutil
36
+ import signal
37
+ import threading
38
+ import time
39
+ import traceback
40
+ from functools import wraps
41
+ from multiprocessing import freeze_support
42
+ from queue import Queue
43
+
44
+ # Third party imports
45
+ from json_checker import And, Checker, Or
46
+
47
+ from cars.core import cars_logging
48
+
49
+ # CARS imports
50
+ from cars.orchestrator.cluster import abstract_cluster
51
+ from cars.orchestrator.cluster.log_wrapper import cars_profile
52
+ from cars.orchestrator.cluster.mp_cluster import mp_factorizer, mp_wrapper
53
+ from cars.orchestrator.cluster.mp_cluster.mp_objects import (
54
+ FactorizedObject,
55
+ MpDelayed,
56
+ MpDelayedTask,
57
+ MpFuture,
58
+ MpFutureIterator,
59
+ MpJob,
60
+ )
61
+ from cars.orchestrator.cluster.mp_cluster.mp_tools import replace_data
62
+ from cars.orchestrator.cluster.mp_cluster.multiprocessing_profiler import (
63
+ MultiprocessingProfiler,
64
+ )
65
+
66
# Lower-cased name of the operating system, as given by the platform module
SYS_PLATFORM = platform.system().lower()
# True only when the code runs on Windows
IS_WIN = SYS_PLATFORM == "windows"

# Values stored in the `_state` attribute of the refresh thread:
# the thread keeps looping while its state is RUN
RUN = 0
TERMINATE = 1

# Refresh time between every iteration, to prevent from freezing
REFRESH_TIME = 0.05

# Module-level increasing counter (0, 1, 2, ...)
# NOTE(review): presumably used to number jobs; its users are not in view
job_counter = itertools.count()
76
+
77
+
78
+ @abstract_cluster.AbstractCluster.register_subclass("mp", "multiprocessing")
79
+ class MultiprocessingCluster(abstract_cluster.AbstractCluster):
80
+ """
81
+ MultiprocessingCluster
82
+ """
83
+
84
+ # pylint: disable=too-many-instance-attributes
85
@cars_profile(name="Multiprocessing orchestrator initialization")
def __init__(  # pylint: disable=too-many-positional-arguments
    self,
    conf_cluster,
    out_dir,
    log_dir,
    launch_worker=True,
    data_to_propagate=None,
):
    """
    Init function of MultiprocessingCluster

    :param conf_cluster: configuration for cluster
    :param out_dir: output directory; when "dump_to_disk" is enabled, the
        "tmp_save_disk" directory is created inside it
    :param log_dir: log directory, forwarded to the parent class and to
        the profiler
    :param launch_worker: when True, the wrapper, the pool, the refresh
        thread and the profiler are created
    :param data_to_propagate: optional data forwarded to the parent class;
        the "mp_dataframe" and "mp_timer" entries of
        self.data_to_propagate, if any, are handed to the profiler

    :raises RuntimeError: if "dump_to_disk" is enabled and out_dir is None
    """

    # TODO: remove message
    if conf_cluster["mode"] == "mp":
        message = (
            " 'mp' keyword has been deprecated, use "
            "'multiprocessing' instead"
        )
        logging.warning(message)

    self.out_dir = out_dir
    self.log_dir = log_dir
    # call parent init
    super().__init__(
        conf_cluster,
        out_dir,
        log_dir,
        launch_worker=launch_worker,
        data_to_propagate=data_to_propagate,
    )

    # retrieve parameters
    # NOTE(review): checked_conf_cluster is presumably set by the parent
    # init from check_conf() - confirm in AbstractCluster
    self.nb_workers = self.checked_conf_cluster["nb_workers"]
    self.mp_mode = self.checked_conf_cluster["mp_mode"]
    self.task_timeout = self.checked_conf_cluster["task_timeout"]
    self.max_tasks_per_worker = self.checked_conf_cluster[
        "max_tasks_per_worker"
    ]
    self.dump_to_disk = self.checked_conf_cluster["dump_to_disk"]
    self.per_job_timeout = self.checked_conf_cluster["per_job_timeout"]
    self.profiling = self.checked_conf_cluster["profiling"]
    self.factorize_tasks = self.checked_conf_cluster["factorize_tasks"]

    # Set multiprocessing mode: "spawn" is forced on Windows
    if IS_WIN:
        requested_mode = self.mp_mode
        self.mp_mode = "spawn"
        if requested_mode != "spawn":
            # Report the mode that was requested, not the one replacing it
            logging.warning(
                "{} is not functional on Windows, "
                "spawn will be used instead".format(requested_mode)
            )

    self.launch_worker = launch_worker

    self.tmp_dir = None

    # affinity issues caused by numpy
    if not IS_WIN:
        os.system(
            "taskset -p 0xffffffff %d > /dev/null 2>&1" % os.getpid()
        )

    if self.launch_worker:
        # Create wrapper object
        if self.dump_to_disk:
            if self.out_dir is None:
                raise RuntimeError("Not out_dir provided")
            self.tmp_dir = os.path.join(self.out_dir, "tmp_save_disk")
            # Creates out_dir as well when it does not exist yet
            os.makedirs(self.tmp_dir, exist_ok=True)
            self.wrapper = mp_wrapper.WrapperDisk(self.tmp_dir)
        else:
            self.wrapper = mp_wrapper.WrapperNone(None)

        # Create pool
        ctx_in_main = mp.get_context(self.mp_mode)
        # import cars for env variables first
        # import cars pipelines for numba compilation
        ctx_in_main.set_forkserver_preload(["cars", "cars.pipelines"])
        self.pool = ctx_in_main.Pool(
            self.nb_workers,
            initializer=freeze_support,
            maxtasksperchild=self.max_tasks_per_worker,
        )

        self.queue = Queue()
        self.task_cache = {}

        # Variable used for cleaning
        # Clone of iterator future list
        self.cl_future_list = []

        # set the exception hook
        threading.excepthook = log_error_hook

        # Refresh worker: daemon thread running refresh_task_cache until
        # its _state attribute is no longer RUN
        self.refresh_worker = threading.Thread(
            target=MultiprocessingCluster.refresh_task_cache,
            args=(
                self.pool,
                self.task_cache,
                self.queue,
                self.per_job_timeout,
                self.cl_future_list,
                self.nb_workers,
                self.wrapper,
            ),
        )
        self.refresh_worker.daemon = True
        self.refresh_worker._state = RUN
        self.refresh_worker.start()

        # Profile pool (needs self.pool, hence only with launched workers)
        mp_dataframe = None
        timer = None
        if self.data_to_propagate is not None:
            mp_dataframe = self.data_to_propagate.get("mp_dataframe", None)
            timer = self.data_to_propagate.get("mp_timer", None)

        self.profiler = MultiprocessingProfiler(
            self.pool,
            self.log_dir,
            self.checked_conf_cluster["max_ram_per_worker"],
            mp_dataframe=mp_dataframe,
            timer=timer,
        )

        self.data_to_propagate = {
            "mp_dataframe": self.profiler.memory_data,
            "mp_timer": self.profiler.timer,
        }
222
+
223
def check_conf(self, conf):
    """
    Complete the cluster configuration with default values and validate it

    :param conf: configuration to check (None means "defaults only")
    :type conf: dict

    :return: overloaded configuration
    :rtype: dict

    """

    # init conf
    if conf is None:
        conf = {}
        overloaded_conf = {}
    else:
        overloaded_conf = conf.copy()

    # TODO returns full node nb cpus
    # TODO robustify if a partial node is used
    # One process per cpu for memory usage estimated
    available_cpu = mp.cpu_count()

    # Modify some env variables for memory usage
    # TODO
    # set ITK_GLOBAL_DEFAULT_NUMBER_OF_THREADS = 1

    # Overload conf: user value when given, default value otherwise.
    # The number of workers is capped by the number of available cpus.
    overloaded_conf.update(
        {
            "mode": conf.get("mode", "mp"),
            "mp_mode": conf.get("mp_mode", "forkserver"),
            "nb_workers": min(available_cpu, conf.get("nb_workers", 2)),
            "task_timeout": conf.get("task_timeout", 600),
            "max_ram_per_worker": conf.get("max_ram_per_worker", 2000),
            "max_tasks_per_worker": conf.get("max_tasks_per_worker", 10),
            "dump_to_disk": conf.get("dump_to_disk", True),
            "per_job_timeout": conf.get("per_job_timeout", 600),
            "factorize_tasks": conf.get("factorize_tasks", True),
            "profiling": conf.get("profiling", {}),
        }
    )

    def strictly_positive(value):
        """Tell if value is greater than zero"""
        return value > 0

    cluster_schema = {
        "mode": str,
        "dump_to_disk": bool,
        "mp_mode": str,
        "nb_workers": And(int, strictly_positive),
        "task_timeout": And(int, strictly_positive),
        "max_ram_per_worker": And(Or(float, int), strictly_positive),
        "max_tasks_per_worker": And(int, strictly_positive),
        "per_job_timeout": Or(float, int),
        "profiling": dict,
        "factorize_tasks": bool,
    }

    # Check conf
    Checker(cluster_schema).validate(overloaded_conf)

    return overloaded_conf
285
+
286
def get_delayed_type(self):
    """
    Give the class used by this cluster for delayed objects

    :return: the MpDelayed class itself (not an instance)
    """
    delayed_type = MpDelayed
    return delayed_type
291
+
292
def cleanup(self, keep_shared_dir=False):
    """
    Cleanup cluster

    Saves and cleans the profiler, stops the refresh thread, terminates
    the pool, then cleans the wrapper and the temporary directory.

    :param keep_shared_dir: do not clean directory of shared objects
    """

    # Save profiling
    self.profiler.save_plot()

    # clean profiler
    self.profiler.cleanup()

    # Terminate worker: the refresh loop stops once its state is not RUN.
    # join() blocks until the thread has exited, without spinning the cpu.
    self.refresh_worker._state = TERMINATE  # pylint: disable=W0212
    self.refresh_worker.join()

    # close pool
    self.pool.terminate()
    self.pool.join()

    # clean tmpdir if exists
    self.wrapper.cleanup(keep_shared_dir=keep_shared_dir)

    if not keep_shared_dir and self.tmp_dir is not None:
        # The directory may already be gone at this point
        # NOTE(review): the wrapper cleanup above possibly removes it
        # itself - its code is not visible here
        if os.path.isdir(self.tmp_dir):
            shutil.rmtree(self.tmp_dir)
319
+
320
def scatter(self, data):
    """
    Distribute data through workers

    The work is delegated to the scatter_obj method of the wrapper.

    :param data: data to dump
    :return: what the wrapper returns for this data
    """
    scattered = self.wrapper.scatter_obj(data)
    return scattered
327
+
328
def create_task_wrapped(self, func, nout=1):
    """
    Create task

    :param func: function
    :param nout: number of outputs

    :return: builder which, when called with the arguments of func,
        returns one MpDelayed (single output) or a tuple of MpDelayed
    """

    @wraps(func)
    def mp_delayed_builder(*argv, **kwargs):
        """
        Create a MPDelayed builder

        :param argv: args of func
        :param kwargs: kwargs of func
        """
        # Logging information and the real function travel in the
        # keyword arguments: the task itself calls cars_logging.logger_func
        kwargs["log_dir"] = self.worker_log_dir
        kwargs["log_level"] = self.log_level
        kwargs["log_fun"] = func

        # create delayed_task
        task = MpDelayedTask(cars_logging.logger_func, list(argv), kwargs)

        # One delayed object per output, all sharing the same task
        outputs = tuple(
            MpDelayed(task, return_index=idx) for idx in range(nout)
        )

        if len(outputs) == 1:
            return outputs[0]
        return outputs

    return mp_delayed_builder
368
+
369
def start_tasks(self, task_list):
    """
    Start all tasks

    :param task_list: task list

    :return: list with one future per task, in the order of task_list
    """
    # Futures already created during this call, shared by every rec_start
    memorize = {}

    # Work on a copy, the input delayed objects are left untouched
    tasks = copy.deepcopy(task_list)
    if self.factorize_tasks:
        mp_factorizer.factorize_delayed(tasks)

    future_list = []
    for task in tasks:
        future_list.append(self.rec_start(task, memorize))

    # signal that we reached the end of this batch
    self.queue.put("END_BATCH")
    return future_list
384
+
385
def rec_start(self, delayed_object, memorize):
    """
    Record task

    The tasks this one depends on are recorded first (recursively), then
    the task is pushed in the queue and its futures are created.

    :param delayed_object: delayed object to record
    :type delayed_object: MpDelayed
    :param memorize: futures of the MpDelayed already recorded,
        indexed by MpDelayed

    :return: future associated to delayed_object
    """
    # check if this task is already started
    if delayed_object in memorize:
        return memorize[delayed_object]

    task = delayed_object.delayed_task

    # Modify delayed with wrapper here
    task.modify_delayed_task(self.wrapper)

    def delayed_to_job(obj):
        """
        Replace MpDelayed by MpJob

        :param obj: data to replace if necessary
        """
        if not isinstance(obj, MpDelayed):
            return obj
        # Record the task producing this input before the current one
        dep_future = self.rec_start(obj, memorize)
        return MpJob(
            dep_future.mp_future_task.job_id,
            dep_future.return_index,
        )

    # Transform MpDelayed to MpJob
    filt_args = replace_data(task.args, delayed_to_job)
    filt_kw = replace_data(task.kw_args, delayed_to_job)

    # Check if can be run: only when there is no dependency
    can_run = len(compute_dependencies(filt_args, filt_kw)) == 0

    # start current task
    task_future = MpFutureTask(self)
    job_description = (
        task_future.job_id,
        can_run,
        task.func,
        filt_args,
        filt_kw,
    )
    self.queue.put(job_description)

    # Create future object
    object_future = MpFuture(task_future, delayed_object.return_index)
    memorize[delayed_object] = object_future

    # Create other futures associated to this task
    for sibling in task.associated_objects:
        if sibling != delayed_object:
            memorize[sibling] = MpFuture(task_future, sibling.return_index)

    return object_future
468
+
469
+ # pylint: disable=too-many-positional-arguments
470
@staticmethod  # noqa: C901
def refresh_task_cache(  # noqa: C901
    pool,
    task_cache,
    in_queue,
    per_job_timeout,
    cl_future_list,
    nb_workers,
    wrapper_obj,
):
    """
    Refresh task cache

    Loop run by the refresh thread. At each iteration it reads the newly
    submitted jobs from in_queue, collects the results of the finished
    jobs, launches the jobs that can run on the pool and cleans the
    results that are not needed anymore. The loop goes on while the
    `_state` attribute of the current thread is RUN.

    :param pool: pool on which jobs are launched with apply_async
    :param task_cache: task cache list, indexed by job id; the set()
        method of its entries receives the [success, result] of the job
    :param in_queue: queue of (job_id, can_run, func, args, kw_args)
        tuples, each batch being closed by the "END_BATCH" item
    :param per_job_timeout: per job timeout
    :param cl_future_list: current future list used in iterator
    :param nb_workers: number of workers
    :param wrapper_obj: wrapper, its cleanup_future_res() is called on the
        results that are not needed anymore
    """
    thread = threading.current_thread()

    # initialize lists
    # job id -> [func, args, kw_args] of the jobs not launched yet
    wait_list = {}
    # job id -> object returned by pool.apply_async
    in_progress_list = {}
    # job id -> dependencies ([-1] when there are none)
    dependencies_list = {}
    # job id -> [success, result]
    done_task_results = {}
    job_ids_to_launch_prioritized = []
    # no more than two jobs per worker are in progress at the same time
    max_nb_tasks_running = 2 * nb_workers

    while thread._state == RUN:  # pylint: disable=W0212
        # wait before next iteration
        time.sleep(REFRESH_TIME)
        # get new task from queue
        if not in_queue.empty():
            # get nb_workers task from this batch
            # in_queue.get is called again and again until it returns
            # "END_BATCH"
            for job_id, can_run, func, args, kw_args in iter(
                in_queue.get, "END_BATCH"
            ):
                wait_list[job_id] = [func, args, kw_args]
                if can_run:
                    job_ids_to_launch_prioritized.append(job_id)
                    # add to dependencies (-1 to identify initial tasks)
                    dependencies_list[job_id] = [-1]
                else:
                    # get dependencies
                    dependencies_list[job_id] = compute_dependencies(
                        args, kw_args
                    )
                    if len(dependencies_list[job_id]) == 0:
                        dependencies_list[job_id] = [-1]

        # check for ready results
        done_list = []
        next_priority_tasks = []
        for job_id, job_id_progress in in_progress_list.items():
            if job_id_progress.ready():
                try:
                    res = job_id_progress.get(timeout=per_job_timeout)
                    success = True
                except:  # pylint: disable=W0702 # noqa: B001, E722
                    # the formatted traceback becomes the result of the job
                    res = traceback.format_exc()
                    success = False
                    logging.error("Exception in worker: {}".format(res))
                done_list.append(job_id)
                done_task_results[job_id] = [success, res]

                # remove from dependance list
                dependencies_list.pop(job_id)

                # search related priority task
                for job_id2 in wait_list.keys():  # pylint: disable=C0201
                    depending_tasks = list(dependencies_list[job_id2])
                    if job_id in depending_tasks:
                        next_priority_tasks += depending_tasks
        # remove duplicate dependance task
        next_priority_tasks = list(dict.fromkeys(next_priority_tasks))
        # clean done jobs
        for job_id in done_list:
            # delete
            del in_progress_list[job_id]
            # copy results to futures
            # (they remove themselves from task_cache
            task_cache[job_id].set(done_task_results[job_id])

        (
            ready_list,
            failed_list,
        ) = MultiprocessingCluster.get_ready_failed_tasks(
            wait_list, dependencies_list, done_task_results
        )

        # add ready task in next_priority_tasks
        priority_list = list(
            filter(lambda job_id: job_id in next_priority_tasks, ready_list)
        )

        job_ids_to_launch_prioritized = update_job_id_priority(
            job_ids_to_launch_prioritized, priority_list, ready_list
        )

        # Deal with failed tasks
        # (waiting jobs of which at least one dependency has failed)
        for job_id in failed_list:
            done_list.append(job_id)
            done_task_results[job_id] = [
                False,
                "Failed depending task",
            ]
            # copy results to futures
            # (they remove themselves from task_cache
            task_cache[job_id].set(done_task_results[job_id])
            del wait_list[job_id]

        # launch jobs while there is room and something to launch
        while (
            len(in_progress_list) < max_nb_tasks_running
            and len(job_ids_to_launch_prioritized) > 0
        ):
            # the job to launch is taken at the end of the list
            job_id = job_ids_to_launch_prioritized.pop()
            func, args, kw_args = wait_list[job_id]
            # replace jobs by real data
            new_args = replace_job_by_data(args, done_task_results)
            new_kw_args = replace_job_by_data(kw_args, done_task_results)
            # launch task
            in_progress_list[job_id] = pool.apply_async(
                func, args=new_args, kwds=new_kw_args
            )
            del wait_list[job_id]
        # find done jobs that can be cleaned
        cleanable_jobid = []

        for job_id in done_task_results.keys():  # pylint: disable=C0201
            # check if needed
            # (a result is needed while a remaining entry of
            # dependencies_list still mentions its job id)
            still_need = False
            for dependance_task_list in dependencies_list.values():
                if job_id in dependance_task_list:
                    still_need = True
            if not still_need:
                cleanable_jobid.append(job_id)

        # clean unused in the future jobs through wrapper
        for job_id_to_clean in cleanable_jobid:
            if job_id_to_clean not in get_job_ids_from_futures(
                cl_future_list
            ):
                # not needed by iterator -> can be cleaned
                # Cleanup with wrapper
                wrapper_obj.cleanup_future_res(
                    done_task_results[job_id_to_clean][1]
                )
                # cleanup list
                done_task_results.pop(job_id_to_clean)
620
+
621
+ @staticmethod
622
def get_ready_failed_tasks(wait_list, dependencies_list, done_task_results):
    """
    Sort waiting tasks into "ready to launch" and "failed by dependency"

    A waiting task is ready when every one of its dependencies is present
    in done_task_results with a truthy success flag. It is failed as soon
    as one finished dependency has a falsy success flag. Tasks with
    dependencies that are still missing (and none failed) are in neither
    list.

    :param wait_list: waiting tasks, keyed by job id
    :type wait_list: dict
    :param dependencies_list: job id -> list of job ids it depends on
    :type dependencies_list: dict
    :param done_task_results: job id -> [success flag, result]
    :type done_task_results: dict

    :return: ready job ids, failed job ids
    :rtype: tuple(list, list)
    """
    runnable_ids = []
    doomed_ids = []

    for waiting_id in wait_list:
        # -1 entries are ignored: they are not ids of real jobs
        real_deps = [
            dep for dep in dependencies_list[waiting_id] if dep != -1
        ]
        finished_deps = [dep for dep in real_deps if dep in done_task_results]
        success_flags = [done_task_results[dep][0] for dep in finished_deps]

        if not all(success_flags):
            # at least one finished dependency did not succeed
            doomed_ids.append(waiting_id)
        elif len(finished_deps) == len(real_deps):
            # every dependency is finished and succeeded
            runnable_ids.append(waiting_id)

    return runnable_ids, doomed_ids
649
+
650
+ @staticmethod
651
def get_tasks_without_deps(dependencies_list, ready_list, nb_ready_task):
    """
    Return up to nb_ready_task ready tasks that are not initial tasks

    A ready task is kept when its dependency list does not hold exactly
    one entry and does not start with -1 (the marker of an initial task).
    A task with an empty dependency list is kept.

    Tasks are returned in ready_list order, each entry of ready_list
    being considered once.

    :param dependencies_list: job id -> list of job ids it depends on
    :type dependencies_list: dict
    :param ready_list: job ids that are ready to be launched
    :type ready_list: list
    :param nb_ready_task: maximum number of job ids to return
    :type nb_ready_task: int

    :return: selected job ids
    :rtype: list
    """
    priority_list = []
    for job_id in ready_list:
        if len(priority_list) >= nb_ready_task:
            # quota reached (also covers nb_ready_task <= 0)
            break
        depending_tasks = dependencies_list[job_id]
        if len(depending_tasks) == 1:
            continue
        # guard the [0] access so an empty dependency list cannot raise
        if depending_tasks and depending_tasks[0] == -1:
            continue
        # plain append: job id 0 is a valid id and must not be dropped
        priority_list.append(job_id)
    return priority_list
669
+
670
def future_iterator(self, future_list, timeout=None):
    """
    Build an MpFutureIterator over the given futures

    :param future_list: list of futures to iterate on
    :param timeout: forwarded unchanged to MpFutureIterator

    :return: MpFutureIterator created from future_list, this object
        and timeout
    """

    iterator = MpFutureIterator(future_list, self, timeout=timeout)
    return iterator
678
+
679
+
680
def get_job_ids_from_futures(future_list):
    """
    Collect the job id of each future, in the order of future_list

    The id is read from the ``mp_future_task.job_id`` attribute of every
    future.

    :param future_list: list of futures
    :type future_list: MpFuture

    :return: list of job id
    :rtype: list(int)
    """

    return [pending.mp_future_task.job_id for pending in future_list]
697
+
698
+
699
def replace_job_by_data(args_or_kawargs, done_task_results):
    """
    Substitute every MpJob found in the arguments by its computed result

    The traversal itself is delegated to replace_data, which receives
    the per-item substitution function defined below.

    :param args_or_kawargs: list or dict of data
    :param done_task_results: dict of done tasks,
        job id -> [success flag, result]

    :return: value returned by replace_data
    """

    def get_data(data, done_task_results):
        """
        Return the real data behind an MpJob, or data itself otherwise

        :param data: data to replace if necessary
        :param done_task_results: dict of done tasks

        :raises RuntimeError: if the task referenced by the MpJob failed
        :raises ValueError: if an index > 0 is requested on a result
            that is not a tuple
        """

        if not isinstance(data, MpJob):
            # plain value: nothing to substitute
            return data

        task_id = data.task_id
        position = data.r_idx

        entry = done_task_results[task_id]
        payload = entry[1]
        if not entry[0]:
            raise RuntimeError("Current task failed {}".format(payload))

        if isinstance(payload, tuple):
            # task returned several values: pick the requested one
            return payload[position]

        if position > 0:
            raise ValueError("Asked for index > 0 in a singleton")
        return payload

    # replace data
    return replace_data(args_or_kawargs, get_data, done_task_results)
735
+
736
+
737
def compute_dependencies(args, kw_args):
    """
    List the ids of the tasks that args and kw_args depend on

    Lists, tuples, dicts (values only) and FactorizedObject arguments are
    explored recursively; every MpJob met contributes its task_id.
    Ids are returned in order of first appearance, without duplicates.

    :param args: arguments
    :type args: list
    :param kw_args: key arguments
    :type kw_args: dict

    :return: dependencies
    :rtype: list
    """

    def iter_job_ids(item):
        """
        Yield, depth first, the task ids of the MpJob objects in item

        :param item: list, tuple, dict, FactorizedObject or simple data
        """

        if isinstance(item, (list, tuple)):
            for element in item:
                yield from iter_job_ids(element)

        elif isinstance(item, dict):
            for value in item.values():
                yield from iter_job_ids(value)

        elif isinstance(item, FactorizedObject):
            # positional arguments first, then keyword arguments
            for element in item.get_args():
                yield from iter_job_ids(element)
            nested_kwargs = item.get_kwargs()
            for key in nested_kwargs:
                yield from iter_job_ids(nested_kwargs[key])

        elif isinstance(item, MpJob):
            task_id = item.task_id
            if task_id is not None:
                yield task_id

    # positional dependencies come before keyword ones
    found_ids = [*iter_job_ids(args), *iter_job_ids(kw_args)]

    # dict keys keep insertion order: duplicates dropped, order preserved
    return list(dict.fromkeys(found_ids))
806
+
807
+
808
class MpFutureTask:  # pylint: disable=R0903
    """
    Task-side future of the multiprocessing cluster
    (multiprocessing counterpart of distributed.future)

    Each instance takes the next id from job_counter and registers
    itself in the task cache of its cluster under that id.
    """

    def __init__(self, cluster):
        """
        Create the task and register it in the cluster task cache

        :param cluster: mp cluster, must expose a ``task_cache`` mapping

        """
        self._cluster = cluster
        # outcome, filled by set()
        self._success = None
        self.result = None
        # futures to notify when the result is set
        self.associated_futures = []
        self.event = threading.Event()

        self.job_id = next(job_counter)

        # register last, once the task is fully built
        self.task_cache = cluster.task_cache
        self.task_cache[self.job_id] = self

    def set(self, obj):
        """
        Store the outcome, forward it to the associated futures and
        remove this task from the task cache

        :param obj: (success flag, result object)
        :type obj: tuple(bool, Union(dataset, dataframe))

        """
        success, result = obj
        self._success = success
        self.result = result

        # propagate the outcome to every future attached to this task
        for attached in self.associated_futures:
            attached.set(self._success, self.result)

        # the task is over: unregister it and drop the cluster reference
        del self.task_cache[self.job_id]
        self._cluster = None
        self.event.clear()
848
+
849
+
850
def log_error_hook(args):
    """
    Exception hook for cluster thread: log the failure, then kill the
    current process

    :param args: hook argument, its ``exc_value`` attribute is the
        exception reported in the log message
    """
    message = "Cluster MP thread failed: {}".format(args.exc_value)
    logging.error(message)
    # SIGKILL is sent to our own pid: the whole process is terminated,
    # not only the failing thread
    os.kill(os.getpid(), signal.SIGKILL)
    raise RuntimeError(message)
859
+
860
+
861
def update_job_id_priority(
    job_ids_to_launch_prioritized, priority_list, ready_list
):
    """
    Merge the new priority and ready job ids into the list of jobs to
    launch

    The merged order is: priority_list, then ready_list, then the
    previous job_ids_to_launch_prioritized. A job id that appears several
    times is kept only at its first position.

    :param job_ids_to_launch_prioritized: current list of job ids to launch
    :param priority_list: job ids to place first
    :param ready_list: job ids that became ready

    :return: updated list
    """

    first_seen = {}
    for job_id in priority_list + ready_list + job_ids_to_launch_prioritized:
        # setdefault leaves already-seen ids at their first position
        first_seen.setdefault(job_id, None)

    return list(first_seen)