runbooks 0.2.5__py3-none-any.whl → 0.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (221) hide show
  1. conftest.py +26 -0
  2. jupyter-agent/.env.template +2 -0
  3. jupyter-agent/.gitattributes +35 -0
  4. jupyter-agent/README.md +16 -0
  5. jupyter-agent/app.py +256 -0
  6. jupyter-agent/cloudops-agent.png +0 -0
  7. jupyter-agent/ds-system-prompt.txt +154 -0
  8. jupyter-agent/jupyter-agent.png +0 -0
  9. jupyter-agent/llama3_template.jinja +123 -0
  10. jupyter-agent/requirements.txt +9 -0
  11. jupyter-agent/utils.py +409 -0
  12. runbooks/__init__.py +71 -3
  13. runbooks/__main__.py +13 -0
  14. runbooks/aws/ec2_describe_instances.py +1 -1
  15. runbooks/aws/ec2_run_instances.py +8 -2
  16. runbooks/aws/ec2_start_stop_instances.py +17 -4
  17. runbooks/aws/ec2_unused_volumes.py +5 -1
  18. runbooks/aws/s3_create_bucket.py +4 -2
  19. runbooks/aws/s3_list_objects.py +6 -1
  20. runbooks/aws/tagging_lambda_handler.py +13 -2
  21. runbooks/aws/tags.json +12 -0
  22. runbooks/base.py +353 -0
  23. runbooks/cfat/README.md +49 -0
  24. runbooks/cfat/__init__.py +74 -0
  25. runbooks/cfat/app.ts +644 -0
  26. runbooks/cfat/assessment/__init__.py +40 -0
  27. runbooks/cfat/assessment/asana-import.csv +39 -0
  28. runbooks/cfat/assessment/cfat-checks.csv +31 -0
  29. runbooks/cfat/assessment/cfat.txt +520 -0
  30. runbooks/cfat/assessment/collectors.py +200 -0
  31. runbooks/cfat/assessment/jira-import.csv +39 -0
  32. runbooks/cfat/assessment/runner.py +387 -0
  33. runbooks/cfat/assessment/validators.py +290 -0
  34. runbooks/cfat/cli.py +103 -0
  35. runbooks/cfat/docs/asana-import.csv +24 -0
  36. runbooks/cfat/docs/cfat-checks.csv +31 -0
  37. runbooks/cfat/docs/cfat.txt +335 -0
  38. runbooks/cfat/docs/checks-output.png +0 -0
  39. runbooks/cfat/docs/cloudshell-console-run.png +0 -0
  40. runbooks/cfat/docs/cloudshell-download.png +0 -0
  41. runbooks/cfat/docs/cloudshell-output.png +0 -0
  42. runbooks/cfat/docs/downloadfile.png +0 -0
  43. runbooks/cfat/docs/jira-import.csv +24 -0
  44. runbooks/cfat/docs/open-cloudshell.png +0 -0
  45. runbooks/cfat/docs/report-header.png +0 -0
  46. runbooks/cfat/models.py +1026 -0
  47. runbooks/cfat/package-lock.json +5116 -0
  48. runbooks/cfat/package.json +38 -0
  49. runbooks/cfat/report.py +496 -0
  50. runbooks/cfat/reporting/__init__.py +46 -0
  51. runbooks/cfat/reporting/exporters.py +337 -0
  52. runbooks/cfat/reporting/formatters.py +496 -0
  53. runbooks/cfat/reporting/templates.py +135 -0
  54. runbooks/cfat/run-assessment.sh +23 -0
  55. runbooks/cfat/runner.py +69 -0
  56. runbooks/cfat/src/actions/check-cloudtrail-existence.ts +43 -0
  57. runbooks/cfat/src/actions/check-config-existence.ts +37 -0
  58. runbooks/cfat/src/actions/check-control-tower.ts +37 -0
  59. runbooks/cfat/src/actions/check-ec2-existence.ts +46 -0
  60. runbooks/cfat/src/actions/check-iam-users.ts +50 -0
  61. runbooks/cfat/src/actions/check-legacy-cur.ts +30 -0
  62. runbooks/cfat/src/actions/check-org-cloudformation.ts +30 -0
  63. runbooks/cfat/src/actions/check-vpc-existence.ts +43 -0
  64. runbooks/cfat/src/actions/create-asanaimport.ts +14 -0
  65. runbooks/cfat/src/actions/create-backlog.ts +372 -0
  66. runbooks/cfat/src/actions/create-jiraimport.ts +15 -0
  67. runbooks/cfat/src/actions/create-report.ts +616 -0
  68. runbooks/cfat/src/actions/define-account-type.ts +51 -0
  69. runbooks/cfat/src/actions/get-enabled-org-policy-types.ts +40 -0
  70. runbooks/cfat/src/actions/get-enabled-org-services.ts +26 -0
  71. runbooks/cfat/src/actions/get-idc-info.ts +34 -0
  72. runbooks/cfat/src/actions/get-org-da-accounts.ts +34 -0
  73. runbooks/cfat/src/actions/get-org-details.ts +35 -0
  74. runbooks/cfat/src/actions/get-org-member-accounts.ts +44 -0
  75. runbooks/cfat/src/actions/get-org-ous.ts +35 -0
  76. runbooks/cfat/src/actions/get-regions.ts +22 -0
  77. runbooks/cfat/src/actions/zip-assessment.ts +27 -0
  78. runbooks/cfat/src/types/index.d.ts +147 -0
  79. runbooks/cfat/tests/__init__.py +141 -0
  80. runbooks/cfat/tests/test_cli.py +340 -0
  81. runbooks/cfat/tests/test_integration.py +290 -0
  82. runbooks/cfat/tests/test_models.py +505 -0
  83. runbooks/cfat/tests/test_reporting.py +354 -0
  84. runbooks/cfat/tsconfig.json +16 -0
  85. runbooks/cfat/webpack.config.cjs +27 -0
  86. runbooks/config.py +260 -0
  87. runbooks/finops/__init__.py +88 -0
  88. runbooks/finops/aws_client.py +245 -0
  89. runbooks/finops/cli.py +151 -0
  90. runbooks/finops/cost_processor.py +410 -0
  91. runbooks/finops/dashboard_runner.py +448 -0
  92. runbooks/finops/helpers.py +355 -0
  93. runbooks/finops/main.py +14 -0
  94. runbooks/finops/profile_processor.py +174 -0
  95. runbooks/finops/types.py +66 -0
  96. runbooks/finops/visualisations.py +80 -0
  97. runbooks/inventory/.gitignore +354 -0
  98. runbooks/inventory/ArgumentsClass.py +261 -0
  99. runbooks/inventory/Inventory_Modules.py +6130 -0
  100. runbooks/inventory/LandingZone/delete_lz.py +1075 -0
  101. runbooks/inventory/README.md +1320 -0
  102. runbooks/inventory/__init__.py +62 -0
  103. runbooks/inventory/account_class.py +532 -0
  104. runbooks/inventory/all_my_instances_wrapper.py +123 -0
  105. runbooks/inventory/aws_decorators.py +201 -0
  106. runbooks/inventory/cfn_move_stack_instances.py +1526 -0
  107. runbooks/inventory/check_cloudtrail_compliance.py +614 -0
  108. runbooks/inventory/check_controltower_readiness.py +1107 -0
  109. runbooks/inventory/check_landingzone_readiness.py +711 -0
  110. runbooks/inventory/cloudtrail.md +727 -0
  111. runbooks/inventory/collectors/__init__.py +20 -0
  112. runbooks/inventory/collectors/aws_compute.py +518 -0
  113. runbooks/inventory/collectors/aws_networking.py +275 -0
  114. runbooks/inventory/collectors/base.py +222 -0
  115. runbooks/inventory/core/__init__.py +19 -0
  116. runbooks/inventory/core/collector.py +303 -0
  117. runbooks/inventory/core/formatter.py +296 -0
  118. runbooks/inventory/delete_s3_buckets_objects.py +169 -0
  119. runbooks/inventory/discovery.md +81 -0
  120. runbooks/inventory/draw_org_structure.py +748 -0
  121. runbooks/inventory/ec2_vpc_utils.py +341 -0
  122. runbooks/inventory/find_cfn_drift_detection.py +272 -0
  123. runbooks/inventory/find_cfn_orphaned_stacks.py +719 -0
  124. runbooks/inventory/find_cfn_stackset_drift.py +733 -0
  125. runbooks/inventory/find_ec2_security_groups.py +669 -0
  126. runbooks/inventory/find_landingzone_versions.py +201 -0
  127. runbooks/inventory/find_vpc_flow_logs.py +1221 -0
  128. runbooks/inventory/inventory.sh +659 -0
  129. runbooks/inventory/list_cfn_stacks.py +558 -0
  130. runbooks/inventory/list_cfn_stackset_operation_results.py +252 -0
  131. runbooks/inventory/list_cfn_stackset_operations.py +734 -0
  132. runbooks/inventory/list_cfn_stacksets.py +453 -0
  133. runbooks/inventory/list_config_recorders_delivery_channels.py +681 -0
  134. runbooks/inventory/list_ds_directories.py +354 -0
  135. runbooks/inventory/list_ec2_availability_zones.py +286 -0
  136. runbooks/inventory/list_ec2_ebs_volumes.py +244 -0
  137. runbooks/inventory/list_ec2_instances.py +425 -0
  138. runbooks/inventory/list_ecs_clusters_and_tasks.py +562 -0
  139. runbooks/inventory/list_elbs_load_balancers.py +411 -0
  140. runbooks/inventory/list_enis_network_interfaces.py +526 -0
  141. runbooks/inventory/list_guardduty_detectors.py +568 -0
  142. runbooks/inventory/list_iam_policies.py +404 -0
  143. runbooks/inventory/list_iam_roles.py +518 -0
  144. runbooks/inventory/list_iam_saml_providers.py +359 -0
  145. runbooks/inventory/list_lambda_functions.py +882 -0
  146. runbooks/inventory/list_org_accounts.py +446 -0
  147. runbooks/inventory/list_org_accounts_users.py +354 -0
  148. runbooks/inventory/list_rds_db_instances.py +406 -0
  149. runbooks/inventory/list_route53_hosted_zones.py +318 -0
  150. runbooks/inventory/list_servicecatalog_provisioned_products.py +575 -0
  151. runbooks/inventory/list_sns_topics.py +360 -0
  152. runbooks/inventory/list_ssm_parameters.py +402 -0
  153. runbooks/inventory/list_vpc_subnets.py +433 -0
  154. runbooks/inventory/list_vpcs.py +422 -0
  155. runbooks/inventory/lockdown_cfn_stackset_role.py +224 -0
  156. runbooks/inventory/models/__init__.py +24 -0
  157. runbooks/inventory/models/account.py +192 -0
  158. runbooks/inventory/models/inventory.py +309 -0
  159. runbooks/inventory/models/resource.py +247 -0
  160. runbooks/inventory/recover_cfn_stack_ids.py +205 -0
  161. runbooks/inventory/requirements.txt +12 -0
  162. runbooks/inventory/run_on_multi_accounts.py +211 -0
  163. runbooks/inventory/tests/common_test_data.py +3661 -0
  164. runbooks/inventory/tests/common_test_functions.py +204 -0
  165. runbooks/inventory/tests/script_test_data.py +0 -0
  166. runbooks/inventory/tests/setup.py +24 -0
  167. runbooks/inventory/tests/src.py +18 -0
  168. runbooks/inventory/tests/test_cfn_describe_stacks.py +208 -0
  169. runbooks/inventory/tests/test_ec2_describe_instances.py +162 -0
  170. runbooks/inventory/tests/test_inventory_modules.py +55 -0
  171. runbooks/inventory/tests/test_lambda_list_functions.py +86 -0
  172. runbooks/inventory/tests/test_moto_integration_example.py +273 -0
  173. runbooks/inventory/tests/test_org_list_accounts.py +49 -0
  174. runbooks/inventory/update_aws_actions.py +173 -0
  175. runbooks/inventory/update_cfn_stacksets.py +1215 -0
  176. runbooks/inventory/update_cloudwatch_logs_retention_policy.py +294 -0
  177. runbooks/inventory/update_iam_roles_cross_accounts.py +478 -0
  178. runbooks/inventory/update_s3_public_access_block.py +539 -0
  179. runbooks/inventory/utils/__init__.py +23 -0
  180. runbooks/inventory/utils/aws_helpers.py +510 -0
  181. runbooks/inventory/utils/threading_utils.py +493 -0
  182. runbooks/inventory/utils/validation.py +682 -0
  183. runbooks/inventory/verify_ec2_security_groups.py +1430 -0
  184. runbooks/main.py +785 -0
  185. runbooks/organizations/__init__.py +12 -0
  186. runbooks/organizations/manager.py +374 -0
  187. runbooks/security_baseline/README.md +324 -0
  188. runbooks/security_baseline/checklist/alternate_contacts.py +8 -1
  189. runbooks/security_baseline/checklist/bucket_public_access.py +4 -1
  190. runbooks/security_baseline/checklist/cloudwatch_alarm_configuration.py +9 -2
  191. runbooks/security_baseline/checklist/guardduty_enabled.py +9 -2
  192. runbooks/security_baseline/checklist/multi_region_instance_usage.py +5 -1
  193. runbooks/security_baseline/checklist/root_access_key.py +6 -1
  194. runbooks/security_baseline/config-origin.json +1 -1
  195. runbooks/security_baseline/config.json +1 -1
  196. runbooks/security_baseline/permission.json +1 -1
  197. runbooks/security_baseline/report_generator.py +10 -2
  198. runbooks/security_baseline/report_template_en.html +7 -7
  199. runbooks/security_baseline/report_template_jp.html +7 -7
  200. runbooks/security_baseline/report_template_kr.html +12 -12
  201. runbooks/security_baseline/report_template_vn.html +7 -7
  202. runbooks/security_baseline/requirements.txt +7 -0
  203. runbooks/security_baseline/run_script.py +8 -2
  204. runbooks/security_baseline/security_baseline_tester.py +10 -2
  205. runbooks/security_baseline/utils/common.py +5 -1
  206. runbooks/utils/__init__.py +204 -0
  207. runbooks-0.6.1.dist-info/METADATA +373 -0
  208. runbooks-0.6.1.dist-info/RECORD +237 -0
  209. {runbooks-0.2.5.dist-info → runbooks-0.6.1.dist-info}/WHEEL +1 -1
  210. runbooks-0.6.1.dist-info/entry_points.txt +7 -0
  211. runbooks-0.6.1.dist-info/licenses/LICENSE +201 -0
  212. runbooks-0.6.1.dist-info/top_level.txt +3 -0
  213. runbooks/python101/calculator.py +0 -34
  214. runbooks/python101/config.py +0 -1
  215. runbooks/python101/exceptions.py +0 -16
  216. runbooks/python101/file_manager.py +0 -218
  217. runbooks/python101/toolkit.py +0 -153
  218. runbooks-0.2.5.dist-info/METADATA +0 -439
  219. runbooks-0.2.5.dist-info/RECORD +0 -61
  220. runbooks-0.2.5.dist-info/entry_points.txt +0 -3
  221. runbooks-0.2.5.dist-info/top_level.txt +0 -1
@@ -0,0 +1,493 @@
1
+ """
2
+ Threading and concurrency utilities for inventory operations.
3
+
4
+ This module provides thread pool management, concurrent execution helpers,
5
+ and progress tracking for multi-threaded inventory collection operations.
6
+ """
7
+
8
+ import threading
9
+ import time
10
+ from concurrent.futures import Future, ThreadPoolExecutor, as_completed
11
+ from dataclasses import dataclass, field
12
+ from datetime import datetime, timedelta
13
+ from queue import Empty, Queue
14
+ from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union
15
+
16
+ from loguru import logger
17
+
18
+
19
@dataclass
class TaskResult:
    """Result from a threaded task execution.

    A ``TaskResult`` is created as a placeholder when a task is submitted and
    is filled in by :meth:`mark_completed` once the task has finished.

    ``success`` defaults to ``False`` so that a placeholder can be created
    with only a ``task_id`` (a task that has not finished has not succeeded).
    """

    # Caller-supplied identifier of the task.
    task_id: str
    # True only after the task finished without raising.
    success: bool = False
    # Return value of the task function (None until completed or on failure).
    result: Any = None
    # Exception raised by the task function, if any.
    error: Optional[Exception] = None
    # Naive UTC timestamp taken when this record is created.
    start_time: datetime = field(default_factory=datetime.utcnow)
    # Naive UTC timestamp set by mark_completed(); None while pending.
    end_time: Optional[datetime] = None
    # end_time - start_time, set by mark_completed(); None while pending.
    duration: Optional[timedelta] = None

    def mark_completed(self, success: bool, result: Any = None, error: Optional[Exception] = None):
        """Mark task as completed with result or error.

        Args:
            success: Whether the task finished without raising
            result: Value returned by the task function
            error: Exception raised by the task function
        """
        self.success = success
        self.result = result
        self.error = error
        self.end_time = datetime.utcnow()
        self.duration = self.end_time - self.start_time

    def get_duration_seconds(self) -> float:
        """Get task duration in seconds.

        Returns:
            The recorded duration if the task has completed; otherwise the
            time elapsed since ``start_time`` up to now.
        """
        # Compare against None explicitly: a zero-length timedelta is falsy.
        if self.duration is not None:
            return self.duration.total_seconds()
        if self.end_time is not None:
            return (self.end_time - self.start_time).total_seconds()
        return (datetime.utcnow() - self.start_time).total_seconds()
46
+
47
+
48
@dataclass
class ProgressMetrics:
    """Progress tracking metrics for threaded operations."""

    # Number of tasks registered so far.
    total_tasks: int = 0
    # Number of tasks that have finished (successfully or not).
    completed_tasks: int = 0
    # Number of finished tasks that succeeded.
    successful_tasks: int = 0
    # Number of finished tasks that failed.
    failed_tasks: int = 0
    # Naive UTC timestamp taken when the metrics object is created.
    start_time: datetime = field(default_factory=datetime.utcnow)
    # Projected finish time; None until a rate can be computed.
    estimated_completion: Optional[datetime] = None

    def update_progress(self, success: bool):
        """Update progress metrics with task completion.

        Increments the completed counter and either the success or failure
        counter, then refreshes ``estimated_completion`` from the average
        completion rate observed since ``start_time``.

        Args:
            success: Whether the task that just finished succeeded
        """
        self.completed_tasks += 1
        if success:
            self.successful_tasks += 1
        else:
            self.failed_tasks += 1

        # Update estimated completion time. Read the clock once so the rate
        # and the projected finish time are based on the same instant.
        now = datetime.utcnow()
        elapsed_seconds = (now - self.start_time).total_seconds()

        # A zero (or negative) elapsed time gives no usable rate; skip the
        # estimate instead of dividing by zero.
        if elapsed_seconds <= 0:
            return

        rate = self.completed_tasks / elapsed_seconds
        if rate > 0:
            remaining_seconds = (self.total_tasks - self.completed_tasks) / rate
            self.estimated_completion = now + timedelta(seconds=remaining_seconds)

    def get_completion_percentage(self) -> float:
        """Get completion percentage (0-100); 0.0 when no tasks are registered."""
        if self.total_tasks == 0:
            return 0.0
        return (self.completed_tasks / self.total_tasks) * 100

    def get_success_rate(self) -> float:
        """Get success rate percentage; 0.0 when no task has completed yet."""
        if self.completed_tasks == 0:
            return 0.0
        return (self.successful_tasks / self.completed_tasks) * 100

    def is_complete(self) -> bool:
        """Check if all tasks are completed."""
        return self.completed_tasks >= self.total_tasks

    def get_remaining_tasks(self) -> int:
        """Get number of remaining tasks (never negative)."""
        return max(0, self.total_tasks - self.completed_tasks)
94
+
95
+
96
class ThreadPoolManager:
    """
    Advanced thread pool manager for inventory operations.

    Wraps a ``ThreadPoolExecutor`` and keeps, per task ID, the submitted
    ``Future`` and a ``TaskResult`` record, plus aggregate ``ProgressMetrics``.
    Can be used as a context manager: the pool is started on entry and shut
    down (waiting for running tasks) on exit.
    """

    def __init__(
        self,
        max_workers: Optional[int] = None,
        thread_name_prefix: str = "InventoryWorker",
        progress_callback: Optional[Callable[[ProgressMetrics], None]] = None,
    ):
        """
        Initialize thread pool manager.

        Args:
            max_workers: Maximum number of worker threads (None for auto-detect)
            thread_name_prefix: Prefix for worker thread names
            progress_callback: Callback function for progress updates
        """
        # Auto-detect optimal worker count if not specified
        if max_workers is None:
            import os

            max_workers = min(32, (os.cpu_count() or 1) + 4)

        self.max_workers = max_workers
        self.thread_name_prefix = thread_name_prefix
        self.progress_callback = progress_callback

        self._executor: Optional[ThreadPoolExecutor] = None
        self._futures: Dict[str, Future] = {}
        self._results: Dict[str, TaskResult] = {}
        self._metrics = ProgressMetrics()
        # Guards _results and _metrics, which worker threads update.
        self._lock = threading.Lock()

        logger.debug(f"Initialized ThreadPoolManager with {max_workers} workers")

    def __enter__(self):
        """Context manager entry: start the pool and return the manager."""
        self.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit: shut the pool down, waiting for running tasks."""
        self.shutdown()

    def start(self):
        """Start the thread pool (no-op if it is already running)."""
        if self._executor is None:
            self._executor = ThreadPoolExecutor(
                max_workers=self.max_workers, thread_name_prefix=self.thread_name_prefix
            )
            logger.debug("Thread pool started")

    def shutdown(self, wait: bool = True):
        """
        Shutdown the thread pool.

        Args:
            wait: Whether to wait for all tasks to complete
        """
        if self._executor:
            self._executor.shutdown(wait=wait)
            self._executor = None
            logger.debug("Thread pool shutdown")

    def submit_task(self, task_id: str, func: Callable, *args, **kwargs) -> Future:
        """
        Submit a task to the thread pool.

        Args:
            task_id: Unique identifier for the task
            func: Function to execute
            *args: Positional arguments for the function
            **kwargs: Keyword arguments for the function

        Returns:
            Future object for the submitted task

        Raises:
            RuntimeError: If thread pool is not started
        """
        if not self._executor:
            raise RuntimeError("Thread pool not started. Call start() first.")

        # Create task result placeholder. `success` is passed explicitly: a
        # pending task has not succeeded, and the field must be supplied.
        task_result = TaskResult(task_id=task_id, success=False)

        with self._lock:
            self._results[task_id] = task_result
            self._metrics.total_tasks += 1

        # Submit task to executor
        future = self._executor.submit(self._execute_task, task_id, func, *args, **kwargs)
        self._futures[task_id] = future

        logger.debug(f"Submitted task: {task_id}")
        return future

    def _execute_task(self, task_id: str, func: Callable, *args, **kwargs) -> Any:
        """
        Execute a task with error handling and metrics tracking.

        Runs in a worker thread. The outcome is recorded in the task's
        ``TaskResult`` and in the metrics, the progress callback (if any) is
        invoked, and a failure is re-raised so the ``Future`` carries it too.

        Args:
            task_id: Task identifier
            func: Function to execute
            *args: Function arguments
            **kwargs: Function keyword arguments

        Returns:
            Function result

        Raises:
            Exception: Whatever ``func`` raised
        """
        result = None
        error = None
        success = False

        try:
            logger.debug(f"Executing task: {task_id}")
            result = func(*args, **kwargs)
            success = True
            logger.debug(f"Task completed successfully: {task_id}")

        except Exception as e:
            error = e
            logger.error(f"Task failed: {task_id} - {e}")

        # Update task result
        with self._lock:
            if task_id in self._results:
                self._results[task_id].mark_completed(success, result, error)
            self._metrics.update_progress(success)

        # Call progress callback if provided; a faulty callback must not
        # turn a finished task into a failed one, so its errors are only logged.
        if self.progress_callback:
            try:
                self.progress_callback(self._metrics)
            except Exception as cb_error:
                logger.warning(f"Progress callback error: {cb_error}")

        if not success:
            raise error

        return result

    def submit_batch(self, tasks: List[Tuple[str, Callable, tuple, dict]]) -> Dict[str, Future]:
        """
        Submit a batch of tasks to the thread pool.

        Args:
            tasks: List of (task_id, function, args, kwargs) tuples

        Returns:
            Dictionary mapping task IDs to Future objects
        """
        futures = {}

        for task_id, func, args, kwargs in tasks:
            future = self.submit_task(task_id, func, *args, **kwargs)
            futures[task_id] = future

        logger.info(f"Submitted batch of {len(tasks)} tasks")
        return futures

    def wait_for_completion(
        self, timeout: Optional[float] = None, progress_interval: float = 5.0
    ) -> Dict[str, TaskResult]:
        """
        Wait for all submitted tasks to complete.

        A timeout is logged, not raised; the tracked futures are cleared in
        either case before returning.

        Args:
            timeout: Maximum time to wait in seconds
            progress_interval: Interval for progress logging in seconds

        Returns:
            Dictionary mapping task IDs to TaskResult objects
        """
        # Before Python 3.11 concurrent.futures.TimeoutError is a distinct
        # class from the builtin TimeoutError, and it is the one that
        # as_completed() raises. Catch both so the timeout is handled on
        # every supported interpreter.
        from concurrent.futures import TimeoutError as FuturesTimeoutError

        if not self._futures:
            return self._results.copy()

        logger.info(f"Waiting for {len(self._futures)} tasks to complete")

        start_time = time.time()
        last_progress_time = start_time

        try:
            for future in as_completed(self._futures.values(), timeout=timeout):
                # Log progress periodically
                current_time = time.time()
                if current_time - last_progress_time >= progress_interval:
                    self._log_progress()
                    last_progress_time = current_time

                # Check timeout
                if timeout and (current_time - start_time) >= timeout:
                    logger.warning(f"Timeout reached after {timeout} seconds")
                    break

            # Final progress log
            self._log_progress()

        except (TimeoutError, FuturesTimeoutError):
            logger.error(f"Tasks did not complete within {timeout} seconds")

        # Clean up futures
        self._futures.clear()

        total_time = time.time() - start_time
        logger.info(
            f"Task completion finished in {total_time:.2f} seconds. "
            f"Success rate: {self._metrics.get_success_rate():.1f}%"
        )

        return self._results.copy()

    def get_results(self, completed_only: bool = True) -> Dict[str, TaskResult]:
        """
        Get task results.

        Args:
            completed_only: Whether to return only completed tasks

        Returns:
            Dictionary of task results
        """
        with self._lock:
            if completed_only:
                # A task counts as completed once mark_completed() set end_time.
                return {task_id: result for task_id, result in self._results.items() if result.end_time is not None}
            return self._results.copy()

    def get_metrics(self) -> ProgressMetrics:
        """Get current progress metrics (the live object, not a copy)."""
        with self._lock:
            return self._metrics

    def get_successful_results(self) -> Dict[str, Any]:
        """Get results from successful tasks only.

        Successful tasks whose function returned ``None`` are omitted.
        """
        return {
            task_id: result.result
            for task_id, result in self.get_results().items()
            if result.success and result.result is not None
        }

    def get_failed_tasks(self) -> Dict[str, Exception]:
        """Get errors from failed tasks."""
        return {
            task_id: result.error
            for task_id, result in self.get_results().items()
            if not result.success and result.error is not None
        }

    def _log_progress(self):
        """Log current progress metrics."""
        metrics = self._metrics
        logger.info(
            f"Progress: {metrics.completed_tasks}/{metrics.total_tasks} "
            f"({metrics.get_completion_percentage():.1f}%) - "
            f"Success rate: {metrics.get_success_rate():.1f}%"
        )

        if metrics.estimated_completion:
            remaining = metrics.estimated_completion - datetime.utcnow()
            # Only report estimates that still lie in the future.
            if remaining.total_seconds() > 0:
                logger.info(f"Estimated completion in {remaining}")

    def cancel_remaining_tasks(self):
        """Cancel all pending tasks."""
        cancelled_count = 0

        for task_id, future in self._futures.items():
            # Future.cancel() returns False for tasks that are already running.
            if not future.done() and future.cancel():
                cancelled_count += 1
                logger.debug(f"Cancelled task: {task_id}")

        if cancelled_count > 0:
            logger.info(f"Cancelled {cancelled_count} pending tasks")

    def is_active(self) -> bool:
        """Check if any futures are still tracked (submitted and not yet cleared)."""
        return bool(self._futures)

    def get_active_task_count(self) -> int:
        """Get number of active (not completed) tasks."""
        return len([f for f in self._futures.values() if not f.done()])
382
+
383
+
384
class BatchProcessor:
    """
    Utility for processing large batches of items with threading.

    Splits a list of items into fixed-size chunks and runs a processor
    function on each chunk through a ``ThreadPoolManager``.
    """

    def __init__(
        self,
        batch_size: int = 50,
        max_workers: Optional[int] = None,
        progress_callback: Optional[Callable[[ProgressMetrics], None]] = None,
    ):
        """
        Initialize batch processor.

        Args:
            batch_size: Number of items to process per batch
            max_workers: Maximum number of worker threads
            progress_callback: Callback for progress updates
        """
        self.batch_size = batch_size
        self.max_workers = max_workers
        self.progress_callback = progress_callback

    def process_items(
        self,
        items: List[Any],
        processor_func: Callable[[List[Any]], Any],
        item_id_func: Optional[Callable[[Any], str]] = None,
    ) -> Dict[str, Any]:
        """
        Process a list of items in batches using threading.

        Batches are named ``batch_1``, ``batch_2``, ... in input order.

        Args:
            items: List of items to process
            processor_func: Function to process each batch
            item_id_func: Function to generate task IDs from items
                (accepted for interface compatibility; currently unused)

        Returns:
            Dictionary mapping batch IDs to the value returned by
            ``processor_func`` for each batch that succeeded with a
            non-None result

        Raises:
            ValueError: If ``batch_size`` is not a positive integer
        """
        # A negative step would make range() empty and silently skip every
        # item; reject it up front (zero already raised ValueError in range()).
        if self.batch_size < 1:
            raise ValueError(f"batch_size must be a positive integer, got {self.batch_size}")

        # Split items into batches
        batches = [
            (f"batch_{start // self.batch_size + 1}", items[start : start + self.batch_size])
            for start in range(0, len(items), self.batch_size)
        ]

        logger.info(f"Processing {len(items)} items in {len(batches)} batches")

        # Process batches using thread pool
        with ThreadPoolManager(max_workers=self.max_workers, progress_callback=self.progress_callback) as pool:
            # Submit all batches
            for batch_id, batch in batches:
                pool.submit_task(batch_id, processor_func, batch)

            # Wait for completion
            pool.wait_for_completion()

            # Extract successful results
            return pool.get_successful_results()
447
+
448
+
449
def run_with_timeout(func: Callable, timeout: float, *args, **kwargs) -> Any:
    """
    Run a function with a timeout using threading.

    The function runs in a daemon thread. On timeout the thread is not
    stopped; it is left running in the background and its outcome is
    discarded.

    Args:
        func: Function to run
        timeout: Timeout in seconds
        *args: Function arguments
        **kwargs: Function keyword arguments

    Returns:
        Function result

    Raises:
        TimeoutError: If function doesn't complete within timeout
        Exception: Whatever ``func`` raised, re-raised in the calling thread
        RuntimeError: If the thread ended without a result or an exception
    """
    result = Queue()
    exception = Queue()

    def target():
        try:
            ret = func(*args, **kwargs)
            result.put(ret)
        except Exception as e:
            exception.put(e)

    thread = threading.Thread(target=target)
    thread.daemon = True
    thread.start()
    thread.join(timeout)

    if thread.is_alive():
        # Thread is still running, timeout occurred.
        # Not every callable has __name__ (functools.partial objects and
        # callable instances do not); fall back to repr() so the lookup
        # cannot mask the timeout with an AttributeError.
        func_name = getattr(func, "__name__", repr(func))
        raise TimeoutError(f"Function {func_name} timed out after {timeout} seconds")

    # Check for exceptions
    if not exception.empty():
        raise exception.get()

    # Return result
    if not result.empty():
        return result.get()

    # Reached only if the thread ended without storing a result or an
    # Exception (e.g. it raised a BaseException that is not an Exception).
    raise RuntimeError("Function completed but no result or exception found")