runbooks 0.2.5__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- conftest.py +26 -0
- jupyter-agent/.env +2 -0
- jupyter-agent/.env.template +2 -0
- jupyter-agent/.gitattributes +35 -0
- jupyter-agent/.gradio/certificate.pem +31 -0
- jupyter-agent/README.md +16 -0
- jupyter-agent/__main__.log +8 -0
- jupyter-agent/app.py +256 -0
- jupyter-agent/cloudops-agent.png +0 -0
- jupyter-agent/ds-system-prompt.txt +154 -0
- jupyter-agent/jupyter-agent.png +0 -0
- jupyter-agent/llama3_template.jinja +123 -0
- jupyter-agent/requirements.txt +9 -0
- jupyter-agent/tmp/4ojbs8a02ir/jupyter-agent.ipynb +68 -0
- jupyter-agent/tmp/cm5iasgpm3p/jupyter-agent.ipynb +91 -0
- jupyter-agent/tmp/crqbsseag5/jupyter-agent.ipynb +91 -0
- jupyter-agent/tmp/hohanq1u097/jupyter-agent.ipynb +57 -0
- jupyter-agent/tmp/jns1sam29wm/jupyter-agent.ipynb +53 -0
- jupyter-agent/tmp/jupyter-agent.ipynb +27 -0
- jupyter-agent/utils.py +409 -0
- runbooks/__init__.py +71 -3
- runbooks/__main__.py +13 -0
- runbooks/aws/ec2_describe_instances.py +1 -1
- runbooks/aws/ec2_run_instances.py +8 -2
- runbooks/aws/ec2_start_stop_instances.py +17 -4
- runbooks/aws/ec2_unused_volumes.py +5 -1
- runbooks/aws/s3_create_bucket.py +4 -2
- runbooks/aws/s3_list_objects.py +6 -1
- runbooks/aws/tagging_lambda_handler.py +13 -2
- runbooks/aws/tags.json +12 -0
- runbooks/base.py +353 -0
- runbooks/cfat/README.md +49 -0
- runbooks/cfat/__init__.py +74 -0
- runbooks/cfat/app.ts +644 -0
- runbooks/cfat/assessment/__init__.py +40 -0
- runbooks/cfat/assessment/asana-import.csv +39 -0
- runbooks/cfat/assessment/cfat-checks.csv +31 -0
- runbooks/cfat/assessment/cfat.txt +520 -0
- runbooks/cfat/assessment/collectors.py +200 -0
- runbooks/cfat/assessment/jira-import.csv +39 -0
- runbooks/cfat/assessment/runner.py +387 -0
- runbooks/cfat/assessment/validators.py +290 -0
- runbooks/cfat/cli.py +103 -0
- runbooks/cfat/docs/asana-import.csv +24 -0
- runbooks/cfat/docs/cfat-checks.csv +31 -0
- runbooks/cfat/docs/cfat.txt +335 -0
- runbooks/cfat/docs/checks-output.png +0 -0
- runbooks/cfat/docs/cloudshell-console-run.png +0 -0
- runbooks/cfat/docs/cloudshell-download.png +0 -0
- runbooks/cfat/docs/cloudshell-output.png +0 -0
- runbooks/cfat/docs/downloadfile.png +0 -0
- runbooks/cfat/docs/jira-import.csv +24 -0
- runbooks/cfat/docs/open-cloudshell.png +0 -0
- runbooks/cfat/docs/report-header.png +0 -0
- runbooks/cfat/models.py +1026 -0
- runbooks/cfat/package-lock.json +5116 -0
- runbooks/cfat/package.json +38 -0
- runbooks/cfat/report.py +496 -0
- runbooks/cfat/reporting/__init__.py +46 -0
- runbooks/cfat/reporting/exporters.py +337 -0
- runbooks/cfat/reporting/formatters.py +496 -0
- runbooks/cfat/reporting/templates.py +135 -0
- runbooks/cfat/run-assessment.sh +23 -0
- runbooks/cfat/runner.py +69 -0
- runbooks/cfat/src/actions/check-cloudtrail-existence.ts +43 -0
- runbooks/cfat/src/actions/check-config-existence.ts +37 -0
- runbooks/cfat/src/actions/check-control-tower.ts +37 -0
- runbooks/cfat/src/actions/check-ec2-existence.ts +46 -0
- runbooks/cfat/src/actions/check-iam-users.ts +50 -0
- runbooks/cfat/src/actions/check-legacy-cur.ts +30 -0
- runbooks/cfat/src/actions/check-org-cloudformation.ts +30 -0
- runbooks/cfat/src/actions/check-vpc-existence.ts +43 -0
- runbooks/cfat/src/actions/create-asanaimport.ts +14 -0
- runbooks/cfat/src/actions/create-backlog.ts +372 -0
- runbooks/cfat/src/actions/create-jiraimport.ts +15 -0
- runbooks/cfat/src/actions/create-report.ts +616 -0
- runbooks/cfat/src/actions/define-account-type.ts +51 -0
- runbooks/cfat/src/actions/get-enabled-org-policy-types.ts +40 -0
- runbooks/cfat/src/actions/get-enabled-org-services.ts +26 -0
- runbooks/cfat/src/actions/get-idc-info.ts +34 -0
- runbooks/cfat/src/actions/get-org-da-accounts.ts +34 -0
- runbooks/cfat/src/actions/get-org-details.ts +35 -0
- runbooks/cfat/src/actions/get-org-member-accounts.ts +44 -0
- runbooks/cfat/src/actions/get-org-ous.ts +35 -0
- runbooks/cfat/src/actions/get-regions.ts +22 -0
- runbooks/cfat/src/actions/zip-assessment.ts +27 -0
- runbooks/cfat/src/types/index.d.ts +147 -0
- runbooks/cfat/tests/__init__.py +141 -0
- runbooks/cfat/tests/test_cli.py +340 -0
- runbooks/cfat/tests/test_integration.py +290 -0
- runbooks/cfat/tests/test_models.py +505 -0
- runbooks/cfat/tests/test_reporting.py +354 -0
- runbooks/cfat/tsconfig.json +16 -0
- runbooks/cfat/webpack.config.cjs +27 -0
- runbooks/config.py +260 -0
- runbooks/finops/README.md +337 -0
- runbooks/finops/__init__.py +86 -0
- runbooks/finops/aws_client.py +245 -0
- runbooks/finops/cli.py +151 -0
- runbooks/finops/cost_processor.py +410 -0
- runbooks/finops/dashboard_runner.py +448 -0
- runbooks/finops/helpers.py +355 -0
- runbooks/finops/main.py +14 -0
- runbooks/finops/profile_processor.py +174 -0
- runbooks/finops/types.py +66 -0
- runbooks/finops/visualisations.py +80 -0
- runbooks/inventory/.gitignore +354 -0
- runbooks/inventory/ArgumentsClass.py +261 -0
- runbooks/inventory/FAILED_SCRIPTS_TROUBLESHOOTING.md +619 -0
- runbooks/inventory/Inventory_Modules.py +6130 -0
- runbooks/inventory/LandingZone/delete_lz.py +1075 -0
- runbooks/inventory/PASSED_SCRIPTS_GUIDE.md +738 -0
- runbooks/inventory/README.md +1320 -0
- runbooks/inventory/__init__.py +62 -0
- runbooks/inventory/account_class.py +532 -0
- runbooks/inventory/all_my_instances_wrapper.py +123 -0
- runbooks/inventory/aws_decorators.py +201 -0
- runbooks/inventory/aws_organization.png +0 -0
- runbooks/inventory/cfn_move_stack_instances.py +1526 -0
- runbooks/inventory/check_cloudtrail_compliance.py +614 -0
- runbooks/inventory/check_controltower_readiness.py +1107 -0
- runbooks/inventory/check_landingzone_readiness.py +711 -0
- runbooks/inventory/cloudtrail.md +727 -0
- runbooks/inventory/collectors/__init__.py +20 -0
- runbooks/inventory/collectors/aws_compute.py +518 -0
- runbooks/inventory/collectors/aws_networking.py +275 -0
- runbooks/inventory/collectors/base.py +222 -0
- runbooks/inventory/core/__init__.py +19 -0
- runbooks/inventory/core/collector.py +303 -0
- runbooks/inventory/core/formatter.py +296 -0
- runbooks/inventory/delete_s3_buckets_objects.py +169 -0
- runbooks/inventory/discovery.md +81 -0
- runbooks/inventory/draw_org_structure.py +748 -0
- runbooks/inventory/ec2_vpc_utils.py +341 -0
- runbooks/inventory/find_cfn_drift_detection.py +272 -0
- runbooks/inventory/find_cfn_orphaned_stacks.py +719 -0
- runbooks/inventory/find_cfn_stackset_drift.py +733 -0
- runbooks/inventory/find_ec2_security_groups.py +669 -0
- runbooks/inventory/find_landingzone_versions.py +201 -0
- runbooks/inventory/find_vpc_flow_logs.py +1221 -0
- runbooks/inventory/inventory.sh +659 -0
- runbooks/inventory/list_cfn_stacks.py +558 -0
- runbooks/inventory/list_cfn_stackset_operation_results.py +252 -0
- runbooks/inventory/list_cfn_stackset_operations.py +734 -0
- runbooks/inventory/list_cfn_stacksets.py +453 -0
- runbooks/inventory/list_config_recorders_delivery_channels.py +681 -0
- runbooks/inventory/list_ds_directories.py +354 -0
- runbooks/inventory/list_ec2_availability_zones.py +286 -0
- runbooks/inventory/list_ec2_ebs_volumes.py +244 -0
- runbooks/inventory/list_ec2_instances.py +425 -0
- runbooks/inventory/list_ecs_clusters_and_tasks.py +562 -0
- runbooks/inventory/list_elbs_load_balancers.py +411 -0
- runbooks/inventory/list_enis_network_interfaces.py +526 -0
- runbooks/inventory/list_guardduty_detectors.py +568 -0
- runbooks/inventory/list_iam_policies.py +404 -0
- runbooks/inventory/list_iam_roles.py +518 -0
- runbooks/inventory/list_iam_saml_providers.py +359 -0
- runbooks/inventory/list_lambda_functions.py +882 -0
- runbooks/inventory/list_org_accounts.py +446 -0
- runbooks/inventory/list_org_accounts_users.py +354 -0
- runbooks/inventory/list_rds_db_instances.py +406 -0
- runbooks/inventory/list_route53_hosted_zones.py +318 -0
- runbooks/inventory/list_servicecatalog_provisioned_products.py +575 -0
- runbooks/inventory/list_sns_topics.py +360 -0
- runbooks/inventory/list_ssm_parameters.py +402 -0
- runbooks/inventory/list_vpc_subnets.py +433 -0
- runbooks/inventory/list_vpcs.py +422 -0
- runbooks/inventory/lockdown_cfn_stackset_role.py +224 -0
- runbooks/inventory/models/__init__.py +24 -0
- runbooks/inventory/models/account.py +192 -0
- runbooks/inventory/models/inventory.py +309 -0
- runbooks/inventory/models/resource.py +247 -0
- runbooks/inventory/recover_cfn_stack_ids.py +205 -0
- runbooks/inventory/requirements.txt +12 -0
- runbooks/inventory/run_on_multi_accounts.py +211 -0
- runbooks/inventory/tests/common_test_data.py +3661 -0
- runbooks/inventory/tests/common_test_functions.py +204 -0
- runbooks/inventory/tests/setup.py +24 -0
- runbooks/inventory/tests/src.py +18 -0
- runbooks/inventory/tests/test_cfn_describe_stacks.py +208 -0
- runbooks/inventory/tests/test_ec2_describe_instances.py +162 -0
- runbooks/inventory/tests/test_inventory_modules.py +55 -0
- runbooks/inventory/tests/test_lambda_list_functions.py +86 -0
- runbooks/inventory/tests/test_moto_integration_example.py +273 -0
- runbooks/inventory/tests/test_org_list_accounts.py +49 -0
- runbooks/inventory/update_aws_actions.py +173 -0
- runbooks/inventory/update_cfn_stacksets.py +1215 -0
- runbooks/inventory/update_cloudwatch_logs_retention_policy.py +294 -0
- runbooks/inventory/update_iam_roles_cross_accounts.py +478 -0
- runbooks/inventory/update_s3_public_access_block.py +539 -0
- runbooks/inventory/utils/__init__.py +23 -0
- runbooks/inventory/utils/aws_helpers.py +510 -0
- runbooks/inventory/utils/threading_utils.py +493 -0
- runbooks/inventory/utils/validation.py +682 -0
- runbooks/inventory/verify_ec2_security_groups.py +1430 -0
- runbooks/main.py +1004 -0
- runbooks/organizations/__init__.py +12 -0
- runbooks/organizations/manager.py +374 -0
- runbooks/security/README.md +447 -0
- runbooks/security/__init__.py +71 -0
- runbooks/{security_baseline → security}/checklist/alternate_contacts.py +8 -1
- runbooks/{security_baseline → security}/checklist/bucket_public_access.py +4 -1
- runbooks/{security_baseline → security}/checklist/cloudwatch_alarm_configuration.py +9 -2
- runbooks/{security_baseline → security}/checklist/guardduty_enabled.py +9 -2
- runbooks/{security_baseline → security}/checklist/multi_region_instance_usage.py +5 -1
- runbooks/{security_baseline → security}/checklist/root_access_key.py +6 -1
- runbooks/{security_baseline → security}/config-origin.json +1 -1
- runbooks/{security_baseline → security}/config.json +1 -1
- runbooks/{security_baseline → security}/permission.json +1 -1
- runbooks/{security_baseline → security}/report_generator.py +10 -2
- runbooks/{security_baseline → security}/report_template_en.html +7 -7
- runbooks/{security_baseline → security}/report_template_jp.html +7 -7
- runbooks/{security_baseline → security}/report_template_kr.html +12 -12
- runbooks/{security_baseline → security}/report_template_vn.html +7 -7
- runbooks/{security_baseline → security}/run_script.py +8 -2
- runbooks/{security_baseline → security}/security_baseline_tester.py +12 -4
- runbooks/{security_baseline → security}/utils/common.py +5 -1
- runbooks/utils/__init__.py +204 -0
- runbooks-0.7.0.dist-info/METADATA +375 -0
- runbooks-0.7.0.dist-info/RECORD +249 -0
- {runbooks-0.2.5.dist-info → runbooks-0.7.0.dist-info}/WHEEL +1 -1
- runbooks-0.7.0.dist-info/entry_points.txt +7 -0
- runbooks-0.7.0.dist-info/licenses/LICENSE +201 -0
- runbooks-0.7.0.dist-info/top_level.txt +3 -0
- runbooks/python101/calculator.py +0 -34
- runbooks/python101/config.py +0 -1
- runbooks/python101/exceptions.py +0 -16
- runbooks/python101/file_manager.py +0 -218
- runbooks/python101/toolkit.py +0 -153
- runbooks-0.2.5.dist-info/METADATA +0 -439
- runbooks-0.2.5.dist-info/RECORD +0 -61
- runbooks-0.2.5.dist-info/entry_points.txt +0 -3
- runbooks-0.2.5.dist-info/top_level.txt +0 -1
- /runbooks/{security_baseline/__init__.py → inventory/tests/script_test_data.py} +0 -0
- /runbooks/{security_baseline → security}/checklist/__init__.py +0 -0
- /runbooks/{security_baseline → security}/checklist/account_level_bucket_public_access.py +0 -0
- /runbooks/{security_baseline → security}/checklist/direct_attached_policy.py +0 -0
- /runbooks/{security_baseline → security}/checklist/iam_password_policy.py +0 -0
- /runbooks/{security_baseline → security}/checklist/iam_user_mfa.py +0 -0
- /runbooks/{security_baseline → security}/checklist/multi_region_trail.py +0 -0
- /runbooks/{security_baseline → security}/checklist/root_mfa.py +0 -0
- /runbooks/{security_baseline → security}/checklist/root_usage.py +0 -0
- /runbooks/{security_baseline → security}/checklist/trail_enabled.py +0 -0
- /runbooks/{security_baseline → security}/checklist/trusted_advisor.py +0 -0
- /runbooks/{security_baseline → security}/utils/__init__.py +0 -0
- /runbooks/{security_baseline → security}/utils/enums.py +0 -0
- /runbooks/{security_baseline → security}/utils/language.py +0 -0
- /runbooks/{security_baseline → security}/utils/level_const.py +0 -0
- /runbooks/{security_baseline → security}/utils/permission_list.py +0 -0
@@ -0,0 +1,493 @@
|
|
1
|
+
"""
|
2
|
+
Threading and concurrency utilities for inventory operations.
|
3
|
+
|
4
|
+
This module provides thread pool management, concurrent execution helpers,
|
5
|
+
and progress tracking for multi-threaded inventory collection operations.
|
6
|
+
"""
|
7
|
+
|
8
|
+
import threading
|
9
|
+
import time
|
10
|
+
from concurrent.futures import Future, ThreadPoolExecutor, as_completed
|
11
|
+
from dataclasses import dataclass, field
|
12
|
+
from datetime import datetime, timedelta
|
13
|
+
from queue import Empty, Queue
|
14
|
+
from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union
|
15
|
+
|
16
|
+
from loguru import logger
|
17
|
+
|
18
|
+
|
19
|
+
@dataclass
class TaskResult:
    """Outcome record for a single task executed on a worker thread.

    A record can be created as a placeholder from the task id alone and is
    filled in later by :meth:`mark_completed`.
    """

    task_id: str
    # Defaults to False so that a placeholder can be constructed with only a
    # task id (``TaskResult(task_id=...)``) before the outcome is known.
    success: bool = False
    result: Any = None
    error: Optional[Exception] = None
    # Naive UTC timestamp captured when the record is created.
    start_time: datetime = field(default_factory=datetime.utcnow)
    end_time: Optional[datetime] = None
    duration: Optional[timedelta] = None

    def mark_completed(self, success: bool, result: Any = None, error: Optional[Exception] = None):
        """Record the outcome of the task and stamp its end time.

        Args:
            success: Whether the task finished without raising
            result: Value returned by the task, if any
            error: Exception raised by the task, if any
        """
        self.success = success
        self.result = result
        self.error = error
        self.end_time = datetime.utcnow()
        self.duration = self.end_time - self.start_time

    def get_duration_seconds(self) -> float:
        """Return the task duration in seconds.

        Uses the stored duration when available, otherwise derives it from
        the end time, and for a task that has not finished yet returns the
        time elapsed since ``start_time``.
        """
        if self.duration is not None:
            return self.duration.total_seconds()
        if self.end_time is not None:
            return (self.end_time - self.start_time).total_seconds()
        return (datetime.utcnow() - self.start_time).total_seconds()
|
46
|
+
|
47
|
+
|
48
|
+
@dataclass
class ProgressMetrics:
    """Counters and a completion estimate for a group of threaded tasks."""

    total_tasks: int = 0
    completed_tasks: int = 0
    successful_tasks: int = 0
    failed_tasks: int = 0
    # Naive UTC timestamp captured when the metrics object is created.
    start_time: datetime = field(default_factory=datetime.utcnow)
    estimated_completion: Optional[datetime] = None

    def update_progress(self, success: bool):
        """Count one finished task and refresh the completion estimate.

        Args:
            success: Whether the finished task succeeded
        """
        self.completed_tasks += 1
        if success:
            self.successful_tasks += 1
        else:
            self.failed_tasks += 1

        # Extrapolate the finish time from the average completion rate so far.
        now = datetime.utcnow()
        elapsed_seconds = (now - self.start_time).total_seconds()
        if elapsed_seconds <= 0:
            # The clock has not advanced since start_time (or start_time lies
            # in the future); a rate cannot be computed, so the previous
            # estimate is left untouched instead of dividing by zero.
            return

        rate = self.completed_tasks / elapsed_seconds
        if rate > 0:
            remaining_seconds = max(0, self.total_tasks - self.completed_tasks) / rate
            self.estimated_completion = now + timedelta(seconds=remaining_seconds)

    def get_completion_percentage(self) -> float:
        """Return the share of completed tasks as a percentage (0.0 when there are no tasks)."""
        if self.total_tasks == 0:
            return 0.0
        return (self.completed_tasks / self.total_tasks) * 100

    def get_success_rate(self) -> float:
        """Return the share of completed tasks that succeeded, as a percentage."""
        if self.completed_tasks == 0:
            return 0.0
        return (self.successful_tasks / self.completed_tasks) * 100

    def is_complete(self) -> bool:
        """Return True once the completed count has reached the total."""
        return self.completed_tasks >= self.total_tasks

    def get_remaining_tasks(self) -> int:
        """Return the number of tasks not yet completed (never negative)."""
        return max(0, self.total_tasks - self.completed_tasks)
|
94
|
+
|
95
|
+
|
96
|
+
class ThreadPoolManager:
    """
    Advanced thread pool manager for inventory operations.

    Wraps a ``ThreadPoolExecutor`` and keeps, per task id, the submitted
    future and a ``TaskResult`` record, plus aggregate ``ProgressMetrics``.
    Can be used as a context manager: the pool is started on entry and shut
    down (waiting for running tasks) on exit.
    """

    def __init__(
        self,
        max_workers: Optional[int] = None,
        thread_name_prefix: str = "InventoryWorker",
        progress_callback: Optional[Callable[[ProgressMetrics], None]] = None,
    ):
        """
        Initialize thread pool manager.

        Args:
            max_workers: Maximum number of worker threads (None for auto-detect)
            thread_name_prefix: Prefix for worker thread names
            progress_callback: Callback invoked with the metrics after each task finishes
        """
        # Auto-detect worker count if not specified: CPU count + 4, capped at 32.
        if max_workers is None:
            import os

            max_workers = min(32, (os.cpu_count() or 1) + 4)

        self.max_workers = max_workers
        self.thread_name_prefix = thread_name_prefix
        self.progress_callback = progress_callback

        self._executor: Optional[ThreadPoolExecutor] = None
        self._futures: Dict[str, Future] = {}
        self._results: Dict[str, TaskResult] = {}
        self._metrics = ProgressMetrics()
        # Guards _results and _metrics, which worker threads update.
        self._lock = threading.Lock()

        logger.debug(f"Initialized ThreadPoolManager with {max_workers} workers")

    def __enter__(self):
        """Context manager entry: start the pool and return the manager."""
        self.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit: shut the pool down, waiting for running tasks."""
        self.shutdown()

    def start(self):
        """Start the thread pool (no-op if it is already running)."""
        if self._executor is None:
            self._executor = ThreadPoolExecutor(
                max_workers=self.max_workers, thread_name_prefix=self.thread_name_prefix
            )
            logger.debug("Thread pool started")

    def shutdown(self, wait: bool = True):
        """
        Shutdown the thread pool.

        Args:
            wait: Whether to wait for all tasks to complete
        """
        if self._executor:
            self._executor.shutdown(wait=wait)
            self._executor = None
            logger.debug("Thread pool shutdown")

    def submit_task(self, task_id: str, func: Callable, *args, **kwargs) -> Future:
        """
        Submit a task to the thread pool.

        Args:
            task_id: Unique identifier for the task
            func: Function to execute
            *args: Positional arguments for the function
            **kwargs: Keyword arguments for the function

        Returns:
            Future object for the submitted task

        Raises:
            RuntimeError: If thread pool is not started
        """
        if not self._executor:
            raise RuntimeError("Thread pool not started. Call start() first.")

        # Placeholder record; ``success`` is passed explicitly because the
        # outcome is unknown until the task has run.
        task_result = TaskResult(task_id=task_id, success=False)

        with self._lock:
            self._results[task_id] = task_result
            self._metrics.total_tasks += 1

        # Submit task to executor
        future = self._executor.submit(self._execute_task, task_id, func, *args, **kwargs)
        self._futures[task_id] = future

        logger.debug(f"Submitted task: {task_id}")
        return future

    def _execute_task(self, task_id: str, func: Callable, *args, **kwargs) -> Any:
        """
        Execute a task with error handling and metrics tracking.

        Runs on a worker thread. The outcome is written to the task's
        ``TaskResult`` and the metrics are updated before the progress
        callback (if any) is invoked.

        Args:
            task_id: Task identifier
            func: Function to execute
            *args: Function arguments
            **kwargs: Function keyword arguments

        Returns:
            Function result

        Raises:
            Exception: Whatever ``func`` raised, re-raised after bookkeeping
                so that it is also visible through the task's future
        """
        result = None
        error = None
        success = False

        try:
            logger.debug(f"Executing task: {task_id}")
            result = func(*args, **kwargs)
            success = True
            logger.debug(f"Task completed successfully: {task_id}")

        except Exception as e:
            error = e
            logger.error(f"Task failed: {task_id} - {e}")

        # Update task result
        with self._lock:
            if task_id in self._results:
                self._results[task_id].mark_completed(success, result, error)
            self._metrics.update_progress(success)

        # Call progress callback if provided; a failing callback must not
        # turn a finished task into a failed one.
        if self.progress_callback:
            try:
                self.progress_callback(self._metrics)
            except Exception as cb_error:
                logger.warning(f"Progress callback error: {cb_error}")

        if not success:
            raise error

        return result

    def submit_batch(self, tasks: List[Tuple[str, Callable, tuple, dict]]) -> Dict[str, Future]:
        """
        Submit a batch of tasks to the thread pool.

        Args:
            tasks: List of (task_id, function, args, kwargs) tuples

        Returns:
            Dictionary mapping task IDs to Future objects
        """
        futures = {}

        for task_id, func, args, kwargs in tasks:
            futures[task_id] = self.submit_task(task_id, func, *args, **kwargs)

        logger.info(f"Submitted batch of {len(tasks)} tasks")
        return futures

    def wait_for_completion(
        self, timeout: Optional[float] = None, progress_interval: float = 5.0
    ) -> Dict[str, TaskResult]:
        """
        Wait for all submitted tasks to complete.

        A timeout is logged rather than raised; the results gathered so far
        are returned either way.

        Args:
            timeout: Maximum time to wait in seconds
            progress_interval: Interval for progress logging in seconds

        Returns:
            Dictionary mapping task IDs to TaskResult objects
        """
        # Before Python 3.11, as_completed raises concurrent.futures.TimeoutError,
        # which is a different class from the builtin TimeoutError; catch both.
        from concurrent.futures import TimeoutError as FuturesTimeoutError

        if not self._futures:
            return self._results.copy()

        logger.info(f"Waiting for {len(self._futures)} tasks to complete")

        start_time = time.time()
        last_progress_time = start_time

        try:
            for _future in as_completed(self._futures.values(), timeout=timeout):
                # Log progress periodically
                current_time = time.time()
                if current_time - last_progress_time >= progress_interval:
                    self._log_progress()
                    last_progress_time = current_time

                # Check timeout
                if timeout and (current_time - start_time) >= timeout:
                    logger.warning(f"Timeout reached after {timeout} seconds")
                    break

            # Final progress log
            self._log_progress()

        except (FuturesTimeoutError, TimeoutError):
            logger.error(f"Tasks did not complete within {timeout} seconds")

        # Clean up futures. Futures still running after a timeout are no
        # longer tracked here, but their TaskResult entries remain in _results.
        self._futures.clear()

        total_time = time.time() - start_time
        logger.info(
            f"Task completion finished in {total_time:.2f} seconds. "
            f"Success rate: {self._metrics.get_success_rate():.1f}%"
        )

        return self._results.copy()

    def get_results(self, completed_only: bool = True) -> Dict[str, TaskResult]:
        """
        Get task results.

        Args:
            completed_only: Whether to return only completed tasks

        Returns:
            Dictionary of task results
        """
        with self._lock:
            if completed_only:
                # A task counts as completed once mark_completed set end_time.
                return {task_id: result for task_id, result in self._results.items() if result.end_time is not None}
            return self._results.copy()

    def get_metrics(self) -> ProgressMetrics:
        """Get current progress metrics (the live object, not a copy)."""
        with self._lock:
            return self._metrics

    def get_successful_results(self) -> Dict[str, Any]:
        """Get return values of successful tasks; tasks that returned None are omitted."""
        return {
            task_id: result.result
            for task_id, result in self.get_results().items()
            if result.success and result.result is not None
        }

    def get_failed_tasks(self) -> Dict[str, Exception]:
        """Get errors from failed tasks."""
        return {
            task_id: result.error
            for task_id, result in self.get_results().items()
            if not result.success and result.error is not None
        }

    def _log_progress(self):
        """Log current progress metrics and, when known, the estimated time remaining."""
        metrics = self._metrics
        logger.info(
            f"Progress: {metrics.completed_tasks}/{metrics.total_tasks} "
            f"({metrics.get_completion_percentage():.1f}%) - "
            f"Success rate: {metrics.get_success_rate():.1f}%"
        )

        if metrics.estimated_completion:
            remaining = metrics.estimated_completion - datetime.utcnow()
            if remaining.total_seconds() > 0:
                logger.info(f"Estimated completion in {remaining}")

    def cancel_remaining_tasks(self):
        """Cancel all pending tasks that have not started running yet."""
        cancelled_count = 0

        for task_id, future in self._futures.items():
            if not future.done() and future.cancel():
                cancelled_count += 1
                logger.debug(f"Cancelled task: {task_id}")

        if cancelled_count > 0:
            logger.info(f"Cancelled {cancelled_count} pending tasks")

    def is_active(self) -> bool:
        """Check whether any futures are still being tracked."""
        return bool(self._futures)

    def get_active_task_count(self) -> int:
        """Get number of active (not completed) tasks."""
        return sum(1 for future in self._futures.values() if not future.done())
|
382
|
+
|
383
|
+
|
384
|
+
class BatchProcessor:
    """
    Utility for processing large batches of items with threading.

    Splits a list of items into fixed-size chunks and runs a processing
    function on each chunk through a ``ThreadPoolManager``.
    """

    def __init__(
        self,
        batch_size: int = 50,
        max_workers: Optional[int] = None,
        progress_callback: Optional[Callable[[ProgressMetrics], None]] = None,
    ):
        """
        Initialize batch processor.

        Args:
            batch_size: Number of items to process per batch
            max_workers: Maximum number of worker threads
            progress_callback: Callback for progress updates
        """
        self.batch_size = batch_size
        self.max_workers = max_workers
        self.progress_callback = progress_callback

    def process_items(
        self,
        items: List[Any],
        processor_func: Callable[[List[Any]], Any],
        item_id_func: Optional[Callable[[Any], str]] = None,
    ) -> Dict[str, Any]:
        """
        Process a list of items in batches using threading.

        Args:
            items: List of items to process
            processor_func: Function to process each batch
            item_id_func: Accepted for interface compatibility; currently
                unused, batches are always identified as ``batch_<n>``

        Returns:
            Dictionary mapping batch IDs (``batch_1``, ``batch_2``, ...) to the
            value returned for that batch; failed batches and batches whose
            function returned None are omitted

        Raises:
            ValueError: If ``batch_size`` is not a positive integer
        """
        # A non-positive size would either make range() fail with an opaque
        # message (0) or silently skip every item (negative).
        if self.batch_size <= 0:
            raise ValueError(f"batch_size must be a positive integer, got {self.batch_size}")

        # Split items into batches
        batches = [
            (f"batch_{start // self.batch_size + 1}", items[start : start + self.batch_size])
            for start in range(0, len(items), self.batch_size)
        ]

        logger.info(f"Processing {len(items)} items in {len(batches)} batches")

        # Process batches using thread pool
        with ThreadPoolManager(max_workers=self.max_workers, progress_callback=self.progress_callback) as pool:
            # Submit all batches
            for batch_id, batch in batches:
                pool.submit_task(batch_id, processor_func, batch)

            # Wait for completion
            pool.wait_for_completion()

            # Extract successful results
            return pool.get_successful_results()
|
447
|
+
|
448
|
+
|
449
|
+
def run_with_timeout(func: Callable, timeout: float, *args, **kwargs) -> Any:
    """
    Run a function with a timeout using threading.

    The function runs on a daemon thread. On timeout the thread is not
    stopped; it is simply abandoned and keeps running in the background.

    Args:
        func: Function to run
        timeout: Timeout in seconds
        *args: Function arguments
        **kwargs: Function keyword arguments

    Returns:
        Function result

    Raises:
        TimeoutError: If function doesn't complete within timeout
        Exception: Whatever ``func`` raised, re-raised in the calling thread
        RuntimeError: If the thread ended without producing a result or an
            exception (e.g. it was terminated by a non-``Exception`` error)
    """
    # Single hand-off channel carrying (succeeded, payload) from the worker.
    outcome: Queue = Queue()

    def target():
        try:
            outcome.put((True, func(*args, **kwargs)))
        except Exception as e:
            outcome.put((False, e))

    thread = threading.Thread(target=target)
    thread.daemon = True
    thread.start()
    thread.join(timeout)

    if thread.is_alive():
        # Thread is still running, timeout occurred. Not every callable has
        # __name__ (e.g. functools.partial objects), so fall back to repr().
        func_name = getattr(func, "__name__", repr(func))
        raise TimeoutError(f"Function {func_name} timed out after {timeout} seconds")

    try:
        succeeded, payload = outcome.get_nowait()
    except Empty:
        # Should not reach here
        raise RuntimeError("Function completed but no result or exception found") from None

    if not succeeded:
        raise payload
    return payload
|