secator-0.0.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of secator might be problematic.
- secator/__init__.py +0 -0
- secator/celery.py +482 -0
- secator/cli.py +617 -0
- secator/config.py +137 -0
- secator/configs/__init__.py +0 -0
- secator/configs/profiles/__init__.py +0 -0
- secator/configs/profiles/aggressive.yaml +7 -0
- secator/configs/profiles/default.yaml +9 -0
- secator/configs/profiles/stealth.yaml +7 -0
- secator/configs/scans/__init__.py +0 -0
- secator/configs/scans/domain.yaml +18 -0
- secator/configs/scans/host.yaml +14 -0
- secator/configs/scans/network.yaml +17 -0
- secator/configs/scans/subdomain.yaml +8 -0
- secator/configs/scans/url.yaml +12 -0
- secator/configs/workflows/__init__.py +0 -0
- secator/configs/workflows/cidr_recon.yaml +28 -0
- secator/configs/workflows/code_scan.yaml +11 -0
- secator/configs/workflows/host_recon.yaml +41 -0
- secator/configs/workflows/port_scan.yaml +34 -0
- secator/configs/workflows/subdomain_recon.yaml +33 -0
- secator/configs/workflows/url_crawl.yaml +29 -0
- secator/configs/workflows/url_dirsearch.yaml +29 -0
- secator/configs/workflows/url_fuzz.yaml +35 -0
- secator/configs/workflows/url_nuclei.yaml +11 -0
- secator/configs/workflows/url_vuln.yaml +55 -0
- secator/configs/workflows/user_hunt.yaml +10 -0
- secator/configs/workflows/wordpress.yaml +14 -0
- secator/decorators.py +309 -0
- secator/definitions.py +165 -0
- secator/exporters/__init__.py +12 -0
- secator/exporters/_base.py +3 -0
- secator/exporters/csv.py +30 -0
- secator/exporters/gdrive.py +118 -0
- secator/exporters/json.py +15 -0
- secator/exporters/table.py +7 -0
- secator/exporters/txt.py +25 -0
- secator/hooks/__init__.py +0 -0
- secator/hooks/mongodb.py +212 -0
- secator/output_types/__init__.py +24 -0
- secator/output_types/_base.py +95 -0
- secator/output_types/exploit.py +50 -0
- secator/output_types/ip.py +33 -0
- secator/output_types/port.py +45 -0
- secator/output_types/progress.py +35 -0
- secator/output_types/record.py +34 -0
- secator/output_types/subdomain.py +42 -0
- secator/output_types/tag.py +46 -0
- secator/output_types/target.py +30 -0
- secator/output_types/url.py +76 -0
- secator/output_types/user_account.py +41 -0
- secator/output_types/vulnerability.py +97 -0
- secator/report.py +107 -0
- secator/rich.py +124 -0
- secator/runners/__init__.py +12 -0
- secator/runners/_base.py +833 -0
- secator/runners/_helpers.py +153 -0
- secator/runners/command.py +638 -0
- secator/runners/scan.py +65 -0
- secator/runners/task.py +106 -0
- secator/runners/workflow.py +135 -0
- secator/serializers/__init__.py +8 -0
- secator/serializers/dataclass.py +33 -0
- secator/serializers/json.py +15 -0
- secator/serializers/regex.py +17 -0
- secator/tasks/__init__.py +10 -0
- secator/tasks/_categories.py +304 -0
- secator/tasks/cariddi.py +102 -0
- secator/tasks/dalfox.py +65 -0
- secator/tasks/dirsearch.py +90 -0
- secator/tasks/dnsx.py +56 -0
- secator/tasks/dnsxbrute.py +34 -0
- secator/tasks/feroxbuster.py +91 -0
- secator/tasks/ffuf.py +86 -0
- secator/tasks/fping.py +44 -0
- secator/tasks/gau.py +47 -0
- secator/tasks/gf.py +33 -0
- secator/tasks/gospider.py +71 -0
- secator/tasks/grype.py +79 -0
- secator/tasks/h8mail.py +81 -0
- secator/tasks/httpx.py +99 -0
- secator/tasks/katana.py +133 -0
- secator/tasks/maigret.py +78 -0
- secator/tasks/mapcidr.py +32 -0
- secator/tasks/msfconsole.py +174 -0
- secator/tasks/naabu.py +52 -0
- secator/tasks/nmap.py +344 -0
- secator/tasks/nuclei.py +97 -0
- secator/tasks/searchsploit.py +52 -0
- secator/tasks/subfinder.py +40 -0
- secator/tasks/wpscan.py +179 -0
- secator/utils.py +445 -0
- secator/utils_test.py +183 -0
- secator-0.0.1.dist-info/LICENSE +60 -0
- secator-0.0.1.dist-info/METADATA +199 -0
- secator-0.0.1.dist-info/RECORD +114 -0
- secator-0.0.1.dist-info/WHEEL +5 -0
- secator-0.0.1.dist-info/entry_points.txt +2 -0
- secator-0.0.1.dist-info/top_level.txt +2 -0
- tests/__init__.py +0 -0
- tests/integration/__init__.py +0 -0
- tests/integration/inputs.py +42 -0
- tests/integration/outputs.py +392 -0
- tests/integration/test_scans.py +82 -0
- tests/integration/test_tasks.py +103 -0
- tests/integration/test_workflows.py +163 -0
- tests/performance/__init__.py +0 -0
- tests/performance/loadtester.py +56 -0
- tests/unit/__init__.py +0 -0
- tests/unit/test_celery.py +39 -0
- tests/unit/test_scans.py +0 -0
- tests/unit/test_serializers.py +51 -0
- tests/unit/test_tasks.py +348 -0
- tests/unit/test_workflows.py +96 -0
secator/__init__.py
ADDED
File without changes
secator/celery.py
ADDED
@@ -0,0 +1,482 @@
import gc
import logging
import traceback
from time import sleep

import celery
from celery import chain, chord, signals
from celery.app import trace
from celery.result import AsyncResult, allow_join_result
# from pyinstrument import Profiler
from rich.logging import RichHandler

from secator.definitions import (CELERY_BROKER_CONNECTION_TIMEOUT,
                                 CELERY_BROKER_POOL_LIMIT, CELERY_BROKER_URL,
                                 CELERY_BROKER_VISIBILITY_TIMEOUT,
                                 CELERY_DATA_FOLDER,
                                 CELERY_OVERRIDE_DEFAULT_LOGGING,
                                 CELERY_RESULT_BACKEND, DEBUG)
from secator.rich import console
from secator.runners import Scan, Task, Workflow
from secator.runners._helpers import run_extractors
from secator.utils import (TaskError, debug, deduplicate,
                           discover_external_tasks, discover_internal_tasks,
                           flatten)

# from pathlib import Path
# import memray

rich_handler = RichHandler(rich_tracebacks=True)
rich_handler.setLevel(logging.INFO)
logging.basicConfig(
	level='NOTSET',
	format="%(threadName)s:%(message)s",
	datefmt="[%X]",
	handlers=[rich_handler],
	force=True)
logging.getLogger('kombu').setLevel(logging.ERROR)
logging.getLogger('celery').setLevel(logging.INFO if DEBUG > 6 else logging.WARNING)

logger = logging.getLogger(__name__)

trace.LOG_SUCCESS = """\
Task %(name)s[%(id)s] succeeded in %(runtime)ss\
"""
COMMANDS = discover_internal_tasks() + discover_external_tasks()

app = celery.Celery(__name__)
app.conf.update({
	# Worker config
	'worker_send_task_events': True,
	'worker_prefetch_multiplier': 1,
	'worker_max_tasks_per_child': 10,

	# Broker config
	'broker_url': CELERY_BROKER_URL,
	'broker_transport_options': {
		'data_folder_in': CELERY_DATA_FOLDER,
		'data_folder_out': CELERY_DATA_FOLDER,
		'visibility_timeout': CELERY_BROKER_VISIBILITY_TIMEOUT,
	},
	'broker_connection_retry_on_startup': True,
	'broker_pool_limit': CELERY_BROKER_POOL_LIMIT,
	'broker_connection_timeout': CELERY_BROKER_CONNECTION_TIMEOUT,

	# Backend config
	'result_backend': CELERY_RESULT_BACKEND,
	'result_extended': True,
	'result_backend_thread_safe': True,
	# 'result_backend_transport_options': {'master_name': 'mymaster'},  # for Redis HA backend

	# Task config
	'task_eager_propagates': False,
	'task_routes': {
		'secator.celery.run_workflow': {'queue': 'celery'},
		'secator.celery.run_scan': {'queue': 'celery'},
		'secator.celery.run_task': {'queue': 'celery'},
		'secator.hooks.mongodb.tag_duplicates': {'queue': 'mongodb'}
	},
	'task_reject_on_worker_lost': True,
	'task_acks_late': True,
	'task_create_missing_queues': True,
	'task_send_sent_event': True,

	# Serialization / compression
	'accept_content': ['application/x-python-serialize', 'application/json'],
	'task_compression': 'gzip',
	'task_serializer': 'pickle',
	'result_serializer': 'pickle'
})
app.autodiscover_tasks(['secator.hooks.mongodb'], related_name=None)


def maybe_override_logging():
	def decorator(func):
		if CELERY_OVERRIDE_DEFAULT_LOGGING:
			return signals.setup_logging.connect(func)
		else:
			return func
	return decorator


@maybe_override_logging()
def void(*args, **kwargs):
	"""Override celery's logging setup to prevent it from altering our settings.
	github.com/celery/celery/issues/1867
	"""
	pass


def revoke_task(task_id):
	console.print(f'Revoking task {task_id}')
	return app.control.revoke(task_id, terminate=True, signal='SIGKILL')


#--------------#
# Celery tasks #
#--------------#


def chunker(seq, size):
	return (seq[pos:pos + size] for pos in range(0, len(seq), size))


def break_task(task_cls, task_opts, targets, results=[], chunk_size=1):
	"""Break a task into multiple of the same type."""
	chunks = targets
	if chunk_size > 1:
		chunks = list(chunker(targets, chunk_size))

	# Clone opts
	opts = task_opts.copy()

	# Build signatures
	sigs = []
	for ix, chunk in enumerate(chunks):
		if len(chunks) > 0:  # add chunk to task opts for tracking chunks exec
			opts['chunk'] = ix + 1
			opts['chunk_count'] = len(chunks)
			opts['parent'] = False
		sig = task_cls.s(chunk, **opts).set(queue=task_cls.profile)
		sigs.append(sig)

	# Build Celery workflow
	workflow = chain(
		forward_results.s(results).set(queue='io'),
		chord(
			tuple(sigs),
			forward_results.s().set(queue='io'),
		)
	)
	return workflow


@app.task(bind=True)
def run_task(self, args=[], kwargs={}):
	if DEBUG > 1:
		logger.info(f'Received task with args {args} and kwargs {kwargs}')
	if 'context' not in kwargs:
		kwargs['context'] = {}
	kwargs['context']['celery_id'] = self.request.id
	task = Task(*args, **kwargs)
	task.run()


@app.task(bind=True)
def run_workflow(self, args=[], kwargs={}):
	if DEBUG > 1:
		logger.info(f'Received workflow with args {args} and kwargs {kwargs}')
	if 'context' not in kwargs:
		kwargs['context'] = {}
	kwargs['context']['celery_id'] = self.request.id
	workflow = Workflow(*args, **kwargs)
	workflow.run()


@app.task(bind=True)
def run_scan(self, args=[], kwargs={}):
	if DEBUG > 1:
		logger.info(f'Received scan with args {args} and kwargs {kwargs}')
	if 'context' not in kwargs:
		kwargs['context'] = {}
	kwargs['context']['celery_id'] = self.request.id
	scan = Scan(*args, **kwargs)
	scan.run()


@app.task(bind=True)
def run_command(self, results, name, targets, opts={}):
	# profiler = Profiler(interval=0.0001)
	# profiler.start()
	chunk = opts.get('chunk')
	chunk_count = opts.get('chunk_count')
	description = opts.get('description')
	sync = opts.get('sync', True)

	# Set Celery request id in context
	context = opts.get('context', {})
	context['celery_id'] = self.request.id
	opts['context'] = context

	# Debug task
	full_name = name
	full_name += f' {chunk}/{chunk_count}' if chunk_count else ''

	# Update task state in backend
	count = 0
	msg_type = 'error'
	task_results = []
	task_state = 'RUNNING'
	task = None
	parent = True
	state = {
		'state': task_state,
		'meta': {
			'name': name,
			'progress': 0,
			'results': [],
			'chunk': chunk,
			'chunk_count': chunk_count,
			'count': count,
			'description': description
		}
	}
	self.update_state(**state)
	debug('updated', sub='celery.state', id=self.request.id, obj={full_name: 'RUNNING'}, obj_after=False, level=2)
	# profile_root = Path('/code/.profiles')
	# profile_root.mkdir(exist_ok=True)
	# profile_path = f'/code/.profiles/{self.request.id}.bin'
	# with memray.Tracker(profile_path):
	try:
		# Flatten + dedupe results
		results = flatten(results)
		results = deduplicate(results, attr='_uuid')

		# Get expanded targets
		if not chunk:
			targets, opts = run_extractors(results, opts, targets)
			if not targets:
				msg_type = 'info'
				raise TaskError(f'No targets were specified as input. Skipping. [{self.request.id}]')

		# Get task class
		task_cls = Task.get_task_class(name)

		# Get split
		multiple_targets = isinstance(targets, list) and len(targets) > 1
		single_target_only = multiple_targets and task_cls.file_flag is None
		break_size_threshold = multiple_targets and task_cls.input_chunk_size and len(targets) > task_cls.input_chunk_size

		# If task doesn't support multiple targets, or if the number of targets is too big, split into multiple tasks
		if single_target_only or (not sync and break_size_threshold):

			# Initiate main task and set context for sub-tasks
			task = task_cls(targets, parent=parent, has_children=True, **opts)
			chunk_size = 1 if single_target_only else task_cls.input_chunk_size
			debug(f'breaking task by chunks of size {chunk_size}.', id=self.request.id, sub='celery.state')
			workflow = break_task(
				task_cls,
				opts,
				targets,
				results=results,
				chunk_size=chunk_size)
			result = workflow.apply() if sync else workflow.apply_async()
			debug(
				'waiting for subtasks', sub='celery.state', id=self.request.id, obj={full_name: 'RUNNING'},
				obj_after=False, level=2)
			if not sync:
				list(task.__class__.get_live_results(result))
			with allow_join_result():
				task_results = result.get()
				results.extend(task_results)
				task_state = 'SUCCESS'
				debug(
					'all subtasks done', sub='celery.state', id=self.request.id, obj={full_name: 'RUNNING'},
					obj_after=False, level=2)

		# otherwise, run normally
		else:
			# If list with 1 element
			if isinstance(targets, list) and len(targets) == 1:
				targets = targets[0]

			# Run task
			task = task_cls(targets, **opts)
			for item in task:
				task_results.append(item)
				results.append(item)
				count += 1
				state['meta']['task_results'] = task_results
				state['meta']['results'] = results
				state['meta']['count'] = len(task_results)
				if item._type == 'progress':
					state['meta']['progress'] = item.percent
				self.update_state(**state)
				debug(
					'items found', sub='celery.state', id=self.request.id, obj={full_name: len(task_results)},
					obj_after=False, level=4)

			# Update task state based on task return code
			if task.return_code == 0:
				task_state = 'SUCCESS'
				task_exc = None
			else:
				task_state = 'FAILURE'
				task_exc = TaskError('\n'.join(task.errors))

	except BaseException as exc:
		task_state = 'FAILURE'
		task_exc = exc

	finally:
		# Set task state and exception
		state['state'] = 'SUCCESS'  # force task success to serialize exception
		state['meta']['results'] = results
		state['meta']['task_results'] = task_results
		state['meta']['progress'] = 100

		# Handle task failure
		if task_state == 'FAILURE':
			if isinstance(task_exc, TaskError):
				exc_str = str(task_exc)
			else:  # full traceback
				exc_str = ' '.join(traceback.format_exception(task_exc, value=task_exc, tb=task_exc.__traceback__))
			state['meta'][msg_type] = exc_str
			if task:
				color = 'bold red' if msg_type == 'error' else 'green'
				task._print(exc_str, color=color)
			else:
				console.log(exc_str)

		# Update task state with final status
		self.update_state(**state)
		debug('updated', sub='celery.state', id=self.request.id, obj={full_name: task_state}, obj_after=False, level=2)

		# Update parent task if necessary
		if task and task.has_children:
			task.log_results()
			task.run_hooks('on_end')

		# profiler.stop()
		# from pathlib import Path
		# logger.info('Stopped profiling')
		# profile_root = Path('/code/.profiles')
		# profile_root.mkdir(exist_ok=True)
		# profile_path = f'/code/.profiles/{self.request.id}.html'
		# logger.info(f'Saving profile to {profile_path}')
		# with open(profile_path, 'w', encoding='utf-8') as f_html:
		# 	f_html.write(profiler.output_html())

		# TODO: fix memory leak instead of running a garbage collector
		gc.collect()

		# If running in chunk mode, only return chunk result, not all results
		return results if parent else task_results


@app.task
def forward_results(results):
	if isinstance(results, list):
		for ix, item in enumerate(results):
			if isinstance(item, dict) and 'results' in item:
				results[ix] = item['results']
	elif 'results' in results:
		results = results['results']
	results = flatten(results)
	results = deduplicate(results, attr='_uuid')
	return results


#---------------------#
# Celery result utils #
#---------------------#

def find_root_task(result):
	while (result.parent is not None):
		result = result.parent
	return result


def poll_task(result, seen=[]):
	"""Poll Celery result tree recursively to get results live.

	TODO: function is incomplete, as it does not parse all results.

	Args:
		result (Union[AsyncResult, GroupResult]): Celery result object.
		seen (list): List of seen results (do not yield again).

	Yields:
		dict: Result.
	"""
	if result is None:
		return

	if result.children:
		for child in result.children:
			yield from poll_task(child, seen=seen)
	else:
		res = AsyncResult(result.id)
		if not res.info:
			sleep(0.1)
			yield from poll_task(result, seen=seen)

		# Task done running
		if isinstance(res.info, list):
			for item in res.info:
				if item._uuid not in seen:
					yield res.id, None, item
					seen.append(item._uuid)
			return

		# Get task partial results, remove duplicates
		results = res.info['results']
		name = res.info['name']
		for item in results:
			if item._uuid not in seen:
				yield res.id, name, item
				seen.append(item._uuid)

		# Task still running, keep polling
		if not res.ready():
			sleep(0.1)
			yield from poll_task(result, seen=seen)


def get_results(result):
	"""Get all intermediate results from Celery result object.

	Use this when running complex workflows with .si() i.e not passing results
	between tasks.

	Args:
		result (Union[AsyncResult, GroupResult]): Celery result.

	Returns:
		list: List of results.
	"""
	while not result.ready():
		continue
	results = []
	get_nested_results(result, results=results)
	return results


def get_nested_results(result, results=[]):
	"""Get results recursively from Celery result object by parsing result tree
	in reverse order. Also gets results from GroupResult children.

	Args:
		result (Union[AsyncResult, GroupResult]): Celery result object.

	Returns:
		list: List of results.
	"""
	if result is None:
		return

	if isinstance(result, celery.result.GroupResult):
		console.log(repr(result))
		get_nested_results(result.parent, results=results)
		for child in result.children:
			get_nested_results(child, results=results)

	elif isinstance(result, celery.result.AsyncResult):
		console.log(repr(result))
		res = result.get()
		console.log(f'-> Found {len(res)} results.')
		console.log(f'-> {res}')
		if res is not None:
			results.extend(res)
		get_nested_results(result.parent, results=results)


def is_celery_worker_alive():
	"""Check if a Celery worker is available."""
	result = app.control.broadcast('ping', reply=True, limit=1, timeout=1)
	result = bool(result)
	if result:
		console.print('Celery worker is alive !', style='bold green')
	else:
		console.print('No Celery worker alive.', style='bold red')
	return result
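For context only (this is not part of the wheel contents above), a minimal sketch of driving these Celery tasks directly is shown below. It assumes the CELERY_BROKER_URL / CELERY_RESULT_BACKEND settings from secator.definitions point at a reachable broker and result backend, and that a worker is consuming the default queue, e.g. `celery -A secator.celery.app worker`. The task name 'httpx' (which ships in secator/tasks/httpx.py) and the target URL are placeholder values.

# Illustrative sketch only, not part of the secator 0.0.1 wheel.
# Assumes a broker/backend reachable via the CELERY_* settings and a running
# worker, e.g.: celery -A secator.celery.app worker
from secator.celery import is_celery_worker_alive, run_command

if is_celery_worker_alive():
	# run_command(results, name, targets, opts) is the generic task defined above;
	# 'httpx' and the target below are placeholder values for illustration.
	async_result = run_command.delay([], 'httpx', ['http://example.com'])
	print(async_result.get())  # run_command returns the flattened, deduplicated results list

In the package itself these tasks are normally dispatched through the CLI (secator/cli.py) and the runner classes in secator/runners rather than invoked directly.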