secator-0.1.0-py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of secator may be problematic. See the registry page for details.
- secator/.gitignore +162 -0
- secator/__init__.py +0 -0
- secator/celery.py +421 -0
- secator/cli.py +927 -0
- secator/config.py +137 -0
- secator/configs/__init__.py +0 -0
- secator/configs/profiles/__init__.py +0 -0
- secator/configs/profiles/aggressive.yaml +7 -0
- secator/configs/profiles/default.yaml +9 -0
- secator/configs/profiles/stealth.yaml +7 -0
- secator/configs/scans/__init__.py +0 -0
- secator/configs/scans/domain.yaml +18 -0
- secator/configs/scans/host.yaml +14 -0
- secator/configs/scans/network.yaml +17 -0
- secator/configs/scans/subdomain.yaml +8 -0
- secator/configs/scans/url.yaml +12 -0
- secator/configs/workflows/__init__.py +0 -0
- secator/configs/workflows/cidr_recon.yaml +28 -0
- secator/configs/workflows/code_scan.yaml +11 -0
- secator/configs/workflows/host_recon.yaml +41 -0
- secator/configs/workflows/port_scan.yaml +34 -0
- secator/configs/workflows/subdomain_recon.yaml +33 -0
- secator/configs/workflows/url_crawl.yaml +29 -0
- secator/configs/workflows/url_dirsearch.yaml +29 -0
- secator/configs/workflows/url_fuzz.yaml +35 -0
- secator/configs/workflows/url_nuclei.yaml +11 -0
- secator/configs/workflows/url_vuln.yaml +55 -0
- secator/configs/workflows/user_hunt.yaml +10 -0
- secator/configs/workflows/wordpress.yaml +14 -0
- secator/decorators.py +346 -0
- secator/definitions.py +183 -0
- secator/exporters/__init__.py +12 -0
- secator/exporters/_base.py +3 -0
- secator/exporters/csv.py +29 -0
- secator/exporters/gdrive.py +118 -0
- secator/exporters/json.py +14 -0
- secator/exporters/table.py +7 -0
- secator/exporters/txt.py +24 -0
- secator/hooks/__init__.py +0 -0
- secator/hooks/mongodb.py +212 -0
- secator/output_types/__init__.py +24 -0
- secator/output_types/_base.py +95 -0
- secator/output_types/exploit.py +50 -0
- secator/output_types/ip.py +33 -0
- secator/output_types/port.py +45 -0
- secator/output_types/progress.py +35 -0
- secator/output_types/record.py +34 -0
- secator/output_types/subdomain.py +42 -0
- secator/output_types/tag.py +46 -0
- secator/output_types/target.py +30 -0
- secator/output_types/url.py +76 -0
- secator/output_types/user_account.py +41 -0
- secator/output_types/vulnerability.py +97 -0
- secator/report.py +95 -0
- secator/rich.py +123 -0
- secator/runners/__init__.py +12 -0
- secator/runners/_base.py +873 -0
- secator/runners/_helpers.py +154 -0
- secator/runners/command.py +674 -0
- secator/runners/scan.py +67 -0
- secator/runners/task.py +107 -0
- secator/runners/workflow.py +137 -0
- secator/serializers/__init__.py +8 -0
- secator/serializers/dataclass.py +33 -0
- secator/serializers/json.py +15 -0
- secator/serializers/regex.py +17 -0
- secator/tasks/__init__.py +10 -0
- secator/tasks/_categories.py +304 -0
- secator/tasks/cariddi.py +102 -0
- secator/tasks/dalfox.py +66 -0
- secator/tasks/dirsearch.py +88 -0
- secator/tasks/dnsx.py +56 -0
- secator/tasks/dnsxbrute.py +34 -0
- secator/tasks/feroxbuster.py +89 -0
- secator/tasks/ffuf.py +85 -0
- secator/tasks/fping.py +44 -0
- secator/tasks/gau.py +43 -0
- secator/tasks/gf.py +34 -0
- secator/tasks/gospider.py +71 -0
- secator/tasks/grype.py +78 -0
- secator/tasks/h8mail.py +80 -0
- secator/tasks/httpx.py +104 -0
- secator/tasks/katana.py +128 -0
- secator/tasks/maigret.py +78 -0
- secator/tasks/mapcidr.py +32 -0
- secator/tasks/msfconsole.py +176 -0
- secator/tasks/naabu.py +52 -0
- secator/tasks/nmap.py +341 -0
- secator/tasks/nuclei.py +97 -0
- secator/tasks/searchsploit.py +53 -0
- secator/tasks/subfinder.py +40 -0
- secator/tasks/wpscan.py +177 -0
- secator/utils.py +404 -0
- secator/utils_test.py +183 -0
- secator-0.1.0.dist-info/METADATA +379 -0
- secator-0.1.0.dist-info/RECORD +99 -0
- secator-0.1.0.dist-info/WHEEL +5 -0
- secator-0.1.0.dist-info/entry_points.txt +2 -0
- secator-0.1.0.dist-info/licenses/LICENSE +60 -0
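
Since a wheel is a plain zip archive, the file listing above can be reproduced locally without installing the package. The snippet below is a minimal sketch, not part of secator itself: the wheel filename and download location are assumptions, and only the Python standard library is used.

import zipfile

# Hypothetical local path; assumes the wheel was downloaded beforehand.
WHEEL_PATH = 'secator-0.1.0-py2.py3-none-any.whl'

with zipfile.ZipFile(WHEEL_PATH) as wheel:
    # Enumerate every shipped file, mirroring the RECORD listing above.
    for info in wheel.infolist():
        print(f'{info.filename} ({info.file_size} bytes)')

    # entry_points.txt shows which console scripts get installed.
    print(wheel.read('secator-0.1.0.dist-info/entry_points.txt').decode())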
secator/.gitignore
ADDED
@@ -0,0 +1,162 @@

# ---> Python
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
#   For a library or package, you might want to ignore these files since the code is
#   intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
#   However, in case of collaboration, if having platform-specific dependencies or dependencies
#   having no cross-platform support, pipenv may install dependencies that don't work, or not
#   install all needed dependencies.
#Pipfile.lock

# poetry
#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
#   This is especially recommended for binary packages to ensure reproducibility, and is more
#   commonly ignored for libraries.
#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
#   in version control.
#   https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
#  and can be added to the global gitignore or merged into this file.  For a more nuclear
#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
secator/__init__.py
ADDED
File without changes
secator/celery.py
ADDED
@@ -0,0 +1,421 @@

import gc
import logging
import traceback
from time import sleep

from celery import Celery, chain, chord, signals
from celery.app import trace
from celery.result import AsyncResult, allow_join_result
# from pyinstrument import Profiler  # TODO: make pyinstrument optional
from rich.logging import RichHandler

from secator.definitions import (CELERY_BROKER_CONNECTION_TIMEOUT,
                                 CELERY_BROKER_POOL_LIMIT, CELERY_BROKER_URL,
                                 CELERY_BROKER_VISIBILITY_TIMEOUT,
                                 CELERY_DATA_FOLDER,
                                 CELERY_OVERRIDE_DEFAULT_LOGGING,
                                 CELERY_RESULT_BACKEND, DEBUG)
from secator.rich import console
from secator.runners import Scan, Task, Workflow
from secator.runners._helpers import run_extractors
from secator.utils import TaskError, debug, deduplicate, flatten

# from pathlib import Path
# import memray  # TODO: conditional memray tracing

rich_handler = RichHandler(rich_tracebacks=True)
rich_handler.setLevel(logging.INFO)
logging.basicConfig(
    level='NOTSET',
    format="%(threadName)s:%(message)s",
    datefmt="[%X]",
    handlers=[rich_handler],
    force=True)
logging.getLogger('kombu').setLevel(logging.ERROR)
logging.getLogger('celery').setLevel(logging.INFO if DEBUG > 6 else logging.WARNING)

logger = logging.getLogger(__name__)

trace.LOG_SUCCESS = """\
Task %(name)s[%(id)s] succeeded in %(runtime)ss\
"""

app = Celery(__name__)
app.conf.update({
    # Worker config
    'worker_send_task_events': True,
    'worker_prefetch_multiplier': 1,
    'worker_max_tasks_per_child': 10,

    # Broker config
    'broker_url': CELERY_BROKER_URL,
    'broker_transport_options': {
        'data_folder_in': CELERY_DATA_FOLDER,
        'data_folder_out': CELERY_DATA_FOLDER,
        'visibility_timeout': CELERY_BROKER_VISIBILITY_TIMEOUT,
    },
    'broker_connection_retry_on_startup': True,
    'broker_pool_limit': CELERY_BROKER_POOL_LIMIT,
    'broker_connection_timeout': CELERY_BROKER_CONNECTION_TIMEOUT,

    # Backend config
    'result_backend': CELERY_RESULT_BACKEND,
    'result_extended': True,
    'result_backend_thread_safe': True,
    # 'result_backend_transport_options': {'master_name': 'mymaster'},  # for Redis HA backend

    # Task config
    'task_eager_propagates': False,
    'task_routes': {
        'secator.celery.run_workflow': {'queue': 'celery'},
        'secator.celery.run_scan': {'queue': 'celery'},
        'secator.celery.run_task': {'queue': 'celery'},
        'secator.hooks.mongodb.tag_duplicates': {'queue': 'mongodb'}
    },
    'task_reject_on_worker_lost': True,
    'task_acks_late': True,
    'task_create_missing_queues': True,
    'task_send_sent_event': True,

    # Serialization / compression
    'accept_content': ['application/x-python-serialize', 'application/json'],
    'task_compression': 'gzip',
    'task_serializer': 'pickle',
    'result_serializer': 'pickle'
})
app.autodiscover_tasks(['secator.hooks.mongodb'], related_name=None)


def maybe_override_logging():
    def decorator(func):
        if CELERY_OVERRIDE_DEFAULT_LOGGING:
            return signals.setup_logging.connect(func)
        else:
            return func
    return decorator


@maybe_override_logging()
def void(*args, **kwargs):
    """Override celery's logging setup to prevent it from altering our settings.
    github.com/celery/celery/issues/1867
    """
    pass


def revoke_task(task_id):
    console.print(f'Revoking task {task_id}')
    return app.control.revoke(task_id, terminate=True, signal='SIGKILL')


#--------------#
# Celery tasks #
#--------------#


def chunker(seq, size):
    return (seq[pos:pos + size] for pos in range(0, len(seq), size))


def break_task(task_cls, task_opts, targets, results=[], chunk_size=1):
    """Break a task into multiple of the same type."""
    chunks = targets
    if chunk_size > 1:
        chunks = list(chunker(targets, chunk_size))

    # Clone opts
    opts = task_opts.copy()

    # Build signatures
    sigs = []
    for ix, chunk in enumerate(chunks):
        if len(chunks) > 0:  # add chunk to task opts for tracking chunks exec
            opts['chunk'] = ix + 1
            opts['chunk_count'] = len(chunks)
            opts['parent'] = False
        sig = task_cls.s(chunk, **opts).set(queue=task_cls.profile)
        sigs.append(sig)

    # Build Celery workflow
    workflow = chain(
        forward_results.s(results).set(queue='io'),
        chord(
            tuple(sigs),
            forward_results.s().set(queue='io'),
        )
    )
    return workflow


@app.task(bind=True)
def run_task(self, args=[], kwargs={}):
    if DEBUG > 1:
        logger.info(f'Received task with args {args} and kwargs {kwargs}')
    if 'context' not in kwargs:
        kwargs['context'] = {}
    kwargs['context']['celery_id'] = self.request.id
    task = Task(*args, **kwargs)
    task.run()


@app.task(bind=True)
def run_workflow(self, args=[], kwargs={}):
    if DEBUG > 1:
        logger.info(f'Received workflow with args {args} and kwargs {kwargs}')
    if 'context' not in kwargs:
        kwargs['context'] = {}
    kwargs['context']['celery_id'] = self.request.id
    workflow = Workflow(*args, **kwargs)
    workflow.run()


@app.task(bind=True)
def run_scan(self, args=[], kwargs={}):
    if DEBUG > 1:
        logger.info(f'Received scan with args {args} and kwargs {kwargs}')
    if 'context' not in kwargs:
        kwargs['context'] = {}
    kwargs['context']['celery_id'] = self.request.id
    scan = Scan(*args, **kwargs)
    scan.run()


@app.task(bind=True)
def run_command(self, results, name, targets, opts={}):
    # profiler = Profiler(interval=0.0001)
    # profiler.start()
    chunk = opts.get('chunk')
    chunk_count = opts.get('chunk_count')
    description = opts.get('description')
    sync = opts.get('sync', True)

    # Set Celery request id in context
    context = opts.get('context', {})
    context['celery_id'] = self.request.id
    opts['context'] = context

    # Debug task
    full_name = name
    full_name += f' {chunk}/{chunk_count}' if chunk_count else ''

    # Update task state in backend
    count = 0
    msg_type = 'error'
    task_results = []
    task_state = 'RUNNING'
    task = None
    parent = True
    state = {
        'state': task_state,
        'meta': {
            'name': name,
            'progress': 0,
            'results': [],
            'chunk': chunk,
            'chunk_count': chunk_count,
            'count': count,
            'description': description
        }
    }
    self.update_state(**state)
    debug('updated', sub='celery.state', id=self.request.id, obj={full_name: 'RUNNING'}, obj_after=False, level=2)
    # profile_root = Path('/code/.profiles')
    # profile_root.mkdir(exist_ok=True)
    # profile_path = f'/code/.profiles/{self.request.id}.bin'
    # with memray.Tracker(profile_path):
    try:
        # Flatten + dedupe results
        results = flatten(results)
        results = deduplicate(results, attr='_uuid')

        # Get expanded targets
        if not chunk:
            targets, opts = run_extractors(results, opts, targets)
            if not targets:
                msg_type = 'info'
                raise TaskError(f'No targets were specified as input. Skipping. [{self.request.id}]')

        # Get task class
        task_cls = Task.get_task_class(name)

        # Get split
        multiple_targets = isinstance(targets, list) and len(targets) > 1
        single_target_only = multiple_targets and task_cls.file_flag is None
        break_size_threshold = multiple_targets and task_cls.input_chunk_size and len(targets) > task_cls.input_chunk_size

        # If task doesn't support multiple targets, or if the number of targets is too big, split into multiple tasks
        if single_target_only or (not sync and break_size_threshold):

            # Initiate main task and set context for sub-tasks
            task = task_cls(targets, parent=parent, has_children=True, **opts)
            chunk_size = 1 if single_target_only else task_cls.input_chunk_size
            debug(f'breaking task by chunks of size {chunk_size}.', id=self.request.id, sub='celery.state')
            workflow = break_task(
                task_cls,
                opts,
                targets,
                results=results,
                chunk_size=chunk_size)
            result = workflow.apply() if sync else workflow.apply_async()
            debug(
                'waiting for subtasks', sub='celery.state', id=self.request.id, obj={full_name: 'RUNNING'},
                obj_after=False, level=2)
            if not sync:
                list(task.__class__.get_live_results(result))
            with allow_join_result():
                task_results = result.get()
                results.extend(task_results)
                task_state = 'SUCCESS'
            debug(
                'all subtasks done', sub='celery.state', id=self.request.id, obj={full_name: 'RUNNING'},
                obj_after=False, level=2)

        # otherwise, run normally
        else:
            # If list with 1 element
            if isinstance(targets, list) and len(targets) == 1:
                targets = targets[0]

            # Run task
            task = task_cls(targets, **opts)
            for item in task:
                task_results.append(item)
                results.append(item)
                count += 1
                state['meta']['task_results'] = task_results
                state['meta']['results'] = results
                state['meta']['count'] = len(task_results)
                if item._type == 'progress':
                    state['meta']['progress'] = item.percent
                self.update_state(**state)
                debug(
                    'items found', sub='celery.state', id=self.request.id, obj={full_name: len(task_results)},
                    obj_after=False, level=4)

            # Update task state based on task return code
            if task.return_code == 0:
                task_state = 'SUCCESS'
                task_exc = None
            else:
                task_state = 'FAILURE'
                task_exc = TaskError('\n'.join(task.errors))

    except BaseException as exc:
        task_state = 'FAILURE'
        task_exc = exc

    finally:
        # Set task state and exception
        state['state'] = 'SUCCESS'  # force task success to serialize exception
        state['meta']['results'] = results
        state['meta']['task_results'] = task_results
        state['meta']['progress'] = 100

        # Handle task failure
        if task_state == 'FAILURE':
            if isinstance(task_exc, TaskError):
                exc_str = str(task_exc)
            else:  # full traceback
                exc_str = ' '.join(traceback.format_exception(task_exc, value=task_exc, tb=task_exc.__traceback__))
            state['meta'][msg_type] = exc_str

        # Update task state with final status
        self.update_state(**state)
        debug('updated', sub='celery.state', id=self.request.id, obj={full_name: task_state}, obj_after=False, level=2)

        # Update parent task if necessary
        if task and task.has_children:
            task.log_results()
            task.run_hooks('on_end')

        # profiler.stop()
        # from pathlib import Path
        # logger.info('Stopped profiling')
        # profile_root = Path('/code/.profiles')
        # profile_root.mkdir(exist_ok=True)
        # profile_path = f'/code/.profiles/{self.request.id}.html'
        # logger.info(f'Saving profile to {profile_path}')
        # with open(profile_path, 'w', encoding='utf-8') as f_html:
        #     f_html.write(profiler.output_html())

        # TODO: fix memory leak instead of running a garbage collector
        gc.collect()

    # If running in chunk mode, only return chunk result, not all results
    return results if parent else task_results


@app.task
def forward_results(results):
    if isinstance(results, list):
        for ix, item in enumerate(results):
            if isinstance(item, dict) and 'results' in item:
                results[ix] = item['results']
    elif 'results' in results:
        results = results['results']
    results = flatten(results)
    results = deduplicate(results, attr='_uuid')
    return results


#---------------------#
# Celery result utils #
#---------------------#


def poll_task(result, seen=[]):
    """Poll Celery result tree recursively to get results live.

    TODO: function is incomplete, as it does not parse all results.

    Args:
        result (Union[AsyncResult, GroupResult]): Celery result object.
        seen (list): List of seen results (do not yield again).

    Yields:
        dict: Result.
    """
    if result is None:
        return

    if result.children:
        for child in result.children:
            yield from poll_task(child, seen=seen)
    else:
        res = AsyncResult(result.id)
        if not res.info:
            sleep(0.1)
            yield from poll_task(result, seen=seen)

        # Task done running
        if isinstance(res.info, list):
            for item in res.info:
                if item._uuid not in seen:
                    yield res.id, None, item
                    seen.append(item._uuid)
            return

        # Get task partial results, remove duplicates
        results = res.info['results']
        name = res.info['name']
        for item in results:
            if item._uuid not in seen:
                yield res.id, name, item
                seen.append(item._uuid)

        # Task still running, keep polling
        if not res.ready():
            sleep(0.1)
            yield from poll_task(result, seen=seen)


def is_celery_worker_alive():
    """Check if a Celery worker is available."""
    result = app.control.broadcast('ping', reply=True, limit=1, timeout=1)
    result = bool(result)
    if result:
        console.print('Celery worker is alive !', style='bold green')
    # else:
    #     console.print('No Celery worker alive.', style='bold red')
    return result
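
For context, here is a minimal sketch of how the tasks defined in this module could be driven from client code. It assumes a broker and result backend are configured through secator's settings and that a worker is listening on the default 'celery' queue; the Task runner arguments shown are illustrative assumptions, not a confirmed API.

# Minimal sketch, under the assumptions above; the args passed to the
# Task runner ('httpx' and a target) are hypothetical examples.
from secator.celery import is_celery_worker_alive, run_task

if is_celery_worker_alive():
    # Dispatch asynchronously; run_task builds Task(*args, **kwargs)
    # and tags kwargs['context']['celery_id'] with the Celery request id.
    async_result = run_task.delay(args=['httpx', 'example.com'], kwargs={})
    async_result.get(timeout=300)  # block until done; run_task returns None
else:
    # A worker can be started with, e.g.: celery -A secator.celery worker
    print('No Celery worker alive; start one before dispatching tasks.')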