secator 0.6.0__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of secator might be problematic; see the registry's advisory page for more details.

Files changed (84)
  1. secator/celery.py +160 -185
  2. secator/celery_utils.py +268 -0
  3. secator/cli.py +327 -106
  4. secator/config.py +27 -11
  5. secator/configs/workflows/host_recon.yaml +5 -3
  6. secator/configs/workflows/port_scan.yaml +7 -3
  7. secator/configs/workflows/url_bypass.yaml +10 -0
  8. secator/configs/workflows/url_vuln.yaml +1 -1
  9. secator/decorators.py +169 -92
  10. secator/definitions.py +10 -3
  11. secator/exporters/__init__.py +7 -5
  12. secator/exporters/console.py +10 -0
  13. secator/exporters/csv.py +27 -19
  14. secator/exporters/gdrive.py +16 -11
  15. secator/exporters/json.py +3 -1
  16. secator/exporters/table.py +30 -2
  17. secator/exporters/txt.py +20 -16
  18. secator/hooks/gcs.py +53 -0
  19. secator/hooks/mongodb.py +53 -27
  20. secator/output_types/__init__.py +29 -11
  21. secator/output_types/_base.py +11 -1
  22. secator/output_types/error.py +36 -0
  23. secator/output_types/exploit.py +1 -1
  24. secator/output_types/info.py +24 -0
  25. secator/output_types/ip.py +7 -0
  26. secator/output_types/port.py +8 -1
  27. secator/output_types/progress.py +5 -0
  28. secator/output_types/record.py +3 -1
  29. secator/output_types/stat.py +33 -0
  30. secator/output_types/tag.py +6 -4
  31. secator/output_types/url.py +6 -3
  32. secator/output_types/vulnerability.py +3 -2
  33. secator/output_types/warning.py +24 -0
  34. secator/report.py +55 -23
  35. secator/rich.py +44 -39
  36. secator/runners/_base.py +622 -635
  37. secator/runners/_helpers.py +5 -91
  38. secator/runners/celery.py +18 -0
  39. secator/runners/command.py +364 -211
  40. secator/runners/scan.py +8 -24
  41. secator/runners/task.py +21 -55
  42. secator/runners/workflow.py +41 -40
  43. secator/scans/__init__.py +28 -0
  44. secator/serializers/dataclass.py +6 -0
  45. secator/serializers/json.py +10 -5
  46. secator/serializers/regex.py +12 -4
  47. secator/tasks/_categories.py +5 -2
  48. secator/tasks/bbot.py +293 -0
  49. secator/tasks/bup.py +98 -0
  50. secator/tasks/cariddi.py +38 -49
  51. secator/tasks/dalfox.py +3 -0
  52. secator/tasks/dirsearch.py +12 -23
  53. secator/tasks/dnsx.py +49 -30
  54. secator/tasks/dnsxbrute.py +2 -0
  55. secator/tasks/feroxbuster.py +8 -17
  56. secator/tasks/ffuf.py +3 -2
  57. secator/tasks/fping.py +3 -3
  58. secator/tasks/gau.py +5 -0
  59. secator/tasks/gf.py +2 -2
  60. secator/tasks/gospider.py +4 -0
  61. secator/tasks/grype.py +9 -9
  62. secator/tasks/h8mail.py +31 -41
  63. secator/tasks/httpx.py +58 -21
  64. secator/tasks/katana.py +18 -22
  65. secator/tasks/maigret.py +26 -24
  66. secator/tasks/mapcidr.py +2 -3
  67. secator/tasks/msfconsole.py +4 -16
  68. secator/tasks/naabu.py +3 -1
  69. secator/tasks/nmap.py +50 -35
  70. secator/tasks/nuclei.py +9 -2
  71. secator/tasks/searchsploit.py +17 -9
  72. secator/tasks/subfinder.py +5 -1
  73. secator/tasks/wpscan.py +79 -93
  74. secator/template.py +61 -45
  75. secator/thread.py +24 -0
  76. secator/utils.py +330 -80
  77. secator/utils_test.py +48 -23
  78. secator/workflows/__init__.py +28 -0
  79. {secator-0.6.0.dist-info → secator-0.7.0.dist-info}/METADATA +11 -5
  80. secator-0.7.0.dist-info/RECORD +115 -0
  81. {secator-0.6.0.dist-info → secator-0.7.0.dist-info}/WHEEL +1 -1
  82. secator-0.6.0.dist-info/RECORD +0 -101
  83. {secator-0.6.0.dist-info → secator-0.7.0.dist-info}/entry_points.txt +0 -0
  84. {secator-0.6.0.dist-info → secator-0.7.0.dist-info}/licenses/LICENSE +0 -0
secator/runners/_base.py CHANGED
@@ -3,26 +3,20 @@ import logging
3
3
  import os
4
4
  import sys
5
5
  import uuid
6
- from contextlib import nullcontext
7
6
  from datetime import datetime
8
- from time import sleep, time
7
+ from pathlib import Path
8
+ from time import time
9
9
 
10
10
  import humanize
11
- from dotmap import DotMap
12
- from rich.padding import Padding
13
- from rich.panel import Panel
14
- from rich.progress import Progress as RichProgress
15
- from rich.progress import SpinnerColumn, TextColumn, TimeElapsedColumn
16
11
 
17
- from secator.definitions import DEBUG
12
+ from secator.definitions import ADDONS_ENABLED
13
+ from secator.celery_utils import CeleryData
18
14
  from secator.config import CONFIG
19
- from secator.output_types import OUTPUT_TYPES, OutputType, Progress
15
+ from secator.output_types import FINDING_TYPES, OutputType, Progress, Info, Warning, Error, Target
20
16
  from secator.report import Report
21
17
  from secator.rich import console, console_stdout
22
- from secator.runners._helpers import (get_task_data, get_task_ids, get_task_folder_id,
23
- process_extractor)
24
- from secator.utils import (debug, import_dynamic, merge_opts, pluralize,
25
- rich_to_ansi)
18
+ from secator.runners._helpers import (get_task_folder_id, process_extractor)
19
+ from secator.utils import (debug, import_dynamic, merge_opts, rich_to_ansi, should_update)
26
20
 
27
21
  logger = logging.getLogger(__name__)
28
22
 
@@ -34,14 +28,12 @@ HOOKS = [
34
28
  'on_item_pre_convert',
35
29
  'on_item',
36
30
  'on_duplicate',
37
- 'on_line',
38
- 'on_iter',
39
- 'on_error',
31
+ 'on_interval',
40
32
  ]
41
33
 
42
34
  VALIDATORS = [
43
- 'input',
44
- 'item'
35
+ 'validate_input',
36
+ 'validate_item'
45
37
  ]
46
38
 
47
39
 
@@ -49,17 +41,16 @@ class Runner:
49
41
  """Runner class.
50
42
 
51
43
  Args:
52
- config (secator.config.TemplateLoader): Loaded config.
53
- targets (list): List of targets to run task on.
54
- results (list): List of existing results to re-use.
55
- workspace_name (str): Workspace name.
56
- run_opts (dict): Run options.
44
+ config (secator.config.TemplateLoader): Runner config.
45
+ inputs (List[str]): List of inputs to run task on.
46
+ results (List[OutputType]): List of results to re-use.
47
+ run_opts (dict[str]): Run options.
48
+ hooks (dict[str, List[Callable]]): User hooks to register.
49
+ validators (dict): User validators to register.
50
+ context (dict): Runner context.
57
51
 
58
52
  Yields:
59
- dict: Result (when running in sync mode with `run`).
60
-
61
- Returns:
62
- list: List of results (when running in async mode with `run_async`).
53
+ OutputType: Output types.
63
54
  """
64
55
 
65
56
  # Input field (mostly for tests and CLI)
@@ -68,9 +59,6 @@ class Runner:
68
59
  # Output types
69
60
  output_types = []
70
61
 
71
- # Dict return
72
- output_return_type = dict # TODO: deprecate this
73
-
74
62
  # Default exporters
75
63
  default_exporters = []
76
64
 
@@ -80,37 +68,38 @@ class Runner:
80
68
  # Reports folder
81
69
  reports_folder = None
82
70
 
83
- def __init__(self, config, targets, results=[], run_opts={}, hooks={}, context={}):
71
+ def __init__(self, config, inputs=[], results=[], run_opts={}, hooks={}, validators={}, context={}):
84
72
  self.config = config
85
73
  self.name = run_opts.get('name', config.name)
86
74
  self.description = run_opts.get('description', config.description)
87
- if not isinstance(targets, list):
88
- targets = [targets]
89
- self.targets = targets
75
+ if not isinstance(inputs, list):
76
+ inputs = [inputs]
77
+ self.inputs = inputs
90
78
  self.results = results
91
- self.results_count = 0
92
79
  self.workspace_name = context.get('workspace_name', 'default')
93
80
  self.run_opts = run_opts.copy()
94
81
  self.sync = run_opts.get('sync', True)
95
82
  self.done = False
96
83
  self.start_time = datetime.fromtimestamp(time())
97
- self.last_updated = None
84
+ self.last_updated_db = None
85
+ self.last_updated_celery = None
98
86
  self.last_updated_progress = None
99
87
  self.end_time = None
100
88
  self._hooks = hooks
101
- self.errors = []
102
- self.infos = []
103
89
  self.output = ''
104
- self.status = 'RUNNING'
105
90
  self.progress = 0
106
91
  self.context = context
107
92
  self.delay = run_opts.get('delay', False)
108
- self.uuids = []
109
93
  self.celery_result = None
94
+ self.celery_ids = []
95
+ self.celery_ids_map = {}
96
+ self.uuids = []
97
+ self.caller = self.run_opts.get('caller', None)
98
+ self.threads = []
110
99
 
111
100
  # Determine exporters
112
101
  exporters_str = self.run_opts.get('output') or self.default_exporters
113
- self.exporters = Runner.resolve_exporters(exporters_str)
102
+ self.exporters = self.resolve_exporters(exporters_str)
114
103
 
115
104
  # Determine report folder
116
105
  default_reports_folder_base = f'{CONFIG.dirs.reports}/{self.workspace_name}/{self.config.type}s'
@@ -122,86 +111,63 @@ class Runner:
122
111
  os.makedirs(f'{self.reports_folder}/.inputs', exist_ok=True)
123
112
  os.makedirs(f'{self.reports_folder}/.outputs', exist_ok=True)
124
113
 
125
- # Process input
126
- self.input = targets
127
- if isinstance(self.input, list) and len(self.input) == 1:
128
- self.input = self.input[0]
129
-
130
- # Yield dicts if CLI supports JSON
131
- if self.output_return_type is dict or (self.json_flag is not None):
132
- self.output_return_type = dict
133
-
134
- # Output options
135
- self.output_fmt = self.run_opts.get('format', False)
136
- self.output_quiet = self.run_opts.get('quiet', False)
137
- self.output_json = self.output_return_type == dict
138
-
139
- # Print options
140
- self.print_start = self.run_opts.pop('print_start', False)
141
- self.print_item = self.run_opts.pop('print_item', False)
142
- self.print_line = self.run_opts.pop('print_line', False)
143
- self.print_errors = self.run_opts.pop('print_errors', True)
144
- self.print_item_count = self.run_opts.pop('print_item_count', False)
145
- self.print_cmd = self.run_opts.pop('print_cmd', False)
146
- self.print_run_opts = self.run_opts.pop('print_run_opts', DEBUG > 1)
147
- self.print_fmt_opts = self.run_opts.pop('print_fmt_opts', DEBUG > 1)
148
- self.print_input_file = self.run_opts.pop('print_input_file', False)
149
- self.print_hooks = self.run_opts.pop('print_hooks', DEBUG > 1)
150
- self.print_progress = self.run_opts.pop('print_progress', not self.output_quiet)
151
- self.print_cmd_prefix = self.run_opts.pop('print_cmd_prefix', False)
152
- self.print_remote_status = self.run_opts.pop('print_remote_status', False)
153
- self.print_run_summary = self.run_opts.pop('print_run_summary', False)
154
- self.print_json = self.run_opts.get('json', False)
155
- self.print_raw = self.run_opts.get('raw', False)
156
- self.orig = self.run_opts.get('orig', False)
157
- self.opts_to_print = {k: v for k, v in self.__dict__.items() if k.startswith('print_') if v}
114
+ # Profiler
115
+ self.enable_profiler = self.run_opts.get('enable_profiler', False) and ADDONS_ENABLED['trace']
116
+ if self.enable_profiler:
117
+ from pyinstrument import Profiler
118
+ self.profiler = Profiler(async_mode=False, interval=0.0001)
119
+ try:
120
+ self.profiler.start()
121
+ except RuntimeError:
122
+ self.enable_profiler = False
123
+ pass
124
+
125
+ # Process opts
126
+ self.quiet = self.run_opts.get('quiet', False)
127
+ self.no_process = self.run_opts.get('no_process', False)
128
+ self.piped_input = self.run_opts.get('piped_input', False)
129
+ self.piped_output = self.run_opts.get('piped_output', False)
130
+ self.enable_duplicate_check = self.run_opts.get('enable_duplicate_check', True)
131
+
132
+ # Print opts
133
+ self.print_item = self.run_opts.get('print_item', False)
134
+ self.print_line = self.run_opts.get('print_line', False) and not self.quiet
135
+ self.print_remote_info = self.run_opts.get('print_remote_info', False) and not self.piped_input and not self.piped_output # noqa: E501
136
+ self.print_json = self.run_opts.get('print_json', False)
137
+ self.print_raw = self.run_opts.get('print_raw', False) or self.piped_output
138
+ self.print_fmt = self.run_opts.get('fmt', '')
139
+ self.print_progress = self.run_opts.get('print_progress', False) and not self.quiet and not self.print_raw
140
+ self.print_target = self.run_opts.get('print_target', False) and not self.quiet and not self.print_raw
141
+ self.print_stat = self.run_opts.get('print_stat', False) and not self.quiet and not self.print_raw
142
+ self.raise_on_error = self.run_opts.get('raise_on_error', not self.sync)
143
+ self.print_opts = {k: v for k, v in self.__dict__.items() if k.startswith('print_') if v}
144
+
145
+ # Debug
146
+ self.debug('Inputs', obj=self.inputs, sub='init')
147
+ self.debug('Run opts', obj={k: v for k, v in self.run_opts.items() if v is not None}, sub='init')
148
+ self.debug('Print opts', obj={k: v for k, v in self.print_opts.items() if v is not None}, sub='init')
158
149
 
159
150
  # Hooks
160
- self.raise_on_error = self.run_opts.get('raise_on_error', False)
161
- self.hooks = {name: [] for name in HOOKS}
162
- for key in self.hooks:
163
-
164
- # Register class specific hooks
165
- class_hook = getattr(self, key, None)
166
- if class_hook:
167
- name = f'{self.__class__.__name__}.{key}'
168
- fun = self.get_func_path(class_hook)
169
- debug('', obj={name + ' [dim yellow]->[/] ' + fun: 'registered'}, sub='hooks', level=3)
170
- self.hooks[key].append(class_hook)
171
-
172
- # Register user hooks
173
- user_hooks = hooks.get(self.__class__, {}).get(key, [])
174
- user_hooks.extend(hooks.get(key, []))
175
- for hook in user_hooks:
176
- name = f'{self.__class__.__name__}.{key}'
177
- fun = self.get_func_path(hook)
178
- debug('', obj={name + ' [dim yellow]->[/] ' + fun: 'registered (user)'}, sub='hooks', level=3)
179
- self.hooks[key].extend(user_hooks)
151
+ self.hooks = {name: [] for name in HOOKS + getattr(self, 'hooks', [])}
152
+ self.register_hooks(hooks)
180
153
 
181
154
  # Validators
182
- self.validators = {name: [] for name in VALIDATORS}
183
- for key in self.validators:
184
- instance_func = getattr(self, f'validate_{key}', None)
185
- if instance_func:
186
- self.validators[key].append(instance_func)
187
- self.validators[key].extend(self.validators.get(self.__class__, {}).get(key, []))
155
+ self.validators = {name: [] for name in VALIDATORS + getattr(self, 'validators', [])}
156
+ self.register_validators(validators)
188
157
 
189
158
  # Chunks
190
- self.parent = self.run_opts.get('parent', True)
159
+ self.has_parent = self.run_opts.get('has_parent', False)
191
160
  self.has_children = self.run_opts.get('has_children', False)
192
161
  self.chunk = self.run_opts.get('chunk', None)
193
162
  self.chunk_count = self.run_opts.get('chunk_count', None)
194
163
  self.unique_name = self.name.replace('/', '_')
195
164
  self.unique_name = f'{self.unique_name}_{self.chunk}' if self.chunk else self.unique_name
196
- self._set_print_prefix()
197
165
 
198
166
  # Input post-process
199
167
  self.run_hooks('before_init')
200
168
 
201
- # Abort if inputs are invalid
202
- self.input_valid = True
203
- if not self.run_validators('input', self.input):
204
- self.input_valid = False
169
+ # Check if input is valid
170
+ self.inputs_valid = self.run_validators('validate_input', self.inputs)
205
171
 
206
172
  # Run hooks
207
173
  self.run_hooks('on_init')
@@ -216,496 +182,480 @@ class Runner:
216
182
  def elapsed_human(self):
217
183
  return humanize.naturaldelta(self.elapsed)
218
184
 
185
+ @property
186
+ def targets(self):
187
+ return [r for r in self.results if isinstance(r, Target)]
188
+
189
+ @property
190
+ def infos(self):
191
+ return [r for r in self.results if isinstance(r, Info)]
192
+
193
+ @property
194
+ def warnings(self):
195
+ return [r for r in self.results if isinstance(r, Warning)]
196
+
197
+ @property
198
+ def errors(self):
199
+ return [r for r in self.results if isinstance(r, Error)]
200
+
201
+ @property
202
+ def self_results(self):
203
+ return [r for r in self.results if r._source.startswith(self.unique_name)]
204
+
205
+ @property
206
+ def findings(self):
207
+ return [r for r in self.results if isinstance(r, tuple(FINDING_TYPES))]
208
+
209
+ @property
210
+ def findings_count(self):
211
+ return len(self.findings)
212
+
213
+ @property
214
+ def self_findings(self):
215
+ return [r for r in self.results if isinstance(r, tuple(FINDING_TYPES)) if r._source.startswith(self.unique_name)]
216
+
217
+ @property
218
+ def self_findings_count(self):
219
+ return len(self.self_findings)
220
+
221
+ @property
222
+ def status(self):
223
+ if not self.done:
224
+ return 'RUNNING'
225
+ return 'FAILURE' if len(self.errors) > 0 else 'SUCCESS'
226
+
227
+ @property
228
+ def celery_state(self):
229
+ return {
230
+ 'name': self.config.name,
231
+ 'full_name': self.unique_name,
232
+ 'state': self.status,
233
+ 'progress': self.progress,
234
+ 'results': self.self_results,
235
+ 'chunk': self.chunk,
236
+ 'chunk_count': self.chunk_count,
237
+ 'chunk_info': f'{self.chunk}/{self.chunk_count}' if self.chunk and self.chunk_count else '',
238
+ 'celery_id': self.context['celery_id'],
239
+ 'count': self.self_findings_count,
240
+ 'descr': self.config.description or '',
241
+ }
242
+
219
243
  def run(self):
244
+ """Run method.
245
+
246
+ Returns:
247
+ List[OutputType]: List of runner results.
248
+ """
220
249
  return list(self.__iter__())
221
250
 
222
251
  def __iter__(self):
223
- if self.print_start:
224
- self.log_start()
252
+ """Process results from derived runner class in real-time and yield results.
225
253
 
226
- if not self.input_valid:
227
- return
254
+ Yields:
255
+ OutputType: runner result.
256
+ """
228
257
  try:
229
- for item in self.yielder():
230
-
231
- if isinstance(item, (OutputType, DotMap, dict)):
232
-
233
- # Handle direct yield of item
234
- item = self._process_item(item)
235
- if not item:
236
- continue
237
-
238
- # Discard item if needed
239
- if item._uuid in self.uuids:
240
- continue
241
-
242
- # Add item to results
243
- if isinstance(item, OutputType) or self.orig:
244
- self.results.append(item)
245
- self.results_count += 1
246
- self.uuids.append(item._uuid)
247
- yield item
248
-
249
- # Print JSON or raw item
250
- if self.print_item and item._type != 'target':
251
- if not isinstance(item, OutputType) and not self.orig:
252
- item_str = rich_to_ansi(
253
- f'[dim red]❌ Failed to load item as output type:\n {item.toDict()}[/]'
254
- )
255
- self.output += item_str + '\n'
256
- self._print(item_str, rich=True)
257
- elif self.print_json:
258
- self._print(item, out=sys.stdout)
259
- elif self.print_raw:
260
- self._print(str(item), out=sys.stdout)
261
- else:
262
- item_str = self.get_repr(item)
263
- if self.print_remote_status or DEBUG > 1:
264
- item_str += f' [{item._source}]'
265
- if item._type == 'progress' and not self.print_progress:
266
- continue
267
- self._print(item_str, out=sys.stdout)
268
-
269
- elif item and isinstance(item, str):
270
- if self.print_line:
271
- self._print(item, out=sys.stderr, end='\n')
272
- if not self.output_json:
273
- self.results.append(item)
274
- yield item
275
-
276
- if item:
277
- if isinstance(item, OutputType):
278
- self.output += self.get_repr(item) + '\n'
279
- else:
280
- self.output += str(item) + '\n'
281
-
282
- self.run_hooks('on_iter')
283
-
284
- except KeyboardInterrupt:
285
- self._print('Process was killed manually (CTRL+C / CTRL+X).', color='bold red', rich=True)
286
- if self.celery_result:
287
- self._print('Revoking remote Celery tasks ...', color='bold red', rich=True)
288
- self.stop_live_tasks(self.celery_result)
289
-
290
- # Filter results and log info
291
- self.mark_duplicates()
292
- self.results = self.filter_results()
258
+ self.log_start()
259
+ self.run_hooks('on_start')
260
+
261
+ # If any errors happened during validation, exit
262
+ if self.errors:
263
+ yield from self.errors
264
+ self.log_results()
265
+ self.run_hooks('on_end')
266
+ return
267
+
268
+ # Choose yielder
269
+ yielder = self.yielder_celery if self.celery_result else self.yielder
270
+
271
+ # Loop and process items
272
+ for item in yielder():
273
+ yield from self._process_item(item)
274
+ self.run_hooks('on_interval')
275
+
276
+ # Wait for threads to finish
277
+ yield from self.join_threads()
278
+
279
+ except BaseException as e:
280
+ self.debug(f'encountered exception {type(e).__name__}. Stopping remote tasks.', sub='error')
281
+ error = Error.from_exception(e)
282
+ error._source = self.unique_name
283
+ error._uuid = str(uuid.uuid4())
284
+ self.add_result(error, print=True)
285
+ self.stop_celery_tasks()
286
+ yield from self.join_threads()
287
+ yield error
288
+
289
+ # Mark duplicates and filter results
290
+ if not self.no_process:
291
+ self.mark_duplicates()
292
+ self.results = self.filter_results()
293
+
294
+ # Finalize run
293
295
  self.log_results()
294
296
  self.run_hooks('on_end')
295
297
 
298
+ def join_threads(self):
299
+ """Wait for all running threads to complete."""
300
+ if not self.threads:
301
+ return
302
+ self.debug(f'waiting for {len(self.threads)} threads to complete')
303
+ for thread in self.threads:
304
+ error = thread.join()
305
+ if error:
306
+ error._source = self.unique_name
307
+ error._uuid = str(uuid.uuid4())
308
+ self.add_result(error, print=True)
309
+ yield error
310
+
311
+ def add_result(self, item, print=False):
312
+ """Add item to runner results.
313
+
314
+ Args:
315
+ item (OutputType): Item.
316
+ print (bool): Whether to print it or not.
317
+ """
318
+ self.uuids.append(item._uuid)
319
+ self.results.append(item)
320
+ self.output += repr(item) + '\n'
321
+ if print:
322
+ self._print_item(item)
323
+
324
+ def add_subtask(self, task_id, task_name, task_description):
325
+ """Add a Celery subtask to the current runner for tracking purposes.
326
+
327
+ Args:
328
+ task_id (str): Celery task id.
329
+ task_name (str): Task name.
330
+ task_description (str): Task description.
331
+ """
332
+ self.celery_ids.append(task_id)
333
+ self.celery_ids_map[task_id] = {
334
+ 'id': task_id,
335
+ 'name': task_name,
336
+ 'full_name': task_name,
337
+ 'descr': task_description,
338
+ 'state': 'PENDING',
339
+ 'count': 0,
340
+ 'progress': 0
341
+ }
342
+
343
+ def _print_item(self, item, force=False):
344
+ """Print an item and add it to the runner's output.
345
+
346
+ Args:
347
+ item (str | OutputType): Secator output type to print.
348
+ force (bool): Whether to force-print it.
349
+ """
350
+ item_str = str(item)
351
+
352
+ # Item is an output type
353
+ if isinstance(item, OutputType):
354
+ self.debug(item, lazy=lambda x: repr(x), sub='item', allow_no_process=False, verbose=True)
355
+ _type = item._type
356
+ print_this_type = getattr(self, f'print_{_type}', True)
357
+ if not print_this_type:
358
+ return
359
+
360
+ if self.print_item or force:
361
+ item_out = sys.stdout
362
+
363
+ # JSON lines output
364
+ if self.print_json:
365
+ self._print(item, out=sys.stdout)
366
+ item_out = None # suppress item repr output to stdout
367
+
368
+ # Raw output
369
+ elif self.print_raw:
370
+ item_out = sys.stderr if self.piped_output else None
371
+
372
+ # Format raw output with custom item fields
373
+ if self.print_fmt:
374
+ try:
375
+ item_str = item.format(**self.print_fmt)
376
+ except KeyError:
377
+ item_str = ''
378
+
379
+ # raw output is used to pipe, we should only pipe the first output type of a Runner.
380
+ if not isinstance(item, self.output_types[0]):
381
+ item_str = ''
382
+
383
+ if item_str:
384
+ self._print(item_str, out=sys.stdout)
385
+
386
+ # Repr output
387
+ if item_out:
388
+ item_repr = repr(item)
389
+ if isinstance(item, OutputType) and self.print_remote_info:
390
+ item_repr += rich_to_ansi(f' \[[dim]{item._source}[/]]')
391
+ self._print(item_repr, out=item_out)
392
+
393
+ # Item is a line
394
+ elif isinstance(item, str):
395
+ self.debug(item, sub='line', allow_no_process=False, verbose=True)
396
+ if self.print_line or force:
397
+ self._print(item, out=sys.stderr, end='\n')
398
+
399
+ def debug(self, *args, **kwargs):
400
+ """Print debug with runner class name, only if self.no_process is True.
401
+
402
+ Args:
403
+ args (list): List of debug args.
404
+ kwargs (dict): Dict of debug kwargs.
405
+ """
406
+ allow_no_process = kwargs.pop('allow_no_process', True)
407
+ if self.no_process and not allow_no_process:
408
+ return
409
+ sub = kwargs.get('sub')
410
+ new_sub = f'runner.{self.__class__.__name__}'
411
+ if sub:
412
+ new_sub += f'.{sub}'
413
+ kwargs['sub'] = new_sub
414
+ debug(*args, **kwargs)
415
+
296
416
  def mark_duplicates(self):
297
- debug('running duplicate check', id=self.config.name, sub='runner.mark_duplicates')
298
- dupe_count = 0
299
- for item in self.results:
300
- # debug('running duplicate check', obj=item.toDict(), obj_breaklines=True, sub='runner.mark_duplicates', level=5)
301
- others = [f for f in self.results if f == item and f._uuid != item._uuid]
302
- if others:
303
- main = max(item, *others)
304
- dupes = [f for f in others if f._uuid != main._uuid]
305
- main._duplicate = False
306
- main._related.extend([dupe._uuid for dupe in dupes])
307
- main._related = list(dict.fromkeys(main._related))
308
- if main._uuid != item._uuid:
309
- debug(f'found {len(others)} duplicates for', obj=item.toDict(), obj_breaklines=True, sub='runner.mark_duplicates', level=5) # noqa: E501
310
- item._duplicate = True
311
- item = self.run_hooks('on_item', item)
312
- if item._uuid not in main._related:
313
- main._related.append(item._uuid)
314
- main = self.run_hooks('on_duplicate', main)
315
- item = self.run_hooks('on_duplicate', item)
316
-
317
- for dupe in dupes:
318
- if not dupe._duplicate:
319
- debug(
320
- 'found new duplicate', obj=dupe.toDict(), obj_breaklines=True,
321
- sub='runner.mark_duplicates', level=5)
322
- dupe_count += 1
323
- dupe._duplicate = True
324
- dupe = self.run_hooks('on_duplicate', dupe)
325
-
326
- duplicates = [repr(i) for i in self.results if i._duplicate]
327
- if duplicates:
328
- duplicates_str = '\n\t'.join(duplicates)
329
- debug(f'Duplicates ({dupe_count}):\n\t{duplicates_str}', sub='runner.mark_duplicates', level=5)
330
- debug(f'duplicate check completed: {dupe_count} found', id=self.config.name, sub='runner.mark_duplicates')
417
+ """Check for duplicates and mark items as duplicates."""
418
+ if not self.enable_duplicate_check:
419
+ return
420
+ self.debug('running duplicate check', id=self.config.name, sub='duplicates')
421
+ # dupe_count = 0
422
+ import concurrent.futures
423
+ executor = concurrent.futures.ThreadPoolExecutor(max_workers=100)
424
+ for item in self.results.copy():
425
+ executor.submit(self.check_duplicate, item)
426
+ executor.shutdown(wait=True)
427
+ # duplicates = [repr(i) for i in self.results if i._duplicate]
428
+ # if duplicates:
429
+ # duplicates_str = '\n\t'.join(duplicates)
430
+ # self.debug(f'Duplicates ({dupe_count}):\n\t{duplicates_str}', sub='duplicates', verbose=True)
431
+ # self.debug(f'duplicate check completed: {dupe_count} found', id=self.config.name, sub='duplicates')
432
+
433
+ def check_duplicate(self, item):
434
+ """Check if an item is a duplicate in the list of results and mark it like so.
435
+
436
+ Args:
437
+ item (OutputType): Secator output type.
438
+ """
439
+ self.debug('running duplicate check for item', obj=item.toDict(), obj_breaklines=True, sub='duplicates', verbose=True)
440
+ others = [f for f in self.results if f == item and f._uuid != item._uuid]
441
+ if others:
442
+ main = max(item, *others)
443
+ dupes = [f for f in others if f._uuid != main._uuid]
444
+ main._duplicate = False
445
+ main._related.extend([dupe._uuid for dupe in dupes])
446
+ main._related = list(dict.fromkeys(main._related))
447
+ if main._uuid != item._uuid:
448
+ self.debug(f'found {len(others)} duplicates for', obj=item.toDict(), obj_breaklines=True, sub='duplicates', verbose=True) # noqa: E501
449
+ item._duplicate = True
450
+ item = self.run_hooks('on_item', item)
451
+ if item._uuid not in main._related:
452
+ main._related.append(item._uuid)
453
+ main = self.run_hooks('on_duplicate', main)
454
+ item = self.run_hooks('on_duplicate', item)
455
+
456
+ for dupe in dupes:
457
+ if not dupe._duplicate:
458
+ self.debug(
459
+ 'found new duplicate', obj=dupe.toDict(), obj_breaklines=True,
460
+ sub='duplicates', verbose=True)
461
+ # dupe_count += 1
462
+ dupe._duplicate = True
463
+ dupe = self.run_hooks('on_duplicate', dupe)
331
464
 
332
465
  def yielder(self):
466
+ """Yield results. Should be implemented by derived classes."""
333
467
  raise NotImplementedError()
334
468
 
469
+ def yielder_celery(self):
470
+ """Yield results from Celery result."""
471
+ yield from CeleryData.iter_results(
472
+ self.celery_result,
473
+ ids_map=self.celery_ids_map,
474
+ print_remote_info=False
475
+ )
476
+
335
477
  def toDict(self):
336
- return {
478
+ """Dict representation of the runner."""
479
+ data = {
480
+ 'name': self.name,
481
+ 'status': self.status,
482
+ 'targets': self.inputs,
483
+ 'start_time': self.start_time,
484
+ 'end_time': self.end_time,
485
+ 'elapsed': self.elapsed.total_seconds(),
486
+ 'elapsed_human': self.elapsed_human,
487
+ 'run_opts': {k: v for k, v in self.run_opts.items() if k not in self.print_opts},
488
+ }
489
+ data.update({
337
490
  'config': self.config.toDict(),
338
491
  'opts': self.config.supported_opts,
339
- 'name': self.name,
340
- 'targets': self.targets,
341
- 'run_opts': self.run_opts,
342
- 'parent': self.parent,
492
+ 'has_parent': self.has_parent,
343
493
  'has_children': self.has_children,
344
494
  'chunk': self.chunk,
345
495
  'chunk_count': self.chunk_count,
346
- 'results_count': self.results_count,
347
496
  'sync': self.sync,
348
497
  'done': self.done,
349
498
  'output': self.output,
350
- 'status': self.status,
351
499
  'progress': self.progress,
352
- 'start_time': self.start_time,
353
- 'end_time': self.end_time,
354
- 'last_updated': self.last_updated,
355
- 'elapsed': self.elapsed.total_seconds(),
356
- 'elapsed_human': self.elapsed_human,
357
- 'errors': self.errors,
358
- 'context': self.context
359
- }
500
+ 'last_updated_db': self.last_updated_db,
501
+ 'context': self.context,
502
+ 'errors': [e.toDict() for e in self.errors],
503
+ })
504
+ return data
360
505
 
361
506
  def run_hooks(self, hook_type, *args):
507
+ """"Run hooks of a certain type.
508
+
509
+ Args:
510
+ hook_type (str): Hook type.
511
+ args (list): List of arguments to pass to the hook.
512
+
513
+ Returns:
514
+ any: Hook return value.
515
+ """
362
516
  result = args[0] if len(args) > 0 else None
363
- if not self.enable_hooks:
364
- return result
517
+ _id = self.context.get('task_id', '') or self.context.get('workflow_id', '') or self.context.get('scan_id', '')
365
518
  for hook in self.hooks[hook_type]:
366
519
  name = f'{self.__class__.__name__}.{hook_type}'
367
520
  fun = self.get_func_path(hook)
368
521
  try:
369
- _id = self.context.get('task_id', '') or self.context.get('workflow_id', '') or self.context.get('scan_id', '')
370
- debug('', obj={name + ' [dim yellow]->[/] ' + fun: 'started'}, id=_id, sub='hooks', level=3)
522
+ if hook_type == 'on_interval' and not should_update(CONFIG.runners.backend_update_frequency, self.last_updated_db):
523
+ self.debug('', obj={f'{name} [dim yellow]->[/] {fun}': '[dim gray11]skipped[/]'}, id=_id, sub='hooks.db', verbose=True) # noqa: E501
524
+ return
525
+ if not self.enable_hooks or self.no_process:
526
+ self.debug('', obj={f'{name} [dim yellow]->[/] {fun}': '[dim gray11]skipped[/]'}, id=_id, sub='hooks', verbose=True) # noqa: E501
527
+ continue
528
+ # self.debug('', obj={f'{name} [dim yellow]->[/] {fun}': '[dim yellow]started[/]'}, id=_id, sub='hooks', verbose=True) # noqa: E501
371
529
  result = hook(self, *args)
372
- debug('', obj={name + ' [dim yellow]->[/] ' + fun: 'ended'}, id=_id, sub='hooks', level=3)
530
+ self.debug('', obj={f'{name} [dim yellow]->[/] {fun}': '[dim green]success[/]'}, id=_id, sub='hooks', verbose=True) # noqa: E501
373
531
  except Exception as e:
532
+ self.debug('', obj={f'{name} [dim yellow]->[/] {fun}': '[dim red]failed[/]'}, id=_id, sub='hooks', verbose=True) # noqa: E501
533
+ error = Error.from_exception(e)
534
+ error.message = f'Hook "{fun}" execution failed.'
535
+ error._source = self.unique_name
536
+ error._uuid = str(uuid.uuid4())
537
+ self.add_result(error, print=True)
374
538
  if self.raise_on_error:
375
539
  raise e
376
- else:
377
- if DEBUG > 1:
378
- logger.exception(e)
379
- else:
380
- self._print(
381
- f'{fun} failed: "{e.__class__.__name__}: {str(e)}". Skipping',
382
- color='bold red',
383
- rich=True)
384
- self._print('Set DEBUG to > 1 to see the detailed exception.', color='dim red', rich=True)
385
540
  return result
386
541
 
387
- def run_validators(self, validator_type, *args):
388
- # logger.debug(f'Running validators of type {validator_type}')
542
+ def run_validators(self, validator_type, *args, error=True):
543
+ """Run validators of a certain type.
544
+
545
+ Args:
546
+ validator_type (str): Validator type. E.g: on_start.
547
+ args (list): List of arguments to pass to the validator.
548
+ error (bool): Whether to add an error to runner results if the validator failed.
549
+
550
+ Returns:
551
+ bool: Validator return value.
552
+ """
553
+ if self.no_process:
554
+ return True
555
+ _id = self.context.get('task_id', '') or self.context.get('workflow_id', '') or self.context.get('scan_id', '')
389
556
  for validator in self.validators[validator_type]:
390
- # logger.debug(validator)
557
+ name = f'{self.__class__.__name__}.{validator_type}'
558
+ fun = self.get_func_path(validator)
391
559
  if not validator(self, *args):
392
- if validator_type == 'input':
393
- self._print(f'{validator.__doc__}', color='bold red', rich=True)
560
+ self.debug('', obj={name + ' [dim yellow]->[/] ' + fun: 'failed'}, id=_id, sub='validators')
561
+ doc = validator.__doc__
562
+ if error:
563
+ message = 'Validator failed'
564
+ if doc:
565
+ message += f': {doc}'
566
+ error = Error(
567
+ message=message,
568
+ _source=self.unique_name,
569
+ _uuid=str(uuid.uuid4())
570
+ )
571
+ self.add_result(error, print=True)
394
572
  return False
573
+ self.debug('', obj={name + ' [dim yellow]->[/] ' + fun: 'success'}, id=_id, sub='validators')
395
574
  return True
396
575
 
397
- @staticmethod
398
- def resolve_exporters(exporters):
399
- """Resolve exporters from output options."""
400
- if not exporters or exporters in ['false', 'False']:
401
- return []
402
- if isinstance(exporters, str):
403
- exporters = exporters.split(',')
404
- classes = [
405
- import_dynamic(f'secator.exporters.{o.capitalize()}Exporter', 'Exporter')
406
- for o in exporters
407
- if o
408
- ]
409
- return [cls for cls in classes if cls]
576
+ def register_hooks(self, hooks):
577
+ """Register hooks.
410
578
 
411
- def log_start(self):
412
- """Log runner start."""
413
- remote_str = 'starting' if self.sync else 'sent to Celery worker'
414
- runner_name = self.__class__.__name__
415
- self.log_header()
416
- self._print(
417
- f':tada: {runner_name} [bold magenta]{self.config.name}[/] {remote_str}...', rich=True)
418
- if not self.sync and self.print_remote_status and self.__class__.__name__ != 'Scan':
419
- self._print('\n🏆 [bold gold3]Live results:[/]', rich=True)
420
-
421
- def log_header(self):
422
- """Log runner header."""
423
- runner_name = self.__class__.__name__
579
+ Args:
580
+ hooks (dict[str, List[Callable]]): List of hooks to register.
581
+ """
582
+ for key in self.hooks:
583
+ # Register class + derived class hooks
584
+ class_hook = getattr(self, key, None)
585
+ if class_hook:
586
+ name = f'{self.__class__.__name__}.{key}'
587
+ fun = self.get_func_path(class_hook)
588
+ self.debug('', obj={name + ' [dim yellow]->[/] ' + fun: 'registered'}, sub='hooks')
589
+ self.hooks[key].append(class_hook)
424
590
 
425
- # Description
426
- panel_str = f':scroll: [bold gold3]Description:[/] {self.config.description}'
591
+ # Register user hooks
592
+ user_hooks = hooks.get(self.__class__, {}).get(key, [])
593
+ user_hooks.extend(hooks.get(key, []))
594
+ for hook in user_hooks:
595
+ name = f'{self.__class__.__name__}.{key}'
596
+ fun = self.get_func_path(hook)
597
+ self.debug('', obj={name + ' [dim yellow]->[/] ' + fun: 'registered (user)'}, sub='hooks')
598
+ self.hooks[key].extend(user_hooks)
427
599
 
428
- # Workspace
429
- if self.workspace_name:
430
- panel_str += f'\n:construction_worker: [bold gold3]Workspace:[/] {self.workspace_name}'
600
+ def register_validators(self, validators):
601
+ """Register validators.
431
602
 
432
- # Targets
433
- if self.targets:
434
- panel_str += '\n:pear: [bold gold3]Targets:[/]'
435
- for target in self.targets:
436
- panel_str += f'\n • {target}'
603
+ Args:
604
+ validators (dict[str, List[Callable]]): Validators to register.
605
+ """
606
+ # Register class + derived class hooks
607
+ for key in self.validators:
608
+ class_validator = getattr(self, key, None)
609
+ if class_validator:
610
+ name = f'{self.__class__.__name__}.{key}'
611
+ fun = self.get_func_path(class_validator)
612
+ self.validators[key].append(class_validator)
613
+ self.debug('', obj={name + ' [dim yellow]->[/] ' + fun: 'registered'}, sub='validators')
437
614
 
438
- # Options
439
- DISPLAY_OPTS_EXCLUDE = [
440
- 'sync', 'worker', 'debug', 'output', 'json', 'orig', 'raw', 'format', 'quiet'
441
- ]
442
- items = [
443
- f'[italic]{k}[/]: {v}'
444
- for k, v in self.run_opts.items()
445
- if k not in DISPLAY_OPTS_EXCLUDE
446
- and v is not None
447
- ]
448
- if items:
449
- panel_str += '\n:pushpin: [bold gold3]Options:[/]'
450
- for item in items:
451
- panel_str += f'\n • {item}'
452
-
453
- if self.exporters:
454
- panel_str += '\n:email: [bold gold3]Exporters:[/]'
455
- for exporter in self.exporters:
456
- exporter_name = exporter.__name__.replace('Exporter', '').lower()
457
- panel_str += f'\n • {exporter_name}'
458
-
459
- panel = Panel(
460
- panel_str,
461
- title=f'[bold gold3]{runner_name}[/] [bold magenta]{self.config.name}[/]',
462
- border_style='bold gold3',
463
- expand=False,
464
- highlight=True
465
- )
466
- self._print('\n')
467
- self._print(panel, rich=True)
615
+ # Register user hooks
616
+ user_validators = validators.get(key, [])
617
+ for validator in user_validators:
618
+ name = f'{self.__class__.__name__}.{key}'
619
+ fun = self.get_func_path(validator)
620
+ self.debug('', obj={name + ' [dim yellow]->[/] ' + fun: 'registered (user)'}, sub='validators')
621
+ self.validators[key].extend(user_validators)
468
622
 
469
- def log_results(self):
470
- """Log results.
623
+ def log_start(self):
624
+ """Log runner start."""
625
+ if not self.print_remote_info:
626
+ return
627
+ remote_str = 'starting' if self.sync else 'sent to Celery worker'
628
+ runner_name = self.__class__.__name__
629
+ info = Info(message=f'{runner_name} [bold magenta]{self.config.name}[/] {remote_str}...', _source=self.unique_name)
630
+ self._print_item(info)
471
631
 
472
- Args:
473
- results (list): List of results.
474
- output_types (list): List of result types to add to report.
475
- """
632
+ def log_results(self):
633
+ """Log runner results."""
476
634
  self.done = True
477
635
  self.progress = 100
478
- self.results_count = len(self.results)
479
- self.status = 'SUCCESS' if not self.errors else 'FAILED'
480
636
  self.end_time = datetime.fromtimestamp(time())
481
-
482
- # Log execution results
483
- status = 'succeeded' if not self.errors else '[bold red]failed[/]'
484
- if self.print_run_summary:
485
- self._print('\n')
486
- self._print(
487
- f':tada: [bold green]{self.__class__.__name__.capitalize()}[/] [bold magenta]{self.config.name}[/] '
488
- f'[bold green]{status} in[/] [bold gold3]{self.elapsed_human}[/].', rich=True)
489
-
490
- # Log runner infos
491
- if self.infos:
492
- self._print(
493
- f':heavy_check_mark: [bold magenta]{self.config.name}[/] infos ({len(self.infos)}):',
494
- color='bold green', rich=True)
495
- for info in self.infos:
496
- self._print(f' • {info}', color='bold green', rich=True)
497
-
498
- # Log runner errors
499
- if self.errors and self.print_errors:
500
- self._print(
501
- f':exclamation_mark:[bold magenta]{self.config.name}[/] errors ({len(self.errors)}):',
502
- color='bold red', rich=True)
503
- for error in self.errors:
504
- self._print(f' • {error}', color='bold red', rich=True)
505
-
506
- # Build and send report
507
- if self.results:
637
+ if self.status == 'FAILURE':
638
+ self.debug('', obj={self.__class__.__name__: self.status, 'errors': [str(_.message) for _ in self.errors]}, sub='status') # noqa: E501
639
+ else:
640
+ self.debug('', obj={self.__class__.__name__: self.status}, sub='status')
641
+ if self.exporters and not self.no_process:
508
642
  report = Report(self, exporters=self.exporters)
509
643
  report.build()
510
644
  report.send()
511
645
  self.report = report
512
-
513
- # Log results count
514
- if self.print_item_count and not self.print_raw and not self.orig:
515
- count_map = self._get_results_count()
516
- if all(count == 0 for count in count_map.values()):
517
- self._print(':exclamation_mark:Found 0 results.', color='bold red', rich=True)
518
- else:
519
- results_str = ':heavy_check_mark: Found ' + ' and '.join([
520
- f'{count} {pluralize(name) if count > 1 or count == 0 else name}'
521
- for name, count in count_map.items()
522
- ]) + '.'
523
- self._print(results_str, color='bold green', rich=True)
524
-
525
- @staticmethod
526
- def get_live_results(result):
527
- """Poll Celery subtasks results in real-time. Fetch task metadata and partial results from each task that runs.
528
-
529
- Args:
530
- result (celery.result.AsyncResult): Result object.
531
-
532
- Yields:
533
- dict: Subtasks state and results.
534
- """
535
- from celery.result import AsyncResult
536
- res = AsyncResult(result.id)
537
- while True:
538
- # Yield results
539
- yield from Runner.get_celery_results(result)
540
-
541
- # Break out of while loop
542
- if res.ready():
543
- yield from Runner.get_celery_results(result)
544
- break
545
-
546
- # Sleep between updates
547
- sleep(1)
548
-
549
- @staticmethod
550
- def get_celery_results(result):
551
- """Get Celery results from main result object, including any subtasks results.
552
-
553
- Args:
554
- result (celery.result.AsyncResult): Result object.
555
-
556
- Yields:
557
- dict: Subtasks state and results, Progress objects.
558
- """
559
- task_ids = []
560
- get_task_ids(result, ids=task_ids)
561
- datas = []
562
- for task_id in task_ids:
563
- data = get_task_data(task_id)
564
- if data and DEBUG > 1:
565
- full_name = data['name']
566
- if data['chunk_info']:
567
- full_name += ' ' + data['chunk_info']
568
- debug('', sub='celery.runner', id=data['id'], obj={full_name: data['state']}, level=4)
569
- if not data:
570
- continue
571
- yield data
572
- datas.append(data)
573
-
574
- # Calculate and yield progress
575
- total = len(datas)
576
- count_finished = sum([i['ready'] for i in datas if i])
577
- percent = int(count_finished * 100 / total) if total > 0 else 0
578
- if percent > 0:
579
- yield Progress(duration='unknown', percent=percent)
580
-
581
- def stop_live_tasks(self, result):
582
- """Stop live tasks running in Celery worker.
583
-
584
- Args:
585
- result (AsyncResult | GroupResult): Celery result.
586
- """
587
- task_ids = []
588
- get_task_ids(result, ids=task_ids)
589
- for task_id in task_ids:
590
- from secator.celery import revoke_task
591
- revoke_task(task_id)
592
-
593
- def process_live_tasks(self, result, description=True, results_only=True, print_remote_status=True):
594
- """Rich progress indicator showing live tasks statuses.
595
-
596
- Args:
597
- result (AsyncResult | GroupResult): Celery result.
598
- results_only (bool): Yield only results, no task state.
599
-
600
- Yields:
601
- dict: Subtasks state and results.
602
- """
603
- config_name = self.config.name
604
- runner_name = self.__class__.__name__.capitalize()
605
-
606
- # Display live results if print_remote_status is set
607
- if print_remote_status:
608
- class PanelProgress(RichProgress):
609
- def get_renderables(self):
610
- yield Padding(Panel(
611
- self.make_tasks_table(self.tasks),
612
- title=f'[bold gold3]{runner_name}[/] [bold magenta]{config_name}[/] results',
613
- border_style='bold gold3',
614
- expand=False,
615
- highlight=True), pad=(2, 0, 0, 0))
616
-
617
- tasks_progress = PanelProgress(
618
- SpinnerColumn('dots'),
619
- TextColumn('{task.fields[descr]} ') if description else '',
620
- TextColumn('[bold cyan]{task.fields[name]}[/]'),
621
- TextColumn('[dim gold3]{task.fields[chunk_info]}[/]'),
622
- TextColumn('{task.fields[state]:<20}'),
623
- TimeElapsedColumn(),
624
- TextColumn('{task.fields[count]}'),
625
- # TextColumn('{task.fields[progress]}%'),
626
- # TextColumn('\[[bold magenta]{task.fields[id]:<30}[/]]'), # noqa: W605
627
- refresh_per_second=1,
628
- transient=False,
629
- # console=console,
630
- # redirect_stderr=True,
631
- # redirect_stdout=False
632
- )
633
- state_colors = {
634
- 'RUNNING': 'bold yellow',
635
- 'SUCCESS': 'bold green',
636
- 'FAILURE': 'bold red',
637
- 'REVOKED': 'bold magenta'
638
- }
639
- else:
640
- tasks_progress = nullcontext()
641
-
642
- with tasks_progress as progress:
643
-
644
- # Make progress tasks
645
- tasks_progress = {}
646
-
647
- # Get live results and print progress
648
- for data in Runner.get_live_results(result):
649
-
650
- # If progress object, yield progress and ignore tracking
651
- if isinstance(data, OutputType) and data._type == 'progress':
652
- yield data
653
- continue
654
-
655
- # TODO: add error output type and yield errors in get_celery_results
656
- # if isinstance(data, OutputType) and data._type == 'error':
657
- # yield data
658
- # continue
659
-
660
- # Re-yield so that we can consume it externally
661
- if results_only:
662
- yield from data['results']
663
- else:
664
- yield data
665
-
666
- if not print_remote_status:
667
- continue
668
-
669
- # Ignore partials in output unless DEBUG > 1
670
- # TODO: weird to change behavior based on debug flag, could cause issues
671
- # if data['chunk'] and not DEBUG > 1:
672
- # continue
673
-
674
- # Handle messages if any
675
- state = data['state']
676
- error = data.get('error')
677
- info = data.get('info')
678
- full_name = data['name']
679
- chunk_info = data.get('chunk_info', '')
680
- if chunk_info:
681
- full_name += f' {chunk_info}'
682
- if error:
683
- state = 'FAILURE'
684
- error = f'{full_name}: {error}'
685
- if error not in self.errors:
686
- self.errors.append(error)
687
- if info:
688
- info = f'{full_name}: {info}'
689
- if info not in self.infos:
690
- self.infos.append(info)
691
-
692
- task_id = data['id']
693
- state_str = f'[{state_colors[state]}]{state}[/]'
694
- data['state'] = state_str
695
-
696
- if task_id not in tasks_progress:
697
- id = progress.add_task('', **data)
698
- tasks_progress[task_id] = id
699
- else:
700
- progress_id = tasks_progress[task_id]
701
- if state in ['SUCCESS', 'FAILURE']:
702
- progress.update(progress_id, advance=100, **data)
703
- elif data['progress'] != 0:
704
- progress.update(progress_id, advance=data['progress'], **data)
705
-
706
- # Update all tasks to 100 %
707
- for progress_id in tasks_progress.values():
708
- progress.update(progress_id, advance=100)
646
+ if self.enable_profiler:
647
+ self.profiler.stop()
648
+ profile_path = Path(self.reports_folder) / f'{self.unique_name}_profile.html'
649
+ with profile_path.open('w', encoding='utf-8') as f_html:
650
+ f_html.write(self.profiler.output_html())
651
+ self._print_item(Info(message=f'Wrote profile to {str(profile_path)}', _source=self.unique_name), force=True)
652
+
653
+ def stop_celery_tasks(self):
654
+ """Stop all tasks running in Celery worker."""
655
+ from secator.celery import revoke_task
656
+ for task_id in self.celery_ids:
657
+ name = self.celery_ids_map.get(task_id, {}).get('full_name')
658
+ revoke_task(task_id, name)
709
659
 
710
660
  def filter_results(self):
711
661
  """Filter runner results using extractors defined in config."""
@@ -721,7 +671,7 @@ class Runner:
721
671
  # Keep the field types in results not specified in the extractors.
722
672
  extract_fields = [e['type'] for e in extractors]
723
673
  keep_fields = [
724
- _type for _type in OUTPUT_TYPES if _type.__name__ != 'Progress'
674
+ _type for _type in FINDING_TYPES
725
675
  if _type not in extract_fields
726
676
  ]
727
677
  results.extend([
@@ -733,41 +683,61 @@ class Runner:
733
683
  return results
734
684
 
735
685
  def _convert_item_schema(self, item):
736
- """Convert dict item to a new structure using the class output schema.
686
+ """Convert dict item to a secator output type.
737
687
 
738
688
  Args:
739
- item (dict): Item.
689
+ item (dict): Dict item.
740
690
 
741
691
  Returns:
742
- dict: Item with new schema.
692
+ OutputType: Loaded item.
743
693
  """
744
- # Load item using available output types and get the first matching
745
- # output type based on the schema
694
+ # Skip if already converted
695
+ if isinstance(item, OutputType):
696
+ return item
697
+
698
+ # Init the new item and the list of output types to load from
746
699
  new_item = None
747
700
  output_types = getattr(self, 'output_types', [])
748
- debug(f'Input item: {item}', sub='klass.load', level=5)
749
- debug(f'Output types to try: {[o.__name__ for o in output_types]}', sub='klass.load', level=5)
701
+ self.debug(f'Input item: {item}', sub='klass.load', verbose=True)
702
+
703
+ # Use a function to pick proper output types
704
+ output_discriminator = getattr(self, 'output_discriminator', None)
705
+ if output_discriminator:
706
+ result = output_discriminator(item)
707
+ if result:
708
+ self.debug(f'Discriminated output type: {result.__name__}', sub='klass.load', verbose=True)
709
+ output_types = [result]
710
+ else:
711
+ output_types = []
712
+
713
+ # Use the _type key to pick proper output type
714
+ elif '_type' in item:
715
+ otypes = [o for o in output_types if o.get_name() == item['_type']]
716
+ if otypes:
717
+ output_types = [otypes[0]]
718
+ self.debug(f'_type key is present in item and matches {otypes[0]}', sub='klass.load', verbose=True)
719
+
720
+ # Load item using picked output types
721
+ self.debug(f'Output types to try: {[o.__name__ for o in output_types]}', sub='klass.load', verbose=True)
750
722
  for klass in output_types:
751
- debug(f'Loading item as {klass.__name__}', sub='klass.load', level=5)
752
- output_map = getattr(self, 'output_map', {})
753
- output_map = output_map.get(klass, {})
723
+ self.debug(f'Loading item as {klass.__name__}', sub='klass.load', verbose=True)
724
+ output_map = getattr(self, 'output_map', {}).get(klass, {})
754
725
  try:
755
726
  new_item = klass.load(item, output_map)
756
- debug(f'[dim green]Successfully loaded item as {klass.__name__}[/]', sub='klass.load', level=5)
757
- break # found an item that fits
758
- except (TypeError, KeyError) as e: # can't load using class
759
- debug(
727
+ self.debug(f'[dim green]Successfully loaded item as {klass.__name__}[/]', sub='klass.load', verbose=True)
728
+ break
729
+ except (TypeError, KeyError) as e:
730
+ self.debug(
760
731
  f'[dim red]Failed loading item as {klass.__name__}: {type(e).__name__}: {str(e)}.[/] [dim green]Continuing.[/]',
761
- sub='klass.load',
762
- level=5)
763
- if DEBUG == 6:
764
- console.print_exception(show_locals=False)
732
+ sub='klass.load', verbose=True)
733
+ # error = Error.from_exception(e)
734
+ # self.debug(repr(error), sub='debug.klass.load')
765
735
  continue
766
736
 
767
- # No output type was found, so make no conversion
768
737
  if not new_item:
769
- new_item = DotMap(item)
770
- new_item._type = 'unknown'
738
+ new_item = Warning(message=f'Failed to load item as output type:\n {item}')
739
+
740
+ self.debug(f'Output item: {new_item.toDict()}', sub='klass.load', verbose=True)
771
741
 
772
742
  return new_item
773
743
 
@@ -779,113 +749,130 @@ class Runner:
779
749
  color (str, Optional): Rich color.
780
750
  out (str, Optional): Output pipe (sys.stderr, sys.stdout, ...)
781
751
  rich (bool, Optional): Force rich output.
752
+ end (str, Optional): End of line.
753
+ add_to_output (bool, Optional): Whether to add the item to runner output.
782
754
  """
783
- # Print a JSON item
784
- if isinstance(data, (OutputType, DotMap, dict)):
785
- if getattr(data, 'toDict', None):
786
- data = data.toDict()
787
- data = json.dumps(data)
788
- data = f'{self.prefix:>15} {data}' if self.prefix and not self.print_item else data
789
-
790
- if self.sync or rich:
755
+ if rich:
791
756
  _console = console_stdout if out == sys.stdout else console
792
757
  _console.print(data, highlight=False, style=color, soft_wrap=True, end=end)
793
758
  else:
759
+ if isinstance(data, (OutputType, dict)):
760
+ if getattr(data, 'toDict', None):
761
+ data = data.toDict()
762
+ data = json.dumps(data)
794
763
  print(data, file=out)
795
764
 
796
- # # Print a line using Rich console
797
- # if rich:
798
- # _console = console_stdout if out == sys.stdout else console
799
- # _console.print(data, highlight=False, style=color, soft_wrap=True)
800
-
801
- # # Print a line using Rich markup
802
- # elif markup:
803
- # from rich import print as _print
804
- # from rich.text import Text
805
- # _print(Text.from_markup(data), file=out)
806
-
807
- # # Print a line raw
808
- # else:
809
- # print(data, file=out)
810
-
811
- def _set_print_prefix(self):
812
- self.prefix = ''
813
- if self.print_cmd_prefix:
814
- self.prefix = f'[bold gold3]({self.config.name})[/]'
815
- if self.chunk and self.chunk_count:
816
- self.prefix += f' [{self.chunk}/{self.chunk_count}]'
817
-
818
- def _get_results_count(self):
765
+ def _get_findings_count(self):
766
+ """Get finding count.
767
+
768
+ Returns:
769
+ dict[str,int]: Dict of finding type to count.
770
+ """
819
771
  count_map = {}
820
- for output_type in self.output_types:
821
- if output_type.__name__ == 'Progress':
822
- continue
772
+ for output_type in FINDING_TYPES:
823
773
  name = output_type.get_name()
824
- count = len([r for r in self.results if r._type == name])
825
- count_map[name] = count
774
+ count = len([r for r in self.results if isinstance(r, output_type)])
775
+ if count > 0:
776
+ count_map[name] = count
826
777
  return count_map
827
778
 
828
- def _process_item(self, item: dict):
829
- # Run item validators
830
- if not self.run_validators('item', item):
831
- return None
779
+ def _process_item(self, item):
780
+ """Process an item yielded by the derived runner.
781
+
782
+ Args:
783
+ item (dict | str): Input item.
832
784
 
833
- # Run item hooks
834
- item = self.run_hooks('on_item_pre_convert', item)
835
- if not item:
836
- return None
785
+ Yields:
786
+ OutputType: Output type.
787
+ """
788
+
789
+ # Item is a string, just print it
790
+ if isinstance(item, str):
791
+ self.output += item + '\n'
792
+ self._print_item(item) if item else ''
793
+ return
794
+
795
+ # Abort further processing if no_process is set
796
+ if self.no_process:
797
+ return
798
+
799
+ # Run item validators
800
+ if not self.run_validators('validate_item', item, error=False):
801
+ return
837
802
 
838
803
  # Convert output dict to another schema
839
- if isinstance(item, dict) and not self.orig:
804
+ if isinstance(item, dict):
805
+ item = self.run_hooks('on_item_pre_convert', item)
806
+ if not item:
807
+ return
840
808
  item = self._convert_item_schema(item)
841
- elif isinstance(item, OutputType):
842
- pass
843
- else:
844
- item = DotMap(item)
845
809
 
846
810
  # Update item context
847
811
  item._context.update(self.context)
848
812
 
849
- # Add context, uuid, progress to item
850
- if not item._source:
851
- item._source = self.config.name
813
+ # Return if already seen
814
+ if item._uuid in self.uuids:
815
+ return
852
816
 
817
+ # Add uuid to item
853
818
  if not item._uuid:
854
819
  item._uuid = str(uuid.uuid4())
855
820
 
856
- if item._type == 'progress' and item._source == self.config.name:
821
+ # Add source to item
822
+ if not item._source:
823
+ item._source = self.unique_name
824
+
825
+ # If progress item, update runner progress
826
+ if isinstance(item, Progress) and item._source == self.unique_name:
857
827
  self.progress = item.percent
858
- update_frequency = CONFIG.runners.progress_update_frequency
859
- if self.last_updated_progress and (item._timestamp - self.last_updated_progress) < update_frequency:
860
- return None
828
+ if not should_update(CONFIG.runners.progress_update_frequency, self.last_updated_progress, item._timestamp):
829
+ return
861
830
  elif int(item.percent) in [0, 100]:
862
- return None
831
+ return
863
832
  else:
864
833
  self.last_updated_progress = item._timestamp
865
834
 
835
+ # If info item and task_id is defined, update runner celery_ids
836
+ elif isinstance(item, Info) and item.task_id and item.task_id not in self.celery_ids:
837
+ self.celery_ids.append(item.task_id)
838
+
866
839
  # Run on_item hooks
867
- if isinstance(item, OutputType) and not self.orig:
840
+ if isinstance(item, tuple(FINDING_TYPES)):
868
841
  item = self.run_hooks('on_item', item)
842
+ if not item:
843
+ return
869
844
 
870
- return item
845
+ # Add item to results
846
+ self.add_result(item, print=True)
871
847
 
872
- def get_repr(self, item=None):
873
- if not item:
874
- return [
875
- self.get_repr(item)
876
- for item in self.results
877
- ]
878
- if self.output_fmt:
879
- item = self.output_fmt.format(**item.toDict())
880
- elif isinstance(item, OutputType):
881
- item = repr(item)
882
- return item
848
+ # Yield item
849
+ yield item
850
+
851
+ @staticmethod
852
+ def resolve_exporters(exporters):
853
+ """Resolve exporters from output options.
854
+
855
+ Args:
856
+ exporters (list[str]): List of exporters to resolve.
857
+
858
+ Returns:
859
+ list: List of exporter classes.
860
+ """
861
+ if not exporters or exporters in ['false', 'False']:
862
+ return []
863
+ if isinstance(exporters, str):
864
+ exporters = exporters.split(',')
865
+ classes = [
866
+ import_dynamic('secator.exporters', f'{o.capitalize()}Exporter')
867
+ for o in exporters
868
+ if o
869
+ ]
870
+ return [cls for cls in classes if cls]
883
871
 
884
872
  @classmethod
885
873
  def get_func_path(cls, func):
886
- """
887
- Get the full symbolic path of a function or method, including staticmethods,
888
- using function and method attributes.
874
+ """Get the full symbolic path of a function or method, including staticmethods, using function and method
875
+ attributes.
889
876
 
890
877
  Args:
891
878
  func (function, method, or staticmethod): A function or method object.