secator 0.1.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of secator might be problematic. Click here for more details.

Files changed (99) hide show
  1. secator/.gitignore +162 -0
  2. secator/__init__.py +0 -0
  3. secator/celery.py +421 -0
  4. secator/cli.py +927 -0
  5. secator/config.py +137 -0
  6. secator/configs/__init__.py +0 -0
  7. secator/configs/profiles/__init__.py +0 -0
  8. secator/configs/profiles/aggressive.yaml +7 -0
  9. secator/configs/profiles/default.yaml +9 -0
  10. secator/configs/profiles/stealth.yaml +7 -0
  11. secator/configs/scans/__init__.py +0 -0
  12. secator/configs/scans/domain.yaml +18 -0
  13. secator/configs/scans/host.yaml +14 -0
  14. secator/configs/scans/network.yaml +17 -0
  15. secator/configs/scans/subdomain.yaml +8 -0
  16. secator/configs/scans/url.yaml +12 -0
  17. secator/configs/workflows/__init__.py +0 -0
  18. secator/configs/workflows/cidr_recon.yaml +28 -0
  19. secator/configs/workflows/code_scan.yaml +11 -0
  20. secator/configs/workflows/host_recon.yaml +41 -0
  21. secator/configs/workflows/port_scan.yaml +34 -0
  22. secator/configs/workflows/subdomain_recon.yaml +33 -0
  23. secator/configs/workflows/url_crawl.yaml +29 -0
  24. secator/configs/workflows/url_dirsearch.yaml +29 -0
  25. secator/configs/workflows/url_fuzz.yaml +35 -0
  26. secator/configs/workflows/url_nuclei.yaml +11 -0
  27. secator/configs/workflows/url_vuln.yaml +55 -0
  28. secator/configs/workflows/user_hunt.yaml +10 -0
  29. secator/configs/workflows/wordpress.yaml +14 -0
  30. secator/decorators.py +346 -0
  31. secator/definitions.py +183 -0
  32. secator/exporters/__init__.py +12 -0
  33. secator/exporters/_base.py +3 -0
  34. secator/exporters/csv.py +29 -0
  35. secator/exporters/gdrive.py +118 -0
  36. secator/exporters/json.py +14 -0
  37. secator/exporters/table.py +7 -0
  38. secator/exporters/txt.py +24 -0
  39. secator/hooks/__init__.py +0 -0
  40. secator/hooks/mongodb.py +212 -0
  41. secator/output_types/__init__.py +24 -0
  42. secator/output_types/_base.py +95 -0
  43. secator/output_types/exploit.py +50 -0
  44. secator/output_types/ip.py +33 -0
  45. secator/output_types/port.py +45 -0
  46. secator/output_types/progress.py +35 -0
  47. secator/output_types/record.py +34 -0
  48. secator/output_types/subdomain.py +42 -0
  49. secator/output_types/tag.py +46 -0
  50. secator/output_types/target.py +30 -0
  51. secator/output_types/url.py +76 -0
  52. secator/output_types/user_account.py +41 -0
  53. secator/output_types/vulnerability.py +97 -0
  54. secator/report.py +95 -0
  55. secator/rich.py +123 -0
  56. secator/runners/__init__.py +12 -0
  57. secator/runners/_base.py +873 -0
  58. secator/runners/_helpers.py +154 -0
  59. secator/runners/command.py +674 -0
  60. secator/runners/scan.py +67 -0
  61. secator/runners/task.py +107 -0
  62. secator/runners/workflow.py +137 -0
  63. secator/serializers/__init__.py +8 -0
  64. secator/serializers/dataclass.py +33 -0
  65. secator/serializers/json.py +15 -0
  66. secator/serializers/regex.py +17 -0
  67. secator/tasks/__init__.py +10 -0
  68. secator/tasks/_categories.py +304 -0
  69. secator/tasks/cariddi.py +102 -0
  70. secator/tasks/dalfox.py +66 -0
  71. secator/tasks/dirsearch.py +88 -0
  72. secator/tasks/dnsx.py +56 -0
  73. secator/tasks/dnsxbrute.py +34 -0
  74. secator/tasks/feroxbuster.py +89 -0
  75. secator/tasks/ffuf.py +85 -0
  76. secator/tasks/fping.py +44 -0
  77. secator/tasks/gau.py +43 -0
  78. secator/tasks/gf.py +34 -0
  79. secator/tasks/gospider.py +71 -0
  80. secator/tasks/grype.py +78 -0
  81. secator/tasks/h8mail.py +80 -0
  82. secator/tasks/httpx.py +104 -0
  83. secator/tasks/katana.py +128 -0
  84. secator/tasks/maigret.py +78 -0
  85. secator/tasks/mapcidr.py +32 -0
  86. secator/tasks/msfconsole.py +176 -0
  87. secator/tasks/naabu.py +52 -0
  88. secator/tasks/nmap.py +341 -0
  89. secator/tasks/nuclei.py +97 -0
  90. secator/tasks/searchsploit.py +53 -0
  91. secator/tasks/subfinder.py +40 -0
  92. secator/tasks/wpscan.py +177 -0
  93. secator/utils.py +404 -0
  94. secator/utils_test.py +183 -0
  95. secator-0.1.0.dist-info/METADATA +379 -0
  96. secator-0.1.0.dist-info/RECORD +99 -0
  97. secator-0.1.0.dist-info/WHEEL +5 -0
  98. secator-0.1.0.dist-info/entry_points.txt +2 -0
  99. secator-0.1.0.dist-info/licenses/LICENSE +60 -0
@@ -0,0 +1,873 @@
1
+ import json
2
+ import logging
3
+ import os
4
+ import sys
5
+ import uuid
6
+ from contextlib import nullcontext
7
+ from datetime import datetime
8
+ from time import sleep, time
9
+
10
+ import humanize
11
+ from dotmap import DotMap
12
+ from rich.padding import Padding
13
+ from rich.panel import Panel
14
+ from rich.progress import Progress as RichProgress
15
+ from rich.progress import SpinnerColumn, TextColumn, TimeElapsedColumn
16
+
17
+ from secator.definitions import DEBUG, DEFAULT_PROGRESS_UPDATE_FREQUENCY, REPORTS_FOLDER
18
+ from secator.output_types import OUTPUT_TYPES, OutputType, Progress
19
+ from secator.report import Report
20
+ from secator.rich import console, console_stdout
21
+ from secator.runners._helpers import (get_task_data, get_task_ids, get_task_folder_id,
22
+ process_extractor)
23
+ from secator.utils import (debug, import_dynamic, merge_opts, pluralize,
24
+ rich_to_ansi)
25
+
26
# Module-level logger, used by run_hooks to dump hook exceptions when DEBUG > 1.
logger = logging.getLogger(__name__)

# Lifecycle hook names a Runner recognizes. For each name, Runner.__init__
# collects class-level methods and user-supplied callables; they are invoked
# through Runner.run_hooks at the matching point of the run lifecycle.
HOOKS = [
    'before_init',
    'on_init',
    'on_start',
    'on_end',
    'on_item_pre_convert',
    'on_item',
    'on_duplicate',
    'on_line',
    'on_iter',
    'on_error',
]

# Validator categories: 'input' validators gate the whole run (a failure aborts
# iteration, see run_validators / input_valid), 'item' validators gate each
# individual item in _process_item.
VALIDATORS = [
    'input',
    'item'
]
45
+
46
+
47
class Runner:
    """Runner class.

    Base class for Task / Workflow / Scan runners: iterates a `yielder()`,
    converts raw items to output types, runs hooks/validators, and writes
    reports through exporters.

    Args:
        config (secator.config.ConfigLoader): Loaded config.
        targets (list): List of targets to run task on.
        results (list): List of existing results to re-use.
        workspace_name (str): Workspace name.
        run_opts (dict): Run options.

    Yields:
        dict: Result (when running in sync mode with `run`).

    Returns:
        list: List of results (when running in async mode with `run_async`).
    """

    # Input field (mostly for tests and CLI)
    input_type = None

    # Output types this runner can produce (list of OutputType subclasses)
    output_types = []

    # Dict return
    output_return_type = dict  # TODO: deprecate this

    # Default exporters (used when the 'output' run option is empty)
    default_exporters = []

    # Run hooks (when False, run_hooks is a no-op passthrough)
    enable_hooks = True

    # Reports folder (resolved per-instance in __init__)
    reports_folder = None
81
+
82
def __init__(self, config, targets, results=None, run_opts=None, hooks=None, context=None):
    """Initialize the runner.

    Args:
        config (secator.config.ConfigLoader): Loaded config.
        targets (str | list): Target(s) to run on.
        results (list, Optional): Existing results to re-use.
        run_opts (dict, Optional): Run options.
        hooks (dict, Optional): User hooks, keyed by hook name or by runner class.
        context (dict, Optional): Run context (workspace_name, task_id, ...).
    """
    # FIX: the original signature used mutable default arguments
    # (results=[], run_opts={}, hooks={}, context={}). self.results is
    # mutated with .append() during iteration, so the shared default list
    # leaked results across Runner instances. Use None sentinels instead.
    results = results if results is not None else []
    run_opts = run_opts if run_opts is not None else {}
    hooks = hooks if hooks is not None else {}
    context = context if context is not None else {}

    self.config = config
    self.name = run_opts.get('name', config.name)
    self.description = run_opts.get('description', config.description)
    if not isinstance(targets, list):
        targets = [targets]
    self.targets = targets
    self.results = results
    self.results_count = 0
    self.workspace_name = context.get('workspace_name', 'default')
    self.run_opts = run_opts.copy()
    self.sync = run_opts.get('sync', True)
    self.exporters = self.resolve_exporters()
    self.done = False
    self.start_time = datetime.fromtimestamp(time())
    self.last_updated = None
    self.last_updated_progress = None
    self.end_time = None
    self._hooks = hooks
    self.errors = []
    self.infos = []
    self.output = ''
    self.status = 'RUNNING'
    self.progress = 0
    self.context = context
    self.delay = run_opts.get('delay', False)
    self.uuids = []
    self.result = None

    # Determine report folder
    default_reports_folder_base = f'{REPORTS_FOLDER}/{self.workspace_name}/{self.config.type}s'
    _id = get_task_folder_id(default_reports_folder_base)
    default_report_folder = f'{default_reports_folder_base}/{_id}'
    self.reports_folder = run_opts.get('reports_folder') or default_report_folder

    # Make reports folders
    os.makedirs(self.reports_folder, exist_ok=True)
    os.makedirs(f'{self.reports_folder}/.inputs', exist_ok=True)
    os.makedirs(f'{self.reports_folder}/.outputs', exist_ok=True)

    # Process input: a single-element target list is unwrapped to a scalar
    self.input = targets
    if isinstance(self.input, list) and len(self.input) == 1:
        self.input = self.input[0]

    # Yield dicts if CLI supports JSON
    # NOTE(review): self.json_flag is not set in this class — presumably set
    # by subclasses (e.g. Command); confirm before relying on it here.
    if self.output_return_type is dict or (self.json_flag is not None):
        self.output_return_type = dict

    # Output options
    self.output_fmt = self.run_opts.get('format', False)
    self.output_quiet = self.run_opts.get('quiet', False)
    self.output_json = self.output_return_type == dict

    # Print options (popped so they are not forwarded as task options)
    self.print_start = self.run_opts.pop('print_start', False)
    self.print_item = self.run_opts.pop('print_item', False)
    self.print_line = self.run_opts.pop('print_line', False)
    self.print_errors = self.run_opts.pop('print_errors', True)
    self.print_item_count = self.run_opts.pop('print_item_count', False)
    self.print_cmd = self.run_opts.pop('print_cmd', False)
    self.print_run_opts = self.run_opts.pop('print_run_opts', DEBUG > 1)
    self.print_fmt_opts = self.run_opts.pop('print_fmt_opts', DEBUG > 1)
    self.print_input_file = self.run_opts.pop('print_input_file', False)
    self.print_hooks = self.run_opts.pop('print_hooks', DEBUG > 1)
    self.print_progress = self.run_opts.pop('print_progress', not self.output_quiet)
    self.print_cmd_prefix = self.run_opts.pop('print_cmd_prefix', False)
    self.print_remote_status = self.run_opts.pop('print_remote_status', False)
    self.print_run_summary = self.run_opts.pop('print_run_summary', False)
    self.print_json = self.run_opts.get('json', False)
    self.print_raw = self.run_opts.get('raw', False)
    self.orig = self.run_opts.get('orig', False)
    self.opts_to_print = {k: v for k, v in self.__dict__.items() if k.startswith('print_') if v}

    # Hooks
    self.raise_on_error = self.run_opts.get('raise_on_error', False)
    self.hooks = {name: [] for name in HOOKS}
    for key in self.hooks:

        # Register class specific hooks (methods named after the hook)
        instance_func = getattr(self, key, None)
        if instance_func:
            name = f'{self.__class__.__name__}.{key}'
            fun = f'{instance_func.__module__}.{instance_func.__name__}'
            debug('', obj={name + ' [dim yellow]->[/] ' + fun: 'registered'}, sub='hooks', level=3)
            self.hooks[key].append(instance_func)

        # Register user hooks (keyed by runner class or by plain hook name)
        user_hooks = hooks.get(self.__class__, {}).get(key, [])
        user_hooks.extend(hooks.get(key, []))
        for hook in user_hooks:
            name = f'{self.__class__.__name__}.{key}'
            fun = f'{hook.__module__}.{hook.__name__}'
            debug('', obj={name + ' [dim yellow]->[/] ' + fun: 'registered (user)'}, sub='hooks', level=3)
        self.hooks[key].extend(user_hooks)

    # Validators
    self.validators = {name: [] for name in VALIDATORS}
    for key in self.validators:
        instance_func = getattr(self, f'validate_{key}', None)
        if instance_func:
            self.validators[key].append(instance_func)
        # NOTE(review): self.validators keys are strings, so the lookup below
        # always returns {} — likely intended to read user-supplied validators
        # (cf. hooks above). Kept as-is to preserve behavior; confirm upstream.
        self.validators[key].extend(self.validators.get(self.__class__, {}).get(key, []))

    # Chunks
    self.parent = self.run_opts.get('parent', True)
    self.has_children = self.run_opts.get('has_children', False)
    self.chunk = self.run_opts.get('chunk', None)
    self.chunk_count = self.run_opts.get('chunk_count', None)
    self.unique_name = self.name.replace('/', '_')
    self.unique_name = f'{self.unique_name}_{self.chunk}' if self.chunk else self.unique_name
    self._set_print_prefix()

    # Input post-process
    self.run_hooks('before_init')

    # Abort if inputs are invalid
    self.input_valid = True
    if not self.run_validators('input', self.input):
        self.input_valid = False

    # Run hooks
    self.run_hooks('on_init')
205
+
206
+ @property
207
+ def elapsed(self):
208
+ if self.done:
209
+ return self.end_time - self.start_time
210
+ return datetime.fromtimestamp(time()) - self.start_time
211
+
212
@property
def elapsed_human(self):
    """Elapsed time as a human-readable string (e.g. 'a few seconds')."""
    delta = self.elapsed
    return humanize.naturaldelta(delta)
215
+
216
def run(self):
    """Run synchronously and return all yielded results as a list."""
    return list(self)
218
+
219
def __iter__(self):
    """Main iteration loop.

    Consumes items from yielder(), converts/validates them through
    _process_item, deduplicates by uuid, records results and printed output,
    then finalizes the run (duplicate marking, filtering, report, on_end).
    """
    if self.print_start:
        self.log_start()

    # Abort early when input validators failed in __init__
    if not self.input_valid:
        return
    try:
        for item in self.yielder():

            if isinstance(item, (OutputType, DotMap, dict)):

                # Handle direct yield of item
                item = self._process_item(item)
                if not item:
                    continue

                # Discard item if already seen (dedup by uuid)
                if item._uuid in self.uuids:
                    continue

                # Add item to results (unconverted dicts are kept only when
                # 'orig' run option is set)
                if isinstance(item, OutputType) or self.orig:
                    self.results.append(item)
                    self.results_count += 1
                    self.uuids.append(item._uuid)
                    yield item

                # Print JSON or raw item (targets are never printed)
                if self.print_item and item._type != 'target':
                    if not isinstance(item, OutputType) and not self.orig:
                        item_str = rich_to_ansi(
                            f'[dim red]❌ Failed to load item as output type:\n {item.toDict()}[/]'
                        )
                        self.output += item_str + '\n'
                        self._print(item_str, rich=True)
                    elif self.print_json:
                        self._print(item, out=sys.stdout)
                    elif self.print_raw:
                        self._print(str(item), out=sys.stdout)
                    else:
                        item_str = self.get_repr(item)
                        if self.print_remote_status or DEBUG > 1:
                            item_str += f' [{item._source}]'
                        # Progress lines are suppressed unless print_progress
                        if item._type == 'progress' and not self.print_progress:
                            continue
                        self._print(item_str, out=sys.stdout)

            elif item and isinstance(item, str):
                # Raw output lines: printed on stderr, and kept as results
                # only in non-JSON mode
                if self.print_line:
                    self._print(item, out=sys.stderr, end='\n')
                if not self.output_json:
                    self.results.append(item)
                    yield item

            # Accumulate text output for the report
            if item:
                if isinstance(item, OutputType):
                    self.output += self.get_repr(item) + '\n'
                else:
                    self.output += str(item) + '\n'

            self.run_hooks('on_iter')

    except KeyboardInterrupt:
        self._print('Process was killed manually (CTRL+C / CTRL+X).', color='bold red', rich=True)
        if self.result:
            # A Celery result exists: revoke the remote tasks too
            self._print('Revoking remote Celery tasks ...', color='bold red', rich=True)
            self.stop_live_tasks(self.result)

    # Filter results and log info
    self.mark_duplicates()
    self.results = self.filter_results()
    self.log_results()
    self.run_hooks('on_end')
292
+
293
def mark_duplicates(self):
    """Mark duplicate results in-place.

    For each item, finds equal items (same __eq__, different uuid), elects a
    'main' one via max(), links duplicates through _related, flags the rest
    with _duplicate=True, and fires on_item / on_duplicate hooks accordingly.
    """
    debug('duplicate check', id=self.config.name, sub='runner.mark_duplicates')
    for item in self.results:
        debug('duplicate check', obj=item.toDict(), obj_breaklines=True, sub='runner.mark_duplicates', level=2)
        # Equal items with a different uuid are duplicate candidates
        others = [f for f in self.results if f == item and f._uuid != item._uuid]
        if others:
            # max() picks the "richest" item as the main one
            main = max(item, *others)
            dupes = [f for f in others if f._uuid != main._uuid]
            main._duplicate = False
            main._related.extend([dupe._uuid for dupe in dupes])
            # dict.fromkeys dedups while preserving order
            main._related = list(dict.fromkeys(main._related))
            if main._uuid != item._uuid:
                item._duplicate = True
                item = self.run_hooks('on_item', item)
                if item._uuid not in main._related:
                    main._related.append(item._uuid)
                main = self.run_hooks('on_duplicate', main)
                item = self.run_hooks('on_duplicate', item)

            for dupe in dupes:
                if not dupe._duplicate:
                    debug(
                        'found new duplicate', obj=dupe.toDict(), obj_breaklines=True,
                        sub='runner.mark_duplicates', level=2)
                    dupe._duplicate = True
                    dupe = self.run_hooks('on_duplicate', dupe)

    debug('Duplicates:', sub='runner.mark_duplicates', level=2)
    debug('\n\t'.join([repr(i) for i in self.results if i._duplicate]), sub='runner.mark_duplicates', level=2)
    debug('duplicate check completed', id=self.config.name, sub='runner.mark_duplicates')
323
+
324
+ def yielder(self):
325
+ raise NotImplementedError()
326
+
327
def toDict(self):
    """Serialize the runner state to a plain dict (config, targets, status,
    timing and context) for storage / transport."""
    return dict(
        config=self.config.toDict(),
        opts=self.config.supported_opts,
        name=self.name,
        targets=self.targets,
        run_opts=self.run_opts,
        parent=self.parent,
        has_children=self.has_children,
        chunk=self.chunk,
        chunk_count=self.chunk_count,
        results_count=self.results_count,
        sync=self.sync,
        done=self.done,
        output=self.output,
        status=self.status,
        progress=self.progress,
        start_time=self.start_time,
        end_time=self.end_time,
        last_updated=self.last_updated,
        elapsed=self.elapsed.total_seconds(),
        elapsed_human=self.elapsed_human,
        errors=self.errors,
        context=self.context,
    )
352
+
353
def run_hooks(self, hook_type, *args):
    """Run all hooks registered for a hook type.

    Each hook is called as hook(self, *args). The return value of the last
    hook is returned (hooks are typically item transformers); note every
    hook receives the ORIGINAL *args, not the previous hook's result.
    Hook exceptions are re-raised only when raise_on_error is set; otherwise
    they are logged/printed and the hook is skipped.
    """
    result = args[0] if len(args) > 0 else None
    if not self.enable_hooks:
        return result
    for hook in self.hooks[hook_type]:
        name = f'{self.__class__.__name__}.{hook_type}'
        fun = f'{hook.__module__}.{hook.__name__}'
        try:
            _id = self.context.get('task_id', '') or self.context.get('workflow_id', '') or self.context.get('scan_id', '')
            debug('', obj={name + ' [dim yellow]->[/] ' + fun: 'started'}, id=_id, sub='hooks', level=3)
            result = hook(self, *args)
            debug('', obj={name + ' [dim yellow]->[/] ' + fun: 'ended'}, id=_id, sub='hooks', level=3)
        except Exception as e:
            if self.raise_on_error:
                raise e
            else:
                if DEBUG > 1:
                    logger.exception(e)
                else:
                    self._print(
                        f'{fun} failed: "{e.__class__.__name__}: {str(e)}". Skipping',
                        color='bold red',
                        rich=True)
                    self._print('Set DEBUG to > 1 to see the detailed exception.', color='dim red', rich=True)
    return result
378
+
379
+ def run_validators(self, validator_type, *args):
380
+ # logger.debug(f'Running validators of type {validator_type}')
381
+ for validator in self.validators[validator_type]:
382
+ # logger.debug(validator)
383
+ if not validator(self, *args):
384
+ if validator_type == 'input':
385
+ self._print(f'{validator.__doc__}', color='bold red', rich=True)
386
+ return False
387
+ return True
388
+
389
+ def resolve_exporters(self):
390
+ """Resolve exporters from output options."""
391
+ output = self.run_opts.get('output', '')
392
+ if output == '':
393
+ return self.default_exporters
394
+ elif output is False:
395
+ return []
396
+ exporters = [
397
+ import_dynamic(f'secator.exporters.{o.capitalize()}Exporter', 'Exporter')
398
+ for o in output.split(',')
399
+ if o
400
+ ]
401
+ return [e for e in exporters if e]
402
+
403
def log_start(self):
    """Print the runner header panel and the start banner."""
    remote_str = 'starting' if self.sync else 'sent to Celery worker'
    runner_name = self.__class__.__name__
    self.log_header()
    self._print(
        f':tada: {runner_name} [bold magenta]{self.config.name}[/] {remote_str}...', rich=True)
    # Scans print live results per-workflow instead, so skip the banner here
    show_live_banner = (not self.sync) and self.print_remote_status and runner_name != 'Scan'
    if show_live_banner:
        self._print('\n🏆 [bold gold3]Live results:[/]', rich=True)
412
+
413
def log_header(self):
    """Print a Rich panel summarizing the run: description, workspace,
    targets, visible options and exporters."""
    runner_name = self.__class__.__name__

    # Description
    panel_str = f':scroll: [bold gold3]Description:[/] {self.config.description}'

    # Workspace
    if self.workspace_name:
        panel_str += f'\n:construction_worker: [bold gold3]Workspace:[/] {self.workspace_name}'

    # Targets
    if self.targets:
        panel_str += '\n:pear: [bold gold3]Targets:[/]'
        for target in self.targets:
            panel_str += f'\n   • {target}'

    # Options (internal/display-control opts are excluded)
    DISPLAY_OPTS_EXCLUDE = [
        'sync', 'worker', 'debug', 'output', 'json', 'orig', 'raw', 'format', 'quiet'
    ]
    items = [
        f'[italic]{k}[/]: {v}'
        for k, v in self.run_opts.items()
        if k not in DISPLAY_OPTS_EXCLUDE
        and v is not None
    ]
    if items:
        panel_str += '\n:pushpin: [bold gold3]Options:[/]'
        for item in items:
            panel_str += f'\n   • {item}'

    # Exporters
    if self.exporters:
        panel_str += '\n:email: [bold gold3]Exporters:[/]'
        for exporter in self.exporters:
            exporter_name = exporter.__name__.replace('Exporter', '').lower()
            panel_str += f'\n   • {exporter_name}'

    panel = Panel(
        panel_str,
        title=f'[bold gold3]{runner_name}[/] [bold magenta]{self.config.name}[/]',
        border_style='bold gold3',
        expand=False,
        highlight=True
    )
    self._print('\n')
    self._print(panel, rich=True)
460
+
461
def log_results(self):
    """Finalize the run and log the outcome.

    Marks the runner done, freezes end_time, sets status from the collected
    errors, prints the summary / infos / errors, then builds and sends the
    report through the configured exporters. Mutates self.done, self.progress,
    self.results_count, self.status, self.end_time and self.report.
    """
    self.done = True
    self.progress = 100
    self.results_count = len(self.results)
    self.status = 'SUCCESS' if not self.errors else 'FAILED'
    self.end_time = datetime.fromtimestamp(time())

    # Log execution results
    status = 'succeeded' if not self.errors else '[bold red]failed[/]'
    if self.print_run_summary:
        self._print('\n')
        self._print(
            f':tada: [bold green]{self.__class__.__name__.capitalize()}[/] [bold magenta]{self.config.name}[/] '
            f'[bold green]{status} in[/] [bold gold3]{self.elapsed_human}[/].', rich=True)

    # Log runner infos
    if self.infos:
        self._print(
            f':heavy_check_mark: [bold magenta]{self.config.name}[/] infos ({len(self.infos)}):',
            color='bold green', rich=True)
        for info in self.infos:
            self._print(f'   • {info}', color='bold green', rich=True)

    # Log runner errors (unless print_errors is disabled)
    if self.errors and self.print_errors:
        self._print(
            f':exclamation_mark:[bold magenta]{self.config.name}[/] errors ({len(self.errors)}):',
            color='bold red', rich=True)
        for error in self.errors:
            self._print(f'   • {error}', color='bold red', rich=True)

    # Build and send report (only when there are results)
    if self.results:
        report = Report(self, exporters=self.exporters)
        report.build()
        report.send()
        self.report = report

    # Log results count per output type
    if self.print_item_count and not self.print_raw and not self.orig:
        count_map = self._get_results_count()
        if all(count == 0 for count in count_map.values()):
            self._print(':exclamation_mark:Found 0 results.', color='bold red', rich=True)
        else:
            results_str = ':heavy_check_mark: Found ' + ' and '.join([
                f'{count} {pluralize(name) if count > 1 or count == 0 else name}'
                for name, count in count_map.items()
            ]) + '.'
            self._print(results_str, color='bold green', rich=True)
516
+
517
@staticmethod
def get_live_results(result):
    """Poll Celery subtasks results in real-time. Fetch task metadata and partial results from each task that runs.

    Args:
        result (celery.result.AsyncResult): Result object.

    Yields:
        dict: Subtasks state and results.
    """
    from celery.result import AsyncResult  # local import: celery is optional at module import time
    res = AsyncResult(result.id)
    while True:
        # Yield results
        yield from Runner.get_celery_results(result)

        # Break out of while loop. NOTE: a final fetch is done after ready()
        # so results produced between the last poll and completion are not
        # lost (consumers dedup by uuid, so the repeat is harmless).
        if res.ready():
            yield from Runner.get_celery_results(result)
            break

        # Sleep between updates
        sleep(1)
540
+
541
@staticmethod
def get_celery_results(result):
    """Get Celery results from main result object, including any subtasks results.

    Args:
        result (celery.result.AsyncResult): Result object.

    Yields:
        dict: Subtasks state and results, Progress objects.
    """
    task_ids = []
    get_task_ids(result, ids=task_ids)
    datas = []
    for task_id in task_ids:
        data = get_task_data(task_id)
        if data and DEBUG > 1:
            full_name = data['name']
            if data['chunk_info']:
                full_name += ' ' + data['chunk_info']
            debug('', sub='celery.runner', id=data['id'], obj={full_name: data['state']}, level=4)
        if not data:
            continue
        yield data
        datas.append(data)

    # Calculate and yield progress as percentage of subtasks that are ready
    total = len(datas)
    count_finished = sum([i['ready'] for i in datas if i])
    percent = int(count_finished * 100 / total) if total > 0 else 0
    if percent > 0:
        yield Progress(duration='unknown', percent=percent)
572
+
573
def stop_live_tasks(self, result):
    """Stop live tasks running in Celery worker.

    Args:
        result (AsyncResult | GroupResult): Celery result.
    """
    # Local import to avoid a circular import with secator.celery;
    # hoisted out of the loop (it was re-executed for every task id).
    from secator.celery import revoke_task
    task_ids = []
    get_task_ids(result, ids=task_ids)
    for task_id in task_ids:
        revoke_task(task_id)
584
+
585
def process_live_tasks(self, result, description=True, results_only=True, print_remote_status=True):
    """Rich progress indicator showing live tasks statuses.

    Args:
        result (AsyncResult | GroupResult): Celery result.
        description (bool): Show the task description column.
        results_only (bool): Yield only results, no task state.
        print_remote_status (bool): Render the live Rich progress panel.

    Yields:
        dict: Subtasks state and results.
    """
    config_name = self.config.name
    runner_name = self.__class__.__name__.capitalize()

    # Display live results if print_remote_status is set
    if print_remote_status:
        # Subclass wrapping the task table in a titled panel
        class PanelProgress(RichProgress):
            def get_renderables(self):
                yield Padding(Panel(
                    self.make_tasks_table(self.tasks),
                    title=f'[bold gold3]{runner_name}[/] [bold magenta]{config_name}[/] results',
                    border_style='bold gold3',
                    expand=False,
                    highlight=True), pad=(2, 0, 0, 0))

        tasks_progress = PanelProgress(
            SpinnerColumn('dots'),
            TextColumn('{task.fields[descr]} ') if description else '',
            TextColumn('[bold cyan]{task.fields[name]}[/]'),
            TextColumn('[dim gold3]{task.fields[chunk_info]}[/]'),
            TextColumn('{task.fields[state]:<20}'),
            TimeElapsedColumn(),
            TextColumn('{task.fields[count]}'),
            # TextColumn('{task.fields[progress]}%'),
            # TextColumn('\[[bold magenta]{task.fields[id]:<30}[/]]'),  # noqa: W605
            refresh_per_second=1,
            transient=False,
            # console=console,
            # redirect_stderr=True,
            # redirect_stdout=False
        )
        state_colors = {
            'RUNNING': 'bold yellow',
            'SUCCESS': 'bold green',
            'FAILURE': 'bold red',
            'REVOKED': 'bold magenta'
        }
    else:
        # No rendering: use a no-op context manager in place of the progress
        tasks_progress = nullcontext()

    with tasks_progress as progress:

        # Maps Celery task id -> Rich progress task id
        tasks_progress = {}

        # Get live results and print progress
        for data in Runner.get_live_results(result):

            # If progress object, yield progress and ignore tracking
            if isinstance(data, OutputType) and data._type == 'progress':
                yield data
                continue

            # TODO: add error output type and yield errors in get_celery_results
            # if isinstance(data, OutputType) and data._type == 'error':
            # 	yield data
            # 	continue

            # Re-yield so that we can consume it externally
            if results_only:
                yield from data['results']
            else:
                yield data

            if not print_remote_status:
                continue

            # Handle messages if any
            state = data['state']
            error = data.get('error')
            info = data.get('info')
            full_name = data['name']
            chunk_info = data.get('chunk_info', '')
            if chunk_info:
                full_name += f' {chunk_info}'
            if error:
                state = 'FAILURE'
                error = f'{full_name}: {error}'
                if error not in self.errors:
                    self.errors.append(error)
            if info:
                info = f'{full_name}: {info}'
                if info not in self.infos:
                    self.infos.append(info)

            task_id = data['id']
            state_str = f'[{state_colors[state]}]{state}[/]'
            data['state'] = state_str

            # Create or update the Rich progress row for this Celery task
            if task_id not in tasks_progress:
                id = progress.add_task('', **data)
                tasks_progress[task_id] = id
            else:
                progress_id = tasks_progress[task_id]
                if state in ['SUCCESS', 'FAILURE']:
                    progress.update(progress_id, advance=100, **data)
                elif data['progress'] != 0:
                    progress.update(progress_id, advance=data['progress'], **data)

        # Update all tasks to 100 %
        for progress_id in tasks_progress.values():
            progress.update(progress_id, advance=100)
701
+
702
def filter_results(self):
    """Filter runner results using extractors defined in config.

    Returns:
        list: Extracted results, plus all results whose type is not targeted
        by any extractor. When no extractors are configured, returns
        self.results unchanged.
    """
    extractors = self.config.results
    results = []
    if extractors:
        # Keep results based on extractors
        opts = merge_opts(self.config.options, self.run_opts)
        for extractor in extractors:
            tmp = process_extractor(self.results, extractor, ctx=opts)
            results.extend(tmp)

        # Keep results whose type is NOT covered by any extractor.
        # FIX: the original compared item._type (a type-name string, cf.
        # comparisons to 'target'/'progress' elsewhere) against OutputType
        # *classes*, so the membership test was always False and
        # non-extracted types were silently dropped. Compare names instead.
        # NOTE(review): assumes extractor['type'] holds the type name string
        # as returned by OutputType.get_name() — confirm against
        # process_extractor's contract.
        extract_types = [e['type'] for e in extractors]
        keep_types = [
            _type.get_name() for _type in OUTPUT_TYPES
            if _type.__name__ != 'Progress'
            and _type.get_name() not in extract_types
        ]
        results.extend([
            item for item in self.results
            if item._type in keep_types
        ])
    else:
        results = self.results
    return results
726
+
727
def _convert_item_schema(self, item):
    """Convert dict item to a new structure using the class output schema.

    Tries each class in self.output_types in order and returns the first
    successful load; falls back to a DotMap with _type='unknown' when no
    output type matches.

    Args:
        item (dict): Item.

    Returns:
        dict: Item with new schema.
    """
    # Load item using available output types and get the first matching
    # output type based on the schema
    new_item = None
    output_types = getattr(self, 'output_types', [])
    debug(f'Input item: {item}', sub='klass.load', level=5)
    debug(f'Output types to try: {[o.__name__ for o in output_types]}', sub='klass.load', level=5)
    for klass in output_types:
        debug(f'Loading item as {klass.__name__}', sub='klass.load', level=5)
        # output_map optionally remaps raw keys to the output type's fields
        output_map = getattr(self, 'output_map', {})
        output_map = output_map.get(klass, {})
        try:
            new_item = klass.load(item, output_map)
            debug(f'[dim green]Successfully loaded item as {klass.__name__}[/]', sub='klass.load', level=5)
            break  # found an item that fits
        except (TypeError, KeyError) as e:  # can't load using class
            debug(
                f'[dim red]Failed loading item as {klass.__name__}: {type(e).__name__}: {str(e)}.[/] [dim green]Continuing.[/]',
                sub='klass.load',
                level=5)
            if DEBUG == 6:
                console.print_exception(show_locals=False)
            continue

    # No output type was found, so make no conversion
    if not new_item:
        new_item = DotMap(item)
        new_item._type = 'unknown'

    return new_item
765
+
766
def _print(self, data, color=None, out=sys.stderr, rich=False, end='\n'):
    """Print function.

    Dict-like items are serialized to JSON first. Output goes through the
    Rich console in sync mode (or when rich=True), plain print() otherwise.

    Args:
        data (str or dict): Input data.
        color (str, Optional): Rich color.
        out (str, Optional): Output pipe (sys.stderr, sys.stdout, ...)
        rich (bool, Optional): Force rich output.
        end (str, Optional): Line terminator.
    """
    # Print a JSON item
    if isinstance(data, (OutputType, DotMap, dict)):
        if getattr(data, 'toDict', None):
            data = data.toDict()
        data = json.dumps(data)
        # Prefix JSON lines with the runner prefix unless items are printed
        # individually (print_item mode already carries context)
        data = f'{self.prefix:>15} {data}' if self.prefix and not self.print_item else data

    if self.sync or rich:
        # Route stdout data to the stdout console so piping works as expected
        _console = console_stdout if out == sys.stdout else console
        _console.print(data, highlight=False, style=color, soft_wrap=True, end=end)
    else:
        print(data, file=out)
802
+
803
+ def _set_print_prefix(self):
804
+ self.prefix = ''
805
+ if self.print_cmd_prefix:
806
+ self.prefix = f'[bold gold3]({self.config.name})[/]'
807
+ if self.chunk and self.chunk_count:
808
+ self.prefix += f' [{self.chunk}/{self.chunk_count}]'
809
+
810
+ def _get_results_count(self):
811
+ count_map = {}
812
+ for output_type in self.output_types:
813
+ if output_type.__name__ == 'Progress':
814
+ continue
815
+ name = output_type.get_name()
816
+ count = len([r for r in self.results if r._type == name])
817
+ count_map[name] = count
818
+ return count_map
819
+
820
def _process_item(self, item: dict):
    """Validate, convert and enrich a single raw item.

    Returns the processed item (OutputType or DotMap), or None when it is
    rejected by validators/hooks or throttled as a progress update.
    """
    # Run item validators
    if not self.run_validators('item', item):
        return None

    # Run item hooks
    item = self.run_hooks('on_item_pre_convert', item)
    if not item:
        return None

    # Convert output dict to another schema (skipped in 'orig' mode)
    if isinstance(item, dict) and not self.orig:
        item = self._convert_item_schema(item)
    elif isinstance(item, OutputType):
        pass
    else:
        item = DotMap(item)

    # Update item context
    item._context.update(self.context)

    # Add context, uuid, progress to item
    if not item._source:
        item._source = self.config.name

    if not item._uuid:
        item._uuid = str(uuid.uuid4())

    # Throttle own progress items: drop updates arriving faster than
    # DEFAULT_PROGRESS_UPDATE_FREQUENCY. NOTE(review): the elif drops 0/100
    # percent markers only when no previous update was recorded — confirm
    # this asymmetry is intended.
    if item._type == 'progress' and item._source == self.config.name:
        self.progress = item.percent
        if self.last_updated_progress and (item._timestamp - self.last_updated_progress) < DEFAULT_PROGRESS_UPDATE_FREQUENCY:
            return None
        elif int(item.percent) in [0, 100]:
            return None
        else:
            self.last_updated_progress = item._timestamp

    # Run on_item hooks (converted items only, never in 'orig' mode)
    if isinstance(item, OutputType) and not self.orig:
        item = self.run_hooks('on_item', item)

    return item
862
+
863
+ def get_repr(self, item=None):
864
+ if not item:
865
+ return [
866
+ self.get_repr(item)
867
+ for item in self.results
868
+ ]
869
+ if self.output_fmt:
870
+ item = self.output_fmt.format(**item.toDict())
871
+ elif isinstance(item, OutputType):
872
+ item = repr(item)
873
+ return item