secator 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of secator might be problematic. Click here for more details.

Files changed (114) hide show
  1. secator/__init__.py +0 -0
  2. secator/celery.py +482 -0
  3. secator/cli.py +617 -0
  4. secator/config.py +137 -0
  5. secator/configs/__init__.py +0 -0
  6. secator/configs/profiles/__init__.py +0 -0
  7. secator/configs/profiles/aggressive.yaml +7 -0
  8. secator/configs/profiles/default.yaml +9 -0
  9. secator/configs/profiles/stealth.yaml +7 -0
  10. secator/configs/scans/__init__.py +0 -0
  11. secator/configs/scans/domain.yaml +18 -0
  12. secator/configs/scans/host.yaml +14 -0
  13. secator/configs/scans/network.yaml +17 -0
  14. secator/configs/scans/subdomain.yaml +8 -0
  15. secator/configs/scans/url.yaml +12 -0
  16. secator/configs/workflows/__init__.py +0 -0
  17. secator/configs/workflows/cidr_recon.yaml +28 -0
  18. secator/configs/workflows/code_scan.yaml +11 -0
  19. secator/configs/workflows/host_recon.yaml +41 -0
  20. secator/configs/workflows/port_scan.yaml +34 -0
  21. secator/configs/workflows/subdomain_recon.yaml +33 -0
  22. secator/configs/workflows/url_crawl.yaml +29 -0
  23. secator/configs/workflows/url_dirsearch.yaml +29 -0
  24. secator/configs/workflows/url_fuzz.yaml +35 -0
  25. secator/configs/workflows/url_nuclei.yaml +11 -0
  26. secator/configs/workflows/url_vuln.yaml +55 -0
  27. secator/configs/workflows/user_hunt.yaml +10 -0
  28. secator/configs/workflows/wordpress.yaml +14 -0
  29. secator/decorators.py +309 -0
  30. secator/definitions.py +165 -0
  31. secator/exporters/__init__.py +12 -0
  32. secator/exporters/_base.py +3 -0
  33. secator/exporters/csv.py +30 -0
  34. secator/exporters/gdrive.py +118 -0
  35. secator/exporters/json.py +15 -0
  36. secator/exporters/table.py +7 -0
  37. secator/exporters/txt.py +25 -0
  38. secator/hooks/__init__.py +0 -0
  39. secator/hooks/mongodb.py +212 -0
  40. secator/output_types/__init__.py +24 -0
  41. secator/output_types/_base.py +95 -0
  42. secator/output_types/exploit.py +50 -0
  43. secator/output_types/ip.py +33 -0
  44. secator/output_types/port.py +45 -0
  45. secator/output_types/progress.py +35 -0
  46. secator/output_types/record.py +34 -0
  47. secator/output_types/subdomain.py +42 -0
  48. secator/output_types/tag.py +46 -0
  49. secator/output_types/target.py +30 -0
  50. secator/output_types/url.py +76 -0
  51. secator/output_types/user_account.py +41 -0
  52. secator/output_types/vulnerability.py +97 -0
  53. secator/report.py +107 -0
  54. secator/rich.py +124 -0
  55. secator/runners/__init__.py +12 -0
  56. secator/runners/_base.py +833 -0
  57. secator/runners/_helpers.py +153 -0
  58. secator/runners/command.py +638 -0
  59. secator/runners/scan.py +65 -0
  60. secator/runners/task.py +106 -0
  61. secator/runners/workflow.py +135 -0
  62. secator/serializers/__init__.py +8 -0
  63. secator/serializers/dataclass.py +33 -0
  64. secator/serializers/json.py +15 -0
  65. secator/serializers/regex.py +17 -0
  66. secator/tasks/__init__.py +10 -0
  67. secator/tasks/_categories.py +304 -0
  68. secator/tasks/cariddi.py +102 -0
  69. secator/tasks/dalfox.py +65 -0
  70. secator/tasks/dirsearch.py +90 -0
  71. secator/tasks/dnsx.py +56 -0
  72. secator/tasks/dnsxbrute.py +34 -0
  73. secator/tasks/feroxbuster.py +91 -0
  74. secator/tasks/ffuf.py +86 -0
  75. secator/tasks/fping.py +44 -0
  76. secator/tasks/gau.py +47 -0
  77. secator/tasks/gf.py +33 -0
  78. secator/tasks/gospider.py +71 -0
  79. secator/tasks/grype.py +79 -0
  80. secator/tasks/h8mail.py +81 -0
  81. secator/tasks/httpx.py +99 -0
  82. secator/tasks/katana.py +133 -0
  83. secator/tasks/maigret.py +78 -0
  84. secator/tasks/mapcidr.py +32 -0
  85. secator/tasks/msfconsole.py +174 -0
  86. secator/tasks/naabu.py +52 -0
  87. secator/tasks/nmap.py +344 -0
  88. secator/tasks/nuclei.py +97 -0
  89. secator/tasks/searchsploit.py +52 -0
  90. secator/tasks/subfinder.py +40 -0
  91. secator/tasks/wpscan.py +179 -0
  92. secator/utils.py +445 -0
  93. secator/utils_test.py +183 -0
  94. secator-0.0.1.dist-info/LICENSE +60 -0
  95. secator-0.0.1.dist-info/METADATA +199 -0
  96. secator-0.0.1.dist-info/RECORD +114 -0
  97. secator-0.0.1.dist-info/WHEEL +5 -0
  98. secator-0.0.1.dist-info/entry_points.txt +2 -0
  99. secator-0.0.1.dist-info/top_level.txt +2 -0
  100. tests/__init__.py +0 -0
  101. tests/integration/__init__.py +0 -0
  102. tests/integration/inputs.py +42 -0
  103. tests/integration/outputs.py +392 -0
  104. tests/integration/test_scans.py +82 -0
  105. tests/integration/test_tasks.py +103 -0
  106. tests/integration/test_workflows.py +163 -0
  107. tests/performance/__init__.py +0 -0
  108. tests/performance/loadtester.py +56 -0
  109. tests/unit/__init__.py +0 -0
  110. tests/unit/test_celery.py +39 -0
  111. tests/unit/test_scans.py +0 -0
  112. tests/unit/test_serializers.py +51 -0
  113. tests/unit/test_tasks.py +348 -0
  114. tests/unit/test_workflows.py +96 -0
@@ -0,0 +1,833 @@
1
+ import json
2
+ import logging
3
+ import sys
4
+ import uuid
5
+ from contextlib import nullcontext
6
+ from datetime import datetime
7
+ from time import sleep, time
8
+
9
+ import humanize
10
+ from celery.result import AsyncResult
11
+ from dotmap import DotMap
12
+ from rich.padding import Padding
13
+ from rich.panel import Panel
14
+ from rich.progress import Progress as RichProgress
15
+ from rich.progress import SpinnerColumn, TextColumn, TimeElapsedColumn
16
+
17
+ from secator.definitions import DEBUG, DEFAULT_PROGRESS_UPDATE_FREQUENCY
18
+ from secator.output_types import OUTPUT_TYPES, OutputType, Progress
19
+ from secator.report import Report
20
+ from secator.rich import console, console_stdout
21
+ from secator.runners._helpers import (get_task_data, get_task_ids,
22
+ process_extractor)
23
+ from secator.utils import (debug, import_dynamic, merge_opts, pluralize,
24
+ rich_to_ansi)
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
# Hook names that a Runner fires during its lifecycle. Hooks can be instance
# methods (named exactly like the hook) or external callables passed via the
# `hooks` constructor argument, keyed by runner class.
HOOKS = [
    'before_init',
    'on_init',
    'on_start',
    'on_end',
    'on_item_pre_convert',
    'on_item',
    'on_duplicate',
    'on_line',
    'on_iter',
    'on_error',
]

# Validator names a Runner supports; instance methods named `validate_<name>`
# are auto-registered for each.
VALIDATORS = [
    'input',
    'item'
]
45
+
46
+
47
class Runner:
	"""Runner class.

	Args:
		config (secator.config.ConfigLoader): Loaded config.
		targets (list): List of targets to run task on.
		results (list): List of existing results to re-use.
		workspace_name (str): Workspace name.
		run_opts (dict): Run options.

	Yields:
		dict: Result (when running in sync mode with `run`).

	Returns:
		list: List of results (when running in async mode with `run_async`).
	"""

	# Input field (mostly for tests and CLI)
	input_type = None

	# Output types produced by this runner (list of OutputType subclasses)
	output_types = []

	# Dict return
	output_return_type = dict  # TODO: deprecate this

	# Default exporters used when no 'output' run option is given
	default_exporters = []

	# Whether lifecycle hooks are executed (see HOOKS)
	enable_hooks = True
79
+ def __init__(self, config, targets, results=[], run_opts={}, hooks={}, context={}):
80
+ self.config = config
81
+ self.name = run_opts.get('name', config.name)
82
+ self.description = run_opts.get('description', config.description)
83
+ if not isinstance(targets, list):
84
+ targets = [targets]
85
+ self.targets = targets
86
+ self.results = results
87
+ self.results_count = 0
88
+ self.workspace_name = context.get('workspace_name', 'default')
89
+ self.run_opts = run_opts.copy()
90
+ self.sync = run_opts.get('sync', True)
91
+ self.exporters = self.resolve_exporters()
92
+ self.done = False
93
+ self.start_time = datetime.fromtimestamp(time())
94
+ self.last_updated = None
95
+ self.last_updated_progress = None
96
+ self.end_time = None
97
+ self._hooks = hooks
98
+ self.errors = []
99
+ self.infos = []
100
+ self.output = ''
101
+ self.status = 'RUNNING'
102
+ self.progress = 0
103
+ self.context = context
104
+ self.delay = run_opts.get('delay', False)
105
+ self.uuids = []
106
+ self.result = None
107
+
108
+ # Process input
109
+ self.input = targets
110
+ if isinstance(self.input, list) and len(self.input) == 1:
111
+ self.input = self.input[0]
112
+
113
+ # Yield dicts if CLI supports JSON
114
+ if self.output_return_type is dict or (self.json_flag is not None):
115
+ self.output_return_type = dict
116
+
117
+ # Output options
118
+ self.output_fmt = self.run_opts.get('format', False)
119
+ self.output_quiet = self.run_opts.get('quiet', False)
120
+ self.output_json = self.output_return_type == dict
121
+
122
+ # Print options
123
+ self.print_start = self.run_opts.pop('print_start', False)
124
+ self.print_item = self.run_opts.pop('print_item', False)
125
+ self.print_line = self.run_opts.pop('print_line', self.sync and not self.output_quiet)
126
+ self.print_item_count = self.run_opts.pop('print_item_count', False)
127
+ self.print_cmd = self.run_opts.pop('print_cmd', False)
128
+ self.print_run_opts = self.run_opts.pop('print_run_opts', DEBUG > 1)
129
+ self.print_fmt_opts = self.run_opts.pop('print_fmt_opts', DEBUG > 1)
130
+ self.print_input_file = self.run_opts.pop('print_input_file', False)
131
+ self.print_hooks = self.run_opts.pop('print_hooks', DEBUG > 1)
132
+ self.print_progress = self.run_opts.pop('print_progress', not self.output_quiet)
133
+ self.print_cmd_prefix = self.run_opts.pop('print_cmd_prefix', False)
134
+ self.print_remote_status = self.run_opts.pop('print_remote_status', False)
135
+ self.print_run_summary = self.run_opts.pop('print_run_summary', False)
136
+ self.print_json = self.run_opts.get('json', False)
137
+ self.print_raw = self.run_opts.get('raw', False)
138
+ self.orig = self.run_opts.get('orig', False)
139
+ self.opts_to_print = {k: v for k, v in self.__dict__.items() if k.startswith('print_') if v}
140
+
141
+ # Hooks
142
+ self.hooks = {name: [] for name in HOOKS}
143
+ for key in self.hooks:
144
+ instance_func = getattr(self, key, None)
145
+ if instance_func:
146
+ self.hooks[key].append(instance_func)
147
+ self.hooks[key].extend(hooks.get(self.__class__, {}).get(key, []))
148
+
149
+ # Validators
150
+ self.validators = {name: [] for name in VALIDATORS}
151
+ for key in self.validators:
152
+ instance_func = getattr(self, f'validate_{key}', None)
153
+ if instance_func:
154
+ self.validators[key].append(instance_func)
155
+ self.validators[key].extend(self.validators.get(self.__class__, {}).get(key, []))
156
+
157
+ # Chunks
158
+ self.parent = self.run_opts.get('parent', True)
159
+ self.has_children = self.run_opts.get('has_children', False)
160
+ self.chunk = self.run_opts.get('chunk', None)
161
+ self.chunk_count = self.run_opts.get('chunk_count', None)
162
+ self._set_print_prefix()
163
+
164
+ # Input post-process
165
+ self.run_hooks('before_init')
166
+
167
+ # Abort if inputs are invalid
168
+ self.input_valid = True
169
+ if not self.run_validators('input', self.input):
170
+ self.input_valid = False
171
+
172
+ # Run hooks
173
+ self.run_hooks('on_init')
174
+
175
+ @property
176
+ def elapsed(self):
177
+ if self.done:
178
+ return self.end_time - self.start_time
179
+ return datetime.fromtimestamp(time()) - self.start_time
180
+
181
	@property
	def elapsed_human(self):
		"""Elapsed run time as a human-readable string (e.g. 'a minute')."""
		return humanize.naturaldelta(self.elapsed)
184
+
185
+ def run(self):
186
+ return list(self.__iter__())
187
+
188
	def __iter__(self):
		"""Iterate over runner results as they are produced.

		Consumes `self.yielder()` (implemented by subclasses), converts raw
		items to output types, de-duplicates by uuid, prints items according
		to the print options, then filters results and fires 'on_end' hooks.

		Yields:
			OutputType | str: Converted items, or raw output lines.
		"""
		if self.print_start:
			self.log_start()

		# Abort early if input validation failed in __init__
		if not self.input_valid:
			return
		try:
			for item in self.yielder():

				if isinstance(item, (OutputType, DotMap, dict)):

					# Handle direct yield of item
					item = self._process_item(item)
					if not item:
						continue

					# Discard item if needed (already seen uuid)
					if item._uuid in self.uuids:
						continue

					# Add item to results (unknown-typed items are kept only in orig mode)
					if isinstance(item, OutputType) or self.orig:
						self.results.append(item)
						self.results_count += 1
						self.uuids.append(item._uuid)
						yield item

					# Print JSON or raw item
					if self.print_item and item._type != 'target':
						if not isinstance(item, OutputType) and not self.orig:
							item_str = rich_to_ansi(
								f'[dim red]❌ Failed to load item as output type:\n {item.toDict()}[/]'
							)
							self.output += item_str + '\n'
							self._print(item_str, rich=True)
						elif self.print_json:
							self._print(item, out=sys.stdout)
						elif self.print_raw:
							self._print(str(item), out=sys.stdout)
						else:
							item_str = self.get_repr(item)
							if self.print_remote_status or DEBUG > 1:
								item_str += f' [{item._source}]'
							if item._type == 'progress' and not self.print_progress:
								continue
							self._print(item_str, out=sys.stdout)

				elif item and isinstance(item, str):
					if self.print_line:
						self._print(item, out=sys.stderr)
					# Raw lines are only kept as results when not in JSON mode
					if not self.output_json:
						self.results.append(item)
						yield item

				# Accumulate the textual output of the run
				if item:
					if isinstance(item, OutputType):
						self.output += self.get_repr(item) + '\n'
					else:
						self.output += str(item) + '\n'

				self.run_hooks('on_iter')

		except KeyboardInterrupt:
			self._print('Process was killed manually (CTRL+C / CTRL+X).', color='bold red', rich=True)
			if self.result:
				self._print('Revoking remote Celery tasks ...', color='bold red', rich=True)
				self.stop_live_tasks(self.result)

		# Filter results and log info
		self.mark_duplicates()
		self.results = self.filter_results()
		self.log_results()
		self.run_hooks('on_end')
261
+
262
	def mark_duplicates(self):
		"""Mark duplicate results in-place.

		For each item, finds other equal results (equal by value, distinct
		uuid), elects the "main" one via `max()` (relies on the output types'
		ordering), marks the rest as duplicates, links related uuids, and
		fires 'on_item' / 'on_duplicate' hooks accordingly.
		"""
		debug('duplicate check', id=self.config.name, sub='runner.mark_duplicates')
		for item in self.results:
			debug('duplicate check', obj=item.toDict(), obj_breaklines=True, sub='runner.mark_duplicates', level=2)
			others = [f for f in self.results if f == item and f._uuid != item._uuid]
			if others:
				main = max(item, *others)
				dupes = [f for f in others if f._uuid != main._uuid]
				main._duplicate = False
				main._related.extend([dupe._uuid for dupe in dupes])
				# De-duplicate related uuids while preserving order
				main._related = list(dict.fromkeys(main._related))
				if main._uuid != item._uuid:
					item._duplicate = True
					item = self.run_hooks('on_item', item)
					if item._uuid not in main._related:
						main._related.append(item._uuid)
					main = self.run_hooks('on_duplicate', main)
					item = self.run_hooks('on_duplicate', item)

				# Fire 'on_duplicate' once per newly-discovered duplicate
				for dupe in dupes:
					if not dupe._duplicate:
						debug(
							'found new duplicate', obj=dupe.toDict(), obj_breaklines=True,
							sub='runner.mark_duplicates', level=2)
						dupe._duplicate = True
						dupe = self.run_hooks('on_duplicate', dupe)

		debug('Duplicates:', sub='runner.mark_duplicates', level=2)
		debug('\n\t'.join([repr(i) for i in self.results if i._duplicate]), sub='runner.mark_duplicates', level=2)
		debug('duplicate check completed', id=self.config.name, sub='runner.mark_duplicates')
292
+
293
	def yielder(self):
		"""Yield raw results; must be implemented by subclasses."""
		raise NotImplementedError()
295
+
296
	def toDict(self):
		"""Serialize runner state to a dict (used for reports and status updates)."""
		return {
			'config': self.config.toDict(),
			'opts': self.config.supported_opts,
			'name': self.name,
			'targets': self.targets,
			'run_opts': self.run_opts,
			'parent': self.parent,
			'has_children': self.has_children,
			'chunk': self.chunk,
			'chunk_count': self.chunk_count,
			'results_count': self.results_count,
			'sync': self.sync,
			'done': self.done,
			'output': self.output,
			'status': self.status,
			'progress': self.progress,
			'start_time': self.start_time,
			'end_time': self.end_time,
			'last_updated': self.last_updated,
			'elapsed': self.elapsed.total_seconds(),
			'elapsed_human': self.elapsed_human,
			'errors': self.errors,
			'context': self.context
		}
321
+
322
+ def run_hooks(self, hook_type, *args):
323
+ result = args[0] if len(args) > 0 else None
324
+ if not self.enable_hooks:
325
+ return result
326
+ for hook in self.hooks[hook_type]:
327
+ name = f'{self.__class__.__name__}.{hook_type}'
328
+ fun = f'{hook.__module__}.{hook.__name__}'
329
+ try:
330
+ _id = self.context.get('task_id', '') or self.context.get('workflow_id', '') or self.context.get('scan_id', '')
331
+ debug('', obj={name + ' [dim yellow]->[/] ' + fun: 'started'}, id=_id, sub='hooks', level=3)
332
+ result = hook(self, *args)
333
+ except Exception as e:
334
+ self._print(f'{fun} failed: "{e.__class__.__name__}". Skipping', color='bold red', rich=True)
335
+ if DEBUG > 1:
336
+ logger.exception(e)
337
+ else:
338
+ self._print('Please set DEBUG to > 1 to see the detailed exception.', color='dim red', rich=True)
339
+ return result
340
+
341
+ def run_validators(self, validator_type, *args):
342
+ # logger.debug(f'Running validators of type {validator_type}')
343
+ for validator in self.validators[validator_type]:
344
+ # logger.debug(validator)
345
+ if not validator(self, *args):
346
+ if validator_type == 'input':
347
+ self._print(f'{validator.__doc__}', color='bold red', rich=True)
348
+ return False
349
+ return True
350
+
351
+ def resolve_exporters(self):
352
+ """Resolve exporters from output options."""
353
+ output = self.run_opts.get('output', '')
354
+ if output == '':
355
+ return self.default_exporters
356
+ elif output is False:
357
+ return []
358
+ exporters = [
359
+ import_dynamic(f'secator.exporters.{o.capitalize()}Exporter', 'Exporter')
360
+ for o in output.split(',')
361
+ if o
362
+ ]
363
+ return [e for e in exporters if e]
364
+
365
+ def log_start(self):
366
+ """Log runner start."""
367
+ remote_str = 'starting' if self.sync else 'sent to Celery worker'
368
+ runner_name = self.__class__.__name__
369
+ self.log_header()
370
+ self._print(
371
+ f':tada: {runner_name} [bold magenta]{self.config.name}[/] {remote_str}...', rich=True)
372
+ if not self.sync and self.print_remote_status and self.__class__.__name__ != 'Scan':
373
+ self._print('\n🏆 [bold gold3]Live results:[/]', rich=True)
374
+
375
	def log_header(self):
		"""Log runner header panel: description, workspace, targets, options, exporters."""
		runner_name = self.__class__.__name__

		# Description
		panel_str = f':scroll: [bold gold3]Description:[/] {self.config.description}'

		# Workspace
		if self.workspace_name:
			panel_str += f'\n:construction_worker: [bold gold3]Workspace:[/] {self.workspace_name}'

		# Targets
		if self.targets:
			panel_str += '\n:pear: [bold gold3]Targets:[/]'
			for target in self.targets:
				panel_str += f'\n • {target}'

		# Options (exclude output/formatting opts that don't affect the run itself)
		DISPLAY_OPTS_EXCLUDE = [
			'sync', 'worker', 'debug', 'output', 'json', 'orig', 'raw', 'format', 'quiet'
		]
		items = [
			f'[italic]{k}[/]: {v}'
			for k, v in self.run_opts.items()
			if k not in DISPLAY_OPTS_EXCLUDE
			and v is not None
		]
		if items:
			panel_str += '\n:pushpin: [bold gold3]Options:[/]'
			for item in items:
				panel_str += f'\n • {item}'

		# Exporters (display the lowercase short name, e.g. CsvExporter -> csv)
		if self.exporters:
			panel_str += '\n:email: [bold gold3]Exporters:[/]'
			for exporter in self.exporters:
				exporter_name = exporter.__name__.replace('Exporter', '').lower()
				panel_str += f'\n • {exporter_name}'

		panel = Panel(
			panel_str,
			title=f'[bold gold3]{runner_name}[/] [bold magenta]{self.config.name}[/]',
			border_style='bold gold3',
			expand=False,
			highlight=True
		)
		self._print('\n')
		self._print(panel, rich=True)
422
+
423
	def log_results(self):
		"""Finalize the run and log results.

		Marks the runner as done, computes the final status, prints the run
		summary / infos / errors, builds and sends the report through the
		configured exporters, and prints per-type result counts.
		"""
		self.done = True
		self.progress = 100
		self.results_count = len(self.results)
		self.status = 'SUCCESS' if not self.errors else 'FAILED'
		self.end_time = datetime.fromtimestamp(time())

		# Log execution results
		status = 'succeeded' if not self.errors else '[bold red]failed[/]'
		if self.print_run_summary:
			self._print('\n')
			self._print(
				f':tada: [bold green]{self.__class__.__name__.capitalize()}[/] [bold magenta]{self.config.name}[/] '
				f'[bold green]{status} in[/] [bold gold3]{self.elapsed_human}[/].', rich=True)

		# Log runner infos
		if self.infos:
			self._print(
				f'✓ [bold magenta]{self.config.name}[/] infos ({len(self.infos)}):',
				color='bold green', rich=True)
			for info in self.infos:
				self._print(f' • {info}', color='bold green', rich=True)

		# Log runner errors
		if self.errors:
			self._print(
				f'❌ [bold magenta]{self.config.name}[/] errors ({len(self.errors)}):',
				color='bold red', rich=True)
			for error in self.errors:
				self._print(f' • {error}', color='bold red', rich=True)

		# Build and send report (only when there is something to report)
		if self.results:
			report = Report(self, exporters=self.exporters)
			report.build()
			report.send()
			self.report = report

		# Log results count
		if self.print_item_count and not self.print_raw and not self.orig:
			count_map = self._get_results_count()
			if all(count == 0 for count in count_map.values()):
				self._print(':adhesive_bandage: Found 0 results.', color='bold red', rich=True)
			else:
				results_str = ':pill: Found ' + ' and '.join([
					f'{count} {pluralize(name) if count > 1 or count == 0 else name}'
					for name, count in count_map.items()
				]) + '.'
				self._print(results_str, color='bold green', rich=True)
478
+
479
	@staticmethod
	def get_live_results(result):
		"""Poll Celery subtasks results in real-time. Fetch task metadata and partial results from each task that runs.

		Args:
			result (celery.result.AsyncResult): Result object.

		Yields:
			dict: Subtasks state and results.
		"""
		res = AsyncResult(result.id)
		while True:
			# Yield results
			yield from Runner.get_celery_results(result)

			# Break out of while loop.
			# NOTE(review): a final pass is done after ready() so results that
			# landed between the last poll and completion are not missed —
			# duplicate items are filtered upstream by uuid.
			if res.ready():
				yield from Runner.get_celery_results(result)
				break

			# Sleep between updates
			sleep(1)
501
+
502
	@staticmethod
	def get_celery_results(result):
		"""Get Celery results from main result object, including any subtasks results.

		Args:
			result (celery.result.AsyncResult): Result object.

		Yields:
			dict: Subtasks state and results, Progress objects.
		"""
		task_ids = []
		get_task_ids(result, ids=task_ids)
		datas = []
		for task_id in task_ids:
			data = get_task_data(task_id)
			if data and DEBUG > 1:
				full_name = data['name']
				if data['chunk_info']:
					full_name += ' ' + data['chunk_info']
				debug('', sub='celery.runner', id=data['id'], obj={full_name: data['state']}, level=4)
			if not data:
				continue
			yield data
			datas.append(data)

		# Calculate and yield progress as percentage of subtasks that are ready
		total = len(datas)
		count_finished = sum([i['ready'] for i in datas if i])
		percent = int(count_finished * 100 / total) if total > 0 else 0
		if percent > 0:
			yield Progress(duration='unknown', percent=percent)
533
+
534
+ def stop_live_tasks(self, result):
535
+ """Stop live tasks running in Celery worker.
536
+
537
+ Args:
538
+ result (AsyncResult | GroupResult): Celery result.
539
+ """
540
+ task_ids = []
541
+ get_task_ids(result, ids=task_ids)
542
+ for task_id in task_ids:
543
+ from secator.celery import revoke_task
544
+ revoke_task(task_id)
545
+
546
	def process_live_tasks(self, result, description=True, results_only=True, print_remote_status=True):
		"""Rich progress indicator showing live tasks statuses.

		Args:
			result (AsyncResult | GroupResult): Celery result.
			description (bool): Show the task description column.
			results_only (bool): Yield only results, no task state.
			print_remote_status (bool): Render the live status table.

		Yields:
			dict: Subtasks state and results.
		"""
		config_name = self.config.name
		runner_name = self.__class__.__name__.capitalize()

		# Display live results if print_remote_status is set
		if print_remote_status:
			class PanelProgress(RichProgress):
				def get_renderables(self):
					yield Padding(Panel(
						self.make_tasks_table(self.tasks),
						title=f'[bold gold3]{runner_name}[/] [bold magenta]{config_name}[/] results',
						border_style='bold gold3',
						expand=False,
						highlight=True), pad=(2, 0, 0, 0))

			tasks_progress = PanelProgress(
				SpinnerColumn('dots'),
				TextColumn('{task.fields[descr]} ') if description else '',
				TextColumn('[bold cyan]{task.fields[name]}[/]'),
				TextColumn('[dim gold3]{task.fields[chunk_info]}[/]'),
				TextColumn('{task.fields[state]:<20}'),
				TimeElapsedColumn(),
				TextColumn('{task.fields[count]}'),
				# TextColumn('{task.fields[progress]}%'),
				# TextColumn('\[[bold magenta]{task.fields[id]:<30}[/]]'), # noqa: W605
				refresh_per_second=1,
				transient=False,
				# console=console,
				# redirect_stderr=True,
				# redirect_stdout=False
			)
			state_colors = {
				'RUNNING': 'bold yellow',
				'SUCCESS': 'bold green',
				'FAILURE': 'bold red',
				'REVOKED': 'bold magenta'
			}
		else:
			# No rendering: nullcontext makes `progress` None below, but the
			# status-table code paths are all guarded by print_remote_status.
			tasks_progress = nullcontext()

		with tasks_progress as progress:

			# Map of Celery task id -> Rich progress task id
			tasks_progress = {}

			# Get live results and print progress
			for data in Runner.get_live_results(result):

				# If progress object, yield progress and ignore tracking
				if isinstance(data, OutputType) and data._type == 'progress':
					yield data
					continue

				# TODO: add error output type and yield errors in get_celery_results
				# if isinstance(data, OutputType) and data._type == 'error':
				#	yield data
				#	continue

				# Re-yield so that we can consume it externally
				if results_only:
					yield from data['results']
				else:
					yield data

				if not print_remote_status:
					continue

				# Ignore partials in output unless DEBUG > 1
				# TODO: weird to change behavior based on debug flag, could cause issues
				# if data['chunk'] and not DEBUG > 1:
				# 	continue

				# Handle messages if any
				# TODO: error handling should be moved to process_live_tasks
				state = data['state']
				error = data.get('error')
				info = data.get('info')
				full_name = data['name']
				chunk_info = data.get('chunk_info', '')
				if chunk_info:
					full_name += f' {chunk_info}'
				if error:
					state = 'FAILURE'
					error = f'{full_name}: {error}'
					if error not in self.errors:
						self.errors.append(error)
				if info:
					info = f'{full_name}: {info}'
					if info not in self.infos:
						self.infos.append(info)

				task_id = data['id']
				state_str = f'[{state_colors[state]}]{state}[/]'
				data['state'] = state_str

				# Register a new progress row, or advance the existing one
				if task_id not in tasks_progress:
					id = progress.add_task('', **data)
					tasks_progress[task_id] = id
				else:
					progress_id = tasks_progress[task_id]
					if state in ['SUCCESS', 'FAILURE']:
						progress.update(progress_id, advance=100, **data)
					elif data['progress'] != 0:
						progress.update(progress_id, advance=data['progress'], **data)

			# Update all tasks to 100 %
			for progress_id in tasks_progress.values():
				progress.update(progress_id, advance=100)
663
+
664
+ def filter_results(self):
665
+ """Filter runner results using extractors defined in config."""
666
+ extractors = self.config.results
667
+ results = []
668
+ if extractors:
669
+ # Keep results based on extractors
670
+ opts = merge_opts(self.config.options, self.run_opts)
671
+ for extractor in extractors:
672
+ tmp = process_extractor(self.results, extractor, ctx=opts)
673
+ results.extend(tmp)
674
+
675
+ # Keep the field types in results not specified in the extractors.
676
+ extract_fields = [e['type'] for e in extractors]
677
+ keep_fields = [
678
+ _type for _type in OUTPUT_TYPES if _type.__name__ != 'Progress'
679
+ if _type not in extract_fields
680
+ ]
681
+ results.extend([
682
+ item for item in self.results
683
+ if item._type in keep_fields
684
+ ])
685
+ else:
686
+ results = self.results
687
+ return results
688
+
689
	def _convert_item_schema(self, item):
		"""Convert dict item to a new structure using the class output schema.

		Args:
			item (dict): Item.

		Returns:
			OutputType | DotMap: Item loaded as the first matching output type,
				or a DotMap with _type 'unknown' when nothing matches.
		"""
		# Load item using available output types and get the first matching
		# output type based on the schema
		new_item = None
		output_types = getattr(self, 'output_types', [])
		debug(f'Input item: {item}', sub='klass.load', level=5)
		debug(f'Output types to try: {[o.__name__ for o in output_types]}', sub='klass.load', level=5)
		for klass in output_types:
			debug(f'Loading item as {klass.__name__}', sub='klass.load', level=5)
			# Per-type field mapping (raw tool field -> output type field)
			output_map = getattr(self, 'output_map', {})
			output_map = output_map.get(klass, {})
			try:
				new_item = klass.load(item, output_map)
				debug(f'[dim green]Successfully loaded item as {klass.__name__}[/]', sub='klass.load', level=5)
				break  # found an item that fits
			except (TypeError, KeyError) as e:  # can't load using class
				debug(
					f'[dim red]Failed loading item as {klass.__name__}: {str(e)}.[/] [dim green]Continuing.[/]',
					sub='klass.load',
					level=5)
				if DEBUG == 6:
					console.print_exception(show_locals=False)
				continue

		# No output type was found, so make no conversion
		if not new_item:
			new_item = DotMap(item)
			new_item._type = 'unknown'

		return new_item
727
+
728
+ def _print(self, data, color=None, out=sys.stderr, rich=False):
729
+ """Print function.
730
+
731
+ Args:
732
+ data (str or dict): Input data.
733
+ color (str, Optional): Termcolor color.
734
+ out (str, Optional): Output pipe (sys.stderr, sys.stdout, ...)
735
+ rich (bool, Optional): Force rich output.
736
+ """
737
+ # Print a JSON item
738
+ if isinstance(data, (OutputType, DotMap, dict)):
739
+ if getattr(data, 'toDict', None):
740
+ data = data.toDict()
741
+ data = json.dumps(data)
742
+ data = f'{self.prefix:>15} {data}' if self.prefix and not self.print_item else data
743
+
744
+ if self.sync or rich:
745
+ _console = console_stdout if out == sys.stdout else console
746
+ _console.print(data, highlight=False, style=color, soft_wrap=True)
747
+ else:
748
+ print(data, file=out)
749
+
750
+ # # Print a line using Rich console
751
+ # if rich:
752
+ # _console = console_stdout if out == sys.stdout else console
753
+ # _console.print(data, highlight=False, style=color, soft_wrap=True)
754
+
755
+ # # Print a line using Rich markup
756
+ # elif markup:
757
+ # from rich import print as _print
758
+ # from rich.text import Text
759
+ # _print(Text.from_markup(data), file=out)
760
+
761
+ # # Print a line raw
762
+ # else:
763
+ # print(data, file=out)
764
+
765
+ def _set_print_prefix(self):
766
+ self.prefix = ''
767
+ if self.print_cmd_prefix:
768
+ self.prefix = f'[bold gold3]({self.config.name})[/]'
769
+ if self.chunk and self.chunk_count:
770
+ self.prefix += f' [{self.chunk}/{self.chunk_count}]'
771
+
772
+ def _get_results_count(self):
773
+ count_map = {}
774
+ for output_type in self.output_types:
775
+ if output_type.__name__ == 'Progress':
776
+ continue
777
+ name = output_type.get_name()
778
+ count = len([r for r in self.results if r._type == name])
779
+ count_map[name] = count
780
+ return count_map
781
+
782
	def _process_item(self, item: dict):
		"""Validate, convert and enrich a single raw item.

		Args:
			item (dict | OutputType): Raw item from the yielder.

		Returns:
			OutputType | DotMap | None: Processed item, or None if the item
				was filtered out by validators, hooks, or progress throttling.
		"""
		# Run item validators
		if not self.run_validators('item', item):
			return None

		# Run item hooks
		item = self.run_hooks('on_item_pre_convert', item)
		if not item:
			return None

		# Convert output dict to another schema
		if isinstance(item, dict) and not self.orig:
			item = self._convert_item_schema(item)
		elif isinstance(item, OutputType):
			pass
		else:
			item = DotMap(item)

		# Update item context
		item._context.update(self.context)

		# Add context, uuid, progress to item
		if not item._source:
			item._source = self.config.name

		if not item._uuid:
			item._uuid = str(uuid.uuid4())

		# Throttle own progress items: drop updates arriving faster than
		# DEFAULT_PROGRESS_UPDATE_FREQUENCY apart (final 100% always passes)
		if item._type == 'progress' and item._source == self.config.name and int(item.percent) != 100:
			self.progress = item.percent
			if self.last_updated_progress and (item._timestamp - self.last_updated_progress) < DEFAULT_PROGRESS_UPDATE_FREQUENCY:
				return None
			else:
				self.last_updated_progress = item._timestamp

		# Run on_item hooks
		if isinstance(item, OutputType) and not self.orig:
			item = self.run_hooks('on_item', item)

		return item
822
+
823
+ def get_repr(self, item=None):
824
+ if not item:
825
+ return [
826
+ self.get_repr(item)
827
+ for item in self.results
828
+ ]
829
+ if self.output_fmt:
830
+ item = self.output_fmt.format(**item.toDict())
831
+ elif isinstance(item, OutputType):
832
+ item = repr(item)
833
+ return item