moriarty-project 0.1.22__py3-none-any.whl → 0.1.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. moriarty/__init__.py +1 -1
  2. moriarty/cli/app.py +4 -3
  3. moriarty/cli/domain_cmd.py +5 -1
  4. moriarty/modules/directory_fuzzer.py +25 -5
  5. moriarty/modules/web_crawler.py +448 -91
  6. {moriarty_project-0.1.22.dist-info → moriarty_project-0.1.24.dist-info}/METADATA +3 -3
  7. {moriarty_project-0.1.22.dist-info → moriarty_project-0.1.24.dist-info}/RECORD +9 -27
  8. moriarty/modules/wifippler/__init__.py +0 -92
  9. moriarty/modules/wifippler/cli/__init__.py +0 -8
  10. moriarty/modules/wifippler/cli/commands.py +0 -123
  11. moriarty/modules/wifippler/core/__init__.py +0 -94
  12. moriarty/modules/wifippler/core/attacks/__init__.py +0 -146
  13. moriarty/modules/wifippler/core/attacks/deauth.py +0 -262
  14. moriarty/modules/wifippler/core/attacks/handshake.py +0 -402
  15. moriarty/modules/wifippler/core/attacks/pmkid.py +0 -424
  16. moriarty/modules/wifippler/core/attacks/wep.py +0 -467
  17. moriarty/modules/wifippler/core/attacks/wpa.py +0 -446
  18. moriarty/modules/wifippler/core/attacks/wps.py +0 -474
  19. moriarty/modules/wifippler/core/models/__init__.py +0 -10
  20. moriarty/modules/wifippler/core/models/network.py +0 -240
  21. moriarty/modules/wifippler/core/scanner.py +0 -903
  22. moriarty/modules/wifippler/core/utils/__init__.py +0 -624
  23. moriarty/modules/wifippler/core/utils/exec.py +0 -182
  24. moriarty/modules/wifippler/core/utils/network.py +0 -262
  25. moriarty/modules/wifippler/core/utils/system.py +0 -153
  26. {moriarty_project-0.1.22.dist-info → moriarty_project-0.1.24.dist-info}/WHEEL +0 -0
  27. {moriarty_project-0.1.22.dist-info → moriarty_project-0.1.24.dist-info}/entry_points.txt +0 -0
moriarty/modules/web_crawler.py
@@ -1,10 +1,14 @@
- """Lightweight HTTP crawler focused on route and form enumeration."""
+ """Advanced HTTP crawler for route and form enumeration with support for redirects and block evasion."""
  from __future__ import annotations

  import asyncio
  import random
+ import time
+ import ssl
+ import certifi
  from dataclasses import dataclass, field
- from typing import Dict, List, Optional, Set, TYPE_CHECKING
+ from typing import Dict, List, Optional, Set, Tuple, Any, TYPE_CHECKING
+ from urllib.parse import urlparse, urljoin

  import httpx
  from selectolax.parser import HTMLParser
@@ -15,149 +19,502 @@ if TYPE_CHECKING: # pragma: no cover - only for type hints

  logger = structlog.get_logger(__name__)

+ # Realistic browser headers
+ DEFAULT_HEADERS = {
+     "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
+     "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
+     "Accept-Language": "pt-BR,pt;q=0.8,en-US;q=0.5,en;q=0.3",
+     "Accept-Encoding": "gzip, deflate, br",
+     "Connection": "keep-alive",
+     "Upgrade-Insecure-Requests": "1",
+     "Sec-Fetch-Dest": "document",
+     "Sec-Fetch-Mode": "navigate",
+     "Sec-Fetch-Site": "none",
+     "Sec-Fetch-User": "?1",
+     "Cache-Control": "max-age=0",
+ }
+
+ # User-agent pool for rotation
+ USER_AGENTS = [
+     "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
+     "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0",
+     "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/605.1.15",
+     "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36 Edg/91.0.864.59",
+     "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
+ ]
+
+ # Referrer pool for rotation
+ REFERRERS = [
+     "https://www.google.com/",
+     "https://www.bing.com/",
+     "https://www.yahoo.com/",
+     "https://duckduckgo.com/",
+     ""
+ ]

  @dataclass
  class CrawlPage:
+     """Represents a crawled web page."""
      url: str
      status: int
      title: Optional[str] = None
-     forms: List[Dict[str, str]] = field(default_factory=list)
+     forms: List[Dict[str, Any]] = field(default_factory=list)
      links: List[str] = field(default_factory=list)
+     redirect_chain: List[Tuple[str, int]] = field(default_factory=list)
+     error: Optional[str] = None


  class WebCrawler:
-     """Simple crawler limited to a single domain, ideal for pre-enumeration."""
+     """Advanced crawler with support for redirects and block evasion."""

      def __init__(
          self,
          base_url: str,
          max_pages: int = 100,
          max_depth: int = 2,
-         concurrency: int = 10,
+         concurrency: int = 5,  # Reduced to avoid overloading the target
          follow_subdomains: bool = False,
-         user_agent: str = "Mozilla/5.0 (Moriarty Recon)",
+         user_agent: Optional[str] = None,
          stealth: Optional["StealthMode"] = None,
+         request_delay: Tuple[float, float] = (1.0, 3.0),  # Random delay between requests (min, max)
+         timeout: float = 30.0,  # Request timeout
+         verify_ssl: bool = True,  # Verify SSL certificates
+         max_redirects: int = 5,  # Maximum number of redirects
+         respect_robots: bool = True,  # Respect robots.txt
      ):
          self.base_url = base_url.rstrip("/")
          self.max_pages = max_pages
          self.max_depth = max_depth
          self.concurrency = concurrency
          self.follow_subdomains = follow_subdomains
+
+         # Request settings
+         self.request_delay = request_delay
+         self.timeout = timeout
+         self.max_redirects = max_redirects
+         self.verify_ssl = verify_ssl
+         self.respect_robots = respect_robots
+
+         # Stealth settings
+         self.stealth = stealth
+         self.user_agent = user_agent or random.choice(USER_AGENTS)
+         self.session_cookies: Dict[str, str] = {}
+         self.last_request_time: float = 0
+
+         # Domain settings
+         self.parsed_base_url = self._parse_url(base_url)
+         self.base_domain = self._get_base_domain(self.parsed_base_url.hostname or '')
+         self.allowed_domains = {self.base_domain}
+         if follow_subdomains:
+             self.allowed_domains.add(f".{self.base_domain}")
+
+         # Crawler state
          self.visited: Set[str] = set()
          self.results: Dict[str, CrawlPage] = {}
-         self.stealth = stealth
-         self.user_agent = user_agent
-
-         effective_concurrency = concurrency
-         if self.stealth and getattr(self.stealth.config, "timing_randomization", False):
-             # Reduce concurrency for high stealth modes
-             effective_concurrency = max(2, min(concurrency, int(concurrency / (self.stealth.level or 1))))
-
-         self.sem = asyncio.Semaphore(effective_concurrency)
-         self.session = httpx.AsyncClient(timeout=10.0, follow_redirects=True)
-
-         parsed = httpx.URL(self.base_url)
-         self._host = parsed.host
-         self._scheme = parsed.scheme
-
-     async def close(self) -> None:
-         await self.session.aclose()
+         self.robots_txt: Optional[Dict[str, Any]] = None
+
+         # HTTP client configuration
+         self.session: Optional[httpx.AsyncClient] = None
+         self.sem: Optional[asyncio.Semaphore] = None

+     async def _init_session(self) -> None:
+         """Initializes the HTTP session with security and performance settings."""
+         # SSL configuration
+         ssl_context = ssl.create_default_context(cafile=certifi.where())
+         if not self.verify_ssl:
+             ssl_context.check_hostname = False
+             ssl_context.verify_mode = ssl.CERT_NONE
+
+         # HTTP transport configuration
+         limits = httpx.Limits(
+             max_keepalive_connections=10,
+             max_connections=20,
+             keepalive_expiry=60.0
+         )
+
+         # HTTP client configuration
+         self.session = httpx.AsyncClient(
+             timeout=self.timeout,
+             follow_redirects=True,
+             max_redirects=self.max_redirects,
+             http_versions=["HTTP/1.1", "HTTP/2"],
+             limits=limits,
+             verify=ssl_context if self.verify_ssl else False,
+             headers=DEFAULT_HEADERS.copy(),
+             cookies=self.session_cookies
+         )
+
+         # Update the user-agent
+         if self.user_agent:
+             self.session.headers["User-Agent"] = self.user_agent
+
+         # Add extra stealth headers
+         self.session.headers.update({
+             "Accept-Language": "pt-BR,pt;q=0.9,en-US;q=0.8,en;q=0.7",
+             "Accept-Encoding": "gzip, deflate, br",
+             "DNT": "1",
+             "Upgrade-Insecure-Requests": "1"
+         })
+
+         # Configure the semaphore that limits concurrency
+         self.sem = asyncio.Semaphore(self.concurrency)
+
+         # Check robots.txt if required
+         if self.respect_robots:
+             await self._check_robots_txt()
+
+     async def _check_robots_txt(self) -> None:
+         """Checks the robots.txt file and updates the access rules."""
+         if not self.session:
+             return
+
+         robots_url = f"{self.parsed_base_url.scheme}://{self.parsed_base_url.netloc}/robots.txt"
+         try:
+             response = await self.session.get(robots_url)
+             if response.status_code == 200:
+                 # A more sophisticated robots.txt parser could be plugged in here
+                 self.robots_txt = {"content": response.text}
+                 logger.info("robots_txt_found", url=robots_url)
+         except Exception as e:
+             logger.warning("robots_txt_error", url=robots_url, error=str(e))
+
+     async def _random_delay(self) -> None:
+         """Waits a random amount of time between requests to avoid blocks."""
+         if self.request_delay:
+             min_delay, max_delay = self.request_delay
+             delay = random.uniform(min_delay, max_delay)
+             elapsed = time.time() - self.last_request_time
+             if elapsed < delay:
+                 await asyncio.sleep(delay - elapsed)
+             self.last_request_time = time.time()
+
      async def crawl(self) -> Dict[str, CrawlPage]:
+         """Starts the site crawling process.
+
+         Returns:
+             Dict[str, CrawlPage]: Dictionary of the pages found, keyed by URL.
+         """
+         # Initialize the HTTP session
+         if not self.session:
+             await self._init_session()
+
+         # Initialize the queue of URLs to process
          queue: asyncio.Queue = asyncio.Queue()
-         await queue.put((self.base_url, 0))
+         initial_url = f"{self.parsed_base_url.scheme}://{self.parsed_base_url.netloc}"
+         await queue.put((initial_url, 0))

-         async def worker():
+         # Worker coroutine that processes URLs in parallel
+         async def worker() -> None:
              while True:
                  try:
                      url, depth = queue.get_nowait()
                  except asyncio.QueueEmpty:
                      break
+
+                 # Check the page and depth limits
                  if len(self.results) >= self.max_pages or depth > self.max_depth:
                      continue
+
+                 # Avoid processing the same URL more than once
                  if url in self.visited:
                      continue
-                 self.visited.add(url)
+
+                 # Wait a random delay between requests
+                 await self._random_delay()
+
+                 # Process the URL
                  await self._fetch(url, depth, queue)
+
+                 # Update the processed-page counter
                  queue.task_done()

+         # Start the workers
          workers = [asyncio.create_task(worker()) for _ in range(self.concurrency)]
          await asyncio.gather(*workers)
          return self.results

+     def _parse_url(self, url: str) -> httpx.URL:
+         """Parses a URL and returns an httpx URL object."""
+         try:
+             return httpx.URL(url)
+         except Exception as e:
+             logger.error("url_parse_error", url=url, error=str(e))
+             raise ValueError(f"URL inválida: {url}") from e
+
+     def _get_base_domain(self, hostname: str) -> str:
+         """Extracts the base domain from a hostname."""
+         if not hostname:
+             return ""
+         parts = hostname.split(".")
+         if len(parts) > 2:
+             return ".".join(parts[-2:])
+         return hostname
+
+     def _is_same_domain(self, url: str) -> bool:
+         """Checks whether a URL belongs to the same domain as the target."""
+         try:
+             parsed = self._parse_url(url)
+             if not parsed.host:
+                 return False
+
+             # Check whether the domain is the same or a subdomain
+             if self.follow_subdomains:
+                 return parsed.host.endswith(self.base_domain) or f".{parsed.host}".endswith(f".{self.base_domain}")
+             return parsed.host == self.parsed_base_url.host
+         except Exception:
+             return False
+
+     def _normalize_url(self, url: str, base_url: Optional[str] = None) -> str:
+         """Normalizes a URL, resolving relative URLs and stripping fragments."""
+         try:
+             if not url:
+                 return ""
+
+             # Strip fragments and whitespace
+             url = url.split("#")[0].strip()
+             if not url:
+                 return ""
+
+             # If the URL is relative, resolve it against base_url
+             if base_url and not url.startswith(('http://', 'https://')):
+                 base = self._parse_url(base_url)
+                 url = str(base.join(url))
+
+             # Parse the URL for normalization
+             parsed = self._parse_url(url)
+
+             # Remove common tracking parameters
+             if parsed.query:
+                 query_params = []
+                 for param in parsed.query.decode().split('&'):
+                     if '=' in param and any(t in param.lower() for t in ['utm_', 'ref=', 'source=', 'fbclid=', 'gclid=']):
+                         continue
+                     query_params.append(param)
+
+                 # Rebuild the URL without the tracking parameters
+                 if query_params:
+                     parsed = parsed.copy_with(query='&'.join(query_params))
+                 else:
+                     parsed = parsed.copy_with(query=None)
+
+             # Remove unnecessary trailing slashes
+             path = parsed.path.decode()
+             if path.endswith('/'):
+                 path = path.rstrip('/') or '/'
+                 parsed = parsed.copy_with(path=path)
+
+             return str(parsed)
+
+         except Exception as e:
+             logger.warning("url_normalize_error", url=url, error=str(e))
+             return url
+
+     def _build_headers(self, referer: Optional[str] = None) -> Dict[str, str]:
+         """Builds the headers for the HTTP request."""
+         headers = DEFAULT_HEADERS.copy()
+
+         # Rotate the User-Agent
+         headers["User-Agent"] = random.choice(USER_AGENTS)
+
+         # Add the referer if provided
+         if referer:
+             headers["Referer"] = referer
+         else:
+             headers["Referer"] = random.choice(REFERRERS)
+
+         return headers
+
+     async def _stealth_delay(self) -> None:
+         """Applies a random delay to avoid detection."""
+         if self.stealth and hasattr(self.stealth, 'get_delay'):
+             delay = self.stealth.get_delay()
+             if delay > 0:
+                 await asyncio.sleep(delay)
+
      async def _fetch(self, url: str, depth: int, queue: asyncio.Queue) -> None:
-         async with self.sem:
+         """
+         Fetches a URL and processes the links found.
+
+         Args:
+             url: URL to request
+             depth: Current crawl depth
+             queue: Queue of URLs to process
+         """
+         if not self.session:
+             logger.error("session_not_initialized")
+             return
+
+         # Mark the URL as visited
+         self.visited.add(url)
+
+         try:
+             # Apply a stealth delay, if needed
+             await self._stealth_delay()
+
+             # Prepare the request headers
+             headers = self._build_headers()
+
+             # Attempt the request with error handling
              try:
-                 await self._stealth_delay()
-                 response = await self.session.get(url, headers=self._build_headers())
-             except Exception as exc:
-                 logger.debug("crawler.fetch.error", url=url, error=str(exc))
+                 response = await self.session.get(
+                     url,
+                     headers=headers,
+                     follow_redirects=True,
+                     timeout=self.timeout
+                 )
+
+                 # Record the time of the last request
+                 self.last_request_time = time.time()
+
+             except httpx.HTTPStatusError as e:
+                 logger.warning("http_status_error", url=url, status_code=e.response.status_code)
+                 self.results[url] = CrawlPage(
+                     url=url,
+                     status=e.response.status_code,
+                     error=f"HTTP Error: {e.response.status_code}"
+                 )
                  return
-
-             page = CrawlPage(url=url, status=response.status_code)
+
+             except httpx.RequestError as e:
+                 logger.warning("request_error", url=url, error=str(e))
+                 self.results[url] = CrawlPage(
+                     url=url,
+                     status=0,
+                     error=f"Request Error: {str(e)}"
+                 )
+                 return
+
+             except Exception as e:
+                 logger.error("unexpected_error", url=url, error=str(e))
+                 self.results[url] = CrawlPage(
+                     url=url,
+                     status=0,
+                     error=f"Unexpected Error: {str(e)}"
+                 )
+                 return
+
+             # Process the response
+             await self._process_response(url, response, depth, queue)
+
+         except Exception as e:
+             logger.error("fetch_error", url=url, error=str(e))
+             self.results[url] = CrawlPage(
+                 url=url,
+                 status=0,
+                 error=f"Processing Error: {str(e)}"
+             )
+
+     async def _process_response(self, url: str, response: httpx.Response, depth: int, queue: asyncio.Queue) -> None:
+         """
+         Processes the HTTP response and extracts links to continue crawling.
+
+         Args:
+             url: URL that was requested
+             response: HTTP response
+             depth: Current crawl depth
+             queue: Queue of URLs to process
+         """
+         # Create the page object with the basic data
+         page = CrawlPage(
+             url=url,
+             status=response.status_code,
+             redirect_chain=[(str(r.url), r.status_code) for r in response.history]
+         )
+
+         # If the response is not successful or not HTML, stop here
          if response.status_code >= 400 or not response.headers.get("content-type", "").startswith("text"):
              self.results[url] = page
              return
-
-             parser = HTMLParser(response.text)
-             title = parser.css_first("title")
-             page.title = title.text(strip=True) if title else None
-
-             # Forms
+
+         try:
+             # Parse the HTML
+             parser = HTMLParser(response.text)
+
+             # Extract the page title
+             title = parser.css_first("title")
+             if title and hasattr(title, 'text') and callable(title.text):
+                 page.title = title.text(strip=True)
+
+             # Extract the links on the page
+             await self._extract_links(parser, url, depth, queue)
+
+             # Extract the forms on the page
+             self._extract_forms(parser, page)
+
+             # Add the page to the results
+             self.results[url] = page
+
+         except Exception as e:
+             logger.error("process_response_error", url=url, error=str(e))
+             page.error = f"Error processing response: {str(e)}"
+             self.results[url] = page
+
+     async def _extract_links(self, parser: HTMLParser, base_url: str, depth: int, queue: asyncio.Queue) -> None:
+         """Extracts links from the HTML and adds them to the processing queue."""
+         for link in parser.css("a[href]"):
+             try:
+                 href = link.attributes.get("href", "").strip()
+                 if not href or href.startswith("#") or href.startswith("javascript:"):
+                     continue
+
+                 # Normalize the URL
+                 url = self._normalize_url(href, base_url)
+                 if not url:
+                     continue
+
+                 # Check that the URL belongs to the same domain
+                 if not self._is_same_domain(url):
+                     continue
+
+                 # Enqueue the URL if it has not been visited yet
+                 if url not in self.visited and url not in self.results:
+                     queue.put_nowait((url, depth + 1))
+
+             except Exception as e:
+                 logger.warning("link_extraction_error", href=href, error=str(e))
+
+     def _extract_forms(self, parser: HTMLParser, page: CrawlPage) -> None:
+         """Extracts forms from the HTML."""
          for form in parser.css("form"):
-                 action = form.attributes.get("action", url)
-                 method = form.attributes.get("method", "GET").upper()
-                 inputs = [inp.attributes.get("name") for inp in form.css("input") if inp.attributes.get("name")]
-                 page.forms.append(
-                     {
-                         "action": action,
-                         "method": method,
-                         "inputs": ",".join(inputs),
-                     }
-                 )
+             try:
+                 form_data = {"method": form.attributes.get("method", "GET").upper()}
+
+                 # Get the form action
+                 action = form.attributes.get("action", "").strip()
+                 if action:
+                     form_data["action"] = self._normalize_url(action, page.url)
+                 else:
+                     form_data["action"] = page.url
+
+                 # Extract the form fields
+                 form_data["fields"] = []
+                 for field in form.css("input, textarea, select"):
+                     field_data = {
+                         "name": field.attributes.get("name", ""),
+                         "type": field.attributes.get("type", "text"),
+                         "value": field.attributes.get("value", ""),
+                         "required": "required" in field.attributes
+                     }
+                     form_data["fields"].append(field_data)
+
+                 page.forms.append(form_data)
+
+             except Exception as e:
+                 logger.warning("form_extraction_error", error=str(e))
+
+     async def close(self) -> None:
+         """Closes the HTTP session."""
+         if self.session:
+             await self.session.aclose()
+             self.session = None

-             # Links
-             links: Set[str] = set()
-             for anchor in parser.css("a"):
-                 href = anchor.attributes.get("href")
-                 if not href:
-                     continue
-                 href = href.strip()
-                 if href.startswith("javascript:") or href.startswith("mailto:"):
-                     continue
-                 absolute = httpx.URL(href, base=httpx.URL(url)).human_repr()
-                 if not self._should_follow(absolute):
-                     continue
-                 links.add(absolute)
-                 if absolute not in self.visited and len(self.results) < self.max_pages:
-                     await queue.put((absolute, depth + 1))
-             page.links = sorted(links)
-             self.results[url] = page
-
-     def _should_follow(self, url: str) -> bool:
-         parsed = httpx.URL(url)
-         if parsed.scheme not in {"http", "https"}:
-             return False
-         if not self.follow_subdomains and parsed.host != self._host:
-             return False
-         if not parsed.host.endswith(self._host):
-             return False
-         return True
-
-     def _build_headers(self) -> Dict[str, str]:
-         headers: Dict[str, str] = {"User-Agent": self.user_agent, "Accept": "*/*"}
-         if self.stealth:
-             stealth_headers = self.stealth.get_random_headers()
-             headers.update(stealth_headers)
-             headers.setdefault("User-Agent", stealth_headers.get("User-Agent", self.user_agent))
-         return headers
+     async def __aenter__(self):
+         await self._init_session()
+         return self

-     async def _stealth_delay(self) -> None:
-         if not self.stealth:
-             return
-         config = getattr(self.stealth, "config", None)
-         if not config or not getattr(config, "timing_randomization", False):
-             return
-         await asyncio.sleep(random.uniform(0.05, 0.2) * max(1, self.stealth.level))
+     async def __aexit__(self, exc_type, exc_val, exc_tb):
+         await self.close()


+ # For compatibility with existing code
  __all__ = ["WebCrawler", "CrawlPage"]
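
Taken together, the new __aenter__/__aexit__ methods and the reworked crawl() suggest the 0.1.24 crawler is meant to be driven as an async context manager. A minimal usage sketch follows, assuming only what the diff above shows; the module path comes from the file list, and the target URL and printed fields are illustrative, not part of the package:

import asyncio

from moriarty.modules.web_crawler import WebCrawler


async def main() -> None:
    # __aenter__ initializes the httpx session; __aexit__ closes it.
    async with WebCrawler("https://example.com", max_pages=50, max_depth=2) as crawler:
        results = await crawler.crawl()  # Dict[str, CrawlPage]

    for url, page in results.items():
        # CrawlPage now also carries redirect_chain and error alongside
        # status, title, forms and links.
        print(url, page.status, page.title, len(page.forms), page.error)


if __name__ == "__main__":
    asyncio.run(main())
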
{moriarty_project-0.1.22.dist-info → moriarty_project-0.1.24.dist-info}/METADATA
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: moriarty-project
- Version: 0.1.22
+ Version: 0.1.24
  Summary: Client-side OSINT toolkit with forensic-grade evidence handling.
  Project-URL: Homepage, https://github.com/DonatoReis/moriarty
  Project-URL: Documentation, https://github.com/DonatoReis/moriarty#readme
@@ -98,7 +98,7 @@ Description-Content-Type: text/markdown
  <!-- Badges -->
  <p align="center">
  <a href="https://pypi.org/project/moriarty-project/">
- <img src="https://img.shields.io/badge/version-0.1.22-blue" alt="Version 0.1.22">
+ <img src="https://img.shields.io/badge/version-0.1.24-blue" alt="Version 0.1.24">
  </a>
  <a href="https://www.python.org/downloads/">
  <img src="https://img.shields.io/pypi/pyversions/moriarty-project?color=blue" alt="Python Versions">
@@ -152,7 +152,7 @@ Description-Content-Type: text/markdown
  pipx install moriarty-project

  # OR to install a specific version
- # pipx install moriarty-project==0.1.11
+ # pipx install moriarty-project==0.1.24

  # Verify the installation
  moriarty --help