openhack 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. openhack/__init__.py +2 -0
  2. openhack/__main__.py +225 -0
  3. openhack/agents/__init__.py +30 -0
  4. openhack/agents/base.py +230 -0
  5. openhack/agents/browser_verifier.py +679 -0
  6. openhack/agents/browser_verifier_swarm.py +256 -0
  7. openhack/agents/checkpoint.py +89 -0
  8. openhack/agents/context_manager.py +356 -0
  9. openhack/agents/coordinator.py +1105 -0
  10. openhack/agents/endpoint_analyst.py +307 -0
  11. openhack/agents/feature_hunter.py +93 -0
  12. openhack/agents/hunter.py +481 -0
  13. openhack/agents/hunter_swarm.py +385 -0
  14. openhack/agents/llm.py +334 -0
  15. openhack/agents/recon.py +19 -0
  16. openhack/agents/sandbox_verifier.py +396 -0
  17. openhack/agents/sandbox_verifier_swarm.py +250 -0
  18. openhack/agents/session.py +286 -0
  19. openhack/agents/validator.py +217 -0
  20. openhack/agents/validator_swarm.py +106 -0
  21. openhack/auth.py +175 -0
  22. openhack/browser/__init__.py +12 -0
  23. openhack/browser/runner.py +385 -0
  24. openhack/categories.py +130 -0
  25. openhack/config.py +201 -0
  26. openhack/deterministic_recon.py +464 -0
  27. openhack/entry_points.py +745 -0
  28. openhack/framework_classifier.py +515 -0
  29. openhack/framework_detection.py +269 -0
  30. openhack/headless_scan.py +179 -0
  31. openhack/prompts/__init__.py +108 -0
  32. openhack/prompts/browser_verifier.py +171 -0
  33. openhack/prompts/coordinator.py +31 -0
  34. openhack/prompts/django/__init__.py +32 -0
  35. openhack/prompts/django/auth_bypass.py +76 -0
  36. openhack/prompts/django/csrf.py +62 -0
  37. openhack/prompts/django/data_exposure.py +67 -0
  38. openhack/prompts/django/idor.py +74 -0
  39. openhack/prompts/django/injection.py +67 -0
  40. openhack/prompts/django/misconfiguration.py +70 -0
  41. openhack/prompts/django/ssrf.py +64 -0
  42. openhack/prompts/endpoint_analyst.py +122 -0
  43. openhack/prompts/express/__init__.py +29 -0
  44. openhack/prompts/express/auth_bypass.py +71 -0
  45. openhack/prompts/express/data_exposure.py +77 -0
  46. openhack/prompts/express/idor.py +69 -0
  47. openhack/prompts/express/injection.py +75 -0
  48. openhack/prompts/express/misconfiguration.py +72 -0
  49. openhack/prompts/express/ssrf.py +63 -0
  50. openhack/prompts/feature_hunter.py +140 -0
  51. openhack/prompts/flask/__init__.py +29 -0
  52. openhack/prompts/flask/auth_bypass.py +86 -0
  53. openhack/prompts/flask/data_exposure.py +78 -0
  54. openhack/prompts/flask/idor.py +83 -0
  55. openhack/prompts/flask/injection.py +77 -0
  56. openhack/prompts/flask/misconfiguration.py +73 -0
  57. openhack/prompts/flask/ssrf.py +65 -0
  58. openhack/prompts/hunter.py +362 -0
  59. openhack/prompts/hunter_continuation_loop.py +12 -0
  60. openhack/prompts/hunter_continuation_no_findings.py +19 -0
  61. openhack/prompts/hunter_continuation_no_progress.py +22 -0
  62. openhack/prompts/hunter_tool_instructions.py +55 -0
  63. openhack/prompts/nextjs/__init__.py +42 -0
  64. openhack/prompts/nextjs/auth_bypass.py +80 -0
  65. openhack/prompts/nextjs/csrf.py +71 -0
  66. openhack/prompts/nextjs/data_exposure.py +88 -0
  67. openhack/prompts/nextjs/idor.py +64 -0
  68. openhack/prompts/nextjs/injection.py +65 -0
  69. openhack/prompts/nextjs/middleware_bypass.py +75 -0
  70. openhack/prompts/nextjs/misconfiguration.py +92 -0
  71. openhack/prompts/nextjs/server_actions.py +97 -0
  72. openhack/prompts/nextjs/ssrf.py +66 -0
  73. openhack/prompts/nextjs/xss.py +69 -0
  74. openhack/prompts/pr_analysis_system.py +80 -0
  75. openhack/prompts/pr_analysis_user.py +11 -0
  76. openhack/prompts/project_context.py +89 -0
  77. openhack/prompts/recon.py +199 -0
  78. openhack/prompts/reporter.py +88 -0
  79. openhack/prompts/researchers.py +434 -0
  80. openhack/prompts/sandbox_verifier.py +128 -0
  81. openhack/prompts/supabase/__init__.py +39 -0
  82. openhack/prompts/supabase/auth_tokens.py +131 -0
  83. openhack/prompts/supabase/edge_functions.py +150 -0
  84. openhack/prompts/supabase/graphql.py +102 -0
  85. openhack/prompts/supabase/postgrest.py +99 -0
  86. openhack/prompts/supabase/realtime.py +93 -0
  87. openhack/prompts/supabase/rls.py +110 -0
  88. openhack/prompts/supabase/rpc_functions.py +127 -0
  89. openhack/prompts/supabase/storage.py +110 -0
  90. openhack/prompts/supabase/tenant_isolation.py +118 -0
  91. openhack/prompts/validator.py +319 -0
  92. openhack/prompts/validator_continuation_incomplete.py +12 -0
  93. openhack/prompts/validator_tool_instructions.py +29 -0
  94. openhack/quality.py +231 -0
  95. openhack/sandbox/__init__.py +12 -0
  96. openhack/sandbox/orchestrator.py +517 -0
  97. openhack/sandbox/runner.py +177 -0
  98. openhack/scan_session.py +245 -0
  99. openhack/setup.py +452 -0
  100. openhack/static_validator.py +612 -0
  101. openhack/tools/__init__.py +1 -0
  102. openhack/tools/ast_tools.py +307 -0
  103. openhack/tools/coverage.py +1078 -0
  104. openhack/tools/filesystem.py +404 -0
  105. openhack/tools/nextjs.py +258 -0
  106. openhack/tools/registry.py +52 -0
  107. openhack/tui.py +3450 -0
  108. openhack/updates.py +170 -0
  109. openhack-0.1.0.dist-info/METADATA +189 -0
  110. openhack-0.1.0.dist-info/RECORD +113 -0
  111. openhack-0.1.0.dist-info/WHEEL +4 -0
  112. openhack-0.1.0.dist-info/entry_points.txt +2 -0
  113. openhack-0.1.0.dist-info/licenses/LICENSE +661 -0
@@ -0,0 +1,356 @@
1
+ """
2
+ Context window management for long-running agents.
3
+
4
+ Provides proactive tool result truncation and reactive message compaction
5
+ to prevent agents from exceeding model context limits.
6
+ """
7
+
8
+ import json
9
+ import logging
10
+ from typing import Optional
11
+
12
+ from .llm import Message
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
# Context window sizes, in tokens, keyed by model name.
MODEL_CONTEXT_LIMITS: dict[str, int] = {"kimi-k2.5": 128_000}

# Default value for ContextWindowManager's ``context_window_limit`` argument.
DEFAULT_CONTEXT_LIMIT = 128_000
22
+
23
+
24
+ class ContextWindowManager:
25
+ """Manages context window usage for an agent via truncation and compaction."""
26
+
27
+ def __init__(
28
+ self,
29
+ context_window_limit: int = DEFAULT_CONTEXT_LIMIT,
30
+ compaction_threshold: float = 0.70,
31
+ tool_result_max_lines: int = 200,
32
+ ):
33
+ self.context_window_limit = context_window_limit
34
+ self.compaction_threshold = compaction_threshold
35
+ self.tool_result_max_lines = tool_result_max_lines
36
+ self.last_input_tokens: int = 0
37
+
38
+ def update_usage(self, input_tokens: int) -> None:
39
+ """Update with the latest input token count from an LLM response."""
40
+ self.last_input_tokens = input_tokens
41
+
42
+ def needs_compaction(self) -> bool:
43
+ """Check if context usage has exceeded the compaction threshold."""
44
+ return self.last_input_tokens > self.context_window_limit * self.compaction_threshold
45
+
46
+ # ── Proactive truncation (before insertion) ─────────────────────────
47
+
48
+ def truncate_tool_result(self, tool_name: str, content: str) -> str:
49
+ """Truncate a tool result before inserting it into the message history.
50
+
51
+ Tool results are JSON-serialized dicts (via json.dumps), so we parse
52
+ the JSON, truncate the relevant inner field, and re-serialize.
53
+ """
54
+ try:
55
+ data = json.loads(content)
56
+ except (json.JSONDecodeError, TypeError):
57
+ if len(content) > 10_000:
58
+ return content[:8_000] + f"\n\n[... truncated, {len(content)} total chars ...]"
59
+ return content
60
+
61
+ if not isinstance(data, dict) or "error" in data:
62
+ return content
63
+
64
+ if tool_name == "read_file":
65
+ return self._truncate_read_file(data)
66
+ if tool_name == "grep":
67
+ return self._truncate_grep(data)
68
+ if tool_name == "list_dir":
69
+ return self._truncate_list_dir(data)
70
+ if tool_name == "glob":
71
+ return self._truncate_glob_result(data)
72
+
73
+ if len(content) > 10_000:
74
+ return content[:8_000] + f"\n\n[... truncated, {len(content)} total chars ...]"
75
+ return content
76
+
77
+ def _truncate_read_file(self, data: dict) -> str:
78
+ """Truncate read_file by trimming the content field's lines."""
79
+ file_content = data.get("content", "")
80
+ lines = file_content.split("\n")
81
+ max_lines = self.tool_result_max_lines
82
+
83
+ if len(lines) <= max_lines:
84
+ return json.dumps(data)
85
+
86
+ head = lines[:100]
87
+ tail = lines[-50:]
88
+ omitted = len(lines) - 150
89
+ data["content"] = "\n".join(head) + f"\n\n[... {omitted} lines omitted ...]\n\n" + "\n".join(tail)
90
+ data["truncated"] = True
91
+ return json.dumps(data)
92
+
93
+ def _truncate_grep(self, data: dict) -> str:
94
+ """Truncate grep by trimming the matches list."""
95
+ matches = data.get("matches", [])
96
+ if len(matches) <= 50:
97
+ return json.dumps(data)
98
+
99
+ head = matches[:30]
100
+ tail = matches[-10:]
101
+ omitted = len(matches) - 40
102
+ data["matches"] = head + [{"note": f"... {omitted} matches omitted ..."}] + tail
103
+ data["truncated"] = True
104
+ return json.dumps(data)
105
+
106
+ def _truncate_list_dir(self, data: dict) -> str:
107
+ """Truncate list_dir by trimming the entries list."""
108
+ entries = data.get("entries", [])
109
+ if len(entries) <= 100:
110
+ return json.dumps(data)
111
+
112
+ head = entries[:50]
113
+ tail = entries[-20:]
114
+ omitted = len(entries) - 70
115
+ data["entries"] = head + [{"note": f"... {omitted} entries omitted ..."}] + tail
116
+ data["truncated"] = True
117
+ return json.dumps(data)
118
+
119
+ def _truncate_glob_result(self, data: dict) -> str:
120
+ """Truncate glob by trimming the matches list."""
121
+ matches = data.get("matches", [])
122
+ if len(matches) <= 100:
123
+ return json.dumps(data)
124
+
125
+ head = matches[:50]
126
+ tail = matches[-20:]
127
+ omitted = len(matches) - 70
128
+ data["matches"] = head + [f"... {omitted} matches omitted ..."] + tail
129
+ data["truncated"] = True
130
+ return json.dumps(data)
131
+
132
+ # ── Reactive compaction (on threshold breach) ───────────────────────
133
+
134
+ def compact_messages(self, messages: list[Message], keep_recent_turns: int = 3) -> list[Message]:
135
+ """Compact older messages by summarizing tool results.
136
+
137
+ Preserves:
138
+ - The first message (original task)
139
+ - All [USER INSTRUCTION] messages
140
+ - The last ``keep_recent_turns`` full turns (assistant + tool results)
141
+ Never removes messages — only replaces content to keep tool_call/result pairing.
142
+ """
143
+ if len(messages) <= 4:
144
+ return messages
145
+
146
+ # Find turn boundaries: each turn starts with an assistant message that has tool_calls
147
+ turn_starts: list[int] = []
148
+ for i, msg in enumerate(messages):
149
+ if msg.role == "assistant" and msg.tool_calls:
150
+ turn_starts.append(i)
151
+
152
+ if len(turn_starts) <= keep_recent_turns:
153
+ return messages
154
+
155
+ # Messages from the start of the Nth-from-last turn onward are protected
156
+ protect_from = turn_starts[-keep_recent_turns]
157
+
158
+ compacted = []
159
+ for i, msg in enumerate(messages):
160
+ if i == 0:
161
+ # Always keep the original task intact
162
+ compacted.append(msg)
163
+ elif i >= protect_from:
164
+ # Recent turns — keep intact
165
+ compacted.append(msg)
166
+ elif msg.role == "user" and msg.content and "[USER INSTRUCTION]" in msg.content:
167
+ # Always keep user instructions
168
+ compacted.append(msg)
169
+ elif msg.role == "tool":
170
+ # Older tool result — summarize
171
+ tool_name = self._infer_tool_name(messages, i)
172
+ summary = self._summarize_tool_result(tool_name, msg.content or "")
173
+ compacted.append(Message(
174
+ role=msg.role,
175
+ content=summary,
176
+ tool_call_id=msg.tool_call_id,
177
+ name=msg.name,
178
+ ))
179
+ elif msg.role == "assistant" and msg.content and len(msg.content) > 200:
180
+ # Older assistant thinking — truncate but keep tool_calls structure
181
+ compacted.append(Message(
182
+ role=msg.role,
183
+ content=msg.content[:200] + "...",
184
+ tool_calls=msg.tool_calls,
185
+ reasoning_content=None,
186
+ ))
187
+ else:
188
+ compacted.append(msg)
189
+
190
+ # Reset token counter so we don't re-compact before the next LLM call
191
+ # updates it with the actual (lower) token count.
192
+ self.last_input_tokens = 0
193
+
194
+ logger.info(
195
+ f"Compacted messages: {len(messages)} msgs, "
196
+ f"protected last {keep_recent_turns} turns from idx {protect_from}"
197
+ )
198
+ return compacted
199
+
200
+ def emergency_compact(self, messages: list[Message]) -> list[Message]:
201
+ """Aggressive compaction for when normal compaction isn't enough.
202
+
203
+ Keeps only the first message, user instructions, and the last 2 turns.
204
+ All older tool results are replaced with one-line summaries.
205
+ All older assistant messages are truncated to 100 chars.
206
+ Protected-turn tool results are also truncated to prevent overflow.
207
+ """
208
+ if len(messages) <= 3:
209
+ return messages
210
+
211
+ turn_starts: list[int] = []
212
+ for i, msg in enumerate(messages):
213
+ if msg.role == "assistant" and msg.tool_calls:
214
+ turn_starts.append(i)
215
+
216
+ protect_from = turn_starts[-2] if len(turn_starts) >= 2 else turn_starts[-1] if turn_starts else len(messages)
217
+
218
+ compacted = []
219
+ for i, msg in enumerate(messages):
220
+ if i == 0:
221
+ content = msg.content or ""
222
+ if len(content) > 2000:
223
+ compacted.append(Message(role=msg.role, content=content[:2000] + "\n[... truncated ...]"))
224
+ else:
225
+ compacted.append(msg)
226
+ elif i >= protect_from:
227
+ if msg.role == "tool" and msg.content and len(msg.content) > 4000:
228
+ tool_name = self._infer_tool_name(messages, i)
229
+ truncated = msg.content[:3000] + f"\n\n[... {tool_name} result truncated from {len(msg.content)} chars for context management ...]"
230
+ compacted.append(Message(
231
+ role=msg.role,
232
+ content=truncated,
233
+ tool_call_id=msg.tool_call_id,
234
+ name=msg.name,
235
+ ))
236
+ elif msg.role == "assistant" and msg.content and len(msg.content) > 500:
237
+ compacted.append(Message(
238
+ role=msg.role,
239
+ content=msg.content[:500] + "...",
240
+ tool_calls=msg.tool_calls,
241
+ reasoning_content=None,
242
+ ))
243
+ else:
244
+ compacted.append(msg)
245
+ elif msg.role == "user" and msg.content and "[USER INSTRUCTION]" in msg.content:
246
+ compacted.append(msg)
247
+ elif msg.role == "tool":
248
+ tool_name = self._infer_tool_name(messages, i)
249
+ compacted.append(Message(
250
+ role=msg.role,
251
+ content=f"[{tool_name}: result omitted for context management]",
252
+ tool_call_id=msg.tool_call_id,
253
+ name=msg.name,
254
+ ))
255
+ elif msg.role == "assistant":
256
+ compacted.append(Message(
257
+ role=msg.role,
258
+ content=(msg.content or "")[:100] + "..." if msg.content and len(msg.content) > 100 else msg.content,
259
+ tool_calls=msg.tool_calls,
260
+ reasoning_content=None,
261
+ ))
262
+ else:
263
+ compacted.append(msg)
264
+
265
+ self.last_input_tokens = 0
266
+ logger.warning(
267
+ f"Emergency compaction: {len(messages)} → {len(compacted)} msgs, "
268
+ f"protected from idx {protect_from}"
269
+ )
270
+ return compacted
271
+
272
+ def _infer_tool_name(self, messages: list[Message], tool_result_idx: int) -> str:
273
+ """Walk backwards from a tool result to find which tool_call it belongs to."""
274
+ tool_call_id = messages[tool_result_idx].tool_call_id
275
+ if not tool_call_id:
276
+ return "unknown"
277
+ for i in range(tool_result_idx - 1, -1, -1):
278
+ msg = messages[i]
279
+ if msg.role == "assistant" and msg.tool_calls:
280
+ for tc in msg.tool_calls:
281
+ tc_id = tc.get("id") if isinstance(tc, dict) else getattr(tc, "id", None)
282
+ tc_name = (tc.get("function", {}).get("name") if isinstance(tc, dict)
283
+ else getattr(tc, "name", "unknown"))
284
+ if tc_id == tool_call_id:
285
+ return tc_name
286
+ return "unknown"
287
+
288
+ def _summarize_tool_result(self, tool_name: str, content: str) -> str:
289
+ """Produce a terse deterministic summary of a tool result."""
290
+ if tool_name == "read_file":
291
+ return self._summarize_read_file(content)
292
+ if tool_name == "grep":
293
+ return self._summarize_grep(content)
294
+ if tool_name == "list_dir":
295
+ return self._summarize_list_dir(content)
296
+ if tool_name == "glob":
297
+ return self._summarize_glob(content)
298
+ # Fallback
299
+ preview = content[:100].replace("\n", " ")
300
+ return f"[{tool_name}: {preview}...]"
301
+
302
+ def _summarize_read_file(self, content: str) -> str:
303
+ try:
304
+ data = json.loads(content)
305
+ if isinstance(data, dict):
306
+ path = data.get("path", "?")
307
+ total = data.get("total_lines", "?")
308
+ return f"[read_file: {path} -- {total} lines]"
309
+ except (json.JSONDecodeError, TypeError):
310
+ pass
311
+ line_count = content.count("\n") + 1
312
+ # Try to extract path from first line
313
+ first_line = content.split("\n")[0][:80]
314
+ return f"[read_file: {first_line}... -- ~{line_count} lines]"
315
+
316
+ def _summarize_grep(self, content: str) -> str:
317
+ try:
318
+ data = json.loads(content)
319
+ if isinstance(data, dict):
320
+ pattern = data.get("pattern", "?")
321
+ matches = data.get("matches", [])
322
+ # Don't count truncation note entries
323
+ count = sum(1 for m in matches if isinstance(m, dict) and "note" not in m)
324
+ if data.get("truncated"):
325
+ return f"[grep: '{pattern}' -- {count}+ matches (truncated)]"
326
+ return f"[grep: '{pattern}' -- {count} matches]"
327
+ except (json.JSONDecodeError, TypeError):
328
+ pass
329
+ match_count = content.count("\n") + 1
330
+ return f"[grep: ~{match_count} result lines]"
331
+
332
+ def _summarize_list_dir(self, content: str) -> str:
333
+ try:
334
+ data = json.loads(content)
335
+ if isinstance(data, dict):
336
+ path = data.get("path", "?")
337
+ entries = data.get("entries", [])
338
+ count = sum(1 for e in entries if isinstance(e, dict) and "note" not in e)
339
+ if data.get("truncated"):
340
+ return f"[list_dir: {path} -- {count}+ entries (truncated)]"
341
+ return f"[list_dir: {path} -- {count} entries]"
342
+ except (json.JSONDecodeError, TypeError):
343
+ pass
344
+ entry_count = content.count("\n") + 1
345
+ return f"[list_dir: ~{entry_count} entries]"
346
+
347
+ def _summarize_glob(self, content: str) -> str:
348
+ try:
349
+ data = json.loads(content)
350
+ if isinstance(data, dict):
351
+ pattern = data.get("pattern", "?")
352
+ matches = len(data.get("matches", []))
353
+ return f"[glob: '{pattern}' -- {matches} files]"
354
+ except (json.JSONDecodeError, TypeError):
355
+ pass
356
+ return f"[glob: {content[:80]}...]"