agentslim 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
agentslim/__init__.py ADDED
@@ -0,0 +1,27 @@
1
+ """
2
+ agentslim — Make your AI agents leaner, faster, and cheaper.
3
+
4
+ Core modules:
5
+ - memory: Smart context window with auto-summarization
6
+ - compressor: Text / HTML / JSON compressor before sending to LLM
7
+ - tools: Tool/function-call schema minifier
8
+ - code: Code-aware context extractor (send only relevant chunks)
9
+ """
10
+
11
+ from agentslim.memory import AgentMemory
12
+ from agentslim.compressor import Compressor
13
+ from agentslim.tools import ToolMinifier
14
+ from agentslim.code import CodeContext
15
+ from agentslim.utils import count_tokens, estimate_cost
16
+
17
# Package metadata.
__version__ = "0.1.0"
__author__ = "agentslim contributors"

# Public API: each name below is imported at the top of this module from one
# of the agentslim submodules, and is what ``from agentslim import *`` exposes.
__all__ = [
    "AgentMemory",
    "Compressor",
    "ToolMinifier",
    "CodeContext",
    "count_tokens",
    "estimate_cost",
]
agentslim/code.py ADDED
@@ -0,0 +1,307 @@
1
+ """
2
+ code.py — Code-aware context extractor for coding agents.
3
+
4
+ Instead of sending an entire file (hundreds of tokens), send only the
5
+ relevant "chunk" — the function or class the agent needs, plus configurable
6
+ surrounding context lines.
7
+
8
+ Supports Python, JavaScript / TypeScript, and generic line-based extraction.
9
+
10
+ Quickstart::
11
+
12
+ from agentslim import CodeContext
13
+
14
+ ctx = CodeContext.extract_function("my_file.py", "process_payment")
15
+ print(ctx)
16
+ # → def process_payment(order_id: str) -> dict:
17
+ # ...
18
+
19
+ outline = CodeContext.outline("my_file.py")
20
+ # → ['class PaymentService (L12)', 'def process_payment (L34)', ...]
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ import ast
26
+ import re
27
+ from pathlib import Path
28
+ from typing import Optional, Union
29
+
30
+
31
class CodeContext:
    """
    Code-aware context extractor — send only what the agent needs.

    All methods are static; no instantiation required.  Every ``source``
    argument is either a path to a file or a raw source-code string.
    Snippets returned with a ``# File: ...`` header name the file when
    ``source`` is a path and use the placeholder ``<source>`` when it is raw
    code, so the header never repeats the whole input.
    """

    # ──────────────────────────────────────────────────────────────────────────
    # Public API
    # ──────────────────────────────────────────────────────────────────────────

    @staticmethod
    def extract_function(
        source: Union[str, Path],
        function_name: str,
        context_lines: int = 3,
    ) -> Optional[str]:
        """
        Extract a single function or method from Python source.

        Args:
            source: File path or raw source code string.
            function_name: Name of the function / method to find.
            context_lines: Lines of surrounding context to include
                (negative values are treated as 0).

        Returns:
            The function's source with surrounding context and a
            ``# File: ...`` header, or ``None`` if not found.  When the code
            cannot be parsed, a regex-based fallback is used and its result
            (which has no header) is returned instead.

        Example::

            snippet = CodeContext.extract_function("app.py", "handle_request")
        """
        context_lines = max(0, context_lines)
        code = _read_source(source)
        try:
            tree = ast.parse(code)
        except SyntaxError:
            return _fallback_extract(code, function_name, context_lines)

        lines = code.splitlines()
        for node in ast.walk(tree):
            if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                continue
            if node.name != function_name:
                continue
            # lineno is 1-indexed; slice bounds are 0-indexed / exclusive.
            start = max(0, node.lineno - 1 - context_lines)
            end = min(len(lines), node.end_lineno + context_lines)  # type: ignore[attr-defined]
            return CodeContext._format_snippet(source, lines, start, end)
        return None

    @staticmethod
    def extract_class(
        source: Union[str, Path],
        class_name: str,
        methods_only: bool = False,
    ) -> Optional[str]:
        """
        Extract a class definition from Python source.

        Args:
            source: File path or raw source code string.
            class_name: Name of the class to find.
            methods_only: If ``True``, return only method signatures (no bodies).

        Returns:
            Class source (with a ``# File: ...`` header) or skeleton, or
            ``None`` if the class is not found or the code does not parse.
        """
        code = _read_source(source)
        try:
            tree = ast.parse(code)
        except SyntaxError:
            return None

        lines = code.splitlines()
        for node in ast.walk(tree):
            if isinstance(node, ast.ClassDef) and node.name == class_name:
                if methods_only:
                    return _class_skeleton(node, lines)
                start = node.lineno - 1
                end = node.end_lineno  # type: ignore[attr-defined]
                return CodeContext._format_snippet(source, lines, start, end)
        return None

    @staticmethod
    def extract_lines(
        source: Union[str, Path],
        start_line: int,
        end_line: int,
        context_lines: int = 0,
    ) -> str:
        """
        Extract a specific line range (1-indexed) from a file or source string.

        Args:
            source: File path or raw source code string.
            start_line: First line to include (1-indexed).
            end_line: Last line to include (1-indexed, inclusive).
            context_lines: Extra lines of context around the range.

        Returns:
            The selected lines preceded by a ``# File: ...`` header; the range
            is clamped to the bounds of the source.
        """
        lines = _read_source(source).splitlines()
        start = max(0, start_line - 1 - context_lines)
        end = min(len(lines), end_line + context_lines)
        return CodeContext._format_snippet(source, lines, start, end)

    @staticmethod
    def outline(source: Union[str, Path], language: str = "auto") -> list[str]:
        """
        Return a high-level outline of the file: class and function names
        with line numbers.

        Useful for giving an agent an overview without sending full source.

        Args:
            source: File path or raw source code string.
            language: ``"python"``, ``"js"``/``"ts"``, or ``"auto"`` (detect).

        Returns:
            List of strings like ``['class PaymentService (L12)', ...]``.

        Example::

            for item in CodeContext.outline("api/routes.py"):
                print(item)
        """
        code = _read_source(source)
        lang = _detect_language(source, language)

        if lang == "python":
            return _python_outline(code)
        if lang in ("js", "ts"):
            return _js_outline(code)
        return _generic_outline(code)

    @staticmethod
    def folded(source: Union[str, Path], language: str = "auto") -> str:
        """
        Return the source with function/method bodies replaced by ``...``.

        Dramatically reduces token count while preserving structure.  For
        languages other than Python the outline is returned instead, one
        entry per line.

        Example::

            folded = CodeContext.folded("large_module.py")
            # class MyClass:
            #     def method_one(self): ...
            #     def method_two(self, x: int) -> str: ...
        """
        code = _read_source(source)
        lang = _detect_language(source, language)

        if lang == "python":
            return _python_folded(code)
        # For other languages, return outline as string
        return "\n".join(CodeContext.outline(source, language))

    # ──────────────────────────────────────────────────────────────────────────
    # Private helpers
    # ──────────────────────────────────────────────────────────────────────────

    @staticmethod
    def _source_label(source: Union[str, Path]) -> str:
        """Return a short name for *source* suitable for a snippet header.

        A ``Path`` or a string naming an existing file is shown as-is; any
        other string is raw code and is shown as ``<source>``.
        """
        if isinstance(source, Path):
            return str(source)
        try:
            if Path(source).is_file():
                return str(source)
        except (OSError, ValueError):
            # Raw code is usually not a legal file name (too long, NUL bytes…).
            pass
        return "<source>"

    @staticmethod
    def _format_snippet(
        source: Union[str, Path], lines: list[str], start: int, end: int
    ) -> str:
        """Join ``lines[start:end]`` under a ``# File: ... (Lx–Ly)`` header."""
        snippet = "\n".join(lines[start:end])
        label = CodeContext._source_label(source)
        return f"# File: {label} (L{start + 1}–L{end})\n{snippet}"
189
+
190
+
191
+ # ──────────────────────────────────────────────────────────────────────────────
192
+ # Internal helpers
193
+ # ──────────────────────────────────────────────────────────────────────────────
194
+
195
def _read_source(source: Union[str, Path]) -> str:
    """Return the text of *source*, which is a file path or raw source code.

    If *source* names an existing regular file, the file is read as UTF-8
    (undecodable bytes are replaced).  Anything else — including an empty
    string, a directory, or text that is not a legal file name — is treated
    as raw source code and returned as a string.
    """
    path = Path(source)
    try:
        # is_file() rather than exists(): "" resolves to "." and directories
        # exist but cannot be read as text.
        is_file = path.is_file()
    except (OSError, ValueError):
        # Long or multi-line code is not a valid path (e.g. ENAMETOOLONG).
        is_file = False
    if is_file:
        return path.read_text(encoding="utf-8", errors="replace")
    return str(source)  # Treat as raw source code
200
+
201
+
202
def _detect_language(source: Union[str, Path], hint: str) -> str:
    """Resolve the language key used to pick an outline/fold strategy.

    Args:
        source: File path or raw source code; only its file suffix is used.
        hint: ``"auto"`` to detect from the suffix, or an explicit language
            name.  Common aliases (``"py"``, ``"javascript"``,
            ``"typescript"``, …) are normalised; unknown names are returned
            lower-cased.

    Returns:
        ``"python"``, ``"js"``, ``"ts"``, ``"generic"`` (undetected), or the
        lower-cased explicit hint.
    """
    aliases = {
        "py": "python",
        "python3": "python",
        "javascript": "js",
        "jsx": "js",
        "typescript": "ts",
        "tsx": "ts",
    }
    by_suffix = {
        ".py": "python",
        ".pyi": "python",
        ".js": "js",
        ".mjs": "js",
        ".cjs": "js",
        ".jsx": "js",
        ".ts": "ts",
        ".tsx": "ts",
    }

    normalized = hint.strip().lower()
    if normalized != "auto":
        return aliases.get(normalized, normalized)
    suffix = Path(str(source)).suffix.lower()
    return by_suffix.get(suffix, "generic")
209
+
210
+
211
def _fallback_extract(code: str, name: str, context_lines: int) -> Optional[str]:
    """Regex-based fallback for source that ``ast`` cannot parse.

    Finds the first ``def name(`` or ``async def name(`` line and extends the
    selection until the next non-blank line indented at or below the ``def``
    (the line directly after the ``def`` is always included).

    Args:
        code: Raw source text.
        name: Function name to look for.
        context_lines: Extra lines to include before and after the match.

    Returns:
        The selected lines joined with newlines, or ``None`` if no matching
        definition line exists.
    """
    header = re.compile(rf"\s*(?:async\s+)?def\s+{re.escape(name)}\s*\(")
    lines = code.splitlines()
    for i, line in enumerate(lines):
        if not header.match(line):
            continue
        start = max(0, i - context_lines)
        # Find end heuristically: next line at same or lower indent
        indent = len(line) - len(line.lstrip())
        end = i + 1
        while end < len(lines):
            candidate = lines[end]
            is_blank = candidate.strip() == ""
            candidate_indent = len(candidate) - len(candidate.lstrip())
            if not is_blank and end > i + 1 and candidate_indent <= indent:
                break
            end += 1
        end = min(len(lines), end + context_lines)
        return "\n".join(lines[start:end])
    return None
231
+
232
+
233
def _python_outline(code: str) -> list[str]:
    """List every class and function in *code* as ``"<kind> <name> (L<n>)"``.

    Entries are ordered by line number and include nested definitions.  If
    the code does not parse, the regex-based generic outline is used instead.
    """
    try:
        module = ast.parse(code)
    except SyntaxError:
        return _generic_outline(code)

    entries = []
    for node in ast.walk(module):
        if isinstance(node, ast.ClassDef):
            keyword = "class"
        elif isinstance(node, ast.AsyncFunctionDef):
            keyword = "async def"
        elif isinstance(node, ast.FunctionDef):
            keyword = "def"
        else:
            continue
        entries.append((node.lineno, f"{keyword} {node.name} (L{node.lineno})"))

    # ast.walk is breadth-first, so sort to restore source order.
    return [label for _, label in sorted(entries)]
247
+
248
+
249
def _js_outline(code: str) -> list[str]:
    """Outline JavaScript / TypeScript source with line-based regexes.

    Recognises, at the start of a (stripped) line:

    - ``class`` declarations, optionally ``export`` / ``export default``;
    - ``function`` declarations, optionally ``export`` / ``default`` / ``async``;
    - ``const`` / ``let`` / ``var`` bindings assigned a parenthesised arrow
      function, optionally ``export`` and/or ``async``.

    Returns:
        Strings of the form ``"<kind> <name> (L<n>)"`` in source order.
    """
    # The declared name is captured by a *named* group so that optional groups
    # such as ``async`` can never be mistaken for the name.
    patterns = [
        (re.compile(r"(?:export\s+)?(?:default\s+)?class\s+(?P<name>\w+)"), "class"),
        (
            re.compile(
                r"(?:export\s+)?(?:default\s+)?(?:async\s+)?function\s+(?P<name>\w+)"
            ),
            "function",
        ),
        (
            re.compile(
                r"(?:export\s+)?(?:const|let|var)\s+(?P<name>\w+)\s*=\s*(?:async\s+)?\("
            ),
            "arrow fn",
        ),
    ]

    items = []
    for lineno, line in enumerate(code.splitlines(), 1):
        text = line.strip()
        for pattern, kind in patterns:
            found = pattern.match(text)
            if found:
                items.append(f"{kind} {found.group('name')} (L{lineno})")
                break
    return items
264
+
265
+
266
def _generic_outline(code: str) -> list[str]:
    """Language-agnostic outline based on leading definition keywords.

    Any line that starts (after optional indentation) with ``def``, ``class``,
    ``function``, ``const`` or ``export`` followed by a space is reported as
    ``"L<n>: <text>"``, where the stripped text is cut to 80 characters.
    """
    starts_definition = re.compile(r"^\s*(def |class |function |const |export )").match
    return [
        f"L{number}: {text.strip()[:80]}"
        for number, text in enumerate(code.splitlines(), 1)
        if starts_definition(text)
    ]
272
+
273
+
274
def _class_skeleton(node: ast.ClassDef, lines: list[str]) -> str:
    """Return class header + method signatures only, bodies as '...'.

    The class header is taken from the source so base classes and keywords
    are kept, and signatures spanning several lines are joined onto one line.

    Args:
        node: The class definition found by ``ast``.
        lines: The source split into lines (``lines[0]`` is line 1).

    Returns:
        The header line followed by one `` <signature> ...`` line per method
        defined directly in the class body.
    """

    def first_body_index(owner: ast.AST) -> int:
        # A decorated first statement starts at its first decorator line.
        first = owner.body[0]  # type: ignore[attr-defined]
        starts = [first.lineno]
        starts.extend(d.lineno for d in getattr(first, "decorator_list", []))
        return min(starts) - 1

    def header_text(owner: ast.AST) -> str:
        begin = owner.lineno - 1  # type: ignore[attr-defined]
        # Always look at the first line, even when the body shares it.
        stop = max(first_body_index(owner), begin + 1)
        pieces = []
        for raw in lines[begin:stop]:
            text = raw.strip()
            if text:
                pieces.append(text)
            if text.endswith(":"):
                break  # end of the header; skip comments before the body
        return " ".join(pieces)

    if first_body_index(node) > node.lineno - 1:
        class_header = header_text(node)
    else:
        # One-line class (``class A: x = 1``): the line also holds the body.
        class_header = f"class {node.name}:"

    parts = [class_header]
    for item in node.body:
        if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)):
            parts.append(f" {header_text(item)} ...")
    return "\n".join(parts)
282
+
283
+
284
def _python_folded(code: str) -> str:
    """Replace function bodies with '...' using AST.

    Only the outermost function in any nesting is folded (its body, including
    nested definitions, becomes a single ``...`` line).  Functions whose body
    starts on the same line as the signature (``def f(): return 1``) are left
    untouched.  Source that does not parse is returned unchanged.

    Returns:
        The folded source, lines joined with ``"\\n"``.
    """
    try:
        tree = ast.parse(code)
    except SyntaxError:
        return code

    lines = code.splitlines()
    spans: list[tuple[int, int, str]] = []  # (first_body_idx, last_body_idx, placeholder)

    for node in ast.walk(tree):
        if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            continue
        first = node.body[0]
        decorator_lines = [d.lineno for d in getattr(first, "decorator_list", [])]
        # A decorated first statement begins at its first decorator.
        body_start = min([first.lineno] + decorator_lines) - 1
        body_end = node.end_lineno - 1  # type: ignore[attr-defined]
        if body_end >= len(lines):
            continue  # line numbering disagrees with splitlines(); leave as is
        if not decorator_lines:
            # col_offset counts UTF-8 bytes; anything but whitespace before the
            # body means it shares the line with the signature.
            prefix = lines[body_start].encode("utf-8")[: first.col_offset]
            if prefix.strip():
                continue
        sig = lines[node.lineno - 1]
        indent = " " * (len(sig) - len(sig.lstrip()) + 4)
        spans.append((body_start, body_end, f"{indent}..."))

    # Keep only outermost spans: a span starting inside an already kept one
    # belongs to a nested function and disappears with its parent's body.
    spans.sort(key=lambda span: span[0])
    outermost: list[tuple[int, int, str]] = []
    covered_until = -1
    for span in spans:
        if span[0] > covered_until:
            outermost.append(span)
            covered_until = span[1]

    # Apply replacements from bottom to top to preserve line numbers
    for start, end, placeholder in reversed(outermost):
        lines[start : end + 1] = [placeholder]

    return "\n".join(lines)
@@ -0,0 +1,242 @@
1
+ """
2
+ compressor.py — Text, HTML, JSON, and Markdown compressor for LLM prompts.
3
+
4
+ Strategy:
5
+ 1. Strip HTML tags / attributes that carry zero semantic value.
6
+ 2. Collapse redundant whitespace and blank lines.
7
+ 3. Remove code comments (optional — preserves intent by default).
8
+ 4. Compact JSON / YAML to minimal representation.
9
+ 5. Remove filler phrases from natural language ("As an AI language model…").
10
+
11
+ All steps are individually togglable so you control the trade-off between
12
+ compression ratio and information fidelity.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import json
18
+ import re
19
+ from dataclasses import dataclass, field
20
+ from typing import Optional
21
+
22
+ # ──────────────────────────────────────────────────────────────────────────────
23
+ # Filler phrases common in LLM outputs / web scrapes (English + Russian)
24
+ # ──────────────────────────────────────────────────────────────────────────────
25
# Each entry is a regex fragment matching one filler phrase plus the
# punctuation / whitespace that trails it.
_FILLER_PHRASES: list[str] = [
    r"as an ai language model[,\s]*",
    r"i('m| am) just an ai[,\s]*",
    r"certainly[,!]?\s*",
    r"of course[,!]?\s*",
    r"sure[,!]?\s*here('s| is)[,\s]*",
    r"great question[,!]?\s*",
    r"absolutely[,!]?\s*",
    r"как языковая модель[,\s]*",
    r"конечно[,!]?\s*",
    r"разумеется[,!]?\s*",
    r"отличный вопрос[,!]?\s*",
]
# The leading \b keeps a phrase from matching inside a longer word
# (e.g. "certainly" inside "uncertainly").
_FILLER_RE = re.compile(
    r"\b(?:" + "|".join(_FILLER_PHRASES) + r")",
    flags=re.IGNORECASE,
)

# HTML tags that are purely structural/decorative — safe to remove entirely
_DECORATIVE_TAGS = {
    "script", "style", "nav", "footer", "header",
    "aside", "advertisement", "noscript", "svg",
}
48
+
49
+
50
@dataclass
class CompressorConfig:
    """
    Fine-grained control over which compression steps are applied.

    Attributes:
        strip_html: Remove HTML tags, keeping only inner text.
        remove_decorative_html: Drop entire <script>, <style>, <nav>, etc.
            blocks before stripping.
        collapse_whitespace: Merge multiple blank lines / spaces into one.
        remove_filler_phrases: Strip common LLM filler phrases from text.
        compact_json: Re-serialize JSON with no indentation / extra spaces.
        remove_python_comments: Strip ``# …`` comment lines from Python code
            (off by default).
        remove_js_comments: Strip ``// …`` and ``/* … */`` comments from
            JS/TS code (off by default).
        max_consecutive_newlines: Maximum number of consecutive newlines kept
            after collapsing.
    """

    # HTML handling
    strip_html: bool = True
    remove_decorative_html: bool = True

    # Plain-text handling
    collapse_whitespace: bool = True
    remove_filler_phrases: bool = True

    # Structured data
    compact_json: bool = True

    # Source-code comment removal (opt-in)
    remove_python_comments: bool = False
    remove_js_comments: bool = False

    # Whitespace tuning
    max_consecutive_newlines: int = 2
77
+
78
+
79
class Compressor:
    """
    Multi-format text compressor that reduces token usage before sending
    content to an LLM.

    Quickstart::

        from agentslim import Compressor

        c = Compressor()
        slim = c.compress(raw_html_page)
        print(slim)

    You can also use format-specific helpers::

        c.compress_json(big_json_string)
        c.compress_html(html_string)
        c.compress_code(python_source, language="python")
    """

    def __init__(self, config: Optional[CompressorConfig] = None) -> None:
        """Use *config*, or a default ``CompressorConfig`` when none is given."""
        self.config = config or CompressorConfig()

    # ──────────────────────────────────────────────────────────────────────────
    # Public API
    # ──────────────────────────────────────────────────────────────────────────

    def compress(self, text: str) -> str:
        """
        Auto-detect format and compress accordingly.

        Input that parses as JSON is compacted; otherwise input containing
        HTML tags is stripped to text; everything else is treated as plain
        text / markdown.  Bracketed text that is not valid JSON still gets
        the HTML check.
        """
        stripped = text.strip()
        if self._looks_like_json(stripped) and self._is_valid_json(stripped):
            return self.compress_json(stripped)
        if self._looks_like_html(stripped):
            return self.compress_html(stripped)
        return self.compress_text(stripped)

    def compress_text(self, text: str) -> str:
        """Compress natural-language text (plain text / Markdown)."""
        if self.config.remove_filler_phrases:
            text = _FILLER_RE.sub("", text)
        if self.config.collapse_whitespace:
            text = self._collapse_whitespace(text)
        return text.strip()

    def compress_html(self, html: str) -> str:
        """Strip HTML tags and decorative blocks, returning clean text."""
        if self.config.remove_decorative_html:
            html = self._remove_decorative_blocks(html)
        if self.config.strip_html:
            html = self._strip_html_tags(html)
        return self.compress_text(html)

    def compress_json(self, json_str: str) -> str:
        """
        Compact JSON to minimal representation (no indentation, no spaces).

        Returns the input unchanged when ``compact_json`` is disabled, and
        falls back to plain-text compression if the input is not valid JSON.
        """
        if not self.config.compact_json:
            return json_str
        try:
            obj = json.loads(json_str)
        except (ValueError, RecursionError):
            # Not valid (or too deeply nested) JSON — treat as plain text
            return self.compress_text(json_str)
        return json.dumps(obj, ensure_ascii=False, separators=(",", ":"))

    def compress_code(self, source: str, language: str = "python") -> str:
        """
        Compress source code by optionally removing comments.

        Indentation and spacing inside lines are preserved; only trailing
        whitespace and surplus blank lines are removed.

        Args:
            source: Raw source code string.
            language: ``"python"``, ``"js"``, or ``"ts"`` (TypeScript).
        """
        lang = language.lower()
        if lang == "python" and self.config.remove_python_comments:
            source = self._strip_python_comments(source)
        elif lang in ("js", "ts", "javascript", "typescript") and self.config.remove_js_comments:
            source = self._strip_js_comments(source)
        if self.config.collapse_whitespace:
            source = self._collapse_whitespace(source, preserve_indent=True)
        return source.strip()

    # ──────────────────────────────────────────────────────────────────────────
    # Private helpers
    # ──────────────────────────────────────────────────────────────────────────

    @staticmethod
    def _looks_like_json(text: str) -> bool:
        """Cheap shape check: wrapped in ``{}`` or ``[]``."""
        return (text.startswith("{") and text.endswith("}")) or (
            text.startswith("[") and text.endswith("]")
        )

    @staticmethod
    def _is_valid_json(text: str) -> bool:
        """Return ``True`` if *text* parses as JSON."""
        try:
            json.loads(text)
        except (ValueError, RecursionError):
            return False
        return True

    @staticmethod
    def _looks_like_html(text: str) -> bool:
        """Return ``True`` if *text* contains a complete, common HTML tag.

        The tag name must directly follow ``<`` and the tag must be closed by
        ``>``, so comparisons such as ``x < a`` are not mistaken for markup.
        """
        return bool(
            re.search(
                r"<(?:!doctype|html|head|body|div|p|span|a|h[1-6]|ul|ol|li|table|br|img)\b[^<>]*>",
                text,
                re.IGNORECASE,
            )
        )

    @staticmethod
    def _remove_decorative_blocks(html: str) -> str:
        """Delete ``<tag …>…</tag>`` blocks for every tag in ``_DECORATIVE_TAGS``."""
        # sorted(): set order varies between runs; keep the result deterministic.
        for tag in sorted(_DECORATIVE_TAGS):
            name = re.escape(tag)
            pattern = re.compile(
                rf"<{name}\b[^>]*>.*?</{name}\s*>",
                flags=re.IGNORECASE | re.DOTALL,
            )
            html = pattern.sub("", html)
        return html

    @staticmethod
    def _strip_html_tags(html: str) -> str:
        """Remove markup from *html* and decode character references."""
        # Imported here because the ``html`` parameter shadows the module name.
        from html import unescape

        # Comments may contain ">" and must go before generic tag stripping.
        text = re.sub(r"<!--.*?-->", "", html, flags=re.DOTALL)
        # Replace block-level tags with newlines for readability
        block_tags = re.compile(
            r"<(br|p|div|li|tr|h[1-6]|blockquote)\b[^>]*>",
            flags=re.IGNORECASE,
        )
        text = block_tags.sub("\n", text)
        # Strip remaining tags; "<" must be followed by a tag name, "/" or "!"
        # so that text such as "1 < 2 and 3 > 2" survives.
        text = re.sub(r"</?[A-Za-z][^<>]*>|<![^<>]*>", "", text)
        # Decode entities in a single pass ("&amp;lt;" becomes "&lt;", not "<");
        # non-breaking spaces become regular spaces.
        return unescape(text).replace("\u00a0", " ")

    def _collapse_whitespace(self, text: str, preserve_indent: bool = False) -> str:
        """Normalise whitespace in *text*.

        Args:
            text: Text to clean.
            preserve_indent: When ``True`` (used for source code), spacing
                inside lines is kept and only trailing whitespace and blank
                lines are touched.

        Returns:
            Text with trailing whitespace removed from each line, runs of
            spaces / tabs / non-breaking spaces merged into one space (unless
            *preserve_indent*), and at most ``max_consecutive_newlines``
            newlines in a row.
        """
        cleaned = []
        for line in text.splitlines():
            if not preserve_indent:
                line = re.sub(r"[ \t\u00a0]+", " ", line)
            cleaned.append(line.rstrip())
        # Strip lines first so whitespace-only lines count as blank below.
        text = "\n".join(cleaned)
        limit = max(0, self.config.max_consecutive_newlines)
        return re.sub(rf"\n{{{limit + 1},}}", "\n" * limit, text)

    @staticmethod
    def _strip_python_comments(source: str) -> str:
        """Remove ``#`` comments from Python *source*.

        Comments are located with ``tokenize`` so ``#`` inside string literals
        is left alone.  Comment-only lines are dropped; trailing comments are
        cut from their line.  If the source cannot be tokenized, a naive
        line-based strip is used instead.
        """
        import io
        import tokenize

        lines = source.splitlines()
        try:
            comment_starts = [
                tok.start
                for tok in tokenize.generate_tokens(io.StringIO(source).readline)
                if tok.type == tokenize.COMMENT
            ]
        except (tokenize.TokenError, SyntaxError):
            comment_starts = None

        if comment_starts is not None:
            # Trust token positions only if each one really points at a "#".
            for row, col in comment_starts:
                if row > len(lines) or lines[row - 1][col : col + 1] != "#":
                    comment_starts = None
                    break

        if comment_starts is None:
            # Naive fallback: doesn't handle "#" inside strings.
            kept = []
            for line in lines:
                if line.lstrip().startswith("#"):
                    continue  # Drop full-line comments
                if " #" in line:
                    line = line[: line.index(" #")]
                kept.append(line)
            return "\n".join(kept)

        dropped = set()
        for row, col in comment_starts:
            code_part = lines[row - 1][:col].rstrip()
            if code_part:
                lines[row - 1] = code_part
            else:
                dropped.add(row - 1)
        return "\n".join(line for idx, line in enumerate(lines) if idx not in dropped)

    @staticmethod
    def _strip_js_comments(source: str) -> str:
        """Remove ``/* … */`` and ``// …`` comments from JS/TS *source*.

        String and template literals are matched first and kept verbatim, so
        ``//`` inside e.g. ``"http://…"`` is not treated as a comment.
        Regex literals are not recognised (simplified).
        """
        token = re.compile(
            "|".join(
                [
                    r'"(?:\\.|[^"\\\n])*"',  # double-quoted string
                    r"'(?:\\.|[^'\\\n])*'",  # single-quoted string
                    r"`(?:\\.|[^`\\])*`",  # template literal
                    r"/\*.*?\*/",  # block comment
                    r"//[^\n]*",  # line comment
                ]
            ),
            flags=re.DOTALL,
        )

        def keep_strings(match: "re.Match[str]") -> str:
            text = match.group(0)
            return text if text[0] in "\"'`" else ""

        return token.sub(keep_strings, source)