optexity-browser-use 0.9.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147) hide show
  1. browser_use/__init__.py +157 -0
  2. browser_use/actor/__init__.py +11 -0
  3. browser_use/actor/element.py +1175 -0
  4. browser_use/actor/mouse.py +134 -0
  5. browser_use/actor/page.py +561 -0
  6. browser_use/actor/playground/flights.py +41 -0
  7. browser_use/actor/playground/mixed_automation.py +54 -0
  8. browser_use/actor/playground/playground.py +236 -0
  9. browser_use/actor/utils.py +176 -0
  10. browser_use/agent/cloud_events.py +282 -0
  11. browser_use/agent/gif.py +424 -0
  12. browser_use/agent/judge.py +170 -0
  13. browser_use/agent/message_manager/service.py +473 -0
  14. browser_use/agent/message_manager/utils.py +52 -0
  15. browser_use/agent/message_manager/views.py +98 -0
  16. browser_use/agent/prompts.py +413 -0
  17. browser_use/agent/service.py +2316 -0
  18. browser_use/agent/system_prompt.md +185 -0
  19. browser_use/agent/system_prompt_flash.md +10 -0
  20. browser_use/agent/system_prompt_no_thinking.md +183 -0
  21. browser_use/agent/views.py +743 -0
  22. browser_use/browser/__init__.py +41 -0
  23. browser_use/browser/cloud/cloud.py +203 -0
  24. browser_use/browser/cloud/views.py +89 -0
  25. browser_use/browser/events.py +578 -0
  26. browser_use/browser/profile.py +1158 -0
  27. browser_use/browser/python_highlights.py +548 -0
  28. browser_use/browser/session.py +3225 -0
  29. browser_use/browser/session_manager.py +399 -0
  30. browser_use/browser/video_recorder.py +162 -0
  31. browser_use/browser/views.py +200 -0
  32. browser_use/browser/watchdog_base.py +260 -0
  33. browser_use/browser/watchdogs/__init__.py +0 -0
  34. browser_use/browser/watchdogs/aboutblank_watchdog.py +253 -0
  35. browser_use/browser/watchdogs/crash_watchdog.py +335 -0
  36. browser_use/browser/watchdogs/default_action_watchdog.py +2729 -0
  37. browser_use/browser/watchdogs/dom_watchdog.py +817 -0
  38. browser_use/browser/watchdogs/downloads_watchdog.py +1277 -0
  39. browser_use/browser/watchdogs/local_browser_watchdog.py +461 -0
  40. browser_use/browser/watchdogs/permissions_watchdog.py +43 -0
  41. browser_use/browser/watchdogs/popups_watchdog.py +143 -0
  42. browser_use/browser/watchdogs/recording_watchdog.py +126 -0
  43. browser_use/browser/watchdogs/screenshot_watchdog.py +62 -0
  44. browser_use/browser/watchdogs/security_watchdog.py +280 -0
  45. browser_use/browser/watchdogs/storage_state_watchdog.py +335 -0
  46. browser_use/cli.py +2359 -0
  47. browser_use/code_use/__init__.py +16 -0
  48. browser_use/code_use/formatting.py +192 -0
  49. browser_use/code_use/namespace.py +665 -0
  50. browser_use/code_use/notebook_export.py +276 -0
  51. browser_use/code_use/service.py +1340 -0
  52. browser_use/code_use/system_prompt.md +574 -0
  53. browser_use/code_use/utils.py +150 -0
  54. browser_use/code_use/views.py +171 -0
  55. browser_use/config.py +505 -0
  56. browser_use/controller/__init__.py +3 -0
  57. browser_use/dom/enhanced_snapshot.py +161 -0
  58. browser_use/dom/markdown_extractor.py +169 -0
  59. browser_use/dom/playground/extraction.py +312 -0
  60. browser_use/dom/playground/multi_act.py +32 -0
  61. browser_use/dom/serializer/clickable_elements.py +200 -0
  62. browser_use/dom/serializer/code_use_serializer.py +287 -0
  63. browser_use/dom/serializer/eval_serializer.py +478 -0
  64. browser_use/dom/serializer/html_serializer.py +212 -0
  65. browser_use/dom/serializer/paint_order.py +197 -0
  66. browser_use/dom/serializer/serializer.py +1170 -0
  67. browser_use/dom/service.py +825 -0
  68. browser_use/dom/utils.py +129 -0
  69. browser_use/dom/views.py +906 -0
  70. browser_use/exceptions.py +5 -0
  71. browser_use/filesystem/__init__.py +0 -0
  72. browser_use/filesystem/file_system.py +619 -0
  73. browser_use/init_cmd.py +376 -0
  74. browser_use/integrations/gmail/__init__.py +24 -0
  75. browser_use/integrations/gmail/actions.py +115 -0
  76. browser_use/integrations/gmail/service.py +225 -0
  77. browser_use/llm/__init__.py +155 -0
  78. browser_use/llm/anthropic/chat.py +242 -0
  79. browser_use/llm/anthropic/serializer.py +312 -0
  80. browser_use/llm/aws/__init__.py +36 -0
  81. browser_use/llm/aws/chat_anthropic.py +242 -0
  82. browser_use/llm/aws/chat_bedrock.py +289 -0
  83. browser_use/llm/aws/serializer.py +257 -0
  84. browser_use/llm/azure/chat.py +91 -0
  85. browser_use/llm/base.py +57 -0
  86. browser_use/llm/browser_use/__init__.py +3 -0
  87. browser_use/llm/browser_use/chat.py +201 -0
  88. browser_use/llm/cerebras/chat.py +193 -0
  89. browser_use/llm/cerebras/serializer.py +109 -0
  90. browser_use/llm/deepseek/chat.py +212 -0
  91. browser_use/llm/deepseek/serializer.py +109 -0
  92. browser_use/llm/exceptions.py +29 -0
  93. browser_use/llm/google/__init__.py +3 -0
  94. browser_use/llm/google/chat.py +542 -0
  95. browser_use/llm/google/serializer.py +120 -0
  96. browser_use/llm/groq/chat.py +229 -0
  97. browser_use/llm/groq/parser.py +158 -0
  98. browser_use/llm/groq/serializer.py +159 -0
  99. browser_use/llm/messages.py +238 -0
  100. browser_use/llm/models.py +271 -0
  101. browser_use/llm/oci_raw/__init__.py +10 -0
  102. browser_use/llm/oci_raw/chat.py +443 -0
  103. browser_use/llm/oci_raw/serializer.py +229 -0
  104. browser_use/llm/ollama/chat.py +97 -0
  105. browser_use/llm/ollama/serializer.py +143 -0
  106. browser_use/llm/openai/chat.py +264 -0
  107. browser_use/llm/openai/like.py +15 -0
  108. browser_use/llm/openai/serializer.py +165 -0
  109. browser_use/llm/openrouter/chat.py +211 -0
  110. browser_use/llm/openrouter/serializer.py +26 -0
  111. browser_use/llm/schema.py +176 -0
  112. browser_use/llm/views.py +48 -0
  113. browser_use/logging_config.py +330 -0
  114. browser_use/mcp/__init__.py +18 -0
  115. browser_use/mcp/__main__.py +12 -0
  116. browser_use/mcp/client.py +544 -0
  117. browser_use/mcp/controller.py +264 -0
  118. browser_use/mcp/server.py +1114 -0
  119. browser_use/observability.py +204 -0
  120. browser_use/py.typed +0 -0
  121. browser_use/sandbox/__init__.py +41 -0
  122. browser_use/sandbox/sandbox.py +637 -0
  123. browser_use/sandbox/views.py +132 -0
  124. browser_use/screenshots/__init__.py +1 -0
  125. browser_use/screenshots/service.py +52 -0
  126. browser_use/sync/__init__.py +6 -0
  127. browser_use/sync/auth.py +357 -0
  128. browser_use/sync/service.py +161 -0
  129. browser_use/telemetry/__init__.py +51 -0
  130. browser_use/telemetry/service.py +112 -0
  131. browser_use/telemetry/views.py +101 -0
  132. browser_use/tokens/__init__.py +0 -0
  133. browser_use/tokens/custom_pricing.py +24 -0
  134. browser_use/tokens/mappings.py +4 -0
  135. browser_use/tokens/service.py +580 -0
  136. browser_use/tokens/views.py +108 -0
  137. browser_use/tools/registry/service.py +572 -0
  138. browser_use/tools/registry/views.py +174 -0
  139. browser_use/tools/service.py +1675 -0
  140. browser_use/tools/utils.py +82 -0
  141. browser_use/tools/views.py +100 -0
  142. browser_use/utils.py +670 -0
  143. optexity_browser_use-0.9.5.dist-info/METADATA +344 -0
  144. optexity_browser_use-0.9.5.dist-info/RECORD +147 -0
  145. optexity_browser_use-0.9.5.dist-info/WHEEL +4 -0
  146. optexity_browser_use-0.9.5.dist-info/entry_points.txt +3 -0
  147. optexity_browser_use-0.9.5.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,5 @@
1
class LLMException(Exception):
    """Exception that carries a status code together with a message.

    Both constructor arguments are kept as attributes, and the exception text
    is rendered as ``Error <status_code>: <message>``.
    """

    def __init__(self, status_code, message):
        super().__init__(f'Error {status_code}: {message}')
        self.status_code = status_code
        self.message = message
File without changes
@@ -0,0 +1,619 @@
1
+ import asyncio
2
+ import base64
3
+ import os
4
+ import re
5
+ import shutil
6
+ from abc import ABC, abstractmethod
7
+ from concurrent.futures import ThreadPoolExecutor
8
+ from pathlib import Path
9
+ from typing import Any
10
+
11
+ from pydantic import BaseModel, Field
12
+ from reportlab.lib.pagesizes import letter
13
+ from reportlab.lib.styles import getSampleStyleSheet
14
+ from reportlab.platypus import Paragraph, SimpleDocTemplate, Spacer
15
+
16
# Message handed back to the caller when a filename fails validation.
INVALID_FILENAME_ERROR_MESSAGE = 'Error: Invalid filename format. Must be alphanumeric with supported extension.'
# Name of the subfolder, created under the caller-supplied base directory, that holds all managed files.
DEFAULT_FILE_SYSTEM_PATH = 'browseruse_agent_data'
18
+
19
+
20
class FileSystemError(Exception):
    """File system failure whose message is meant to be shown to the LLM."""
24
+
25
+
26
class BaseFile(BaseModel, ABC):
    """Abstract in-memory file: a name, its text content, and helpers to mirror it to disk.

    Subclasses must provide ``extension``. Formats that are not plain text can
    override ``sync_to_disk_sync`` to control how the content is serialised;
    the async ``sync_to_disk`` delegates to it.
    """

    name: str
    content: str = ''

    # --- Subclass must define this ---
    @property
    @abstractmethod
    def extension(self) -> str:
        """File extension without the leading dot (e.g. 'txt', 'md')."""

    def write_file_content(self, content: str) -> None:
        """Replace the in-memory content with ``content``."""
        self.update_content(content)

    def append_file_content(self, content: str) -> None:
        """Append ``content`` to the end of the in-memory content."""
        self.update_content(self.content + content)

    # --- These are shared and implemented here ---

    def update_content(self, content: str) -> None:
        """Store ``content`` as the file's current content."""
        self.content = content

    def sync_to_disk_sync(self, path: Path) -> None:
        """Write the current content to ``path / full_name`` (blocking)."""
        # Explicit UTF-8: the platform default encoding (e.g. cp1252 on Windows)
        # cannot represent every character the content may hold.
        (path / self.full_name).write_text(self.content, encoding='utf-8')

    async def sync_to_disk(self, path: Path) -> None:
        """Write the current content to ``path / full_name`` from a worker thread.

        Delegates to ``sync_to_disk_sync`` so a subclass that customises the
        on-disk format only has to override the blocking variant.
        """
        await asyncio.to_thread(self.sync_to_disk_sync, path)

    async def write(self, content: str, path: Path) -> None:
        """Replace the content with ``content`` and persist it under ``path``."""
        self.write_file_content(content)
        await self.sync_to_disk(path)

    async def append(self, content: str, path: Path) -> None:
        """Append ``content`` to the file and persist the result under ``path``."""
        self.append_file_content(content)
        await self.sync_to_disk(path)

    def read(self) -> str:
        """Return the in-memory content."""
        return self.content

    @property
    def full_name(self) -> str:
        """Filename including extension, i.e. ``<name>.<extension>``."""
        return f'{self.name}.{self.extension}'

    @property
    def get_size(self) -> int:
        """Length of the content in characters."""
        return len(self.content)

    @property
    def get_line_count(self) -> int:
        """Number of lines in the content, as counted by ``str.splitlines``."""
        return len(self.content.splitlines())
83
+
84
+
85
class MarkdownFile(BaseFile):
    """File type for Markdown documents."""

    @property
    def extension(self) -> str:
        """Extension (without the dot) used for Markdown files."""
        return 'md'
91
+
92
+
93
class TxtFile(BaseFile):
    """File type for plain text documents."""

    @property
    def extension(self) -> str:
        """Extension (without the dot) used for plain text files."""
        return 'txt'
99
+
100
+
101
class JsonFile(BaseFile):
    """File type for JSON documents."""

    @property
    def extension(self) -> str:
        """Extension (without the dot) used for JSON files."""
        return 'json'
107
+
108
+
109
class CsvFile(BaseFile):
    """File type for CSV documents."""

    @property
    def extension(self) -> str:
        """Extension (without the dot) used for CSV files."""
        return 'csv'
115
+
116
+
117
class JsonlFile(BaseFile):
    """File type for JSONL (JSON Lines) documents."""

    @property
    def extension(self) -> str:
        """Extension (without the dot) used for JSONL files."""
        return 'jsonl'
123
+
124
+
125
class PdfFile(BaseFile):
    """PDF file: the text content is rendered to a PDF document with reportlab on sync."""

    @property
    def extension(self) -> str:
        """Extension (without the dot) used for PDF files."""
        return 'pdf'

    def sync_to_disk_sync(self, path: Path) -> None:
        """Render the content as a PDF at ``path / full_name`` (blocking).

        Lines starting with ``# ``, ``## `` or ``### `` are rendered with the
        'Title', 'Heading1' and 'Heading2' styles respectively; other non-blank
        lines use the 'Normal' style and blank lines become vertical space.

        Raises:
            FileSystemError: if the document cannot be built or written.
        """
        # Imported locally so this block does not depend on a new file-level import.
        from xml.sax.saxutils import escape

        file_path = path / self.full_name
        # Markdown heading marker -> reportlab sample style name.
        heading_styles = (('# ', 'Title'), ('## ', 'Heading1'), ('### ', 'Heading2'))
        try:
            doc = SimpleDocTemplate(str(file_path), pagesize=letter)
            styles = getSampleStyleSheet()
            story = []

            # Content is treated as plain text with only basic header handling;
            # this avoids the AGPL license issue while maintaining functionality.
            for line in self.content.split('\n'):
                if not line.strip():
                    story.append(Spacer(1, 6))
                    continue

                text, style_name = line, 'Normal'
                for marker, heading_style in heading_styles:
                    if line.startswith(marker):
                        text, style_name = line[len(marker) :], heading_style
                        break

                # Paragraph parses its text as XML-like markup, so literal '<', '>'
                # and '&' must be escaped to be rendered as plain text.
                story.append(Paragraph(escape(text), styles[style_name]))

            doc.build(story)
        except Exception as e:
            raise FileSystemError(f"Error: Could not write to file '{self.full_name}'. {str(e)}") from e

    async def sync_to_disk(self, path: Path) -> None:
        """Render the PDF from a worker thread so the event loop is not blocked."""
        await asyncio.to_thread(self.sync_to_disk_sync, path)
167
+
168
+
169
class DocxFile(BaseFile):
    """DOCX file: the text content is converted to a Word document on sync."""

    @property
    def extension(self) -> str:
        """Extension (without the dot) used for DOCX files."""
        return 'docx'

    def sync_to_disk_sync(self, path: Path) -> None:
        """Write the content as a DOCX document at ``path / full_name`` (blocking).

        Lines starting with ``# ``, ``## `` or ``### `` become headings of level
        1, 2 and 3; other non-blank lines become paragraphs and blank lines
        become empty paragraphs used for spacing.

        Raises:
            FileSystemError: if the document cannot be created or saved; this
                also covers a failing ``docx`` import, which happens inside the
                guarded section.
        """
        file_path = path / self.full_name
        # Markdown heading marker -> DOCX heading level.
        heading_levels = (('# ', 1), ('## ', 2), ('### ', 3))
        try:
            from docx import Document

            doc = Document()

            for line in self.content.split('\n'):
                if not line.strip():
                    doc.add_paragraph()  # Empty paragraph for spacing
                    continue

                for marker, level in heading_levels:
                    if line.startswith(marker):
                        doc.add_heading(line[len(marker) :], level=level)
                        break
                else:
                    # No heading marker matched: ordinary paragraph.
                    doc.add_paragraph(line)

            doc.save(str(file_path))
        except Exception as e:
            raise FileSystemError(f"Error: Could not write to file '{self.full_name}'. {str(e)}") from e

    async def sync_to_disk(self, path: Path) -> None:
        """Write the DOCX document from a worker thread so the event loop is not blocked."""
        await asyncio.to_thread(self.sync_to_disk_sync, path)
207
+
208
+
209
class FileSystemState(BaseModel):
    """Serializable snapshot of a file system."""

    # Maps each full filename to that file's serialized data.
    files: dict[str, dict[str, Any]] = Field(default_factory=dict)
    # Base directory of the file system, stored as a string.
    base_dir: str
    # Counter value used for numbering extracted-content files.
    extracted_content_count: int = 0
215
+
216
+
217
class FileSystem:
    """Enhanced file system with in-memory storage and multiple file type support.

    Files are kept as ``BaseFile`` objects in ``self.files`` (keyed by full
    filename) and mirrored to a dedicated subfolder of the base directory.
    """

    def __init__(self, base_dir: str | Path, create_default_files: bool = True):
        """Create a fresh file system under ``base_dir``.

        Args:
            base_dir: Directory under which the data subfolder is created; it is
                created (with parents) when missing.
            create_default_files: When True, the default files (``todo.md``) are
                created in memory and on disk.

        Note:
            An already existing data subfolder is deleted before being recreated.
        """
        self.base_dir = Path(base_dir) if isinstance(base_dir, str) else base_dir
        self.base_dir.mkdir(parents=True, exist_ok=True)

        # Create and use a dedicated subfolder for all operations
        self.data_dir = self.base_dir / DEFAULT_FILE_SYSTEM_PATH
        if self.data_dir.exists():
            # clean the data directory
            shutil.rmtree(self.data_dir)
        self.data_dir.mkdir(exist_ok=True)

        self._file_types: dict[str, type[BaseFile]] = {
            'md': MarkdownFile,
            'txt': TxtFile,
            'json': JsonFile,
            'jsonl': JsonlFile,
            'csv': CsvFile,
            'pdf': PdfFile,
            'docx': DocxFile,
        }

        self.files: dict[str, BaseFile] = {}
        # Always defined, so the attribute also exists when default files are skipped.
        self.default_files: list[str] = ['todo.md'] if create_default_files else []
        if create_default_files:
            self._create_default_files()

        self.extracted_content_count = 0

    def get_allowed_extensions(self) -> list[str]:
        """Return the supported file extensions."""
        return list(self._file_types.keys())

    def _get_file_type_class(self, extension: str) -> type[BaseFile] | None:
        """Return the file class registered for ``extension`` (case-insensitive), or None."""
        return self._file_types.get(extension.lower(), None)

    def _create_default_files(self) -> None:
        """Create every file listed in ``self.default_files`` in memory and on disk.

        Raises:
            ValueError: if a default filename has an unsupported extension.
        """
        for full_filename in self.default_files:
            name_without_ext, extension = self._parse_filename(full_filename)
            file_class = self._get_file_type_class(extension)
            if not file_class:
                raise ValueError(f"Error: Invalid file extension '{extension}' for file '{full_filename}'.")

            file_obj = file_class(name=name_without_ext)
            self.files[full_filename] = file_obj  # Use full filename as key
            file_obj.sync_to_disk_sync(self.data_dir)

    def _is_valid_filename(self, file_name: str) -> bool:
        """Check that the basename of ``file_name`` has the form ``name.extension``.

        The name part may contain ASCII letters, digits, '_', '-' and characters
        in the range U+4E00..U+9FFF; the extension must be a supported one.
        """
        # Build extensions pattern from _file_types
        extensions = '|'.join(re.escape(ext) for ext in self._file_types)
        pattern = rf'[a-zA-Z0-9_\-\u4e00-\u9fff]+\.({extensions})'
        # NOTE(review): only the basename is validated, while callers use the full
        # string to build paths (write_file -> file name -> data_dir / full_name).
        # A name with directory components such as '../x.md' therefore passes;
        # confirm whether that is intended before tightening this check.
        file_name_base = os.path.basename(file_name)
        # fullmatch rather than match with '$': '$' also matches just before a
        # trailing newline, which would let 'name.md\n' through.
        return re.fullmatch(pattern, file_name_base) is not None

    def _parse_filename(self, filename: str) -> tuple[str, str]:
        """Split ``filename`` at its last dot into (name, lower-cased extension).

        Always check ``_is_valid_filename`` first: a filename without a dot
        makes the unpacking fail with ValueError.
        """
        name, extension = filename.rsplit('.', 1)
        return name, extension.lower()

    def get_dir(self) -> Path:
        """Return the directory that holds the managed files."""
        return self.data_dir

    def get_file(self, full_filename: str) -> BaseFile | None:
        """Return the file object stored under ``full_filename``.

        Returns None when the filename is invalid or no such file exists.
        """
        if not self._is_valid_filename(full_filename):
            return None

        # Use full filename as key
        return self.files.get(full_filename)

    def list_files(self) -> list[str]:
        """Return the full names of all files in the system."""
        return [file_obj.full_name for file_obj in self.files.values()]

    def display_file(self, full_filename: str) -> str | None:
        """Return the content of ``full_filename``, or None if it is invalid or missing."""
        # get_file already validates the filename.
        file_obj = self.get_file(full_filename)
        return file_obj.read() if file_obj else None

    async def read_file_structured(self, full_filename: str, external_file: bool = False) -> dict[str, Any]:
        """Read file and return structured data including images if applicable.

        Args:
            full_filename: Name of a managed file, or a path on disk when
                ``external_file`` is True.
            external_file: When True the file is read from disk at
                ``full_filename`` instead of from the in-memory store.

        Returns:
            dict with keys:
                - 'message': str - The message to display
                - 'images': list[dict] | None - Image data if file is an image: [{"name": str, "data": base64_str}]

        Errors are reported through 'message' rather than raised.
        """
        result: dict[str, Any] = {'message': '', 'images': None}

        if external_file:
            try:
                try:
                    _, extension = self._parse_filename(full_filename)
                except Exception:
                    result['message'] = (
                        f'Error: Invalid filename format {full_filename}. Must be alphanumeric with a supported extension.'
                    )
                    return result

                if extension in ['md', 'txt', 'json', 'jsonl', 'csv']:
                    import anyio

                    async with await anyio.open_file(full_filename, 'r') as f:
                        content = await f.read()
                        result['message'] = f'Read from file {full_filename}.\n<content>\n{content}\n</content>'
                        return result

                elif extension == 'docx':
                    from docx import Document

                    doc = Document(full_filename)
                    content = '\n'.join([para.text for para in doc.paragraphs])
                    result['message'] = f'Read from file {full_filename}.\n<content>\n{content}\n</content>'
                    return result

                elif extension == 'pdf':
                    import pypdf

                    reader = pypdf.PdfReader(full_filename)
                    num_pages = len(reader.pages)
                    MAX_PDF_PAGES = 20
                    extra_pages = num_pages - MAX_PDF_PAGES
                    # Only the first MAX_PDF_PAGES pages are extracted; a page that
                    # yields no text contributes an empty string.
                    extracted_text = ''.join(page.extract_text() or '' for page in reader.pages[:MAX_PDF_PAGES])
                    extra_pages_text = f'{extra_pages} more pages...' if extra_pages > 0 else ''
                    result['message'] = (
                        f'Read from file {full_filename}.\n<content>\n{extracted_text}\n{extra_pages_text}</content>'
                    )
                    return result

                elif extension in ['jpg', 'jpeg', 'png']:
                    import anyio

                    # Read image file and convert to base64
                    async with await anyio.open_file(full_filename, 'rb') as f:
                        img_data = await f.read()

                    base64_str = base64.b64encode(img_data).decode('utf-8')

                    result['message'] = f'Read image file {full_filename}.'
                    result['images'] = [{'name': os.path.basename(full_filename), 'data': base64_str}]
                    return result

                else:
                    result['message'] = f'Error: Cannot read file {full_filename} as {extension} extension is not supported.'
                    return result

            except FileNotFoundError:
                result['message'] = f"Error: File '{full_filename}' not found."
                return result
            except PermissionError:
                result['message'] = f"Error: Permission denied to read file '{full_filename}'."
                return result
            except Exception as e:
                result['message'] = f"Error: Could not read file '{full_filename}'. {str(e)}"
                return result

        # For internal files, only non-image types are supported
        if not self._is_valid_filename(full_filename):
            result['message'] = INVALID_FILENAME_ERROR_MESSAGE
            return result

        file_obj = self.get_file(full_filename)
        if not file_obj:
            result['message'] = f"File '{full_filename}' not found."
            return result

        try:
            content = file_obj.read()
            result['message'] = f'Read from file {full_filename}.\n<content>\n{content}\n</content>'
            return result
        except FileSystemError as e:
            result['message'] = str(e)
            return result
        except Exception as e:
            result['message'] = f"Error: Could not read file '{full_filename}'. {str(e)}"
            return result

    async def read_file(self, full_filename: str, external_file: bool = False) -> str:
        """Read a file and return the message intended for the LLM.

        Note: For image files, use read_file_structured() to get image data.
        """
        result = await self.read_file_structured(full_filename, external_file)
        return result['message']

    async def write_file(self, full_filename: str, content: str) -> str:
        """Write ``content`` to ``full_filename``, creating the file if needed.

        Returns a success message, or an error message when the filename is
        invalid or the write fails; no exception is propagated.
        """
        if not self._is_valid_filename(full_filename):
            return INVALID_FILENAME_ERROR_MESSAGE

        try:
            name_without_ext, extension = self._parse_filename(full_filename)
            file_class = self._get_file_type_class(extension)
            if not file_class:
                raise ValueError(f"Error: Invalid file extension '{extension}' for file '{full_filename}'.")

            # Create or get existing file using full filename as key
            if full_filename in self.files:
                file_obj = self.files[full_filename]
            else:
                file_obj = file_class(name=name_without_ext)
                self.files[full_filename] = file_obj  # Use full filename as key

            # Use file-specific write method
            await file_obj.write(content, self.data_dir)
            return f'Data written to file {full_filename} successfully.'
        except FileSystemError as e:
            return str(e)
        except Exception as e:
            return f"Error: Could not write to file '{full_filename}'. {str(e)}"

    async def append_file(self, full_filename: str, content: str) -> str:
        """Append ``content`` to an existing file.

        Returns a success message, or an error message when the filename is
        invalid, the file does not exist, or the write fails.
        """
        if not self._is_valid_filename(full_filename):
            return INVALID_FILENAME_ERROR_MESSAGE

        file_obj = self.get_file(full_filename)
        if not file_obj:
            return f"File '{full_filename}' not found."

        try:
            await file_obj.append(content, self.data_dir)
            return f'Data appended to file {full_filename} successfully.'
        except FileSystemError as e:
            return str(e)
        except Exception as e:
            return f"Error: Could not append to file '{full_filename}'. {str(e)}"

    async def replace_file_str(self, full_filename: str, old_str: str, new_str: str) -> str:
        """Replace every occurrence of ``old_str`` with ``new_str`` in an existing file.

        Returns a success message, or an error message when the filename is
        invalid, ``old_str`` is empty, the file does not exist, or the write fails.
        """
        if not self._is_valid_filename(full_filename):
            return INVALID_FILENAME_ERROR_MESSAGE

        if not old_str:
            return 'Error: Cannot replace empty string. Please provide a non-empty string to replace.'

        file_obj = self.get_file(full_filename)
        if not file_obj:
            return f"File '{full_filename}' not found."

        try:
            content = file_obj.read()
            content = content.replace(old_str, new_str)
            await file_obj.write(content, self.data_dir)
            return f'Successfully replaced all occurrences of "{old_str}" with "{new_str}" in file {full_filename}'
        except FileSystemError as e:
            return str(e)
        except Exception as e:
            return f"Error: Could not replace string in file '{full_filename}'. {str(e)}"

    async def save_extracted_content(self, content: str) -> str:
        """Save ``content`` to a new numbered markdown file and return its filename.

        The file is named ``extracted_content_<n>.md`` where ``n`` is the current
        counter value; the counter is incremented afterwards.
        """
        initial_filename = f'extracted_content_{self.extracted_content_count}'
        extracted_filename = f'{initial_filename}.md'
        file_obj = MarkdownFile(name=initial_filename)
        await file_obj.write(content, self.data_dir)
        self.files[extracted_filename] = file_obj
        self.extracted_content_count += 1
        return extracted_filename

    @staticmethod
    def _take_lines(lines: list[str], max_chars: int) -> list[str]:
        """Return the longest prefix of ``lines`` whose length, counting one newline per line, fits in ``max_chars``."""
        taken: list[str] = []
        used = 0
        for line in lines:
            if used + len(line) + 1 > max_chars:
                break
            taken.append(line)
            used += len(line) + 1
        return taken

    def describe(self) -> str:
        """Describe all files except ``todo.md`` as a sequence of ``<file>`` blocks.

        Empty files are marked as such, small files are shown in full, and larger
        files are shown as a preview of their first and last lines with the
        number of omitted lines in between.
        """
        DISPLAY_CHARS = 400
        parts: list[str] = []

        for file_obj in self.files.values():
            # Skip todo.md from description
            if file_obj.full_name == 'todo.md':
                continue

            content = file_obj.read()

            # Handle empty files
            if not content:
                parts.append(f'<file>\n{file_obj.full_name} - [empty file]\n</file>\n')
                continue

            lines = content.splitlines()
            line_count = len(lines)

            # For small files, display the entire content
            whole_file_description = (
                f'<file>\n{file_obj.full_name} - {line_count} lines\n<content>\n{content}\n</content>\n</file>\n'
            )
            if len(content) < int(1.5 * DISPLAY_CHARS):
                parts.append(whole_file_description)
                continue

            # For larger files, display start and end previews
            half_display_chars = DISPLAY_CHARS // 2
            head = self._take_lines(lines, half_display_chars)
            # Collected from the end backwards, then restored to file order.
            tail = self._take_lines(lines[::-1], half_display_chars)[::-1]

            # Calculate lines in between; if the previews cover everything, show the whole file
            middle_line_count = line_count - len(head) - len(tail)
            if middle_line_count <= 0:
                parts.append(whole_file_description)
                continue

            start_preview = ''.join(f'{line}\n' for line in head).strip('\n').rstrip()
            end_preview = ''.join(f'{line}\n' for line in tail).strip('\n').rstrip()

            # Format output
            if not (start_preview or end_preview):
                parts.append(
                    f'<file>\n{file_obj.full_name} - {line_count} lines\n<content>\n{middle_line_count} lines...\n</content>\n</file>\n'
                )
            else:
                parts.append(f'<file>\n{file_obj.full_name} - {line_count} lines\n<content>\n{start_preview}\n')
                parts.append(f'... {middle_line_count} more lines ...\n')
                parts.append(f'{end_preview}\n')
                parts.append('</content>\n</file>\n')

        return ''.join(parts).strip('\n')

    def get_todo_contents(self) -> str:
        """Return the content of ``todo.md``, or an empty string if it does not exist."""
        todo_file = self.get_file('todo.md')
        return todo_file.read() if todo_file else ''

    def get_state(self) -> FileSystemState:
        """Return a serializable snapshot of the file system.

        Each file is stored as its class name ('type') plus its dumped model
        data ('data').
        """
        files_data = {
            full_filename: {'type': file_obj.__class__.__name__, 'data': file_obj.model_dump()}
            for full_filename, file_obj in self.files.items()
        }

        return FileSystemState(
            files=files_data, base_dir=str(self.base_dir), extracted_content_count=self.extracted_content_count
        )

    def nuke(self) -> None:
        """Delete the file system directory"""
        shutil.rmtree(self.data_dir)

    @classmethod
    def from_state(cls, state: FileSystemState) -> 'FileSystem':
        """Restore file system from serializable state at the exact same location.

        Files whose recorded type is not a supported file class are skipped;
        every restored file is written to disk again.
        """
        # Create file system without default files
        fs = cls(base_dir=Path(state.base_dir), create_default_files=False)
        fs.extracted_content_count = state.extracted_content_count

        # Derived from the registered file types so the two cannot drift apart.
        classes_by_name = {file_class.__name__: file_class for file_class in fs._file_types.values()}

        # Restore all files
        for full_filename, file_data in state.files.items():
            file_type = file_data['type']
            file_info = file_data['data']

            file_class = classes_by_name.get(file_type)
            if file_class is None:
                # Skip unknown file types
                continue

            # Add to files dict and sync to disk
            file_obj = file_class(**file_info)
            fs.files[full_filename] = file_obj
            file_obj.sync_to_disk_sync(fs.data_dir)

        return fs