autoforge-ai 0.1.17 → 0.1.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "autoforge-ai",
3
- "version": "0.1.17",
3
+ "version": "0.1.18",
4
4
  "description": "Autonomous coding agent with web UI - build complete apps with AI",
5
5
  "license": "AGPL-3.0",
6
6
  "bin": {
@@ -12,3 +12,7 @@ aiofiles>=24.0.0
12
12
  apscheduler>=3.10.0,<4.0.0
13
13
  pywinpty>=2.0.0; sys_platform == "win32"
14
14
  pyyaml>=6.0.0
15
+ python-docx>=1.1.0
16
+ openpyxl>=3.1.0
17
+ PyPDF2>=3.0.0
18
+ python-pptx>=1.0.0
@@ -13,7 +13,7 @@ from typing import Optional
13
13
  from fastapi import APIRouter, HTTPException, WebSocket, WebSocketDisconnect
14
14
  from pydantic import BaseModel, ValidationError
15
15
 
16
- from ..schemas import ImageAttachment
16
+ from ..schemas import FileAttachment
17
17
  from ..services.expand_chat_session import (
18
18
  ExpandChatSession,
19
19
  create_expand_session,
@@ -181,12 +181,12 @@ async def expand_project_websocket(websocket: WebSocket, project_name: str):
181
181
  user_content = message.get("content", "").strip()
182
182
 
183
183
  # Parse attachments if present
184
- attachments: list[ImageAttachment] = []
184
+ attachments: list[FileAttachment] = []
185
185
  raw_attachments = message.get("attachments", [])
186
186
  if raw_attachments:
187
187
  try:
188
188
  for raw_att in raw_attachments:
189
- attachments.append(ImageAttachment(**raw_att))
189
+ attachments.append(FileAttachment(**raw_att))
190
190
  except (ValidationError, Exception) as e:
191
191
  logger.warning(f"Invalid attachment data: {e}")
192
192
  await websocket.send_json({
@@ -12,7 +12,7 @@ from typing import Optional
12
12
  from fastapi import APIRouter, HTTPException, WebSocket, WebSocketDisconnect
13
13
  from pydantic import BaseModel, ValidationError
14
14
 
15
- from ..schemas import ImageAttachment
15
+ from ..schemas import FileAttachment
16
16
  from ..services.spec_chat_session import (
17
17
  SpecChatSession,
18
18
  create_session,
@@ -242,12 +242,12 @@ async def spec_chat_websocket(websocket: WebSocket, project_name: str):
242
242
  user_content = message.get("content", "").strip()
243
243
 
244
244
  # Parse attachments if present
245
- attachments: list[ImageAttachment] = []
245
+ attachments: list[FileAttachment] = []
246
246
  raw_attachments = message.get("attachments", [])
247
247
  if raw_attachments:
248
248
  try:
249
249
  for raw_att in raw_attachments:
250
- attachments.append(ImageAttachment(**raw_att))
250
+ attachments.append(FileAttachment(**raw_att))
251
251
  except (ValidationError, Exception) as e:
252
252
  logger.warning(f"Invalid attachment data: {e}")
253
253
  await websocket.send_json({
package/server/schemas.py CHANGED
@@ -11,7 +11,7 @@ from datetime import datetime
11
11
  from pathlib import Path
12
12
  from typing import Literal
13
13
 
14
- from pydantic import BaseModel, Field, field_validator
14
+ from pydantic import BaseModel, Field, field_validator, model_validator
15
15
 
16
16
  # Import model constants from registry (single source of truth)
17
17
  _root = Path(__file__).parent.parent
@@ -331,36 +331,61 @@ class WSAgentUpdateMessage(BaseModel):
331
331
 
332
332
 
333
333
  # ============================================================================
334
- # Spec Chat Schemas
334
+ # Chat Attachment Schemas
335
335
  # ============================================================================
336
336
 
337
- # Maximum image file size: 5 MB
338
- MAX_IMAGE_SIZE = 5 * 1024 * 1024
337
+ # Size limits
338
+ MAX_IMAGE_SIZE = 5 * 1024 * 1024 # 5 MB for images
339
+ MAX_DOCUMENT_SIZE = 20 * 1024 * 1024 # 20 MB for documents
339
340
 
341
+ _IMAGE_MIME_TYPES = {'image/jpeg', 'image/png'}
340
342
 
341
- class ImageAttachment(BaseModel):
342
- """Image attachment from client for spec creation chat."""
343
+
344
+ class FileAttachment(BaseModel):
345
+ """File attachment from client for spec creation / expand project chat."""
343
346
  filename: str = Field(..., min_length=1, max_length=255)
344
- mimeType: Literal['image/jpeg', 'image/png']
347
+ mimeType: Literal[
348
+ 'image/jpeg', 'image/png',
349
+ 'text/plain', 'text/markdown', 'text/csv',
350
+ 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
351
+ 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
352
+ 'application/pdf',
353
+ 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
354
+ ]
345
355
  base64Data: str
346
356
 
347
357
  @field_validator('base64Data')
348
358
  @classmethod
349
- def validate_base64_and_size(cls, v: str) -> str:
350
- """Validate that base64 data is valid and within size limit."""
359
+ def validate_base64(cls, v: str) -> str:
360
+ """Validate that base64 data is decodable."""
351
361
  try:
352
- decoded = base64.b64decode(v)
353
- if len(decoded) > MAX_IMAGE_SIZE:
354
- raise ValueError(
355
- f'Image size ({len(decoded) / (1024 * 1024):.1f} MB) exceeds '
356
- f'maximum of {MAX_IMAGE_SIZE // (1024 * 1024)} MB'
357
- )
362
+ base64.b64decode(v)
358
363
  return v
359
364
  except Exception as e:
360
- if 'Image size' in str(e):
361
- raise
362
365
  raise ValueError(f'Invalid base64 data: {e}')
363
366
 
367
+ @model_validator(mode='after')
368
+ def validate_size(self) -> 'FileAttachment':
369
+ """Validate file size based on MIME type."""
370
+ try:
371
+ decoded = base64.b64decode(self.base64Data)
372
+ except Exception:
373
+ return self # Already caught by field validator
374
+
375
+ if self.mimeType in _IMAGE_MIME_TYPES:
376
+ max_size = MAX_IMAGE_SIZE
377
+ label = "Image"
378
+ else:
379
+ max_size = MAX_DOCUMENT_SIZE
380
+ label = "Document"
381
+
382
+ if len(decoded) > max_size:
383
+ raise ValueError(
384
+ f'{label} size ({len(decoded) / (1024 * 1024):.1f} MB) exceeds '
385
+ f'maximum of {max_size // (1024 * 1024)} MB'
386
+ )
387
+ return self
388
+
364
389
 
365
390
  # ============================================================================
366
391
  # Filesystem Schemas
@@ -35,6 +35,13 @@ if _root_str not in sys.path:
35
35
  from env_constants import API_ENV_VARS # noqa: E402, F401
36
36
  from rate_limit_utils import is_rate_limit_error, parse_retry_after # noqa: E402, F401
37
37
 
38
+ from ..schemas import FileAttachment
39
+ from ..utils.document_extraction import (
40
+ extract_text_from_document,
41
+ is_document,
42
+ is_image,
43
+ )
44
+
38
45
  logger = logging.getLogger(__name__)
39
46
 
40
47
 
@@ -88,6 +95,35 @@ async def safe_receive_response(client: Any, log: logging.Logger) -> AsyncGenera
88
95
  raise
89
96
 
90
97
 
98
+ def build_attachment_content_blocks(attachments: list[FileAttachment]) -> list[dict]:
99
+ """Convert FileAttachment objects to Claude API content blocks.
100
+
101
+ Images become image content blocks (passed directly to Claude's vision).
102
+ Documents are extracted to text and become text content blocks.
103
+
104
+ Raises:
105
+ DocumentExtractionError: If a document cannot be read.
106
+ """
107
+ blocks: list[dict] = []
108
+ for att in attachments:
109
+ if is_image(att.mimeType):
110
+ blocks.append({
111
+ "type": "image",
112
+ "source": {
113
+ "type": "base64",
114
+ "media_type": att.mimeType,
115
+ "data": att.base64Data,
116
+ }
117
+ })
118
+ elif is_document(att.mimeType):
119
+ text = extract_text_from_document(att.base64Data, att.mimeType, att.filename)
120
+ blocks.append({
121
+ "type": "text",
122
+ "text": f"[Content of uploaded file: {att.filename}]\n\n{text}",
123
+ })
124
+ return blocks
125
+
126
+
91
127
  async def make_multimodal_message(content_blocks: list[dict]) -> AsyncGenerator[dict, None]:
92
128
  """Yield a single multimodal user message in Claude Agent SDK format.
93
129
 
@@ -21,9 +21,11 @@ from typing import Any, AsyncGenerator, Optional
21
21
  from claude_agent_sdk import ClaudeAgentOptions, ClaudeSDKClient
22
22
  from dotenv import load_dotenv
23
23
 
24
- from ..schemas import ImageAttachment
24
+ from ..schemas import FileAttachment
25
+ from ..utils.document_extraction import DocumentExtractionError
25
26
  from .chat_constants import (
26
27
  ROOT_DIR,
28
+ build_attachment_content_blocks,
27
29
  check_rate_limit_error,
28
30
  make_multimodal_message,
29
31
  safe_receive_response,
@@ -226,7 +228,7 @@ class ExpandChatSession:
226
228
  async def send_message(
227
229
  self,
228
230
  user_message: str,
229
- attachments: list[ImageAttachment] | None = None
231
+ attachments: list[FileAttachment] | None = None
230
232
  ) -> AsyncGenerator[dict, None]:
231
233
  """
232
234
  Send user message and stream Claude's response.
@@ -273,7 +275,7 @@ class ExpandChatSession:
273
275
  async def _query_claude(
274
276
  self,
275
277
  message: str,
276
- attachments: list[ImageAttachment] | None = None
278
+ attachments: list[FileAttachment] | None = None
277
279
  ) -> AsyncGenerator[dict, None]:
278
280
  """
279
281
  Internal method to query Claude and stream responses.
@@ -289,17 +291,16 @@ class ExpandChatSession:
289
291
  content_blocks: list[dict[str, Any]] = []
290
292
  if message:
291
293
  content_blocks.append({"type": "text", "text": message})
292
- for att in attachments:
293
- content_blocks.append({
294
- "type": "image",
295
- "source": {
296
- "type": "base64",
297
- "media_type": att.mimeType,
298
- "data": att.base64Data,
299
- }
300
- })
294
+
295
+ # Add attachment blocks (images as image blocks, documents as extracted text)
296
+ try:
297
+ content_blocks.extend(build_attachment_content_blocks(attachments))
298
+ except DocumentExtractionError as e:
299
+ yield {"type": "error", "content": str(e)}
300
+ return
301
+
301
302
  await self.client.query(make_multimodal_message(content_blocks))
302
- logger.info(f"Sent multimodal message with {len(attachments)} image(s)")
303
+ logger.info(f"Sent multimodal message with {len(attachments)} attachment(s)")
303
304
  else:
304
305
  await self.client.query(message)
305
306
 
@@ -18,9 +18,11 @@ from typing import Any, AsyncGenerator, Optional
18
18
  from claude_agent_sdk import ClaudeAgentOptions, ClaudeSDKClient
19
19
  from dotenv import load_dotenv
20
20
 
21
- from ..schemas import ImageAttachment
21
+ from ..schemas import FileAttachment
22
+ from ..utils.document_extraction import DocumentExtractionError
22
23
  from .chat_constants import (
23
24
  ROOT_DIR,
25
+ build_attachment_content_blocks,
24
26
  check_rate_limit_error,
25
27
  make_multimodal_message,
26
28
  safe_receive_response,
@@ -201,7 +203,7 @@ class SpecChatSession:
201
203
  async def send_message(
202
204
  self,
203
205
  user_message: str,
204
- attachments: list[ImageAttachment] | None = None
206
+ attachments: list[FileAttachment] | None = None
205
207
  ) -> AsyncGenerator[dict, None]:
206
208
  """
207
209
  Send user message and stream Claude's response.
@@ -247,7 +249,7 @@ class SpecChatSession:
247
249
  async def _query_claude(
248
250
  self,
249
251
  message: str,
250
- attachments: list[ImageAttachment] | None = None
252
+ attachments: list[FileAttachment] | None = None
251
253
  ) -> AsyncGenerator[dict, None]:
252
254
  """
253
255
  Internal method to query Claude and stream responses.
@@ -273,21 +275,17 @@ class SpecChatSession:
273
275
  if message:
274
276
  content_blocks.append({"type": "text", "text": message})
275
277
 
276
- # Add image blocks
277
- for att in attachments:
278
- content_blocks.append({
279
- "type": "image",
280
- "source": {
281
- "type": "base64",
282
- "media_type": att.mimeType,
283
- "data": att.base64Data,
284
- }
285
- })
278
+ # Add attachment blocks (images as image blocks, documents as extracted text)
279
+ try:
280
+ content_blocks.extend(build_attachment_content_blocks(attachments))
281
+ except DocumentExtractionError as e:
282
+ yield {"type": "error", "content": str(e)}
283
+ return
286
284
 
287
285
  # Send multimodal content to Claude using async generator format
288
286
  # The SDK's query() accepts AsyncIterable[dict] for custom message formats
289
287
  await self.client.query(make_multimodal_message(content_blocks))
290
- logger.info(f"Sent multimodal message with {len(attachments)} image(s)")
288
+ logger.info(f"Sent multimodal message with {len(attachments)} attachment(s)")
291
289
  else:
292
290
  # Text-only message: use string format
293
291
  await self.client.query(message)
@@ -0,0 +1,221 @@
1
+ """
2
+ Document Extraction Utility
3
+ ============================
4
+
5
+ Extracts text content from various document formats in memory (no disk I/O).
6
+ Supports: TXT, MD, CSV, DOCX, XLSX, PDF, PPTX.
7
+ """
8
+
9
+ import base64
10
+ import csv
11
+ import io
12
+ import logging
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+ # Maximum characters of extracted text to send to Claude
17
+ MAX_EXTRACTED_CHARS = 200_000
18
+
19
+ # Maximum rows per sheet for Excel files
20
+ MAX_EXCEL_ROWS_PER_SHEET = 10_000
21
+ MAX_EXCEL_SHEETS = 50
22
+
23
+ # MIME type classification
24
+ DOCUMENT_MIME_TYPES: dict[str, str] = {
25
+ "text/plain": ".txt",
26
+ "text/markdown": ".md",
27
+ "text/csv": ".csv",
28
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",
29
+ "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
30
+ "application/pdf": ".pdf",
31
+ "application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx",
32
+ }
33
+
34
+ IMAGE_MIME_TYPES = {"image/jpeg", "image/png"}
35
+
36
+ ALL_ALLOWED_MIME_TYPES = IMAGE_MIME_TYPES | set(DOCUMENT_MIME_TYPES.keys())
37
+
38
+
39
+ def is_image(mime_type: str) -> bool:
40
+ """Check if the MIME type is a supported image format."""
41
+ return mime_type in IMAGE_MIME_TYPES
42
+
43
+
44
+ def is_document(mime_type: str) -> bool:
45
+ """Check if the MIME type is a supported document format."""
46
+ return mime_type in DOCUMENT_MIME_TYPES
47
+
48
+
49
+ class DocumentExtractionError(Exception):
50
+ """Raised when text extraction from a document fails."""
51
+
52
+ def __init__(self, filename: str, reason: str):
53
+ self.filename = filename
54
+ self.reason = reason
55
+ super().__init__(f"Failed to read {filename}: {reason}")
56
+
57
+
58
+ def _truncate(text: str) -> str:
59
+ """Truncate text if it exceeds the maximum character limit."""
60
+ if len(text) > MAX_EXTRACTED_CHARS:
61
+ omitted = len(text) - MAX_EXTRACTED_CHARS
62
+ return text[:MAX_EXTRACTED_CHARS] + f"\n\n[... truncated, {omitted:,} characters omitted]"
63
+ return text
64
+
65
+
66
+ def _extract_plain_text(data: bytes) -> str:
67
+ """Extract text from plain text or markdown files."""
68
+ try:
69
+ return data.decode("utf-8")
70
+ except UnicodeDecodeError:
71
+ return data.decode("latin-1")
72
+
73
+
74
+ def _extract_csv(data: bytes) -> str:
75
+ """Extract text from CSV files, formatted as a readable table."""
76
+ try:
77
+ text = data.decode("utf-8")
78
+ except UnicodeDecodeError:
79
+ text = data.decode("latin-1")
80
+
81
+ reader = csv.reader(io.StringIO(text))
82
+ lines = []
83
+ for i, row in enumerate(reader):
84
+ lines.append(f"Row {i + 1}: {', '.join(row)}")
85
+ return "\n".join(lines)
86
+
87
+
88
+ def _extract_docx(data: bytes) -> str:
89
+ """Extract text from Word documents."""
90
+ from docx import Document
91
+
92
+ doc = Document(io.BytesIO(data))
93
+ paragraphs = [p.text for p in doc.paragraphs if p.text.strip()]
94
+ return "\n\n".join(paragraphs)
95
+
96
+
97
+ def _extract_xlsx(data: bytes) -> str:
98
+ """Extract text from Excel spreadsheets."""
99
+ from openpyxl import load_workbook
100
+
101
+ wb = load_workbook(io.BytesIO(data), read_only=True, data_only=True)
102
+ sections = []
103
+
104
+ for sheet_idx, sheet_name in enumerate(wb.sheetnames):
105
+ if sheet_idx >= MAX_EXCEL_SHEETS:
106
+ sections.append(f"\n[... {len(wb.sheetnames) - MAX_EXCEL_SHEETS} more sheets omitted]")
107
+ break
108
+
109
+ ws = wb[sheet_name]
110
+ rows_text = [f"=== Sheet: {sheet_name} ==="]
111
+ row_count = 0
112
+
113
+ for row in ws.iter_rows(values_only=True):
114
+ if row_count >= MAX_EXCEL_ROWS_PER_SHEET:
115
+ rows_text.append(f"[... more rows omitted, limit {MAX_EXCEL_ROWS_PER_SHEET:,} rows/sheet]")
116
+ break
117
+ cells = [str(cell) if cell is not None else "" for cell in row]
118
+ rows_text.append("\t".join(cells))
119
+ row_count += 1
120
+
121
+ sections.append("\n".join(rows_text))
122
+
123
+ wb.close()
124
+ return "\n\n".join(sections)
125
+
126
+
127
+ def _extract_pdf(data: bytes, filename: str) -> str:
128
+ """Extract text from PDF files."""
129
+ from PyPDF2 import PdfReader
130
+ from PyPDF2.errors import PdfReadError
131
+
132
+ try:
133
+ reader = PdfReader(io.BytesIO(data))
134
+ except PdfReadError as e:
135
+ if "encrypt" in str(e).lower() or "password" in str(e).lower():
136
+ raise DocumentExtractionError(filename, "PDF is password-protected")
137
+ raise
138
+
139
+ if reader.is_encrypted:
140
+ raise DocumentExtractionError(filename, "PDF is password-protected")
141
+
142
+ pages = []
143
+ for i, page in enumerate(reader.pages):
144
+ text = page.extract_text()
145
+ if text and text.strip():
146
+ pages.append(f"--- Page {i + 1} ---\n{text}")
147
+
148
+ return "\n\n".join(pages)
149
+
150
+
151
+ def _extract_pptx(data: bytes) -> str:
152
+ """Extract text from PowerPoint presentations."""
153
+ from pptx import Presentation
154
+
155
+ prs = Presentation(io.BytesIO(data))
156
+ slides_text = []
157
+
158
+ for i, slide in enumerate(prs.slides):
159
+ texts = []
160
+ for shape in slide.shapes:
161
+ if shape.has_text_frame:
162
+ for paragraph in shape.text_frame.paragraphs:
163
+ text = paragraph.text.strip()
164
+ if text:
165
+ texts.append(text)
166
+ if texts:
167
+ slides_text.append(f"--- Slide {i + 1} ---\n" + "\n".join(texts))
168
+
169
+ return "\n\n".join(slides_text)
170
+
171
+
172
+ def extract_text_from_document(base64_data: str, mime_type: str, filename: str) -> str:
173
+ """
174
+ Extract text content from a document file.
175
+
176
+ Args:
177
+ base64_data: Base64-encoded file content
178
+ mime_type: MIME type of the document
179
+ filename: Original filename (for error messages)
180
+
181
+ Returns:
182
+ Extracted text content, truncated if necessary
183
+
184
+ Raises:
185
+ DocumentExtractionError: If extraction fails
186
+ """
187
+ if mime_type not in DOCUMENT_MIME_TYPES:
188
+ raise DocumentExtractionError(filename, f"unsupported document type: {mime_type}")
189
+
190
+ try:
191
+ data = base64.b64decode(base64_data)
192
+ except Exception as e:
193
+ raise DocumentExtractionError(filename, f"invalid base64 data: {e}")
194
+
195
+ try:
196
+ if mime_type in ("text/plain", "text/markdown"):
197
+ text = _extract_plain_text(data)
198
+ elif mime_type == "text/csv":
199
+ text = _extract_csv(data)
200
+ elif mime_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
201
+ text = _extract_docx(data)
202
+ elif mime_type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
203
+ text = _extract_xlsx(data)
204
+ elif mime_type == "application/pdf":
205
+ text = _extract_pdf(data, filename)
206
+ elif mime_type == "application/vnd.openxmlformats-officedocument.presentationml.presentation":
207
+ text = _extract_pptx(data)
208
+ else:
209
+ raise DocumentExtractionError(filename, f"unsupported document type: {mime_type}")
210
+ except DocumentExtractionError:
211
+ raise
212
+ except Exception as e:
213
+ logger.warning(f"Document extraction failed for {filename}: {e}")
214
+ raise DocumentExtractionError(
215
+ filename, "file appears to be corrupt or in an unexpected format"
216
+ )
217
+
218
+ if not text or not text.strip():
219
+ return f"[File {filename} is empty or contains no extractable text]"
220
+
221
+ return _truncate(text)