aiecs 1.7.6__py3-none-any.whl → 1.7.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of aiecs has been flagged as possibly problematic.

@@ -1,8 +1,10 @@
 import asyncio
+import json
 import logging
 import os
 import warnings
-from typing import Dict, Any, Optional, List, AsyncGenerator
+import hashlib
+from typing import Dict, Any, Optional, List, AsyncGenerator, Union
 import vertexai
 from vertexai.generative_models import (
     GenerativeModel,
@@ -14,6 +16,43 @@ from vertexai.generative_models import (
     Part,
 )
 
+logger = logging.getLogger(__name__)
+
+# Try to import CachedContent for prompt caching support
+# CachedContent API requires google-cloud-aiplatform >= 1.38.0
+# Reference: https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/cached-content
+CACHED_CONTENT_AVAILABLE = False
+CACHED_CONTENT_IMPORT_PATH = None
+CACHED_CONTENT_SDK_VERSION = None
+
+# Check SDK version
+try:
+    import google.cloud.aiplatform as aiplatform
+    CACHED_CONTENT_SDK_VERSION = getattr(aiplatform, '__version__', None)
+except ImportError:
+    pass
+
+# Try to import CachedContent for prompt caching support
+try:
+    from vertexai.preview import caching
+    if hasattr(caching, 'CachedContent'):
+        CACHED_CONTENT_AVAILABLE = True
+        CACHED_CONTENT_IMPORT_PATH = 'vertexai.preview.caching'
+    else:
+        # Module exists but CachedContent class not found
+        CACHED_CONTENT_AVAILABLE = False
+except ImportError:
+    try:
+        # Alternative import path for different SDK versions
+        from vertexai import caching
+        if hasattr(caching, 'CachedContent'):
+            CACHED_CONTENT_AVAILABLE = True
+            CACHED_CONTENT_IMPORT_PATH = 'vertexai.caching'
+        else:
+            CACHED_CONTENT_AVAILABLE = False
+    except ImportError:
+        CACHED_CONTENT_AVAILABLE = False
+
 from aiecs.llm.clients.base_client import (
     BaseLLMClient,
     LLMMessage,
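
The module resolves CachedContent availability once at import time and records both the detected SDK version and the import path that worked. Downstream code can then branch on a single flag instead of repeating the try/except chain. A minimal sketch of a consumer of these flags (the require_prompt_caching helper is hypothetical, not part of the package):

    # Hypothetical helper built on the module-level flags defined above.
    def require_prompt_caching() -> None:
        # Fail fast with a clear message when prompt caching cannot work.
        if not CACHED_CONTENT_AVAILABLE:
            detected = CACHED_CONTENT_SDK_VERSION or "unknown"
            raise RuntimeError(
                "Prompt caching requires google-cloud-aiplatform >= 1.38.0 "
                f"(detected SDK version: {detected}, import path: "
                f"{CACHED_CONTENT_IMPORT_PATH or 'none'})"
            )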
@@ -147,17 +186,20 @@ def _build_safety_block_error(
     error_parts = [default_message]
     if block_reason:
         error_parts.append(f"Block reason: {block_reason}")
-
-    blocked_categories = [
-        r.get("category", "UNKNOWN")
-        for r in safety_ratings
-        if r.get("blocked", False)
-    ]
+
+    # Safely extract blocked categories, handling potential non-dict elements
+    blocked_categories = []
+    for r in safety_ratings:
+        if isinstance(r, dict) and r.get("blocked", False):
+            blocked_categories.append(r.get("category", "UNKNOWN"))
     if blocked_categories:
         error_parts.append(f"Blocked categories: {', '.join(blocked_categories)}")
-
+
     # Add severity/probability information
     for rating in safety_ratings:
+        # Skip non-dict elements
+        if not isinstance(rating, dict):
+            continue
         if rating.get("blocked"):
            if "severity" in rating:
                error_parts.append(
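
The rewrite makes the safety-rating parsing tolerant of malformed entries: a non-dict element is now skipped, where the old list comprehension would raise AttributeError on it. A small self-contained illustration with made-up rating data:

    # Made-up safety ratings; the second element is not a dict.
    safety_ratings = [
        {"category": "HARM_CATEGORY_HATE_SPEECH", "blocked": True, "severity": "HIGH"},
        "unexpected-string-entry",
        {"category": "HARM_CATEGORY_HARASSMENT", "blocked": False},
    ]

    # Equivalent to the new loop: non-dict entries are ignored instead of crashing.
    blocked = [
        r.get("category", "UNKNOWN")
        for r in safety_ratings
        if isinstance(r, dict) and r.get("blocked", False)
    ]
    assert blocked == ["HARM_CATEGORY_HATE_SPEECH"]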
@@ -193,6 +235,8 @@ class VertexAIClient(BaseLLMClient, GoogleFunctionCallingMixin):
             "part_counts": {},  # {part_count: frequency}
             "last_part_count": None,
         }
+        # Cache for CachedContent objects (key: content hash, value: cached_content_id)
+        self._cached_content_cache: Dict[str, str] = {}
 
     def _init_vertex_ai(self):
         """Lazy initialization of Vertex AI with proper authentication"""
@@ -230,14 +274,140 @@ class VertexAIClient(BaseLLMClient, GoogleFunctionCallingMixin):
         except Exception as e:
             raise ProviderNotAvailableError(f"Failed to initialize Vertex AI: {str(e)}")
 
+    def _generate_content_hash(self, content: str) -> str:
+        """Generate a hash for content to use as cache key."""
+        return hashlib.md5(content.encode('utf-8')).hexdigest()
+
+    async def _create_or_get_cached_content(
+        self,
+        content: str,
+        model_name: str,
+        ttl_seconds: Optional[int] = None,
+    ) -> Optional[str]:
+        """
+        Create or get a CachedContent for the given content.
+
+        This method implements Gemini's CachedContent API for prompt caching.
+        It preserves the existing cache_control mechanism for developer convenience.
+
+        The method supports multiple Vertex AI SDK versions and gracefully falls back
+        to regular system_instruction if the CachedContent API is unavailable.
+
+        Args:
+            content: Content to cache (typically the system instruction)
+            model_name: Model name to use for caching
+            ttl_seconds: Time to live in seconds (optional, defaults to 3600)
+
+        Returns:
+            CachedContent resource name (e.g., "projects/.../cachedContents/...") or None if caching unavailable
+        """
+        if not CACHED_CONTENT_AVAILABLE:
+            # Provide version info if available
+            version_info = ""
+            if CACHED_CONTENT_SDK_VERSION:
+                version_info = f" (SDK version: {CACHED_CONTENT_SDK_VERSION})"
+            elif CACHED_CONTENT_IMPORT_PATH:
+                version_info = f" (import path '{CACHED_CONTENT_IMPORT_PATH}' available but CachedContent class not found)"
+
+            self.logger.debug(
+                f"CachedContent API not available{version_info}, skipping cache creation. "
+                f"Requires google-cloud-aiplatform >=1.38.0"
+            )
+            return None
+
+        if not content or not content.strip():
+            return None
+
+        # Generate cache key
+        cache_key = self._generate_content_hash(content)
+
+        # Check if we already have this cached
+        if cache_key in self._cached_content_cache:
+            cached_content_id = self._cached_content_cache[cache_key]
+            self.logger.debug(f"Using existing CachedContent: {cached_content_id}")
+            return cached_content_id
+
+        try:
+            self._init_vertex_ai()
+
+            # Build the content to cache (system instruction as Content)
+            # For CachedContent, we typically cache the system instruction
+            cached_content_obj = Content(
+                role="user",
+                parts=[Part.from_text(content)]
+            )
+
+            # Try different API patterns based on SDK version
+            cached_content_id = None
+
+            # Pattern 1: caching.CachedContent.create() (most common)
+            if hasattr(caching, 'CachedContent'):
+                try:
+                    cached_content = await asyncio.get_event_loop().run_in_executor(
+                        None,
+                        lambda: caching.CachedContent.create(
+                            model=model_name,
+                            contents=[cached_content_obj],
+                            ttl_seconds=ttl_seconds or 3600,  # Default 1 hour
+                        )
+                    )
+
+                    # Extract the resource name
+                    if hasattr(cached_content, 'name'):
+                        cached_content_id = cached_content.name
+                    elif hasattr(cached_content, 'resource_name'):
+                        cached_content_id = cached_content.resource_name
+                    else:
+                        cached_content_id = str(cached_content)
+
+                    if cached_content_id:
+                        # Store in cache
+                        self._cached_content_cache[cache_key] = cached_content_id
+                        self.logger.info(f"Created CachedContent for prompt caching: {cached_content_id}")
+                        return cached_content_id
+
+                except AttributeError as e:
+                    self.logger.debug(f"CachedContent.create() signature may differ: {str(e)}")
+                except Exception as e:
+                    self.logger.debug(f"Failed to create CachedContent using pattern 1: {str(e)}")
+
+            # Pattern 2: Try alternative API patterns if Pattern 1 fails
+            # Note: Different SDK versions may have different APIs
+            # This is a fallback that allows graceful degradation
+
+            # Build informative warning message with version info
+            version_info = ""
+            if CACHED_CONTENT_SDK_VERSION:
+                version_info = f" Current SDK version: {CACHED_CONTENT_SDK_VERSION}."
+            else:
+                version_info = " Unable to detect SDK version."
+
+            required_version = ">=1.38.0"
+            upgrade_command = "pip install --upgrade 'google-cloud-aiplatform>=1.38.0'"
+
+            self.logger.warning(
+                f"CachedContent API not available or incompatible with current SDK version.{version_info} "
+                f"Falling back to system_instruction (prompt caching disabled). "
+                f"To enable prompt caching, upgrade to google-cloud-aiplatform {required_version} or later: "
+                f"{upgrade_command}"
+            )
+            return None
+
+        except Exception as e:
+            self.logger.warning(
+                f"Failed to create CachedContent (prompt caching disabled, using system_instruction): {str(e)}"
+            )
+            # Don't raise - allow fallback to regular generation without caching
+            return None
+
     def _convert_messages_to_contents(
         self, messages: List[LLMMessage]
     ) -> List[Content]:
         """
         Convert LLMMessage list to Vertex AI Content objects.
 
-        This properly handles multi-turn conversations instead of
-        string concatenation.
+        This properly handles multi-turn conversations including
+        function/tool responses for Vertex AI Function Calling.
 
         Args:
             messages: List of LLMMessage objects (system messages should be filtered out)
@@ -246,13 +416,77 @@ class VertexAIClient(BaseLLMClient, GoogleFunctionCallingMixin):
         Returns:
             List of Content objects for Vertex AI API
         """
         contents = []
+
         for msg in messages:
-            # Map role: Vertex AI uses "model" for assistant responses
-            role = "model" if msg.role == "assistant" else msg.role
-            contents.append(Content(
-                role=role,
-                parts=[Part.from_text(msg.content)]
-            ))
+            # Handle tool/function responses (role="tool")
+            if msg.role == "tool":
+                # Vertex AI expects function responses as user messages with FunctionResponse parts
+                # The tool_call_id maps to the function name
+                func_name = msg.tool_call_id or "unknown_function"
+
+                # Parse content as the function response
+                try:
+                    # Try to parse as JSON if it looks like JSON
+                    if msg.content and msg.content.strip().startswith('{'):
+                        response_data = json.loads(msg.content)
+                    else:
+                        response_data = {"result": msg.content}
+                except json.JSONDecodeError:
+                    response_data = {"result": msg.content}
+
+                # Create FunctionResponse part using Part.from_function_response
+                func_response_part = Part.from_function_response(
+                    name=func_name,
+                    response=response_data
+                )
+
+                contents.append(Content(
+                    role="user",  # Function responses are sent as "user" role in Vertex AI
+                    parts=[func_response_part]
+                ))
+
+            # Handle assistant messages with tool calls
+            elif msg.role == "assistant" and msg.tool_calls:
+                parts = []
+                if msg.content:
+                    parts.append(Part.from_text(msg.content))
+
+                for tool_call in msg.tool_calls:
+                    func = tool_call.get("function", {})
+                    func_name = func.get("name", "")
+                    func_args = func.get("arguments", "{}")
+
+                    # Parse arguments
+                    try:
+                        args_dict = json.loads(func_args) if isinstance(func_args, str) else func_args
+                    except json.JSONDecodeError:
+                        args_dict = {}
+
+                    # Create FunctionCall part using Part.from_dict
+                    # Note: Part.from_function_call() does NOT exist in Vertex AI SDK
+                    # Must use from_dict with function_call structure
+                    function_call_part = Part.from_dict({
+                        "function_call": {
+                            "name": func_name,
+                            "args": args_dict
+                        }
+                    })
+                    parts.append(function_call_part)
+
+                contents.append(Content(
+                    role="model",
+                    parts=parts
+                ))
+
+            # Handle regular messages (user, assistant without tool_calls)
+            else:
+                role = "model" if msg.role == "assistant" else msg.role
+                if msg.content:
+                    contents.append(Content(
+                        role=role,
+                        parts=[Part.from_text(msg.content)]
+                    ))
+
         return contents
 
     async def generate_text(
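
The conversion helper now emits three shapes of Content: FunctionResponse parts for role="tool" messages (sent with role "user", as Vertex AI expects), function_call parts for assistant messages carrying tool_calls, and plain text parts otherwise. The JSON-sniffing for tool results can be checked in isolation; this mirrors the parsing branch in the diff:

    import json

    def tool_response_payload(content: str) -> dict:
        # Mirrors the role == "tool" parsing in _convert_messages_to_contents.
        try:
            if content and content.strip().startswith("{"):
                return json.loads(content)
        except json.JSONDecodeError:
            pass
        return {"result": content}

    assert tool_response_payload('{"temp_c": 21}') == {"temp_c": 21}
    assert tool_response_payload("sunny") == {"result": "sunny"}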
@@ -281,17 +515,37 @@ class VertexAIClient(BaseLLMClient, GoogleFunctionCallingMixin):
         try:
             # Extract system message from messages if present
             system_msg = None
+            system_cache_control = None
             user_messages = []
             for msg in messages:
                 if msg.role == "system":
                     system_msg = msg.content
+                    system_cache_control = msg.cache_control
                 else:
                     user_messages.append(msg)
 
             # Use explicit system_instruction parameter if provided, else use extracted system message
             final_system_instruction = system_instruction or system_msg
 
+            # Check if we should use CachedContent API for prompt caching
+            cached_content_id = None
+            if final_system_instruction and system_cache_control:
+                # Create or get CachedContent for the system instruction
+                # Extract TTL from cache_control if available (defaults to 3600 seconds)
+                ttl_seconds = getattr(system_cache_control, 'ttl_seconds', None) or 3600
+                cached_content_id = await self._create_or_get_cached_content(
+                    content=final_system_instruction,
+                    model_name=model_name,
+                    ttl_seconds=ttl_seconds,
+                )
+                if cached_content_id:
+                    self.logger.debug(f"Using CachedContent for prompt caching: {cached_content_id}")
+                    # When using CachedContent, we don't pass system_instruction to GenerativeModel
+                    # Instead, we'll pass cached_content_id to generate_content
+                    final_system_instruction = None
+
             # Initialize model WITH system instruction for prompt caching support
+            # Note: If using CachedContent, system_instruction will be None
             model_instance = GenerativeModel(
                 model_name,
                 system_instruction=final_system_instruction
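
Callers opt into the CachedContent path simply by attaching cache_control to the system message; when caching succeeds, the client drops system_instruction and passes the cached resource name instead. A hedged usage sketch: the CacheControl stand-in and the VertexAIClient import path are assumptions, and only LLMMessage's fields are implied by this diff:

    import asyncio
    from dataclasses import dataclass

    from aiecs.llm.clients.base_client import LLMMessage

    @dataclass
    class CacheControl:  # hypothetical stand-in for aiecs's cache_control object
        ttl_seconds: int = 3600

    async def main() -> None:
        client = VertexAIClient()  # import path assumed, not shown in this diff
        long_prompt = "You are a meticulous report summarizer. " * 200
        messages = [
            LLMMessage(role="system", content=long_prompt,
                       cache_control=CacheControl(ttl_seconds=7200)),
            LLMMessage(role="user", content="Summarize the attached report."),
        ]
        # The system prompt is hashed, cached once, and reused on later calls.
        response = await client.generate_text(messages=messages)
        print(response.content)

    asyncio.run(main())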
@@ -362,13 +616,18 @@ class VertexAIClient(BaseLLMClient, GoogleFunctionCallingMixin):
                 "safety_settings": safety_settings,
             }
 
+            # Add cached_content if using CachedContent API for prompt caching
+            if cached_content_id:
+                api_params["cached_content"] = cached_content_id
+                self.logger.debug(f"Added cached_content to API params: {cached_content_id}")
+
             # Add tools if available
             if tools_for_api:
                 api_params["tools"] = tools_for_api
 
-            # Add any additional kwargs (but exclude tools/safety_settings to avoid conflicts)
+            # Add any additional kwargs (but exclude tools/safety_settings/cached_content to avoid conflicts)
             for key, value in kwargs.items():
-                if key not in ["tools", "safety_settings"]:
+                if key not in ["tools", "safety_settings", "cached_content"]:
                     api_params[key] = value
 
             response = await asyncio.get_event_loop().run_in_executor(
@@ -553,7 +812,9 @@ class VertexAIClient(BaseLLMClient, GoogleFunctionCallingMixin):
 
             # Vertex AI doesn't provide detailed token usage in the response
             # Use estimation method as fallback
-            input_tokens = self._count_tokens_estimate(prompt)
+            # Estimate input tokens from messages content
+            prompt_text = " ".join(msg.content for msg in messages if msg.content)
+            input_tokens = self._count_tokens_estimate(prompt_text)
             output_tokens = self._count_tokens_estimate(content)
             tokens_used = input_tokens + output_tokens
 
@@ -608,7 +869,9 @@ class VertexAIClient(BaseLLMClient, GoogleFunctionCallingMixin):
             ):
                 self.logger.warning(f"Vertex AI response issue: {str(e)}")
                 # Return a response indicating the issue
-                estimated_prompt_tokens = self._count_tokens_estimate(prompt)
+                # Estimate prompt tokens from messages content
+                prompt_text = " ".join(msg.content for msg in messages if msg.content)
+                estimated_prompt_tokens = self._count_tokens_estimate(prompt_text)
                 return LLMResponse(
                     content="[Response unavailable due to content processing issues or safety filters]",
                     provider=self.provider_name,
@@ -664,17 +927,37 @@ class VertexAIClient(BaseLLMClient, GoogleFunctionCallingMixin):
         try:
             # Extract system message from messages if present
             system_msg = None
+            system_cache_control = None
             user_messages = []
             for msg in messages:
                 if msg.role == "system":
                     system_msg = msg.content
+                    system_cache_control = msg.cache_control
                 else:
                     user_messages.append(msg)
 
             # Use explicit system_instruction parameter if provided, else use extracted system message
             final_system_instruction = system_instruction or system_msg
 
+            # Check if we should use CachedContent API for prompt caching
+            cached_content_id = None
+            if final_system_instruction and system_cache_control:
+                # Create or get CachedContent for the system instruction
+                # Extract TTL from cache_control if available (defaults to 3600 seconds)
+                ttl_seconds = getattr(system_cache_control, 'ttl_seconds', None) or 3600
+                cached_content_id = await self._create_or_get_cached_content(
+                    content=final_system_instruction,
+                    model_name=model_name,
+                    ttl_seconds=ttl_seconds,
+                )
+                if cached_content_id:
+                    self.logger.debug(f"Using CachedContent for prompt caching in streaming: {cached_content_id}")
+                    # When using CachedContent, we don't pass system_instruction to GenerativeModel
+                    # Instead, we'll pass cached_content_id to generate_content
+                    final_system_instruction = None
+
             # Initialize model WITH system instruction for prompt caching support
+            # Note: If using CachedContent, system_instruction will be None
             model_instance = GenerativeModel(
                 model_name,
                 system_instruction=final_system_instruction
@@ -738,6 +1021,12 @@ class VertexAIClient(BaseLLMClient, GoogleFunctionCallingMixin):
             # Use mixin method for Function Calling support
             from aiecs.llm.clients.openai_compatible_mixin import StreamChunk
 
+            # Add cached_content to kwargs if using CachedContent API
+            stream_kwargs = kwargs.copy()
+            if cached_content_id:
+                stream_kwargs["cached_content"] = cached_content_id
+                self.logger.debug(f"Added cached_content to streaming API params: {cached_content_id}")
+
             async for chunk in self._stream_text_with_function_calling(
                 model_instance=model_instance,
                 contents=contents,
@@ -745,7 +1034,7 @@ class VertexAIClient(BaseLLMClient, GoogleFunctionCallingMixin):
                 safety_settings=safety_settings,
                 tools=tools_for_api,
                 return_chunks=return_chunks,
-                **kwargs,
+                **stream_kwargs,
             ):
                 # Yield chunk (can be str or StreamChunk)
                 yield chunk
aiecs/main.py CHANGED
@@ -142,7 +142,7 @@ async def lifespan(app: FastAPI):
 app = FastAPI(
     title="AIECS - AI Execute Services",
     description="Middleware service for AI-powered task execution and tool orchestration",
-    version="1.7.6",
+    version="1.7.17",
     lifespan=lifespan,
 )
 
@@ -164,7 +164,7 @@ socket_app = socketio.ASGIApp(sio, other_asgi_app=app)
 @app.get("/health")
 async def health_check():
     """Health check endpoint"""
-    return {"status": "healthy", "service": "aiecs", "version": "1.7.6"}
+    return {"status": "healthy", "service": "aiecs", "version": "1.7.17"}
 
 
 # Metrics health check endpoint
@@ -55,6 +55,8 @@ class DocumentFormat(str, Enum):
     PLAIN_TEXT = "txt"
     JSON = "json"
     XML = "xml"
+    PPTX = "pptx"
+    PPT = "ppt"
 
 
 class TemplateType(str, Enum):
@@ -175,6 +177,9 @@ class DocumentCreatorTool(BaseTool):
         # Initialize templates
         self._init_templates()
 
+        # Initialize office tool for PPTX/DOCX creation
+        self._init_office_tool()
+
         # Initialize document tracking
         self._documents_created: List[Any] = []
 
@@ -197,6 +202,17 @@ class DocumentCreatorTool(BaseTool):
             TemplateType.INVOICE: self._get_invoice_template(),
         }
 
+    def _init_office_tool(self):
+        """Initialize office tool for PPTX/DOCX creation"""
+        try:
+            from aiecs.tools.task_tools.office_tool import OfficeTool
+
+            self.office_tool = OfficeTool()
+            self.logger.info("OfficeTool initialized successfully for PPTX/DOCX support")
+        except ImportError:
+            self.logger.warning("OfficeTool not available, PPTX/DOCX creation will be limited")
+            self.office_tool = None
+
     # Schema definitions
     class Create_documentSchema(BaseModel):
         """Schema for create_document operation"""
@@ -943,7 +959,7 @@ class DocumentCreatorTool(BaseTool):
                 "questions",
                 "contact_info",
             ],
-            "supported_formats": ["markdown", "html"],
+            "supported_formats": ["markdown", "html", "pptx"],
             "style_presets": ["presentation", "modern", "colorful"],
         }
 
@@ -1062,7 +1078,11 @@ class DocumentCreatorTool(BaseTool):
     ) -> str:
         """Generate output path for document"""
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-        filename = f"{document_type}_{timestamp}_{document_id[:8]}.{output_format.value}"
+        # Handle PPT format - use pptx extension
+        file_extension = output_format.value
+        if output_format == DocumentFormat.PPT:
+            file_extension = "pptx"  # PPT format uses PPTX extension
+        filename = f"{document_type}_{timestamp}_{document_id[:8]}.{file_extension}"
         return os.path.join(self.config.output_dir, filename)
 
     def _process_metadata(self, metadata: Dict[str, Any], output_format: DocumentFormat) -> Dict[str, Any]:
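
With the PPT alias normalized to a .pptx extension, generated paths follow the pattern {document_type}_{timestamp}_{first 8 chars of id}.{ext}. A tiny sketch with made-up values:

    import os
    from datetime import datetime

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    document_id = "ab12cd34ef567890"  # made-up id; only the first 8 chars are used
    filename = f"presentation_{timestamp}_{document_id[:8]}.pptx"
    print(os.path.join("./output", filename))
    # e.g. ./output/presentation_20250101_120000_ab12cd34.pptx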
@@ -1175,11 +1195,130 @@ class DocumentCreatorTool(BaseTool):
         elif output_format == DocumentFormat.JSON:
             with open(output_path, "w", encoding="utf-8") as f:
                 json.dump({"content": content}, f, indent=2, ensure_ascii=False)
+        elif output_format in [DocumentFormat.PPTX, DocumentFormat.PPT]:
+            # Use office_tool to create PPTX file
+            self._write_pptx_file(output_path, content)
+        elif output_format == DocumentFormat.DOCX:
+            # Use office_tool to create DOCX file
+            self._write_docx_file(output_path, content)
         else:
             # For other formats, write as text for now
             with open(output_path, "w", encoding="utf-8") as f:
                 f.write(content)
 
+    def _write_pptx_file(self, output_path: str, content: str):
+        """Write content to PPTX file using office_tool"""
+        if not self.office_tool:
+            raise DocumentCreationError("OfficeTool not available. Cannot create PPTX files.")
+
+        try:
+            # Parse content to extract slides
+            # Slides are separated by "---" or slide markers like "## Slide X:"
+            slides = self._parse_content_to_slides(content)
+
+            # Use office_tool to create PPTX
+            result = self.office_tool.write_pptx(
+                slides=slides,
+                output_path=output_path,
+                image_path=None,  # Can be enhanced to extract image paths from metadata
+            )
+
+            if not result.get("success"):
+                raise DocumentCreationError(f"Failed to create PPTX file: {result}")
+
+            self.logger.info(f"PPTX file created successfully: {output_path}")
+
+        except Exception as e:
+            raise DocumentCreationError(f"Failed to write PPTX file: {str(e)}")
+
+    def _write_docx_file(self, output_path: str, content: str):
+        """Write content to DOCX file using office_tool"""
+        if not self.office_tool:
+            raise DocumentCreationError("OfficeTool not available. Cannot create DOCX files.")
+
+        try:
+            # Use office_tool to create DOCX
+            result = self.office_tool.write_docx(
+                text=content,
+                output_path=output_path,
+                table_data=None,  # Can be enhanced to extract tables from content
+            )
+
+            if not result.get("success"):
+                raise DocumentCreationError(f"Failed to create DOCX file: {result}")
+
+            self.logger.info(f"DOCX file created successfully: {output_path}")
+
+        except Exception as e:
+            raise DocumentCreationError(f"Failed to write DOCX file: {str(e)}")
+
+    def _parse_content_to_slides(self, content: str) -> List[str]:
+        """Parse content string into list of slide contents
+
+        Supports multiple slide separation formats:
+        - "---" separator (markdown style)
+        - "## Slide X:" headers
+        - Empty lines between slides
+        """
+        slides = []
+
+        # Split by "---" separator (common in markdown presentations)
+        if "---" in content:
+            parts = content.split("---")
+            for part in parts:
+                part = part.strip()
+                if part:
+                    # Remove slide headers like "## Slide X: Title"
+                    lines = part.split("\n")
+                    cleaned_lines = []
+                    for line in lines:
+                        # Skip slide headers
+                        if line.strip().startswith("## Slide") and ":" in line:
+                            continue
+                        cleaned_lines.append(line)
+                    slide_content = "\n".join(cleaned_lines).strip()
+                    if slide_content:
+                        slides.append(slide_content)
+        else:
+            # Try to split by "## Slide" headers
+            if "## Slide" in content:
+                parts = content.split("## Slide")
+                for i, part in enumerate(parts):
+                    if i == 0:
+                        # First part might be title slide
+                        part = part.strip()
+                        if part:
+                            slides.append(part)
+                    else:
+                        # Extract content after "Slide X: Title"
+                        lines = part.split("\n", 1)
+                        if len(lines) > 1:
+                            slide_content = lines[1].strip()
+                            if slide_content:
+                                slides.append(slide_content)
+            else:
+                # Fallback: split by double newlines (paragraph breaks)
+                parts = content.split("\n\n")
+                current_slide = []
+                for part in parts:
+                    part = part.strip()
+                    if part:
+                        # If it's a header, start a new slide
+                        if part.startswith("#"):
+                            if current_slide:
+                                slides.append("\n".join(current_slide))
+                                current_slide = []
+                        current_slide.append(part)
+
+                if current_slide:
+                    slides.append("\n".join(current_slide))
+
+        # If no slides found, create a single slide with all content
+        if not slides:
+            slides = [content.strip()] if content.strip() else [""]
+
+        return slides
+
     def _process_template_variables(self, template_content: str, variables: Dict[str, Any]) -> str:
         """Process template variables in content"""
         result = template_content
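
The slide parser tries "---" separators first, then "## Slide" headers, then paragraph breaks, stripping "## Slide N: Title" header lines from each block. Expected behavior on markdown-style input, per the first branch above:

    content = (
        "## Slide 1: Intro\n"
        "Welcome to AIECS\n"
        "---\n"
        "## Slide 2: Agenda\n"
        "- Caching\n"
        "- PPTX export\n"
    )
    # _parse_content_to_slides(content) should return:
    expected = [
        "Welcome to AIECS",
        "- Caching\n- PPTX export",
    ]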
@@ -1282,6 +1421,8 @@ class DocumentCreatorTool(BaseTool):
             ".tex": DocumentFormat.LATEX,
             ".docx": DocumentFormat.DOCX,
             ".pdf": DocumentFormat.PDF,
+            ".pptx": DocumentFormat.PPTX,
+            ".ppt": DocumentFormat.PPT,
         }
         return format_map.get(ext, DocumentFormat.PLAIN_TEXT)