aiecs 1.7.6__py3-none-any.whl → 1.8.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of aiecs might be problematic.
- aiecs/__init__.py +1 -1
- aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +5 -1
- aiecs/application/knowledge_graph/retrieval/query_intent_classifier.py +7 -5
- aiecs/config/config.py +3 -0
- aiecs/config/tool_config.py +55 -19
- aiecs/domain/agent/base_agent.py +79 -0
- aiecs/domain/agent/hybrid_agent.py +552 -175
- aiecs/domain/agent/knowledge_aware_agent.py +3 -2
- aiecs/domain/agent/llm_agent.py +2 -0
- aiecs/domain/agent/models.py +10 -0
- aiecs/domain/agent/tools/schema_generator.py +17 -4
- aiecs/llm/callbacks/custom_callbacks.py +9 -4
- aiecs/llm/client_factory.py +20 -7
- aiecs/llm/clients/base_client.py +50 -5
- aiecs/llm/clients/google_function_calling_mixin.py +46 -88
- aiecs/llm/clients/googleai_client.py +183 -9
- aiecs/llm/clients/openai_client.py +12 -0
- aiecs/llm/clients/openai_compatible_mixin.py +42 -2
- aiecs/llm/clients/openrouter_client.py +272 -0
- aiecs/llm/clients/vertex_client.py +385 -22
- aiecs/llm/clients/xai_client.py +41 -3
- aiecs/llm/protocols.py +19 -1
- aiecs/llm/utils/image_utils.py +179 -0
- aiecs/main.py +2 -2
- aiecs/tools/docs/document_creator_tool.py +143 -2
- aiecs/tools/docs/document_parser_tool.py +9 -4
- aiecs/tools/docs/document_writer_tool.py +179 -0
- aiecs/tools/task_tools/image_tool.py +49 -14
- aiecs/tools/task_tools/scraper_tool.py +39 -2
- {aiecs-1.7.6.dist-info → aiecs-1.8.4.dist-info}/METADATA +4 -2
- {aiecs-1.7.6.dist-info → aiecs-1.8.4.dist-info}/RECORD +35 -33
- {aiecs-1.7.6.dist-info → aiecs-1.8.4.dist-info}/WHEEL +0 -0
- {aiecs-1.7.6.dist-info → aiecs-1.8.4.dist-info}/entry_points.txt +0 -0
- {aiecs-1.7.6.dist-info → aiecs-1.8.4.dist-info}/licenses/LICENSE +0 -0
- {aiecs-1.7.6.dist-info → aiecs-1.8.4.dist-info}/top_level.txt +0 -0
aiecs/llm/protocols.py
CHANGED

@@ -4,7 +4,7 @@ LLM Client Protocols
 Defines Protocol interfaces for LLM clients to enable duck typing and flexible integration.
 """
 
-from typing import Protocol, List, Optional, AsyncGenerator, runtime_checkable
+from typing import Protocol, List, Optional, AsyncGenerator, runtime_checkable, Dict, Any
 from aiecs.llm.clients.base_client import LLMMessage, LLMResponse
 
 
@@ -31,9 +31,12 @@ class LLMClientProtocol(Protocol):
         model: Optional[str] = None,
         temperature: float = 0.7,
         max_tokens: Optional[int] = None,
+        context: Optional[Dict[str, Any]] = None,
         **kwargs
     ) -> LLMResponse:
         # Custom implementation
+        # Use context for tracking, billing, observability, etc.
+        user_id = context.get("user_id") if context else None
         pass
 
     async def stream_text(
@@ -42,6 +45,7 @@ class LLMClientProtocol(Protocol):
         model: Optional[str] = None,
         temperature: float = 0.7,
         max_tokens: Optional[int] = None,
+        context: Optional[Dict[str, Any]] = None,
         **kwargs
     ) -> AsyncGenerator[str, None]:
         # Custom implementation
@@ -67,6 +71,7 @@ class LLMClientProtocol(Protocol):
         model: Optional[str] = None,
         temperature: float = 0.7,
         max_tokens: Optional[int] = None,
+        context: Optional[Dict[str, Any]] = None,
         **kwargs,
     ) -> LLMResponse:
         """
@@ -77,6 +82,12 @@
             model: Model name (optional, uses default if not provided)
             temperature: Sampling temperature (0.0 to 1.0)
             max_tokens: Maximum tokens to generate
+            context: Optional context dictionary containing metadata such as:
+                - user_id: User identifier for tracking/billing
+                - tenant_id: Tenant identifier for multi-tenant setups
+                - request_id: Request identifier for tracing
+                - session_id: Session identifier
+                - Any other custom metadata for observability or middleware
             **kwargs: Additional provider-specific parameters
 
         Returns:
@@ -90,6 +101,7 @@
         model: Optional[str] = None,
         temperature: float = 0.7,
         max_tokens: Optional[int] = None,
+        context: Optional[Dict[str, Any]] = None,
         **kwargs,
     ) -> AsyncGenerator[str, None]:
         """
@@ -100,6 +112,12 @@
             model: Model name (optional, uses default if not provided)
             temperature: Sampling temperature (0.0 to 1.0)
             max_tokens: Maximum tokens to generate
+            context: Optional context dictionary containing metadata such as:
+                - user_id: User identifier for tracking/billing
+                - tenant_id: Tenant identifier for multi-tenant setups
+                - request_id: Request identifier for tracing
+                - session_id: Session identifier
+                - Any other custom metadata for observability or middleware
             **kwargs: Additional provider-specific parameters
 
         Yields:
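To make the new context parameter concrete, here is a minimal, self-contained sketch of a client that conforms to the extended generate_text signature shown above. It is not taken from the diff: EchoClient and the simplified LLMMessage/LLMResponse stand-ins are hypothetical placeholders, since only the protocol signature is visible in this release.

# Minimal sketch of the extended protocol signature; EchoClient and the
# simplified LLMMessage/LLMResponse stand-ins are hypothetical.
import asyncio
from dataclasses import dataclass
from typing import Any, Dict, List, Optional


@dataclass
class LLMMessage:
    role: str
    content: str


@dataclass
class LLMResponse:
    content: str


class EchoClient:
    async def generate_text(
        self,
        messages: List[LLMMessage],
        model: Optional[str] = None,
        temperature: float = 0.7,
        max_tokens: Optional[int] = None,
        context: Optional[Dict[str, Any]] = None,
        **kwargs,
    ) -> LLMResponse:
        # context carries per-request metadata (user_id, request_id, ...) that
        # tracking, billing, or tracing middleware can read.
        user_id = context.get("user_id") if context else None
        return LLMResponse(content=f"[user={user_id}] {messages[-1].content}")


async def main() -> None:
    client = EchoClient()
    response = await client.generate_text(
        [LLMMessage(role="user", content="hello")],
        context={"user_id": "u-123", "request_id": "req-42"},
    )
    print(response.content)


asyncio.run(main())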
aiecs/llm/utils/image_utils.py
ADDED

@@ -0,0 +1,179 @@
+"""
+Image processing utilities for LLM vision support.
+
+This module provides functions to handle images in various formats:
+- Image URLs
+- Base64-encoded images
+- Local file paths
+"""
+
+import os
+import base64
+import mimetypes
+from typing import Union, Optional, Dict, Any
+from urllib.parse import urlparse
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+class ImageContent:
+    """Represents image content for LLM messages."""
+
+    def __init__(
+        self,
+        source: str,
+        mime_type: Optional[str] = None,
+        detail: Optional[str] = None,
+    ):
+        """
+        Initialize image content.
+
+        Args:
+            source: Image source - can be URL, base64 data URI, or file path
+            mime_type: MIME type (e.g., 'image/jpeg', 'image/png'). Auto-detected if not provided.
+            detail: Detail level for OpenAI API ('low', 'high', 'auto'). Defaults to 'auto'.
+        """
+        self.source = source
+        self.mime_type = mime_type or self._detect_mime_type(source)
+        self.detail = detail or "auto"
+
+    def _detect_mime_type(self, source: str) -> str:
+        """Detect MIME type from source."""
+        # Check if it's a data URI
+        if source.startswith("data:"):
+            header = source.split(",")[0]
+            mime_type = header.split(":")[1].split(";")[0]
+            return mime_type
+
+        # Check if it's a URL
+        if source.startswith(("http://", "https://")):
+            parsed = urlparse(source)
+            mime_type, _ = mimetypes.guess_type(parsed.path)
+            if mime_type and mime_type.startswith("image/"):
+                return mime_type
+
+        # Check if it's a file path
+        if os.path.exists(source):
+            mime_type, _ = mimetypes.guess_type(source)
+            if mime_type and mime_type.startswith("image/"):
+                return mime_type
+
+        # Default to jpeg if cannot detect
+        logger.warning(f"Could not detect MIME type for {source}, defaulting to image/jpeg")
+        return "image/jpeg"
+
+    def is_url(self) -> bool:
+        """Check if source is a URL."""
+        return self.source.startswith(("http://", "https://"))
+
+    def is_base64(self) -> bool:
+        """Check if source is a base64 data URI."""
+        return self.source.startswith("data:")
+
+    def is_file_path(self) -> bool:
+        """Check if source is a local file path."""
+        return os.path.exists(self.source) and not self.is_url() and not self.is_base64()
+
+    def get_base64_data(self) -> str:
+        """
+        Get base64-encoded image data.
+
+        Returns:
+            Base64 string without data URI prefix
+        """
+        if self.is_base64():
+            # Extract base64 data from data URI
+            return self.source.split(",", 1)[1]
+        elif self.is_file_path():
+            # Read file and encode to base64
+            with open(self.source, "rb") as f:
+                return base64.b64encode(f.read()).decode("utf-8")
+        else:
+            raise ValueError(f"Cannot get base64 data from URL: {self.source}. Use URL directly or download first.")
+
+    def get_url(self) -> str:
+        """
+        Get image URL.
+
+        Returns:
+            URL string
+        """
+        if self.is_url():
+            return self.source
+        else:
+            raise ValueError(f"Source is not a URL: {self.source}")
+
+
+def parse_image_source(source: Union[str, Dict[str, Any]]) -> ImageContent:
+    """
+    Parse image source into ImageContent object.
+
+    Args:
+        source: Can be:
+            - String URL (http://... or https://...)
+            - String base64 data URI (data:image/...;base64,...)
+            - String file path
+            - Dict with 'url', 'data', or 'path' key
+
+    Returns:
+        ImageContent object
+    """
+    if isinstance(source, dict):
+        # Handle dict format
+        if "url" in source:
+            return ImageContent(
+                source=source["url"],
+                mime_type=source.get("mime_type"),
+                detail=source.get("detail"),
+            )
+        elif "data" in source:
+            # Base64 data
+            mime_type = source.get("mime_type", "image/jpeg")
+            data = source["data"]
+            if not data.startswith("data:"):
+                data = f"data:{mime_type};base64,{data}"
+            return ImageContent(
+                source=data,
+                mime_type=mime_type,
+                detail=source.get("detail"),
+            )
+        elif "path" in source:
+            return ImageContent(
+                source=source["path"],
+                mime_type=source.get("mime_type"),
+                detail=source.get("detail"),
+            )
+        else:
+            raise ValueError(f"Invalid image dict format: {source}")
+    elif isinstance(source, str):
+        return ImageContent(source=source)
+    else:
+        raise TypeError(f"Image source must be str or dict, got {type(source)}")
+
+
+def validate_image_source(source: str) -> bool:
+    """
+    Validate that image source is accessible.
+
+    Args:
+        source: Image source (URL, base64, or file path)
+
+    Returns:
+        True if source is valid and accessible
+    """
+    try:
+        img = ImageContent(source)
+        if img.is_file_path():
+            return os.path.exists(source) and os.path.isfile(source)
+        elif img.is_url():
+            # URL validation - just check format, not accessibility
+            parsed = urlparse(source)
+            return bool(parsed.scheme and parsed.netloc)
+        elif img.is_base64():
+            # Base64 validation - check format
+            parts = source.split(",", 1)
+            return len(parts) == 2 and parts[0].startswith("data:image/")
+        return False
+    except Exception:
+        return False
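For reference, a short usage sketch of the helpers added above. The URL, base64 string, and file path are illustrative, and the example assumes the module is importable as aiecs.llm.utils.image_utils (the path listed in this release).

# Illustrative usage of the new image helpers; the inputs are placeholders.
from aiecs.llm.utils.image_utils import parse_image_source, validate_image_source

# A plain URL string is wrapped as-is; MIME type is guessed from the path.
img_url = parse_image_source("https://example.com/photo.png")
print(img_url.is_url(), img_url.mime_type)  # True image/png

# Dict form with raw base64 data gets a data-URI prefix added automatically.
img_b64 = parse_image_source({"data": "aGVsbG8=", "mime_type": "image/jpeg"})
print(img_b64.is_base64(), img_b64.get_base64_data())  # True aGVsbG8=

# validate_image_source only checks format for URLs and base64 data,
# and existence for local paths.
print(validate_image_source("data:image/png;base64,aGVsbG8="))  # True
print(validate_image_source("/tmp/does_not_exist.png"))  # False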
aiecs/main.py
CHANGED

@@ -142,7 +142,7 @@ async def lifespan(app: FastAPI):
 app = FastAPI(
     title="AIECS - AI Execute Services",
     description="Middleware service for AI-powered task execution and tool orchestration",
-    version="1.
+    version="1.8.4",
     lifespan=lifespan,
 )
 
@@ -164,7 +164,7 @@ socket_app = socketio.ASGIApp(sio, other_asgi_app=app)
 @app.get("/health")
 async def health_check():
     """Health check endpoint"""
-    return {"status": "healthy", "service": "aiecs", "version": "1.
+    return {"status": "healthy", "service": "aiecs", "version": "1.8.4"}
 
 
 # Metrics health check endpoint
aiecs/tools/docs/document_creator_tool.py
CHANGED

@@ -55,6 +55,8 @@ class DocumentFormat(str, Enum):
     PLAIN_TEXT = "txt"
     JSON = "json"
     XML = "xml"
+    PPTX = "pptx"
+    PPT = "ppt"
 
 
 class TemplateType(str, Enum):
@@ -175,6 +177,9 @@ class DocumentCreatorTool(BaseTool):
         # Initialize templates
         self._init_templates()
 
+        # Initialize office tool for PPTX/DOCX creation
+        self._init_office_tool()
+
         # Initialize document tracking
         self._documents_created: List[Any] = []
 
@@ -197,6 +202,17 @@ class DocumentCreatorTool(BaseTool):
             TemplateType.INVOICE: self._get_invoice_template(),
         }
 
+    def _init_office_tool(self):
+        """Initialize office tool for PPTX/DOCX creation"""
+        try:
+            from aiecs.tools.task_tools.office_tool import OfficeTool
+
+            self.office_tool = OfficeTool()
+            self.logger.info("OfficeTool initialized successfully for PPTX/DOCX support")
+        except ImportError:
+            self.logger.warning("OfficeTool not available, PPTX/DOCX creation will be limited")
+            self.office_tool = None
+
     # Schema definitions
     class Create_documentSchema(BaseModel):
         """Schema for create_document operation"""
@@ -943,7 +959,7 @@ class DocumentCreatorTool(BaseTool):
                 "questions",
                 "contact_info",
             ],
-            "supported_formats": ["markdown", "html"],
+            "supported_formats": ["markdown", "html", "pptx"],
            "style_presets": ["presentation", "modern", "colorful"],
         }
 
@@ -1062,7 +1078,11 @@ class DocumentCreatorTool(BaseTool):
     ) -> str:
         """Generate output path for document"""
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-
+        # Handle PPT format - use pptx extension
+        file_extension = output_format.value
+        if output_format == DocumentFormat.PPT:
+            file_extension = "pptx"  # PPT format uses PPTX extension
+        filename = f"{document_type}_{timestamp}_{document_id[:8]}.{file_extension}"
         return os.path.join(self.config.output_dir, filename)
 
     def _process_metadata(self, metadata: Dict[str, Any], output_format: DocumentFormat) -> Dict[str, Any]:
@@ -1175,11 +1195,130 @@ class DocumentCreatorTool(BaseTool):
         elif output_format == DocumentFormat.JSON:
             with open(output_path, "w", encoding="utf-8") as f:
                 json.dump({"content": content}, f, indent=2, ensure_ascii=False)
+        elif output_format in [DocumentFormat.PPTX, DocumentFormat.PPT]:
+            # Use office_tool to create PPTX file
+            self._write_pptx_file(output_path, content)
+        elif output_format == DocumentFormat.DOCX:
+            # Use office_tool to create DOCX file
+            self._write_docx_file(output_path, content)
         else:
             # For other formats, write as text for now
             with open(output_path, "w", encoding="utf-8") as f:
                 f.write(content)
 
+    def _write_pptx_file(self, output_path: str, content: str):
+        """Write content to PPTX file using office_tool"""
+        if not self.office_tool:
+            raise DocumentCreationError("OfficeTool not available. Cannot create PPTX files.")
+
+        try:
+            # Parse content to extract slides
+            # Slides are separated by "---" or slide markers like "## Slide X:"
+            slides = self._parse_content_to_slides(content)
+
+            # Use office_tool to create PPTX
+            result = self.office_tool.write_pptx(
+                slides=slides,
+                output_path=output_path,
+                image_path=None,  # Can be enhanced to extract image paths from metadata
+            )
+
+            if not result.get("success"):
+                raise DocumentCreationError(f"Failed to create PPTX file: {result}")
+
+            self.logger.info(f"PPTX file created successfully: {output_path}")
+
+        except Exception as e:
+            raise DocumentCreationError(f"Failed to write PPTX file: {str(e)}")
+
+    def _write_docx_file(self, output_path: str, content: str):
+        """Write content to DOCX file using office_tool"""
+        if not self.office_tool:
+            raise DocumentCreationError("OfficeTool not available. Cannot create DOCX files.")
+
+        try:
+            # Use office_tool to create DOCX
+            result = self.office_tool.write_docx(
+                text=content,
+                output_path=output_path,
+                table_data=None,  # Can be enhanced to extract tables from content
+            )
+
+            if not result.get("success"):
+                raise DocumentCreationError(f"Failed to create DOCX file: {result}")
+
+            self.logger.info(f"DOCX file created successfully: {output_path}")
+
+        except Exception as e:
+            raise DocumentCreationError(f"Failed to write DOCX file: {str(e)}")
+
+    def _parse_content_to_slides(self, content: str) -> List[str]:
+        """Parse content string into list of slide contents
+
+        Supports multiple slide separation formats:
+        - "---" separator (markdown style)
+        - "## Slide X:" headers
+        - Empty lines between slides
+        """
+        slides = []
+
+        # Split by "---" separator (common in markdown presentations)
+        if "---" in content:
+            parts = content.split("---")
+            for part in parts:
+                part = part.strip()
+                if part:
+                    # Remove slide headers like "## Slide X: Title"
+                    lines = part.split("\n")
+                    cleaned_lines = []
+                    for line in lines:
+                        # Skip slide headers
+                        if line.strip().startswith("## Slide") and ":" in line:
+                            continue
+                        cleaned_lines.append(line)
+                    slide_content = "\n".join(cleaned_lines).strip()
+                    if slide_content:
+                        slides.append(slide_content)
+        else:
+            # Try to split by "## Slide" headers
+            if "## Slide" in content:
+                parts = content.split("## Slide")
+                for i, part in enumerate(parts):
+                    if i == 0:
+                        # First part might be title slide
+                        part = part.strip()
+                        if part:
+                            slides.append(part)
+                    else:
+                        # Extract content after "Slide X: Title"
+                        lines = part.split("\n", 1)
+                        if len(lines) > 1:
+                            slide_content = lines[1].strip()
+                            if slide_content:
+                                slides.append(slide_content)
+            else:
+                # Fallback: split by double newlines (paragraph breaks)
+                parts = content.split("\n\n")
+                current_slide = []
+                for part in parts:
+                    part = part.strip()
+                    if part:
+                        # If it's a header, start a new slide
+                        if part.startswith("#"):
+                            if current_slide:
+                                slides.append("\n".join(current_slide))
+                                current_slide = []
+                        current_slide.append(part)
+
+                if current_slide:
+                    slides.append("\n".join(current_slide))
+
+        # If no slides found, create a single slide with all content
+        if not slides:
+            slides = [content.strip()] if content.strip() else [""]
+
+        return slides
+
     def _process_template_variables(self, template_content: str, variables: Dict[str, Any]) -> str:
         """Process template variables in content"""
         result = template_content
@@ -1282,6 +1421,8 @@ class DocumentCreatorTool(BaseTool):
             ".tex": DocumentFormat.LATEX,
             ".docx": DocumentFormat.DOCX,
             ".pdf": DocumentFormat.PDF,
+            ".pptx": DocumentFormat.PPTX,
+            ".ppt": DocumentFormat.PPT,
         }
         return format_map.get(ext, DocumentFormat.PLAIN_TEXT)
 
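To illustrate the slide-splitting rule added above, the following standalone sketch mirrors the "---" branch of _parse_content_to_slides locally rather than instantiating DocumentCreatorTool; the sample presentation content is made up.

# Standalone illustration of the "---" splitting rule from
# _parse_content_to_slides: split on "---", drop "## Slide X: Title" lines,
# keep the rest of each chunk as one slide.
content = """# Quarterly Review

## Slide 1: Highlights
- Revenue up
- Two new hires

---

## Slide 2: Next Steps
- Ship v2
- Plan offsite
"""

slides = []
for part in content.split("---"):
    part = part.strip()
    if not part:
        continue
    kept = [line for line in part.split("\n")
            if not (line.strip().startswith("## Slide") and ":" in line)]
    slide = "\n".join(kept).strip()
    if slide:
        slides.append(slide)

print(len(slides))  # 2 slides: the title/highlights chunk and the next-steps chunk
print(slides[0])    # heading "# Quarterly Review" followed by the two bullets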
aiecs/tools/docs/document_parser_tool.py
CHANGED

@@ -798,13 +798,18 @@ class DocumentParserTool(BaseTool):
             raise UnsupportedDocumentError("ImageTool not available for image OCR")
 
         try:
-            # Use image tool for OCR
-
+            # Use image tool for OCR - the ocr method returns a string directly
+            ocr_text = self.image_tool.ocr(file_path=file_path)
 
             if strategy == ParsingStrategy.TEXT_ONLY:
-                return
+                return ocr_text
             else:
-
+                # Return structured result for other strategies
+                return {
+                    "text": ocr_text,
+                    "file_path": file_path,
+                    "document_type": DocumentType.IMAGE,
+                }
 
         except Exception as e:
             raise ParseError(f"Failed to parse image document: {str(e)}")
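Since the image-OCR path now returns either a plain string (TEXT_ONLY strategy) or a dict with "text", "file_path", and "document_type" keys, a caller may want to normalize both shapes. The extract_ocr_text helper below is a hypothetical caller-side sketch, not part of the package.

# Hypothetical caller-side helper: normalize the two result shapes the image
# OCR path can now return (str for TEXT_ONLY, dict otherwise).
from typing import Any, Dict, Union


def extract_ocr_text(result: Union[str, Dict[str, Any]]) -> str:
    if isinstance(result, str):
        return result
    if isinstance(result, dict):
        return result.get("text", "")
    raise TypeError(f"Unexpected OCR result type: {type(result)!r}")


print(extract_ocr_text("invoice total: 42.00"))
print(extract_ocr_text({"text": "receipt", "file_path": "/tmp/r.png"}))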