amsdal_ml 0.1.4__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- amsdal_ml/Third-Party Materials - AMSDAL Dependencies - License Notices.md +617 -0
- amsdal_ml/__about__.py +1 -1
- amsdal_ml/agents/__init__.py +13 -0
- amsdal_ml/agents/agent.py +5 -7
- amsdal_ml/agents/default_qa_agent.py +108 -143
- amsdal_ml/agents/functional_calling_agent.py +233 -0
- amsdal_ml/agents/mcp_client_tool.py +46 -0
- amsdal_ml/agents/python_tool.py +86 -0
- amsdal_ml/agents/retriever_tool.py +5 -6
- amsdal_ml/agents/tool_adapters.py +98 -0
- amsdal_ml/fileio/base_loader.py +7 -5
- amsdal_ml/fileio/openai_loader.py +16 -17
- amsdal_ml/mcp_client/base.py +2 -0
- amsdal_ml/mcp_client/http_client.py +7 -1
- amsdal_ml/mcp_client/stdio_client.py +19 -16
- amsdal_ml/mcp_server/server_retriever_stdio.py +8 -11
- amsdal_ml/ml_ingesting/__init__.py +29 -0
- amsdal_ml/ml_ingesting/default_ingesting.py +49 -51
- amsdal_ml/ml_ingesting/embedders/__init__.py +4 -0
- amsdal_ml/ml_ingesting/embedders/embedder.py +12 -0
- amsdal_ml/ml_ingesting/embedders/openai_embedder.py +30 -0
- amsdal_ml/ml_ingesting/embedding_data.py +3 -0
- amsdal_ml/ml_ingesting/loaders/__init__.py +6 -0
- amsdal_ml/ml_ingesting/loaders/folder_loader.py +52 -0
- amsdal_ml/ml_ingesting/loaders/loader.py +28 -0
- amsdal_ml/ml_ingesting/loaders/pdf_loader.py +136 -0
- amsdal_ml/ml_ingesting/loaders/text_loader.py +44 -0
- amsdal_ml/ml_ingesting/model_ingester.py +278 -0
- amsdal_ml/ml_ingesting/pipeline.py +131 -0
- amsdal_ml/ml_ingesting/pipeline_interface.py +31 -0
- amsdal_ml/ml_ingesting/processors/__init__.py +4 -0
- amsdal_ml/ml_ingesting/processors/cleaner.py +14 -0
- amsdal_ml/ml_ingesting/processors/text_cleaner.py +42 -0
- amsdal_ml/ml_ingesting/splitters/__init__.py +4 -0
- amsdal_ml/ml_ingesting/splitters/splitter.py +15 -0
- amsdal_ml/ml_ingesting/splitters/token_splitter.py +85 -0
- amsdal_ml/ml_ingesting/stores/__init__.py +4 -0
- amsdal_ml/ml_ingesting/stores/embedding_data.py +63 -0
- amsdal_ml/ml_ingesting/stores/store.py +22 -0
- amsdal_ml/ml_ingesting/types.py +40 -0
- amsdal_ml/ml_models/models.py +96 -4
- amsdal_ml/ml_models/openai_model.py +430 -122
- amsdal_ml/ml_models/utils.py +7 -0
- amsdal_ml/ml_retrievers/__init__.py +17 -0
- amsdal_ml/ml_retrievers/adapters.py +93 -0
- amsdal_ml/ml_retrievers/default_retriever.py +11 -1
- amsdal_ml/ml_retrievers/openai_retriever.py +27 -7
- amsdal_ml/ml_retrievers/query_retriever.py +487 -0
- amsdal_ml/ml_retrievers/retriever.py +12 -0
- amsdal_ml/models/embedding_model.py +7 -7
- amsdal_ml/prompts/__init__.py +77 -0
- amsdal_ml/prompts/database_query_agent.prompt +14 -0
- amsdal_ml/prompts/functional_calling_agent_base.prompt +9 -0
- amsdal_ml/prompts/nl_query_filter.prompt +318 -0
- amsdal_ml/{agents/promts → prompts}/react_chat.prompt +17 -8
- amsdal_ml/utils/__init__.py +5 -0
- amsdal_ml/utils/query_utils.py +189 -0
- {amsdal_ml-0.1.4.dist-info → amsdal_ml-0.2.1.dist-info}/METADATA +61 -3
- amsdal_ml-0.2.1.dist-info/RECORD +72 -0
- {amsdal_ml-0.1.4.dist-info → amsdal_ml-0.2.1.dist-info}/WHEEL +1 -1
- amsdal_ml/agents/promts/__init__.py +0 -58
- amsdal_ml-0.1.4.dist-info/RECORD +0 -39
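Based on the `default_qa_agent.py` diff below, local `PythonTool` instances and MCP `ToolClient`s are now passed through a single `tools=` argument (replacing the 0.1.4 `tool_clients=` argument), and the agent stays async-only. A minimal usage sketch under those assumptions; the placeholder values (`my_model`, `my_tool`, the question string) and the import path are illustrative, not taken from the package docs:

# Hypothetical usage sketch inferred from this diff; placeholders are not part of the package.
import asyncio

from amsdal_ml.agents.default_qa_agent import DefaultQAAgent

async def main() -> None:
    my_model = ...  # any MLModel implementation, e.g. the OpenAI-backed model in ml_models/
    my_tool = ...   # a PythonTool or a ToolClient; both are accepted by `tools=` in 0.2.1

    agent = DefaultQAAgent(
        model=my_model,
        tools=[my_tool],        # replaces the 0.1.4 `tool_clients=` argument
        max_steps=6,
        per_call_timeout=20.0,
    )
    # The agent is async-only: run()/stream() raise NotImplementedError.
    output = await agent.arun('What changed in amsdal_ml 0.2.1?')
    print(output.answer, output.used_tools)

asyncio.run(main())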
amsdal_ml/agents/agent.py
CHANGED
@@ -15,7 +15,7 @@ from amsdal_ml.fileio.base_loader import FileAttachment
 
 
 class AgentMessage(BaseModel):
-    role: Literal[
+    role: Literal['SYSTEM', 'USER', 'ASSISTANT']
     content: str
 
 
@@ -32,8 +32,7 @@ class Agent(ABC):
         user_query: str,
         *,
         attachments: Optional[list[FileAttachment]] = None,
-    ) -> AgentOutput:
-        ...
+    ) -> AgentOutput: ...
 
     @abstractmethod
     async def astream(
@@ -41,8 +40,7 @@
         user_query: str,
         *,
         attachments: Optional[list[FileAttachment]] = None,
-    ) -> AsyncIterator[str]:
-        ...
+    ) -> AsyncIterator[str]: ...
 
     def run(
         self,
@@ -50,7 +48,7 @@
         *,
         attachments: Optional[list[FileAttachment]] = None,
     ) -> AgentOutput:
-        msg =
+        msg = 'This agent is async-only. Use arun().'
         raise NotImplementedError(msg)
 
     def stream(
@@ -59,5 +57,5 @@
         *,
         attachments: Optional[list[FileAttachment]] = None,
     ) -> Iterator[str]:
-        msg =
+        msg = 'This agent is async-only. Use astream().'
         raise NotImplementedError(msg)
amsdal_ml/agents/default_qa_agent.py
CHANGED
@@ -12,23 +12,26 @@ from typing import no_type_check
 
 from amsdal_ml.agents.agent import Agent
 from amsdal_ml.agents.agent import AgentOutput
-from amsdal_ml.agents.
+from amsdal_ml.agents.mcp_client_tool import ClientToolProxy
+from amsdal_ml.agents.python_tool import PythonTool
+from amsdal_ml.agents.python_tool import _PythonToolProxy
 from amsdal_ml.fileio.base_loader import FileAttachment
 from amsdal_ml.mcp_client.base import ToolClient
 from amsdal_ml.mcp_client.base import ToolInfo
 from amsdal_ml.ml_models.models import MLModel
+from amsdal_ml.prompts import get_prompt
 
 # ---------- STRICT ReAct REGEX ----------
 _TOOL_CALL_RE = re.compile(
-    r
-    r
-    r
+    r'Thought:\s*Do I need to use a tool\?\s*Yes[\.\!]?\s*'
+    r'Action:\s*(?P<action>[^\n]+)\s*'
+    r'Action Input:\s*(?P<input>\{.*\})\s*',
     re.DOTALL | re.IGNORECASE,
 )
 
 _FINAL_RE = re.compile(
-    r
-    r
+    r'(?:Thought:\s*Do I need to use a tool\?\s*No[\.\!]?\s*)?'
+    r'Final Answer:\s*(?P<answer>.+)',
     re.DOTALL | re.IGNORECASE,
 )
 # ---------- constants ----------
@@ -36,9 +39,9 @@ _FINAL_RE = re.compile(
 _MAX_PARSE_RETRIES = 5
 
 
-
 # ---------- STRICT ReAct REGEX ----------
 
+
 @dataclass
 class Route:
     name: str
@@ -47,61 +50,23 @@
 
 
 class ParseErrorMode(Enum):
-    RAISE =
-    RETRY =
-
-
-# === proxy-tool ===
-class _ClientToolProxy:
-    def __init__(self, client: ToolClient, alias: str, name: str, schema: dict[str, Any], description: str):
-        self.client = client
-        self.alias = alias
-        self.name = name
-        self.qualified = f"{alias}.{name}"
-        self.parameters = schema
-        self.description = description
-        self._default_timeout: float | None = 20.0
-
-    def set_timeout(self, timeout: float | None) -> None:
-        self._default_timeout = timeout
-
-    async def run(
-        self,
-        args: dict[str, Any],
-        context=None,
-        *,
-        convert_result: bool = True,
-    ):
-        _ = (context, convert_result)
-
-        if self.parameters:
-            try:
-                import jsonschema
-                jsonschema.validate(instance=args, schema=self.parameters)
-            except Exception as exc:
-                msg = f"Tool input validation failed for {self.qualified}: {exc}"
-                raise ValueError(
-                    msg
-                ) from exc
+    RAISE = 'raise'
+    RETRY = 'retry'
 
-        return await self.client.call(self.name, args, timeout=self._default_timeout)
 
 class DefaultQAAgent(Agent):
-
     def __init__(
         self,
         *,
         model: MLModel,
-
+        tools: list[PythonTool | ToolClient] | None = None,
         max_steps: int = 6,
         on_parse_error: ParseErrorMode = ParseErrorMode.RAISE,
         enable_stop_guard: bool = True,
         per_call_timeout: float | None = 20.0,
     ):
-
-
-        self._tool_clients: list[ToolClient] = tool_clients or []
-        self._indexed_tools: dict[str, Any] = {}  # qualified -> proxy
+        self._tools: list[PythonTool | ToolClient] = tools or []
+        self._indexed_tools: dict[str, ClientToolProxy | _PythonToolProxy] = {}
 
         self.model = model
         self.model.setup()
@@ -110,7 +75,7 @@ class DefaultQAAgent(Agent):
         self.on_parse_error = on_parse_error
         self.enable_stop_guard = enable_stop_guard
 
-        self.
+        self._is_tools_index_built = False
 
     # ---------- tools helpers ----------
     def _get_tool(self, name: str) -> Any:
@@ -118,64 +83,76 @@
         Look up tools ONLY among client-indexed tools.
         Expected names are qualified: '<alias>.<tool_name>'.
         """
-        if not self.
-            msg =
+        if not self._is_tools_index_built:
+            msg = 'Tool index not built. Ensure arun()/astream() was used.'
             raise RuntimeError(msg)
         if name in self._indexed_tools:
             return self._indexed_tools[name]
         available = sorted(self._indexed_tool_names())
-        msg = f
+        msg = f'Unknown tool: {name}. Available: {", ".join(available)}'
         raise KeyError(msg)
 
     def _indexed_tool_names(self) -> list[str]:
-        return list(self._indexed_tools.keys()) if self.
+        return list(self._indexed_tools.keys()) if self._is_tools_index_built else []
 
     def _tool_names(self) -> str:
-        return
+        return ', '.join(sorted(self._indexed_tool_names()))
 
     def _tool_descriptions(self) -> str:
         parts: list[str] = []
-        if self.
+        if self._is_tools_index_built:
             for qn, t in self._indexed_tools.items():
-                desc = t.description or
+                desc = t.description or 'No description.'
                 try:
                     schema_json = json.dumps(t.parameters or {}, ensure_ascii=False)
                 except Exception:
                     schema_json = str(t.parameters)
-                parts.append(f
-        return
+                parts.append(f'- {qn}: {desc}\n Args JSON schema: {schema_json}')
+        return '\n'.join(parts)
 
     async def _build_clients_index(self):
         self._indexed_tools.clear()
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+        for tool in self._tools:
+            if isinstance(tool, ToolClient):
+                infos: list[ToolInfo] = await tool.list_tools()
+                for ti in infos:
+                    qname = f'{ti.alias}.{ti.name}'
+                    proxy = ClientToolProxy(
+                        client=tool,
+                        alias=ti.alias,
+                        name=ti.name,
+                        schema=ti.input_schema or {},
+                        description=ti.description or '',
+                    )
+                    proxy.set_timeout(self.per_call_timeout)
+                    self._indexed_tools[qname] = proxy
+            elif isinstance(tool, PythonTool):
+                if tool.name in self._indexed_tools:
+                    msg = f'Tool name conflict: {tool.name} is already defined.'
+                    raise ValueError(msg)
+                proxy = _PythonToolProxy(tool, timeout=self.per_call_timeout)  # type: ignore[assignment]
+                self._indexed_tools[tool.name] = proxy
+            else:
+                msg = f'Unsupported tool type: {type(tool)}'
+                raise TypeError(msg)
+
+        self._is_tools_index_built = True
 
     # ---------- prompt composition ----------
     def _react_text(self, user_query: str, scratchpad: str) -> str:
-        tmpl = get_prompt(
+        tmpl = get_prompt('react_chat')
         return tmpl.render_text(
             user_query=user_query,
             tools=self._tool_descriptions(),
             tool_names=self._tool_names(),
             agent_scratchpad=scratchpad,
-            chat_history=
+            chat_history='',
         )
 
     @staticmethod
     def _stopped_message() -> str:
-        return
+        return 'Agent stopped due to iteration limit or time limit.'
 
     def _stopped_response(self, used_tools: list[str]) -> AgentOutput:
         return AgentOutput(answer=self._stopped_message(), used_tools=used_tools, citations=[])
@@ -183,7 +160,7 @@ class DefaultQAAgent(Agent):
     @staticmethod
     def _serialize_observation(content: Any) -> str:
         if isinstance(content, str | bytes):
-            return content if isinstance(content, str) else content.decode(
+            return content if isinstance(content, str) else content.decode('utf-8', errors='ignore')
         try:
             return json.dumps(content, ensure_ascii=False)
         except Exception:
@@ -191,82 +168,73 @@ class DefaultQAAgent(Agent):
 
     # ---------- core run ----------
     def run(self, user_query: str, *, attachments: list[FileAttachment] | None = None) -> AgentOutput:
-        msg =
+        msg = 'DefaultQAAgent is async-only for now. Use arun().'
         raise NotImplementedError(msg)
 
     async def _run_async(self, user_query: str, *, attachments: list[FileAttachment] | None = None) -> AgentOutput:
-        if
+        if not self._is_tools_index_built:
             await self._build_clients_index()
 
-        scratch =
+        scratch = ''
         used_tools: list[str] = []
         parse_retries = 0
 
-
         for _ in range(self.max_steps):
             prompt = self._react_text(user_query, scratch)
             out = await self.model.ainvoke(prompt, attachments=attachments)
-            print(
+            print('Model output:', out)  # noqa: T201
             print('promt:', prompt)  # noqa: T201
-            m_final = _FINAL_RE.search(out or
+            m_final = _FINAL_RE.search(out or '')
             if m_final:
                 return AgentOutput(
-                    answer=(m_final.group(
+                    answer=(m_final.group('answer') or '').strip(),
                     used_tools=used_tools,
                     citations=[],
                 )
 
-            m_tool = _TOOL_CALL_RE.search(out or
+            m_tool = _TOOL_CALL_RE.search(out or '')
             if not m_tool:
                 parse_retries += 1
                 if self.on_parse_error == ParseErrorMode.RAISE or parse_retries >= _MAX_PARSE_RETRIES:
-                    msg = (
-
-                        f"Got:\n{out}"
-                    )
-                    raise ValueError(
-                        msg
-                    )
+                    msg = f'Invalid ReAct output. Expected EXACT format (Final or Tool-call). Got:\n{out}'
+                    raise ValueError(msg)
 
                 scratch += (
-
-
+                    '\nThought: Previous output violated the strict format. '
+                    'Reply again using EXACTLY one of the two specified formats.\n'
                 )
                 continue
 
-            action = m_tool.group(
-            raw_input = m_tool.group(
+            action = m_tool.group('action').strip()
+            raw_input = m_tool.group('input').strip()
 
             try:
                 args = json.loads(raw_input)
                 if not isinstance(args, dict):
-                    msg =
+                    msg = 'Action Input must be a JSON object.'
                     raise ValueError(msg)
             except Exception as e:
                 parse_retries += 1
                 if self.on_parse_error == ParseErrorMode.RAISE or parse_retries >= _MAX_PARSE_RETRIES:
-                    msg = f
+                    msg = f'Invalid Action Input JSON: {raw_input!r} ({e})'
                     raise ValueError(msg) from e
-                scratch += (
-                    "\nThought: Action Input must be a ONE-LINE JSON object. "
-                    "Retry with correct JSON.\n"
-                )
+                scratch += '\nThought: Action Input must be a ONE-LINE JSON object. Retry with correct JSON.\n'
                 continue
 
             tool = self._get_tool(action)
 
             try:
                 result = await tool.run(args, context=None, convert_result=True)
-                print(
+                print('Similarity search result:', result)  # noqa: T201
             except Exception as e:
                 # unified error payload
                 err = {
-
-
-
-
-
-
+                    'error': {
+                        'type': e.__class__.__name__,
+                        'server': getattr(tool, 'alias', 'local'),
+                        'tool': getattr(tool, 'name', getattr(tool, 'qualified', 'unknown')),
+                        'message': str(e),
+                        'retryable': False,
                     }
                 }
                 result = err
@@ -275,10 +243,10 @@ class DefaultQAAgent(Agent):
             observation = self._serialize_observation(result)
 
             scratch += (
-
-                f
-                f
-                f
+                '\nThought: Do I need to use a tool? Yes'
+                f'\nAction: {action}'
+                f'\nAction Input: {raw_input}'
+                f'\nObservation: {observation}\n'
             )
 
         return self._stopped_response(used_tools)
@@ -290,17 +258,17 @@ class DefaultQAAgent(Agent):
     # ---------- streaming ----------
     @no_type_check
     async def astream(self, user_query: str, *, attachments: list[FileAttachment] | None = None) -> AsyncIterator[str]:
-        if
+        if not self._is_tools_index_built:
             await self._build_clients_index()
 
-        scratch =
+        scratch = ''
         used_tools: list[str] = []
         parse_retries = 0
 
         for _ in range(self.max_steps):
             prompt = self._react_text(user_query, scratch)
 
-            buffer =
+            buffer = ''
 
             # Normalize model.astream: it might be an async iterator already,
             # or a coroutine (or nested coroutines) that resolves to one.
@@ -309,8 +277,8 @@
                 _val = await _val
 
             # Optional guard (helpful during tests)
-            if not hasattr(_val,
-                msg = f
+            if not hasattr(_val, '__aiter__'):
+                msg = f'model.astream() did not yield an AsyncIterator; got {type(_val)!r}'
                 raise TypeError(msg)
 
             model_stream = _val  # now an AsyncIterator[str]
@@ -318,42 +286,39 @@
             async for chunk in model_stream:
                 buffer += chunk
 
-            m_final = _FINAL_RE.search(buffer or
+            m_final = _FINAL_RE.search(buffer or '')
            if m_final:
-                answer = (m_final.group(
+                answer = (m_final.group('answer') or '').strip()
                 if answer:
                     yield answer
                 return
 
-            m_tool = _TOOL_CALL_RE.search(buffer or
+            m_tool = _TOOL_CALL_RE.search(buffer or '')
             if not m_tool:
                 parse_retries += 1
                 if self.on_parse_error == ParseErrorMode.RAISE or parse_retries >= _MAX_PARSE_RETRIES:
-                    msg = f
+                    msg = f'Invalid ReAct output (stream). Expected EXACT format. Got:\n{buffer}'
                     raise ValueError(msg)
                 scratch += (
-
-
+                    '\nThought: Previous output violated the strict format. '
+                    'Reply again using EXACTLY one of the two specified formats.\n'
                 )
                 continue
 
-            action = m_tool.group(
-            raw_input = m_tool.group(
+            action = m_tool.group('action').strip()
+            raw_input = m_tool.group('input').strip()
 
             try:
                 args = json.loads(raw_input)
                 if not isinstance(args, dict):
-                    msg =
+                    msg = 'Action Input must be a JSON object.'
                     raise ValueError(msg)
             except Exception as e:
                 parse_retries += 1
                 if self.on_parse_error == ParseErrorMode.RAISE or parse_retries >= _MAX_PARSE_RETRIES:
-                    msg = f
+                    msg = f'Invalid Action Input JSON: {raw_input!r} ({e})'
                     raise ValueError(msg) from e
-                scratch += (
-                    "\nThought: Action Input must be a ONE-LINE JSON object. "
-                    "Retry with correct JSON.\n"
-                )
+                scratch += '\nThought: Action Input must be a ONE-LINE JSON object. Retry with correct JSON.\n'
                 continue
 
             tool = self._get_tool(action)
@@ -362,12 +327,12 @@ class DefaultQAAgent(Agent):
                 result = await tool.run(args, context=None, convert_result=True)
             except Exception as e:
                 result = {
-
-
-
-
-
-
+                    'error': {
+                        'type': e.__class__.__name__,
+                        'server': getattr(tool, 'alias', 'local'),
+                        'tool': getattr(tool, 'name', getattr(tool, 'qualified', 'unknown')),
+                        'message': str(e),
+                        'retryable': False,
                     }
                 }
 
@@ -375,10 +340,10 @@ class DefaultQAAgent(Agent):
             observation = self._serialize_observation(result)
 
             scratch += (
-
-                f
-                f
-                f
+                '\nThought: Do I need to use a tool? Yes'
+                f'\nAction: {action}'
+                f'\nAction Input: {raw_input}'
+                f'\nObservation: {observation}\n'
             )
 
         yield self._stopped_message()
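For reference, the strict ReAct formats enforced by the `_TOOL_CALL_RE` and `_FINAL_RE` patterns added above can be exercised in isolation. A small sketch: the regexes are copied from this diff, while the qualified tool name `retriever.search` and the sample texts are made up for illustration.

import re

# Regexes as added in default_qa_agent.py in 0.2.1.
_TOOL_CALL_RE = re.compile(
    r'Thought:\s*Do I need to use a tool\?\s*Yes[\.\!]?\s*'
    r'Action:\s*(?P<action>[^\n]+)\s*'
    r'Action Input:\s*(?P<input>\{.*\})\s*',
    re.DOTALL | re.IGNORECASE,
)
_FINAL_RE = re.compile(
    r'(?:Thought:\s*Do I need to use a tool\?\s*No[\.\!]?\s*)?'
    r'Final Answer:\s*(?P<answer>.+)',
    re.DOTALL | re.IGNORECASE,
)

# Format 1: a tool call ('retriever.search' is a made-up qualified tool name).
tool_call = (
    'Thought: Do I need to use a tool? Yes\n'
    'Action: retriever.search\n'
    'Action Input: {"query": "amsdal"}\n'
)
m = _TOOL_CALL_RE.search(tool_call)
assert m and m.group('action').strip() == 'retriever.search'

# Format 2: a final answer.
final = 'Thought: Do I need to use a tool? No\nFinal Answer: 42'
assert _FINAL_RE.search(final).group('answer').strip() == '42'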