llms-py 3.0.1__py3-none-any.whl → 3.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llms/{extensions/app/db_manager.py → db.py} +170 -15
- llms/extensions/app/__init__.py +95 -29
- llms/extensions/app/db.py +16 -124
- llms/extensions/app/ui/threadStore.mjs +20 -2
- llms/extensions/core_tools/__init__.py +37 -0
- llms/extensions/gallery/__init__.py +15 -13
- llms/extensions/gallery/db.py +117 -172
- llms/extensions/gallery/ui/index.mjs +1 -1
- llms/extensions/providers/__init__.py +3 -1
- llms/extensions/providers/anthropic.py +7 -3
- llms/extensions/providers/cerebras.py +37 -0
- llms/extensions/providers/chutes.py +1 -1
- llms/extensions/providers/google.py +131 -28
- llms/extensions/providers/nvidia.py +2 -2
- llms/extensions/providers/openai.py +2 -2
- llms/extensions/providers/openrouter.py +4 -2
- llms/llms.json +3 -0
- llms/main.py +81 -34
- llms/providers.json +1 -1
- llms/ui/ai.mjs +1 -1
- llms/ui/app.css +96 -3
- llms/ui/ctx.mjs +21 -0
- llms/ui/index.mjs +2 -0
- llms/ui/modules/chat/ChatBody.mjs +1 -0
- llms/ui/modules/chat/index.mjs +19 -1
- llms/ui/modules/icons.mjs +46 -0
- llms/ui/modules/layout.mjs +28 -0
- llms/ui/modules/model-selector.mjs +0 -40
- llms/ui/utils.mjs +9 -1
- {llms_py-3.0.1.dist-info → llms_py-3.0.2.dist-info}/METADATA +1 -1
- {llms_py-3.0.1.dist-info → llms_py-3.0.2.dist-info}/RECORD +35 -33
- {llms_py-3.0.1.dist-info → llms_py-3.0.2.dist-info}/WHEEL +0 -0
- {llms_py-3.0.1.dist-info → llms_py-3.0.2.dist-info}/entry_points.txt +0 -0
- {llms_py-3.0.1.dist-info → llms_py-3.0.2.dist-info}/licenses/LICENSE +0 -0
- {llms_py-3.0.1.dist-info → llms_py-3.0.2.dist-info}/top_level.txt +0 -0
llms/extensions/providers/google.py CHANGED
@@ -79,16 +79,61 @@ def install_google(ctx):
             if "Authorization" in self.headers:
                 del self.headers["Authorization"]
 
-        async def chat(self, chat):
+        def provider_model(self, model):
+            if model.lower().startswith("gemini-"):
+                return model
+            return super().provider_model(model)
+
+        def model_info(self, model):
+            info = super().model_info(model)
+            if info:
+                return info
+            if model.lower().startswith("gemini-"):
+                return {
+                    "id": model,
+                    "name": model,
+                    "cost": {"input": 0, "output": 0},
+                }
+            return None
+
+        async def chat(self, chat, context=None):
             chat["model"] = self.provider_model(chat["model"]) or chat["model"]
+            model_info = (context.get("modelInfo") if context is not None else None) or self.model_info(chat["model"])
 
             chat = await self.process_chat(chat)
             generation_config = {}
+            tools = None
+            supports_tool_calls = model_info.get("tool_call", False)
+
+            if "tools" in chat and supports_tool_calls:
+                function_declarations = []
+                gemini_tools = {}
+
+                for tool in chat["tools"]:
+                    if tool["type"] == "function":
+                        f = tool["function"]
+                        function_declarations.append(
+                            {
+                                "name": f["name"],
+                                "description": f.get("description"),
+                                "parameters": f.get("parameters"),
+                            }
+                        )
+                    elif tool["type"] == "file_search":
+                        gemini_tools["file_search"] = tool["file_search"]
+
+                if function_declarations:
+                    gemini_tools["function_declarations"] = function_declarations
+
+                tools = [gemini_tools] if gemini_tools else None
 
             # Filter out system messages and convert to proper Gemini format
             contents = []
             system_prompt = None
 
+            # Track tool call IDs to names for response mapping
+            tool_id_map = {}
+
             async with aiohttp.ClientSession() as session:
                 for message in chat["messages"]:
                     if message["role"] == "system":
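
Note: this tools branch replaces 3.0.1's hard "Error: tools not supported in Gemini" logging (removed in a later hunk below). The mapping it performs, written as a standalone sketch over OpenAI-style tool specs (not the package's exact code; the tool names in the example are illustrative):

# Sketch of the OpenAI -> Gemini tool-spec mapping performed above.
def to_gemini_tools(openai_tools):
    function_declarations = []
    gemini_tools = {}
    for tool in openai_tools:
        if tool["type"] == "function":
            f = tool["function"]
            function_declarations.append({
                "name": f["name"],
                "description": f.get("description"),
                "parameters": f.get("parameters"),
            })
        elif tool["type"] == "file_search":
            gemini_tools["file_search"] = tool["file_search"]
    if function_declarations:
        gemini_tools["function_declarations"] = function_declarations
    # Gemini expects a list of tool objects; None means omit the "tools" key
    return [gemini_tools] if gemini_tools else None

print(to_gemini_tools([{
    "type": "function",
    "function": {"name": "get_weather",
                 "description": "Get current weather for a city",
                 "parameters": {"type": "object",
                                "properties": {"city": {"type": "string"}}}},
}]))
# -> [{'function_declarations': [{'name': 'get_weather', ...}]}]
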
@@ -101,8 +146,55 @@ def install_google(ctx):
                         elif isinstance(content, str):
                             system_prompt = content
                 elif "content" in message:
+                    role = "user"
+                    if "role" in message:
+                        if message["role"] == "user":
+                            role = "user"
+                        elif message["role"] == "assistant":
+                            role = "model"
+                        elif message["role"] == "tool":
+                            role = "function"
+
+                    parts = []
+
+                    # Handle tool calls in assistant messages
+                    if message.get("role") == "assistant" and "tool_calls" in message:
+                        for tool_call in message["tool_calls"]:
+                            tool_id_map[tool_call["id"]] = tool_call["function"]["name"]
+                            parts.append(
+                                {
+                                    "functionCall": {
+                                        "name": tool_call["function"]["name"],
+                                        "args": json.loads(tool_call["function"]["arguments"]),
+                                    }
+                                }
+                            )
+
+                    # Handle tool responses from user
+                    if message.get("role") == "tool":
+                        # Gemini expects function response in 'functionResponse' part
+                        # We need to find the name associated with this tool_call_id
+                        tool_call_id = message.get("tool_call_id")
+                        name = tool_id_map.get(tool_call_id)
+                        # If we can't find the name (maybe from previous turn not in history or restart),
+                        # we might have an issue. But let's try to proceed.
+                        # Fallback: if we can't find the name, skip or try to infer?
+                        # Gemini strict validation requires the name.
+                        if name:
+                            # content is the string response
+                            # Some implementations pass the content directly.
+                            # Google docs say: response: { "name": "...", "content": { ... } }
+                            # Actually "response" field in functionResponse is a Struct/Map.
+                            parts.append(
+                                {
+                                    "functionResponse": {
+                                        "name": name,
+                                        "response": {"name": name, "content": message["content"]},
+                                    }
+                                }
+                            )
+
                     if isinstance(message["content"], list):
-                        parts = []
                         for item in message["content"]:
                             if "type" in item:
                                 if item["type"] == "image_url" and "image_url" in item:
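
Note: Gemini matches a function response to its originating call by function name rather than by id, which is why the code above records tool-call ids in tool_id_map while walking the history. The same mapping as a self-contained sketch (the helper name is illustrative):

import json

def to_gemini_tool_parts(message, tool_id_map):
    # Assistant tool calls become Gemini functionCall parts
    parts = []
    if message.get("role") == "assistant" and "tool_calls" in message:
        for tool_call in message["tool_calls"]:
            tool_id_map[tool_call["id"]] = tool_call["function"]["name"]
            parts.append({"functionCall": {
                "name": tool_call["function"]["name"],
                "args": json.loads(tool_call["function"]["arguments"]),
            }})
    # Tool results become functionResponse parts, looked up by the recorded name
    if message.get("role") == "tool":
        name = tool_id_map.get(message.get("tool_call_id"))
        if name:  # Gemini's validation requires the function name
            parts.append({"functionResponse": {
                "name": name,
                "response": {"name": name, "content": message["content"]},
            }})
    return parts
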
@@ -142,23 +234,14 @@ def install_google(ctx):
                                 if "text" in item:
                                     text = item["text"]
                                     parts.append({"text": text})
-
-                            contents.append(
-                                {
-                                    "role": "user"
-                                    if "role" in message and message["role"] == "user"
-                                    else "model",
-                                    "parts": parts,
-                                }
-                            )
-                    else:
-                        content = message["content"]
+                    elif message["content"]:  # String content
+                        parts.append({"text": message["content"]})
+
+                    if len(parts) > 0:
                         contents.append(
                             {
-                                "role": "user"
-                                if "role" in message and message["role"] == "user"
-                                else "model",
-                                "parts": [{"text": content}],
+                                "role": role,
+                                "parts": parts,
                             }
                         )
 
@@ -166,6 +249,9 @@ def install_google(ctx):
                 "contents": contents,
             }
 
+            if tools:
+                gemini_chat["tools"] = tools
+
             if self.safety_settings:
                 gemini_chat["safetySettings"] = self.safety_settings
 
@@ -192,18 +278,12 @@ def install_google(ctx):
             if len(generation_config) > 0:
                 gemini_chat["generationConfig"] = generation_config
 
-            if "tools" in chat:
-                # gemini_chat["tools"] = chat["tools"]
-                ctx.log("Error: tools not supported in Gemini")
-            elif self.tools:
-                # gemini_chat["tools"] = self.tools.copy()
-                ctx.log("Error: tools not supported in Gemini")
-
             if "modalities" in chat:
                 generation_config["responseModalities"] = [modality.upper() for modality in chat["modalities"]]
                 if "image" in chat["modalities"] and "image_config" in chat:
                     # delete thinkingConfig
-                    del generation_config["thinkingConfig"]
+                    if "thinkingConfig" in generation_config:
+                        del generation_config["thinkingConfig"]
                     config_map = {
                         "aspect_ratio": "aspectRatio",
                         "image_size": "imageSize",
@@ -212,11 +292,16 @@ def install_google(ctx):
                         config_map[k]: v for k, v in chat["image_config"].items() if k in config_map
                     }
                 if "audio" in chat["modalities"] and self.speech_config:
-                    del generation_config["thinkingConfig"]
+                    if "thinkingConfig" in generation_config:
+                        del generation_config["thinkingConfig"]
                     generation_config["speechConfig"] = self.speech_config.copy()
                     # Currently Google Audio Models only accept AUDIO
                     generation_config["responseModalities"] = ["AUDIO"]
 
+            # Ensure generationConfig is set if we added anything to it
+            if len(generation_config) > 0:
+                gemini_chat["generationConfig"] = generation_config
+
             started_at = int(time.time() * 1000)
             gemini_chat_url = f"https://generativelanguage.googleapis.com/v1beta/models/{chat['model']}:generateContent?key={self.api_key}"
 
@@ -237,6 +322,8 @@ def install_google(ctx):
                     timeout=aiohttp.ClientTimeout(total=120),
                 ) as res:
                     obj = await self.response_json(res)
+                    if context is not None:
+                        context["providerResponse"] = obj
             except Exception as e:
                 ctx.log(f"Error: {res.status} {res.reason}: {e}")
                 text = await res.text()
@@ -271,7 +358,7 @@ def install_google(ctx):
                 "model": obj.get("modelVersion", chat["model"]),
             }
             choices = []
-            for i, candidate in enumerate(obj["candidates"]):
+            for i, candidate in enumerate(obj.get("candidates", [])):
                 role = "assistant"
                 if "content" in candidate and "role" in candidate["content"]:
                     role = "assistant" if candidate["content"]["role"] == "model" else candidate["content"]["role"]
@@ -281,6 +368,8 @@ def install_google(ctx):
                 reasoning = ""
                 images = []
                 audios = []
+                tool_calls = []
+
                 if "content" in candidate and "parts" in candidate["content"]:
                     text_parts = []
                     reasoning_parts = []
@@ -290,6 +379,16 @@ def install_google(ctx):
                                 reasoning_parts.append(part["text"])
                             else:
                                 text_parts.append(part["text"])
+                        if "functionCall" in part:
+                            fc = part["functionCall"]
+                            tool_calls.append(
+                                {
+                                    "id": f"call_{len(tool_calls)}_{int(time.time())}",  # Gemini doesn't return ID, generate one
+                                    "type": "function",
+                                    "function": {"name": fc["name"], "arguments": json.dumps(fc["args"])},
+                                }
+                            )
+
                         if "inlineData" in part:
                             inline_data = part["inlineData"]
                             mime_type = inline_data.get("mimeType", "image/png")
@@ -354,7 +453,7 @@ def install_google(ctx):
                     "finish_reason": candidate.get("finishReason", "stop"),
                     "message": {
                         "role": role,
-                        "content": content,
+                        "content": content if content else None,
                     },
                 }
                 if reasoning:
@@ -363,6 +462,10 @@ def install_google(ctx):
                     choice["message"]["images"] = images
                 if len(audios) > 0:
                     choice["message"]["audios"] = audios
+                if len(tool_calls) > 0:
+                    choice["message"]["tool_calls"] = tool_calls
+                # If we have tool calls, content can be null but message should probably exist
+
                 choices.append(choice)
             response["choices"] = choices
             if "usageMetadata" in obj:
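
Note: with the request-side and response-side mappings above, Gemini models now complete the same tool-calling round trip as the OpenAI-compatible providers. A hedged sketch of the caller's side (run_tool and the driver are hypothetical; the message shapes are the ones produced by this diff):

import json

def run_tool(name, args):  # hypothetical dispatcher for locally registered tools
    return json.dumps({"ok": True, "tool": name, "args": args})

async def tool_round_trip(provider, chat):
    response = await provider.chat(chat)
    message = response["choices"][0]["message"]
    tool_calls = message.get("tool_calls") or []
    if not tool_calls:
        return response
    chat["messages"].append(message)  # assistant turn carrying tool_calls
    for tool_call in tool_calls:
        chat["messages"].append({
            "role": "tool",
            "tool_call_id": tool_call["id"],  # mapped back to a functionResponse by name
            "content": run_tool(tool_call["function"]["name"],
                                json.loads(tool_call["function"]["arguments"])),
        })
    return await provider.chat(chat)  # second turn consumes the tool output
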
llms/extensions/providers/nvidia.py CHANGED
@@ -54,7 +54,7 @@ def install_nvidia(ctx):
                 }
             raise Exception("No artifacts in response")
 
-        async def chat(self, chat, provider=None):
+        async def chat(self, chat, provider=None, context=None):
             headers = self.get_headers(provider, chat)
             if provider is not None:
                 chat["model"] = provider.provider_model(chat["model"]) or chat["model"]
@@ -100,6 +100,6 @@ def install_nvidia(ctx):
                     data=json.dumps(gen_request),
                     timeout=aiohttp.ClientTimeout(total=120),
                 ) as response:
-                    return self.to_response(await self.response_json(response), chat, started_at)
+                    return self.to_response(await self.response_json(response), chat, started_at, context=context)
 
     ctx.add_provider(NvidiaGenAi)
llms/extensions/providers/openai.py CHANGED
@@ -113,7 +113,7 @@ def install_openai(ctx):
             ctx.log(json.dumps(response, indent=2))
             raise Exception("No 'data' field in response.")
 
-        async def chat(self, chat, provider=None):
+        async def chat(self, chat, provider=None, context=None):
             headers = self.get_headers(provider, chat)
 
             if chat["model"] in self.map_image_models:
|
                         text = await response.text()
                         ctx.log(text[:1024] + (len(text) > 1024 and "..." or ""))
                         if response.status < 300:
-                            return ctx.log_json(await self.to_response(json.loads(text), chat, started_at))
+                            return ctx.log_json(await self.to_response(json.loads(text), chat, started_at, context=context))
                         else:
                             raise Exception(f"Failed to generate image {response.status}")
 
llms/extensions/providers/openrouter.py CHANGED
@@ -39,7 +39,7 @@ def install_openrouter(ctx):
 
             return response
 
-        async def chat(self, chat, provider=None):
+        async def chat(self, chat, provider=None, context=None):
             headers = self.get_headers(provider, chat)
             if provider is not None:
                 chat["model"] = provider.provider_model(chat["model"]) or chat["model"]
@@ -67,6 +67,8 @@ def install_openrouter(ctx):
             ) as response:
                 if metadata:
                     chat["metadata"] = metadata
-                return ctx.log_json(self.to_response(await self.response_json(response), chat, started_at))
+                return ctx.log_json(
+                    self.to_response(await self.response_json(response), chat, started_at, context=context)
+                )
 
     ctx.add_provider(OpenRouterGenerator)
llms/llms.json CHANGED
llms/main.py CHANGED
@@ -41,7 +41,7 @@ try:
 except ImportError:
     HAS_PIL = False
 
-VERSION = "3.0.1"
+VERSION = "3.0.2"
 _ROOT = None
 DEBUG = os.getenv("DEBUG") == "1"
 MOCK = os.getenv("MOCK") == "1"
@@ -375,7 +375,7 @@ async def process_chat(chat, provider_id=None):
     if "stream" not in chat:
         chat["stream"] = False
     # Some providers don't support empty tools
-    if "tools" in chat and len(chat["tools"]) == 0:
+    if "tools" in chat and (chat["tools"] is None or len(chat["tools"]) == 0):
         del chat["tools"]
     if "messages" not in chat:
         return chat
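
Note: 3.0.1 only stripped an empty tools list, so a client sending "tools": null would previously reach the provider unchanged. Both shapes are now removed before dispatch; a runnable sketch of the widened guard:

chat_a = {"model": "m", "messages": [], "tools": []}    # handled in 3.0.1
chat_b = {"model": "m", "messages": [], "tools": None}  # newly handled in 3.0.2

for chat in (chat_a, chat_b):
    if "tools" in chat and (chat["tools"] is None or len(chat["tools"]) == 0):
        del chat["tools"]

assert "tools" not in chat_a and "tools" not in chat_b
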
@@ -618,7 +618,7 @@ def save_bytes_to_cache(base64_data, filename, file_info, ignore_info=False):
         _dbg(f"Cached bytes exists: {relative_path}")
         if ignore_info:
             return url, None
-        return url, json.loads(text_from_file(info_path))
+        return url, json_from_file(info_path)
 
     os.makedirs(os.path.dirname(full_path), exist_ok=True)
 
@@ -665,7 +665,7 @@ def save_image_to_cache(base64_data, filename, image_info, ignore_info=False):
         _dbg(f"Saved image exists: {relative_path}")
         if ignore_info:
             return url, None
-        return url, json.loads(text_from_file(info_path))
+        return url, json_from_file(info_path)
 
     os.makedirs(os.path.dirname(full_path), exist_ok=True)
 
@@ -870,7 +870,7 @@ class GeneratorBase:
     def to_response(self, response, chat, started_at):
         raise NotImplementedError
 
-    async def chat(self, chat, provider=None):
+    async def chat(self, chat, provider=None, context=None):
         return {
             "choices": [
                 {
@@ -1030,7 +1030,7 @@ class OpenAiCompatible:
     def response_json(self, response):
         return response_json(response)
 
-    def to_response(self, response, chat, started_at):
+    def to_response(self, response, chat, started_at, context=None):
         if "metadata" not in response:
             response["metadata"] = {}
         response["metadata"]["duration"] = int((time.time() - started_at) * 1000)
@@ -1038,6 +1038,8 @@ class OpenAiCompatible:
         pricing = self.model_cost(chat["model"])
         if pricing and "input" in pricing and "output" in pricing:
             response["metadata"]["pricing"] = f"{pricing['input']}/{pricing['output']}"
+        if context is not None:
+            context["providerResponse"] = response
         return response
 
     def chat_summary(self, chat):
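
Note: this is the consumer side of the context parameter threaded through every provider's chat() in this release: any caller that passes a dict can read back the unmodified provider response afterwards. A minimal sketch, assuming provider is any OpenAiCompatible instance:

async def chat_with_capture(provider, chat):
    context = {}
    response = await provider.chat(chat, context=context)
    raw = context.get("providerResponse")  # populated inside to_response()
    return response, raw
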
@@ -1046,17 +1048,18 @@ class OpenAiCompatible:
     def process_chat(self, chat, provider_id=None):
         return process_chat(chat, provider_id)
 
-    async def chat(self, chat):
+    async def chat(self, chat, context=None):
         chat["model"] = self.provider_model(chat["model"]) or chat["model"]
 
-        if "modalities" in chat:
-            for modality in chat["modalities"]:
+        modalities = chat.get("modalities") or []
+        if len(modalities) > 0:
+            for modality in modalities:
                 # use default implementation for text modalities
                 if modality == "text":
                     continue
                 modality_provider = self.modalities.get(modality)
                 if modality_provider:
-                    return await modality_provider.chat(chat, self)
+                    return await modality_provider.chat(chat, self, context=context)
                 else:
                     raise Exception(f"Provider {self.name} does not support '{modality}' modality")
 
@@ -1110,7 +1113,7 @@ class OpenAiCompatible:
             self.chat_url, headers=self.headers, data=json.dumps(chat), timeout=aiohttp.ClientTimeout(total=120)
         ) as response:
             chat["metadata"] = metadata
-            return self.to_response(await response_json(response), chat, started_at)
+            return self.to_response(await response_json(response), chat, started_at, context=context)
 
 
 class MistralProvider(OpenAiCompatible):
@@ -1282,6 +1285,9 @@ def api_providers():
 
 
 def to_error_message(e):
+    # check if has 'message' attribute
+    if hasattr(e, "message"):
+        return e.message
     return str(e)
 
 
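
Note: the new branch prefers an exception's own message attribute, when one exists, over str(e). For example:

class ApiError(Exception):
    def __init__(self, message):
        super().__init__(message)
        self.message = message

def to_error_message(e):
    # check if has 'message' attribute
    if hasattr(e, "message"):
        return e.message
    return str(e)

assert to_error_message(ApiError("rate limited")) == "rate limited"
assert to_error_message(ValueError("bad input")) == "bad input"
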
@@ -1375,21 +1381,7 @@ async def g_chat_completion(chat, context=None):
     accumulated_cost = 0.0
 
     # Inject global tools if present
-    current_chat = chat.copy()
-    if g_app.tool_definitions:
-        only_tools_str = context.get("tools", "all")
-        include_all_tools = only_tools_str == "all"
-        only_tools = only_tools_str.split(",")
-
-        if include_all_tools or len(only_tools) > 0:
-            if "tools" not in current_chat:
-                current_chat["tools"] = []
-
-            existing_tools = {t["function"]["name"] for t in current_chat["tools"]}
-            for tool_def in g_app.tool_definitions:
-                name = tool_def["function"]["name"]
-                if name not in existing_tools and (include_all_tools or name in only_tools):
-                    current_chat["tools"].append(tool_def)
+    current_chat = g_app.create_chat_with_tools(chat, use_tools=context.get("tools", "all"))
 
     # Apply pre-chat filters ONCE
     context["chat"] = current_chat
@@ -1405,7 +1397,7 @@ async def g_chat_completion(chat, context=None):
         if should_cancel_thread(context):
             return
 
-        response = await provider.chat(current_chat)
+        response = await provider.chat(current_chat, context=context)
 
         if should_cancel_thread(context):
             return
@@ -1427,8 +1419,9 @@ async def g_chat_completion(chat, context=None):
         choice = response.get("choices", [])[0] if response.get("choices") else {}
         message = choice.get("message", {})
         tool_calls = message.get("tool_calls")
+        supports_tool_calls = model_info.get("tool_call", False)
 
-        if tool_calls:
+        if tool_calls and supports_tool_calls:
             # Append the assistant's message with tool calls to history
             if "messages" not in current_chat:
                 current_chat["messages"] = []
@@ -2179,6 +2172,13 @@ def text_from_file(filename):
     return None
 
 
+def json_from_file(filename):
+    if os.path.exists(filename):
+        with open(filename, encoding="utf-8") as f:
+            return json.load(f)
+    return None
+
+
 async def text_from_resource_or_url(filename):
     text = text_from_resource(filename)
     if not text:
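
Note: json_from_file consolidates the read-sidecar-JSON pattern that the save_bytes_to_cache, save_image_to_cache, and cache-handler hunks now share, and it is also exposed on ExtensionContext in a later hunk. Usage sketch (the path is hypothetical):

import json, os

def json_from_file(filename):  # as added above
    if os.path.exists(filename):
        with open(filename, encoding="utf-8") as f:
            return json.load(f)
    return None

info = json_from_file("/home/user/.llms/cache/abc123.info.json") or {}
print(info.get("type"), info.get("name"))  # empty dict when the sidecar doesn't exist
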
@@ -2472,6 +2472,27 @@ class AppExtensions:
         _dbg(f"exit({exit_code})")
         sys.exit(exit_code)
 
+    def create_chat_with_tools(self, chat, use_tools="all"):
+        # Inject global tools if present
+        current_chat = chat.copy()
+        tools = current_chat.get("tools")
+        if tools is None:
+            tools = current_chat["tools"] = []
+        if self.tool_definitions and len(tools) == 0:
+            include_all_tools = use_tools == "all"
+            only_tools_list = use_tools.split(",")
+
+            if include_all_tools or len(only_tools_list) > 0:
+                if "tools" not in current_chat:
+                    current_chat["tools"] = []
+
+                existing_tools = {t["function"]["name"] for t in current_chat["tools"]}
+                for tool_def in self.tool_definitions:
+                    name = tool_def["function"]["name"]
+                    if name not in existing_tools and (include_all_tools or name in only_tools_list):
+                        current_chat["tools"].append(tool_def)
+        return current_chat
+
 
 def handler_name(handler):
     if hasattr(handler, "__name__"):
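
Note: create_chat_with_tools centralizes the injection logic previously inlined in g_chat_completion (see the -1375,21 hunk above) and only injects when the chat doesn't already carry tools. The use_tools argument keeps the old semantics: "all" injects every registered tool definition, otherwise a comma-separated allow-list. Sketch (tool names are hypothetical; ctx is the ExtensionContext wrapper added in a later hunk):

def build_variants(ctx, chat):
    chat_all = ctx.create_chat_with_tools(chat)  # use_tools="all"
    chat_some = ctx.create_chat_with_tools(chat, use_tools="get_weather,search_docs")
    return chat_all, chat_some
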
@@ -2496,6 +2517,7 @@ class ExtensionContext:
         self.verbose = g_verbose
         self.aspect_ratios = app.aspect_ratios
         self.request_args = app.request_args
+        self.disabled = False
 
     def chat_to_prompt(self, chat):
         return chat_to_prompt(chat)
@@ -2521,6 +2543,9 @@ class ExtensionContext:
     def text_from_file(self, path):
         return text_from_file(path)
 
+    def json_from_file(self, path):
+        return json_from_file(path)
+
     def log(self, message):
         if self.verbose:
             print(f"[{self.name}] {message}", flush=True)
@@ -2626,6 +2651,9 @@ class ExtensionContext:
     def get_cache_path(self, path=""):
         return get_cache_path(path)
 
+    def get_file_mime_type(self, filename):
+        return get_file_mime_type(filename)
+
     def chat_request(self, template=None, text=None, model=None, system_prompt=None):
         return self.app.chat_request(template=template, text=text, model=model, system_prompt=system_prompt)
 
@@ -2668,6 +2696,9 @@ class ExtensionContext:
     def to_content(self, result):
         return to_content(result)
 
+    def create_chat_with_tools(self, chat, use_tools="all"):
+        return self.app.create_chat_with_tools(chat, use_tools)
+
 
 def get_extensions_path():
     return os.getenv("LLMS_EXTENSIONS_DIR", os.path.join(Path.home(), ".llms", "extensions"))
@@ -2787,6 +2818,10 @@ def install_extensions():
         else:
             _dbg(f"Extension {init_file} not found")
 
+        if ctx.disabled:
+            _log(f"Extension {item} was disabled")
+            continue
+
         # if ui folder exists, serve as static files at /ext/{item}/
         ui_path = os.path.join(item_path, "ui")
         if os.path.exists(ui_path):
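
Note: together with the new self.disabled = False on ExtensionContext, this gives extensions a clean opt-out during loading. A hedged sketch of an extension module using it (the entry-point name and the dependency are illustrative, not confirmed by this diff):

def install(ctx):  # hypothetical extension entry point
    try:
        import some_optional_dependency  # noqa: F401 (hypothetical prerequisite)
    except ImportError:
        ctx.disabled = True  # install_extensions() logs this and skips the extension
        return
    # ... register providers/handlers as usual ...
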
@@ -3366,7 +3401,7 @@ def main():
         # if file and its .info.json already exists, return it
         info_path = os.path.splitext(full_path)[0] + ".info.json"
         if os.path.exists(full_path) and os.path.exists(info_path):
-            return web.json_response(json.loads(text_from_file(info_path)))
+            return web.json_response(json_from_file(info_path))
 
         os.makedirs(os.path.dirname(full_path), exist_ok=True)
 
@@ -3415,9 +3450,9 @@ def main():
     async def cache_handler(request):
         path = request.match_info["tail"]
         full_path = get_cache_path(path)
+        info_path = os.path.splitext(full_path)[0] + ".info.json"
 
         if "info" in request.query:
-            info_path = os.path.splitext(full_path)[0] + ".info.json"
             if not os.path.exists(info_path):
                 return web.Response(text="404: Not Found", status=404)
 
@@ -3446,11 +3481,23 @@ def main():
         except Exception:
             return web.Response(text="403: Forbidden", status=403)
 
-        with open(full_path, "rb") as f:
-            content = f.read()
-
         mimetype = get_file_mime_type(full_path)
-        return web.Response(body=content, content_type=mimetype)
+        if "download" in request.query:
+            # download file as an attachment
+            info = json_from_file(info_path) or {}
+            mimetype = info.get("type", mimetype)
+            filename = info.get("name") or os.path.basename(full_path)
+            mtime = info.get("date", os.path.getmtime(full_path))
+            mdate = datetime.fromtimestamp(mtime).isoformat()
+            return web.FileResponse(
+                full_path,
+                headers={
+                    "Content-Disposition": f'attachment; filename="{filename}"; modification-date="{mdate}"',
+                    "Content-Type": mimetype,
+                },
+            )
+        else:
+            return web.FileResponse(full_path, headers={"Content-Type": mimetype})
 
     app.router.add_get("/~cache/{tail:.*}", cache_handler)
 
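
Note: the cache route now honors a download query flag, serving the file as an attachment whose name, MIME type, and modification date come from its .info.json sidecar when present. Fetch sketch (host, port, and cached path are hypothetical):

from urllib.request import urlopen

with urlopen("http://localhost:8000/~cache/images/abc123.png?download") as res:
    print(res.headers.get("Content-Disposition"))
    # e.g. attachment; filename="abc123.png"; modification-date="2025-01-01T00:00:00"
    data = res.read()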