PyPI - llmstudio - Versions diffs - 0.3__tar.gz → 0.3.2__tar.gz - Mend

llmstudio 0.3__tar.gz → 0.3.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (154) hide show

{llmstudio-0.3 → llmstudio-0.3.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: llmstudio
-Version: 0.3
+Version: 0.3.2
 Summary: Prompt Perfection at Your Fingertips
 Home-page: https://llmstudio.ai/
 License: MIT
@@ -14,14 +14,14 @@ Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
-Requires-Dist: anthropic (>=0.8.1,<0.9.0)
-Requires-Dist: cohere (>=4.39,<5.0)
+Requires-Dist: aiohttp (>=3.9.1,<4.0.0)
+Requires-Dist: anthropic (>=0.16.0,<0.17.0)
 Requires-Dist: fastapi (>=0.108.0,<0.109.0)
-Requires-Dist: langchain (>=0.0.352,<0.0.353)
 Requires-Dist: openai (>=1.6.1,<2.0.0)
 Requires-Dist: pydantic (>=2.5.3,<3.0.0)
 Requires-Dist: python-dotenv (>=1.0.0,<2.0.0)
 Requires-Dist: requests (>=2.31.0,<3.0.0)
+Requires-Dist: sqlalchemy (>=2.0.27,<3.0.0)
 Requires-Dist: tiktoken (>=0.5.2,<0.6.0)
 Requires-Dist: tokenizer (>=3.4.3,<4.0.0)
 Requires-Dist: uvicorn (>=0.25.0,<0.26.0)
@@ -123,7 +123,7 @@ Step into the future of AI with LLMstudio, by watching our [introduction video](
 ## 👨‍💻 Contributing
 - Head on to our [Contribution Guide](https://github.com/TensorOpsAI/LLMstudio/tree/main/CONTRIBUTING.md) to see how you can help LLMstudio.
-- Join our [Discord](https://discord.gg/4H4nufwPdg) to talk with other LLMstudio enthusiasts.
+- Join our [Discord](https://discord.gg/GkAfPZR9wy) to talk with other LLMstudio enthusiasts.
 ## Training

{llmstudio-0.3 → llmstudio-0.3.2}/README.md RENAMED Viewed

@@ -91,7 +91,7 @@ Step into the future of AI with LLMstudio, by watching our [introduction video](
 ## 👨‍💻 Contributing
 - Head on to our [Contribution Guide](https://github.com/TensorOpsAI/LLMstudio/tree/main/CONTRIBUTING.md) to see how you can help LLMstudio.
-- Join our [Discord](https://discord.gg/4H4nufwPdg) to talk with other LLMstudio enthusiasts.
+- Join our [Discord](https://discord.gg/GkAfPZR9wy) to talk with other LLMstudio enthusiasts.
 ## Training

{llmstudio-0.3 → llmstudio-0.3.2}/llmstudio/.env.template RENAMED Viewed

@@ -4,11 +4,9 @@ COHERE_API_KEY="your-cohere-api-key"
 AZURE_API_KEY=""
 AZURE_API_ENDPOINT=""
 AZURE_API_VERSION=""
-ENGINE_HOST="localhost"
-ENGINE_PORT=8000
-UI_HOST = "localhost"
-UI_PORT = 3000
+LLMSTUDIO_ENGINE_HOST="localhost"
+LLMSTUDIO_UI_HOST = "localhost"
 LOG_LEVEL = "info"
-TRACKING_HOST="localhost"
-TRACKING_PORT=8080
+LLMSTUDIO_TRACKING_HOST="localhost"
 BACKEND_TRACKING_URI="sqlite:///./llmstudio_mgmt.db"
+#BACKEND_TRACKING_URI="postgresql://user:1234@0.0.0.0:5556/llmstudio"

llmstudio-0.3.2/llmstudio/cli.py ADDED Viewed

@@ -0,0 +1,112 @@
+import os
+import signal
+import socket
+from threading import Thread
+import click
+import requests
+from dotenv import load_dotenv
+from llmstudio.engine import run_engine_app
+from llmstudio.tracking import run_tracking_app
+from llmstudio.ui import run_ui_app
+load_dotenv(os.path.join(os.getcwd(), ".env"))
+def assign_port():
+    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+        s.bind(("", 0))
+        return s.getsockname()[1]
+os.environ["LLMSTUDIO_ENGINE_PORT"] = str(assign_port())
+os.environ["NEXT_PUBLIC_LLMSTUDIO_ENGINE_PORT"] = os.environ.get(
+    "LLMSTUDIO_ENGINE_PORT"
+)
+os.environ["LLMSTUDIO_TRACKING_PORT"] = str(assign_port())
+os.environ["NEXT_PUBLIC_LLMSTUDIO_TRACKING_PORT"] = os.environ.get(
+    "LLMSTUDIO_TRACKING_PORT"
+)
+os.environ["LLMSTUDIO_UI_PORT"] = str(assign_port())
+def is_server_running(host, port, path="/health"):
+    try:
+        response = requests.get(f"http://{host}:{port}{path}")
+        if response.status_code == 200 and response.json().get("status") == "healthy":
+            return True
+    except requests.ConnectionError:
+        pass
+    return False
+def start_server():
+    engine_port = int(os.environ.get("LLMSTUDIO_ENGINE_PORT"))
+    tracking_port = int(os.environ.get("LLMSTUDIO_TRACKING_PORT"))
+    engine_host = os.environ.get("LLMSTUDIO_ENGINE_HOST", "localhost")
+    tracking_host = os.environ.get("LLMSTUDIO_TRACKING_HOST", "localhost")
+    if not is_server_running(engine_host, engine_port):
+        engine_thread = Thread(target=run_engine_app, daemon=True)
+        engine_thread.start()
+    if not is_server_running(tracking_host, tracking_port):
+        tracking_thread = Thread(target=run_tracking_app, daemon=True)
+        tracking_thread.start()
+    def handle_shutdown(signum, frame):
+        print("Shutting down gracefully...")
+        os._exit(0)
+    signal.signal(signal.SIGINT, handle_shutdown)
+@click.group()
+def main():
+    pass
+@main.command()
+@click.option("--ui", is_flag=True, help="Start the UI server.")
+def server(ui):
+    def handle_shutdown(signum, frame):
+        print("Shutting down gracefully...")
+        os._exit(0)
+    # Register the signal handler
+    signal.signal(signal.SIGINT, handle_shutdown)
+    engine_host = os.getenv("LLMSTUDIO_ENGINE_HOST", "localhost")
+    tracking_host = os.getenv("LLMSTUDIO_TRACKING_HOST", "localhost")
+    engine_port = int(os.getenv("LLMSTUDIO_ENGINE_PORT"))
+    tracking_port = int(os.getenv("LLMSTUDIO_TRACKING_PORT"))
+    # Start the engine if it's not already running
+    if not is_server_running(engine_host, engine_port):
+        engine_thread = Thread(target=run_engine_app, daemon=True)
+        engine_thread.start()
+    else:
+        print(f"Engine server already running on {engine_host}:{engine_port}")
+    # Start the tracking if it's not already running
+    if not is_server_running(tracking_host, tracking_port):
+        tracking_thread = Thread(target=run_tracking_app, daemon=True)
+        tracking_thread.start()
+    else:
+        print(f"Tracking server already running on {tracking_host}:{tracking_port}")
+    # Start the UI if requested and not already running
+    if ui:
+        ui_port = int(os.getenv("LLMSTUDIO_UI_PORT"))
+        if not is_server_running("localhost", ui_port):
+            ui_thread = Thread(target=run_ui_app, daemon=True)
+            ui_thread.start()
+            ui_thread.join()
+        else:
+            print(f"UI server already running on localhost:{ui_port}")
+    if engine_thread:
+        engine_thread.join()
+    if tracking_thread:
+        tracking_thread.join()

{llmstudio-0.3 → llmstudio-0.3.2}/llmstudio/engine/__init__.py RENAMED Viewed

@@ -18,10 +18,6 @@ ENGINE_HEALTH_ENDPOINT = "/health"
 ENGINE_TITLE = "LLMstudio Engine API"
 ENGINE_DESCRIPTION = "The core API for LLM interactions"
 ENGINE_VERSION = "0.1.0"
-ENGINE_HOST = os.getenv("ENGINE_HOST", "localhost")
-ENGINE_PORT = int(os.getenv("ENGINE_PORT", 8000))
-ENGINE_URL = f"http://{ENGINE_HOST}:{ENGINE_PORT}"
-LOG_LEVEL = os.getenv("LOG_LEVEL", "critical")
 # Models for Configuration
@@ -89,7 +85,7 @@ def create_engine_app(config: EngineConfig = _load_engine_config()) -> FastAPI:
     app.add_middleware(
         CORSMiddleware,
-        allow_origins=["http://localhost:3000"],
+        allow_origins=["*"],
         allow_credentials=True,
         allow_methods=["*"],
         allow_headers=["*"],
@@ -168,13 +164,15 @@ def create_engine_app(config: EngineConfig = _load_engine_config()) -> FastAPI:
 def run_engine_app():
-    print(f"Running Engine on {ENGINE_HOST}:{ENGINE_PORT}")
+    print(
+        f"Running Engine on http://{os.getenv('LLMSTUDIO_ENGINE_HOST')}:{os.getenv('LLMSTUDIO_ENGINE_PORT')}"
+    )
     try:
         engine = create_engine_app()
         uvicorn.run(
             engine,
-            host=ENGINE_HOST,
-            port=ENGINE_PORT,
+            host=os.getenv("LLMSTUDIO_ENGINE_HOST"),
+            port=int(os.getenv("LLMSTUDIO_ENGINE_PORT")),
         )
     except Exception as e:
         print(f"Error running the Engine app: {e}")

{llmstudio-0.3 → llmstudio-0.3.2}/llmstudio/engine/config.yaml RENAMED Viewed

@@ -7,6 +7,16 @@ providers:
     keys:
       - ANTHROPIC_API_KEY
     models:
+      claude-3-opus-20240229:
+        mode: chat
+        max_tokens: 200000
+        input_token_cost: 0.000015
+        output_token_cost: 0.000075
+      claude-3-sonnet-2024022:
+        mode: chat
+        max_tokens: 200000
+        input_token_cost: 0.000003
+        output_token_cost: 0.000015
       claude-2.1:
         mode: chat
         max_tokens: 200000
@@ -22,11 +32,6 @@ providers:
         max_tokens: 100000
         input_token_cost: 0.00000163
         output_token_cost: 0.00000551
-      claude-instant-1:
-        mode: chat
-        max_tokens: 100000
-        input_token_cost: 0.00000163
-        output_token_cost: 0.00000551
     parameters:
       temperature:
         name: "Temperature"
@@ -191,82 +196,6 @@ providers:
         min: 0
         max: 2
         step: 0.01
-  cohere:
-    id: cohere
-    name: Cohere
-    chat: true
-    embed: true
-    keys:
-      - COHERE_API_KEY
-    models:
-      command-nightly:
-        mode: chat
-        max_tokens: 4096
-        input_token_cost: 0.000015
-        output_token_cost: 0.000015
-      command:
-        mode: chat
-        max_tokens: 4096
-        input_token_cost: 0.000015
-        output_token_cost: 0.000015
-      command-light:
-        mode: chat
-        max_tokens: 4096
-        input_token_cost: 0.000015
-        output_token_cost: 0.000015
-      command-medium-beta:
-        mode: chat
-        max_tokens: 4096
-        input_token_cost: 0.000015
-        output_token_cost: 0.000015
-      command-xlarge-beta:
-        mode: chat
-        max_tokens: 4096
-        input_token_cost: 0.000015
-        output_token_cost: 0.000015
-    parameters:
-      temperature:
-        name: "Temperature"
-        type: float
-        default: 0.75
-        min: 0
-        max: 5
-        step: 0.01
-      max_tokens:
-        name: "Maximum length"
-        type: int
-        default: 256
-        min: 1
-        max: 4096
-        step: 1
-      p:
-        name: "P"
-        type: float
-        default: 0
-        min: 0
-        max: 0.99
-        step: 0.01
-      k:
-        name: "K"
-        type: int
-        default: 0
-        min: 0
-        max: 500
-        step: 1
-      frequency_penalty:
-        name: "Frequency Penalty"
-        type: float
-        default: 0
-        min: 0
-        max: 10
-        step: 0.1
-      presence_penalty:
-        name: "Presence Penalty"
-        type: float
-        default: 0
-        min: 0
-        max: 1
-        step: 0.01
   azure:
     id: azure
     name: Azure

{llmstudio-0.3 → llmstudio-0.3.2}/llmstudio/engine/providers/__init__.py RENAMED Viewed

@@ -1,6 +1,5 @@
 from llmstudio.engine.providers.anthropic import AnthropicProvider
 from llmstudio.engine.providers.azure import AzureProvider
-from llmstudio.engine.providers.cohere import CohereProvider
 from llmstudio.engine.providers.ollama import OllamaProvider
 from llmstudio.engine.providers.openai import OpenAIProvider
 from llmstudio.engine.providers.provider import provider_registry

llmstudio-0.3.2/llmstudio/engine/providers/anthropic.py ADDED Viewed

@@ -0,0 +1,91 @@
+import asyncio
+import os
+import time
+import uuid
+from typing import Any, AsyncGenerator, Coroutine, Generator, Optional
+import anthropic
+from anthropic import Anthropic
+from fastapi import HTTPException
+from openai.types.chat import ChatCompletionChunk
+from openai.types.chat.chat_completion_chunk import Choice, ChoiceDelta
+from pydantic import BaseModel, Field
+from llmstudio.engine.providers.provider import ChatRequest, Provider, provider
+class ClaudeParameters(BaseModel):
+    temperature: Optional[float] = Field(1, ge=0, le=1)
+    max_tokens: Optional[int] = Field(4096, ge=1)
+    top_p: Optional[float] = Field(1, ge=0, le=1)
+    top_k: Optional[int] = Field(5, ge=0, le=500)
+class AnthropicRequest(ChatRequest):
+    parameters: Optional[ClaudeParameters] = ClaudeParameters()
+@provider
+class AnthropicProvider(Provider):
+    def __init__(self, config):
+        super().__init__(config)
+        self.API_KEY = os.getenv("ANTHROPIC_API_KEY")
+    def validate_request(self, request: AnthropicRequest):
+        return AnthropicRequest(**request)
+    async def generate_client(
+        self, request: AnthropicRequest
+    ) -> Coroutine[Any, Any, Generator]:
+        """Generate an Anthropic client"""
+        try:
+            client = Anthropic(api_key=request.api_key or self.API_KEY)
+            return await asyncio.to_thread(
+                client.messages.stream,
+                model=request.model,
+                messages=(
+                    [{"role": "user", "content": request.chat_input}]
+                    if isinstance(request.chat_input, str)
+                    else request.chat_input
+                ),
+                **request.parameters.dict(),
+            )
+        except anthropic._exceptions.APIError as e:
+            raise HTTPException(status_code=e.status_code, detail=e.response.json())
+    async def parse_response(
+        self, response: AsyncGenerator, **kwargs
+    ) -> AsyncGenerator[str, None]:
+        with response as stream:
+            for chunk in stream:
+                if isinstance(
+                    chunk,
+                    anthropic.types.content_block_delta_event.ContentBlockDeltaEvent,
+                ):
+                    yield ChatCompletionChunk(
+                        id=str(uuid.uuid4()),
+                        choices=[
+                            Choice(
+                                delta=ChoiceDelta(
+                                    content=chunk.delta.text, role="assistant"
+                                ),
+                                finish_reason=None,
+                                index=0,
+                            )
+                        ],
+                        created=int(time.time()),
+                        model=kwargs.get("request").model,
+                        object="chat.completion.chunk",
+                    ).model_dump(),
+                elif isinstance(
+                    chunk, anthropic.types.message_stop_event.MessageStopEvent
+                ):
+                    yield ChatCompletionChunk(
+                        id=str(uuid.uuid4()),
+                        choices=[
+                            Choice(delta=ChoiceDelta(), finish_reason="stop", index=0)
+                        ],
+                        created=int(time.time()),
+                        model=kwargs.get("request").model,
+                        object="chat.completion.chunk",
+                    ).model_dump(),

{llmstudio-0.3 → llmstudio-0.3.2}/llmstudio/engine/providers/azure.py RENAMED Viewed

@@ -1,6 +1,6 @@
 import asyncio
 import os
-from typing import Any, AsyncGenerator, Coroutine, Generator, Optional
+from typing import Any, AsyncGenerator, Coroutine, Dict, Generator, List, Optional
 import openai
 from fastapi import HTTPException
@@ -12,7 +12,7 @@ from llmstudio.engine.providers.provider import ChatRequest, Provider, provider
 class AzureParameters(BaseModel):
     temperature: Optional[float] = Field(default=1, ge=0, le=2)
-    max_tokens: Optional[int] = Field(default=256, ge=1)
+    max_tokens: Optional[int] = Field(default=2048, ge=1)
     top_p: Optional[float] = Field(default=1, ge=0, le=1)
     frequency_penalty: Optional[float] = Field(default=0, ge=0, le=1)
     presence_penalty: Optional[float] = Field(default=0, ge=0, le=1)
@@ -20,8 +20,10 @@ class AzureParameters(BaseModel):
 class AzureRequest(ChatRequest):
     api_endpoint: Optional[str] = None
-    api_version: Optional[str] = "2023-05-15"
+    api_version: Optional[str] = None
     parameters: Optional[AzureParameters] = AzureParameters()
+    functions: Optional[List[Dict[str, Any]]] = None
+    chat_input: Any
 @provider
@@ -48,19 +50,21 @@ class AzureProvider(Provider):
             return await asyncio.to_thread(
                 client.chat.completions.create,
                 model=request.model,
-                messages=[{"role": "user", "content": request.chat_input}],
+                messages=(
+                    [{"role": "user", "content": request.chat_input}]
+                    if isinstance(request.chat_input, str)
+                    else request.chat_input
+                ),
+                functions=request.functions,
+                function_call="auto" if request.functions else None,
                 stream=True,
-                **request.parameters.dict(),
+                **request.parameters.model_dump(),
             )
         except openai._exceptions.APIError as e:
             raise HTTPException(status_code=e.status_code, detail=e.response.json())
     async def parse_response(
-        self, response: AsyncGenerator
+        self, response: AsyncGenerator, **kwargs
     ) -> AsyncGenerator[str, None]:
         for chunk in response:
-            if (
-                chunk.choices[0].finish_reason not in ["stop", "length"]
-                and chunk.choices[0].delta.content is not None
-            ):
-                yield chunk.choices[0].delta.content
+            yield chunk.model_dump()

{llmstudio-0.3 → llmstudio-0.3.2}/llmstudio/engine/providers/ollama.py RENAMED Viewed

@@ -1,9 +1,13 @@
 import asyncio
 import json
+import time
+import uuid
 from typing import Any, AsyncGenerator, Coroutine, Generator, Optional
 import requests
 from fastapi import HTTPException
+from openai.types.chat import ChatCompletionChunk
+from openai.types.chat.chat_completion_chunk import Choice, ChoiceDelta
 from pydantic import BaseModel, Field
 from llmstudio.engine.providers.provider import ChatRequest, Provider, provider
@@ -39,6 +43,7 @@ class OllamaProvider(Provider):
                 json={
                     "model": request.model,
                     "prompt": request.chat_input,
+                    "stream": True,
                     **request.parameters.dict(),
                 },
                 stream=True,
@@ -50,7 +55,7 @@ class OllamaProvider(Provider):
             )
     async def parse_response(
-        self, response: AsyncGenerator
+        self, response: AsyncGenerator, **kwargs
     ) -> AsyncGenerator[str, None]:
         for line in response.iter_lines():
             if not line:
@@ -59,5 +64,31 @@ class OllamaProvider(Provider):
             if "error" in chunk:
                 raise HTTPException(status_code=500, detail=chunk["error"])
             if chunk.get("done"):
+                print("done")
+                yield ChatCompletionChunk(
+                    id=str(uuid.uuid4()),
+                    choices=[
+                        Choice(delta=ChoiceDelta(), finish_reason="stop", index=0)
+                    ],
+                    created=int(time.time()),
+                    model=kwargs.get("request").model,
+                    object="chat.completion.chunk",
+                ).model_dump()
                 break
-            yield chunk["response"]
+            if chunk["response"] is not None:
+                yield ChatCompletionChunk(
+                    id=str(uuid.uuid4()),
+                    choices=[
+                        Choice(
+                            delta=ChoiceDelta(
+                                content=chunk["response"], role="assistant"
+                            ),
+                            finish_reason=None,
+                            index=0,
+                        )
+                    ],
+                    created=int(time.time()),
+                    model=kwargs.get("request").model,
+                    object="chat.completion.chunk",
+                ).model_dump()

{llmstudio-0.3 → llmstudio-0.3.2}/llmstudio/engine/providers/openai.py RENAMED Viewed

@@ -1,6 +1,6 @@
 import asyncio
 import os
-from typing import Any, AsyncGenerator, Coroutine, Generator, Optional
+from typing import Any, AsyncGenerator, Coroutine, Dict, Generator, List, Optional
 import openai
 from fastapi import HTTPException
@@ -12,7 +12,7 @@ from llmstudio.engine.providers.provider import ChatRequest, Provider, provider
 class OpenAIParameters(BaseModel):
     temperature: Optional[float] = Field(default=1, ge=0, le=2)
-    max_tokens: Optional[int] = Field(default=256, ge=1)
+    max_tokens: Optional[int] = Field(default=2048, ge=1)
     top_p: Optional[float] = Field(default=1, ge=0, le=1)
     frequency_penalty: Optional[float] = Field(default=0, ge=0, le=1)
     presence_penalty: Optional[float] = Field(default=0, ge=0, le=1)
@@ -20,6 +20,8 @@ class OpenAIParameters(BaseModel):
 class OpenAIRequest(ChatRequest):
     parameters: Optional[OpenAIParameters] = OpenAIParameters()
+    functions: Optional[List[Dict[str, Any]]] = None
+    chat_input: Any
 @provider
@@ -40,16 +42,21 @@ class OpenAIProvider(Provider):
             return await asyncio.to_thread(
                 client.chat.completions.create,
                 model=request.model,
-                messages=[{"role": "user", "content": request.chat_input}],
+                messages=(
+                    [{"role": "user", "content": request.chat_input}]
+                    if isinstance(request.chat_input, str)
+                    else request.chat_input
+                ),
+                functions=request.functions,
+                function_call="auto" if request.functions else None,
                 stream=True,
-                **request.parameters.dict(),
+                **request.parameters.model_dump(),
             )
         except openai._exceptions.APIError as e:
             raise HTTPException(status_code=e.status_code, detail=e.response.json())
     async def parse_response(
-        self, response: AsyncGenerator
+        self, response: AsyncGenerator, **kwargs
     ) -> AsyncGenerator[str, None]:
         for chunk in response:
-            if chunk.choices[0].finish_reason not in ["stop", "length"]:
-                yield chunk.choices[0].delta.content
+            yield chunk.model_dump()

llmstudio 0.3__tar.gz → 0.3.2__tar.gz

llmstudio 0.3__tar.gz → 0.3.2__tar.gz