pirag 0.2.0__tar.gz → 0.2.2__tar.gz

This diff shows the contents of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the package versions as they appear in the public registry.
Files changed (35)
  1. {pirag-0.2.0 → pirag-0.2.2}/PKG-INFO +3 -1
  2. {pirag-0.2.0 → pirag-0.2.2}/app/main.py +27 -13
  3. pirag-0.2.2/app/rag/agent/services.py +11 -0
  4. {pirag-0.2.0 → pirag-0.2.2}/app/rag/api.py +10 -20
  5. pirag-0.2.2/app/rag/cli.py +54 -0
  6. {pirag-0.2.0 → pirag-0.2.2}/app/rag/config.py +20 -0
  7. {pirag-0.2.0 → pirag-0.2.2}/app/rag/embedding/client.py +1 -1
  8. pirag-0.2.0/app/rag/embedding/service.py → pirag-0.2.2/app/rag/embedding/services.py +1 -1
  9. pirag-0.2.2/app/rag/llm/client.py +128 -0
  10. pirag-0.2.0/app/rag/llm/service.py → pirag-0.2.2/app/rag/llm/services.py +1 -1
  11. pirag-0.2.2/app/rag/llm/utilities.py +40 -0
  12. pirag-0.2.2/app/rag/models.py +19 -0
  13. pirag-0.2.2/app/rag/routers.py +41 -0
  14. pirag-0.2.0/app/rag/utils.py → pirag-0.2.2/app/rag/utilities.py +1 -1
  15. {pirag-0.2.0 → pirag-0.2.2}/app/rag/vector_store/client.py +4 -1
  16. pirag-0.2.0/app/rag/vector_store/service.py → pirag-0.2.2/app/rag/vector_store/services.py +14 -1
  17. {pirag-0.2.0 → pirag-0.2.2}/app/requirements.txt +2 -0
  18. {pirag-0.2.0 → pirag-0.2.2}/pirag.egg-info/PKG-INFO +3 -1
  19. {pirag-0.2.0 → pirag-0.2.2}/pirag.egg-info/SOURCES.txt +10 -7
  20. {pirag-0.2.0 → pirag-0.2.2}/pirag.egg-info/requires.txt +2 -0
  21. {pirag-0.2.0 → pirag-0.2.2}/pyproject.toml +1 -1
  22. pirag-0.2.0/app/rag/cli.py +0 -33
  23. pirag-0.2.0/app/rag/llm/client.py +0 -70
  24. pirag-0.2.0/app/rag/v1/service.py +0 -0
  25. {pirag-0.2.0 → pirag-0.2.2}/LICENSE +0 -0
  26. {pirag-0.2.0 → pirag-0.2.2}/README.md +0 -0
  27. {pirag-0.2.0/app/rag/agent → pirag-0.2.2/app/rag/test}/client.py +0 -0
  28. {pirag-0.2.0/app/rag/test → pirag-0.2.2/app/rag/train}/client.py +0 -0
  29. pirag-0.2.0/app/rag/v1/router.py → pirag-0.2.2/app/rag/v1/routers.py +0 -0
  30. pirag-0.2.0/app/rag/train/client.py → pirag-0.2.2/app/rag/v1/services.py +0 -0
  31. {pirag-0.2.0 → pirag-0.2.2}/app/setup.py +0 -0
  32. {pirag-0.2.0 → pirag-0.2.2}/pirag.egg-info/dependency_links.txt +0 -0
  33. {pirag-0.2.0 → pirag-0.2.2}/pirag.egg-info/entry_points.txt +0 -0
  34. {pirag-0.2.0 → pirag-0.2.2}/pirag.egg-info/top_level.txt +0 -0
  35. {pirag-0.2.0 → pirag-0.2.2}/setup.cfg +0 -0

{pirag-0.2.0 → pirag-0.2.2}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: pirag
- Version: 0.2.0
+ Version: 0.2.2
  Summary: CLI Projects of On-Premise RAG. You can use your own LLM and vector DB. Or just add remote LLM servers and vector DB.
  Author-email: semir4in <semir4in@gmail.com>, jyje <jyjeon@outlook.com>
  Project-URL: Homepage, https://github.com/jyje/pilot-onpremise-rag
@@ -16,6 +16,8 @@ Requires-Dist: fastapi<0.116
  Requires-Dist: uvicorn<0.35
  Requires-Dist: ragas<0.3
  Requires-Dist: pymilvus<2.6
+ Requires-Dist: langchain-openai<0.4
+ Requires-Dist: langchain-ollama<0.4
  Dynamic: license-file

  <div align="center">

{pirag-0.2.0 → pirag-0.2.2}/app/main.py
@@ -5,15 +5,6 @@ import app.rag.config as cfn
  import app.rag.api as api
  import app.rag.cli as cli

- # Command definitions
- commands = {
-     "serve" : ("Start the RAG server", "Run a FastAPI-based RAG server", api.serve),
-     "chat" : ("Chat with the RAG system", "Run an interactive chat with the RAG system", cli.chat),
-     "train" : ("Train the RAG system", "Run a pipeline to train the RAG system", cli.train),
-     "test" : ("Test the RAG system", "Run a pipeline to test the RAG system", cli.test),
-     "doctor" : ("Diagnose the RAG system", "Run a pipeline to diagnose the RAG system", cli.doctor),
- }
-
  # Main parser
  parser = ArgumentParser(
      formatter_class = ArgumentDefaultsHelpFormatter,
@@ -24,14 +15,24 @@ parser = ArgumentParser(
      add_help = False,
  )

+ # Command definitions
+ commands = {
+     # name: help, description, function, extra_parsers
+     "serve" : ("Start the RAG server", "Run a FastAPI-based RAG server", api.serve, []),
+     "chat" : ("Chat with the RAG system", "Run an interactive chat with the RAG system", cli.chat, [cfn.chat_parser]),
+     "train" : ("Train the RAG system", "Run a pipeline to train the RAG system", cli.train, []),
+     "test" : ("Test the RAG system", "Run a pipeline to test the RAG system", cli.test, []),
+     "doctor" : ("Diagnose the RAG system", "Run a pipeline to diagnose the RAG system", cli.doctor, [cfn.doctor_parser]),
+ }
+
  # Add command parsers
  subparsers = parser.add_subparsers(title="commands", dest="command")
- for name, (help, description, _) in commands.items():
+ for name, (help, description, _, extra_parsers) in commands.items():
      subparsers.add_parser(
          name = name,
          help = help,
          description = description,
-         parents = [cfn.common_parser],
+         parents = [cfn.common_parser] + extra_parsers,
          add_help = False,
      )

@@ -40,8 +41,21 @@ def main():
      cfn.setup_logger(cfn.LOG_LEVEL, cfn.LOG_SAVE, cfn.LOG_DIR)
      logger.debug(f"Parsed arguments: {args}")

-     if func := commands.get(args.command):
-         func[-1]()
+     if command_info := commands.get(args.command):
+         func, extra_parsers = command_info[2], command_info[3]
+
+         # Create parser options dict from extra_parsers
+         extra_options = {}
+         if extra_parsers:
+             for parser_obj in extra_parsers:
+                 for action in parser_obj._actions:
+                     if action.dest == 'help':
+                         continue
+                     if hasattr(args, action.dest) and getattr(args, action.dest) != action.default:
+                         extra_options[action.dest] = getattr(args, action.dest)
+
+         # Run the command with the extra parser options
+         func(extra_options)
      else:
          parser.print_help()


pirag-0.2.2/app/rag/agent/services.py
@@ -0,0 +1,11 @@
+ from app.rag.llm.client import client as llm_client
+
+ def chat_only_llm():
+     response = llm_client.generate_with_metrics("Hello, how are you?")
+     print(response)
+
+
+ def chat_with_rag():
+     pass
+
+

{pirag-0.2.0 → pirag-0.2.2}/app/rag/api.py
@@ -1,10 +1,12 @@
  import uvicorn
- from fastapi import FastAPI, Request, Depends, HTTPException, Query
+ from fastapi import FastAPI, APIRouter, Request, Depends, HTTPException, Query
  from fastapi.middleware.cors import CORSMiddleware

  from loguru import logger
  import app.rag.config as cfn
- from app.rag.v1.router import router as core_router
+
+ from app.rag.routers import system_router
+ from app.rag.v1.routers import router as v1_router

  # Initialize FastAPI app
  api = FastAPI(
@@ -22,26 +24,14 @@ api.add_middleware(
      allow_headers=["*"],
  )

+ api.include_router(router=system_router, prefix="", tags=["System"])
+ api.include_router(router=v1_router, prefix="/v1")

- @api.get("/")
- async def _():
-     return {"message": "RAG API is running"}
-
-
- @api.get("/livez")
- async def _():
-     return {"status": "ok"}
-
-
- @api.get("/readyz")
- async def _():
-     return {"status": "ok"}
-
- api.include_router(router=core_router, prefix="/v1")
-
- def serve():
+ def serve(parser_options=None):
      print("Serving the RAG API...")
-     print(cfn.API_HOST, cfn.API_PORT, cfn.API_RELOAD)
+     if parser_options:
+         logger.debug(f"Serve parser options: {parser_options}")
+
      uvicorn.run(
          app = "app.rag.api:api",
          host = cfn.API_HOST,

pirag-0.2.2/app/rag/cli.py
@@ -0,0 +1,54 @@
+ import app.rag.config as cfn
+ from loguru import logger
+
+ from app.rag.llm.services import doctor as doctor_llm
+ from app.rag.embedding.services import doctor as doctor_embedding
+ from app.rag.vector_store.services import doctor as doctor_vector_store
+ from app.rag.agent.services import chat_only_llm, chat_with_rag
+
+ def chat(options: dict):
+     logger.debug(f"Chat parser options: {options}")
+     no_rag = options.get('no_rag', False)
+
+     # -- Chat
+     if no_rag:
+         logger.info("💬 Chatting with the LLM system directly...")
+         chat_only_llm()
+     else:
+         logger.info("💬 Chatting with the RAG system...")
+         chat_with_rag()
+
+
+ def train(options: dict):
+     print("Training the RAG system...")
+     logger.debug(f"Train parser options: {options}")
+
+
+ def test(options: dict):
+     print("Testing the RAG system...")
+     logger.debug(f"Test parser options: {options}")
+
+
+ def doctor(options: dict):
+     logger.info("💚 Doctoring the RAG system...")
+
+     logger.debug(f"Doctor parser options: {options}")
+     # Check if resolve option is present
+     resolve = options.get('resolve', False)
+     if resolve:
+         logger.info("🔧 Resolving issues is enabled")
+
+     # -- LLM Server
+     logger.info("🔍 Checking the LLM server (OpenAI-compatible)...")
+     doctor_llm(resolve)
+
+     # -- Embedding Server
+     logger.info("🔍 Checking the embedding server (OpenAI-compatible)...")
+     doctor_embedding(resolve)
+
+     # -- Vector Store
+     logger.info("🔍 Checking the vector store server (Milvus)...")
+     doctor_vector_store(resolve)
+
+     if resolve:
+         logger.info(f"🔧 Resolving issue completed. To make sure the issues are resolved, please try doctoring again.")
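
For reference, a hedged sketch of how these entry points are invoked once main() has collected the per-command options; the option values are illustrative, and running it requires the configured LLM, embedding, and Milvus services to be reachable:

    from app.rag.cli import chat, doctor

    chat({"no_rag": True})      # skip retrieval and call chat_only_llm() directly
    doctor({"resolve": False})  # report-only health check of LLM, embedding, and vector store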

{pirag-0.2.0 → pirag-0.2.2}/app/rag/config.py
@@ -40,12 +40,14 @@ API_RELOAD: bool = settings.get("API.RELOAD", True)
  LLM_BASE_URL: str = settings.get("LLM.BASE_URL", "http://localhost:11434")
  LLM_API_KEY: str = settings.get("LLM.API_KEY", "llm_api_key")
  LLM_MODEL: str = settings.get("LLM.MODEL", "gemma3:4b")
+ LLM_SERVER_TYPE: str = settings.get("LLM.SERVER_TYPE", "openai")


  # -- Embedding Server
  EMBEDDING_BASE_URL: str = settings.get("EMBEDDING.BASE_URL", "http://localhost:11434")
  EMBEDDING_API_KEY: str = settings.get("EMBEDDING.API_KEY", "embedding_api_key")
  EMBEDDING_MODEL: str = settings.get("EMBEDDING.MODEL", "nomic-embed-text:latest")
+ EMBEDDING_SERVER_TYPE: str = settings.get("EMBEDDING.SERVER_TYPE", "openai")
  EMBEDDING_DIMENSION: int = settings.get("EMBEDDING.DIMENSION", 768)


@@ -122,3 +124,21 @@ common_parser.add_argument(
      default = argparse.SUPPRESS,
      action = "help",
  )
+
+
+ # Chat parser
+ chat_parser = argparse.ArgumentParser(add_help=False)
+ chat_parser.add_argument(
+     "-n", "--no-rag",
+     help = "Do not use RAG to answer the question. Just use the LLM to answer the question.",
+     action = "store_true",
+ )
+
+
+ # Doctor parser
+ doctor_parser = argparse.ArgumentParser(add_help=False)
+ doctor_parser.add_argument(
+     "-r", "--resolve",
+     help = "Resolve the issue",
+     action = "store_true",
+ )
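
For orientation, a self-contained sketch of the pattern introduced here and consumed in app/main.py: a shared common parser plus a per-command extra parser passed via parents, with only the non-default values collected into the options dict. The names and values below are illustrative, not the package defaults.

    import argparse

    # Shared options for every command (stands in for cfn.common_parser).
    common_parser = argparse.ArgumentParser(add_help=False)
    common_parser.add_argument("--log-level", default="INFO")

    # Extra options for a single command (stands in for cfn.chat_parser).
    chat_parser = argparse.ArgumentParser(add_help=False)
    chat_parser.add_argument("-n", "--no-rag", action="store_true")

    parser = argparse.ArgumentParser(prog="pirag")
    subparsers = parser.add_subparsers(title="commands", dest="command")
    subparsers.add_parser("chat", parents=[common_parser, chat_parser], add_help=False)

    args = parser.parse_args(["chat", "--no-rag"])

    # Collect only the values that differ from their defaults, as main() does.
    extra_options = {
        action.dest: getattr(args, action.dest)
        for action in chat_parser._actions
        if action.dest != "help" and getattr(args, action.dest, action.default) != action.default
    }
    print(extra_options)  # {'no_rag': True}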

{pirag-0.2.0 → pirag-0.2.2}/app/rag/embedding/client.py
@@ -2,7 +2,7 @@ import requests
  from langchain_openai.embeddings import OpenAIEmbeddings

  import app.rag.config as cfn
- from app.rag.utils import connection_check
+ from app.rag.utilities import connection_check


  class EmbeddingClient:

pirag-0.2.0/app/rag/embedding/service.py → pirag-0.2.2/app/rag/embedding/services.py
@@ -3,7 +3,7 @@ from loguru import logger
  import app.rag.config as cfn
  from .client import client

- def doctor():
+ def doctor(resolve: bool):
      # Check connection
      is_connected = client.check_connection()
      if not is_connected:

pirag-0.2.2/app/rag/llm/client.py
@@ -0,0 +1,128 @@
+ import requests
+ import time
+ from langchain_openai.llms import OpenAI
+ from typing import Dict, Tuple, Any, List, Optional
+
+ import app.rag.config as cfn
+ from app.rag.utilities import connection_check
+ from .utilities import MetricCallbackHandler
+
+ class LLMClient:
+     def __init__(self, base_url: str, api_key: str, model: str):
+         self.base_url = base_url
+         self.api_key = api_key
+         self.model = model
+         self._is_connected = True
+         self._client = None
+
+         if self.check_connection():
+             try:
+                 self._client = OpenAI(
+                     base_url = base_url,
+                     api_key = api_key,
+                     model = model
+                 )
+             except Exception as e:
+                 self._is_connected = False
+
+     def check_connection(self) -> bool:
+         """Check if the LLM server is accessible"""
+         try:
+             requests.head(url=self.base_url, timeout=5)
+         except requests.exceptions.ConnectionError:
+             self._is_connected = False
+             return False
+         self._is_connected = True
+         return True
+
+     @connection_check
+     def generate(self, prompt: str) -> tuple:
+         """Generate text from prompt and return usage information
+
+         Returns:
+             tuple: (generated_text, usage_info)
+         """
+         if not self._is_connected or self._client is None:
+             return "", {}
+
+         response = self._client.generate([prompt])
+         return response.generations[0][0].text, response.llm_output
+
+     @connection_check
+     def generate_with_metrics(self, prompt: str) -> Tuple[str, Dict[str, Any]]:
+         """Generate text with timing and usage metrics
+
+         Returns:
+             tuple: (generated_text, metrics_info)
+         """
+         if not self._is_connected or self._client is None:
+             return "", {"error": "LLM client not connected"}
+
+         handler = MetricCallbackHandler()
+
+         # Create streaming client with callback
+         streaming_client = OpenAI(
+             base_url=self.base_url,
+             api_key=self.api_key,
+             model=self.model,
+             streaming=True,
+             callbacks=[handler]
+         )
+
+         # Make a single request
+         response = streaming_client.generate([prompt], callbacks=[handler])
+
+         # Get base metrics from response
+         metrics = {}
+
+         # Extract token usage from response
+         llm_output = response.llm_output if hasattr(response, 'llm_output') else {}
+
+         # Check if token_usage exists in the response
+         token_usage = llm_output.get('token_usage', {})
+         if token_usage:
+             # If token_usage is available, copy it to our metrics
+             metrics.update(token_usage)
+
+         # Add model name if available
+         if 'model_name' in llm_output:
+             metrics['model'] = llm_output['model_name']
+         else:
+             metrics['model'] = self.model
+
+         # Calculate and add timing metrics
+         metrics['ttft'] = handler.ttft or 0.0
+         metrics['total_time'] = (handler.end_time or time.time()) - handler.start_time
+         metrics['tokens_per_second'] = handler.calculate_tokens_per_second()
+         metrics['completion_tokens'] = handler.token_count
+
+         return handler.result, metrics
+
+     @connection_check
+     def list_models(self) -> list:
+         """List available models"""
+         if not self._is_connected:
+             return []
+         try:
+             response = requests.get(
+                 f"{self.base_url}/models",
+                 headers={"Authorization": f"Bearer {self.api_key}"}
+             )
+             if response.status_code == 200:
+                 return [model['id'] for model in response.json()['data']]
+             return []
+         except Exception:
+             return []
+
+     @connection_check
+     def has_model(self, model: str) -> bool:
+         """Check if model exists"""
+         if not self._is_connected:
+             return False
+         return model in self.list_models()
+
+ client = LLMClient(
+     base_url = cfn.LLM_BASE_URL,
+     api_key = cfn.LLM_API_KEY,
+     model = cfn.LLM_MODEL,
+ )
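
A minimal usage sketch of the new metrics API, assuming the OpenAI-compatible server configured under LLM.* is reachable; the prompt and the printed keys are illustrative:

    from app.rag.llm.client import client

    # generate_with_metrics returns the generated text plus a metrics dict with
    # ttft, total_time, tokens_per_second, completion_tokens, model, and any
    # token_usage fields reported by the server.
    text, metrics = client.generate_with_metrics("Explain RAG in one sentence.")
    print(text)
    print(metrics["ttft"], metrics["tokens_per_second"])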

pirag-0.2.0/app/rag/llm/service.py → pirag-0.2.2/app/rag/llm/services.py
@@ -3,7 +3,7 @@ from loguru import logger
  import app.rag.config as cfn
  from .client import client

- def doctor():
+ def doctor(resolve: bool):
      # Check connection
      is_connected = client.check_connection()
      if not is_connected:

pirag-0.2.2/app/rag/llm/utilities.py
@@ -0,0 +1,40 @@
+ import time
+ from langchain.callbacks.base import BaseCallbackHandler
+
+ class MetricCallbackHandler(BaseCallbackHandler):
+     def __init__(self):
+         self.start_time = time.time()
+         self.ttft = None
+         self.first_token_time = None
+         self.result = ""
+         self.end_time = None
+         self.token_count = 0
+         self.token_timestamps = []
+
+     def on_llm_new_token(self, token: str, **kwargs):
+         current_time = time.time()
+         self.token_count += 1
+         self.token_timestamps.append(current_time)
+
+         if self.ttft is None:
+             self.ttft = current_time - self.start_time
+             self.first_token_time = current_time
+
+         self.result += token
+
+     def on_llm_end(self, *args, **kwargs):
+         self.end_time = time.time()
+
+     def calculate_tokens_per_second(self):
+         """Calculate tokens per second after the first token"""
+         if self.token_count <= 1 or self.first_token_time is None or self.end_time is None:
+             return 0.0
+
+         # Calculate time from first token to completion (exclude TTFT)
+         generation_time = self.end_time - self.first_token_time
+         if generation_time <= 0:
+             return 0.0
+
+         # Exclude the first token from the count since we're measuring from after it arrived
+         tokens_after_first = self.token_count - 1
+         return tokens_after_first / generation_time
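
A quick sketch of the handler's timing arithmetic, driving the callbacks by hand instead of through a streaming LLM call (langchain must be importable; the sleeps merely simulate token arrival):

    import time
    from app.rag.llm.utilities import MetricCallbackHandler

    handler = MetricCallbackHandler()
    time.sleep(0.2)                        # simulated time to first token
    for token in ["Hello", ",", " world"]:
        handler.on_llm_new_token(token)
        time.sleep(0.05)                   # simulated inter-token delay
    handler.on_llm_end()

    print(handler.ttft)                           # ~0.2 s: first token time minus start_time
    print(handler.calculate_tokens_per_second())  # 2 tokens over the ~0.15 s after the first token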

pirag-0.2.2/app/rag/models.py
@@ -0,0 +1,19 @@
+ from pydantic import BaseModel
+
+ class SystemStatusResponse(BaseModel):
+     """
+     Response model for the system status endpoint.
+     """
+     status: int
+     message: str
+
+     model_config = {
+         "json_schema_extra": {
+             "examples": [
+                 {
+                     "status": 200,
+                     "message": "System is running normally"
+                 }
+             ]
+         }
+     }

pirag-0.2.2/app/rag/routers.py
@@ -0,0 +1,41 @@
+ from fastapi import APIRouter
+
+ from .models import SystemStatusResponse
+
+ system_router = APIRouter()
+
+ @system_router.get(
+     path = "/",
+     summary = "Root Endpoint",
+     description = "Root endpoint for the RAG API",
+     response_model = SystemStatusResponse,
+ )
+ async def root():
+     return SystemStatusResponse(
+         status = 200,
+         message = "RAG API is running. If you want to see API documentation, please visit /docs",
+     )
+
+ @system_router.get(
+     path = "/livez",
+     summary = "Liveness Probe",
+     description = "Check if the RAG API is running",
+     response_model = SystemStatusResponse,
+ )
+ async def livez():
+     return SystemStatusResponse(
+         status = 200,
+         message = "RAG API is live",
+     )
+
+ @system_router.get(
+     path = "/readyz",
+     summary = "Readiness Probe",
+     description = "Check if the RAG API is ready to serve requests",
+     response_model = SystemStatusResponse,
+ )
+ async def readyz():
+     return SystemStatusResponse(
+         status = 200,
+         message = "RAG API is ready to serve requests",
+     )
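
A hedged sketch of exercising the new system router through the FastAPI app (assumes the package imports with its default settings and that httpx is available for TestClient):

    from fastapi.testclient import TestClient
    from app.rag.api import api

    client = TestClient(api)

    # The system router is mounted with an empty prefix, so the probes live at the root.
    print(client.get("/livez").json())        # {"status": 200, "message": "RAG API is live"}
    print(client.get("/readyz").status_code)  # 200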

pirag-0.2.0/app/rag/utils.py → pirag-0.2.2/app/rag/utilities.py
@@ -6,7 +6,7 @@ def connection_check(func):
      @wraps(func)
      def wrapper(self, *args, **kwargs):
          try:
-             requests.head(url=self.base_url, timeout=5)
+             requests.head(url=self.base_url, timeout=1)
              self._is_connected = True
              return func(self, *args, **kwargs)
          except requests.exceptions.ConnectionError:

{pirag-0.2.0 → pirag-0.2.2}/app/rag/vector_store/client.py
@@ -3,7 +3,7 @@ from pymilvus import MilvusClient
  from pymilvus.exceptions import MilvusException

  import app.rag.config as cfn
- from app.rag.utils import connection_check
+ from app.rag.utilities import connection_check


  class VectorStoreClient(MilvusClient):
@@ -40,6 +40,9 @@ class VectorStoreClient(MilvusClient):
          except requests.exceptions.ConnectionError:
              self._is_connected = False
              return False
+         except requests.exceptions.ReadTimeout:
+             self._is_connected = False
+             return False
          self._is_connected = True
          return True


pirag-0.2.0/app/rag/vector_store/service.py → pirag-0.2.2/app/rag/vector_store/services.py
@@ -3,7 +3,7 @@ from loguru import logger
  import app.rag.config as cfn
  from .client import client

- def doctor():
+ def doctor(resolve: bool):
      # Check connection
      is_connected = client.check_connection()
      if not is_connected:
@@ -20,6 +20,9 @@ def doctor():
          else:
              if not client.has_database(cfn.MILVUS_DATABASE):
                  logger.error(f"- ❌ FAILED: Vector store databases (Database '{cfn.MILVUS_DATABASE}' not found)")
+                 if resolve:
+                     logger.info(f"- 🔧 Resolving issue: Creating database '{cfn.MILVUS_DATABASE}'")
+                     client.create_database(cfn.MILVUS_DATABASE)
              else:
                  logger.info(f"- ✅ PASSED: Vector store databases (Database '{cfn.MILVUS_DATABASE}' exists)")
      except Exception as e:
@@ -34,9 +37,19 @@ def doctor():
              logger.warning("- ⏭️ SKIPPED: Vector store collections (No database available)")
          elif len(collections) == 0:
              logger.error("- ❌ FAILED: Vector store collections (No collections available)")
+             if resolve:
+                 logger.info(f"- 🔧 Resolving issue: Creating collection '{cfn.MILVUS_COLLECTION}'")
+                 client.create_collection(cfn.MILVUS_COLLECTION)
          else:
              if not client.has_collection(cfn.MILVUS_COLLECTION):
                  logger.error(f"- ❌ FAILED: Vector store collections (Collection '{cfn.MILVUS_COLLECTION}' not found)")
+                 if resolve:
+                     logger.info(f"- 🔧 Resolving issue: Creating collection '{cfn.MILVUS_COLLECTION}'")
+                     client.create_collection(
+                         collection_name = cfn.MILVUS_COLLECTION,
+                         dimension = cfn.EMBEDDING_DIMENSION,
+                         metric_type = cfn.MILVUS_METRIC_TYPE
+                     )
              else:
                  logger.info(f"- ✅ PASSED: Vector store collections (Collection '{cfn.MILVUS_COLLECTION}' exists)")
      except Exception as e:
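
The resolve branch boils down to the following calls on the package's Milvus client; a hedged sketch, assuming a reachable Milvus server configured via the MILVUS.* settings:

    import app.rag.config as cfn
    from app.rag.vector_store.client import client

    # Mirror doctor(resolve=True): create the missing database/collection on the
    # configured Milvus instance using the settings shown in config.py.
    if not client.has_database(cfn.MILVUS_DATABASE):
        client.create_database(cfn.MILVUS_DATABASE)
    if not client.has_collection(cfn.MILVUS_COLLECTION):
        client.create_collection(
            collection_name = cfn.MILVUS_COLLECTION,
            dimension = cfn.EMBEDDING_DIMENSION,
            metric_type = cfn.MILVUS_METRIC_TYPE,
        )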

{pirag-0.2.0 → pirag-0.2.2}/app/requirements.txt
@@ -8,3 +8,5 @@ uvicorn < 0.35
  # RAG
  ragas < 0.3
  pymilvus < 2.6
+ langchain-openai < 0.4
+ langchain-ollama < 0.4

{pirag-0.2.0 → pirag-0.2.2}/pirag.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: pirag
- Version: 0.2.0
+ Version: 0.2.2
  Summary: CLI Projects of On-Premise RAG. You can use your own LLM and vector DB. Or just add remote LLM servers and vector DB.
  Author-email: semir4in <semir4in@gmail.com>, jyje <jyjeon@outlook.com>
  Project-URL: Homepage, https://github.com/jyje/pilot-onpremise-rag
@@ -16,6 +16,8 @@ Requires-Dist: fastapi<0.116
  Requires-Dist: uvicorn<0.35
  Requires-Dist: ragas<0.3
  Requires-Dist: pymilvus<2.6
+ Requires-Dist: langchain-openai<0.4
+ Requires-Dist: langchain-ollama<0.4
  Dynamic: license-file

  <div align="center">

{pirag-0.2.0 → pirag-0.2.2}/pirag.egg-info/SOURCES.txt
@@ -7,18 +7,21 @@ app/setup.py
  app/rag/api.py
  app/rag/cli.py
  app/rag/config.py
- app/rag/utils.py
- app/rag/agent/client.py
+ app/rag/models.py
+ app/rag/routers.py
+ app/rag/utilities.py
+ app/rag/agent/services.py
  app/rag/embedding/client.py
- app/rag/embedding/service.py
+ app/rag/embedding/services.py
  app/rag/llm/client.py
- app/rag/llm/service.py
+ app/rag/llm/services.py
+ app/rag/llm/utilities.py
  app/rag/test/client.py
  app/rag/train/client.py
- app/rag/v1/router.py
- app/rag/v1/service.py
+ app/rag/v1/routers.py
+ app/rag/v1/services.py
  app/rag/vector_store/client.py
- app/rag/vector_store/service.py
+ app/rag/vector_store/services.py
  pirag.egg-info/PKG-INFO
  pirag.egg-info/SOURCES.txt
  pirag.egg-info/dependency_links.txt

{pirag-0.2.0 → pirag-0.2.2}/pirag.egg-info/requires.txt
@@ -5,3 +5,5 @@ fastapi<0.116
  uvicorn<0.35
  ragas<0.3
  pymilvus<2.6
+ langchain-openai<0.4
+ langchain-ollama<0.4

{pirag-0.2.0 → pirag-0.2.2}/pyproject.toml
@@ -1,6 +1,6 @@
  [project]
  name = "pirag"
- version = "0.2.0"
+ version = "0.2.2"
  description = "CLI Projects of On-Premise RAG. You can use your own LLM and vector DB. Or just add remote LLM servers and vector DB."
  authors = [
      { name="semir4in", email="semir4in@gmail.com" },

pirag-0.2.0/app/rag/cli.py
@@ -1,33 +0,0 @@
- import app.rag.config as cfn
- from loguru import logger
-
- from app.rag.llm.service import doctor as doctor_llm
- from app.rag.embedding.service import doctor as doctor_embedding
- from app.rag.vector_store.service import doctor as doctor_vector_store
-
- def chat():
-     print("Chatting with the RAG system...")
-
-
- def train():
-     print("Training the RAG system...")
-
-
- def test():
-     print("Testing the RAG system...")
-
-
- def doctor():
-     logger.info("💚 Doctoring the RAG system...")
-
-     # -- LLM Server
-     logger.info("Checking the LLM server (OpenAI-compatible)...")
-     doctor_llm()
-
-     # -- Embedding Server
-     logger.info("Checking the embedding server (OpenAI-compatible)...")
-     doctor_embedding()
-
-     # -- Vector Store
-     logger.info("Checking the vector store server (Milvus)...")
-     doctor_vector_store()

pirag-0.2.0/app/rag/llm/client.py
@@ -1,70 +0,0 @@
- import requests
- from langchain_openai.llms import OpenAI
-
- import app.rag.config as cfn
- from app.rag.utils import connection_check
-
-
- class LLMClient:
-     def __init__(self, base_url: str, api_key: str, model: str):
-         self.base_url = base_url
-         self.api_key = api_key
-         self.model = model
-         self._is_connected = True
-         self._client = None
-
-         if self.check_connection():
-             try:
-                 self._client = OpenAI(
-                     base_url = base_url,
-                     api_key = api_key,
-                     model = model
-                 )
-             except Exception as e:
-                 self._is_connected = False
-
-     def check_connection(self) -> bool:
-         """Check if the LLM server is accessible"""
-         try:
-             requests.head(url=self.base_url, timeout=5)
-         except requests.exceptions.ConnectionError:
-             self._is_connected = False
-             return False
-         self._is_connected = True
-         return True
-
-     @connection_check
-     def generate(self, prompt: str) -> str:
-         """Generate text from prompt"""
-         if not self._is_connected or self._client is None:
-             return ""
-         return self._client.invoke(prompt)
-
-     @connection_check
-     def list_models(self) -> list:
-         """List available models"""
-         if not self._is_connected:
-             return []
-         try:
-             response = requests.get(
-                 f"{self.base_url}/models",
-                 headers={"Authorization": f"Bearer {self.api_key}"}
-             )
-             if response.status_code == 200:
-                 return [model['id'] for model in response.json()['data']]
-             return []
-         except Exception:
-             return []
-
-     @connection_check
-     def has_model(self, model: str) -> bool:
-         """Check if model exists"""
-         if not self._is_connected:
-             return False
-         return model in self.list_models()
-
- client = LLMClient(
-     base_url = cfn.LLM_BASE_URL,
-     api_key = cfn.LLM_API_KEY,
-     model = cfn.LLM_MODEL,
- )