pirag 0.2.0__tar.gz → 0.2.2__tar.gz

This diff shows the contents of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the package versions as they appear in the public registry.
Files changed (35)
  1. {pirag-0.2.0 → pirag-0.2.2}/PKG-INFO +3 -1
  2. {pirag-0.2.0 → pirag-0.2.2}/app/main.py +27 -13
  3. pirag-0.2.2/app/rag/agent/services.py +11 -0
  4. {pirag-0.2.0 → pirag-0.2.2}/app/rag/api.py +10 -20
  5. pirag-0.2.2/app/rag/cli.py +54 -0
  6. {pirag-0.2.0 → pirag-0.2.2}/app/rag/config.py +20 -0
  7. {pirag-0.2.0 → pirag-0.2.2}/app/rag/embedding/client.py +1 -1
  8. pirag-0.2.0/app/rag/embedding/service.py → pirag-0.2.2/app/rag/embedding/services.py +1 -1
  9. pirag-0.2.2/app/rag/llm/client.py +128 -0
  10. pirag-0.2.0/app/rag/llm/service.py → pirag-0.2.2/app/rag/llm/services.py +1 -1
  11. pirag-0.2.2/app/rag/llm/utilities.py +40 -0
  12. pirag-0.2.2/app/rag/models.py +19 -0
  13. pirag-0.2.2/app/rag/routers.py +41 -0
  14. pirag-0.2.0/app/rag/utils.py → pirag-0.2.2/app/rag/utilities.py +1 -1
  15. {pirag-0.2.0 → pirag-0.2.2}/app/rag/vector_store/client.py +4 -1
  16. pirag-0.2.0/app/rag/vector_store/service.py → pirag-0.2.2/app/rag/vector_store/services.py +14 -1
  17. {pirag-0.2.0 → pirag-0.2.2}/app/requirements.txt +2 -0
  18. {pirag-0.2.0 → pirag-0.2.2}/pirag.egg-info/PKG-INFO +3 -1
  19. {pirag-0.2.0 → pirag-0.2.2}/pirag.egg-info/SOURCES.txt +10 -7
  20. {pirag-0.2.0 → pirag-0.2.2}/pirag.egg-info/requires.txt +2 -0
  21. {pirag-0.2.0 → pirag-0.2.2}/pyproject.toml +1 -1
  22. pirag-0.2.0/app/rag/cli.py +0 -33
  23. pirag-0.2.0/app/rag/llm/client.py +0 -70
  24. pirag-0.2.0/app/rag/v1/service.py +0 -0
  25. {pirag-0.2.0 → pirag-0.2.2}/LICENSE +0 -0
  26. {pirag-0.2.0 → pirag-0.2.2}/README.md +0 -0
  27. {pirag-0.2.0/app/rag/agent → pirag-0.2.2/app/rag/test}/client.py +0 -0
  28. {pirag-0.2.0/app/rag/test → pirag-0.2.2/app/rag/train}/client.py +0 -0
  29. pirag-0.2.0/app/rag/v1/router.py → pirag-0.2.2/app/rag/v1/routers.py +0 -0
  30. pirag-0.2.0/app/rag/train/client.py → pirag-0.2.2/app/rag/v1/services.py +0 -0
  31. {pirag-0.2.0 → pirag-0.2.2}/app/setup.py +0 -0
  32. {pirag-0.2.0 → pirag-0.2.2}/pirag.egg-info/dependency_links.txt +0 -0
  33. {pirag-0.2.0 → pirag-0.2.2}/pirag.egg-info/entry_points.txt +0 -0
  34. {pirag-0.2.0 → pirag-0.2.2}/pirag.egg-info/top_level.txt +0 -0
  35. {pirag-0.2.0 → pirag-0.2.2}/setup.cfg +0 -0

{pirag-0.2.0 → pirag-0.2.2}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: pirag
- Version: 0.2.0
+ Version: 0.2.2
  Summary: CLI Projects of On-Premise RAG. You can use your own LLM and vector DB. Or just add remote LLM servers and vector DB.
  Author-email: semir4in <semir4in@gmail.com>, jyje <jyjeon@outlook.com>
  Project-URL: Homepage, https://github.com/jyje/pilot-onpremise-rag
@@ -16,6 +16,8 @@ Requires-Dist: fastapi<0.116
  Requires-Dist: uvicorn<0.35
  Requires-Dist: ragas<0.3
  Requires-Dist: pymilvus<2.6
+ Requires-Dist: langchain-openai<0.4
+ Requires-Dist: langchain-ollama<0.4
  Dynamic: license-file

  <div align="center">

{pirag-0.2.0 → pirag-0.2.2}/app/main.py
@@ -5,15 +5,6 @@ import app.rag.config as cfn
  import app.rag.api as api
  import app.rag.cli as cli

- # Command definitions
- commands = {
-     "serve" : ("Start the RAG server", "Run a FastAPI-based RAG server", api.serve),
-     "chat" : ("Chat with the RAG system", "Run an interactive chat with the RAG system", cli.chat),
-     "train" : ("Train the RAG system", "Run a pipeline to train the RAG system", cli.train),
-     "test" : ("Test the RAG system", "Run a pipeline to test the RAG system", cli.test),
-     "doctor" : ("Diagnose the RAG system", "Run a pipeline to diagnose the RAG system", cli.doctor),
- }
-
  # Main parser
  parser = ArgumentParser(
      formatter_class = ArgumentDefaultsHelpFormatter,
@@ -24,14 +15,24 @@ parser = ArgumentParser(
      add_help = False,
  )

+ # Command definitions
+ commands = {
+     # name: help, description, function, extra_parsers
+     "serve" : ("Start the RAG server", "Run a FastAPI-based RAG server", api.serve, []),
+     "chat" : ("Chat with the RAG system", "Run an interactive chat with the RAG system", cli.chat, [cfn.chat_parser]),
+     "train" : ("Train the RAG system", "Run a pipeline to train the RAG system", cli.train, []),
+     "test" : ("Test the RAG system", "Run a pipeline to test the RAG system", cli.test, []),
+     "doctor" : ("Diagnose the RAG system", "Run a pipeline to diagnose the RAG system", cli.doctor, [cfn.doctor_parser]),
+ }
+
  # Add command parsers
  subparsers = parser.add_subparsers(title="commands", dest="command")
- for name, (help, description, _) in commands.items():
+ for name, (help, description, _, extra_parsers) in commands.items():
      subparsers.add_parser(
          name = name,
          help = help,
          description = description,
-         parents = [cfn.common_parser],
+         parents = [cfn.common_parser] + extra_parsers,
          add_help = False,
      )

@@ -40,8 +41,21 @@ def main():
      cfn.setup_logger(cfn.LOG_LEVEL, cfn.LOG_SAVE, cfn.LOG_DIR)
      logger.debug(f"Parsed arguments: {args}")

-     if func := commands.get(args.command):
-         func[-1]()
+     if command_info := commands.get(args.command):
+         func, extra_parsers = command_info[2], command_info[3]
+
+         # Create parser options dict from extra_parsers
+         extra_options = {}
+         if extra_parsers:
+             for parser_obj in extra_parsers:
+                 for action in parser_obj._actions:
+                     if action.dest == 'help':
+                         continue
+                     if hasattr(args, action.dest) and getattr(args, action.dest) != action.default:
+                         extra_options[action.dest] = getattr(args, action.dest)
+
+         # Run the command with the extra parser options
+         func(extra_options)
      else:
          parser.print_help()


pirag-0.2.2/app/rag/agent/services.py
@@ -0,0 +1,11 @@
+ from app.rag.llm.client import client as llm_client
+
+ def chat_only_llm():
+     response = llm_client.generate_with_metrics("Hello, how are you?")
+     print(response)
+
+
+ def chat_with_rag():
+     pass
+
+

{pirag-0.2.0 → pirag-0.2.2}/app/rag/api.py
@@ -1,10 +1,12 @@
  import uvicorn
- from fastapi import FastAPI, Request, Depends, HTTPException, Query
+ from fastapi import FastAPI, APIRouter, Request, Depends, HTTPException, Query
  from fastapi.middleware.cors import CORSMiddleware

  from loguru import logger
  import app.rag.config as cfn
- from app.rag.v1.router import router as core_router
+
+ from app.rag.routers import system_router
+ from app.rag.v1.routers import router as v1_router

  # Initialize FastAPI app
  api = FastAPI(
@@ -22,26 +24,14 @@ api.add_middleware(
      allow_headers=["*"],
  )

+ api.include_router(router=system_router, prefix="", tags=["System"])
+ api.include_router(router=v1_router, prefix="/v1")

- @api.get("/")
- async def _():
-     return {"message": "RAG API is running"}
-
-
- @api.get("/livez")
- async def _():
-     return {"status": "ok"}
-
-
- @api.get("/readyz")
- async def _():
-     return {"status": "ok"}
-
- api.include_router(router=core_router, prefix="/v1")
-
- def serve():
+ def serve(parser_options=None):
      print("Serving the RAG API...")
-     print(cfn.API_HOST, cfn.API_PORT, cfn.API_RELOAD)
+     if parser_options:
+         logger.debug(f"Serve parser options: {parser_options}")
+
      uvicorn.run(
          app = "app.rag.api:api",
          host = cfn.API_HOST,

pirag-0.2.2/app/rag/cli.py
@@ -0,0 +1,54 @@
+ import app.rag.config as cfn
+ from loguru import logger
+
+ from app.rag.llm.services import doctor as doctor_llm
+ from app.rag.embedding.services import doctor as doctor_embedding
+ from app.rag.vector_store.services import doctor as doctor_vector_store
+ from app.rag.agent.services import chat_only_llm, chat_with_rag
+
+ def chat(options: dict):
+     logger.debug(f"Chat parser options: {options}")
+     no_rag = options.get('no_rag', False)
+
+     # -- Chat
+     if no_rag:
+         logger.info("💬 Chatting with the LLM system directly...")
+         chat_only_llm()
+     else:
+         logger.info("💬 Chatting with the RAG system...")
+         chat_with_rag()
+
+
+ def train(options: dict):
+     print("Training the RAG system...")
+     logger.debug(f"Train parser options: {options}")
+
+
+ def test(options: dict):
+     print("Testing the RAG system...")
+     logger.debug(f"Test parser options: {options}")
+
+
+ def doctor(options: dict):
+     logger.info("💚 Doctoring the RAG system...")
+
+     logger.debug(f"Doctor parser options: {options}")
+     # Check if resolve option is present
+     resolve = options.get('resolve', False)
+     if resolve:
+         logger.info("🔧 Resolving issues is enabled")
+
+     # -- LLM Server
+     logger.info("🔍 Checking the LLM server (OpenAI-compatible)...")
+     doctor_llm(resolve)
+
+     # -- Embedding Server
+     logger.info("🔍 Checking the embedding server (OpenAI-compatible)...")
+     doctor_embedding(resolve)
+
+     # -- Vector Store
+     logger.info("🔍 Checking the vector store server (Milvus)...")
+     doctor_vector_store(resolve)
+
+     if resolve:
+         logger.info(f"🔧 Resolving issue completed. To make sure the issues are resolved, please try doctoring again.")
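
For reference, a hedged sketch of how these entry points are invoked once main() has collected the per-command options; the option values are illustrative, and running it requires the configured LLM, embedding, and Milvus services to be reachable:

    from app.rag.cli import chat, doctor

    chat({"no_rag": True})      # skip retrieval and call chat_only_llm() directly
    doctor({"resolve": False})  # report-only health check of LLM, embedding, and vector store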

{pirag-0.2.0 → pirag-0.2.2}/app/rag/config.py
@@ -40,12 +40,14 @@ API_RELOAD: bool = settings.get("API.RELOAD", True)
  LLM_BASE_URL: str = settings.get("LLM.BASE_URL", "http://localhost:11434")
  LLM_API_KEY: str = settings.get("LLM.API_KEY", "llm_api_key")
  LLM_MODEL: str = settings.get("LLM.MODEL", "gemma3:4b")
+ LLM_SERVER_TYPE: str = settings.get("LLM.SERVER_TYPE", "openai")


  # -- Embedding Server
  EMBEDDING_BASE_URL: str = settings.get("EMBEDDING.BASE_URL", "http://localhost:11434")
  EMBEDDING_API_KEY: str = settings.get("EMBEDDING.API_KEY", "embedding_api_key")
  EMBEDDING_MODEL: str = settings.get("EMBEDDING.MODEL", "nomic-embed-text:latest")
+ EMBEDDING_SERVER_TYPE: str = settings.get("EMBEDDING.SERVER_TYPE", "openai")
  EMBEDDING_DIMENSION: int = settings.get("EMBEDDING.DIMENSION", 768)


@@ -122,3 +124,21 @@ common_parser.add_argument(
      default = argparse.SUPPRESS,
      action = "help",
  )
+
+
+ # Chat parser
+ chat_parser = argparse.ArgumentParser(add_help=False)
+ chat_parser.add_argument(
+     "-n", "--no-rag",
+     help = "Do not use RAG to answer the question. Just use the LLM to answer the question.",
+     action = "store_true",
+ )
+
+
+ # Doctor parser
+ doctor_parser = argparse.ArgumentParser(add_help=False)
+ doctor_parser.add_argument(
+     "-r", "--resolve",
+     help = "Resolve the issue",
+     action = "store_true",
+ )
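
For orientation, a self-contained sketch of the pattern introduced here and consumed in app/main.py: a shared common parser plus a per-command extra parser passed via parents, with only the non-default values collected into the options dict. The names and values below are illustrative, not the package defaults.

    import argparse

    # Shared options for every command (stands in for cfn.common_parser).
    common_parser = argparse.ArgumentParser(add_help=False)
    common_parser.add_argument("--log-level", default="INFO")

    # Extra options for a single command (stands in for cfn.chat_parser).
    chat_parser = argparse.ArgumentParser(add_help=False)
    chat_parser.add_argument("-n", "--no-rag", action="store_true")

    parser = argparse.ArgumentParser(prog="pirag")
    subparsers = parser.add_subparsers(title="commands", dest="command")
    subparsers.add_parser("chat", parents=[common_parser, chat_parser], add_help=False)

    args = parser.parse_args(["chat", "--no-rag"])

    # Collect only the values that differ from their defaults, as main() does.
    extra_options = {
        action.dest: getattr(args, action.dest)
        for action in chat_parser._actions
        if action.dest != "help" and getattr(args, action.dest, action.default) != action.default
    }
    print(extra_options)  # {'no_rag': True}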

{pirag-0.2.0 → pirag-0.2.2}/app/rag/embedding/client.py
@@ -2,7 +2,7 @@ import requests
  from langchain_openai.embeddings import OpenAIEmbeddings

  import app.rag.config as cfn
- from app.rag.utils import connection_check
+ from app.rag.utilities import connection_check


  class EmbeddingClient:

pirag-0.2.0/app/rag/embedding/service.py → pirag-0.2.2/app/rag/embedding/services.py
@@ -3,7 +3,7 @@ from loguru import logger
  import app.rag.config as cfn
  from .client import client

- def doctor():
+ def doctor(resolve: bool):
      # Check connection
      is_connected = client.check_connection()
      if not is_connected:

pirag-0.2.2/app/rag/llm/client.py
@@ -0,0 +1,128 @@
+ import requests
+ import time
+ from langchain_openai.llms import OpenAI
+ from typing import Dict, Tuple, Any, List, Optional
+
+ import app.rag.config as cfn
+ from app.rag.utilities import connection_check
+ from .utilities import MetricCallbackHandler
+
+ class LLMClient:
+     def __init__(self, base_url: str, api_key: str, model: str):
+         self.base_url = base_url
+         self.api_key = api_key
+         self.model = model
+         self._is_connected = True
+         self._client = None
+
+         if self.check_connection():
+             try:
+                 self._client = OpenAI(
+                     base_url = base_url,
+                     api_key = api_key,
+                     model = model
+                 )
+             except Exception as e:
+                 self._is_connected = False
+
+     def check_connection(self) -> bool:
+         """Check if the LLM server is accessible"""
+         try:
+             requests.head(url=self.base_url, timeout=5)
+         except requests.exceptions.ConnectionError:
+             self._is_connected = False
+             return False
+         self._is_connected = True
+         return True
+
+     @connection_check
+     def generate(self, prompt: str) -> tuple:
+         """Generate text from prompt and return usage information
+
+         Returns:
+             tuple: (generated_text, usage_info)
+         """
+         if not self._is_connected or self._client is None:
+             return "", {}
+
+         response = self._client.generate([prompt])
+         return response.generations[0][0].text, response.llm_output
+
+     @connection_check
+     def generate_with_metrics(self, prompt: str) -> Tuple[str, Dict[str, Any]]:
+         """Generate text with timing and usage metrics
+
+         Returns:
+             tuple: (generated_text, metrics_info)
+         """
+         if not self._is_connected or self._client is None:
+             return "", {"error": "LLM client not connected"}
+
+         handler = MetricCallbackHandler()
+
+         # Create streaming client with callback
+         streaming_client = OpenAI(
+             base_url=self.base_url,
+             api_key=self.api_key,
+             model=self.model,
+             streaming=True,
+             callbacks=[handler]
+         )
+
+         # Make a single request
+         response = streaming_client.generate([prompt], callbacks=[handler])
+
+         # Get base metrics from response
+         metrics = {}
+
+         # Extract token usage from response
+         llm_output = response.llm_output if hasattr(response, 'llm_output') else {}
+
+         # Check if token_usage exists in the response
+         token_usage = llm_output.get('token_usage', {})
+         if token_usage:
+             # If token_usage is available, copy it to our metrics
+             metrics.update(token_usage)
+
+         # Add model name if available
+         if 'model_name' in llm_output:
+             metrics['model'] = llm_output['model_name']
+         else:
+             metrics['model'] = self.model
+
+         # Calculate and add timing metrics
+         metrics['ttft'] = handler.ttft or 0.0
+         metrics['total_time'] = (handler.end_time or time.time()) - handler.start_time
+         metrics['tokens_per_second'] = handler.calculate_tokens_per_second()
+         metrics['completion_tokens'] = handler.token_count
+
+         return handler.result, metrics
+
+     @connection_check
+     def list_models(self) -> list:
+         """List available models"""
+         if not self._is_connected:
+             return []
+         try:
+             response = requests.get(
+                 f"{self.base_url}/models",
+                 headers={"Authorization": f"Bearer {self.api_key}"}
+             )
+             if response.status_code == 200:
+                 return [model['id'] for model in response.json()['data']]
+             return []
+         except Exception:
+             return []
+
+     @connection_check
+     def has_model(self, model: str) -> bool:
+         """Check if model exists"""
+         if not self._is_connected:
+             return False
+         return model in self.list_models()
+
+ client = LLMClient(
+     base_url = cfn.LLM_BASE_URL,
+     api_key = cfn.LLM_API_KEY,
+     model = cfn.LLM_MODEL,
+ )
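
A minimal usage sketch of the new metrics API, assuming the OpenAI-compatible server configured under LLM.* is reachable; the prompt and the printed keys are illustrative:

    from app.rag.llm.client import client

    # generate_with_metrics returns the generated text plus a metrics dict with
    # ttft, total_time, tokens_per_second, completion_tokens, model, and any
    # token_usage fields reported by the server.
    text, metrics = client.generate_with_metrics("Explain RAG in one sentence.")
    print(text)
    print(metrics["ttft"], metrics["tokens_per_second"])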

pirag-0.2.0/app/rag/llm/service.py → pirag-0.2.2/app/rag/llm/services.py
@@ -3,7 +3,7 @@ from loguru import logger
  import app.rag.config as cfn
  from .client import client

- def doctor():
+ def doctor(resolve: bool):
      # Check connection
      is_connected = client.check_connection()
      if not is_connected:

pirag-0.2.2/app/rag/llm/utilities.py
@@ -0,0 +1,40 @@
+ import time
+ from langchain.callbacks.base import BaseCallbackHandler
+
+ class MetricCallbackHandler(BaseCallbackHandler):
+     def __init__(self):
+         self.start_time = time.time()
+         self.ttft = None
+         self.first_token_time = None
+         self.result = ""
+         self.end_time = None
+         self.token_count = 0
+         self.token_timestamps = []
+
+     def on_llm_new_token(self, token: str, **kwargs):
+         current_time = time.time()
+         self.token_count += 1
+         self.token_timestamps.append(current_time)
+
+         if self.ttft is None:
+             self.ttft = current_time - self.start_time
+             self.first_token_time = current_time
+
+         self.result += token
+
+     def on_llm_end(self, *args, **kwargs):
+         self.end_time = time.time()
+
+     def calculate_tokens_per_second(self):
+         """Calculate tokens per second after the first token"""
+         if self.token_count <= 1 or self.first_token_time is None or self.end_time is None:
+             return 0.0
+
+         # Calculate time from first token to completion (exclude TTFT)
+         generation_time = self.end_time - self.first_token_time
+         if generation_time <= 0:
+             return 0.0
+
+         # Exclude the first token from the count since we're measuring from after it arrived
+         tokens_after_first = self.token_count - 1
+         return tokens_after_first / generation_time
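
A quick sketch of the handler's timing arithmetic, driving the callbacks by hand instead of through a streaming LLM call (langchain must be importable; the sleeps merely simulate token arrival):

    import time
    from app.rag.llm.utilities import MetricCallbackHandler

    handler = MetricCallbackHandler()
    time.sleep(0.2)                        # simulated time to first token
    for token in ["Hello", ",", " world"]:
        handler.on_llm_new_token(token)
        time.sleep(0.05)                   # simulated inter-token delay
    handler.on_llm_end()

    print(handler.ttft)                           # ~0.2 s: first token time minus start_time
    print(handler.calculate_tokens_per_second())  # 2 tokens over the ~0.15 s after the first token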

pirag-0.2.2/app/rag/models.py
@@ -0,0 +1,19 @@
+ from pydantic import BaseModel
+
+ class SystemStatusResponse(BaseModel):
+     """
+     Response model for the system status endpoint.
+     """
+     status: int
+     message: str
+
+     model_config = {
+         "json_schema_extra": {
+             "examples": [
+                 {
+                     "status": 200,
+                     "message": "System is running normally"
+                 }
+             ]
+         }
+     }

pirag-0.2.2/app/rag/routers.py
@@ -0,0 +1,41 @@
+ from fastapi import APIRouter
+
+ from .models import SystemStatusResponse
+
+ system_router = APIRouter()
+
+ @system_router.get(
+     path = "/",
+     summary = "Root Endpoint",
+     description = "Root endpoint for the RAG API",
+     response_model = SystemStatusResponse,
+ )
+ async def root():
+     return SystemStatusResponse(
+         status = 200,
+         message = "RAG API is running. If you want to see API documentation, please visit /docs",
+     )
+
+ @system_router.get(
+     path = "/livez",
+     summary = "Liveness Probe",
+     description = "Check if the RAG API is running",
+     response_model = SystemStatusResponse,
+ )
+ async def livez():
+     return SystemStatusResponse(
+         status = 200,
+         message = "RAG API is live",
+     )
+
+ @system_router.get(
+     path = "/readyz",
+     summary = "Readiness Probe",
+     description = "Check if the RAG API is ready to serve requests",
+     response_model = SystemStatusResponse,
+ )
+ async def readyz():
+     return SystemStatusResponse(
+         status = 200,
+         message = "RAG API is ready to serve requests",
+     )
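
A hedged sketch of exercising the new system router through the FastAPI app (assumes the package imports with its default settings and that httpx is available for TestClient):

    from fastapi.testclient import TestClient
    from app.rag.api import api

    client = TestClient(api)

    # The system router is mounted with an empty prefix, so the probes live at the root.
    print(client.get("/livez").json())        # {"status": 200, "message": "RAG API is live"}
    print(client.get("/readyz").status_code)  # 200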

pirag-0.2.0/app/rag/utils.py → pirag-0.2.2/app/rag/utilities.py
@@ -6,7 +6,7 @@ def connection_check(func):
      @wraps(func)
      def wrapper(self, *args, **kwargs):
          try:
-             requests.head(url=self.base_url, timeout=5)
+             requests.head(url=self.base_url, timeout=1)
              self._is_connected = True
              return func(self, *args, **kwargs)
          except requests.exceptions.ConnectionError:

{pirag-0.2.0 → pirag-0.2.2}/app/rag/vector_store/client.py
@@ -3,7 +3,7 @@ from pymilvus import MilvusClient
  from pymilvus.exceptions import MilvusException

  import app.rag.config as cfn
- from app.rag.utils import connection_check
+ from app.rag.utilities import connection_check


  class VectorStoreClient(MilvusClient):
@@ -40,6 +40,9 @@ class VectorStoreClient(MilvusClient):
          except requests.exceptions.ConnectionError:
              self._is_connected = False
              return False
+         except requests.exceptions.ReadTimeout:
+             self._is_connected = False
+             return False
          self._is_connected = True
          return True


pirag-0.2.0/app/rag/vector_store/service.py → pirag-0.2.2/app/rag/vector_store/services.py
@@ -3,7 +3,7 @@ from loguru import logger
  import app.rag.config as cfn
  from .client import client

- def doctor():
+ def doctor(resolve: bool):
      # Check connection
      is_connected = client.check_connection()
      if not is_connected:
@@ -20,6 +20,9 @@ def doctor():
          else:
              if not client.has_database(cfn.MILVUS_DATABASE):
                  logger.error(f"- ❌ FAILED: Vector store databases (Database '{cfn.MILVUS_DATABASE}' not found)")
+                 if resolve:
+                     logger.info(f"- 🔧 Resolving issue: Creating database '{cfn.MILVUS_DATABASE}'")
+                     client.create_database(cfn.MILVUS_DATABASE)
              else:
                  logger.info(f"- ✅ PASSED: Vector store databases (Database '{cfn.MILVUS_DATABASE}' exists)")
      except Exception as e:
@@ -34,9 +37,19 @@ def doctor():
              logger.warning("- ⏭️ SKIPPED: Vector store collections (No database available)")
          elif len(collections) == 0:
              logger.error("- ❌ FAILED: Vector store collections (No collections available)")
+             if resolve:
+                 logger.info(f"- 🔧 Resolving issue: Creating collection '{cfn.MILVUS_COLLECTION}'")
+                 client.create_collection(cfn.MILVUS_COLLECTION)
          else:
              if not client.has_collection(cfn.MILVUS_COLLECTION):
                  logger.error(f"- ❌ FAILED: Vector store collections (Collection '{cfn.MILVUS_COLLECTION}' not found)")
+                 if resolve:
+                     logger.info(f"- 🔧 Resolving issue: Creating collection '{cfn.MILVUS_COLLECTION}'")
+                     client.create_collection(
+                         collection_name = cfn.MILVUS_COLLECTION,
+                         dimension = cfn.EMBEDDING_DIMENSION,
+                         metric_type = cfn.MILVUS_METRIC_TYPE
+                     )
              else:
                  logger.info(f"- ✅ PASSED: Vector store collections (Collection '{cfn.MILVUS_COLLECTION}' exists)")
      except Exception as e:
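
The resolve branch boils down to the following calls on the package's Milvus client; a hedged sketch, assuming a reachable Milvus server configured via the MILVUS.* settings:

    import app.rag.config as cfn
    from app.rag.vector_store.client import client

    # Mirror doctor(resolve=True): create the missing database/collection on the
    # configured Milvus instance using the settings shown in config.py.
    if not client.has_database(cfn.MILVUS_DATABASE):
        client.create_database(cfn.MILVUS_DATABASE)
    if not client.has_collection(cfn.MILVUS_COLLECTION):
        client.create_collection(
            collection_name = cfn.MILVUS_COLLECTION,
            dimension = cfn.EMBEDDING_DIMENSION,
            metric_type = cfn.MILVUS_METRIC_TYPE,
        )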

{pirag-0.2.0 → pirag-0.2.2}/app/requirements.txt
@@ -8,3 +8,5 @@ uvicorn < 0.35
  # RAG
  ragas < 0.3
  pymilvus < 2.6
+ langchain-openai < 0.4
+ langchain-ollama < 0.4

{pirag-0.2.0 → pirag-0.2.2}/pirag.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: pirag
- Version: 0.2.0
+ Version: 0.2.2
  Summary: CLI Projects of On-Premise RAG. You can use your own LLM and vector DB. Or just add remote LLM servers and vector DB.
  Author-email: semir4in <semir4in@gmail.com>, jyje <jyjeon@outlook.com>
  Project-URL: Homepage, https://github.com/jyje/pilot-onpremise-rag
@@ -16,6 +16,8 @@ Requires-Dist: fastapi<0.116
  Requires-Dist: uvicorn<0.35
  Requires-Dist: ragas<0.3
  Requires-Dist: pymilvus<2.6
+ Requires-Dist: langchain-openai<0.4
+ Requires-Dist: langchain-ollama<0.4
  Dynamic: license-file

  <div align="center">

{pirag-0.2.0 → pirag-0.2.2}/pirag.egg-info/SOURCES.txt
@@ -7,18 +7,21 @@ app/setup.py
  app/rag/api.py
  app/rag/cli.py
  app/rag/config.py
- app/rag/utils.py
- app/rag/agent/client.py
+ app/rag/models.py
+ app/rag/routers.py
+ app/rag/utilities.py
+ app/rag/agent/services.py
  app/rag/embedding/client.py
- app/rag/embedding/service.py
+ app/rag/embedding/services.py
  app/rag/llm/client.py
- app/rag/llm/service.py
+ app/rag/llm/services.py
+ app/rag/llm/utilities.py
  app/rag/test/client.py
  app/rag/train/client.py
- app/rag/v1/router.py
- app/rag/v1/service.py
+ app/rag/v1/routers.py
+ app/rag/v1/services.py
  app/rag/vector_store/client.py
- app/rag/vector_store/service.py
+ app/rag/vector_store/services.py
  pirag.egg-info/PKG-INFO
  pirag.egg-info/SOURCES.txt
  pirag.egg-info/dependency_links.txt

{pirag-0.2.0 → pirag-0.2.2}/pirag.egg-info/requires.txt
@@ -5,3 +5,5 @@ fastapi<0.116
  uvicorn<0.35
  ragas<0.3
  pymilvus<2.6
+ langchain-openai<0.4
+ langchain-ollama<0.4

{pirag-0.2.0 → pirag-0.2.2}/pyproject.toml
@@ -1,6 +1,6 @@
  [project]
  name = "pirag"
- version = "0.2.0"
+ version = "0.2.2"
  description = "CLI Projects of On-Premise RAG. You can use your own LLM and vector DB. Or just add remote LLM servers and vector DB."
  authors = [
      { name="semir4in", email="semir4in@gmail.com" },

pirag-0.2.0/app/rag/cli.py
@@ -1,33 +0,0 @@
- import app.rag.config as cfn
- from loguru import logger
-
- from app.rag.llm.service import doctor as doctor_llm
- from app.rag.embedding.service import doctor as doctor_embedding
- from app.rag.vector_store.service import doctor as doctor_vector_store
-
- def chat():
-     print("Chatting with the RAG system...")
-
-
- def train():
-     print("Training the RAG system...")
-
-
- def test():
-     print("Testing the RAG system...")
-
-
- def doctor():
-     logger.info("💚 Doctoring the RAG system...")
-
-     # -- LLM Server
-     logger.info("Checking the LLM server (OpenAI-compatible)...")
-     doctor_llm()
-
-     # -- Embedding Server
-     logger.info("Checking the embedding server (OpenAI-compatible)...")
-     doctor_embedding()
-
-     # -- Vector Store
-     logger.info("Checking the vector store server (Milvus)...")
-     doctor_vector_store()

pirag-0.2.0/app/rag/llm/client.py
@@ -1,70 +0,0 @@
- import requests
- from langchain_openai.llms import OpenAI
-
- import app.rag.config as cfn
- from app.rag.utils import connection_check
-
-
- class LLMClient:
-     def __init__(self, base_url: str, api_key: str, model: str):
-         self.base_url = base_url
-         self.api_key = api_key
-         self.model = model
-         self._is_connected = True
-         self._client = None
-
-         if self.check_connection():
-             try:
-                 self._client = OpenAI(
-                     base_url = base_url,
-                     api_key = api_key,
-                     model = model
-                 )
-             except Exception as e:
-                 self._is_connected = False
-
-     def check_connection(self) -> bool:
-         """Check if the LLM server is accessible"""
-         try:
-             requests.head(url=self.base_url, timeout=5)
-         except requests.exceptions.ConnectionError:
-             self._is_connected = False
-             return False
-         self._is_connected = True
-         return True
-
-     @connection_check
-     def generate(self, prompt: str) -> str:
-         """Generate text from prompt"""
-         if not self._is_connected or self._client is None:
-             return ""
-         return self._client.invoke(prompt)
-
-     @connection_check
-     def list_models(self) -> list:
-         """List available models"""
-         if not self._is_connected:
-             return []
-         try:
-             response = requests.get(
-                 f"{self.base_url}/models",
-                 headers={"Authorization": f"Bearer {self.api_key}"}
-             )
-             if response.status_code == 200:
-                 return [model['id'] for model in response.json()['data']]
-             return []
-         except Exception:
-             return []
-
-     @connection_check
-     def has_model(self, model: str) -> bool:
-         """Check if model exists"""
-         if not self._is_connected:
-             return False
-         return model in self.list_models()
-
- client = LLMClient(
-     base_url = cfn.LLM_BASE_URL,
-     api_key = cfn.LLM_API_KEY,
-     model = cfn.LLM_MODEL,
- )