PyPI - pirag - Versions diffs - 0.1.0__tar.gz - Mend

pirag 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

pirag-0.1.0/LICENSE +21 -0
pirag-0.1.0/PKG-INFO +132 -0
pirag-0.1.0/README.md +113 -0
pirag-0.1.0/app/main.py +80 -0
pirag-0.1.0/app/rag/agent.py +64 -0
pirag-0.1.0/app/rag/config.py +159 -0
pirag-0.1.0/app/requirements.txt +11 -0
pirag-0.1.0/app/setup.py +28 -0
pirag-0.1.0/pirag.egg-info/PKG-INFO +132 -0
pirag-0.1.0/pirag.egg-info/SOURCES.txt +14 -0
pirag-0.1.0/pirag.egg-info/dependency_links.txt +1 -0
pirag-0.1.0/pirag.egg-info/entry_points.txt +2 -0
pirag-0.1.0/pirag.egg-info/requires.txt +6 -0
pirag-0.1.0/pirag.egg-info/top_level.txt +1 -0
pirag-0.1.0/pyproject.toml +39 -0
pirag-0.1.0/setup.cfg +4 -0

pirag-0.1.0/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2025 jyje with studio.r4iny
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

pirag-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,132 @@
+Metadata-Version: 2.4
+Name: pirag
+Version: 0.1.0
+Summary: CLI Projects of On-Premise RAG. You can use your own LLM and vector DB. Or just add remote LLM servers and vector DB.
+Author-email: semir4in <semir4in@gmail.com>, jyje <jyjeon@outlook.com>
+Project-URL: Homepage, https://github.com/jyje/pilot-onpremise-rag
+Project-URL: Repository, https://github.com/jyje/pilot-onpremise-rag
+Project-URL: Issue, https://github.com/jyje/pilot-onpremise-rag/issues
+Requires-Python: >=3.9
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: python-dotenv<1.2
+Requires-Dist: loguru<0.8
+Requires-Dist: pytest<8.4
+Requires-Dist: black<25.2
+Requires-Dist: ragas<0.3
+Requires-Dist: pymilvus<2.6
+Dynamic: license-file
+<div align="center">
+# pirag: pilot-onpremise-rag
+<!-- <img alt="RAG Logo" src="docs/rag-logo.jpg" width="450" style="object-fit: contain; max-width: 100%; aspect-ratio: 16 / 9;"> -->
+🌱 An LLM+RAG CLI project that runs in an on-premise environment
+[![Python](https://img.shields.io/badge/3.9+-3776AB?style=flat&logo=Python&logoColor=white&label=Python)](https://www.python.org/)
+[![CLI](https://img.shields.io/badge/CLI-orange?style=flat&logo=iterm2&logoColor=white)](https://typer.tiangolo.com/)
+[![LLM](https://img.shields.io/badge/LLM-green?style=flat&logo=OpenAI&logoColor=white)](https://openai.com)
+[![LangChain](https://img.shields.io/badge/LangChain-blue?style=flat&logo=Langchain&logoColor=white)](https://langchain.com)
+[![Milvus](https://img.shields.io/badge/Milvus-red?style=flat&logo=Milvus&logoColor=white)](https://milvus.io/)
+[![MinIO](https://img.shields.io/badge/MinIO-red?style=flat&logo=MinIO&logoColor=white)](https://min.io/)
+<!-- [![Docker](https://img.shields.io/badge/Docker-blue?style=flat&logo=Docker&logoColor=white)](https://docker.com) -->
+</div>
+## 🚀 Introduction
+**pilot-onpremise-rag** is a CLI tool that implements a knowledge-based RAG (Retrieval-Augmented Generation) system with LLM. It provides powerful document retrieval and generation capabilities while ensuring data privacy.
+## 🔧 Setup
+### (Optional) Setup External Dependencies
+```bash
+git clone https://github.com/jyje/pilot-onpremise-rag
+cd pilot-onpremise-rag
+docker compose -f docker/compose.yaml up -d
+```
+### Install pirag
+```bash
+git clone https://github.com/jyje/pilot-onpremise-rag
+cd pilot-onpremise-rag
+pip install --upgrade -e ./app
+```
+## 📚 Usage
+### Basic Commands
+```
+# View help
+pirag --help
+# Train documents
+pirag train --source ./documents
+# Ask a question
+pirag ask "Give me a joke for Cat-holic."
+```
+## 🏗️ Project Structure
+```
+pilot-onpremise-rag/
+├── app/                        # Main application directory
+│   ├── main.py                 # CLI main entry point
+│   ├── setup.py                # Package setup configuration
+│   ├── pyproject.toml          # PEP 517/518 build configuration
+│   ├── requirements.txt        # Dependencies
+│   ├── logs/                   # Application logs
+│   └── rag/                    # RAG implementation
+│       ├── config.py           # Configuration settings
+│       ├── agent.py            # Agent implementation
+│       ├── ask/                # Query handling module
+│       ├── train/              # Document training module
+│       ├── test/               # Testing module
+│       └── doctor/             # Diagnostic tools
+├── VERSION                     # Project version
+├── docker/                     # Docker configuration
+├── assets/                     # Static assets (Files are not included)
+└── LICENSE                     # License information
+```
+## 🔄 How It Works
+1. **Document Training**: Process local documents and store in vector database
+2. **Search Engine**: Find document chunks related to user queries
+3. **Context Generation**: Create LLM prompts from retrieved documents
+4. **Response Generation**: Provide accurate responses via local LLM
+## 💡 Key Features
+- **Privacy Guaranteed**: All data stays local and all processing runs locally
+- **Multiple Document Support**: Support for PDF, Markdown, TXT, DOCX, and other formats
+- **Custom LLM**: Compatible with various local LLM models
+- **Vector Database**: Vector DB integration for efficient document retrieval
+## 🧪 Performance Optimization
+| Configuration | Memory Usage | Response Speed | Suitable Use Cases |
+|--------------|-------------|---------------|-------------------|
+| Light Model | 4-6GB | Fast | Simple queries, low-spec hardware |
+| Medium Model | 8-12GB | Medium | General use, most queries |
+| Large Model | 16GB+ | Slow | Complex document analysis, expert answers |
+## 🔗 References
+- [LangChain Documentation](https://python.langchain.com/docs/get_started/introduction)
+- [LLM Optimization Techniques](https://huggingface.co/docs/optimum/index)
+- [RAG Paper](https://arxiv.org/abs/2005.11401)
+## Contributing
+Any contributions are welcome!
+### Current Maintainers
+- [Studio R4iny](https://github.com/studior4iny)
+    - [jyje](https://github.com/jyje), [semir4in](https://github.com/semir4in) (Same person)

pirag-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,113 @@
+<div align="center">
+# pirag: pilot-onpremise-rag
+<!-- <img alt="RAG Logo" src="docs/rag-logo.jpg" width="450" style="object-fit: contain; max-width: 100%; aspect-ratio: 16 / 9;"> -->
+🌱 An LLM+RAG CLI project that runs in an on-premise environment
+[![Python](https://img.shields.io/badge/3.9+-3776AB?style=flat&logo=Python&logoColor=white&label=Python)](https://www.python.org/)
+[![CLI](https://img.shields.io/badge/CLI-orange?style=flat&logo=iterm2&logoColor=white)](https://typer.tiangolo.com/)
+[![LLM](https://img.shields.io/badge/LLM-green?style=flat&logo=OpenAI&logoColor=white)](https://openai.com)
+[![LangChain](https://img.shields.io/badge/LangChain-blue?style=flat&logo=Langchain&logoColor=white)](https://langchain.com)
+[![Milvus](https://img.shields.io/badge/Milvus-red?style=flat&logo=Milvus&logoColor=white)](https://milvus.io/)
+[![MinIO](https://img.shields.io/badge/MinIO-red?style=flat&logo=MinIO&logoColor=white)](https://min.io/)
+<!-- [![Docker](https://img.shields.io/badge/Docker-blue?style=flat&logo=Docker&logoColor=white)](https://docker.com) -->
+</div>
+## 🚀 Introduction
+**pilot-onpremise-rag** is a CLI tool that implements a knowledge-based RAG (Retrieval-Augmented Generation) system with LLM. It provides powerful document retrieval and generation capabilities while ensuring data privacy.
+## 🔧 Setup
+### (Optional) Setup External Dependencies
+```bash
+git clone https://github.com/jyje/pilot-onpremise-rag
+cd pilot-onpremise-rag
+docker compose -f docker/compose.yaml up -d
+```
+### Install pirag
+```bash
+git clone https://github.com/jyje/pilot-onpremise-rag
+cd pilot-onpremise-rag
+pip install --upgrade -e ./app
+```
+## 📚 Usage
+### Basic Commands
+```
+# View help
+pirag --help
+# Train documents
+pirag train --source ./documents
+# Ask a question
+pirag ask "Give me a joke for Cat-holic."
+```
+## 🏗️ Project Structure
+```
+pilot-onpremise-rag/
+├── app/                        # Main application directory
+│   ├── main.py                 # CLI main entry point
+│   ├── setup.py                # Package setup configuration
+│   ├── pyproject.toml          # PEP 517/518 build configuration
+│   ├── requirements.txt        # Dependencies
+│   ├── logs/                   # Application logs
+│   └── rag/                    # RAG implementation
+│       ├── config.py           # Configuration settings
+│       ├── agent.py            # Agent implementation
+│       ├── ask/                # Query handling module
+│       ├── train/              # Document training module
+│       ├── test/               # Testing module
+│       └── doctor/             # Diagnostic tools
+├── VERSION                     # Project version
+├── docker/                     # Docker configuration
+├── assets/                     # Static assets (Files are not included)
+└── LICENSE                     # License information
+```
+## 🔄 How It Works
+1. **Document Training**: Process local documents and store in vector database
+2. **Search Engine**: Find document chunks related to user queries
+3. **Context Generation**: Create LLM prompts from retrieved documents
+4. **Response Generation**: Provide accurate responses via local LLM
+## 💡 Key Features
+- **Privacy Guaranteed**: All data stays local and all processing runs locally
+- **Multiple Document Support**: Support for PDF, Markdown, TXT, DOCX, and other formats
+- **Custom LLM**: Compatible with various local LLM models
+- **Vector Database**: Vector DB integration for efficient document retrieval
+## 🧪 Performance Optimization
+| Configuration | Memory Usage | Response Speed | Suitable Use Cases |
+|--------------|-------------|---------------|-------------------|
+| Light Model | 4-6GB | Fast | Simple queries, low-spec hardware |
+| Medium Model | 8-12GB | Medium | General use, most queries |
+| Large Model | 16GB+ | Slow | Complex document analysis, expert answers |
+## 🔗 References
+- [LangChain Documentation](https://python.langchain.com/docs/get_started/introduction)
+- [LLM Optimization Techniques](https://huggingface.co/docs/optimum/index)
+- [RAG Paper](https://arxiv.org/abs/2005.11401)
+## Contributing
+Any contributions are welcome!
+### Current Maintainers
+- [Studio R4iny](https://github.com/studior4iny)
+    - [jyje](https://github.com/jyje), [semir4in](https://github.com/semir4in) (Same person)

pirag-0.1.0/app/main.py ADDED Viewed

@@ -0,0 +1,80 @@
+import argparse, os
+from dotenv import load_dotenv
+from loguru import logger
+load_dotenv(dotenv_path=os.environ.get('ENV_FILE', '.env'), override=True)
+from app.rag.config import top_parser, common_parser, setup_logger
+from app.rag.doctor import help as doctor_help, parser as doctor_parser, route as doctor_route
+from app.rag.train import help as train_help, parser as train_parser, route as train_route
+from app.rag.ask import help as ask_help, parser as ask_parser, route as ask_route
+from app.rag.test import help as test_help, parser as test_parser, route as test_route
+# Root parser: inherits the global options from top_parser. argparse's
+# automatic help option is switched off here (add_help=False).
+parser = argparse.ArgumentParser(
+    description='Pilot of On-Premise RAG',
+    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    parents=[top_parser],
+    add_help=False,
+)
+# Sub-commands; the selected name is stored in `args.command`.
+subparsers = parser.add_subparsers(title='commands', dest='command')
+# Each command combines the global options, the common options and its own
+# module-level parser. Registered in the order: doctor, train, test, ask.
+for _name, _summary, _command_parser in (
+    ('doctor', doctor_help, doctor_parser),
+    ('train', train_help, train_parser),
+    ('test', test_help, test_parser),
+    ('ask', ask_help, ask_parser),
+):
+    subparsers.add_parser(
+        _name,
+        help=_summary,
+        description=_command_parser.description,
+        parents=[top_parser, common_parser, _command_parser],
+        add_help=False,
+    )
+def main():
+    """Parse the command line and dispatch to the selected sub-command.
+    Logging is configured from ``--log-level`` before the first message is
+    emitted, so the start-up line honours the requested level and is sent to
+    the sinks installed by ``setup_logger``. When no sub-command is given,
+    the top-level help is printed instead.
+    """
+    args = parser.parse_args()
+    # Configure the sinks first; anything logged earlier only reaches the
+    # bootstrap stderr handler that app.rag.config installs at import time.
+    setup_logger(args.log_level)
+    # Build the message without the trailing space it used to carry when no
+    # command was supplied.
+    started = "RAG Started"
+    if args.command:
+        started += f" with command: {args.command}"
+    logger.info(started)
+    logger.debug(f"Parsed arguments: {args}")
+    # Map each sub-command name to its route function.
+    routes = {
+        'doctor': doctor_route,
+        'ask': ask_route,
+        'train': train_route,
+        'test': test_route,
+    }
+    route = routes.get(args.command)
+    if route is None:
+        parser.print_help()
+        return
+    route(args)
+if __name__ == '__main__':
+    main()

pirag-0.1.0/app/rag/agent.py ADDED Viewed

@@ -0,0 +1,64 @@
+from langchain_openai.llms import OpenAI
+from langchain_openai.embeddings import OpenAIEmbeddings
+from pymilvus import MilvusClient
+class Agent():
+    """Bundle of connection settings for the LLM, embedding model and Milvus.
+    The three clients are built lazily by the ``_get_*_client`` helpers and
+    cached on the instance; ``__init__`` itself only stores the settings.
+    Args:
+        llm_base_url: Base URL passed to the ``OpenAI`` LLM client.
+        llm_api_key: API key passed to the LLM client.
+        llm_model: Model name passed to the LLM client.
+        embedding_base_url: Base URL passed to ``OpenAIEmbeddings``.
+        embedding_api_key: API key passed to the embedding client.
+        embedding_model: Model name passed to the embedding client.
+        milvus_host: Milvus endpoint. A bare ``host[:port]`` is accepted and
+            turned into an ``http://`` URI before connecting.
+        milvus_database: Name of the Milvus database to connect to.
+        milvus_collection: Collection name. It is only stored here; it is not
+            passed to the ``MilvusClient`` constructor.
+    """
+    def __init__(
+        self,
+        llm_base_url: str,
+        llm_api_key: str,
+        llm_model: str,
+        embedding_base_url: str,
+        embedding_api_key: str,
+        embedding_model: str,
+        milvus_host: str,
+        milvus_database: str,
+        milvus_collection: str,
+    ):
+        self.llm_base_url = llm_base_url
+        self.llm_api_key = llm_api_key
+        self.llm_model = llm_model
+        self.embedding_base_url = embedding_base_url
+        self.embedding_api_key = embedding_api_key
+        self.embedding_model = embedding_model
+        self.milvus_host = milvus_host
+        self.milvus_database = milvus_database
+        self.milvus_collection = milvus_collection
+        # Lazily created clients; each stays None until first requested.
+        self._llm_client: OpenAI = None
+        self._embedding_client: OpenAIEmbeddings = None
+        self._milvus_client: MilvusClient = None
+    def retrieve_knowledge_base(self, query: str) -> str:
+        """Instantiate the clients needed to answer ``query``.
+        TODO: retrieval itself is not implemented yet. The method only makes
+        sure the three clients exist and currently returns ``None`` despite
+        the ``str`` return annotation.
+        """
+        self._get_llm_client()
+        self._get_embedding_client()
+        self._get_milvus_client()
+    def _get_llm_client(self) -> OpenAI:
+        """Return the cached LLM client, creating it on first use."""
+        if self._llm_client is None:
+            self._llm_client = OpenAI(
+                base_url=self.llm_base_url,
+                api_key=self.llm_api_key,
+                model=self.llm_model,
+            )
+        return self._llm_client
+    def _get_embedding_client(self) -> OpenAIEmbeddings:
+        """Return the cached embedding client, creating it on first use."""
+        if self._embedding_client is None:
+            self._embedding_client = OpenAIEmbeddings(
+                base_url=self.embedding_base_url,
+                api_key=self.embedding_api_key,
+                model=self.embedding_model,
+            )
+        return self._embedding_client
+    def _get_milvus_client(self) -> MilvusClient:
+        """Return the cached Milvus client, creating it on first use."""
+        if self._milvus_client is None:
+            # MilvusClient is configured through `uri` and `db_name`. The
+            # previously used `host`/`database`/`collection` keywords are not
+            # among its documented constructor parameters.
+            uri = self.milvus_host
+            # Accept a bare "host:port"; leave full URIs and Milvus Lite
+            # "*.db" file paths untouched.
+            if "://" not in uri and not uri.endswith(".db"):
+                uri = f"http://{uri}"
+            self._milvus_client = MilvusClient(
+                uri=uri,
+                db_name=self.milvus_database,
+            )
+        return self._milvus_client

pirag-0.1.0/app/rag/config.py ADDED Viewed

@@ -0,0 +1,159 @@
+import argparse, os, sys
+from pathlib import Path
+from loguru import logger
+# Logger format constants (loguru format strings).
+TIME_FORMAT = "{time:YYYY-MM-DD HH:mm:ss.SSS!UTC}Z"
+FILE_FORMAT = TIME_FORMAT + " | {level: <8} | {name}:{function}:{line} - {message}"
+# Shared leading part of both console layouts: timestamp and padded level.
+_CONSOLE_PREFIX = "<green>" + TIME_FORMAT + "</green> | <level>{level: <8}</level> | "
+CONSOLE_FORMAT_FULL = _CONSOLE_PREFIX + "<cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>\n"
+CONSOLE_FORMAT_SIMPLE = _CONSOLE_PREFIX + "<level>{message}</level>\n"
+def _bootstrap_console_format(record):
+    """Return the compact layout for INFO records and the full one otherwise."""
+    if record["level"].name == "INFO":
+        return CONSOLE_FORMAT_SIMPLE
+    return CONSOLE_FORMAT_FULL
+# Bootstrap sink used until setup_logger() runs: drop the existing handlers
+# and log INFO and above to stderr.
+logger.remove()
+logger.add(
+    sink=sys.stderr,
+    level="INFO",
+    format=_bootstrap_console_format,
+    colorize=True,
+)
+def setup_logger(log_level: str, log_path: str = "app/logs", log_save: bool = True):
+    """Replace all logger sinks with a console sink and an optional file sink.
+    Args:
+        log_level: Minimum level name applied to every sink (e.g. ``"INFO"``).
+        log_path: Directory that receives ``app.log``. Defaults to the
+            previously hard-coded ``app/logs`` so existing callers are
+            unaffected; it is created if missing.
+        log_save: When false, no file sink is added and nothing is created on
+            disk. Defaults to True, matching the previous behaviour.
+    """
+    logger.remove()
+    if log_save:
+        log_dir = Path(log_path)
+        log_dir.mkdir(exist_ok=True, parents=True)
+        # File handler
+        logger.add(
+            sink = log_dir / "app.log",
+            level = log_level,
+            rotation = "500 MB",
+            format = FILE_FORMAT,
+            serialize = False,
+            enqueue = True,
+            backtrace = True,
+            diagnose = True,
+            catch = True
+        )
+    # Console handler: compact layout for INFO, full layout for other levels.
+    logger.add(
+        sink = sys.stderr,
+        level = log_level,
+        format = lambda record: CONSOLE_FORMAT_SIMPLE if record["level"].name == "INFO" else CONSOLE_FORMAT_FULL,
+        colorize = True
+    )
+class EnvDefault(argparse.Action):
+    """Custom argparse action that uses environment variables as defaults.
+    If the named environment variable exists, its value replaces ``default``.
+    A value given on the command line still wins over the default.
+    For value-less flags (``nargs=0``, the store_true/store_false style) the
+    environment variable is interpreted as a boolean where 'true', '1', 'yes'
+    or 'on' (case-insensitive) mean True; passing the flag on the command
+    line stores ``const`` (or True when no ``const`` was given).
+    Args:
+        envvar (str): Name of the environment variable to use as default.
+            A falsy value disables the environment lookup.
+        required (bool, optional): Whether the argument is required. Defaults to True.
+            Note: if any default is available (from the environment or from
+            ``default``), required is set to False.
+        default (Any, optional): Default value if environment variable is not set. Defaults to None.
+        **kwargs: Additional arguments passed to argparse.Action
+    Example:
+        ```python
+        parser.add_argument(
+            '--log-level',
+            envvar='LOG_LEVEL',
+            help='Logging level',
+            default='INFO',
+            action=EnvDefault
+        )
+        ```
+    Note:
+        The help text is automatically updated to include the environment variable name.
+    """
+    # Environment strings (compared lower-cased) that count as True for flags.
+    _TRUE_STRINGS = ('true', '1', 'yes', 'on')
+    def __init__(self, envvar, required=True, default=None, **kwargs):
+        nargs = kwargs.get('nargs')
+        # A flag consumes no command-line value (nargs=0). The older
+        # "const given without nargs" spelling is still recognised.
+        is_flag = nargs == 0 or (nargs is None and kwargs.get('const') is not None)
+        if envvar and envvar in os.environ:
+            env_value = os.environ[envvar]
+            if is_flag:
+                # Convert string environment variable to boolean
+                default = env_value.strip().lower() in self._TRUE_STRINGS
+            else:
+                default = env_value
+            logger.debug(f"Using {envvar}={default} from environment")
+        if envvar:
+            # Tolerate a missing help text; do not raise KeyError.
+            base_help = kwargs.get("help") or ""
+            kwargs["help"] = f"{base_help} (envvar: {envvar})".strip()
+        # Test against None so a legitimate falsy default (e.g. False for a
+        # flag) still makes the argument optional.
+        if required and default is not None:
+            required = False
+        super().__init__(default=default, required=required, **kwargs)
+        self.envvar = envvar
+    def __call__(self, parser, namespace, values, option_string=None):
+        if self.nargs == 0:
+            # argparse passes an empty list for value-less flags; store the
+            # constant, not that list.
+            value = self.const if self.const is not None else True
+        elif values is not None:
+            value = values
+        else:
+            value = self.default
+        setattr(namespace, self.dest, value)
+# Top-level parser with common options
+top_parser = argparse.ArgumentParser(add_help=False)
+# argparse's automatic help option is disabled above, so -h/--help is
+# registered explicitly.
+top_parser.add_argument(
+    '-h', '--help',
+    action='help',
+    default=argparse.SUPPRESS,
+    help='Show help message and exit',
+)
+# Environment-backed options, registered in this order via EnvDefault.
+for _flag, _options in (
+    ('--env-file', dict(
+        envvar='ENV_FILE',
+        help='Path to environment file',
+        default='.env',
+        type=str,
+    )),
+    ('--log-level', dict(
+        envvar='LOG_LEVEL',
+        help='Logging level',
+        default='INFO',
+        type=lambda value: value.upper(),
+        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
+        required=False,
+    )),
+    ('--log-path', dict(
+        envvar='LOG_PATH',
+        help='Path to log file',
+        default='.logs',
+        type=str,
+        required=False,
+    )),
+    ('--log-save', dict(
+        envvar='LOG_SAVE',
+        help='Save log to file. If this flag is set, the log will be saved to the file specified in the `--log-path`.',
+        default=False,
+        const=True,
+        nargs=0,
+        type=bool,
+        required=False,
+    )),
+):
+    top_parser.add_argument(_flag, action=EnvDefault, **_options)
+# Parser for options shared by every sub-command; it declares none here.
+common_parser = argparse.ArgumentParser(add_help=False)

pirag-0.1.0/app/requirements.txt ADDED Viewed

@@ -0,0 +1,11 @@
+# Core dependencies
+python-dotenv < 1.2
+loguru < 0.8
+# Development
+pytest < 8.4
+black < 25.2
+# RAG
+ragas < 0.3
+pymilvus < 2.6

pirag-0.1.0/app/setup.py ADDED Viewed

@@ -0,0 +1,28 @@
+import os
+import sys
+from pathlib import Path
+from setuptools import setup, find_packages
+try:
+    # Python 3.11+: TOML parser from the standard library, same API as tomli.
+    import tomllib as tomli
+except ModuleNotFoundError:
+    # Older interpreters: third-party backport.
+    import tomli
+# Directory containing this setup.py; the paths below are resolved against it.
+HERE = Path(__file__).resolve().parent
+# Load requirements
+with open(HERE / 'requirements.txt', 'r', encoding='utf-8') as f:
+    # Strip each line before testing for '#', so indented comment lines are
+    # skipped as well.
+    requirements = [
+        requirement
+        for requirement in (line.strip() for line in f)
+        if requirement and not requirement.startswith('#')
+    ]
+# Get version from pyproject.toml (one directory above this file)
+with open(HERE.parent / 'pyproject.toml', 'rb') as f:
+    pyproject = tomli.load(f)
+    version = pyproject["project"]["version"]
+APP_NAME = "pirag"
+# NOTE(review): "." is not a valid package name, and the "main:main" entry
+# point differs from the "app.main:main" script declared in pyproject.toml.
+# Left unchanged here; confirm the intended layout before relying on this
+# legacy setup script.
+setup(
+    name = APP_NAME,
+    version = version,
+    packages = [".", "rag"],
+    package_dir = {"": ".", "rag": "rag"},
+    include_package_data = True,
+    install_requires = requirements,
+    entry_points = {
+        "console_scripts": [
+            f"{APP_NAME}=main:main",
+        ],
+    },
+)

pirag-0.1.0/pirag.egg-info/PKG-INFO ADDED Viewed

@@ -0,0 +1,132 @@
+Metadata-Version: 2.4
+Name: pirag
+Version: 0.1.0
+Summary: CLI Projects of On-Premise RAG. You can use your own LLM and vector DB. Or just add remote LLM servers and vector DB.
+Author-email: semir4in <semir4in@gmail.com>, jyje <jyjeon@outlook.com>
+Project-URL: Homepage, https://github.com/jyje/pilot-onpremise-rag
+Project-URL: Repository, https://github.com/jyje/pilot-onpremise-rag
+Project-URL: Issue, https://github.com/jyje/pilot-onpremise-rag/issues
+Requires-Python: >=3.9
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: python-dotenv<1.2
+Requires-Dist: loguru<0.8
+Requires-Dist: pytest<8.4
+Requires-Dist: black<25.2
+Requires-Dist: ragas<0.3
+Requires-Dist: pymilvus<2.6
+Dynamic: license-file
+<div align="center">
+# pirag: pilot-onpremise-rag
+<!-- <img alt="RAG Logo" src="docs/rag-logo.jpg" width="450" style="object-fit: contain; max-width: 100%; aspect-ratio: 16 / 9;"> -->
+🌱 An LLM+RAG CLI project that runs in an on-premise environment
+[![Python](https://img.shields.io/badge/3.9+-3776AB?style=flat&logo=Python&logoColor=white&label=Python)](https://www.python.org/)
+[![CLI](https://img.shields.io/badge/CLI-orange?style=flat&logo=iterm2&logoColor=white)](https://typer.tiangolo.com/)
+[![LLM](https://img.shields.io/badge/LLM-green?style=flat&logo=OpenAI&logoColor=white)](https://openai.com)
+[![LangChain](https://img.shields.io/badge/LangChain-blue?style=flat&logo=Langchain&logoColor=white)](https://langchain.com)
+[![Milvus](https://img.shields.io/badge/Milvus-red?style=flat&logo=Milvus&logoColor=white)](https://milvus.io/)
+[![MinIO](https://img.shields.io/badge/MinIO-red?style=flat&logo=MinIO&logoColor=white)](https://min.io/)
+<!-- [![Docker](https://img.shields.io/badge/Docker-blue?style=flat&logo=Docker&logoColor=white)](https://docker.com) -->
+</div>
+## 🚀 Introduction
+**pilot-onpremise-rag** is a CLI tool that implements a knowledge-based RAG (Retrieval-Augmented Generation) system with LLM. It provides powerful document retrieval and generation capabilities while ensuring data privacy.
+## 🔧 Setup
+### (Optional) Setup External Dependencies
+```bash
+git clone https://github.com/jyje/pilot-onpremise-rag
+cd pilot-onpremise-rag
+docker compose -f docker/compose.yaml up -d
+```
+### Install pirag
+```bash
+git clone https://github.com/jyje/pilot-onpremise-rag
+cd pilot-onpremise-rag
+pip install --upgrade -e ./app
+```
+## 📚 Usage
+### Basic Commands
+```
+# View help
+pirag --help
+# Train documents
+pirag train --source ./documents
+# Ask a question
+pirag ask "Give me a joke for Cat-holic."
+```
+## 🏗️ Project Structure
+```
+pilot-onpremise-rag/
+├── app/                        # Main application directory
+│   ├── main.py                 # CLI main entry point
+│   ├── setup.py                # Package setup configuration
+│   ├── pyproject.toml          # PEP 517/518 build configuration
+│   ├── requirements.txt        # Dependencies
+│   ├── logs/                   # Application logs
+│   └── rag/                    # RAG implementation
+│       ├── config.py           # Configuration settings
+│       ├── agent.py            # Agent implementation
+│       ├── ask/                # Query handling module
+│       ├── train/              # Document training module
+│       ├── test/               # Testing module
+│       └── doctor/             # Diagnostic tools
+├── VERSION                     # Project version
+├── docker/                     # Docker configuration
+├── assets/                     # Static assets (Files are not included)
+└── LICENSE                     # License information
+```
+## 🔄 How It Works
+1. **Document Training**: Process local documents and store in vector database
+2. **Search Engine**: Find document chunks related to user queries
+3. **Context Generation**: Create LLM prompts from retrieved documents
+4. **Response Generation**: Provide accurate responses via local LLM
+## 💡 Key Features
+- **Privacy Guaranteed**: All data stays local and all processing runs locally
+- **Multiple Document Support**: Support for PDF, Markdown, TXT, DOCX, and other formats
+- **Custom LLM**: Compatible with various local LLM models
+- **Vector Database**: Vector DB integration for efficient document retrieval
+## 🧪 Performance Optimization
+| Configuration | Memory Usage | Response Speed | Suitable Use Cases |
+|--------------|-------------|---------------|-------------------|
+| Light Model | 4-6GB | Fast | Simple queries, low-spec hardware |
+| Medium Model | 8-12GB | Medium | General use, most queries |
+| Large Model | 16GB+ | Slow | Complex document analysis, expert answers |
+## 🔗 References
+- [LangChain Documentation](https://python.langchain.com/docs/get_started/introduction)
+- [LLM Optimization Techniques](https://huggingface.co/docs/optimum/index)
+- [RAG Paper](https://arxiv.org/abs/2005.11401)
+## Contributing
+Any contributions are welcome!
+### Current Maintainers
+- [Studio R4iny](https://github.com/studior4iny)
+    - [jyje](https://github.com/jyje), [semir4in](https://github.com/semir4in) (Same person)

pirag-0.1.0/pirag.egg-info/SOURCES.txt ADDED Viewed

@@ -0,0 +1,14 @@
+LICENSE
+README.md
+pyproject.toml
+app/main.py
+app/requirements.txt
+app/setup.py
+app/rag/agent.py
+app/rag/config.py
+pirag.egg-info/PKG-INFO
+pirag.egg-info/SOURCES.txt
+pirag.egg-info/dependency_links.txt
+pirag.egg-info/entry_points.txt
+pirag.egg-info/requires.txt
+pirag.egg-info/top_level.txt

pirag-0.1.0/pirag.egg-info/dependency_links.txt ADDED Viewed

@@ -0,0 +1 @@
+

pirag-0.1.0/pirag.egg-info/entry_points.txt ADDED Viewed

@@ -0,0 +1,2 @@
+[console_scripts]
+pirag = app.main:main

pirag-0.1.0/pirag.egg-info/requires.txt ADDED Viewed

@@ -0,0 +1,6 @@
+python-dotenv<1.2
+loguru<0.8
+pytest<8.4
+black<25.2
+ragas<0.3
+pymilvus<2.6

pirag-0.1.0/pirag.egg-info/top_level.txt ADDED Viewed

@@ -0,0 +1 @@
+app

pirag-0.1.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,39 @@
+[project]
+name = "pirag"
+version = "0.1.0"
+description = "CLI Projects of On-Premise RAG. You can use your own LLM and vector DB. Or just add remote LLM servers and vector DB."
+authors = [
+    { name="semir4in", email="semir4in@gmail.com" },
+    { name="jyje", email="jyjeon@outlook.com" }
+]
+readme = "README.md"
+requires-python = ">=3.9"
+dynamic = ["dependencies"]
+[project.urls]
+Homepage = "https://github.com/jyje/pilot-onpremise-rag"
+Repository = "https://github.com/jyje/pilot-onpremise-rag"
+Issue = "https://github.com/jyje/pilot-onpremise-rag/issues"
+[project.scripts]
+pirag = "app.main:main"
+[build-system]
+requires = [
+    "setuptools>=61.0",
+    "wheel<0.46",
+    "build>=1.0.3",
+    "pip>=23.0",
+    "tomli<2.1",
+]
+build-backend = "setuptools.build_meta"
+[tool.setuptools]
+packages = ["app", "app.rag"]
+[tool.setuptools.package-dir]
+app = "app"
+"app.rag" = "app/rag"
+[tool.setuptools.dynamic]
+dependencies = {file = ["app/requirements.txt"]}

pirag-0.1.0/setup.cfg ADDED Viewed

@@ -0,0 +1,4 @@
+[egg_info]
+tag_build =
+tag_date = 0