docpilot-cli 1.0.3__tar.gz → 1.0.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {docpilot_cli-1.0.3/src/docpilot_cli.egg-info → docpilot_cli-1.0.4}/PKG-INFO +1 -1
- {docpilot_cli-1.0.3 → docpilot_cli-1.0.4}/pyproject.toml +1 -2
- {docpilot_cli-1.0.3 → docpilot_cli-1.0.4}/src/docpilot/chat.py +5 -5
- {docpilot_cli-1.0.3 → docpilot_cli-1.0.4}/src/docpilot/cli.py +2 -1
- {docpilot_cli-1.0.3 → docpilot_cli-1.0.4}/src/docpilot/scrape.py +6 -1
- {docpilot_cli-1.0.3 → docpilot_cli-1.0.4}/src/docpilot/store.py +80 -15
- {docpilot_cli-1.0.3 → docpilot_cli-1.0.4/src/docpilot_cli.egg-info}/PKG-INFO +1 -1
- {docpilot_cli-1.0.3 → docpilot_cli-1.0.4}/LICENSE +0 -0
- {docpilot_cli-1.0.3 → docpilot_cli-1.0.4}/README.md +0 -0
- {docpilot_cli-1.0.3 → docpilot_cli-1.0.4}/setup.cfg +0 -0
- {docpilot_cli-1.0.3 → docpilot_cli-1.0.4}/src/docpilot/__init__.py +0 -0
- {docpilot_cli-1.0.3 → docpilot_cli-1.0.4}/src/docpilot/__main__.py +0 -0
- {docpilot_cli-1.0.3 → docpilot_cli-1.0.4}/src/docpilot/embed.py +0 -0
- {docpilot_cli-1.0.3 → docpilot_cli-1.0.4}/src/docpilot_cli.egg-info/SOURCES.txt +0 -0
- {docpilot_cli-1.0.3 → docpilot_cli-1.0.4}/src/docpilot_cli.egg-info/dependency_links.txt +0 -0
- {docpilot_cli-1.0.3 → docpilot_cli-1.0.4}/src/docpilot_cli.egg-info/entry_points.txt +0 -0
- {docpilot_cli-1.0.3 → docpilot_cli-1.0.4}/src/docpilot_cli.egg-info/requires.txt +0 -0
- {docpilot_cli-1.0.3 → docpilot_cli-1.0.4}/src/docpilot_cli.egg-info/top_level.txt +0 -0
- {docpilot_cli-1.0.3 → docpilot_cli-1.0.4}/tests/test_cli.py +0 -0
- {docpilot_cli-1.0.3 → docpilot_cli-1.0.4}/tests/test_store.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "docpilot-cli"
|
|
3
|
-
version = "1.0.3"
|
|
3
|
+
version = "1.0.4"
|
|
4
4
|
description = "A local-first RAG pipeline CLI tool"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
requires-python = ">=3.12"
|
|
@@ -22,7 +22,6 @@ dependencies = [
|
|
|
22
22
|
|
|
23
23
|
[project.scripts]
|
|
24
24
|
docpilot = "docpilot.cli:app"
|
|
25
|
-
|
|
26
25
|
[tool.setuptools.packages.find]
|
|
27
26
|
where = ["src"]
|
|
28
27
|
|
|
@@ -2,6 +2,7 @@ from langchain_ollama.llms import OllamaLLM as Ollama
|
|
|
2
2
|
from langchain_core.prompts import ChatPromptTemplate
|
|
3
3
|
from langchain_core.output_parsers import StrOutputParser
|
|
4
4
|
import os
|
|
5
|
+
from rich.console import Console
|
|
5
6
|
from . import store
|
|
6
7
|
from langchain_chroma import Chroma
|
|
7
8
|
from langchain_ollama import OllamaEmbeddings
|
|
@@ -10,22 +11,21 @@ config = store.load_config()
|
|
|
10
11
|
retrieval_k = int(config.get("retrieval_k", 6))
|
|
11
12
|
max_context_chars = int(config.get("max_context_chars", 3500))
|
|
12
13
|
max_doc_chars = int(config.get("max_doc_chars", 700))
|
|
14
|
+
console = Console()
|
|
13
15
|
|
|
14
16
|
model = Ollama(
|
|
15
17
|
model=config.get("default_model", "deepseek-coder-v2"),
|
|
16
|
-
num_predict=int(config.get("num_predict",
|
|
18
|
+
num_predict=int(config.get("num_predict", 400)),
|
|
17
19
|
num_ctx=int(config.get("num_ctx", 2048)),
|
|
18
20
|
num_thread=int(config.get("num_thread", max(1, (os.cpu_count() or 4) - 1))),
|
|
19
21
|
temperature=float(config.get("temperature", 0.1)),
|
|
20
22
|
)
|
|
21
|
-
|
|
22
23
|
def _get_vectorstore():
|
|
23
24
|
"""Get vectorstore for the active project."""
|
|
24
25
|
embeddings = OllamaEmbeddings(model=config.get("default_embed_model", "mxbai-embed-large:335m"))
|
|
25
26
|
db_location = store.get_active_project_path()
|
|
26
27
|
return Chroma(collection_name="documents", persist_directory=str(db_location), embedding_function=embeddings)
|
|
27
28
|
|
|
28
|
-
retriever = None
|
|
29
29
|
|
|
30
30
|
# Query Transformation Chain
|
|
31
31
|
rewrite_template = """You are an AI assistant helping to formulate a search query for a vector database.
|
|
@@ -37,7 +37,7 @@ rewrite_prompt = ChatPromptTemplate.from_template(rewrite_template)
|
|
|
37
37
|
rewrite_chain = rewrite_prompt | model | StrOutputParser()
|
|
38
38
|
|
|
39
39
|
template = """
|
|
40
|
-
You are an assistant for answering questions based on the following ingested documents.
|
|
40
|
+
You are an assistant named docpilot for answering questions based on the following ingested documents.
|
|
41
41
|
Use the information in the documents to answer the question as best as you can.
|
|
42
42
|
If you don't know the answer, say you don't know.
|
|
43
43
|
Always use the information in the documents and never make up an answer.
|
|
@@ -77,7 +77,7 @@ def askai(question):
|
|
|
77
77
|
max_doc_chars=max_doc_chars,
|
|
78
78
|
)
|
|
79
79
|
if rag_text == "":
|
|
80
|
-
print("NO RAG obtained add some documents to ingest")
|
|
80
|
+
console.print("[italic blue]Note - NO RAG obtained add some documents to ingest[/italic blue]")
|
|
81
81
|
result = chain.invoke({"reviews": rag_text, "question": question})
|
|
82
82
|
return result
|
|
83
83
|
|
|
@@ -120,7 +120,8 @@ def show():
|
|
|
120
120
|
|
|
121
121
|
info_panel = Panel(
|
|
122
122
|
f"[bold]Version:[/bold] [cyan]{PROJECT_VERSION}[/cyan]\n"
|
|
123
|
-
f"[bold]Description:[/bold] [cyan]{PROJECT_DESCRIPTION}[/cyan]"
|
|
123
|
+
f"[bold]Description:[/bold] [cyan]{PROJECT_DESCRIPTION}[/cyan]\n"
|
|
124
|
+
f"[bold italic][yellow]Developed by Aswin Ashok as an open source project[/yellow]",
|
|
124
125
|
border_style="cyan",
|
|
125
126
|
title="[bold green]System Info[/bold green]",
|
|
126
127
|
padding=(1, 2)
|
|
@@ -4,9 +4,14 @@ from urllib.parse import urljoin, urlparse
|
|
|
4
4
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
5
5
|
from collections import deque
|
|
6
6
|
import time
|
|
7
|
+
from importlib.metadata import version, PackageNotFoundError
|
|
7
8
|
|
|
9
|
+
try:
|
|
10
|
+
__version__ = version("docpilot-cli")
|
|
11
|
+
except PackageNotFoundError:
|
|
12
|
+
__version__ = "unknown"
|
|
8
13
|
|
|
9
|
-
REQUEST_HEADERS = {"User-Agent": "docpilot/
|
|
14
|
+
REQUEST_HEADERS = {"User-Agent": f"docpilot/{__version__} (+https://pypi.org/project/docpilot-cli/)"}
|
|
10
15
|
MAX_RETRIES = 4
|
|
11
16
|
BACKOFF_SECONDS = 0.8
|
|
12
17
|
|
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
from chromadb.utils import embedding_functions
|
|
2
|
+
from bs4 import element
|
|
3
|
+
from rich.console import Console
|
|
1
4
|
import tomllib
|
|
2
5
|
import tomli_w
|
|
3
6
|
from pathlib import Path
|
|
@@ -8,6 +11,7 @@ path = Path.home() / ".docpilot"
|
|
|
8
11
|
CONFIG_PATH = path / "config.toml"
|
|
9
12
|
PROJECTS_DIR = path / "projects"
|
|
10
13
|
|
|
14
|
+
console = Console()
|
|
11
15
|
|
|
12
16
|
def _get_available_models():
|
|
13
17
|
try:
|
|
@@ -28,8 +32,8 @@ DEFAULT_CONFIG = {
|
|
|
28
32
|
"retrieval_k": 6,
|
|
29
33
|
"max_context_chars": 3500,
|
|
30
34
|
"max_doc_chars": 700,
|
|
31
|
-
"num_predict":
|
|
32
|
-
"num_ctx":
|
|
35
|
+
"num_predict": 500,
|
|
36
|
+
"num_ctx": 4096,
|
|
33
37
|
"num_thread": max(1, (os.cpu_count() or 4) - 1),
|
|
34
38
|
"temperature": 0.1,
|
|
35
39
|
}
|
|
@@ -39,9 +43,6 @@ def check_ollama_connection():
|
|
|
39
43
|
try:
|
|
40
44
|
ollama.list()
|
|
41
45
|
except Exception:
|
|
42
|
-
from rich.console import Console
|
|
43
|
-
|
|
44
|
-
console = Console()
|
|
45
46
|
console.print("\n[bold red]❌ Error: Could not connect to Ollama.[/bold red]")
|
|
46
47
|
console.print("[yellow]Please ensure the Ollama application is running and try again.[/yellow]\n")
|
|
47
48
|
import sys
|
|
@@ -51,6 +52,9 @@ def check_ollama_connection():
|
|
|
51
52
|
|
|
52
53
|
def init_config():
|
|
53
54
|
"""Initialize config file with defaults if it doesn't exist."""
|
|
55
|
+
if path.exists():
|
|
56
|
+
if not CONFIG_PATH.exists():
|
|
57
|
+
console.print("[bold red]Config file lost, recreating it!")
|
|
54
58
|
if not CONFIG_PATH.exists():
|
|
55
59
|
CONFIG_PATH.parent.mkdir(parents=True, exist_ok=True)
|
|
56
60
|
PROJECTS_DIR.mkdir(parents=True, exist_ok=True)
|
|
@@ -69,7 +73,22 @@ def interactive_setup(first_time=False):
|
|
|
69
73
|
console.print("\n[bold cyan]🚀 Welcome to Docpilot! Let's do a quick setup.[/bold cyan]")
|
|
70
74
|
else:
|
|
71
75
|
console.print("\n[bold cyan]⚙️ Docpilot Model Setup[/bold cyan]")
|
|
72
|
-
|
|
76
|
+
|
|
77
|
+
#Finding and handling error of embeding model
|
|
78
|
+
embedding_modelfound=0
|
|
79
|
+
embedsupportedmodel = [
|
|
80
|
+
"all-minilm",
|
|
81
|
+
"snowflake-arctic-embed",
|
|
82
|
+
"bge-m3",
|
|
83
|
+
"bge-large",
|
|
84
|
+
"paraphrase-multilingual",
|
|
85
|
+
"mxbai-embed-large",
|
|
86
|
+
"mxbai-embed-large:latest",
|
|
87
|
+
"mxbai-embed-large:335m",
|
|
88
|
+
"nomic-embed-text",
|
|
89
|
+
]
|
|
90
|
+
embedmodelfound=[]
|
|
91
|
+
embed_model=""
|
|
73
92
|
models = _get_available_models()
|
|
74
93
|
if not models:
|
|
75
94
|
console.print("[yellow]No local Ollama models found! Using default fallbacks.[/yellow]")
|
|
@@ -77,15 +96,56 @@ def interactive_setup(first_time=False):
|
|
|
77
96
|
embed_model = Prompt.ask("Enter embedding model name", default="mxbai-embed-large:335m")
|
|
78
97
|
chat_model = Prompt.ask("Enter chat model name", default="qwen2.5:latest")
|
|
79
98
|
else:
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
99
|
+
for i in models:
|
|
100
|
+
for j in embedsupportedmodel:
|
|
101
|
+
if i.lower()==j.lower():
|
|
102
|
+
embedding_modelfound=1
|
|
103
|
+
embedmodelfound.append(j)
|
|
104
|
+
break
|
|
105
|
+
if not embedding_modelfound:
|
|
106
|
+
console.print("[bold yellow]Embedding model not found and is not encouraged to use this way")
|
|
107
|
+
while True:
|
|
108
|
+
userquestion = Prompt.ask("[blue]Run ollama pull mxbai-embed-large to fix this(y/n).[/blue]").lower()
|
|
109
|
+
if userquestion == "y":
|
|
110
|
+
os.system("ollama pull mxbai-embed-large")
|
|
111
|
+
embed_model="mxbai-embed-large"
|
|
112
|
+
break
|
|
113
|
+
elif userquestion == "n":
|
|
114
|
+
console.print("[red]Exiting...[/red]")
|
|
115
|
+
exit()
|
|
116
|
+
else:
|
|
117
|
+
console.print("Enter a valid option to continue")
|
|
118
|
+
|
|
119
|
+
#Asking embedding model if model not latestly downloaded
|
|
120
|
+
if embed_model == "":
|
|
121
|
+
embed_model = Prompt.ask(
|
|
122
|
+
"Select your [bold green]embedding model[/bold green]", choices=embedmodelfound, default=embedmodelfound[0]
|
|
123
|
+
)
|
|
124
|
+
#Chat Model
|
|
125
|
+
chatspecific_models=list(set(models)-set(embedsupportedmodel))
|
|
126
|
+
console.print("[bold yellow]Available chat Models:[/bold yellow]")
|
|
127
|
+
for i in range(len(chatspecific_models)):
|
|
128
|
+
console.print(f"[bold green]{i}:->{chatspecific_models[i]} .[/bold green]")
|
|
129
|
+
#Smart choice for model
|
|
130
|
+
chat_model=str()
|
|
131
|
+
if not chatspecific_models:
|
|
132
|
+
chatspecific_models = models # Fallback if they only have embedding models downloaded
|
|
133
|
+
|
|
134
|
+
chat_guess = next((m for m in models if "embed" not in m.lower()), chatspecific_models[1] if len(chatspecific_models) > 1 else chatspecific_models[0])
|
|
135
|
+
while True:
|
|
136
|
+
chatmodelanswer = Prompt.ask("Select your [bold blue]chat model[/bold blue]", default=chat_guess)
|
|
137
|
+
if chatmodelanswer.isdigit():
|
|
138
|
+
if int(chatmodelanswer)<len(chatspecific_models):
|
|
139
|
+
chat_model=chatspecific_models[int(chatmodelanswer)]
|
|
140
|
+
break
|
|
141
|
+
else:
|
|
142
|
+
console.print("[red]Invalid model number. Try again.")
|
|
143
|
+
else:
|
|
144
|
+
if check_exist(chatmodelanswer,chatspecific_models):
|
|
145
|
+
chat_model = chatmodelanswer
|
|
146
|
+
break
|
|
147
|
+
else:
|
|
148
|
+
console.print("[red]Invalid Option... try again")
|
|
89
149
|
config = load_config() if not first_time else DEFAULT_CONFIG.copy()
|
|
90
150
|
config["default_embed_model"] = embed_model
|
|
91
151
|
config["default_model"] = chat_model
|
|
@@ -93,6 +153,11 @@ def interactive_setup(first_time=False):
|
|
|
93
153
|
console.print(f"[bold green]✓ Configuration saved to {CONFIG_PATH}[/bold green]\n")
|
|
94
154
|
return load_config()
|
|
95
155
|
|
|
156
|
+
def check_exist(name,listname):
|
|
157
|
+
for i in listname:
|
|
158
|
+
if name == i.lower():
|
|
159
|
+
return True
|
|
160
|
+
return False
|
|
96
161
|
|
|
97
162
|
def load_config():
|
|
98
163
|
if not CONFIG_PATH.exists():
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|