docpilot-cli 1.0.3__tar.gz → 1.0.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (20)
  1. {docpilot_cli-1.0.3/src/docpilot_cli.egg-info → docpilot_cli-1.0.4}/PKG-INFO +1 -1
  2. {docpilot_cli-1.0.3 → docpilot_cli-1.0.4}/pyproject.toml +1 -2
  3. {docpilot_cli-1.0.3 → docpilot_cli-1.0.4}/src/docpilot/chat.py +5 -5
  4. {docpilot_cli-1.0.3 → docpilot_cli-1.0.4}/src/docpilot/cli.py +2 -1
  5. {docpilot_cli-1.0.3 → docpilot_cli-1.0.4}/src/docpilot/scrape.py +6 -1
  6. {docpilot_cli-1.0.3 → docpilot_cli-1.0.4}/src/docpilot/store.py +80 -15
  7. {docpilot_cli-1.0.3 → docpilot_cli-1.0.4/src/docpilot_cli.egg-info}/PKG-INFO +1 -1
  8. {docpilot_cli-1.0.3 → docpilot_cli-1.0.4}/LICENSE +0 -0
  9. {docpilot_cli-1.0.3 → docpilot_cli-1.0.4}/README.md +0 -0
  10. {docpilot_cli-1.0.3 → docpilot_cli-1.0.4}/setup.cfg +0 -0
  11. {docpilot_cli-1.0.3 → docpilot_cli-1.0.4}/src/docpilot/__init__.py +0 -0
  12. {docpilot_cli-1.0.3 → docpilot_cli-1.0.4}/src/docpilot/__main__.py +0 -0
  13. {docpilot_cli-1.0.3 → docpilot_cli-1.0.4}/src/docpilot/embed.py +0 -0
  14. {docpilot_cli-1.0.3 → docpilot_cli-1.0.4}/src/docpilot_cli.egg-info/SOURCES.txt +0 -0
  15. {docpilot_cli-1.0.3 → docpilot_cli-1.0.4}/src/docpilot_cli.egg-info/dependency_links.txt +0 -0
  16. {docpilot_cli-1.0.3 → docpilot_cli-1.0.4}/src/docpilot_cli.egg-info/entry_points.txt +0 -0
  17. {docpilot_cli-1.0.3 → docpilot_cli-1.0.4}/src/docpilot_cli.egg-info/requires.txt +0 -0
  18. {docpilot_cli-1.0.3 → docpilot_cli-1.0.4}/src/docpilot_cli.egg-info/top_level.txt +0 -0
  19. {docpilot_cli-1.0.3 → docpilot_cli-1.0.4}/tests/test_cli.py +0 -0
  20. {docpilot_cli-1.0.3 → docpilot_cli-1.0.4}/tests/test_store.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docpilot-cli
3
- Version: 1.0.3
3
+ Version: 1.0.4
4
4
  Summary: A local-first RAG pipeline CLI tool
5
5
  Requires-Python: >=3.12
6
6
  Description-Content-Type: text/markdown
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "docpilot-cli"
3
- version = "1.0.3"
3
+ version = "1.0.4"
4
4
  description = "A local-first RAG pipeline CLI tool"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.12"
@@ -22,7 +22,6 @@ dependencies = [
22
22
 
23
23
  [project.scripts]
24
24
  docpilot = "docpilot.cli:app"
25
-
26
25
  [tool.setuptools.packages.find]
27
26
  where = ["src"]
28
27
 
@@ -2,6 +2,7 @@ from langchain_ollama.llms import OllamaLLM as Ollama
2
2
  from langchain_core.prompts import ChatPromptTemplate
3
3
  from langchain_core.output_parsers import StrOutputParser
4
4
  import os
5
+ from rich.console import Console
5
6
  from . import store
6
7
  from langchain_chroma import Chroma
7
8
  from langchain_ollama import OllamaEmbeddings
@@ -10,22 +11,21 @@ config = store.load_config()
10
11
  retrieval_k = int(config.get("retrieval_k", 6))
11
12
  max_context_chars = int(config.get("max_context_chars", 3500))
12
13
  max_doc_chars = int(config.get("max_doc_chars", 700))
14
+ console = Console()
13
15
 
14
16
  model = Ollama(
15
17
  model=config.get("default_model", "deepseek-coder-v2"),
16
- num_predict=int(config.get("num_predict", 192)),
18
+ num_predict=int(config.get("num_predict", 400)),
17
19
  num_ctx=int(config.get("num_ctx", 2048)),
18
20
  num_thread=int(config.get("num_thread", max(1, (os.cpu_count() or 4) - 1))),
19
21
  temperature=float(config.get("temperature", 0.1)),
20
22
  )
21
-
22
23
  def _get_vectorstore():
23
24
  """Get vectorstore for the active project."""
24
25
  embeddings = OllamaEmbeddings(model=config.get("default_embed_model", "mxbai-embed-large:335m"))
25
26
  db_location = store.get_active_project_path()
26
27
  return Chroma(collection_name="documents", persist_directory=str(db_location), embedding_function=embeddings)
27
28
 
28
- retriever = None
29
29
 
30
30
  # Query Transformation Chain
31
31
  rewrite_template = """You are an AI assistant helping to formulate a search query for a vector database.
@@ -37,7 +37,7 @@ rewrite_prompt = ChatPromptTemplate.from_template(rewrite_template)
37
37
  rewrite_chain = rewrite_prompt | model | StrOutputParser()
38
38
 
39
39
  template = """
40
- You are an assistant for answering questions based on the following ingested documents.
40
+ You are an assistant named docpilot for answering questions based on the following ingested documents.
41
41
  Use the information in the documents to answer the question as best as you can.
42
42
  If you don't know the answer, say you don't know.
43
43
  Always use the information in the documents and never make up an answer.
@@ -77,7 +77,7 @@ def askai(question):
77
77
  max_doc_chars=max_doc_chars,
78
78
  )
79
79
  if rag_text == "":
80
- print("NO RAG obtained add some documents to ingest")
80
+ console.print("[italic blue]Note - NO RAG obtained add some documents to ingest[/italic blue]")
81
81
  result = chain.invoke({"reviews": rag_text, "question": question})
82
82
  return result
83
83
 
@@ -120,7 +120,8 @@ def show():
120
120
 
121
121
  info_panel = Panel(
122
122
  f"[bold]Version:[/bold] [cyan]{PROJECT_VERSION}[/cyan]\n"
123
- f"[bold]Description:[/bold] [cyan]{PROJECT_DESCRIPTION}[/cyan]",
123
+ f"[bold]Description:[/bold] [cyan]{PROJECT_DESCRIPTION}[/cyan]\n"
124
+ f"[bold italic][yellow]Developed by Aswin Ashok as an open source project[/yellow]",
124
125
  border_style="cyan",
125
126
  title="[bold green]System Info[/bold green]",
126
127
  padding=(1, 2)
@@ -4,9 +4,14 @@ from urllib.parse import urljoin, urlparse
4
4
  from concurrent.futures import ThreadPoolExecutor, as_completed
5
5
  from collections import deque
6
6
  import time
7
+ from importlib.metadata import version, PackageNotFoundError
7
8
 
9
+ try:
10
+ __version__ = version("docpilot-cli")
11
+ except PackageNotFoundError:
12
+ __version__ = "unknown"
8
13
 
9
- REQUEST_HEADERS = {"User-Agent": "docpilot/0.0.1 (+https://github.com/foss-hack/docpilot)"}
14
+ REQUEST_HEADERS = {"User-Agent": f"docpilot/{__version__} (+https://pypi.org/project/docpilot-cli/)"}
10
15
  MAX_RETRIES = 4
11
16
  BACKOFF_SECONDS = 0.8
12
17
 
@@ -1,3 +1,6 @@
1
+ from chromadb.utils import embedding_functions
2
+ from bs4 import element
3
+ from rich.console import Console
1
4
  import tomllib
2
5
  import tomli_w
3
6
  from pathlib import Path
@@ -8,6 +11,7 @@ path = Path.home() / ".docpilot"
8
11
  CONFIG_PATH = path / "config.toml"
9
12
  PROJECTS_DIR = path / "projects"
10
13
 
14
+ console = Console()
11
15
 
12
16
  def _get_available_models():
13
17
  try:
@@ -28,8 +32,8 @@ DEFAULT_CONFIG = {
28
32
  "retrieval_k": 6,
29
33
  "max_context_chars": 3500,
30
34
  "max_doc_chars": 700,
31
- "num_predict": 192,
32
- "num_ctx": 2048,
35
+ "num_predict": 500,
36
+ "num_ctx": 4096,
33
37
  "num_thread": max(1, (os.cpu_count() or 4) - 1),
34
38
  "temperature": 0.1,
35
39
  }
@@ -39,9 +43,6 @@ def check_ollama_connection():
39
43
  try:
40
44
  ollama.list()
41
45
  except Exception:
42
- from rich.console import Console
43
-
44
- console = Console()
45
46
  console.print("\n[bold red]❌ Error: Could not connect to Ollama.[/bold red]")
46
47
  console.print("[yellow]Please ensure the Ollama application is running and try again.[/yellow]\n")
47
48
  import sys
@@ -51,6 +52,9 @@ def check_ollama_connection():
51
52
 
52
53
  def init_config():
53
54
  """Initialize config file with defaults if it doesn't exist."""
55
+ if path.exists():
56
+ if not CONFIG_PATH.exists():
57
+ console.print("[bold red]Config file lost, recreating it!")
54
58
  if not CONFIG_PATH.exists():
55
59
  CONFIG_PATH.parent.mkdir(parents=True, exist_ok=True)
56
60
  PROJECTS_DIR.mkdir(parents=True, exist_ok=True)
@@ -69,7 +73,22 @@ def interactive_setup(first_time=False):
69
73
  console.print("\n[bold cyan]🚀 Welcome to Docpilot! Let's do a quick setup.[/bold cyan]")
70
74
  else:
71
75
  console.print("\n[bold cyan]⚙️ Docpilot Model Setup[/bold cyan]")
72
-
76
+
77
+ #Finding and handling error of embeding model
78
+ embedding_modelfound=0
79
+ embedsupportedmodel = [
80
+ "all-minilm",
81
+ "snowflake-arctic-embed",
82
+ "bge-m3",
83
+ "bge-large",
84
+ "paraphrase-multilingual",
85
+ "mxbai-embed-large",
86
+ "mxbai-embed-large:latest",
87
+ "mxbai-embed-large:335m",
88
+ "nomic-embed-text",
89
+ ]
90
+ embedmodelfound=[]
91
+ embed_model=""
73
92
  models = _get_available_models()
74
93
  if not models:
75
94
  console.print("[yellow]No local Ollama models found! Using default fallbacks.[/yellow]")
@@ -77,15 +96,56 @@ def interactive_setup(first_time=False):
77
96
  embed_model = Prompt.ask("Enter embedding model name", default="mxbai-embed-large:335m")
78
97
  chat_model = Prompt.ask("Enter chat model name", default="qwen2.5:latest")
79
98
  else:
80
- # Smart defaults based on names
81
- embed_guess = next((m for m in models if "embed" in m.lower()), models[0])
82
- chat_guess = next((m for m in models if "embed" not in m.lower()), models[1] if len(models) > 1 else models[0])
83
-
84
- embed_model = Prompt.ask(
85
- "Select your [bold green]embedding model[/bold green]", choices=models, default=embed_guess
86
- )
87
- chat_model = Prompt.ask("Select your [bold blue]chat model[/bold blue]", choices=models, default=chat_guess)
88
-
99
+ for i in models:
100
+ for j in embedsupportedmodel:
101
+ if i.lower()==j.lower():
102
+ embedding_modelfound=1
103
+ embedmodelfound.append(j)
104
+ break
105
+ if not embedding_modelfound:
106
+ console.print("[bold yellow]Embedding model not found and is not encouraged to use this way")
107
+ while True:
108
+ userquestion = Prompt.ask("[blue]Run ollama pull mxbai-embed-large to fix this(y/n).[/blue]").lower()
109
+ if userquestion == "y":
110
+ os.system("ollama pull mxbai-embed-large")
111
+ embed_model="mxbai-embed-large"
112
+ break
113
+ elif userquestion == "n":
114
+ console.print("[red]Exiting...[/red]")
115
+ exit()
116
+ else:
117
+ console.print("Enter a valid option to continue")
118
+
119
+ #Asking embedding model if model not latestly downloaded
120
+ if embed_model == "":
121
+ embed_model = Prompt.ask(
122
+ "Select your [bold green]embedding model[/bold green]", choices=embedmodelfound, default=embedmodelfound[0]
123
+ )
124
+ #Chat Model
125
+ chatspecific_models=list(set(models)-set(embedsupportedmodel))
126
+ console.print("[bold yellow]Available chat Models:[/bold yellow]")
127
+ for i in range(len(chatspecific_models)):
128
+ console.print(f"[bold green]{i}:->{chatspecific_models[i]} .[/bold green]")
129
+ #Smart choice for model
130
+ chat_model=str()
131
+ if not chatspecific_models:
132
+ chatspecific_models = models # Fallback if they only have embedding models downloaded
133
+
134
+ chat_guess = next((m for m in models if "embed" not in m.lower()), chatspecific_models[1] if len(chatspecific_models) > 1 else chatspecific_models[0])
135
+ while True:
136
+ chatmodelanswer = Prompt.ask("Select your [bold blue]chat model[/bold blue]", default=chat_guess)
137
+ if chatmodelanswer.isdigit():
138
+ if int(chatmodelanswer)<len(chatspecific_models):
139
+ chat_model=chatspecific_models[int(chatmodelanswer)]
140
+ break
141
+ else:
142
+ console.print("[red]Invalid model number. Try again.")
143
+ else:
144
+ if check_exist(chatmodelanswer,chatspecific_models):
145
+ chat_model = chatmodelanswer
146
+ break
147
+ else:
148
+ console.print("[red]Invalid Option... try again")
89
149
  config = load_config() if not first_time else DEFAULT_CONFIG.copy()
90
150
  config["default_embed_model"] = embed_model
91
151
  config["default_model"] = chat_model
@@ -93,6 +153,11 @@ def interactive_setup(first_time=False):
93
153
  console.print(f"[bold green]✓ Configuration saved to {CONFIG_PATH}[/bold green]\n")
94
154
  return load_config()
95
155
 
156
+ def check_exist(name,listname):
157
+ for i in listname:
158
+ if name == i.lower():
159
+ return True
160
+ return False
96
161
 
97
162
  def load_config():
98
163
  if not CONFIG_PATH.exists():
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docpilot-cli
3
- Version: 1.0.3
3
+ Version: 1.0.4
4
4
  Summary: A local-first RAG pipeline CLI tool
5
5
  Requires-Python: >=3.12
6
6
  Description-Content-Type: text/markdown
File without changes
File without changes
File without changes