sirchmunk 0.0.1__py3-none-any.whl → 0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sirchmunk/api/__init__.py +1 -0
- sirchmunk/api/chat.py +1123 -0
- sirchmunk/api/components/__init__.py +0 -0
- sirchmunk/api/components/history_storage.py +402 -0
- sirchmunk/api/components/monitor_tracker.py +518 -0
- sirchmunk/api/components/settings_storage.py +353 -0
- sirchmunk/api/history.py +254 -0
- sirchmunk/api/knowledge.py +411 -0
- sirchmunk/api/main.py +120 -0
- sirchmunk/api/monitor.py +219 -0
- sirchmunk/api/run_server.py +54 -0
- sirchmunk/api/search.py +230 -0
- sirchmunk/api/settings.py +309 -0
- sirchmunk/api/tools.py +315 -0
- sirchmunk/cli/__init__.py +11 -0
- sirchmunk/cli/cli.py +789 -0
- sirchmunk/learnings/knowledge_base.py +5 -2
- sirchmunk/llm/prompts.py +12 -1
- sirchmunk/retrieve/text_retriever.py +186 -2
- sirchmunk/scan/file_scanner.py +2 -2
- sirchmunk/schema/knowledge.py +119 -35
- sirchmunk/search.py +384 -26
- sirchmunk/storage/__init__.py +2 -2
- sirchmunk/storage/{knowledge_manager.py → knowledge_storage.py} +265 -60
- sirchmunk/utils/constants.py +7 -5
- sirchmunk/utils/embedding_util.py +217 -0
- sirchmunk/utils/tokenizer_util.py +36 -1
- sirchmunk/version.py +1 -1
- {sirchmunk-0.0.1.dist-info → sirchmunk-0.0.2.dist-info}/METADATA +196 -14
- sirchmunk-0.0.2.dist-info/RECORD +69 -0
- {sirchmunk-0.0.1.dist-info → sirchmunk-0.0.2.dist-info}/WHEEL +1 -1
- sirchmunk-0.0.2.dist-info/top_level.txt +2 -0
- sirchmunk_mcp/__init__.py +25 -0
- sirchmunk_mcp/cli.py +478 -0
- sirchmunk_mcp/config.py +276 -0
- sirchmunk_mcp/server.py +355 -0
- sirchmunk_mcp/service.py +327 -0
- sirchmunk_mcp/setup.py +15 -0
- sirchmunk_mcp/tools.py +410 -0
- sirchmunk-0.0.1.dist-info/RECORD +0 -45
- sirchmunk-0.0.1.dist-info/top_level.txt +0 -1
- {sirchmunk-0.0.1.dist-info → sirchmunk-0.0.2.dist-info}/entry_points.txt +0 -0
- {sirchmunk-0.0.1.dist-info → sirchmunk-0.0.2.dist-info}/licenses/LICENSE +0 -0
sirchmunk/cli/cli.py
ADDED
|
@@ -0,0 +1,789 @@
|
|
|
1
|
+
# Copyright (c) ModelScope Contributors. All rights reserved.
|
|
2
|
+
"""
|
|
3
|
+
Command-line interface for Sirchmunk.
|
|
4
|
+
|
|
5
|
+
Provides commands for server management, initialization, configuration,
|
|
6
|
+
and search operations.
|
|
7
|
+
|
|
8
|
+
Usage:
|
|
9
|
+
sirchmunk init - Initialize Sirchmunk working directory
|
|
10
|
+
sirchmunk config - Show or generate configuration
|
|
11
|
+
sirchmunk serve - Start the API server
|
|
12
|
+
sirchmunk search - Perform a search query
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import argparse
|
|
16
|
+
import asyncio
|
|
17
|
+
import json
|
|
18
|
+
import logging
|
|
19
|
+
import os
|
|
20
|
+
import sys
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
from typing import Optional
|
|
23
|
+
|
|
24
|
+
from sirchmunk.version import __version__
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# Module-level logger named after this module; the command handlers below use
# it to record failures together with their tracebacks.
logger = logging.getLogger(__name__)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _get_default_work_path() -> Path:
    """Get the default work path for Sirchmunk.

    The path is read from the ``SIRCHMUNK_WORK_PATH`` environment variable.
    When the variable is unset or empty, ``~/.sirchmunk`` in the current
    user's home directory is used.

    A leading ``~`` in the configured value is expanded here, because the
    generated ``.env`` file stores the literal value ``~/.sirchmunk`` and not
    every caller expands it before use.

    Returns:
        The work path with any leading ``~`` expanded. The path is not
        resolved and is not guaranteed to exist.
    """
    configured = os.getenv("SIRCHMUNK_WORK_PATH")
    if not configured:
        # Unset or empty: an empty string would otherwise become Path(".").
        return Path.home() / ".sirchmunk"
    return Path(configured).expanduser()
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _setup_logging(log_level: str = "INFO"):
    """Configure logging for CLI operations.

    Log records are written to stderr so they do not mix with command output
    printed on stdout.

    Args:
        log_level: Name of a standard ``logging`` level, case-insensitive
            (for example ``"debug"`` or ``"WARNING"``). A name that does not
            map to a numeric logging level falls back to ``INFO`` instead of
            raising.
    """
    level = getattr(logging, str(log_level).upper(), None)
    if not isinstance(level, int):
        # Unknown name, or an attribute that is not a level constant.
        level = logging.INFO

    logging.basicConfig(
        level=level,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        stream=sys.stderr,
    )
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _load_env_file(env_file: Path) -> bool:
    """Load environment variables from .env file.

    Variables that are already present in ``os.environ`` are never
    overwritten. ``python-dotenv`` is used when it is installed; otherwise a
    small built-in parser handles plain ``KEY=value`` lines.

    The built-in parser:
      * skips blank lines, ``#`` comment lines and lines without ``=``;
      * accepts an optional leading ``export `` before the key;
      * removes one pair of matching single or double quotes around the value.

    Args:
        env_file: Path to .env file

    Returns:
        True if file was loaded, False otherwise
    """
    if not env_file.exists():
        return False

    try:
        from dotenv import load_dotenv
    except ImportError:
        load_dotenv = None

    if load_dotenv is not None:
        load_dotenv(env_file, override=False)
        return True

    # Fallback: manual parsing if python-dotenv not installed
    try:
        with open(env_file, "r", encoding="utf-8") as f:
            for raw_line in f:
                line = raw_line.strip()
                if not line or line.startswith("#") or "=" not in line:
                    continue

                key, _, value = line.partition("=")
                key = key.strip()
                if key.startswith("export "):
                    # Shell-style "export KEY=value" lines are valid in .env files.
                    key = key[len("export "):].strip()

                value = value.strip()
                if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
                    # Drop exactly one pair of matching surrounding quotes.
                    value = value[1:-1]

                if key and key not in os.environ:
                    os.environ[key] = value
        return True
    except (OSError, ValueError):
        # Unreadable file, undecodable bytes, or a value os.environ rejects.
        return False
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def cmd_init(args: argparse.Namespace) -> int:
    """Initialize Sirchmunk working directory.

    Creates the work directory structure, checks dependencies, reports the
    LLM-related environment settings and pre-downloads the embedding model.
    Missing dependencies and a failed model download are reported but do not
    make initialization fail.

    Args:
        args: Command-line arguments; ``args.work_path`` is the directory to
            initialize.

    Returns:
        Exit code (0 for success, non-zero for failure)
    """
    try:
        work_path = Path(args.work_path).expanduser().resolve()

        print("=" * 60)
        print(" Sirchmunk Initialization")
        print("=" * 60)
        print()
        print(f"Work path: {work_path}")
        print()

        env_file = _init_create_layout(work_path)
        _init_check_dependencies()
        _init_report_environment(env_file)
        _init_preload_embedding_model(work_path)

        print()
        print("=" * 60)
        print("✅ Initialization complete!")
        print("=" * 60)
        print()
        print("Next steps:")
        print(f" 1. Edit {env_file} to configure LLM_API_KEY")
        print(" 2. Run 'sirchmunk serve' to start the API server")
        print(" 3. Run 'sirchmunk search \"your query\"' to perform searches")
        print()

        return 0

    except Exception as e:
        logger.error(f"Initialization failed: {e}", exc_info=True)
        print(f"❌ Initialization failed: {e}")
        return 1


def _init_create_layout(work_path: Path) -> Path:
    """Create the work directory tree and a default .env file; return its path."""
    print("Creating directory structure...")
    cache_dir = work_path / ".cache"
    directories = [
        work_path,
        work_path / "data",
        work_path / "logs",
        cache_dir,
        cache_dir / "models",
        cache_dir / "knowledge",
        cache_dir / "history",
        cache_dir / "settings",
    ]
    for directory in directories:
        directory.mkdir(parents=True, exist_ok=True)
    print(" ✓ Created work directory and subdirectories")

    # Never overwrite an existing configuration file.
    env_file = work_path / ".env"
    if not env_file.exists():
        _generate_env_file(env_file)
        print(f" ✓ Generated {env_file}")
    else:
        print(f" • Skipped {env_file} (already exists)")
    return env_file


def _init_is_importable(module_name: str) -> bool:
    """Return True when *module_name* can be located, without importing it."""
    import importlib.util

    try:
        return importlib.util.find_spec(module_name) is not None
    except (ImportError, ValueError):
        return False


def _init_check_dependencies() -> None:
    """Report on external binaries and Python packages; try to install rga."""
    import shutil

    print()
    print("Checking dependencies...")

    # Check ripgrep-all
    if shutil.which("rga"):
        print(" ✓ ripgrep-all (rga) is installed")
    else:
        print(" ✗ ripgrep-all (rga) is not installed")
        print(" Installing ripgrep-all...")
        try:
            from sirchmunk.utils.install_rga import install_rga

            install_rga()
            print(" ✓ ripgrep-all installed successfully")
        except Exception as e:
            # Best effort: a failed install is reported, not fatal.
            print(f" ✗ Failed to install ripgrep-all: {e}")
            print(" Please install manually: https://github.com/phiresky/ripgrep-all")

    # Check ripgrep
    if shutil.which("rg"):
        print(" ✓ ripgrep (rg) is installed")
    else:
        print(" ✗ ripgrep (rg) is not installed")
        print(" Please install: https://github.com/BurntSushi/ripgrep")

    # Check Python packages by locating them rather than importing them.
    for label, module_name in (("FastAPI", "fastapi"), ("uvicorn", "uvicorn")):
        if _init_is_importable(module_name):
            print(f" ✓ {label} is installed")
        else:
            print(f" ✗ {label} not found")
            print(f" Install with: pip install {module_name}")


def _init_report_environment(env_file: Path) -> None:
    """Load *env_file* and print the LLM-related environment settings."""
    print()
    print("Checking environment variables...")

    # Load env file first
    _load_env_file(env_file)

    llm_api_key = os.getenv("LLM_API_KEY")
    if llm_api_key:
        # Show only a short prefix; very short keys are hidden entirely.
        masked_key = llm_api_key[:8] + "..." if len(llm_api_key) > 8 else "***"
        print(f" ✓ LLM_API_KEY is set ({masked_key})")
    else:
        print(" ✗ LLM_API_KEY is not set")
        print(f" Set it in {env_file}")

    llm_model = os.getenv("LLM_MODEL_NAME", "gpt-5.2")
    print(f" • LLM_MODEL_NAME: {llm_model}")

    llm_base_url = os.getenv("LLM_BASE_URL", "https://api.openai.com/v1")
    print(f" • LLM_BASE_URL: {llm_base_url}")


def _init_preload_embedding_model(work_path: Path) -> None:
    """Pre-download the embedding model into the work path's model cache."""
    print()
    print("Downloading embedding model...")
    print(" (This may take a few minutes on first run)")
    try:
        from sirchmunk.utils.embedding_util import EmbeddingUtil

        model_cache_dir = str(work_path / ".cache" / "models")
        model_dir = EmbeddingUtil.preload_model(
            cache_dir=model_cache_dir,
        )
        print(f" ✓ Embedding model downloaded: {model_dir}")
    except Exception as e:
        # Best effort: a failed download is reported, not fatal.
        print(f" ✗ Failed to download embedding model: {e}")
        print(" Model will be downloaded on first search.")
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def cmd_config(args: argparse.Namespace) -> int:
    """Show or generate configuration.

    With ``--generate`` a default ``.env`` file is written into the work
    path; an existing file is only replaced when ``--force`` is also given.
    Without ``--generate`` the current settings are printed, after loading
    the ``.env`` file if one exists.

    The defaults shown for unset variables are the ones the rest of this
    module uses (the generated ``.env`` file and the local search).

    Args:
        args: Command-line arguments (``args.generate``, ``args.force``)

    Returns:
        Exit code (0 for success, non-zero for failure)
    """
    try:
        work_path = _get_default_work_path().expanduser().resolve()
        env_file = work_path / ".env"

        if args.generate:
            # Generate .env file
            work_path.mkdir(parents=True, exist_ok=True)

            if env_file.exists() and not args.force:
                print(f"⚠️ {env_file} already exists.")
                print(" Use --force to overwrite.")
                return 1

            _generate_env_file(env_file)
            print(f"✅ Generated {env_file}")
            print()
            print("Edit this file to configure your LLM settings:")
            print(f" {env_file}")
            return 0

        # Show current configuration
        print("=" * 60)
        print("Sirchmunk Configuration")
        print("=" * 60)
        print()

        # Load env file if exists
        if env_file.exists():
            _load_env_file(env_file)
            print(f"📄 Config file: {env_file}")
        else:
            print(f"📄 Config file: Not found ({env_file})")
            print(" Run 'sirchmunk config --generate' to create one.")

        api_key = os.getenv("LLM_API_KEY")
        if not api_key:
            shown_key = "Not set"
        elif len(api_key) > 8:
            shown_key = "***" + api_key[-4:]
        else:
            # Too short to reveal a suffix without exposing most of the key.
            shown_key = "***"

        print()
        print("Current Settings:")
        print(f" SIRCHMUNK_WORK_PATH: {os.getenv('SIRCHMUNK_WORK_PATH', '~/.sirchmunk (default)')}")
        print(f" LLM_BASE_URL: {os.getenv('LLM_BASE_URL', 'https://api.openai.com/v1 (default)')}")
        print(f" LLM_API_KEY: {shown_key}")
        print(f" LLM_MODEL_NAME: {os.getenv('LLM_MODEL_NAME', 'gpt-5.2 (default)')}")
        print()

        return 0

    except Exception as e:
        logger.error(f"Config command failed: {e}", exc_info=True)
        print(f"❌ Error: {e}")
        return 1
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
def cmd_serve(args: argparse.Namespace) -> int:
    """Start the Sirchmunk API server.

    Loads the ``.env`` file from the work path when present, prints a startup
    banner and then runs ``sirchmunk.api.main:app`` under uvicorn until the
    server stops.

    Args:
        args: Command-line arguments (``host``, ``port``, ``reload``,
            ``log_level``)

    Returns:
        Exit code (0 for success, non-zero for failure)
    """
    try:
        # Load environment
        dotenv_path = _get_default_work_path().expanduser().resolve() / ".env"
        if dotenv_path.exists():
            _load_env_file(dotenv_path)

        # uvicorn is imported lazily to avoid slow startup of other commands.
        try:
            import uvicorn
        except ImportError:
            print("❌ uvicorn is not installed.")
            print(" Install it with: pip install uvicorn")
            return 1

        rule = "=" * 60
        banner = (
            rule,
            f"Sirchmunk API Server v{__version__}",
            rule,
            "",
            f" Host: {args.host}",
            f" Port: {args.port}",
            f" Reload: {args.reload}",
            "",
            f" API Docs: http://{args.host}:{args.port}/docs",
            f" Health: http://{args.host}:{args.port}/health",
            "",
            "Press Ctrl+C to stop the server.",
            rule,
            "",
        )
        for banner_line in banner:
            print(banner_line)

        uvicorn.run(
            "sirchmunk.api.main:app",
            host=args.host,
            port=args.port,
            reload=args.reload,
            log_level=args.log_level.lower(),
        )
    except KeyboardInterrupt:
        print("\n✅ Server stopped.")
        return 0
    except Exception as e:
        logger.error(f"Server failed: {e}", exc_info=True)
        print(f"❌ Server error: {e}")
        return 1
    else:
        return 0
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
def cmd_search(args: argparse.Namespace) -> int:
    """Perform a search query.

    Two modes are supported:
    - Local mode (default): the search runs in this process via AgenticSearch
    - Client mode (--api): the query is sent to a running API server

    When no paths are given, the current working directory is searched.

    Args:
        args: Command-line arguments

    Returns:
        Exit code (0 for success, 130 when interrupted, 1 on any other error)
    """
    try:
        # Load environment
        dotenv_path = _get_default_work_path().expanduser().resolve() / ".env"
        if dotenv_path.exists():
            _load_env_file(dotenv_path)

        # Arguments that both search back ends take.
        shared = {
            "query": args.query,
            "search_paths": args.paths if args.paths else [os.getcwd()],
            "mode": args.mode,
            "output_format": args.output,
        }

        if not args.api:
            # Local mode: direct search
            return asyncio.run(_search_local(verbose=args.verbose, **shared))

        # Client mode: call API server
        return _search_via_api(api_url=args.api_url, **shared)

    except KeyboardInterrupt:
        print("\n⚠️ Search cancelled.")
        return 130
    except Exception as e:
        logger.error(f"Search failed: {e}", exc_info=True)
        print(f"❌ Search error: {e}")
        return 1
|
|
389
|
+
|
|
390
|
+
|
|
391
|
+
async def _search_local(
    query: str,
    search_paths: list,
    mode: str = "DEEP",
    output_format: str = "text",
    verbose: bool = False,
) -> int:
    """Execute search locally using AgenticSearch.

    The LLM settings are read from the environment when this function runs,
    so values loaded from the ``.env`` file by the caller are honoured.

    Args:
        query: Search query
        search_paths: Paths to search
        mode: Search mode (FAST, DEEP, FILENAME_ONLY)
        output_format: Output format (text, json)
        verbose: Enable verbose output

    Returns:
        Exit code (0 on success, 1 when ``LLM_API_KEY`` is not configured)
    """
    from sirchmunk.search import AgenticSearch
    from sirchmunk.llm.openai_chat import OpenAIChat

    # Expand "~" and make the path absolute, as the other commands do; the
    # generated .env file stores the work path as the literal "~/.sirchmunk".
    work_path = _get_default_work_path().expanduser().resolve()

    # Read LLM config from environment at runtime (after .env is loaded)
    # Don't use constants module values as they are loaded at import time
    llm_base_url = os.getenv("LLM_BASE_URL", "https://api.openai.com/v1")
    llm_api_key = os.getenv("LLM_API_KEY", "")
    llm_model_name = os.getenv("LLM_MODEL_NAME", "gpt-5.2")

    # Validate API key
    if not llm_api_key:
        print("❌ LLM_API_KEY is not set.")
        # Point at the .env file of the work path actually in use.
        print(f" Configure it in {work_path / '.env'} or set the environment variable.")
        return 1

    # Create LLM client
    llm = OpenAIChat(
        base_url=llm_base_url,
        api_key=llm_api_key,
        model=llm_model_name,
    )

    # Create search instance
    searcher = AgenticSearch(
        llm=llm,
        work_path=str(work_path),
        verbose=verbose,
    )

    if not verbose:
        print(f"🔍 Searching: {query}")
        print(f" Mode: {mode}")
        print(f" Paths: {', '.join(search_paths)}")
        print()

    # Execute search; the cluster object is only requested for JSON output.
    result = await searcher.search(
        query=query,
        search_paths=search_paths,
        mode=mode,
        return_cluster=output_format == "json",
    )

    # Output result
    if output_format == "json":
        if hasattr(result, "to_dict"):
            output = json.dumps(result.to_dict(), indent=2, ensure_ascii=False)
        else:
            output = json.dumps({"result": result}, indent=2, ensure_ascii=False)
        print(output)
    elif result:
        print("=" * 60)
        print("Search Results")
        print("=" * 60)
        print()
        print(result)
    else:
        print("No results found.")

    return 0
|
|
472
|
+
|
|
473
|
+
|
|
474
|
+
def _search_via_api(
    query: str,
    search_paths: list,
    api_url: str = "http://localhost:8584",
    mode: str = "DEEP",
    output_format: str = "text",
) -> int:
    """Execute search via API server.

    Sends a POST request to ``<api_url>/api/v1/search`` and prints either the
    raw JSON response or the summary it contains.

    Args:
        query: Search query
        search_paths: Paths to search
        api_url: API server URL; a trailing slash is tolerated
        mode: Search mode
        output_format: Output format ("json" prints the whole response,
            anything else prints only the summary)

    Returns:
        Exit code (0 on success, 1 on any failure)
    """
    try:
        import requests
    except ImportError:
        print("❌ requests library is not installed.")
        print(" Install it with: pip install requests")
        return 1

    print(f"🔍 Searching via API: {api_url}")
    print(f" Query: {query}")
    print(f" Mode: {mode}")
    print()

    # Avoid a double slash when the configured URL already ends with "/".
    endpoint = f"{api_url.rstrip('/')}/api/v1/search"

    try:
        response = requests.post(
            endpoint,
            json={
                "query": query,
                "search_paths": search_paths,
                "mode": mode,
            },
            timeout=300,  # 5 minute timeout for long searches
        )
        response.raise_for_status()

        data = response.json()

        if output_format == "json":
            print(json.dumps(data, indent=2, ensure_ascii=False))
            return 0

        if not data.get("success"):
            print(f"❌ Search failed: {data.get('error', 'Unknown error')}")
            return 1

        # "data" may be missing or null in the response; treat both as empty.
        payload = data.get("data") or {}
        summary = payload.get("summary") or "No results found."

        print("=" * 60)
        print("Search Results")
        print("=" * 60)
        print()
        print(summary)
        return 0

    except requests.exceptions.ConnectionError:
        print(f"❌ Cannot connect to API server at {api_url}")
        print(" Make sure the server is running: sirchmunk serve")
        return 1
    except requests.exceptions.Timeout:
        print("❌ Request timed out.")
        return 1
    except Exception as e:
        # HTTP error statuses, invalid JSON and unexpected payload shapes.
        print(f"❌ API error: {e}")
        return 1
|
|
544
|
+
|
|
545
|
+
|
|
546
|
+
def _generate_env_file(env_file: Path):
    """Generate a default .env configuration file.

    Any existing file at *env_file* is overwritten. Because the file is meant
    to hold ``LLM_API_KEY``, its permissions are restricted to the owner
    afterwards on a best-effort basis.

    Args:
        env_file: Path to write the .env file
    """
    content = """# ===== Sirchmunk Configuration =====
# Generated by: sirchmunk config --generate

# ===== LLM Settings =====
# LLM API base URL (OpenAI-compatible endpoint)
LLM_BASE_URL=https://api.openai.com/v1

# LLM API key (REQUIRED - get from your LLM provider)
LLM_API_KEY=

# LLM model name
LLM_MODEL_NAME=gpt-5.2

# ===== Sirchmunk Settings =====
# Working directory for data and cache
SIRCHMUNK_WORK_PATH=~/.sirchmunk

# Enable verbose logging (true/false)
SIRCHMUNK_VERBOSE=false

# ===== Search Settings =====
# Maximum directory depth to search
DEFAULT_MAX_DEPTH=5

# Number of top files to return
DEFAULT_TOP_K_FILES=3

# Number of keyword granularity levels
DEFAULT_KEYWORD_LEVELS=3

# Grep operation timeout in seconds
GREP_TIMEOUT=60.0

# ===== Cluster Settings =====
# Enable knowledge cluster reuse with embeddings
SIRCHMUNK_ENABLE_CLUSTER_REUSE=true

# Similarity threshold for cluster reuse (0.0-1.0)
CLUSTER_SIM_THRESHOLD=0.85

# Number of similar clusters to retrieve
CLUSTER_SIM_TOP_K=3

# Maximum queries per cluster (FIFO)
MAX_QUERIES_PER_CLUSTER=5
"""

    with open(env_file, "w", encoding="utf-8") as f:
        f.write(content)

    try:
        # Owner read/write only: this file will contain an API key.
        os.chmod(env_file, 0o600)
    except OSError:
        # Some filesystems cannot change permissions; the file is still usable.
        pass
|
|
601
|
+
|
|
602
|
+
|
|
603
|
+
def cmd_version(args: argparse.Namespace) -> int:
    """Print the installed Sirchmunk version.

    Args:
        args: Command-line arguments (not used by this command)

    Returns:
        Exit code, always 0
    """
    version_line = "sirchmunk {}".format(__version__)
    print(version_line)
    return 0
|
|
614
|
+
|
|
615
|
+
|
|
616
|
+
def create_parser() -> argparse.ArgumentParser:
    """Build the ``sirchmunk`` argument parser with all of its subcommands.

    The subcommands are ``init``, ``config``, ``serve``, ``search`` and
    ``version``. Each one stores its handler function in ``func`` so the
    caller can dispatch on the parsed namespace.

    Returns:
        Configured argument parser
    """
    usage_examples = """
Examples:
sirchmunk init Initialize Sirchmunk
sirchmunk config --generate Generate configuration file
sirchmunk serve Start API server
sirchmunk serve --port 8000 Start on custom port
sirchmunk search "find auth" Search in current directory
sirchmunk search "bug" ./src Search in specific path
sirchmunk search "api" --mode FILENAME_ONLY
Quick filename search
"""
    root = argparse.ArgumentParser(
        prog="sirchmunk",
        description="Sirchmunk: Agentic Search for raw data intelligence",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )
    root.add_argument("-V", "--version", action="store_true", help="Show version and exit")

    commands = root.add_subparsers(dest="command", help="Available commands")

    # --- init ---
    init_cmd = commands.add_parser(
        "init",
        help="Initialize Sirchmunk working directory",
        description="Create directory structure and generate initial configuration.",
    )
    init_cmd.add_argument(
        "--work-path",
        default=str(_get_default_work_path()),
        help="Working directory path (default: ~/.sirchmunk)",
    )
    init_cmd.set_defaults(func=cmd_init)

    # --- config ---
    config_cmd = commands.add_parser(
        "config",
        help="Show or generate configuration",
        description="Display current configuration or generate a new .env file.",
    )
    config_cmd.add_argument(
        "--generate", "-g", action="store_true", help="Generate .env configuration file"
    )
    config_cmd.add_argument(
        "--force", "-f", action="store_true", help="Overwrite existing configuration file"
    )
    config_cmd.set_defaults(func=cmd_config)

    # --- serve ---
    serve_cmd = commands.add_parser(
        "serve",
        help="Start the Sirchmunk API server",
        description="Launch the FastAPI server for API access and WebUI.",
    )
    serve_cmd.add_argument("--host", default="0.0.0.0", help="Host to bind (default: 0.0.0.0)")
    serve_cmd.add_argument(
        "--port", "-p", type=int, default=8584, help="Port to listen on (default: 8584)"
    )
    serve_cmd.add_argument(
        "--reload", action="store_true", help="Enable auto-reload for development"
    )
    serve_cmd.add_argument(
        "--log-level",
        default="INFO",
        choices=["DEBUG", "INFO", "WARNING", "ERROR"],
        help="Logging level (default: INFO)",
    )
    serve_cmd.set_defaults(func=cmd_serve)

    # --- search ---
    search_cmd = commands.add_parser(
        "search",
        help="Perform a search query",
        description="Search documents and code using AgenticSearch.",
    )
    search_cmd.add_argument("query", help="Search query or question")
    search_cmd.add_argument(
        "paths", nargs="*", help="Paths to search (default: current directory)"
    )
    search_cmd.add_argument(
        "--mode", "-m",
        default="DEEP",
        choices=["FAST", "DEEP", "FILENAME_ONLY"],
        help="Search mode (default: DEEP)",
    )
    search_cmd.add_argument(
        "--output", "-o",
        default="text",
        choices=["text", "json"],
        help="Output format (default: text)",
    )
    search_cmd.add_argument(
        "--api", action="store_true", help="Use API server instead of local search"
    )
    search_cmd.add_argument(
        "--api-url",
        default="http://localhost:8584",
        help="API server URL (default: http://localhost:8584)",
    )
    search_cmd.add_argument(
        "--verbose", "-v", action="store_true", help="Enable verbose output"
    )
    search_cmd.set_defaults(func=cmd_search)

    # --- version ---
    version_cmd = commands.add_parser("version", help="Show version information")
    version_cmd.set_defaults(func=cmd_version)

    return root
|
|
760
|
+
|
|
761
|
+
|
|
762
|
+
def run_cmd():
    """Main entry point for the CLI.

    Parses ``sys.argv``, handles the top-level ``--version`` flag, prints the
    help text when no subcommand is given, and otherwise runs the selected
    command handler. The process always ends through ``sys.exit``; the exit
    status is the handler's return value, or 0 for the version and help
    paths.
    """
    parser = create_parser()
    args = parser.parse_args()

    # Handle --version flag
    if args.version:
        print(f"sirchmunk {__version__}")
        sys.exit(0)

    # Handle no command (or a command without a registered handler)
    handler = getattr(args, "func", None)
    if not args.command or handler is None:
        parser.print_help()
        sys.exit(0)

    # Setup logging; only "serve" defines --log-level, other commands use INFO.
    _setup_logging(getattr(args, "log_level", "INFO"))

    # Execute command
    sys.exit(handler(args))
|
|
786
|
+
|
|
787
|
+
|
|
788
|
+
# Run the CLI when this file is executed directly as a script.
if __name__ == "__main__":
|
|
789
|
+
run_cmd()
|