chunksilo 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of chunksilo might be problematic. Click here for more details.
- chunksilo/__init__.py +4 -0
- chunksilo/__main__.py +3 -0
- chunksilo/cfgload.py +163 -0
- chunksilo/cli.py +124 -0
- chunksilo/confluence_html_formatter.py +96 -0
- chunksilo/index.py +1420 -0
- chunksilo/search.py +784 -0
- chunksilo/server.py +110 -0
- chunksilo-2.0.0.dist-info/METADATA +366 -0
- chunksilo-2.0.0.dist-info/RECORD +15 -0
- chunksilo-2.0.0.dist-info/WHEEL +5 -0
- chunksilo-2.0.0.dist-info/entry_points.txt +3 -0
- chunksilo-2.0.0.dist-info/licenses/LICENSE +191 -0
- chunksilo-2.0.0.dist-info/licenses/NOTICE +33 -0
- chunksilo-2.0.0.dist-info/top_level.txt +1 -0
chunksilo/server.py
ADDED
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""
|
|
4
|
+
MCP server for querying documentation using RAG.
|
|
5
|
+
Returns raw document chunks for the calling LLM to synthesize.
|
|
6
|
+
"""
|
|
7
|
+
import argparse
|
|
8
|
+
import asyncio
|
|
9
|
+
import logging
|
|
10
|
+
import os
|
|
11
|
+
from datetime import datetime
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Annotated, Any
|
|
14
|
+
|
|
15
|
+
from pydantic import Field
|
|
16
|
+
from mcp.server.fastmcp import FastMCP
|
|
17
|
+
|
|
18
|
+
# Log file configuration
|
|
19
|
+
LOG_FILE = "mcp.log"
|
|
20
|
+
LOG_MAX_SIZE_MB = 10
|
|
21
|
+
LOG_MAX_SIZE_BYTES = LOG_MAX_SIZE_MB * 1024 * 1024
|
|
22
|
+
|
|
23
|
+
# Module-level state (set during initialization)
|
|
24
|
+
_server_config_path: Path | None = None
|
|
25
|
+
_mcp: FastMCP | None = None
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _rotate_log_if_needed():
|
|
29
|
+
"""Rotate log file if it exists and is over the size limit."""
|
|
30
|
+
log_path = Path(LOG_FILE)
|
|
31
|
+
if log_path.exists() and log_path.stat().st_size > LOG_MAX_SIZE_BYTES:
|
|
32
|
+
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
|
|
33
|
+
process_id = os.getpid()
|
|
34
|
+
rotated_name = f"mcp_{timestamp}_{process_id}.log"
|
|
35
|
+
rotated_path = log_path.parent / rotated_name
|
|
36
|
+
log_path.rename(rotated_path)
|
|
37
|
+
log_path.touch()
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _setup_logging():
    """Route all logging to the log file; nothing goes to stdout/stderr.

    Rotates an oversized log first, then installs a single UTF-8 file handler
    on the root logger at INFO level. ``force=True`` makes ``basicConfig``
    replace any handlers that were already attached to the root logger.
    """
    _rotate_log_if_needed()

    file_handler = logging.FileHandler(LOG_FILE, encoding="utf-8")
    logging.basicConfig(
        handlers=[file_handler],
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        level=logging.INFO,
        force=True,
    )

    # Only warnings and above are kept from the Confluence reader's logger.
    confluence_logger = logging.getLogger("llama_index.readers.confluence")
    confluence_logger.setLevel(logging.WARNING)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _create_server() -> FastMCP:
    """Build the FastMCP instance and register the ``search_docs`` tool on it.

    ``run_search`` is imported inside this function, so the search module is
    only loaded once a server is actually being created.

    Returns:
        The configured ``FastMCP`` server (not yet running).
    """
    from .search import run_search

    server = FastMCP("llamaindex-docs-rag")

    @server.tool()
    async def search_docs(
        query: Annotated[str, Field(description="Search query text")],
        date_from: Annotated[str | None, Field(description="Optional start date filter (YYYY-MM-DD format, inclusive)")] = None,
        date_to: Annotated[str | None, Field(description="Optional end date filter (YYYY-MM-DD format, inclusive)")] = None,
    ) -> dict[str, Any]:
        """Search across all your indexed documentation using a natural language query."""

        def _blocking_search() -> dict[str, Any]:
            # _server_config_path is read when the search runs, not when the
            # tool is registered, so it reflects what run_server() stored.
            return run_search(query, date_from, date_to, config_path=_server_config_path)

        # run_search is a synchronous call; run it on the loop's default
        # executor so the event loop is not blocked while it works.
        event_loop = asyncio.get_running_loop()
        return await event_loop.run_in_executor(None, _blocking_search)

    return server
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def run_server(config_path: Path | None = None):
    """Create the MCP server and run it.

    Args:
        config_path: Optional path to the configuration file. When given, it
            is stored in the module-level ``_server_config_path`` (which the
            search tool reads) and exported as the ``CHUNKSILO_CONFIG``
            environment variable.
    """
    global _server_config_path, _mcp

    if config_path:
        os.environ["CHUNKSILO_CONFIG"] = str(config_path)
        _server_config_path = config_path

    server = _create_server()
    _mcp = server  # keep a module-level reference to the running server
    server.run()
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def main():
    """Console entry point for ``chunksilo-mcp``.

    Parses the optional ``--config`` argument, configures file-only logging,
    and starts the MCP server.
    """
    # NOTE(review): presumably disables HuggingFace tokenizers parallelism
    # for the server process -- confirm against the search module.
    os.environ["TOKENIZERS_PARALLELISM"] = "false"

    cli = argparse.ArgumentParser(
        description="Run ChunkSilo MCP server (stdio transport)",
        prog="chunksilo-mcp",
    )
    cli.add_argument("--config", help="Path to config.yaml")
    options = cli.parse_args()

    # Logging must be configured before run_server() triggers the deferred
    # import of the search module, so those imports log to the file as well.
    _setup_logging()
    logging.getLogger(__name__).info("Starting ChunkSilo MCP server")

    if options.config:
        selected_config = Path(options.config)
    else:
        selected_config = None
    run_server(selected_config)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
if __name__ == "__main__":
|
|
110
|
+
main()
|
|
@@ -0,0 +1,366 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: chunksilo
|
|
3
|
+
Version: 2.0.0
|
|
4
|
+
Summary: Local RAG-based semantic document search with MCP server interface
|
|
5
|
+
Author: Fredrik Reveny
|
|
6
|
+
License-Expression: Apache-2.0
|
|
7
|
+
Project-URL: Repository, https://github.com/Chetic/chunksilo
|
|
8
|
+
Project-URL: Issues, https://github.com/Chetic/chunksilo/issues
|
|
9
|
+
Keywords: rag,mcp,semantic-search,document-search,llm
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
15
|
+
Classifier: Topic :: Text Processing :: Indexing
|
|
16
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
17
|
+
Requires-Python: >=3.11
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
License-File: LICENSE
|
|
20
|
+
License-File: NOTICE
|
|
21
|
+
Requires-Dist: llama-index<1,>=0.10.0
|
|
22
|
+
Requires-Dist: llama-index-readers-file<1,>=0.1.0
|
|
23
|
+
Requires-Dist: llama-index-embeddings-fastembed<1,>=0.5.0
|
|
24
|
+
Requires-Dist: llama-index-retrievers-bm25<1,>=0.1.0
|
|
25
|
+
Requires-Dist: pillow<11,>=10.3.0
|
|
26
|
+
Requires-Dist: pypdf<7,>=5.1.0
|
|
27
|
+
Requires-Dist: python-docx<2,>=1.1.0
|
|
28
|
+
Requires-Dist: mcp<2,>=1.0.0
|
|
29
|
+
Requires-Dist: python-dotenv<2,>=1.0.0
|
|
30
|
+
Requires-Dist: huggingface-hub<2,>=0.22.0
|
|
31
|
+
Requires-Dist: flashrank<1,>=0.1.0
|
|
32
|
+
Requires-Dist: fastembed<1,>=0.5.0
|
|
33
|
+
Requires-Dist: pyyaml<7,>=6.0
|
|
34
|
+
Provides-Extra: confluence
|
|
35
|
+
Requires-Dist: llama-index-readers-confluence<1,>=0.6.0; extra == "confluence"
|
|
36
|
+
Provides-Extra: test
|
|
37
|
+
Requires-Dist: pytest<9,>=7.4.0; extra == "test"
|
|
38
|
+
Requires-Dist: requests<3,>=2.31.0; extra == "test"
|
|
39
|
+
Dynamic: license-file
|
|
40
|
+
|
|
41
|
+
# ChunkSilo MCP Server
|
|
42
|
+
|
|
43
|
+
ChunkSilo is like a local Google for your documents. It uses semantic search — matching by meaning rather than exact keywords — so your LLM can find relevant information across all your files even when the wording differs from your query. Point it at your PDFs, Word docs, Markdown, and text files, and it builds a fully searchable index locally on your machine.
|
|
44
|
+
|
|
45
|
+
## Overview
|
|
46
|
+
|
|
47
|
+
- **No permissions headache**: Each user indexes only the files they already have access to. No centralized access-control system to build or maintain — document permissions stay exactly where they are.
|
|
48
|
+
- **No infrastructure required**: Runs entirely on the user's own machine as an MCP server. Nothing to deploy, no servers to manage.
|
|
49
|
+
- **Easy to set up**: Any user with an MCP-compatible LLM client can install, point at their document directories, and have everything indexed and searchable.
|
|
50
|
+
- **Works with what you have**: Supports PDF, DOCX, DOC, Markdown, and TXT from local folders, network drives, or shared mounts.
|
|
51
|
+
|
|
52
|
+
## Features
|
|
53
|
+
|
|
54
|
+
- **Local indexing and search**: All indexing and search run on your machine using locally stored models — ChunkSilo itself makes no external network calls when `retrieval.offline: true` is set. Note: search results are passed to your MCP client's LLM, which may be cloud-hosted.
|
|
55
|
+
- **Incremental indexing**: Only reindexes new or changed files, so re-runs are fast even on large document collections.
|
|
56
|
+
- **Heading-aware navigation**: Extracts headings from PDFs, Word docs, and Markdown so results include the full heading path (e.g. "Chapter 3 > Setup > Prerequisites").
|
|
57
|
+
- **Date filtering and recency boost**: Search within a date range or let recent documents rank higher automatically.
|
|
58
|
+
- **Dual retrieval**: Returns both meaning-based chunk matches and keyword-based filename matches separately, so file lookups don't get buried by unrelated content.
|
|
59
|
+
- **Multi-directory with per-folder rules**: Index multiple directories with individual include/exclude glob patterns — useful for shared drives with mixed content.
|
|
60
|
+
- **Confluence integration**: Optionally searches your Confluence instance alongside local files, with results returned in the same format.
|
|
61
|
+
- **Source links**: Each result includes a clickable link back to the source file or Confluence page in supported MCP clients.
|
|
62
|
+
|
|
63
|
+
## Installation
|
|
64
|
+
|
|
65
|
+
### Option A: Install from PyPI (Recommended)
|
|
66
|
+
|
|
67
|
+
Requires Python 3.11 or later. Models are downloaded automatically on first run (~250MB). The first run may appear to pause while models download — this is normal.
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
pip install chunksilo
|
|
71
|
+
|
|
72
|
+
# Or with Confluence support:
|
|
73
|
+
pip install chunksilo[confluence]
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
Then:
|
|
77
|
+
1. **Create** a config file at `~/.config/chunksilo/config.yaml` (see [Configuration](#configuration))
|
|
78
|
+
2. **Build** the index: `chunksilo --build-index`
|
|
79
|
+
3. **Configure** your MCP client (see [MCP Client Configuration](#mcp-client-configuration))
|
|
80
|
+
|
|
81
|
+
### Option B: Offline Bundle
|
|
82
|
+
|
|
83
|
+
A self-contained package with pre-downloaded models, ideal for air-gapped environments or systems without Python installed.
|
|
84
|
+
|
|
85
|
+
Download from the [Releases page](https://github.com/Chetic/chunksilo/releases):
|
|
86
|
+
|
|
87
|
+
1. **Download** the `chunksilo-vX.Y.Z-manylinux_2_34_x86_64.tar.gz` file
|
|
88
|
+
2. **Extract** and install:
|
|
89
|
+
|
|
90
|
+
```bash
|
|
91
|
+
tar -xzf chunksilo-vX.Y.Z-manylinux_2_34_x86_64.tar.gz
|
|
92
|
+
cd chunksilo
|
|
93
|
+
./setup.sh
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
3. **Edit** `config.yaml` to set your document directories
|
|
97
|
+
4. **Build** the index: `./venv/bin/chunksilo --build-index`
|
|
98
|
+
5. **Configure** your MCP client (see [MCP Client Configuration](#mcp-client-configuration))
|
|
99
|
+
|
|
100
|
+
## Configuration
|
|
101
|
+
|
|
102
|
+
ChunkSilo uses a single configuration file: `config.yaml`
|
|
103
|
+
|
|
104
|
+
### Configuration File
|
|
105
|
+
|
|
106
|
+
Edit `config.yaml` to configure your settings:
|
|
107
|
+
|
|
108
|
+
```yaml
|
|
109
|
+
# Indexing settings - used by chunksilo --build-index
|
|
110
|
+
indexing:
|
|
111
|
+
directories:
|
|
112
|
+
- "./data"
|
|
113
|
+
- "/mnt/nfs/shared-docs"
|
|
114
|
+
- path: "/mnt/samba/engineering"
|
|
115
|
+
include: ["**/*.pdf", "**/*.md"]
|
|
116
|
+
exclude: ["**/archive/**"]
|
|
117
|
+
chunk_size: 1600
|
|
118
|
+
chunk_overlap: 200
|
|
119
|
+
|
|
120
|
+
# Retrieval settings - used when searching
|
|
121
|
+
retrieval:
|
|
122
|
+
embed_top_k: 20
|
|
123
|
+
rerank_top_k: 5
|
|
124
|
+
score_threshold: 0.1
|
|
125
|
+
|
|
126
|
+
# Confluence integration (optional)
|
|
127
|
+
confluence:
|
|
128
|
+
url: "https://confluence.example.com"
|
|
129
|
+
username: "your-username"
|
|
130
|
+
api_token: "your-api-token"
|
|
131
|
+
|
|
132
|
+
# Storage paths (usually don't need to change)
|
|
133
|
+
storage:
|
|
134
|
+
storage_dir: "./storage"
|
|
135
|
+
model_cache_dir: "./models"
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
All settings are optional and have sensible defaults.
|
|
139
|
+
|
|
140
|
+
### Configuration Reference
|
|
141
|
+
|
|
142
|
+
#### Indexing Settings
|
|
143
|
+
|
|
144
|
+
| Setting | Default | Description |
|
|
145
|
+
| :--- | :--- | :--- |
|
|
146
|
+
| `indexing.directories` | `["./data"]` | List of directories to index (strings or objects) |
|
|
147
|
+
| `indexing.chunk_size` | `1600` | Maximum size of text chunks |
|
|
148
|
+
| `indexing.chunk_overlap` | `200` | Overlap between adjacent chunks |
|
|
149
|
+
|
|
150
|
+
**Per-directory options** (when using object format):
|
|
151
|
+
|
|
152
|
+
| Option | Default | Description |
|
|
153
|
+
| :--- | :--- | :--- |
|
|
154
|
+
| `path` | (required) | Directory path to index |
|
|
155
|
+
| `include` | `["**/*.pdf", "**/*.md", "**/*.txt", "**/*.docx", "**/*.doc"]` | Glob patterns for files to include |
|
|
156
|
+
| `exclude` | `[]` | Glob patterns for files to exclude |
|
|
157
|
+
| `recursive` | `true` | Whether to recurse into subdirectories |
|
|
158
|
+
| `enabled` | `true` | Whether to index this directory |
|
|
159
|
+
|
|
160
|
+
#### Retrieval Settings
|
|
161
|
+
|
|
162
|
+
| Setting | Default | Description |
|
|
163
|
+
| :--- | :--- | :--- |
|
|
164
|
+
| `retrieval.embed_model_name` | `BAAI/bge-small-en-v1.5` | Embedding model for vector search |
|
|
165
|
+
| `retrieval.embed_top_k` | `20` | Candidates from vector search before reranking |
|
|
166
|
+
| `retrieval.rerank_model_name` | `ms-marco-MiniLM-L-12-v2` | Reranker model |
|
|
167
|
+
| `retrieval.rerank_top_k` | `5` | Final results after reranking |
|
|
168
|
+
| `retrieval.rerank_candidates` | `100` | Maximum candidates sent to reranker |
|
|
169
|
+
| `retrieval.score_threshold` | `0.1` | Minimum score (0.0-1.0) for results |
|
|
170
|
+
| `retrieval.recency_boost` | `0.3` | Recency boost weight (0.0-1.0) |
|
|
171
|
+
| `retrieval.recency_half_life_days` | `365` | Days until recency boost halves |
|
|
172
|
+
| `retrieval.bm25_similarity_top_k` | `10` | Files returned by BM25 filename search |
|
|
173
|
+
| `retrieval.offline` | `false` | Prevent ML library network requests |
|
|
174
|
+
|
|
175
|
+
#### Confluence Settings (optional)
|
|
176
|
+
|
|
177
|
+
> **Note:** Confluence integration requires the optional dependency. Install with: `pip install chunksilo[confluence]`
|
|
178
|
+
|
|
179
|
+
| Setting | Default | Description |
|
|
180
|
+
| :--- | :--- | :--- |
|
|
181
|
+
| `confluence.url` | `""` | Confluence base URL (empty = disabled) |
|
|
182
|
+
| `confluence.username` | `""` | Confluence username |
|
|
183
|
+
| `confluence.api_token` | `""` | Confluence API token |
|
|
184
|
+
| `confluence.timeout` | `10.0` | Request timeout in seconds |
|
|
185
|
+
| `confluence.max_results` | `30` | Maximum results per search |
|
|
186
|
+
|
|
187
|
+
#### SSL Settings (optional)
|
|
188
|
+
|
|
189
|
+
| Setting | Default | Description |
|
|
190
|
+
| :--- | :--- | :--- |
|
|
191
|
+
| `ssl.ca_bundle_path` | `""` | Path to custom CA bundle file |
|
|
192
|
+
|
|
193
|
+
#### Storage Settings
|
|
194
|
+
|
|
195
|
+
| Setting | Default | Description |
|
|
196
|
+
| :--- | :--- | :--- |
|
|
197
|
+
| `storage.storage_dir` | `./storage` | Directory for vector index and state |
|
|
198
|
+
| `storage.model_cache_dir` | `./models` | Directory for model cache |
|
|
199
|
+
|
|
200
|
+
## CLI Usage
|
|
201
|
+
|
|
202
|
+
The `chunksilo` command provides indexing, searching, and model management:
|
|
203
|
+
|
|
204
|
+
```bash
|
|
205
|
+
# Build or update the search index
|
|
206
|
+
chunksilo --build-index
|
|
207
|
+
|
|
208
|
+
# Search for documents
|
|
209
|
+
chunksilo "your search query"
|
|
210
|
+
|
|
211
|
+
# Search with date filtering
|
|
212
|
+
chunksilo "quarterly report" --date-from 2024-01-01 --date-to 2024-03-31
|
|
213
|
+
|
|
214
|
+
# Output results as JSON
|
|
215
|
+
chunksilo "search query" --json
|
|
216
|
+
|
|
217
|
+
# Show verbose output (model loading, search stats)
|
|
218
|
+
chunksilo "search query" --verbose
|
|
219
|
+
|
|
220
|
+
# Pre-download ML models (useful before going offline)
|
|
221
|
+
chunksilo --download-models
|
|
222
|
+
|
|
223
|
+
# Use a custom config file
|
|
224
|
+
chunksilo --build-index --config /path/to/config.yaml
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
### CLI Options
|
|
228
|
+
|
|
229
|
+
| Option | Description |
|
|
230
|
+
| :--- | :--- |
|
|
231
|
+
| `query` | Search query text (positional argument) |
|
|
232
|
+
| `--build-index` | Build or update the search index, then exit |
|
|
233
|
+
| `--download-models` | Download required ML models, then exit |
|
|
234
|
+
| `--date-from` | Start date filter (YYYY-MM-DD format, inclusive) |
|
|
235
|
+
| `--date-to` | End date filter (YYYY-MM-DD format, inclusive) |
|
|
236
|
+
| `--json` | Output results as JSON instead of formatted text |
|
|
237
|
+
| `-v, --verbose` | Show diagnostic messages (model loading, search stats) |
|
|
238
|
+
| `--config` | Path to config.yaml (overrides auto-discovery) |
|
|
239
|
+
|
|
240
|
+
## MCP Client Configuration
|
|
241
|
+
|
|
242
|
+
Configure your MCP client to run ChunkSilo. Below are examples for common clients.
|
|
243
|
+
|
|
244
|
+
> **Note:** For PyPI installs, use `chunksilo-mcp` directly. For offline bundles, use the full path `/path/to/chunksilo/venv/bin/chunksilo-mcp`. You can find the PyPI-installed binary location with `which chunksilo-mcp`.
|
|
245
|
+
|
|
246
|
+
### Claude Code
|
|
247
|
+
|
|
248
|
+
Add chunksilo as an MCP server using the CLI:
|
|
249
|
+
|
|
250
|
+
**PyPI install:**
|
|
251
|
+
```bash
|
|
252
|
+
claude mcp add chunksilo --scope user -- chunksilo-mcp --config ~/.config/chunksilo/config.yaml
|
|
253
|
+
```
|
|
254
|
+
|
|
255
|
+
**Offline bundle:**
|
|
256
|
+
```bash
|
|
257
|
+
claude mcp add chunksilo --scope user -- /path/to/chunksilo/venv/bin/chunksilo-mcp --config /path/to/chunksilo/config.yaml
|
|
258
|
+
```
|
|
259
|
+
|
|
260
|
+
Verify it's connected:
|
|
261
|
+
|
|
262
|
+
```bash
|
|
263
|
+
claude mcp list
|
|
264
|
+
```
|
|
265
|
+
|
|
266
|
+
### Claude Desktop
|
|
267
|
+
|
|
268
|
+
Add to `~/Library/Application Support/Claude/claude_desktop_config.json` (macOS) or `%APPDATA%\Claude\claude_desktop_config.json` (Windows):
|
|
269
|
+
|
|
270
|
+
**PyPI install:**
|
|
271
|
+
```json
|
|
272
|
+
{
|
|
273
|
+
"mcpServers": {
|
|
274
|
+
"chunksilo": {
|
|
275
|
+
"command": "chunksilo-mcp",
|
|
276
|
+
"args": ["--config", "/path/to/config.yaml"]
|
|
277
|
+
}
|
|
278
|
+
}
|
|
279
|
+
}
|
|
280
|
+
```
|
|
281
|
+
|
|
282
|
+
**Offline bundle:**
|
|
283
|
+
```json
|
|
284
|
+
{
|
|
285
|
+
"mcpServers": {
|
|
286
|
+
"chunksilo": {
|
|
287
|
+
"command": "/path/to/chunksilo/venv/bin/chunksilo-mcp",
|
|
288
|
+
"args": ["--config", "/path/to/chunksilo/config.yaml"]
|
|
289
|
+
}
|
|
290
|
+
}
|
|
291
|
+
}
|
|
292
|
+
```
|
|
293
|
+
|
|
294
|
+
### Cline (VS Code Extension)
|
|
295
|
+
|
|
296
|
+
Add to `cline_mcp_settings.json` (typically in `~/.config/Code/User/globalStorage/saoudrizwan.claude-dev/settings/`):
|
|
297
|
+
|
|
298
|
+
**PyPI install:**
|
|
299
|
+
```json
|
|
300
|
+
{
|
|
301
|
+
"mcpServers": {
|
|
302
|
+
"chunksilo": {
|
|
303
|
+
"command": "chunksilo-mcp",
|
|
304
|
+
"args": ["--config", "/path/to/config.yaml"],
|
|
305
|
+
"disabled": false,
|
|
306
|
+
"autoApprove": []
|
|
307
|
+
}
|
|
308
|
+
}
|
|
309
|
+
}
|
|
310
|
+
```
|
|
311
|
+
|
|
312
|
+
**Offline bundle:**
|
|
313
|
+
```json
|
|
314
|
+
{
|
|
315
|
+
"mcpServers": {
|
|
316
|
+
"chunksilo": {
|
|
317
|
+
"command": "/path/to/chunksilo/venv/bin/chunksilo-mcp",
|
|
318
|
+
"args": ["--config", "/path/to/chunksilo/config.yaml"],
|
|
319
|
+
"disabled": false,
|
|
320
|
+
"autoApprove": []
|
|
321
|
+
}
|
|
322
|
+
}
|
|
323
|
+
}
|
|
324
|
+
```
|
|
325
|
+
|
|
326
|
+
### Roo Code (VS Code Extension)
|
|
327
|
+
|
|
328
|
+
Add to `mcp_settings.json` (typically in `~/.config/Code/User/globalStorage/rooveterinaryinc.roo-cline/settings/`):
|
|
329
|
+
|
|
330
|
+
**PyPI install:**
|
|
331
|
+
```json
|
|
332
|
+
{
|
|
333
|
+
"mcpServers": {
|
|
334
|
+
"chunksilo": {
|
|
335
|
+
"command": "chunksilo-mcp",
|
|
336
|
+
"args": ["--config", "/path/to/config.yaml"]
|
|
337
|
+
}
|
|
338
|
+
}
|
|
339
|
+
}
|
|
340
|
+
```
|
|
341
|
+
|
|
342
|
+
**Offline bundle:**
|
|
343
|
+
```json
|
|
344
|
+
{
|
|
345
|
+
"mcpServers": {
|
|
346
|
+
"chunksilo": {
|
|
347
|
+
"command": "/path/to/chunksilo/venv/bin/chunksilo-mcp",
|
|
348
|
+
"args": ["--config", "/path/to/chunksilo/config.yaml"]
|
|
349
|
+
}
|
|
350
|
+
}
|
|
351
|
+
}
|
|
352
|
+
```
|
|
353
|
+
|
|
354
|
+
## Troubleshooting
|
|
355
|
+
|
|
356
|
+
- **Index missing**: Run `chunksilo --build-index` (PyPI install) or `./venv/bin/chunksilo --build-index` (offline bundle).
|
|
357
|
+
- **Retrieval errors**: Check paths in your MCP client configuration.
|
|
358
|
+
- **Offline mode**: PyPI installs default to `retrieval.offline: false`, so models are downloaded automatically on first use. The offline bundle includes pre-downloaded models and sets `retrieval.offline: true`. For PyPI installs, set `retrieval.offline: true` in `config.yaml` after the initial model download to prevent further network calls.
|
|
359
|
+
- **Confluence Integration**: Install with `pip install chunksilo[confluence]`, then set `confluence.url`, `confluence.username`, and `confluence.api_token` in `config.yaml`.
|
|
360
|
+
- **Custom CA Bundle**: Set `ssl.ca_bundle_path` in `config.yaml` for custom certificates.
|
|
361
|
+
- **Network mounts**: Unavailable directories are skipped with a warning; indexing continues with available directories.
|
|
362
|
+
- **Legacy .doc files**: LibreOffice must be installed so that .doc files can be converted to .docx automatically. If LibreOffice is not found, .doc files are skipped with a warning. Once converted, .doc files support full heading extraction.
|
|
363
|
+
|
|
364
|
+
## License
|
|
365
|
+
|
|
366
|
+
Apache-2.0. See [LICENSE](LICENSE) for details.
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
chunksilo/__init__.py,sha256=Ph1w-3A5CKoGdxzUNVlrB483eOM5JxAQN8K88-6HbuY,121
|
|
2
|
+
chunksilo/__main__.py,sha256=eY8-KfJfBz0nibDPY_jv2RvkLXEx7ZDSPRWiDJb7PpY,39
|
|
3
|
+
chunksilo/cfgload.py,sha256=A7ab2RkeYQhFTTvn56I900lWpXCqXb8ao8DGYeUai1U,4359
|
|
4
|
+
chunksilo/cli.py,sha256=ZKXzW-HOmnLXC4Ynu-D86XIbp7bdQ_OYVHuHFVlj41U,4170
|
|
5
|
+
chunksilo/confluence_html_formatter.py,sha256=D8pb5TCrai6exIqeajH49y4D_t0jfQkmI6aNm4BzPIg,2828
|
|
6
|
+
chunksilo/index.py,sha256=ECSTH8c0ZhnTxAHxM7dcCfFOGTk99STHuyDhc3uMCO0,51618
|
|
7
|
+
chunksilo/search.py,sha256=8_6zuvyVdZHNMCBHrK3NcP0Ct7oYeCSLuoOHT8RxD0o,28722
|
|
8
|
+
chunksilo/server.py,sha256=PhQWMvEGb3UqWwk0tm44kTOnKH2NH4NmshnKx11xwh8,3374
|
|
9
|
+
chunksilo-2.0.0.dist-info/licenses/LICENSE,sha256=kda7NTahQy3nKvLe-LGIVCdI-qePTyqNNx2e6HLVH3k,10766
|
|
10
|
+
chunksilo-2.0.0.dist-info/licenses/NOTICE,sha256=58GCfasR-XT7RPvz9OoWb4NrPg-3AqEwzw9FJOhngZo,1228
|
|
11
|
+
chunksilo-2.0.0.dist-info/METADATA,sha256=AA4R48_d6h7V89dHM9hb0av53WlfrDKZzALfOZ0jywU,13360
|
|
12
|
+
chunksilo-2.0.0.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
|
|
13
|
+
chunksilo-2.0.0.dist-info/entry_points.txt,sha256=TfgrfBQyIRHMG5NFaXR-owDLsjHBCxjJ4YGIGBnVzbQ,87
|
|
14
|
+
chunksilo-2.0.0.dist-info/top_level.txt,sha256=xfAH0GhTfZbwkbi_DgUA5keNP-osqVNcoZjwkhpwVms,10
|
|
15
|
+
chunksilo-2.0.0.dist-info/RECORD,,
|