pb-dolphin 0.1.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pb_dolphin-0.1.6/.gitignore +33 -0
- pb_dolphin-0.1.6/PKG-INFO +280 -0
- pb_dolphin-0.1.6/README.md +225 -0
- pb_dolphin-0.1.6/kb/__init__.py +5 -0
- pb_dolphin-0.1.6/kb/api/__init__.py +3 -0
- pb_dolphin-0.1.6/kb/api/app.py +302 -0
- pb_dolphin-0.1.6/kb/api/search_backend.py +143 -0
- pb_dolphin-0.1.6/kb/api/server.py +98 -0
- pb_dolphin-0.1.6/kb/chunkers/__init__.py +19 -0
- pb_dolphin-0.1.6/kb/chunkers/fallback_chunker.py +101 -0
- pb_dolphin-0.1.6/kb/chunkers/md_chunker.py +305 -0
- pb_dolphin-0.1.6/kb/chunkers/py_chunker.py +264 -0
- pb_dolphin-0.1.6/kb/chunkers/registry.py +324 -0
- pb_dolphin-0.1.6/kb/chunkers/repo_config.py +188 -0
- pb_dolphin-0.1.6/kb/chunkers/token_utils.py +132 -0
- pb_dolphin-0.1.6/kb/chunkers/ts_chunker.py +471 -0
- pb_dolphin-0.1.6/kb/chunkers/types.py +42 -0
- pb_dolphin-0.1.6/kb/cli.py +140 -0
- pb_dolphin-0.1.6/kb/config.py +177 -0
- pb_dolphin-0.1.6/kb/config.yaml +35 -0
- pb_dolphin-0.1.6/kb/config_template.toml +118 -0
- pb_dolphin-0.1.6/kb/embeddings/provider.py +189 -0
- pb_dolphin-0.1.6/kb/hashing.py +125 -0
- pb_dolphin-0.1.6/kb/ignores.py +98 -0
- pb_dolphin-0.1.6/kb/ingest/__init__.py +4 -0
- pb_dolphin-0.1.6/kb/ingest/_helpers.py +140 -0
- pb_dolphin-0.1.6/kb/ingest/cli.py +307 -0
- pb_dolphin-0.1.6/kb/ingest/dedup.py +73 -0
- pb_dolphin-0.1.6/kb/ingest/error_logging.py +161 -0
- pb_dolphin-0.1.6/kb/ingest/lang.py +51 -0
- pb_dolphin-0.1.6/kb/ingest/pipeline.py +431 -0
- pb_dolphin-0.1.6/kb/ingest/scanner.py +130 -0
- pb_dolphin-0.1.6/kb/retrieval/__init__.py +5 -0
- pb_dolphin-0.1.6/kb/retrieval/rankers.py +177 -0
- pb_dolphin-0.1.6/kb/store/__init__.py +4 -0
- pb_dolphin-0.1.6/kb/store/lancedb_store.py +211 -0
- pb_dolphin-0.1.6/kb/store/sql_models.py +118 -0
- pb_dolphin-0.1.6/kb/store/sqlite_meta.py +647 -0
- pb_dolphin-0.1.6/personas/__init__.py +0 -0
- pb_dolphin-0.1.6/personas/__main__.py +6 -0
- pb_dolphin-0.1.6/personas/cast/big-balls/persona.toml +33 -0
- pb_dolphin-0.1.6/personas/cast/big-balls/prompt.md +21 -0
- pb_dolphin-0.1.6/personas/cast/chief-of-staff/persona.toml +33 -0
- pb_dolphin-0.1.6/personas/cast/chief-of-staff/system.md +97 -0
- pb_dolphin-0.1.6/personas/cast/deep-dive/persona.toml +21 -0
- pb_dolphin-0.1.6/personas/cast/deep-dive/prompt.md +22 -0
- pb_dolphin-0.1.6/personas/cast/fancy-slave/persona.toml +21 -0
- pb_dolphin-0.1.6/personas/cast/fancy-slave/prompt.md +24 -0
- pb_dolphin-0.1.6/personas/cast/journalist/persona.toml +25 -0
- pb_dolphin-0.1.6/personas/cast/journalist/prompt.md +21 -0
- pb_dolphin-0.1.6/personas/cast/little-ripper/persona.toml +17 -0
- pb_dolphin-0.1.6/personas/cast/little-ripper/prompt.md +21 -0
- pb_dolphin-0.1.6/personas/cast/popeye/persona.toml +15 -0
- pb_dolphin-0.1.6/personas/cast/popeye/prompt.md +7 -0
- pb_dolphin-0.1.6/personas/cast/quiet-kid/persona.toml +25 -0
- pb_dolphin-0.1.6/personas/cast/quiet-kid/prompt.md +21 -0
- pb_dolphin-0.1.6/personas/cast/smartest-guy/persona.toml +17 -0
- pb_dolphin-0.1.6/personas/cast/smartest-guy/system.md +40 -0
- pb_dolphin-0.1.6/personas/docs/output_examples.md +374 -0
- pb_dolphin-0.1.6/personas/src/__init__.py +0 -0
- pb_dolphin-0.1.6/personas/src/continue_utils.py +213 -0
- pb_dolphin-0.1.6/personas/src/kilocode_utils.py +449 -0
- pb_dolphin-0.1.6/personas/src/persona_utils.py +493 -0
- pb_dolphin-0.1.6/personas/src/personas.py +476 -0
- pb_dolphin-0.1.6/personas/src/system.md +12 -0
- pb_dolphin-0.1.6/pyproject.toml +125 -0
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# Generated configuration file
|
|
2
|
+
mcpo_config.json
|
|
3
|
+
.mcpo.pid
|
|
4
|
+
|
|
5
|
+
# Local environment variables
|
|
6
|
+
.env
|
|
7
|
+
.env.*
|
|
8
|
+
!.env.example
|
|
9
|
+
|
|
10
|
+
# continue configs (contain secrets)
|
|
11
|
+
.continue/*
|
|
12
|
+
.continue/**/*
|
|
13
|
+
|
|
14
|
+
# generated persona configs (contain secrets and are private)
|
|
15
|
+
.continue-config/
|
|
16
|
+
.kilocode-config/
|
|
17
|
+
|
|
18
|
+
# output dirs
|
|
19
|
+
kilocode-config/*
|
|
20
|
+
|
|
21
|
+
# logs
|
|
22
|
+
*.log
|
|
23
|
+
|
|
24
|
+
# Python cache
|
|
25
|
+
__pycache__/
|
|
26
|
+
|
|
27
|
+
.coverage
|
|
28
|
+
.scratch/*
|
|
29
|
+
docs/ignore/*
|
|
30
|
+
|
|
31
|
+
# test artifacts
|
|
32
|
+
tests/**/test-results.xml
|
|
33
|
+
tests/**/coverage.xml
|
|
@@ -0,0 +1,280 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pb-dolphin
|
|
3
|
+
Version: 0.1.6
|
|
4
|
+
Summary: Full-stack AI enablement platform
|
|
5
|
+
Author-email: "Plastic Beach, LLC" <taylor@plasticbeach.email>, tdc93 <taylor@plasticbeach.email>
|
|
6
|
+
Maintainer-email: "Plastic Beach, LLC" <taylor@plasticbeach.email>, tdc93 <taylor@plasticbeach.email>
|
|
7
|
+
License: MIT
|
|
8
|
+
Keywords: ai,knowledge-base,ml,nlp
|
|
9
|
+
Classifier: Development Status :: 4 - Beta
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
15
|
+
Requires-Python: <3.14,>=3.12
|
|
16
|
+
Requires-Dist: docker>=7.1.0
|
|
17
|
+
Requires-Dist: fastapi
|
|
18
|
+
Requires-Dist: lancedb
|
|
19
|
+
Requires-Dist: markdown-it-py
|
|
20
|
+
Requires-Dist: mcp-server-git>=0.0.1
|
|
21
|
+
Requires-Dist: mcp-server-time>=0.1.0
|
|
22
|
+
Requires-Dist: mcpo>=0.0.19
|
|
23
|
+
Requires-Dist: openai
|
|
24
|
+
Requires-Dist: pathspec
|
|
25
|
+
Requires-Dist: pydantic
|
|
26
|
+
Requires-Dist: python-dotenv
|
|
27
|
+
Requires-Dist: pyyaml
|
|
28
|
+
Requires-Dist: requests>=2.32.4
|
|
29
|
+
Requires-Dist: sqlite-utils
|
|
30
|
+
Requires-Dist: sqlmodel
|
|
31
|
+
Requires-Dist: tiktoken
|
|
32
|
+
Requires-Dist: tomli; python_full_version < '3.11'
|
|
33
|
+
Requires-Dist: tree-sitter-javascript>=0.25.0
|
|
34
|
+
Requires-Dist: tree-sitter-python>=0.25.0
|
|
35
|
+
Requires-Dist: tree-sitter>=0.25.0
|
|
36
|
+
Requires-Dist: typer
|
|
37
|
+
Requires-Dist: uvicorn
|
|
38
|
+
Provides-Extra: dev
|
|
39
|
+
Requires-Dist: black>=23.0.0; extra == 'dev'
|
|
40
|
+
Requires-Dist: isort>=5.12.0; extra == 'dev'
|
|
41
|
+
Requires-Dist: mypy>=1.5.0; extra == 'dev'
|
|
42
|
+
Requires-Dist: pre-commit>=3.4.0; extra == 'dev'
|
|
43
|
+
Requires-Dist: ruff>=0.1.0; extra == 'dev'
|
|
44
|
+
Provides-Extra: test
|
|
45
|
+
Requires-Dist: fakeredis>=2.18.0; extra == 'test'
|
|
46
|
+
Requires-Dist: freezegun>=1.2.0; extra == 'test'
|
|
47
|
+
Requires-Dist: httpx>=0.25.0; extra == 'test'
|
|
48
|
+
Requires-Dist: pytest-asyncio>=0.21.0; extra == 'test'
|
|
49
|
+
Requires-Dist: pytest-cov>=4.1.0; extra == 'test'
|
|
50
|
+
Requires-Dist: pytest-mock>=3.11.0; extra == 'test'
|
|
51
|
+
Requires-Dist: pytest-xdist>=3.3.0; extra == 'test'
|
|
52
|
+
Requires-Dist: pytest>=7.4.0; extra == 'test'
|
|
53
|
+
Requires-Dist: responses>=0.24.0; extra == 'test'
|
|
54
|
+
Description-Content-Type: text/markdown
|
|
55
|
+
|
|
56
|
+
# 🐬 Dolphin
|
|
57
|
+
|
|
58
|
+
**⚠️ EXPERIMENTAL - This library is under active development. APIs and interfaces are unstable and subject to change without notice.**
|
|
59
|
+
|
|
60
|
+
A semantic code search and knowledge management system with AI-native interfaces (MCP, REST API, CLI).
|
|
61
|
+
|
|
62
|
+
## Quick Start
|
|
63
|
+
|
|
64
|
+
### Installation
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
# Install from PyPI
|
|
68
|
+
pip install pb-dolphin
|
|
69
|
+
|
|
70
|
+
# ⚠️ IMPORTANT: Ensure OPENAI_API_KEY is set as an environment variable
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
### Basic Usage
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
# Initialize global knowledge store and index a repository
|
|
77
|
+
dolphin init
|
|
78
|
+
dolphin add-repo my-project /path/to/project
|
|
79
|
+
dolphin index my-project
|
|
80
|
+
|
|
81
|
+
# Search your indexed code
|
|
82
|
+
dolphin search "authentication logic"
|
|
83
|
+
|
|
84
|
+
# Start API server
|
|
85
|
+
dolphin serve
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
## Core Commands
|
|
89
|
+
|
|
90
|
+
- `dolphin init` - Initialize configuration (auto-creates `~/.dolphin/config.toml`)
|
|
91
|
+
- `dolphin init --repo` - Create repo-specific config in current directory
|
|
92
|
+
- `dolphin add-repo <name> <path>` - Register a repository for indexing
|
|
93
|
+
- `dolphin index <name>` - Index a repository with language-aware chunking
|
|
94
|
+
- `dolphin search <query>` - Search indexed code semantically
|
|
95
|
+
- `dolphin serve` - Start REST API server (port 7777)
|
|
96
|
+
- `dolphin config --show` - Display current configuration
|
|
97
|
+
|
|
98
|
+
## Architecture
|
|
99
|
+
|
|
100
|
+
### High-Level Overview
|
|
101
|
+
|
|
102
|
+
```
|
|
103
|
+
┌──────────────────────────────────────────┐
|
|
104
|
+
│ AI Interfaces (Claude, Continue, etc) │
|
|
105
|
+
└──────────────┬───────────────────────────┘
|
|
106
|
+
│ MCP Protocol
|
|
107
|
+
▼
|
|
108
|
+
┌──────────────────────────────────────────┐
|
|
109
|
+
│ Dolphin Knowledge Base │
|
|
110
|
+
│ ┌─────────────┐ ┌────────────────┐ │
|
|
111
|
+
│ │ MCP Bridge │◄──►│ REST API │ │
|
|
112
|
+
│ │ (TypeScript)│ │ (Python/FastAPI)│ │
|
|
113
|
+
│ └─────────────┘ └────────┬────────┘ │
|
|
114
|
+
└──────────────────────────────┼───────────┘
|
|
115
|
+
│
|
|
116
|
+
┌───────────────┴────────────┐
|
|
117
|
+
▼ ▼
|
|
118
|
+
┌─────────┐ ┌──────────┐
|
|
119
|
+
│LanceDB │ │ SQLite │
|
|
120
|
+
│(Vectors)│ │(Metadata)│
|
|
121
|
+
└─────────┘ └──────────┘
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
### Key Features
|
|
125
|
+
|
|
126
|
+
- **Language-Aware Chunking** - Intelligent code parsing for Python, TypeScript, JavaScript, Markdown
|
|
127
|
+
- **Semantic Search** - OpenAI embeddings with LanceDB vector storage
|
|
128
|
+
- **MCP Support** - Native Model Context Protocol integration for Claude Desktop
|
|
129
|
+
- **REST API** - FastAPI server with search, retrieval, and metadata endpoints
|
|
130
|
+
- **Unified CLI** - Single `dolphin` command for all operations
|
|
131
|
+
- **Auto-Configuration** - Smart config hierarchy (repo → user → defaults)
|
|
132
|
+
|
|
133
|
+
## Environment Variables
|
|
134
|
+
|
|
135
|
+
Dolphin requires the following environment variables depending on your usage:
|
|
136
|
+
|
|
137
|
+
### Required for OpenAI Embeddings
|
|
138
|
+
|
|
139
|
+
```bash
|
|
140
|
+
# Required when using OpenAI embeddings (recommended for production)
|
|
141
|
+
export OPENAI_API_KEY="sk-your-openai-api-key-here"
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
### Setting Environment Variables
|
|
145
|
+
|
|
146
|
+
**macOS/Linux (bash; zsh users should write to `~/.zshrc` instead):**
|
|
147
|
+
```bash
|
|
148
|
+
echo 'export OPENAI_API_KEY="sk-your-key-here"' >> ~/.bashrc
|
|
149
|
+
source ~/.bashrc
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
**macOS/Linux (fish):**
|
|
153
|
+
```fish
|
|
154
|
+
set -Ux OPENAI_API_KEY "sk-your-key-here"
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
**Windows (Command Prompt):**
|
|
158
|
+
```cmd
|
|
159
|
+
setx OPENAI_API_KEY "sk-your-key-here"
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
**Windows (PowerShell):**
|
|
163
|
+
```powershell
|
|
164
|
+
[System.Environment]::SetEnvironmentVariable('OPENAI_API_KEY', 'sk-your-key-here', 'User')
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
### Getting Your OpenAI API Key
|
|
168
|
+
|
|
169
|
+
1. Visit [OpenAI Platform](https://platform.openai.com/)
|
|
170
|
+
2. Sign up or log in to your account
|
|
171
|
+
3. Navigate to [API Keys](https://platform.openai.com/api-keys)
|
|
172
|
+
4. Click "Create new secret key"
|
|
173
|
+
5. Copy the key and set it as `OPENAI_API_KEY`
|
|
174
|
+
|
|
175
|
+
## Configuration
|
|
176
|
+
|
|
177
|
+
Dolphin uses a multi-level configuration system:
|
|
178
|
+
|
|
179
|
+
1. **Repo-specific** (`./.dolphin/config.toml`) - Per-repository chunking settings
|
|
180
|
+
2. **User-global** (`~/.dolphin/config.toml`) - Auto-created on first use
|
|
181
|
+
|
|
182
|
+
### Example Config
|
|
183
|
+
|
|
184
|
+
```toml
|
|
185
|
+
# ~/.dolphin/config.toml
|
|
186
|
+
default_embed_model = "large" # or "small"
|
|
187
|
+
|
|
188
|
+
[embedding]
|
|
189
|
+
provider = "openai"
|
|
190
|
+
batch_size = 100
|
|
191
|
+
|
|
192
|
+
[retrieval]
|
|
193
|
+
top_k = 8
|
|
194
|
+
score_cutoff = 0.15
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
## Claude Desktop Integration (MCP)
|
|
198
|
+
|
|
199
|
+
Add to your `claude_desktop_config.json`:
|
|
200
|
+
|
|
201
|
+
```json
|
|
202
|
+
{
|
|
203
|
+
"mcpServers": {
|
|
204
|
+
"dolphin": {
|
|
205
|
+
"command": "bun",
|
|
206
|
+
"args": ["run", "/path/to/dolphin/mcp-bridge/src/index.ts"],
|
|
207
|
+
"env": {
|
|
208
|
+
"OPENAI_API_KEY": "sk-..."
|
|
209
|
+
}
|
|
210
|
+
}
|
|
211
|
+
}
|
|
212
|
+
}
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
Start the REST API server that the MCP bridge talks to: `dolphin serve`
|
|
216
|
+
|
|
217
|
+
Available MCP tools: `search_knowledge`, `fetch_chunk`, `fetch_lines`, `get_vector_store_info`
|
|
218
|
+
|
|
219
|
+
## REST API
|
|
220
|
+
|
|
221
|
+
```bash
|
|
222
|
+
# Start server
|
|
223
|
+
dolphin serve
|
|
224
|
+
|
|
225
|
+
# Search
|
|
226
|
+
curl -X POST http://127.0.0.1:7777/v1/search \
|
|
227
|
+
-H "Content-Type: application/json" \
|
|
228
|
+
-d '{"query": "authentication", "top_k": 5}'
|
|
229
|
+
|
|
230
|
+
# List repositories
|
|
231
|
+
curl http://127.0.0.1:7777/v1/repos
|
|
232
|
+
|
|
233
|
+
# Health check
|
|
234
|
+
curl http://127.0.0.1:7777/v1/health
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
## Development Status
|
|
238
|
+
|
|
239
|
+
**Current**: Pre-alpha (0.1.x)
|
|
240
|
+
|
|
241
|
+
- ✅ Core indexing and search pipeline
|
|
242
|
+
- ✅ Language-aware chunking (Python, TS, JS, Markdown)
|
|
243
|
+
- ✅ REST API with MCP bridge
|
|
244
|
+
- ⚠️ Developmental stage
|
|
245
|
+
|
|
246
|
+
**Upcoming**:
|
|
247
|
+
- Performance optimization
|
|
248
|
+
- Production hardening
|
|
249
|
+
- Evaluation framework
|
|
250
|
+
- Expanded language support
|
|
251
|
+
|
|
252
|
+
## Requirements
|
|
253
|
+
|
|
254
|
+
- Python ≥3.12, <3.14
|
|
255
|
+
- OpenAI API key (for embeddings)
|
|
256
|
+
- Bun (for MCP bridge)
|
|
257
|
+
- Git (for repository scanning)
|
|
258
|
+
|
|
259
|
+
## Testing
|
|
260
|
+
|
|
261
|
+
```bash
|
|
262
|
+
# Run all tests
|
|
263
|
+
uv run pytest
|
|
264
|
+
|
|
265
|
+
# Run specific test suite
|
|
266
|
+
uv run pytest tests/unit/
|
|
267
|
+
uv run pytest tests/integration/
|
|
268
|
+
```
|
|
269
|
+
|
|
270
|
+
## License
|
|
271
|
+
|
|
272
|
+
MIT License
|
|
273
|
+
|
|
274
|
+
## Acknowledgments
|
|
275
|
+
|
|
276
|
+
Built with [LanceDB](https://lancedb.com/), [OpenAI](https://openai.com/), [FastAPI](https://fastapi.tiangolo.com/), and [Bun](https://bun.sh/)
|
|
277
|
+
|
|
278
|
+
---
|
|
279
|
+
|
|
280
|
+
**⚠️ Remember**: This is experimental software under active development. Use at your own risk.
|
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
# 🐬 Dolphin
|
|
2
|
+
|
|
3
|
+
**⚠️ EXPERIMENTAL - This library is under active development. APIs and interfaces are unstable and subject to change without notice.**
|
|
4
|
+
|
|
5
|
+
A semantic code search and knowledge management system with AI-native interfaces (MCP, REST API, CLI).
|
|
6
|
+
|
|
7
|
+
## Quick Start
|
|
8
|
+
|
|
9
|
+
### Installation
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
# Install from PyPI
|
|
13
|
+
pip install pb-dolphin
|
|
14
|
+
|
|
15
|
+
# ⚠️ IMPORTANT: Ensure OPENAI_API_KEY is set as an environment variable
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
### Basic Usage
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
# Initialize global knowledge store and index a repository
|
|
22
|
+
dolphin init
|
|
23
|
+
dolphin add-repo my-project /path/to/project
|
|
24
|
+
dolphin index my-project
|
|
25
|
+
|
|
26
|
+
# Search your indexed code
|
|
27
|
+
dolphin search "authentication logic"
|
|
28
|
+
|
|
29
|
+
# Start API server
|
|
30
|
+
dolphin serve
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## Core Commands
|
|
34
|
+
|
|
35
|
+
- `dolphin init` - Initialize configuration (auto-creates `~/.dolphin/config.toml`)
|
|
36
|
+
- `dolphin init --repo` - Create repo-specific config in current directory
|
|
37
|
+
- `dolphin add-repo <name> <path>` - Register a repository for indexing
|
|
38
|
+
- `dolphin index <name>` - Index a repository with language-aware chunking
|
|
39
|
+
- `dolphin search <query>` - Search indexed code semantically
|
|
40
|
+
- `dolphin serve` - Start REST API server (port 7777)
|
|
41
|
+
- `dolphin config --show` - Display current configuration
|
|
42
|
+
|
|
43
|
+
## Architecture
|
|
44
|
+
|
|
45
|
+
### High-Level Overview
|
|
46
|
+
|
|
47
|
+
```
|
|
48
|
+
┌──────────────────────────────────────────┐
|
|
49
|
+
│ AI Interfaces (Claude, Continue, etc) │
|
|
50
|
+
└──────────────┬───────────────────────────┘
|
|
51
|
+
│ MCP Protocol
|
|
52
|
+
▼
|
|
53
|
+
┌──────────────────────────────────────────┐
|
|
54
|
+
│ Dolphin Knowledge Base │
|
|
55
|
+
│ ┌─────────────┐ ┌────────────────┐ │
|
|
56
|
+
│ │ MCP Bridge │◄──►│ REST API │ │
|
|
57
|
+
│ │ (TypeScript)│ │ (Python/FastAPI)│ │
|
|
58
|
+
│ └─────────────┘ └────────┬────────┘ │
|
|
59
|
+
└──────────────────────────────┼───────────┘
|
|
60
|
+
│
|
|
61
|
+
┌───────────────┴────────────┐
|
|
62
|
+
▼ ▼
|
|
63
|
+
┌─────────┐ ┌──────────┐
|
|
64
|
+
│LanceDB │ │ SQLite │
|
|
65
|
+
│(Vectors)│ │(Metadata)│
|
|
66
|
+
└─────────┘ └──────────┘
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
### Key Features
|
|
70
|
+
|
|
71
|
+
- **Language-Aware Chunking** - Intelligent code parsing for Python, TypeScript, JavaScript, Markdown
|
|
72
|
+
- **Semantic Search** - OpenAI embeddings with LanceDB vector storage
|
|
73
|
+
- **MCP Support** - Native Model Context Protocol integration for Claude Desktop
|
|
74
|
+
- **REST API** - FastAPI server with search, retrieval, and metadata endpoints
|
|
75
|
+
- **Unified CLI** - Single `dolphin` command for all operations
|
|
76
|
+
- **Auto-Configuration** - Smart config hierarchy (repo → user → defaults)
|
|
77
|
+
|
|
78
|
+
## Environment Variables
|
|
79
|
+
|
|
80
|
+
Dolphin requires the following environment variables depending on your usage:
|
|
81
|
+
|
|
82
|
+
### Required for OpenAI Embeddings
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
# Required when using OpenAI embeddings (recommended for production)
|
|
86
|
+
export OPENAI_API_KEY="sk-your-openai-api-key-here"
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
### Setting Environment Variables
|
|
90
|
+
|
|
91
|
+
**macOS/Linux (bash; zsh users should write to `~/.zshrc` instead):**
|
|
92
|
+
```bash
|
|
93
|
+
echo 'export OPENAI_API_KEY="sk-your-key-here"' >> ~/.bashrc
|
|
94
|
+
source ~/.bashrc
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
**macOS/Linux (fish):**
|
|
98
|
+
```fish
|
|
99
|
+
set -Ux OPENAI_API_KEY "sk-your-key-here"
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
**Windows (Command Prompt):**
|
|
103
|
+
```cmd
|
|
104
|
+
setx OPENAI_API_KEY "sk-your-key-here"
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
**Windows (PowerShell):**
|
|
108
|
+
```powershell
|
|
109
|
+
[System.Environment]::SetEnvironmentVariable('OPENAI_API_KEY', 'sk-your-key-here', 'User')
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
### Getting Your OpenAI API Key
|
|
113
|
+
|
|
114
|
+
1. Visit [OpenAI Platform](https://platform.openai.com/)
|
|
115
|
+
2. Sign up or log in to your account
|
|
116
|
+
3. Navigate to [API Keys](https://platform.openai.com/api-keys)
|
|
117
|
+
4. Click "Create new secret key"
|
|
118
|
+
5. Copy the key and set it as `OPENAI_API_KEY`
|
|
119
|
+
|
|
120
|
+
## Configuration
|
|
121
|
+
|
|
122
|
+
Dolphin uses a multi-level configuration system:
|
|
123
|
+
|
|
124
|
+
1. **Repo-specific** (`./.dolphin/config.toml`) - Per-repository chunking settings
|
|
125
|
+
2. **User-global** (`~/.dolphin/config.toml`) - Auto-created on first use
|
|
126
|
+
|
|
127
|
+
### Example Config
|
|
128
|
+
|
|
129
|
+
```toml
|
|
130
|
+
# ~/.dolphin/config.toml
|
|
131
|
+
default_embed_model = "large" # or "small"
|
|
132
|
+
|
|
133
|
+
[embedding]
|
|
134
|
+
provider = "openai"
|
|
135
|
+
batch_size = 100
|
|
136
|
+
|
|
137
|
+
[retrieval]
|
|
138
|
+
top_k = 8
|
|
139
|
+
score_cutoff = 0.15
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
## Claude Desktop Integration (MCP)
|
|
143
|
+
|
|
144
|
+
Add to your `claude_desktop_config.json`:
|
|
145
|
+
|
|
146
|
+
```json
|
|
147
|
+
{
|
|
148
|
+
"mcpServers": {
|
|
149
|
+
"dolphin": {
|
|
150
|
+
"command": "bun",
|
|
151
|
+
"args": ["run", "/path/to/dolphin/mcp-bridge/src/index.ts"],
|
|
152
|
+
"env": {
|
|
153
|
+
"OPENAI_API_KEY": "sk-..."
|
|
154
|
+
}
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
}
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
Start the REST API server that the MCP bridge talks to: `dolphin serve`
|
|
161
|
+
|
|
162
|
+
Available MCP tools: `search_knowledge`, `fetch_chunk`, `fetch_lines`, `get_vector_store_info`
|
|
163
|
+
|
|
164
|
+
## REST API
|
|
165
|
+
|
|
166
|
+
```bash
|
|
167
|
+
# Start server
|
|
168
|
+
dolphin serve
|
|
169
|
+
|
|
170
|
+
# Search
|
|
171
|
+
curl -X POST http://127.0.0.1:7777/v1/search \
|
|
172
|
+
-H "Content-Type: application/json" \
|
|
173
|
+
-d '{"query": "authentication", "top_k": 5}'
|
|
174
|
+
|
|
175
|
+
# List repositories
|
|
176
|
+
curl http://127.0.0.1:7777/v1/repos
|
|
177
|
+
|
|
178
|
+
# Health check
|
|
179
|
+
curl http://127.0.0.1:7777/v1/health
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
## Development Status
|
|
183
|
+
|
|
184
|
+
**Current**: Pre-alpha (0.1.x)
|
|
185
|
+
|
|
186
|
+
- ✅ Core indexing and search pipeline
|
|
187
|
+
- ✅ Language-aware chunking (Python, TS, JS, Markdown)
|
|
188
|
+
- ✅ REST API with MCP bridge
|
|
189
|
+
- ⚠️ Developmental stage
|
|
190
|
+
|
|
191
|
+
**Upcoming**:
|
|
192
|
+
- Performance optimization
|
|
193
|
+
- Production hardening
|
|
194
|
+
- Evaluation framework
|
|
195
|
+
- Expanded language support
|
|
196
|
+
|
|
197
|
+
## Requirements
|
|
198
|
+
|
|
199
|
+
- Python ≥3.12, <3.14
|
|
200
|
+
- OpenAI API key (for embeddings)
|
|
201
|
+
- Bun (for MCP bridge)
|
|
202
|
+
- Git (for repository scanning)
|
|
203
|
+
|
|
204
|
+
## Testing
|
|
205
|
+
|
|
206
|
+
```bash
|
|
207
|
+
# Run all tests
|
|
208
|
+
uv run pytest
|
|
209
|
+
|
|
210
|
+
# Run specific test suite
|
|
211
|
+
uv run pytest tests/unit/
|
|
212
|
+
uv run pytest tests/integration/
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
## License
|
|
216
|
+
|
|
217
|
+
MIT License
|
|
218
|
+
|
|
219
|
+
## Acknowledgments
|
|
220
|
+
|
|
221
|
+
Built with [LanceDB](https://lancedb.com/), [OpenAI](https://openai.com/), [FastAPI](https://fastapi.tiangolo.com/), and [Bun](https://bun.sh/)
|
|
222
|
+
|
|
223
|
+
---
|
|
224
|
+
|
|
225
|
+
**⚠️ Remember**: This is experimental software under active development. Use at your own risk.
|