desksearch 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- desksearch-0.1.0/.github/ISSUE_TEMPLATE/bug_report.md +42 -0
- desksearch-0.1.0/.github/ISSUE_TEMPLATE/feature_request.md +23 -0
- desksearch-0.1.0/.github/workflows/build-release.yml +91 -0
- desksearch-0.1.0/.gitignore +30 -0
- desksearch-0.1.0/ARCHITECTURE.md +197 -0
- desksearch-0.1.0/CONTRIBUTING.md +122 -0
- desksearch-0.1.0/DESIGN_DECISIONS.md +116 -0
- desksearch-0.1.0/LICENSE +21 -0
- desksearch-0.1.0/PKG-INFO +223 -0
- desksearch-0.1.0/PLUGINS.md +132 -0
- desksearch-0.1.0/README.md +181 -0
- desksearch-0.1.0/benchmarks/benchmark.py +608 -0
- desksearch-0.1.0/benchmarks/comparison.md +88 -0
- desksearch-0.1.0/benchmarks/results.json +97 -0
- desksearch-0.1.0/benchmarks/results.md +90 -0
- desksearch-0.1.0/conftest.py +8 -0
- desksearch-0.1.0/desksearch-backend.spec +57 -0
- desksearch-0.1.0/docs/logo.png +0 -0
- desksearch-0.1.0/docs/screenshot.png +0 -0
- desksearch-0.1.0/electron/assets/.gitkeep +0 -0
- desksearch-0.1.0/electron/assets/icon.icns +0 -0
- desksearch-0.1.0/electron/assets/icon.ico +0 -0
- desksearch-0.1.0/electron/assets/icon.png +0 -0
- desksearch-0.1.0/electron/assets/tray-icon.png +0 -0
- desksearch-0.1.0/electron/build-config.js +78 -0
- desksearch-0.1.0/electron/main.js +236 -0
- desksearch-0.1.0/electron/package-lock.json +5278 -0
- desksearch-0.1.0/electron/package.json +93 -0
- desksearch-0.1.0/electron/preload.js +13 -0
- desksearch-0.1.0/pyproject.toml +50 -0
- desksearch-0.1.0/scripts/build-app.sh +18 -0
- desksearch-0.1.0/src/desksearch/__init__.py +2 -0
- desksearch-0.1.0/src/desksearch/__main__.py +501 -0
- desksearch-0.1.0/src/desksearch/api/__init__.py +0 -0
- desksearch-0.1.0/src/desksearch/api/routes.py +854 -0
- desksearch-0.1.0/src/desksearch/api/schemas.py +167 -0
- desksearch-0.1.0/src/desksearch/api/server.py +133 -0
- desksearch-0.1.0/src/desksearch/config.py +78 -0
- desksearch-0.1.0/src/desksearch/core/__init__.py +5 -0
- desksearch-0.1.0/src/desksearch/core/bm25.py +125 -0
- desksearch-0.1.0/src/desksearch/core/dense.py +266 -0
- desksearch-0.1.0/src/desksearch/core/fusion.py +94 -0
- desksearch-0.1.0/src/desksearch/core/search.py +208 -0
- desksearch-0.1.0/src/desksearch/core/snippets.py +147 -0
- desksearch-0.1.0/src/desksearch/daemon/__init__.py +1 -0
- desksearch-0.1.0/src/desksearch/daemon/autostart.py +224 -0
- desksearch-0.1.0/src/desksearch/daemon/service.py +616 -0
- desksearch-0.1.0/src/desksearch/daemon/tray.py +145 -0
- desksearch-0.1.0/src/desksearch/indexer/__init__.py +4 -0
- desksearch-0.1.0/src/desksearch/indexer/chunker.py +129 -0
- desksearch-0.1.0/src/desksearch/indexer/embedder.py +308 -0
- desksearch-0.1.0/src/desksearch/indexer/parsers.py +161 -0
- desksearch-0.1.0/src/desksearch/indexer/pipeline.py +401 -0
- desksearch-0.1.0/src/desksearch/indexer/store.py +224 -0
- desksearch-0.1.0/src/desksearch/indexer/watcher.py +111 -0
- desksearch-0.1.0/src/desksearch/onboarding.py +263 -0
- desksearch-0.1.0/src/desksearch/plugins/__init__.py +26 -0
- desksearch-0.1.0/src/desksearch/plugins/base.py +97 -0
- desksearch-0.1.0/src/desksearch/plugins/builtin/__init__.py +1 -0
- desksearch-0.1.0/src/desksearch/plugins/builtin/browser_bookmarks.py +143 -0
- desksearch-0.1.0/src/desksearch/plugins/builtin/clipboard_monitor.py +81 -0
- desksearch-0.1.0/src/desksearch/plugins/builtin/email_connector.py +107 -0
- desksearch-0.1.0/src/desksearch/plugins/loader.py +126 -0
- desksearch-0.1.0/src/desksearch/plugins/registry.py +113 -0
- desksearch-0.1.0/src/ui/index.html +13 -0
- desksearch-0.1.0/src/ui/package-lock.json +2766 -0
- desksearch-0.1.0/src/ui/package.json +25 -0
- desksearch-0.1.0/src/ui/postcss.config.js +6 -0
- desksearch-0.1.0/src/ui/src/App.tsx +221 -0
- desksearch-0.1.0/src/ui/src/components/Dashboard.tsx +184 -0
- desksearch-0.1.0/src/ui/src/components/FileExplorer.tsx +279 -0
- desksearch-0.1.0/src/ui/src/components/Filters.tsx +117 -0
- desksearch-0.1.0/src/ui/src/components/FolderManager.tsx +360 -0
- desksearch-0.1.0/src/ui/src/components/IndexingProgress.tsx +62 -0
- desksearch-0.1.0/src/ui/src/components/Onboarding.tsx +263 -0
- desksearch-0.1.0/src/ui/src/components/ResultCard.tsx +143 -0
- desksearch-0.1.0/src/ui/src/components/ResultsList.tsx +75 -0
- desksearch-0.1.0/src/ui/src/components/SearchBar.tsx +92 -0
- desksearch-0.1.0/src/ui/src/components/Settings.tsx +192 -0
- desksearch-0.1.0/src/ui/src/components/StatusBar.tsx +51 -0
- desksearch-0.1.0/src/ui/src/config.ts +3 -0
- desksearch-0.1.0/src/ui/src/hooks/useIndexStatus.ts +38 -0
- desksearch-0.1.0/src/ui/src/hooks/useSearch.ts +56 -0
- desksearch-0.1.0/src/ui/src/main.tsx +10 -0
- desksearch-0.1.0/src/ui/src/styles/globals.css +35 -0
- desksearch-0.1.0/src/ui/src/types.ts +94 -0
- desksearch-0.1.0/src/ui/src/vite-env.d.ts +1 -0
- desksearch-0.1.0/src/ui/tailwind.config.js +31 -0
- desksearch-0.1.0/src/ui/tsconfig.json +21 -0
- desksearch-0.1.0/src/ui/vite.config.ts +22 -0
- desksearch-0.1.0/tests/__init__.py +0 -0
- desksearch-0.1.0/tests/test_api.py +157 -0
- desksearch-0.1.0/tests/test_indexer.py +346 -0
- desksearch-0.1.0/tests/test_integration.py +399 -0
- desksearch-0.1.0/tests/test_search.py +376 -0
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: Bug Report
|
|
3
|
+
about: Report a bug to help us improve DeskSearch
|
|
4
|
+
title: '[Bug] '
|
|
5
|
+
labels: bug
|
|
6
|
+
assignees: ''
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## Describe the Bug
|
|
10
|
+
|
|
11
|
+
A clear and concise description of what the bug is.
|
|
12
|
+
|
|
13
|
+
## Steps to Reproduce
|
|
14
|
+
|
|
15
|
+
1. Run `desksearch ...`
|
|
16
|
+
2. Search for '...'
|
|
17
|
+
3. See error
|
|
18
|
+
|
|
19
|
+
## Expected Behavior
|
|
20
|
+
|
|
21
|
+
What you expected to happen.
|
|
22
|
+
|
|
23
|
+
## Actual Behavior
|
|
24
|
+
|
|
25
|
+
What actually happened.
|
|
26
|
+
|
|
27
|
+
## Environment
|
|
28
|
+
|
|
29
|
+
- **OS**: [e.g., macOS 14.2, Ubuntu 22.04, Windows 11]
|
|
30
|
+
- **Python version**: [e.g., 3.12.1]
|
|
31
|
+
- **DeskSearch version**: [e.g., 0.1.0]
|
|
32
|
+
- **Install method**: [pip / source / desktop app]
|
|
33
|
+
|
|
34
|
+
## Logs / Error Output
|
|
35
|
+
|
|
36
|
+
```
|
|
37
|
+
Paste any error messages or logs here
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
## Additional Context
|
|
41
|
+
|
|
42
|
+
Add any other context, screenshots, or file samples (if relevant) here.
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: Feature Request
|
|
3
|
+
about: Suggest a new feature or improvement
|
|
4
|
+
title: '[Feature] '
|
|
5
|
+
labels: enhancement
|
|
6
|
+
assignees: ''
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## Problem
|
|
10
|
+
|
|
11
|
+
What problem does this feature solve? What are you trying to do that you can't do today?
|
|
12
|
+
|
|
13
|
+
## Proposed Solution
|
|
14
|
+
|
|
15
|
+
Describe the solution you'd like. How would it work from a user's perspective?
|
|
16
|
+
|
|
17
|
+
## Alternatives Considered
|
|
18
|
+
|
|
19
|
+
Any alternative solutions or features you've considered.
|
|
20
|
+
|
|
21
|
+
## Additional Context
|
|
22
|
+
|
|
23
|
+
Add any other context, mockups, or examples here.
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
name: Build & Release
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- 'v*'
|
|
7
|
+
|
|
8
|
+
permissions:
|
|
9
|
+
contents: write
|
|
10
|
+
|
|
11
|
+
jobs:
|
|
12
|
+
build:
|
|
13
|
+
strategy:
|
|
14
|
+
matrix:
|
|
15
|
+
include:
|
|
16
|
+
- os: macos-latest
|
|
17
|
+
platform: mac
|
|
18
|
+
artifact: "dist/electron/DeskSearch-*.dmg"
|
|
19
|
+
- os: windows-latest
|
|
20
|
+
platform: win
|
|
21
|
+
artifact: "dist/electron/DeskSearch Setup *.exe"
|
|
22
|
+
- os: ubuntu-latest
|
|
23
|
+
platform: linux
|
|
24
|
+
artifact: "dist/electron/DeskSearch-*.AppImage"
|
|
25
|
+
|
|
26
|
+
runs-on: ${{ matrix.os }}
|
|
27
|
+
|
|
28
|
+
steps:
|
|
29
|
+
- uses: actions/checkout@v4
|
|
30
|
+
|
|
31
|
+
- name: Set up Python
|
|
32
|
+
uses: actions/setup-python@v5
|
|
33
|
+
with:
|
|
34
|
+
python-version: '3.12'
|
|
35
|
+
|
|
36
|
+
- name: Set up Node.js
|
|
37
|
+
uses: actions/setup-node@v4
|
|
38
|
+
with:
|
|
39
|
+
node-version: '22'
|
|
40
|
+
|
|
41
|
+
- name: Install Python dependencies
|
|
42
|
+
run: |
|
|
43
|
+
pip install -e ".[dev]"
|
|
44
|
+
pip install pyinstaller
|
|
45
|
+
|
|
46
|
+
- name: Build frontend
|
|
47
|
+
working-directory: src/ui
|
|
48
|
+
run: |
|
|
49
|
+
npm ci
|
|
50
|
+
npm run build
|
|
51
|
+
|
|
52
|
+
- name: Bundle backend with PyInstaller
|
|
53
|
+
run: |
|
|
54
|
+
pyinstaller --onedir --name desksearch-backend \
|
|
55
|
+
--hidden-import desksearch --hidden-import desksearch.api \
|
|
56
|
+
--hidden-import desksearch.api.server --hidden-import desksearch.api.routes \
|
|
57
|
+
--hidden-import desksearch.api.schemas --hidden-import desksearch.core \
|
|
58
|
+
--hidden-import desksearch.core.search --hidden-import desksearch.core.bm25 \
|
|
59
|
+
--hidden-import desksearch.core.dense --hidden-import desksearch.core.fusion \
|
|
60
|
+
--hidden-import desksearch.core.snippets --hidden-import desksearch.indexer \
|
|
61
|
+
--hidden-import desksearch.indexer.pipeline --hidden-import desksearch.indexer.parsers \
|
|
62
|
+
--hidden-import desksearch.indexer.chunker --hidden-import desksearch.indexer.embedder \
|
|
63
|
+
--hidden-import desksearch.indexer.store --hidden-import desksearch.indexer.watcher \
|
|
64
|
+
--hidden-import desksearch.config --hidden-import desksearch.onboarding \
|
|
65
|
+
--hidden-import desksearch.daemon --hidden-import desksearch.plugins \
|
|
66
|
+
--hidden-import uvicorn --hidden-import fastapi --hidden-import tantivy \
|
|
67
|
+
--hidden-import faiss --hidden-import onnxruntime --hidden-import tokenizers \
|
|
68
|
+
--hidden-import huggingface_hub \
|
|
69
|
+
--collect-all desksearch --collect-all tantivy --collect-all onnxruntime \
|
|
70
|
+
--collect-all tokenizers \
|
|
71
|
+
--add-data "src/ui/dist:ui/dist" \
|
|
72
|
+
--noconfirm \
|
|
73
|
+
src/desksearch/__main__.py
|
|
74
|
+
|
|
75
|
+
- name: Build Electron app
|
|
76
|
+
working-directory: electron
|
|
77
|
+
run: |
|
|
78
|
+
npm ci
|
|
79
|
+
npm run dist:${{ matrix.platform }}
|
|
80
|
+
|
|
81
|
+
- name: Upload artifact
|
|
82
|
+
uses: actions/upload-artifact@v4
|
|
83
|
+
with:
|
|
84
|
+
name: desksearch-${{ matrix.platform }}
|
|
85
|
+
path: ${{ matrix.artifact }}
|
|
86
|
+
|
|
87
|
+
- name: Upload to Release
|
|
88
|
+
if: startsWith(github.ref, 'refs/tags/')
|
|
89
|
+
uses: softprops/action-gh-release@v2
|
|
90
|
+
with:
|
|
91
|
+
files: ${{ matrix.artifact }}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
.venv/
|
|
3
|
+
__pycache__/
|
|
4
|
+
*.pyc
|
|
5
|
+
*.egg-info/
|
|
6
|
+
dist/
|
|
7
|
+
build/
|
|
8
|
+
.pytest_cache/
|
|
9
|
+
*.so
|
|
10
|
+
|
|
11
|
+
# Node
|
|
12
|
+
node_modules/
|
|
13
|
+
|
|
14
|
+
# IDE
|
|
15
|
+
.idea/
|
|
16
|
+
.vscode/
|
|
17
|
+
*.swp
|
|
18
|
+
*.swo
|
|
19
|
+
|
|
20
|
+
# OS
|
|
21
|
+
.DS_Store
|
|
22
|
+
Thumbs.db
|
|
23
|
+
|
|
24
|
+
# LocalSearch data
|
|
25
|
+
.localsearch/
|
|
26
|
+
|
|
27
|
+
# Job search / personal
|
|
28
|
+
job-search/
|
|
29
|
+
arxiv-daily/
|
|
30
|
+
OVERNIGHT_PLAN.md
|
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
# DeskSearch — Private Semantic Search Engine for Your Files
|
|
2
|
+
|
|
3
|
+
## Vision
|
|
4
|
+
A fast, beautiful, private semantic search engine that runs entirely on your laptop.
|
|
5
|
+
Index everything — documents, emails, notes, code, images — and find anything by asking in natural language.
|
|
6
|
+
"Perplexity for your own files."
|
|
7
|
+
|
|
8
|
+
## Architecture Overview
|
|
9
|
+
|
|
10
|
+
```
|
|
11
|
+
┌─────────────────────────────────────────────────┐
|
|
12
|
+
│ Web UI (React) │
|
|
13
|
+
│ Search bar → Results with snippets & sources │
|
|
14
|
+
│ File preview │ Filters │ Collections │
|
|
15
|
+
└──────────────────────┬──────────────────────────┘
|
|
16
|
+
│ HTTP/WebSocket
|
|
17
|
+
┌──────────────────────▼──────────────────────────┐
|
|
18
|
+
│ FastAPI Backend (Python) │
|
|
19
|
+
│ /search /index /status /settings │
|
|
20
|
+
│ Query understanding → Hybrid retrieval → │
|
|
21
|
+
│ Reranking → Snippet extraction │
|
|
22
|
+
└──────┬───────────┬───────────┬──────────────────┘
|
|
23
|
+
│ │ │
|
|
24
|
+
┌──────▼──┐ ┌─────▼────┐ ┌───▼──────────────────┐
|
|
25
|
+
│ BM25 │ │ Dense │ │ Metadata Store │
|
|
26
|
+
│ Index │ │ Index │ │ (SQLite) │
|
|
27
|
+
│(tantivy)│ │ (FAISS/ │ │ file paths, dates, │
|
|
28
|
+
│ │ │ usearch)│ │ types, thumbnails │
|
|
29
|
+
└─────────┘ └──────────┘ └──────────────────────┘
|
|
30
|
+
▲ ▲
|
|
31
|
+
│ │
|
|
32
|
+
┌──────┴───────────┴──────────────────────────────┐
|
|
33
|
+
│ Indexing Pipeline │
|
|
34
|
+
│ File watcher (watchdog) → Parser → Chunker → │
|
|
35
|
+
│ Embedder (local model) → Index writer │
|
|
36
|
+
│ │
|
|
37
|
+
│ Parsers: PDF(marker) │ DOCX │ TXT │ MD │ Code │
|
|
38
|
+
│ Images(OCR) │ Email(.eml) │ HTML │
|
|
39
|
+
└─────────────────────────────────────────────────┘
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## Tech Stack
|
|
43
|
+
|
|
44
|
+
### Core (Python)
|
|
45
|
+
- **FastAPI** — async API server
|
|
46
|
+
- **SQLite + FTS5** — metadata store + full-text fallback
|
|
47
|
+
- **tantivy-py** — fast BM25 index (Rust-based, way faster than Whoosh)
|
|
48
|
+
- **FAISS or usearch** — dense vector index
|
|
49
|
+
- **sentence-transformers** — local embedding model (all-MiniLM-L6-v2 for MVP, upgrade later)
|
|
50
|
+
- **watchdog** — filesystem watcher for live indexing
|
|
51
|
+
|
|
52
|
+
### Document Parsing
|
|
53
|
+
- **marker** — PDF → markdown (best quality)
|
|
54
|
+
- **python-docx** — Word documents
|
|
55
|
+
- **python-pptx** — PowerPoint
|
|
56
|
+
- **openpyxl** — Excel
|
|
57
|
+
- **beautifulsoup4** — HTML/emails
|
|
58
|
+
- **Pillow + pytesseract** — OCR for images/screenshots
|
|
59
|
+
- **tree-sitter** — code files (syntax-aware chunking)
|
|
60
|
+
|
|
61
|
+
### UI (React + Vite)
|
|
62
|
+
- **React 18** with TypeScript
|
|
63
|
+
- **Tailwind CSS** — styling
|
|
64
|
+
- **Vite** — build tool
|
|
65
|
+
- Clean, minimal Perplexity-inspired design
|
|
66
|
+
- File preview panel
|
|
67
|
+
- Search filters (date, file type, folder)
|
|
68
|
+
|
|
69
|
+
### Packaging
|
|
70
|
+
- **pip install desksearch** — Python package
|
|
71
|
+
- **brew install desksearch** — macOS (later)
|
|
72
|
+
- Single command to start: `desksearch serve`
|
|
73
|
+
- Auto-indexes ~/Documents, ~/Desktop, ~/Downloads by default
|
|
74
|
+
|
|
75
|
+
## Key Design Decisions
|
|
76
|
+
|
|
77
|
+
1. **100% Local** — No cloud, no API keys needed for MVP. Embedding model runs locally.
|
|
78
|
+
2. **Hybrid Search** — BM25 + dense embeddings + reciprocal rank fusion. This is where Dylan's expertise shines.
|
|
79
|
+
3. **Incremental Indexing** — File watcher detects changes, only re-indexes modified files.
|
|
80
|
+
4. **Chunk with Context** — Each chunk stores parent document reference for full-context answers.
|
|
81
|
+
5. **Fast Startup** — Index persists on disk. Startup = load index + start server. Should be <2 seconds.
|
|
82
|
+
|
|
83
|
+
## MVP Scope (v0.1)
|
|
84
|
+
|
|
85
|
+
### Must Have
|
|
86
|
+
- [ ] Index text files: .txt, .md, .pdf, .docx
|
|
87
|
+
- [ ] Hybrid search: BM25 + dense embeddings
|
|
88
|
+
- [ ] Reciprocal rank fusion for combining results
|
|
89
|
+
- [ ] Web UI with search bar and results
|
|
90
|
+
- [ ] File snippets with highlighted matches
|
|
91
|
+
- [ ] Click result → open file in system default app
|
|
92
|
+
- [ ] CLI: `desksearch index <path>` and `desksearch serve`
|
|
93
|
+
- [ ] Incremental indexing (only new/changed files)
|
|
94
|
+
- [ ] Basic filters: file type, date range
|
|
95
|
+
|
|
96
|
+
### Nice to Have (v0.2)
|
|
97
|
+
- [ ] Image OCR indexing
|
|
98
|
+
- [ ] Code-aware indexing (tree-sitter)
|
|
99
|
+
- [ ] Answer generation (LLM summarizes top results)
|
|
100
|
+
- [ ] Email indexing (.eml, .mbox)
|
|
101
|
+
- [ ] Collections / saved searches
|
|
102
|
+
- [ ] System tray app (background daemon)
|
|
103
|
+
|
|
104
|
+
### Future (v1.0)
|
|
105
|
+
- [ ] macOS native app
|
|
106
|
+
- [ ] Browser extension (index bookmarks)
|
|
107
|
+
- [ ] Slack/Discord/Gmail integrations (premium)
|
|
108
|
+
- [ ] Team/shared search (premium)
|
|
109
|
+
- [ ] Mobile companion app
|
|
110
|
+
|
|
111
|
+
## Agent Assignments
|
|
112
|
+
|
|
113
|
+
### Agent 1: Core Search Engine (src/desksearch/core/)
|
|
114
|
+
- Hybrid retrieval: BM25 (tantivy) + dense (FAISS)
|
|
115
|
+
- Reciprocal rank fusion
|
|
116
|
+
- Query processing
|
|
117
|
+
- Result ranking and snippet extraction
|
|
118
|
+
- Tests
|
|
119
|
+
|
|
120
|
+
### Agent 2: Indexing Pipeline (src/desksearch/indexer/)
|
|
121
|
+
- File discovery and watching
|
|
122
|
+
- Document parsing (PDF, DOCX, TXT, MD, code)
|
|
123
|
+
- Chunking strategies
|
|
124
|
+
- Embedding generation
|
|
125
|
+
- SQLite metadata store
|
|
126
|
+
- Incremental index updates
|
|
127
|
+
- Tests
|
|
128
|
+
|
|
129
|
+
### Agent 3: API Server (src/desksearch/api/)
|
|
130
|
+
- FastAPI endpoints: /search, /index, /status, /settings
|
|
131
|
+
- WebSocket for live indexing progress
|
|
132
|
+
- CORS for UI
|
|
133
|
+
- Settings management
|
|
134
|
+
- Tests
|
|
135
|
+
|
|
136
|
+
### Agent 4: Web UI (src/ui/)
|
|
137
|
+
- React + Vite + Tailwind
|
|
138
|
+
- Search interface (Perplexity-inspired)
|
|
139
|
+
- Results display with snippets, file icons, dates
|
|
140
|
+
- File type/date filters
|
|
141
|
+
- File preview panel
|
|
142
|
+
- Settings page (indexed folders, reindex trigger)
|
|
143
|
+
|
|
144
|
+
## File Structure
|
|
145
|
+
```
|
|
146
|
+
desksearch/
|
|
147
|
+
├── pyproject.toml
|
|
148
|
+
├── README.md
|
|
149
|
+
├── ARCHITECTURE.md
|
|
150
|
+
├── src/
|
|
151
|
+
│ ├── __init__.py
|
|
152
|
+
│ ├── __main__.py # CLI entry point
|
|
153
|
+
│ ├── config.py # Settings/configuration
|
|
154
|
+
│ ├── core/
|
|
155
|
+
│ │ ├── __init__.py
|
|
156
|
+
│ │ ├── search.py # Hybrid search engine
|
|
157
|
+
│ │ ├── bm25.py # Tantivy BM25 wrapper
|
|
158
|
+
│ │ ├── dense.py # FAISS/usearch dense index
|
|
159
|
+
│ │ ├── fusion.py # Reciprocal rank fusion
|
|
160
|
+
│ │ └── snippets.py # Snippet extraction & highlighting
|
|
161
|
+
│ ├── indexer/
|
|
162
|
+
│ │ ├── __init__.py
|
|
163
|
+
│ │ ├── pipeline.py # Main indexing pipeline
|
|
164
|
+
│ │ ├── watcher.py # Filesystem watcher
|
|
165
|
+
│ │ ├── parsers.py # Document parsers
|
|
166
|
+
│ │ ├── chunker.py # Text chunking
|
|
167
|
+
│ │ ├── embedder.py # Local embedding model
|
|
168
|
+
│ │ └── store.py # SQLite metadata store
|
|
169
|
+
│ ├── api/
|
|
170
|
+
│ │ ├── __init__.py
|
|
171
|
+
│ │ ├── server.py # FastAPI app
|
|
172
|
+
│ │ ├── routes.py # API endpoints
|
|
173
|
+
│ │ └── schemas.py # Pydantic models
|
|
174
|
+
│ └── ui/ # React app (built separately)
|
|
175
|
+
│ ├── package.json
|
|
176
|
+
│ ├── vite.config.ts
|
|
177
|
+
│ ├── index.html
|
|
178
|
+
│ ├── src/
|
|
179
|
+
│ │ ├── App.tsx
|
|
180
|
+
│ │ ├── components/
|
|
181
|
+
│ │ │ ├── SearchBar.tsx
|
|
182
|
+
│ │ │ ├── ResultsList.tsx
|
|
183
|
+
│ │ │ ├── ResultCard.tsx
|
|
184
|
+
│ │ │ ├── FilePreview.tsx
|
|
185
|
+
│ │ │ └── Filters.tsx
|
|
186
|
+
│ │ ├── hooks/
|
|
187
|
+
│ │ │ └── useSearch.ts
|
|
188
|
+
│ │ └── styles/
|
|
189
|
+
│ │ └── globals.css
|
|
190
|
+
│ └── tailwind.config.js
|
|
191
|
+
├── tests/
|
|
192
|
+
│ ├── test_search.py
|
|
193
|
+
│ ├── test_indexer.py
|
|
194
|
+
│ └── test_api.py
|
|
195
|
+
└── data/ # Default index storage
|
|
196
|
+
└── .gitkeep
|
|
197
|
+
```
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
# Contributing to DeskSearch
|
|
2
|
+
|
|
3
|
+
Thanks for your interest in contributing! This guide will help you get started.
|
|
4
|
+
|
|
5
|
+
## Getting Started
|
|
6
|
+
|
|
7
|
+
1. **Fork** the repository on GitHub
|
|
8
|
+
2. **Clone** your fork locally:
|
|
9
|
+
```bash
|
|
10
|
+
git clone https://github.com/YOUR_USERNAME/desksearch.git
|
|
11
|
+
cd desksearch
|
|
12
|
+
```
|
|
13
|
+
3. **Install** development dependencies:
|
|
14
|
+
```bash
|
|
15
|
+
pip install -e ".[dev]"
|
|
16
|
+
```
|
|
17
|
+
4. **Create a branch** for your change:
|
|
18
|
+
```bash
|
|
19
|
+
git checkout -b feature/my-feature
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
## Development Setup
|
|
23
|
+
|
|
24
|
+
### Backend (Python)
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
# Install in development mode
|
|
28
|
+
pip install -e ".[dev]"
|
|
29
|
+
|
|
30
|
+
# Run tests
|
|
31
|
+
pytest
|
|
32
|
+
|
|
33
|
+
# Run a specific test
|
|
34
|
+
pytest tests/test_search.py -v
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
### Frontend (Web UI)
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
cd src/ui
|
|
41
|
+
npm install
|
|
42
|
+
npm run dev # Start dev server with hot reload
|
|
43
|
+
npm run build # Production build
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## Making Changes
|
|
47
|
+
|
|
48
|
+
### Code Style
|
|
49
|
+
|
|
50
|
+
- **Python**: Follow PEP 8. Use type hints where practical.
|
|
51
|
+
- **TypeScript/React**: Follow the existing patterns in `src/ui/`.
|
|
52
|
+
- Keep changes focused — one feature or fix per PR.
|
|
53
|
+
|
|
54
|
+
### Testing
|
|
55
|
+
|
|
56
|
+
- Add tests for new functionality
|
|
57
|
+
- Ensure all existing tests pass: `pytest`
|
|
58
|
+
- Test across file formats if touching parsing/indexing
|
|
59
|
+
|
|
60
|
+
### Commit Messages
|
|
61
|
+
|
|
62
|
+
Use clear, descriptive commit messages:
|
|
63
|
+
|
|
64
|
+
```
|
|
65
|
+
feat: add EPUB parser plugin
|
|
66
|
+
fix: handle empty PDF pages in parser
|
|
67
|
+
docs: update CLI reference with daemon commands
|
|
68
|
+
perf: batch embedding for 3x indexing speedup
|
|
69
|
+
refactor: extract snippet highlighting into module
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
Prefixes: `feat`, `fix`, `docs`, `perf`, `refactor`, `test`, `chore`
|
|
73
|
+
|
|
74
|
+
## Pull Request Process
|
|
75
|
+
|
|
76
|
+
1. **Update documentation** if your change affects user-facing behavior
|
|
77
|
+
2. **Add tests** for new functionality
|
|
78
|
+
3. **Run the test suite** and make sure everything passes
|
|
79
|
+
4. **Push** to your fork and open a Pull Request
|
|
80
|
+
5. **Describe your changes** — what and why, not just how
|
|
81
|
+
6. **Link related issues** if applicable
|
|
82
|
+
|
|
83
|
+
### PR Title Format
|
|
84
|
+
|
|
85
|
+
```
|
|
86
|
+
feat: add support for PPTX files
|
|
87
|
+
fix: search crash when index is empty
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
## Reporting Bugs
|
|
91
|
+
|
|
92
|
+
Open an issue using the [bug report template](.github/ISSUE_TEMPLATE/bug_report.md) and include:
|
|
93
|
+
|
|
94
|
+
- Steps to reproduce
|
|
95
|
+
- Expected vs actual behavior
|
|
96
|
+
- Python version and OS
|
|
97
|
+
- Error messages or logs
|
|
98
|
+
|
|
99
|
+
## Requesting Features
|
|
100
|
+
|
|
101
|
+
Open an issue using the [feature request template](.github/ISSUE_TEMPLATE/feature_request.md) and describe:
|
|
102
|
+
|
|
103
|
+
- The problem you're trying to solve
|
|
104
|
+
- Your proposed solution
|
|
105
|
+
- Any alternatives you've considered
|
|
106
|
+
|
|
107
|
+
## Architecture Overview
|
|
108
|
+
|
|
109
|
+
```
|
|
110
|
+
src/desksearch/
|
|
111
|
+
├── __main__.py # CLI entry point (Click)
|
|
112
|
+
├── config.py # Configuration (Pydantic)
|
|
113
|
+
├── api/ # FastAPI web server
|
|
114
|
+
├── core/ # Search engines (BM25, dense, fusion, snippets)
|
|
115
|
+
├── indexer/ # Parsing, chunking, embedding, pipeline
|
|
116
|
+
├── plugins/ # Plugin system (base classes + loader)
|
|
117
|
+
└── daemon/ # Background service, tray, autostart
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
## License
|
|
121
|
+
|
|
122
|
+
By contributing, you agree that your contributions will be licensed under the [MIT License](LICENSE).
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
# Design Decisions — Performance & UX
|
|
2
|
+
|
|
3
|
+
## 1. Search Must Be INSTANT (<50ms)
|
|
4
|
+
|
|
5
|
+
### Problem
|
|
6
|
+
Users expect Google-speed search. Loading a sentence-transformer model takes 2-5 seconds. Embedding a query takes 50-200ms. This is too slow for keystroke-by-keystroke search.
|
|
7
|
+
|
|
8
|
+
### Solution: Dual-Mode Search
|
|
9
|
+
- **Phase 1 (instant, <10ms):** BM25 only via tantivy (Rust-based, sub-millisecond). Results appear as you type.
|
|
10
|
+
- **Phase 2 (background, <200ms):** Dense embedding search runs in background, results merge in via RRF. UI smoothly updates.
|
|
11
|
+
- **Startup:** Tantivy index loads in <100ms from memory-mapped files. Dense index loads async in background.
|
|
12
|
+
- **Model pre-loading:** The embedding model is loaded once, in the background at startup, and stays in memory. A search issued before loading finishes may see a 1-2s delay for dense results; after that, searches are instant.
|
|
13
|
+
|
|
14
|
+
### Alternatives Considered
|
|
15
|
+
- Pre-compute query embeddings for common terms → too much storage, stale
|
|
16
|
+
- Use ONNX runtime instead of PyTorch → 3-5x faster inference, should implement in v0.2
|
|
17
|
+
|
|
18
|
+
## 2. App-Like Experience
|
|
19
|
+
|
|
20
|
+
### Problem
|
|
21
|
+
`pip install` + `desksearch serve` is developer-friendly but not consumer-friendly. Regular users want to download an app.
|
|
22
|
+
|
|
23
|
+
### Solution: Progressive Packaging
|
|
24
|
+
- **v0.1 (now):** pip install + CLI. Target: developers and power users.
|
|
25
|
+
- **v0.2:** System tray app using `pystray`. Runs in background, menubar icon, opens web UI in browser.
|
|
26
|
+
- **v0.3:** Electron or Tauri desktop app. Single .dmg/.exe download. Bundles Python runtime + models.
|
|
27
|
+
- **v1.0:** Native macOS app (Swift) / Windows app. Proper file system integration.
|
|
28
|
+
|
|
29
|
+
### For MVP: System Tray Daemon
|
|
30
|
+
```
|
|
31
|
+
desksearch install-daemon # Creates LaunchAgent (macOS) or service (Linux/Windows)
|
|
32
|
+
# App starts on login, sits in system tray
|
|
33
|
+
# Click tray icon → opens search in browser
|
|
34
|
+
# Keyboard shortcut: Cmd+Shift+Space → instant search
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
## 3. Search Quality (This is the differentiator)
|
|
38
|
+
|
|
39
|
+
### Why Our Search Is Better Than Spotlight/Windows Search
|
|
40
|
+
1. **Hybrid retrieval:** BM25 catches exact terms, dense catches semantic meaning. Neither alone is sufficient.
|
|
41
|
+
2. **Reciprocal Rank Fusion:** Principled way to combine two ranking signals (Dylan's IR expertise).
|
|
42
|
+
3. **Smart chunking:** Paragraph-aware, respects document structure. Not blind character splits.
|
|
43
|
+
4. **Query understanding:** Detect query type (keyword vs question vs phrase) and adjust retrieval strategy.
|
|
44
|
+
5. **Snippet quality:** Show the MOST RELEVANT passage, not just the first occurrence.
|
|
45
|
+
|
|
46
|
+
### Ranking Pipeline (v0.2+)
|
|
47
|
+
```
|
|
48
|
+
Query → Query Analysis → BM25 Search + Dense Search → RRF Fusion → Reranker (cross-encoder) → Results
|
|
49
|
+
```
|
|
50
|
+
Adding a cross-encoder reranker (e.g., ms-marco-MiniLM) on top-20 fused results would dramatically improve precision. This is standard in production search but NO local search tool does it.
|
|
51
|
+
|
|
52
|
+
## 4. Indexing Must Be Background & Non-Intrusive
|
|
53
|
+
|
|
54
|
+
### Requirements
|
|
55
|
+
- First index: scan + parse + embed all files. May take 5-30 min depending on corpus size.
|
|
56
|
+
- Incremental: file watcher detects changes, re-indexes only modified files. <1s per file.
|
|
57
|
+
- CPU usage: cap embedding at 50% CPU. Don't make the laptop fan spin up.
|
|
58
|
+
- Disk usage: index should be <10% of original corpus size.
|
|
59
|
+
|
|
60
|
+
### Implementation
|
|
61
|
+
- Use ThreadPoolExecutor with max_workers=2 for embedding (limits CPU)
|
|
62
|
+
- Batch embed: collect 32 chunks, embed in one call (much faster than 1-by-1)
|
|
63
|
+
- Progress reporting via WebSocket to UI
|
|
64
|
+
- Pause/resume indexing
|
|
65
|
+
|
|
66
|
+
## 5. Embedding Model Choice
|
|
67
|
+
|
|
68
|
+
### For MVP: all-MiniLM-L6-v2
|
|
69
|
+
- 80MB model, loads fast
|
|
70
|
+
- 384-dim embeddings (small index)
|
|
71
|
+
- Good quality for general text
|
|
72
|
+
- Runs on CPU in ~5ms per query
|
|
73
|
+
|
|
74
|
+
### For v0.2: User-selectable
|
|
75
|
+
- Option to use larger models (e5-large, bge-base) for better quality
|
|
76
|
+
- ONNX runtime for 3-5x speedup
|
|
77
|
+
- Apple Silicon MPS acceleration
|
|
78
|
+
|
|
79
|
+
### For v1.0: Custom model
|
|
80
|
+
- Train a model specifically optimized for local file search
|
|
81
|
+
- Matryoshka embeddings (Dylan's Starbucks work) for flexible dim/precision tradeoff
|
|
82
|
+
- This is the ultimate differentiator — a model trained FOR this exact use case
|
|
83
|
+
|
|
84
|
+
## 6. Global Keyboard Shortcut
|
|
85
|
+
|
|
86
|
+
Critical for adoption. Users should be able to:
|
|
87
|
+
- Press `Cmd+Shift+Space` (configurable) from anywhere
|
|
88
|
+
- Search bar pops up immediately
|
|
89
|
+
- Type query → instant results
|
|
90
|
+
- Press Enter → open file
|
|
91
|
+
- Press Escape → dismiss
|
|
92
|
+
|
|
93
|
+
Implementation: Requires native integration.
|
|
94
|
+
- macOS: NSEvent global monitor (Swift helper or pyobjc)
|
|
95
|
+
- Linux: X11/Wayland hotkey binding
|
|
96
|
+
- Windows: RegisterHotKey
|
|
97
|
+
|
|
98
|
+
## 7. File Type Priority Ranking
|
|
99
|
+
|
|
100
|
+
Not all files are equal. A matching PDF in ~/Documents is more relevant than a .pyc in node_modules.
|
|
101
|
+
|
|
102
|
+
Default boosts:
|
|
103
|
+
- Documents (.pdf, .docx, .md): 1.5x
|
|
104
|
+
- Notes/text (.txt, .org): 1.3x
|
|
105
|
+
- Code (.py, .js): 1.0x
|
|
106
|
+
- Config (.json, .yaml, .toml): 0.8x
|
|
107
|
+
- Recent files: 1.2x boost for files modified in last 7 days
|
|
108
|
+
- Frequently opened: track open count, boost popular files
|
|
109
|
+
|
|
110
|
+
## 8. Privacy & Security
|
|
111
|
+
|
|
112
|
+
- ALL processing is local. No network calls are made except a one-time download of the embedding model.
|
|
113
|
+
- Index stored in ~/.desksearch/ with user-only permissions (0700).
|
|
114
|
+
- No telemetry, no analytics, no phone home.
|
|
115
|
+
- Option to encrypt index at rest (v0.3).
|
|
116
|
+
- Excluded paths: `.ssh`, `.gnupg`, and `.env` files containing secrets are automatically excluded from indexing.
|
desksearch-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Shuai Wang
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|