@lon-ask/dockit 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. package/LICENSE +674 -0
  2. package/README.md +496 -0
  3. package/SKILL.md +154 -0
  4. package/apps/client/dist/assets/index-CqOXxsEZ.js +240 -0
  5. package/apps/client/dist/assets/index-DwvaANnI.css +1 -0
  6. package/apps/client/dist/index.html +13 -0
  7. package/apps/server/src/core/domain/entry.ts +22 -0
  8. package/apps/server/src/core/domain/errors.ts +27 -0
  9. package/apps/server/src/core/domain/knowledge-graph.ts +51 -0
  10. package/apps/server/src/core/domain/types.ts +168 -0
  11. package/apps/server/src/core/ports/IBuildRepository.ts +7 -0
  12. package/apps/server/src/core/ports/IDocumentNormalizer.ts +6 -0
  13. package/apps/server/src/core/ports/IDocumentStore.ts +4 -0
  14. package/apps/server/src/core/ports/IEntryReadModel.ts +9 -0
  15. package/apps/server/src/core/ports/IEntryRepository.ts +11 -0
  16. package/apps/server/src/core/ports/IKnowledgeGraph.ts +10 -0
  17. package/apps/server/src/core/ports/IPathResolver.ts +3 -0
  18. package/apps/server/src/core/ports/ISearchEngine.ts +9 -0
  19. package/apps/server/src/core/ports/ISourceProcessor.ts +7 -0
  20. package/apps/server/src/core/ports/ISourceRepository.ts +11 -0
  21. package/apps/server/src/core/usecases/BuildUseCase.ts +98 -0
  22. package/apps/server/src/core/usecases/ConfigUseCase.ts +64 -0
  23. package/apps/server/src/core/usecases/SearchUseCase.ts +16 -0
  24. package/apps/server/src/index.ts +98 -0
  25. package/apps/server/src/infrastructure/filesystem/FileSystemDocumentStore.ts +27 -0
  26. package/apps/server/src/infrastructure/graph/GraphSearchDecorator.ts +53 -0
  27. package/apps/server/src/infrastructure/graph/GraphifyKnowledgeGraph.ts +172 -0
  28. package/apps/server/src/infrastructure/graph/index.ts +2 -0
  29. package/apps/server/src/infrastructure/persistence/sqlite/SqliteBuildRepository.ts +34 -0
  30. package/apps/server/src/infrastructure/persistence/sqlite/SqliteEntryReadModel.ts +17 -0
  31. package/apps/server/src/infrastructure/persistence/sqlite/SqliteEntryRepository.ts +81 -0
  32. package/apps/server/src/infrastructure/persistence/sqlite/SqliteSourceRepository.ts +65 -0
  33. package/apps/server/src/infrastructure/persistence/sqlite/connection.ts +52 -0
  34. package/apps/server/src/infrastructure/search/SearchEngineFactory.ts +43 -0
  35. package/apps/server/src/infrastructure/search/json/JsonSearchEngine.ts +164 -0
  36. package/apps/server/src/infrastructure/search/vector/EmbeddingService.ts +23 -0
  37. package/apps/server/src/infrastructure/search/vector/VectorSearchEngine.ts +480 -0
  38. package/apps/server/src/infrastructure/source-processors/AntoraSourceProcessor.ts +14 -0
  39. package/apps/server/src/infrastructure/source-processors/AsciidocSourceProcessor.ts +12 -0
  40. package/apps/server/src/infrastructure/source-processors/DocumentNormalizer.ts +16 -0
  41. package/apps/server/src/infrastructure/source-processors/GithubMarkdownSourceProcessor.ts +12 -0
  42. package/apps/server/src/infrastructure/source-processors/MavenSourceProcessor.ts +12 -0
  43. package/apps/server/src/infrastructure/source-processors/PathResolver.ts +6 -0
  44. package/apps/server/src/infrastructure/source-processors/SourceCodeSourceProcessor.ts +260 -0
  45. package/apps/server/src/infrastructure/source-processors/ZipSourceProcessor.ts +12 -0
  46. package/apps/server/src/mcp-http.ts +102 -0
  47. package/apps/server/src/mcp.ts +432 -0
  48. package/apps/server/src/routes/build.ts +105 -0
  49. package/apps/server/src/routes/entries.ts +62 -0
  50. package/apps/server/src/routes/graph.ts +57 -0
  51. package/apps/server/src/routes/search.ts +28 -0
  52. package/apps/server/src/routes/sources.ts +105 -0
  53. package/apps/server/src/routes/viewer.ts +28 -0
  54. package/apps/server/src/services/antora.ts +238 -0
  55. package/apps/server/src/services/asciidoc.ts +221 -0
  56. package/apps/server/src/services/configLoader.ts +207 -0
  57. package/apps/server/src/services/githubMarkdown.ts +236 -0
  58. package/apps/server/src/services/maven.ts +178 -0
  59. package/apps/server/src/services/normalizer.ts +63 -0
  60. package/apps/server/src/services/paths.ts +5 -0
  61. package/apps/server/src/services/textExtractor.ts +49 -0
  62. package/apps/server/src/services/zip.ts +84 -0
  63. package/bin/commands/build.ts +85 -0
  64. package/bin/commands/dev.ts +36 -0
  65. package/bin/commands/get.ts +36 -0
  66. package/bin/commands/graph.ts +153 -0
  67. package/bin/commands/init.ts +170 -0
  68. package/bin/commands/list.ts +47 -0
  69. package/bin/commands/mcp.ts +32 -0
  70. package/bin/commands/search.ts +185 -0
  71. package/bin/commands/serve.ts +23 -0
  72. package/bin/commands/status.ts +46 -0
  73. package/bin/dockit-cli.ts +92 -0
  74. package/bin/dockit.js +17 -0
  75. package/bin/utils.ts +85 -0
  76. package/dockit.yaml +154 -0
  77. package/package.json +60 -0
  78. package/scripts/mcp-wrapper.sh +44 -0
package/README.md ADDED
@@ -0,0 +1,496 @@
1
+ # Dockit
2
+
3
+ Local documentation hub that aggregates multiple documentation source types (ZIP, Maven, Antora, AsciiDoc, GitHub Markdown) into a unified, searchable HTML bundle — useful as LLM context. Also supports **source code knowledge graphs** powered by Graphify (Tree-sitter AST), producing structural dependency graphs for 15+ languages.
4
+
5
+ Ships with two search engines: a lightweight **TF-IDF engine** and a **hybrid semantic+keyword engine** (LanceDB + all-MiniLM-L6-v2 embeddings) configurable via a single toggle.
6
+
7
+ All operational data (SQLite DB, build outputs, search indexes, knowledge graphs, embeddings model cache) is stored in `~/.dockit/` by default. Override with the `DOCKIT_DATA_DIR` environment variable.
8
+
9
+ ## Quick Start
10
+
11
+ ```bash
12
+ # 1. Clone and install
13
+ git clone https://github.com/your-org/dockit.git
14
+ cd dockit
15
+ npm install
16
+ pip3 install graphify openai # optional — for source code graphs
17
+
18
+ # 2. Make CLI available globally
19
+ npm link
20
+
21
+ # 3a. Build pre-configured docs (one-time per entry)
22
+ dockit build quarkus
23
+
24
+ # 3b. Or init a project with source code + markdown scanning
25
+ dockit init --path /path/to/project --code-path src
26
+
27
+ # 4. Start searching
28
+ dockit search quarkus "configure cache"
29
+ dockit graph query dockit "BuildUseCase" # if source-code built
30
+ ```
31
+
32
+ ## Pre-configured Documentation
33
+
34
+ | Entry | Version | Source Type | Description |
35
+ |-------|---------|-------------|-------------|
36
+ | **Quarkus** | 3.35 | AsciiDoc | Quarkus framework documentation |
37
+ | **Quarkus Core** | 3.35.2 | Maven Javadoc | Quarkus Core API reference |
38
+ | **React** | 19 | GitHub Markdown | React library documentation |
39
+ | **Spring Boot** | 3.5.x | Antora | Production-ready Spring applications |
40
+ | **Spring Framework** | 7.x | Antora | Core Spring ecosystem reference |
41
+ | **Quarkus Source Code** | 3.35 | Source Code | Quarkus framework source — knowledge graph |
42
+ | **Quarkus (Docs + Code)** | 3.35 | AsciiDoc + Source Code | Docs + code graph combined |
43
+ | **Dockit** | 1.0 | Source Code + Markdown | Self-hosted dockit project entry |
44
+
45
+ Add your own entries by editing `dockit.yaml` or using `dockit init` — see [Supported Sources](#supported-documentation-sources) below.
46
+
47
+ ## Search Engine
48
+
49
+ Dockit ships two search engines toggleable via `dockit.yaml`:
50
+
51
+ ```yaml
52
+ search:
53
+   engine: vector # 'vector' (default) | 'json' (TF-IDF fallback)
54
+ ```
55
+
56
+ | | JSON (TF-IDF) | Vector (Hybrid) |
57
+ |---|---|---|
58
+ | **Storage per entry** | ~300 KB | ~32 MB (LanceDB) |
59
+ | **Runtime memory** | Minimal | ~200 MB |
60
+ | **Build time** | Fast | Slower (embeds docs via ONNX) |
61
+ | **Search method** | Term-frequency scoring (title 10x, headings 3x, body 1x) | Hybrid: parallel cosine ANN + BM25 FTS → RRF fusion |
62
+ | **Keyword precision** | High (exact term matches) | Very high (FTS component recovers keywords) |
63
+ | **Semantic matching** | None | Yes (finds conceptually related docs even without exact terms) |
64
+ | **Model** | None | `all-MiniLM-L6-v2` via `@huggingface/transformers` (~88 MB ONNX) |
65
+ | **Works offline** | Yes — zero dependencies | Yes — model bundles in npm package |
66
+
67
+ ### How hybrid search works
68
+
69
+ ```
70
+ query → vector (cosine ANN) + BM25 (FTS) in parallel
71
+ → deduplicate per-path → RRF fusion → top N
72
+ ```
73
+
74
+ - Vector search finds semantically related pages (e.g., "Ahead-of-Time Caching" for a cache query)
75
+ - FTS recovers exact keyword matches (e.g., "caffeine" in the Caching guide)
76
+ - Reciprocal Rank Fusion combines both with dynamic weighting: confident FTS gets 2x weight, uncertain FTS gets 0.7x
77
+ - Title matches in FTS results get an additional 1.5x boost
78
+
79
+ ### When to use each
80
+
81
+ - **`json`**: Low-resource environments, fast builds, or when exact keyword matching is sufficient
82
+ - **`vector`** (default): Better discovery of non-obvious matches, section-level chunking with heading context in results, hybrid search that matches both keywords and concepts
83
+
84
+ ## CLI Usage (Recommended)
85
+
86
+ The CLI is the primary way to interact with Dockit. Works from any directory, requires no server process, and is ideal for LLM agents that can execute shell commands.
87
+
88
+ ### Commands
89
+
90
+ | Command | Description |
91
+ |---------|-------------|
92
+ | `dockit search [<entry>] <query>` | Search documentation |
93
+ | `dockit search <query>` | Global search — top result per entry |
94
+ | `dockit search [<entry>] <query> --get-top [N]` | Fetch full content for top N results (default 3) |
95
+ | `dockit list` | List all entries |
96
+ | `dockit build <entry>` | Build documentation for an entry |
97
+ | `dockit status <entry>` | Check build status |
98
+ | `dockit get <entry> <path>` | Fetch full document content |
99
+ | `dockit graph query <entry> <query>` | Search knowledge graph nodes by name, file, or type |
100
+ | `dockit graph path <entry> <from> <to>` | Find shortest dependency path between two nodes |
101
+ | `dockit graph gods <entry>` | List most connected (god) nodes |
102
+ | `dockit graph explain <entry> <node>` | Show node details and connections |
103
+ | `dockit init --path <dir> [--code-path <subdir>]` | Initialize a project as a dockit source |
104
+ | `dockit dev` | Start dev servers (web UI) |
105
+ | `dockit serve` | Start production server |
106
+ | `dockit mcp` | Start MCP server |
107
+
108
+ ### Search Workflow
109
+
110
+ **Step 1: Global search** — discover which entries are relevant
111
+
112
+ ```bash
113
+ dockit search "cache"
114
+ # Returns top result per entry:
115
+ # [React] cache
116
+ # [Quarkus] caching-guide
117
+ # [Quarkus Core] Cache API
118
+ ```
119
+
120
+ **Step 2: Scoped search** — dive deeper into the chosen entry
121
+
122
+ ```bash
123
+ dockit search quarkus "cache" --get-top 3
124
+ # Returns full content for top 3 Quarkus cache documents
125
+ ```
126
+
127
+ ### Knowledge Graph Queries
128
+
129
+ For `source-code` entries built with Graphify:
130
+
131
+ ```bash
132
+ # Search nodes
133
+ dockit graph query dockit-code "BuildUseCase"
134
+
135
+ # List most connected nodes
136
+ dockit graph gods dockit-code --limit 5
137
+
138
+ # Find shortest path between two nodes
139
+ dockit graph path dockit-code "BuildUseCase" "SourceCodeSourceProcessor"
140
+
141
+ # Show node details and connections
142
+ dockit graph explain dockit-code "BuildUseCase"
143
+ ```
144
+
145
+ ### Init a Project
146
+
147
+ ```bash
148
+ # From project root — auto-detects name
149
+ dockit init --code-path apps
150
+
151
+ # With explicit path and version
152
+ dockit init --path /path/to/project --name "MyApp" --version "2.0" --code-path src
153
+
154
+ # This creates:
155
+ # - source-code source (graphify on --code-path)
156
+ # - github-markdown source (scans all .md files)
157
+ # - Builds both immediately
158
+ ```
159
+
160
+ ### Flags
161
+
162
+ | Flag | Description |
163
+ |------|-------------|
164
+ | `--json` | Output as JSON (for search, list, status, graph) |
165
+ | `--limit <n>` | Max results (default 20) |
166
+ | `--get-top [N]` | Fetch full content for top N results (default 3) |
167
+ | `--port <port>` | Custom port (for serve, mcp --http) |
168
+
169
+ ### Examples
170
+
171
+ ```bash
172
+ # Global search — see which entries match
173
+ dockit search "hooks"
174
+
175
+ # Scoped search with full content
176
+ dockit search react "how to create a hook" --get-top
177
+
178
+ # JSON output for scripts/agents
179
+ dockit search react "useState" --get-top 3 --json
180
+
181
+ # Build documentation
182
+ dockit build quarkus
183
+ dockit status quarkus
184
+
185
+ # Fetch a specific document
186
+ dockit get react react-docs-markdown/reference/react/hooks.html
187
+
188
+ # Graph queries
189
+ dockit graph query dockit-code "SourceCodeSourceProcessor"
190
+ dockit graph gods dockit-code
191
+ ```
192
+
193
+ ## MCP Server (Optional)
194
+
195
+ Dockit exposes an MCP (Model Context Protocol) server for AI tools like Claude Desktop, Cline, and OpenCode.
196
+
197
+ ### OpenCode
198
+
199
+ ```json
200
+ // ~/.config/opencode/opencode.json
201
+ {
202
+ "$schema": "https://opencode.ai/config.json",
203
+ "mcp": {
204
+ "dockit": {
205
+ "type": "local",
206
+ "command": ["bash", "/path/to/dockit/scripts/mcp-wrapper.sh"],
207
+ "enabled": true
208
+ }
209
+ }
210
+ }
211
+ ```
212
+
213
+ ### Claude Desktop / Cline
214
+
215
+ ```json
216
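+ // Claude Desktop: ~/Library/Application Support/Claude/claude_desktop_config.json (macOS) or %APPDATA%\Claude\claude_desktop_config.json (Windows)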
217
+ {
218
+ "mcpServers": {
219
+ "dockit": {
220
+ "command": "bash",
221
+ "args": ["/path/to/dockit/scripts/mcp-wrapper.sh"]
222
+ }
223
+ }
224
+ }
225
+ ```
226
+
227
+ ### HTTP Transport
228
+
229
+ ```bash
230
+ # Start HTTP bridge on port 3456
231
+ DOCKIT_MCP_HTTP_PORT=3456 ./scripts/mcp-wrapper.sh
232
+
233
+ # Then curl:
234
+ curl -X POST http://localhost:3456 \
235
+ -H "Content-Type: application/json" \
236
+ -d '{"jsonrpc":"2.0","id":1,"method":"tools/list","params":{}}'
237
+ ```
238
+
239
+ ### MCP Tools
240
+
241
+ | Tool | Description |
242
+ |------|-------------|
243
+ | `dockit_list_entries` | List all configured entries |
244
+ | `dockit_find_entry` | Find entries by name/description |
245
+ | `dockit_search` | Search within a specific entry |
246
+ | `dockit_global_search` | Search across all entries |
247
+ | `dockit_get_doc` | Fetch full document content |
248
+ | `dockit_build` / `dockit_build_status` | Build / check status |
249
+ | `dockit_graph_query` | Search knowledge graph nodes (CLI: `dockit graph query`) |
250
+ | `dockit_graph_path` | Find shortest path between nodes (CLI: `dockit graph path`) |
251
+ | `dockit_graph_explain` | Node details with edges (CLI: `dockit graph explain`) |
252
+ | `dockit_graph_gods` | Most connected nodes (CLI: `dockit graph gods`) |
253
+
254
+ ## How LLMs Use Dockit
255
+
256
+ Dockit includes `SKILL.md` — a skill file that instructs LLMs how to use Dockit effectively. When an LLM has access to the `dockit` CLI or MCP tools, it follows this workflow:
257
+
258
+ 1. **`dockit list`** / **`dockit_list_entries`** — discover available documentation
259
+ 2. **`dockit search "query"`** — global search to find relevant entries
260
+ 3. **`dockit search <entry> "query" --get-top`** — scoped search with full content
261
+ 4. **For source-code entries**: use `dockit graph query <entry> "node"` for structural queries
262
+ 5. **Answer the user's question** using the retrieved documentation as context
263
+
264
+ The LLM strips conversational filler from queries, scopes searches to the right entry, and prefers Dockit documentation over training data.
265
+
266
+ ## Supported Documentation Sources
267
+
268
+ | Type | Description | Remote Fields | Local/Offline Fields |
269
+ |------|-------------|---------------|---------------------|
270
+ | **ZIP Bundle** | Download or extract a ZIP of HTML documentation | `url` | `localPath` — path to pre-downloaded .zip |
271
+ | **Maven Artifact** | Download a documentation JAR (javadoc) from Maven Central | *(none extra)* | `useMavenCommand: true` — uses local Maven + settings.xml; `localJar` — path to pre-downloaded .jar |
272
+ | **Antora** | Build a multi-page HTML site with Antora | `repoUrl` | `localPath` — path to pre-cloned repo |
273
+ | **AsciiDoc** | Convert `.adoc` files to HTML | `repoUrl`, `sourcePath` (optional) | `localPath` — path to pre-cloned repo |
274
+ | **GitHub Markdown** | Clone a GitHub repo and convert `.md` files to HTML | `repoUrl`, `sourcePath` (optional), `branch` (optional) | `localPath` — path to pre-cloned repo |
275
+ | **Source Code** | Build a knowledge graph via Graphify (Tree-sitter AST) | `repoUrl`, `sourcePath` (optional), `branch` (optional) | `localPath` — path to local repo |
276
+ | **Combined** | Add `graphifyEnabled: true` on doc sources (AsciiDoc, Markdown, Antora) to also generate a graph | *(inherits from parent type)* | *(inherits from parent type)* |
277
+
278
+ ### Source code knowledge graphs
279
+
280
+ The `source-code` source type runs [Graphify](https://github.com/anomalyco/graphify) (Tree-sitter AST parser) on the source directory, producing a `graph.json` with nodes (classes, functions, files) and edges (imports, calls, inherits). Supports 15+ languages including TypeScript, JavaScript, Python, Java, Go, Rust, and C++.
281
+
282
+ **Configuration fields:**
283
+
284
+ | Field | Description |
285
+ |-------|-------------|
286
+ | `repoUrl` / `localPath` / `zipPath` | Source acquisition (same pattern as other types) |
287
+ | `sourcePath` | Subdirectory to scan for code files |
288
+ | `graphifySourcePath` | Separate subdirectory for Graphify to scan (use when it differs from `sourcePath`, e.g. docs vs. code) |
289
+
290
+ For existing doc sources (AsciiDoc, GitHub Markdown, Antora) that point to a repo containing source code, toggle `graphifyEnabled: true` and set `graphifySourcePath` to scan the code during build:
291
+
292
+ ```yaml
293
+ sources:
294
+   - type: asciidoc
295
+     label: "Docs"
296
+     repoUrl: "https://github.com/myorg/myrepo.git"
297
+     sourcePath: "docs" # doc files
298
+     graphifyEnabled: true
299
+     graphifySourcePath: "src" # code files for graph
300
+ ```
301
+
302
+ ## Offline / Proxy Mode
303
+
304
+ For environments behind corporate proxies or without internet access, Dockit supports multiple fallback mechanisms:
305
+
306
+ ### Source Repositories (local clones)
307
+
308
+ Each source type supports local paths that take precedence over remote URLs:
309
+
310
+ ```yaml
311
+ # dockit.yaml — local mode entries
312
+ entries:
313
+   - id: quarkus-local
314
+     name: Quarkus (Local)
315
+     version: "3.35"
316
+     sources:
317
+       - type: asciidoc
318
+         label: "Quarkus Docs"
319
+         localPath: "/home/user/repos/quarkus"
320
+         sourcePath: "docs/src/main/asciidoc"
321
+ ```
322
+
323
+ ### Embedding Model (air-gapped vector search)
324
+
325
+ By default, the embedding model is downloaded into `~/.dockit/models/` on the first `embed()` call. Override the location with `DOCKIT_DATA_DIR` or `configure({ cacheDir: '...' })`. For air-gapped environments:
326
+
327
+ **Option A — Pre-seed on connected machine, then copy:**
328
+ ```bash
329
+ # On connected machine
330
+ npm run download-model -w packages/embeddings
331
+
332
+ # Copy ~/.dockit/models/ to the target machine
333
+ ```
334
+
335
+ **Option B — Install offline via npm:**
336
+ The ONNX model bundle ships inside the `@dockit/embeddings` npm package under `packages/embeddings/model/`. If `~/.dockit/models/` is empty on the first `embed()` call, Dockit will attempt to download the model; to avoid this, set `DOCKIT_DATA_DIR` to point to a pre-seeded directory or use `configure({ cacheDir: '/path/to/model' })`.
337
+
338
+ ### Pre-built Index Bundling
339
+
340
+ For environments where even building is impractical, LanceDB and JSON indexes can be pre-built and bundled:
341
+
342
+ 1. Build indexes on a connected machine:
343
+ ```bash
344
+ dockit build quarkus
345
+ dockit build spring-boot
346
+ # ... all desired entries
347
+ ```
348
+ 2. Package the `~/.dockit/` directory (or specific `.lancedb/` + `index.json` files)
349
+ 3. Deploy it to the same `~/.dockit/` path on each target machine
350
+
351
+ ### Proxy Configuration
352
+
353
+ Standard proxy environment variables:
354
+
355
+ ```bash
356
+ export HTTP_PROXY=http://proxy.corp:8080
357
+ export HTTPS_PROXY=http://proxy.corp:8080
358
+ ```
359
+
360
+ Or override the HuggingFace CDN host via code:
361
+
362
+ ```ts
363
+ import { env } from '@huggingface/transformers';
364
+ env.remoteHost = 'https://internal-mirror.corp'; // point to internal mirror
365
+ ```
366
+
367
+ ### Native Binaries
368
+
369
+ `@lancedb/lancedb` ships prebuilt binaries for Linux x64/arm64, macOS x64/arm64, and Windows x64/arm64 — no compilation needed.
370
+
371
+ ### Additional offline fields
372
+
373
+ | Source Type | Fields (all take precedence over remote) |
374
+ |-------------|------------------------------------------|
375
+ | **ZIP** | `localPath` — path to pre-downloaded .zip |
376
+ | **Maven** | `useMavenCommand: true` — uses local `~/.m2/settings.xml` (proxies, mirrors); `localJar` — pre-downloaded .jar |
377
+ | **Antora** | `localPath` — pre-cloned repo |
378
+ | **AsciiDoc** | `localPath` — pre-cloned repo (kept, not cleaned up) |
379
+ | **GitHub Markdown** | `localPath` — pre-cloned repo |
380
+ | **Source Code** | `localPath` — local repo directory |
381
+
382
+ ## Web UI (Optional)
383
+
384
+ Dockit includes a web interface for managing entries, configuring sources, and browsing documentation.
385
+
386
+ ```bash
387
+ # Start dev servers
388
+ dockit dev
389
+ # Or: npm run dev
390
+
391
+ # Frontend → http://localhost:5173
392
+ # Backend → http://localhost:3001
393
+ ```
394
+
395
+ 1. Open http://localhost:5173 in your browser
396
+ 2. Click **New Entry** in the sidebar
397
+ 3. Add sources (including **Source Code** type) and click **Build Now**
398
+ 4. For doc sources with repos, toggle **Generate source code knowledge graph** and set the **Source Code Path**
399
+ 5. Use the embedded viewer to browse, or search across indexed content
400
+
401
+ ### Build Modes
402
+
403
+ - **Build Now** — server-side processing with live log output
404
+ - **Download Script** — exports a self-contained `.sh` script with all curl commands
405
+
406
+ ## Data Storage
407
+
408
+ All runtime data is stored in `~/.dockit/` by default:
409
+
410
+ | Path | Description |
411
+ |------|-------------|
412
+ | `~/.dockit/dockit.db` | SQLite database (entries, sources, builds) |
413
+ | `~/.dockit/dockit.yaml` | User configuration (auto-created by `dockit init`) |
414
+ | `~/.dockit/.lancedb/` | Vector search index (LanceDB) |
415
+ | `~/.dockit/models/` | HuggingFace ONNX embedding model cache |
416
+ | `~/.dockit/{entryId}/bundle/` | Built HTML documentation per entry |
417
+ | `~/.dockit/{entryId}/sources/` | Raw source processing artifacts |
418
+ | `~/.dockit/{entryId}/graph.json` | Knowledge graph (source-code entries) |
419
+
420
+ Override with the environment variable:
421
+
422
+ ```bash
423
+ export DOCKIT_DATA_DIR=/custom/path # all data goes here instead of ~/.dockit/
424
+ ```
425
+
426
+ Configuration (`dockit.yaml`) is resolved in order:
427
+ 1. `~/.dockit/dockit.yaml` (user home, persisted by `dockit init`)
428
+ 2. Project root `dockit.yaml` (backward compatibility for development)
429
+
430
+ ## Architecture
431
+
432
+ ```
433
+ dockit/
434
+ ├── apps/
435
+ │ ├── server/ Express + TypeScript backend (port 3001)
436
+ │ │ └── src/
437
+ │ │ ├── core/domain/ Domain types & knowledge-graph types
438
+ │ │ ├── core/ports/ IKnowledgeGraph, ISourceProcessor + ports
439
+ │ │ ├── core/usecases/ BuildUseCase, ConfigUseCase, SearchUseCase
440
+ │ │ ├── infrastructure/graph/ GraphifyKnowledgeGraph, GraphSearchDecorator
441
+ │ │ ├── infrastructure/source-processors/ SourceCodeSourceProcessor + others
442
+ │ │ └── routes/graph.ts Graph REST endpoints
443
+ │ └── client/ React + Vite + Tailwind CSS frontend (port 5173)
444
+ ├── bin/ CLI entry point, graph commands, init command
445
+ ├── packages/
446
+ │ └── embeddings/ @dockit/embeddings — ONNX model wrapper (@huggingface/transformers)
447
+ ├── ~/.dockit/ Runtime data (created automatically on first run)
448
+ │ ├── dockit.db SQLite database
449
+ │ ├── dockit.yaml Entries/sources config (auto-created by `dockit init`)
450
+ │ ├── .lancedb/ Vector search index
451
+ │ ├── models/ HuggingFace ONNX embedding model cache
452
+ │ ├── {entryId}/bundle/ Build outputs per entry
453
+ │ ├── {entryId}/sources/ Raw source processing artifacts
454
+ │ └── {entryId}/graph.json Knowledge graph (source-code entries)
455
+ ├── dockit.yaml Entries/sources config
456
+ ├── SKILL.md LLM skill instructions
457
+ ├── GRAPHIFY_SOURCE_PLAN.md Graphify feature plan
458
+ └── package.json npm workspace root
459
+ ```
460
+
461
+ ## API Overview
462
+
463
+ | Method | Path | Purpose |
464
+ |--------|------|---------|
465
+ | `GET` | `/api/entries` | List entries |
466
+ | `POST` | `/api/entries` | Create entry |
467
+ | `GET` | `/api/entries/:id` | Get entry detail + sources |
468
+ | `PUT` | `/api/entries/:id` | Update entry |
469
+ | `DELETE` | `/api/entries/:id` | Delete entry + all data |
470
+ | `POST` | `/api/entries/:id/sources` | Add source to entry |
471
+ | `PUT` | `/api/sources/:id` | Update source |
472
+ | `DELETE` | `/api/sources/:id` | Remove source |
473
+ | `POST` | `/api/entries/:id/build` | Trigger build |
474
+ | `GET` | `/api/entries/:id/build-status` | Poll build progress |
475
+ | `GET` | `/api/entries/:id/cli-script` | Download CLI script |
476
+ | `GET` | `/api/graph/:entry/query?q=...` | Search knowledge graph nodes |
477
+ | `GET` | `/api/graph/:entry/path?from=...&to=...` | Find path between nodes |
478
+ | `GET` | `/api/graph/:entry/gods` | List most connected nodes |
479
+ | `GET` | `/api/entries/:id/search?q=term` | Search built docs |
480
+ | `GET` | `/api/bundle/:entryId/*` | Serve bundled HTML |
481
+
482
+ ## Tech Stack
483
+
484
+ | Layer | Technology |
485
+ |-------|-----------|
486
+ | Frontend | React 19, TypeScript, Vite 6, Tailwind CSS 4, React Router 7 |
487
+ | Backend | Express 4, TypeScript, tsx |
488
+ | Database | SQLite via better-sqlite3 |
489
+ | MCP | @modelcontextprotocol/server 2.0.0-alpha.2 |
490
+ | HTML Parsing | node-html-parser |
491
+ | AsciiDoc | @asciidoctor/core |
492
+ | Archives | unzipper |
493
+ | Build Pipeline | Antora CLI, Git, Maven dependency plugin |
494
+ | Markdown | marked |
495
+ | Vector Search | LanceDB embedded (Rust native), all-MiniLM-L6-v2 via @huggingface/transformers |
496
+ | Knowledge Graph | Graphify (Tree-sitter AST, 15+ languages) |
package/SKILL.md ADDED
@@ -0,0 +1,154 @@
1
+ ---
2
+ name: dockit
3
+ description: Documentation index and search tool that provides on-demand access to up-to-date framework and library documentation for LLM context
4
+ license: MIT
5
+ compatibility: opencode
6
+ metadata:
7
+   audience: developers
8
+   workflow: documentation
9
+ ---
10
+
11
+ ## What I do
12
+ - Search and retrieve documentation for frameworks and libraries (e.g., Quarkus, Spring Boot, React)
13
+ - Provide API documentation, class references, and configuration guides
14
+ - Fetch full document content for LLM context via CLI or MCP tools
15
+
16
+ ## When to use me
17
+ Use this skill when the user asks about:
18
+ - How to use a specific framework or library
19
+ - API documentation, class references, or configuration reference
20
+ - Any technology listed in available dockit entries
21
+
22
+ ## Primary Method: CLI Commands
23
+
24
+ The `dockit` CLI is the recommended way to search and retrieve documentation.
25
+
26
+ ### `dockit list`
27
+ Lists all configured documentation entries. Run this first to discover what's available.
28
+
29
+ ### `dockit search [<entry>] <query>`
30
+ Searches documentation. Always provide the entry name as the first argument when you know which framework the question is about.
31
+
32
+ ```bash
33
+ # Scoped to a specific entry (recommended)
34
+ dockit search react "how to create a hook"
35
+ dockit search quarkus "configure cache"
36
+
37
+ # Global search — top result per entry (when unsure which entry)
38
+ dockit search "cache"
39
+ ```
40
+
41
+ ### `dockit search [<entry>] <query> --get-top [N]`
42
+ Searches and fetches full document content for the top N results (default 3). This is the **primary command for LLMs** — it combines search + content retrieval in one step.
43
+
44
+ ```bash
45
+ # Get full content for top 3 results
46
+ dockit search react "useState" --get-top
47
+
48
+ # Get full content for top 5 results, as JSON
49
+ dockit search react "hooks" --get-top 5 --json
50
+ ```
51
+
52
+ ### `dockit get <entry> <path>`
53
+ Fetches full content of a specific document by path (from search results).
54
+
55
+ ### `dockit build <entry>` / `dockit status <entry>`
56
+ Builds documentation or checks build status.
57
+
58
+ ## Recommended Workflow
59
+
60
+ ### Step 1: Identify the entry
61
+ Determine which documentation entry is relevant:
62
+ - "How do I use useState in React?" → entry: `react`
63
+ - "How to configure cache in Quarkus?" → entry: `quarkus`
64
+
65
+ If unsure, run `dockit list` to see available entries.
66
+
67
+ ### Step 2: Search pattern
68
+ **Global search** (no entry) — returns top result per entry:
69
+ ```bash
70
+ dockit search "cache"
71
+ ```
72
+
73
+ **Scoped search** (with entry) — dive deeper:
74
+ ```bash
75
+ dockit search quarkus "cache" --get-top 3
76
+ ```
77
+
78
+ Always scope to the entry once you know which framework the user is asking about.
79
+
80
+ ### Step 3: Refine the query
81
+ Strip conversational filler. Keep only technical terms:
82
+
83
+ | User Question | Good Query |
84
+ |---------------|------------|
85
+ | "How do I create a custom hook in React?" | `"create custom hook"` |
86
+ | "What is the latest Quarkus feature for caching?" | `"cache latest feature"` |
87
+
88
+ ### Step 4: Handle missing builds
89
+ If an entry shows status `pending` or `error`, build it first:
90
+ ```bash
91
+ dockit build react
92
+ dockit status react
93
+ ```
94
+
95
+ ## Alternative: MCP Tools
96
+
97
+ If Dockit is configured as an MCP server, use `dockit_*` tools instead of CLI commands:
98
+
99
+ | MCP Tool | CLI Equivalent |
100
+ |----------|----------------|
101
+ | `dockit_list_entries` | `dockit list` |
102
+ | `dockit_global_search` | `dockit search "query"` |
103
+ | `dockit_search` | `dockit search <entry> "query"` |
104
+ | `dockit_get_doc` | `dockit get <entry> <path>` |
105
+ | `dockit_build` / `dockit_build_status` | `dockit build` / `dockit status` |
106
+ | `dockit_graph_query` | `dockit graph query <entry> <query>` |
107
+ | `dockit_graph_path` | `dockit graph path <entry> <from> <to>` |
108
+ | `dockit_graph_explain` | `dockit graph explain <entry> <node>` |
109
+ | `dockit_graph_gods` | `dockit graph gods <entry>` |
110
+
111
+ ## Source Code Entries (Knowledge Graph)
112
+
113
+ For entries with `source-code` sources, the primary query mechanism is the **knowledge graph** rather than text search. Graphify's Tree-sitter AST pass parses 15+ languages and produces structural edges (*calls*, *imports*, *inherits*).
114
+
115
+ ### Graph MCP Tools
116
+
117
+ | Tool | Description |
118
+ |------|-------------|
119
+ | `dockit_graph_query <entry> <query>` | Search graph nodes by name, file, or type |
120
+ | `dockit_graph_path <entry> <from> <to>` | Find shortest dependency path between two nodes |
121
+ | `dockit_graph_explain <entry> <node>` | Get node details with edges and connections |
122
+ | `dockit_graph_gods <entry>` | List most connected (highest-degree) nodes |
123
+
124
+ ### Behavior by Entry Type
125
+
126
+ | Entry Type | Search | Graph Query |
127
+ |------------|--------|-------------|
128
+ | Source-code only | `dockit search` returns empty | Use `dockit_graph_*` tools or `dockit graph` CLI commands |
129
+ | Mixed (docs + code) | `dockit search` works; results are graph-boosted | `dockit_graph_*` tools and `dockit graph` CLI commands work |
130
+ | Docs only | `dockit search` works | No graph tools |
131
+
132
+ ## Notes
133
+ - Documentation is plain text extracted from HTML
134
+ - Content is truncated at 50KB per document
135
+ - Entries start as `pending` and must be built before they are searchable
136
+
137
+ ## Always Show Source
138
+
139
+ After answering with documentation content, always display the source in a table at the end:
140
+
141
+ | Field | Value |
142
+ |-------|-------|
143
+ | **Type** | `<source type>` |
144
+ | **Label** | `<source label>` |
145
+ | **Repo** | `<repoUrl>` |
146
+ | **Source Path** | `<sourcePath>` |
147
+ | **Version** | `<entry version>` |
148
+
149
+ To get source details, use `--json` flag with search or check `dockit list --json`. Source fields come from the entry's `sources` array in `dockit.yaml`:
150
+ - `type` — source type (e.g., `github-markdown`, `asciidoc`, `maven`, `source-code`)
151
+ - `label` — human-readable label
152
+ - `repoUrl` or `localPath` — repository URL or local path
153
+ - `sourcePath` — path within the repo
154
+ - Entry `version` — the version of the documentation entry