tea-rags 1.19.0 → 1.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146)
  1. package/README.md +193 -37
  2. package/benchmarks/benchmark-embeddings.mjs +148 -0
  3. package/benchmarks/lib/benchmarks.mjs +726 -0
  4. package/benchmarks/lib/cleanup.mjs +40 -0
  5. package/benchmarks/lib/colors.mjs +52 -0
  6. package/benchmarks/lib/config.mjs +115 -0
  7. package/benchmarks/lib/embedding-calibration.mjs +508 -0
  8. package/benchmarks/lib/estimator.mjs +93 -0
  9. package/benchmarks/lib/files.mjs +94 -0
  10. package/benchmarks/lib/output.mjs +218 -0
  11. package/benchmarks/lib/provider.mjs +66 -0
  12. package/benchmarks/lib/smart-stepping.mjs +186 -0
  13. package/benchmarks/lib/stopping.mjs +79 -0
  14. package/benchmarks/tune.mjs +753 -0
  15. package/build/bootstrap/config/schemas.js +1 -1
  16. package/build/bootstrap/config/schemas.js.map +1 -1
  17. package/build/bootstrap/factory.d.ts.map +1 -1
  18. package/build/bootstrap/factory.js +50 -28
  19. package/build/bootstrap/factory.js.map +1 -1
  20. package/build/core/adapters/embeddings/ollama.d.ts.map +1 -1
  21. package/build/core/adapters/embeddings/ollama.js +17 -4
  22. package/build/core/adapters/embeddings/ollama.js.map +1 -1
  23. package/build/core/api/index.d.ts +1 -0
  24. package/build/core/api/index.d.ts.map +1 -1
  25. package/build/core/api/index.js.map +1 -1
  26. package/build/core/api/internal/composition.d.ts +2 -0
  27. package/build/core/api/internal/composition.d.ts.map +1 -1
  28. package/build/core/api/internal/composition.js +9 -1
  29. package/build/core/api/internal/composition.js.map +1 -1
  30. package/build/core/api/internal/facades/explore-facade.d.ts +9 -49
  31. package/build/core/api/internal/facades/explore-facade.d.ts.map +1 -1
  32. package/build/core/api/internal/facades/explore-facade.js +49 -424
  33. package/build/core/api/internal/facades/explore-facade.js.map +1 -1
  34. package/build/core/api/internal/facades/ingest-facade.d.ts +30 -47
  35. package/build/core/api/internal/facades/ingest-facade.d.ts.map +1 -1
  36. package/build/core/api/internal/facades/ingest-facade.js +61 -241
  37. package/build/core/api/internal/facades/ingest-facade.js.map +1 -1
  38. package/build/core/api/internal/ops/explore-ops.d.ts +73 -0
  39. package/build/core/api/internal/ops/explore-ops.d.ts.map +1 -0
  40. package/build/core/api/internal/ops/explore-ops.js +266 -0
  41. package/build/core/api/internal/ops/explore-ops.js.map +1 -0
  42. package/build/core/api/internal/ops/indexing-ops.d.ts +103 -0
  43. package/build/core/api/internal/ops/indexing-ops.d.ts.map +1 -0
  44. package/build/core/api/internal/ops/indexing-ops.js +266 -0
  45. package/build/core/api/internal/ops/indexing-ops.js.map +1 -0
  46. package/build/core/contracts/index.d.ts +1 -0
  47. package/build/core/contracts/index.d.ts.map +1 -1
  48. package/build/core/contracts/index.js.map +1 -1
  49. package/build/core/contracts/types/stats-accumulator.d.ts +56 -0
  50. package/build/core/contracts/types/stats-accumulator.d.ts.map +1 -0
  51. package/build/core/contracts/types/stats-accumulator.js +34 -0
  52. package/build/core/contracts/types/stats-accumulator.js.map +1 -0
  53. package/build/core/contracts/types/trajectory.d.ts +2 -0
  54. package/build/core/contracts/types/trajectory.d.ts.map +1 -1
  55. package/build/core/domains/explore/queries/index-metrics.d.ts +25 -0
  56. package/build/core/domains/explore/queries/index-metrics.d.ts.map +1 -0
  57. package/build/core/domains/explore/queries/index-metrics.js +113 -0
  58. package/build/core/domains/explore/queries/index-metrics.js.map +1 -0
  59. package/build/core/domains/explore/reranker.d.ts +21 -0
  60. package/build/core/domains/explore/reranker.d.ts.map +1 -1
  61. package/build/core/domains/explore/reranker.js +53 -34
  62. package/build/core/domains/explore/reranker.js.map +1 -1
  63. package/build/core/domains/explore/strategies/file-outline.d.ts +34 -0
  64. package/build/core/domains/explore/strategies/file-outline.d.ts.map +1 -0
  65. package/build/core/domains/explore/strategies/file-outline.js +60 -0
  66. package/build/core/domains/explore/strategies/file-outline.js.map +1 -0
  67. package/build/core/domains/explore/strategies/index.d.ts +4 -0
  68. package/build/core/domains/explore/strategies/index.d.ts.map +1 -1
  69. package/build/core/domains/explore/strategies/index.js +2 -0
  70. package/build/core/domains/explore/strategies/index.js.map +1 -1
  71. package/build/core/domains/explore/strategies/symbol.d.ts +38 -0
  72. package/build/core/domains/explore/strategies/symbol.d.ts.map +1 -0
  73. package/build/core/domains/explore/strategies/symbol.js +78 -0
  74. package/build/core/domains/explore/strategies/symbol.js.map +1 -0
  75. package/build/core/domains/ingest/collection-stats.d.ts +2 -3
  76. package/build/core/domains/ingest/collection-stats.d.ts.map +1 -1
  77. package/build/core/domains/ingest/collection-stats.js +121 -122
  78. package/build/core/domains/ingest/collection-stats.js.map +1 -1
  79. package/build/core/domains/ingest/pipeline/chunker/hooks/markdown/chunker.d.ts +6 -0
  80. package/build/core/domains/ingest/pipeline/chunker/hooks/markdown/chunker.d.ts.map +1 -1
  81. package/build/core/domains/ingest/pipeline/chunker/hooks/markdown/chunker.js +71 -69
  82. package/build/core/domains/ingest/pipeline/chunker/hooks/markdown/chunker.js.map +1 -1
  83. package/build/core/domains/ingest/pipeline/chunker/tree-sitter.d.ts +13 -0
  84. package/build/core/domains/ingest/pipeline/chunker/tree-sitter.d.ts.map +1 -1
  85. package/build/core/domains/ingest/pipeline/chunker/tree-sitter.js +120 -117
  86. package/build/core/domains/ingest/pipeline/chunker/tree-sitter.js.map +1 -1
  87. package/build/core/domains/trajectory/git/stats/author-counts.d.ts +8 -0
  88. package/build/core/domains/trajectory/git/stats/author-counts.d.ts.map +1 -0
  89. package/build/core/domains/trajectory/git/stats/author-counts.js +19 -0
  90. package/build/core/domains/trajectory/git/stats/author-counts.js.map +1 -0
  91. package/build/core/domains/trajectory/git/stats/chunk-time-range.d.ts +13 -0
  92. package/build/core/domains/trajectory/git/stats/chunk-time-range.d.ts.map +1 -0
  93. package/build/core/domains/trajectory/git/stats/chunk-time-range.js +21 -0
  94. package/build/core/domains/trajectory/git/stats/chunk-time-range.js.map +1 -0
  95. package/build/core/domains/trajectory/git/stats/file-time-range.d.ts +13 -0
  96. package/build/core/domains/trajectory/git/stats/file-time-range.d.ts.map +1 -0
  97. package/build/core/domains/trajectory/git/stats/file-time-range.js +24 -0
  98. package/build/core/domains/trajectory/git/stats/file-time-range.js.map +1 -0
  99. package/build/core/domains/trajectory/git/stats/git-data-paths.d.ts +9 -0
  100. package/build/core/domains/trajectory/git/stats/git-data-paths.d.ts.map +1 -0
  101. package/build/core/domains/trajectory/git/stats/git-data-paths.js +20 -0
  102. package/build/core/domains/trajectory/git/stats/git-data-paths.js.map +1 -0
  103. package/build/core/domains/trajectory/git/stats/index.d.ts +14 -0
  104. package/build/core/domains/trajectory/git/stats/index.d.ts.map +1 -0
  105. package/build/core/domains/trajectory/git/stats/index.js +22 -0
  106. package/build/core/domains/trajectory/git/stats/index.js.map +1 -0
  107. package/build/core/domains/trajectory/git/stats/utils.d.ts +7 -0
  108. package/build/core/domains/trajectory/git/stats/utils.d.ts.map +1 -0
  109. package/build/core/domains/trajectory/git/stats/utils.js +18 -0
  110. package/build/core/domains/trajectory/git/stats/utils.js.map +1 -0
  111. package/build/core/domains/trajectory/git.d.ts +1 -0
  112. package/build/core/domains/trajectory/git.d.ts.map +1 -1
  113. package/build/core/domains/trajectory/git.js +2 -0
  114. package/build/core/domains/trajectory/git.js.map +1 -1
  115. package/build/core/domains/trajectory/registry.d.ts +3 -0
  116. package/build/core/domains/trajectory/registry.d.ts.map +1 -1
  117. package/build/core/domains/trajectory/registry.js +10 -0
  118. package/build/core/domains/trajectory/registry.js.map +1 -1
  119. package/build/core/domains/trajectory/static/index.d.ts +1 -0
  120. package/build/core/domains/trajectory/static/index.d.ts.map +1 -1
  121. package/build/core/domains/trajectory/static/index.js +2 -0
  122. package/build/core/domains/trajectory/static/index.js.map +1 -1
  123. package/build/core/domains/trajectory/static/stats/chunk-type-counts.d.ts +8 -0
  124. package/build/core/domains/trajectory/static/stats/chunk-type-counts.d.ts.map +1 -0
  125. package/build/core/domains/trajectory/static/stats/chunk-type-counts.js +17 -0
  126. package/build/core/domains/trajectory/static/stats/chunk-type-counts.js.map +1 -0
  127. package/build/core/domains/trajectory/static/stats/distinct-paths.d.ts +8 -0
  128. package/build/core/domains/trajectory/static/stats/distinct-paths.d.ts.map +1 -0
  129. package/build/core/domains/trajectory/static/stats/distinct-paths.js +17 -0
  130. package/build/core/domains/trajectory/static/stats/distinct-paths.js.map +1 -0
  131. package/build/core/domains/trajectory/static/stats/docs-code-counts.d.ts +13 -0
  132. package/build/core/domains/trajectory/static/stats/docs-code-counts.d.ts.map +1 -0
  133. package/build/core/domains/trajectory/static/stats/docs-code-counts.js +21 -0
  134. package/build/core/domains/trajectory/static/stats/docs-code-counts.js.map +1 -0
  135. package/build/core/domains/trajectory/static/stats/index.d.ts +13 -0
  136. package/build/core/domains/trajectory/static/stats/index.d.ts.map +1 -0
  137. package/build/core/domains/trajectory/static/stats/index.js +21 -0
  138. package/build/core/domains/trajectory/static/stats/index.js.map +1 -0
  139. package/build/core/domains/trajectory/static/stats/language-counts.d.ts +8 -0
  140. package/build/core/domains/trajectory/static/stats/language-counts.d.ts.map +1 -0
  141. package/build/core/domains/trajectory/static/stats/language-counts.js +17 -0
  142. package/build/core/domains/trajectory/static/stats/language-counts.js.map +1 -0
  143. package/build/mcp/tools/explore.d.ts.map +1 -1
  144. package/build/mcp/tools/explore.js +44 -53
  145. package/build/mcp/tools/explore.js.map +1 -1
  146. package/package.json +2 -1
package/README.md CHANGED
@@ -1,70 +1,221 @@
1
1
  <p align="center">
2
2
  <a href="https://artk0de.github.io/TeaRAGs-MCP/">
3
- <img src="public/logo.png">
3
+ <img src="public/logo.png" alt="TeaRAGs logo">
4
4
  </a>
5
5
  </p>
6
6
 
7
- <h1 align="center">TeaRAGs</h1>
7
+ <h1 align="center">TeaRAGs 🦖🍵</h1>
8
8
 
9
9
  <p align="center">
10
10
  <strong>Trajectory Enrichment-Aware RAG for Coding Agents</strong>
11
11
  </p>
12
12
 
13
- ![MCP compatible](https://img.shields.io/badge/MCP-compatible-%234f46e5)
14
- [![quickstart < 15 min](https://img.shields.io/badge/quickstart-%3C%2015%20min-f59e0b)](#-quick-start)
15
- [![local-first](https://img.shields.io/badge/deployment-local--first-15803d)](#-quick-start)
16
- [![reproducible: docker](https://img.shields.io/badge/reproducible-docker-0f172a)](#-quick-start)
17
- [![provider agnostic](https://img.shields.io/badge/provider-agnostic-0891b2)](#-quick-start)
18
-
19
- [![CI](https://github.com/artk0de/TeaRAGs-MCP/actions/workflows/ci.yml/badge.svg)](https://github.com/artk0de/TeaRAGs-MCP/actions/workflows/ci.yml)
20
- [![codecov](https://codecov.io/gh/artk0de/TeaRAGs-MCP/graph/badge.svg?token=BU255N03YF)](https://codecov.io/gh/artk0de/TeaRAGs-MCP)
13
+ <p align="center">
14
+ <img src="https://img.shields.io/badge/MCP-compatible-%234f46e5" alt="MCP compatible">
15
+ <a href="https://artk0de.github.io/TeaRAGs-MCP/quickstart/installation"><img src="https://img.shields.io/badge/quickstart-%3C%2015%20min-f59e0b" alt="15-minute quickstart"></a>
16
+ <img src="https://img.shields.io/badge/deployment-local--first-15803d" alt="local-first">
17
+ <img src="https://img.shields.io/badge/provider-agnostic-0891b2" alt="provider agnostic">
18
+ <br>
19
+ <a href="https://github.com/artk0de/TeaRAGs-MCP/actions/workflows/ci.yml"><img src="https://github.com/artk0de/TeaRAGs-MCP/actions/workflows/ci.yml/badge.svg" alt="CI"></a>
20
+ <a href="https://codecov.io/gh/artk0de/TeaRAGs-MCP"><img src="https://codecov.io/gh/artk0de/TeaRAGs-MCP/graph/badge.svg?token=BU255N03YF" alt="codecov"></a>
21
+ </p>
21
22
 
22
23
  ---
23
24
 
24
- **MCP server** for semantic code search with **git trajectory reranking**. AST-aware chunking, incremental indexing, millions of LOC. Reranks results using authorship, churn, bug-fix rates, and 19 other signals — not just embedding similarity. Built on Qdrant. Works with Ollama (local) or cloud providers (OpenAI, Cohere, Voyage).
25
+ **Your coding agent copies the first code it finds — not the right one.**
26
+
27
+ TeaRAGs is an MCP server for code search that enriches every retrieved chunk
28
+ with git history: authorship, churn, bug-fix rate, ownership. Your agent stops
29
+ learning from hotspots and starts learning from **stable, owned, battle-tested
30
+ code**.
31
+
32
+ 📖 **[Full documentation](https://artk0de.github.io/TeaRAGs-MCP/)** · 🏁
33
+ **[15-minute quickstart](https://artk0de.github.io/TeaRAGs-MCP/quickstart/installation)**
34
+ · 🧠
35
+ **[Core concepts](https://artk0de.github.io/TeaRAGs-MCP/introduction/core-concepts)**
36
+
37
+ ## The Problem
38
+
39
+ ### 1. Understanding a monorepo is expensive — for humans AND agents
40
+
41
+ Every new developer pays in hours. Every fresh agent session pays in tokens.
42
+ Naming conventions, domain logic, local idioms — all of it has to be rebuilt
43
+ from scratch, every time.
44
+
45
+ ### 2. Bad code hygiene is a tax on your agent
46
+
47
+ Confusing names mean the agent reads more files. More files mean more tokens,
48
+ slower responses, and a higher chance of picking the wrong example. Your
49
+ codebase's technical debt is now your AI bill.
50
+
51
+ ### 3. Agents can't tell stable code from a hotspot
52
+
53
+ Standard code search ranks by embedding similarity alone. It doesn't know which
54
+ function gets bug-fixed every sprint, which module hasn't been touched in two
55
+ years, or whose name is on the commits. So the agent copies whatever looks
56
+ similar — including the broken examples.
57
+
58
+ ## The Solution
59
+
60
+ TeaRAGs gives your agent two things it can't get from vanilla code search.
61
+
62
+ ### 1. Every chunk carries its own history
63
+
64
+ Retrieved code comes with signals about **who wrote it, how stable it is, how
65
+ often it gets bug-fixed**, and **how impactful a change would be**. Semantic
66
+ similarity stops being the whole answer — it becomes the floor.
67
+
68
+ ### 2. Pre-built skills, not just raw tools
69
+
70
+ TeaRAGs ships agent **skills** — ready-made playbooks that tell your agent when
71
+ and how to use the signals. No prompt engineering required:
72
+
73
+ - `explore` — orient in an unfamiliar codebase
74
+ - `data-driven-generation` — write code backed by stable, owned templates
75
+ - `risk-assessment` — know what you'd break before you break it
76
+ - `refactoring-scan` · `bug-hunt` · `pattern-search` — and more
77
+
78
+ Install the plugin, your agent learns the workflow.
79
+ [See all skills →](https://artk0de.github.io/TeaRAGs-MCP/usage/skills/)
80
+
81
+ **Bonus: `dinopowers`** — a companion plugin with 10 wrappers over
82
+ [`superpowers:*`](https://github.com/obra/superpowers) skills (Jesse Vincent's
83
+ skills library for Claude Code) that inject tea-rags signals into brainstorming,
84
+ planning, debugging, TDD, review, and completion flows. Mean eval delta +71pp
85
+ across 136 cases.
86
+ [Learn more →](https://artk0de.github.io/TeaRAGs-MCP/usage/skills/#dinopowers--wrappers-over-superpowers)
87
+
88
+ ## Use Cases
25
89
 
26
- > 📖 **[Full documentation](https://artk0de.github.io/TeaRAGs-MCP/)** — 15-minute quickstart, agent workflows, architecture deep dives.
90
+ ### 🛡️ Safe code generation
27
91
 
28
- ## 🧬 Trajectory Enrichment
92
+ Your agent writes new code backed by **stable, canonical templates** — modules
93
+ with a low bug-fix rate, long stability, and a clear owner. No more copying from
94
+ last sprint's hotspot. _Skill: `data-driven-generation` ·
95
+ [Why stable code is safer →](https://artk0de.github.io/TeaRAGs-MCP/knowledge-base/code-churn-research)_
29
96
 
30
- Standard code RAG retrieves by similarity alone. **Trajectory enrichment** augments each chunk with signals about how code *evolves* — at the function level, not just file level.
97
+ ### 🔧 Refactoring planning & problem-pattern discovery
31
98
 
32
- - 🔀 **Git trajectory** churn, authorship, volatility, bug-fix rates, task traceability. **19 signals** feed composable rerank presets (`hotspots`, `ownership`, `techDebt`, `securityAudit`...)
33
- - 🕸️ **Topological trajectory** *(planned)* symbol graphs, cross-file coupling, blast radius
99
+ Find the 5% of code responsible for 80% of incidents. **High churn + high
100
+ bug-fix rate + concentrated ownership = your next production issue** and your
101
+ next refactoring candidate. _Skills: `refactoring-scan`, `bug-hunt`_
34
102
 
35
- Opt-in via `CODE_ENABLE_GIT_METADATA=true`. Without it — standard semantic search with AST-aware chunking.
103
+ ### 🎯 Risk assessment before changes
36
104
 
37
- > 💡 An agent can **find stable templates**, **avoid anti-patterns**, **match domain owner's style**, and **assess modification risk** — all backed by empirical data. [Read more →](https://artk0de.github.io/TeaRAGs-MCP/introduction/core-concepts)
105
+ Before modifying a function, the agent checks **who depends on it, how often it
106
+ breaks, and what its ticket history says**. Know the blast radius before you
107
+ blast. _Skill: `risk-assessment` ·
108
+ [Coupling & blast radius theory →](https://artk0de.github.io/TeaRAGs-MCP/knowledge-base/code-quality-metrics)_
109
+
110
+ ### 🗺️ Learning an unfamiliar codebase
111
+
112
+ Ask questions instead of reading directory trees. _"How does auth work?"_
113
+ returns the **stable, canonical implementation** with its history attached — not
114
+ a random similar-looking snippet. _Skill: `explore`_
115
+
116
+ ## How It Works
117
+
118
+ ```mermaid
119
+ flowchart LR
120
+ User([👤 You])
121
+
122
+ subgraph mcp["TeaRAGs MCP Server"]
123
+ Agent[🤖 Agent<br/>runs skills]
124
+ TeaRAGs[🍵 TeaRAGs<br/>search · enrich · rerank]
125
+ Agent <--> TeaRAGs
126
+ end
127
+
128
+ Qdrant[(🗄️ Qdrant<br/>vector DB)]
129
+ Embeddings[✨ Embeddings<br/>Ollama/OpenAI]
130
+ Codebase[📁 Your Codebase<br/>+ Git History]
131
+
132
+ User <--> Agent
133
+ TeaRAGs <--> Qdrant
134
+ TeaRAGs <--> Embeddings
135
+ TeaRAGs <--> Codebase
136
+ ```
137
+
138
+ You talk to your agent. The agent runs a TeaRAGs skill. TeaRAGs searches your
139
+ code, enriches each result with git history, and ranks by what the skill needs —
140
+ stability, ownership, risk, or pure relevance.
141
+
142
+ ## What You Get
143
+
144
+ - 🧬 **Trajectory-aware retrieval** — the only open-source code RAG that scores
145
+ results by git history, not just embedding similarity
146
+ - 📚 **Ships with agent skills** — 6 ready-made playbooks for exploration,
147
+ generation, risk assessment, and index management (plus 2 internal strategies)
148
+ - 🔒 **Local-first, privacy-first** — works fully offline with Ollama; your code
149
+ never leaves your machine (cloud providers optional)
150
+ - 🚀 **Built for monorepos** — AST-aware chunking across 10+ languages,
151
+ incremental reindexing, parallel pipelines, millions of LOC tested
152
+
153
+ ## Who It's For
154
+
155
+ - **Developers in large monorepos** — where "find similar code" returns a dozen
156
+ near-duplicates and you need the _canonical_ one
157
+ - **Solo devs doing agentic development** — agent-driven workflows produce
158
+ bursts of micro-commits that wreck churn metrics. TeaRAGs ships a
159
+ [**GIT SESSIONS**](https://artk0de.github.io/TeaRAGs-MCP/architecture/git-enrichment-pipeline#git-sessions)
160
+ mode (`TRAJECTORY_GIT_SQUASH_AWARE_SESSIONS=true`) that groups commits by
161
+ `(author, time gap)` so a 20-commit refactor session counts as **one**. Churn,
162
+ bug-fix rate, and ownership stay meaningful even with a single human + an
163
+ agent as the only contributors.
164
+ - **Tech leads worried about AI code quality** — who want their team's agents to
165
+ learn from stable modules, not from last sprint's hotspot
166
+ - **Privacy-sensitive teams** — finance, healthcare, defense, or anyone who
167
+ can't send source code to a cloud API
168
+
169
+ **Not for:** repos without git history (no signal to enrich) or teams that only
170
+ need autocomplete (use Copilot).
38
171
 
39
172
  ## 🚀 Quick Start
40
173
 
41
- ```bash
42
- git clone https://github.com/artk0de/TeaRAGs-MCP.git
43
- cd TeaRAGs-MCP
44
- npm install && npm run build
174
+ Inside **Claude Code**, install the TeaRAGs plugins and run the setup wizard:
175
+
176
+ ```
177
+ /plugin marketplace add artk0de/TeaRAGs-MCP
178
+ /plugin install tea-rags-setup@tea-rags
179
+ /tea-rags-setup:install
180
+ ```
181
+
182
+ Then install the skills plugin (Claude-only, final step):
183
+
184
+ ```
185
+ /plugin install tea-rags@tea-rags
186
+ ```
45
187
 
46
- # Start Qdrant + Ollama
47
- podman compose up -d
48
- podman exec ollama ollama pull unclemusclez/jina-embeddings-v2-base-code:latest
188
+ Optionally install `dinopowers` for wrappers over `superpowers:*` skills:
49
189
 
50
- # Add to Claude Code
51
- claude mcp add tea-rags -s user -- node /path/to/tea-rags/build/index.js \
52
- -e QDRANT_URL=http://localhost:6333 \
53
- -e EMBEDDING_BASE_URL=http://localhost:11434
54
190
  ```
191
+ /plugin install dinopowers@tea-rags
192
+ ```
193
+
194
+ Index your codebase:
195
+
196
+ ```
197
+ /tea-rags:index
198
+ ```
199
+
200
+ Ask your agent anything: _"How does auth work in this project?"_, _"Find stable
201
+ examples of retry logic"_, _"What should I know before touching the payment
202
+ module?"_.
55
203
 
56
- Then ask your agent: *"Index this codebase for semantic search"*
204
+ For other MCP clients, CI, or air-gapped setups, see the
205
+ [manual install](https://artk0de.github.io/TeaRAGs-MCP/quickstart/installation#option-b--manual-install)
206
+ (Node + `npm install -g tea-rags` + Ollama/ONNX/OpenAI/Cohere/Voyage).
57
207
 
58
208
  ## 📚 Documentation
59
209
 
60
210
  **[artk0de.github.io/TeaRAGs-MCP](https://artk0de.github.io/TeaRAGs-MCP/)**
61
211
 
62
- | | Section | What's inside |
63
- |---|---------|---------------|
64
- | 🏁 | [Quickstart](https://artk0de.github.io/TeaRAGs-MCP/quickstart/installation) | Installation, first index & query |
65
- | ⚙️ | [Configuration](https://artk0de.github.io/TeaRAGs-MCP/usage/configuration) | Env vars, providers, tuning |
66
- | 🤖 | [Agent Integration](https://artk0de.github.io/TeaRAGs-MCP/agent-integration/search-strategies) | Prompt strategies, generation modes, deep analysis |
67
- | 🏗️ | [Architecture](https://artk0de.github.io/TeaRAGs-MCP/architecture/overview) | Pipeline, data model, reranker internals |
212
+ | I want to… | Start here |
213
+ | ---------------------------- | ----------------------------------------------------------------------------------------------------------------------------------- |
214
+ | **Get it running** | [Quickstart (15 min)](https://artk0de.github.io/TeaRAGs-MCP/quickstart/installation) — install, index, first query |
215
+ | **Understand the concept** | [Core Concepts](https://artk0de.github.io/TeaRAGs-MCP/introduction/core-concepts) — vectorization, trajectory enrichment, reranking |
216
+ | **See what my agent can do** | [Skills](https://artk0de.github.io/TeaRAGs-MCP/usage/skills/) — 6 ready-made agent playbooks for exploration, generation, risk |
217
+ | **Look under the hood** | [Architecture](https://artk0de.github.io/TeaRAGs-MCP/architecture/overview) — pipelines, data model, reranker internals |
218
+ | **Learn the theory** | [Knowledge Base](https://artk0de.github.io/TeaRAGs-MCP/knowledge-base/rag-fundamentals) — RAG, code search, software evolution |
68
219
 
69
220
  ## 🤝 Contributing
70
221
 
@@ -72,7 +223,12 @@ See [CONTRIBUTING.md](CONTRIBUTING.md) for workflow and conventions.
72
223
 
73
224
  ## 🙏 Acknowledgments
74
225
 
75
- Built on a fork of **[mhalder/qdrant-mcp-server](https://github.com/mhalder/qdrant-mcp-server)** — clean architecture, solid tests, open-source spirit. And its ancestor **[qdrant/mcp-server-qdrant](https://github.com/qdrant/mcp-server-qdrant)**. Code vectorization inspired by **[claude-context](https://github.com/zilliztech/claude-context)** (Zilliz).
226
+ Built on a fork of
227
+ **[mhalder/qdrant-mcp-server](https://github.com/mhalder/qdrant-mcp-server)** —
228
+ clean architecture, solid tests, open-source spirit. And its ancestor
229
+ **[qdrant/mcp-server-qdrant](https://github.com/qdrant/mcp-server-qdrant)**.
230
+ Code vectorization inspired by
231
+ **[claude-context](https://github.com/zilliztech/claude-context)** (Zilliz).
76
232
 
77
233
  _Feel free to fork this fork. It's forks all the way down._ 🐢
78
234
 
@@ -0,0 +1,148 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Embedding Diagnostic Benchmark
4
+ *
5
+ * Automatically calibrates EMBEDDING_BATCH_SIZE and EMBEDDING_CONCURRENCY
6
+ * using a three-phase plateau-detection algorithm.
7
+ *
8
+ * Phase 1: Find batch size plateau (CONCURRENCY=1)
9
+ * Phase 2: Test concurrency on plateau batches
10
+ * Phase 3: Select robust configuration (within 2% of max, prefer lower concurrency/batch)
11
+ *
12
+ * Run: npm run benchmark-embeddings
13
+ */
14
+ import { c, printBox } from "./lib/colors.mjs";
15
+ import { AVG_LOC_PER_CHUNK, config, MEDIAN_CODE_CHUNK_SIZE } from "./lib/config.mjs";
16
+ import { calibrateEmbeddings } from "./lib/embedding-calibration.mjs";
17
+ import { checkProviderConnectivity, createEmbeddingProvider } from "./lib/provider.mjs";
18
+
19
/**
 * Format a duration in milliseconds as a short human-readable string.
 *
 * - below one second: `"<ms>ms"`
 * - below one minute (after rounding): `"<s.s>s"` with one decimal place
 * - otherwise: `"<m>m <s>s"` with whole seconds
 *
 * @param {number} ms - Duration in milliseconds.
 * @returns {string} Formatted duration, or `"n/a"` for NaN / ±Infinity.
 */
function formatTime(ms) {
  // A non-finite duration (e.g. from a division by zero) has no meaningful rendering.
  if (typeof ms === "number" && !Number.isFinite(ms)) return "n/a";
  if (ms < 1000) return `${ms}ms`;
  // 59_950 ms is where one-decimal rounding would start printing "60.0s";
  // from there on, use the minutes form instead.
  if (ms < 59_950) return `${(ms / 1000).toFixed(1)}s`;
  // Round the total once so the seconds part can never show "60".
  const totalSeconds = Math.round(ms / 1000);
  const minutes = Math.floor(totalSeconds / 60);
  const seconds = totalSeconds % 60;
  return `${minutes}m ${seconds}s`;
}
29
+
30
/**
 * Run the embedding calibration benchmark and print the recommended settings.
 *
 * Steps, in order:
 *  1. Print the active configuration.
 *  2. Check provider connectivity; exit the process with code 1 if the check fails.
 *  3. Create the embedding provider and run `calibrateEmbeddings`.
 *  4. Print the selected EMBEDDING_BATCH_SIZE / EMBEDDING_CONCURRENCY, an
 *     explanation, export lines, indexing-time estimates, and run stats.
 *  5. Call the provider's `terminate()` when it exposes one.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.clear();
  printBox("EMBEDDING CALIBRATION BENCHMARK", "Three-phase plateau detection");

  // Show configuration
  console.log(`${c.bold}Configuration:${c.reset}`);
  console.log(` ${c.dim}Ollama:${c.reset} ${config.EMBEDDING_BASE_URL}`);
  console.log(` ${c.dim}Model:${c.reset} ${config.EMBEDDING_MODEL}`);
  console.log(` ${c.dim}Chunk size:${c.reset} ${MEDIAN_CODE_CHUNK_SIZE} chars (median from production)`);
  console.log();

  // Check embedding provider
  process.stdout.write(`${c.dim}Checking embedding provider...${c.reset} `);
  const embeddingCheck = await checkProviderConnectivity();
  if (!embeddingCheck.ok) {
    console.log(`${c.red}FAILED${c.reset}`);
    console.log(`\n${c.red}Error:${c.reset} ${embeddingCheck.error}`);
    process.exit(1);
  }
  console.log(`${c.green}OK${c.reset}`);

  // Initialize embeddings
  const { provider: embeddings, name: providerName } = await createEmbeddingProvider();
  console.log(` ${c.green}✓${c.reset} Embedding provider: ${providerName}`);
  console.log(` ${c.green}✓${c.reset} Vector dimension: ${embeddings.getDimensions()}`);
  console.log();

  // Run calibration
  const result = await calibrateEmbeddings(embeddings, { verbose: true });

  // ========== OUTPUT ==========
  console.log();

  // Detect setup type: ONNX provider first, otherwise classify by whether the
  // base URL mentions a loopback host.
  const isOnnx = providerName === "onnx";
  const isRemote =
    !isOnnx && !config.EMBEDDING_BASE_URL.includes("localhost") && !config.EMBEDDING_BASE_URL.includes("127.0.0.1");
  const setupIcon = isOnnx ? "⚡" : isRemote ? "🌐" : "🏠";
  const setupName = isOnnx ? "Local ONNX" : isRemote ? "Remote GPU" : "Local GPU";

  printBox(`${setupIcon} ${setupName.toUpperCase()} - OPTIMAL CONFIGURATION`, "");

  // Main result
  console.log(
    ` ${c.bold}EMBEDDING_BATCH_SIZE${c.reset} = ${c.green}${c.bold}${result.EMBEDDING_BATCH_SIZE}${c.reset}`,
  );
  console.log(
    ` ${c.bold}EMBEDDING_CONCURRENCY${c.reset} = ${c.green}${c.bold}${result.EMBEDDING_CONCURRENCY}${c.reset}`,
  );
  console.log();
  console.log(` ${c.bold}Throughput:${c.reset} ${c.cyan}${result.throughput_chunks_per_sec} chunks/s${c.reset}`);
  console.log();

  // Explain the choice
  console.log(`${c.bold}Why this configuration?${c.reset}`);
  if (isOnnx) {
    console.log(` ${c.dim}•${c.reset} Local ONNX runtime (${providerName})`);
    console.log(` ${c.dim}•${c.reset} In-process inference, no network overhead`);
    if (result.EMBEDDING_BATCH_SIZE <= 16) {
      console.log(` ${c.dim}•${c.reset} Small batches optimal for ONNX memory management`);
    }
  } else if (isRemote) {
    console.log(` ${c.dim}•${c.reset} Remote GPU detected (${config.EMBEDDING_BASE_URL})`);
    console.log(` ${c.dim}•${c.reset} Lower batch + higher concurrency hides network latency`);
    console.log(` ${c.dim}•${c.reset} While one batch transfers, GPU processes another`);
    if (result.EMBEDDING_CONCURRENCY > 1) {
      console.log(
        ` ${c.dim}•${c.reset} CONCURRENCY=${result.EMBEDDING_CONCURRENCY} overlaps network I/O with GPU compute`,
      );
    }
  } else {
    console.log(` ${c.dim}•${c.reset} Local GPU detected (minimal network latency)`);
    console.log(` ${c.dim}•${c.reset} Higher batch + lower concurrency minimizes overhead`);
    if (result.EMBEDDING_CONCURRENCY === 1) {
      console.log(` ${c.dim}•${c.reset} CONCURRENCY=1 indicates GPU-bound workload`);
    }
  }
  console.log();

  // Environment export
  console.log(`${c.bold}Add to your environment:${c.reset}`);
  console.log();
  console.log(` ${c.cyan}export EMBEDDING_BATCH_SIZE=${result.EMBEDDING_BATCH_SIZE}${c.reset}`);
  console.log(` ${c.cyan}export EMBEDDING_CONCURRENCY=${result.EMBEDDING_CONCURRENCY}${c.reset}`);
  console.log();

  // Time estimates
  console.log(`${c.bold}Estimated indexing times:${c.reset}`);
  const projects = [
    { name: "10K LoC", loc: 10_000 },
    { name: "100K LoC", loc: 100_000 },
    { name: "1M LoC", loc: 1_000_000 },
    { name: "VS Code (3.5M)", loc: 3_500_000 },
  ];
  // Dividing by a zero or non-numeric throughput would print "Infinitym NaNs";
  // show "n/a" instead when no usable throughput was measured.
  const throughput = Number(result.throughput_chunks_per_sec);
  const hasThroughput = Number.isFinite(throughput) && throughput > 0;
  for (const p of projects) {
    const chunks = Math.ceil(p.loc / AVG_LOC_PER_CHUNK);
    const estimate = hasThroughput ? formatTime(Math.ceil(chunks / throughput) * 1000) : "n/a";
    console.log(` ${c.dim}${p.name.padEnd(20)}${c.reset} ${c.bold}${estimate}${c.reset}`);
  }
  console.log();

  // Stats
  console.log(`${c.dim}────────────────────────────────────────${c.reset}`);
  console.log(
    `${c.dim}Configs tested: ${result.stable_configs_count} stable, ${result.discarded_configs_count} discarded${c.reset}`,
  );
  console.log(`${c.bold}Total benchmark time: ${formatTime(result.calibration_time_ms)}${c.reset}`);

  // Terminate provider (ONNX keeps socket alive)
  if ("terminate" in embeddings && typeof embeddings.terminate === "function") {
    await embeddings.terminate();
  }
}
143
+
144
// Script entry point: run the benchmark; on any rejection, print the error
// message and stack trace to stderr and exit with status 1.
main().catch((error) => {
  const { message, stack } = error;
  console.error(`${c.red}Fatal error:${c.reset}`, message);
  console.error(stack);
  process.exit(1);
});