codesift-mcp 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +66 -21
- package/README.md +346 -56
- package/dist/cli/args.d.ts +2 -0
- package/dist/cli/args.d.ts.map +1 -1
- package/dist/cli/args.js +11 -0
- package/dist/cli/args.js.map +1 -1
- package/dist/cli/commands.d.ts.map +1 -1
- package/dist/cli/commands.js +177 -67
- package/dist/cli/commands.js.map +1 -1
- package/dist/cli/help.d.ts +1 -1
- package/dist/cli/help.d.ts.map +1 -1
- package/dist/cli/help.js +157 -0
- package/dist/cli/help.js.map +1 -1
- package/dist/cli/hooks.d.ts +3 -0
- package/dist/cli/hooks.d.ts.map +1 -0
- package/dist/cli/hooks.js +163 -0
- package/dist/cli/hooks.js.map +1 -0
- package/dist/cli/setup.d.ts +25 -0
- package/dist/cli/setup.d.ts.map +1 -0
- package/dist/cli/setup.js +400 -0
- package/dist/cli/setup.js.map +1 -0
- package/dist/config.d.ts +2 -0
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +2 -0
- package/dist/config.js.map +1 -1
- package/dist/formatters-shortening.d.ts +7 -0
- package/dist/formatters-shortening.d.ts.map +1 -0
- package/dist/formatters-shortening.js +68 -0
- package/dist/formatters-shortening.js.map +1 -0
- package/dist/formatters.d.ts +314 -0
- package/dist/formatters.d.ts.map +1 -0
- package/dist/formatters.js +396 -0
- package/dist/formatters.js.map +1 -0
- package/dist/instructions.d.ts +6 -0
- package/dist/instructions.d.ts.map +1 -0
- package/dist/instructions.js +72 -0
- package/dist/instructions.js.map +1 -0
- package/dist/lsp/lsp-client.d.ts +21 -0
- package/dist/lsp/lsp-client.d.ts.map +1 -0
- package/dist/lsp/lsp-client.js +122 -0
- package/dist/lsp/lsp-client.js.map +1 -0
- package/dist/lsp/lsp-manager.d.ts +12 -0
- package/dist/lsp/lsp-manager.d.ts.map +1 -0
- package/dist/lsp/lsp-manager.js +82 -0
- package/dist/lsp/lsp-manager.js.map +1 -0
- package/dist/lsp/lsp-servers.d.ts +13 -0
- package/dist/lsp/lsp-servers.d.ts.map +1 -0
- package/dist/lsp/lsp-servers.js +57 -0
- package/dist/lsp/lsp-servers.js.map +1 -0
- package/dist/lsp/lsp-tools.d.ts +67 -0
- package/dist/lsp/lsp-tools.d.ts.map +1 -0
- package/dist/lsp/lsp-tools.js +359 -0
- package/dist/lsp/lsp-tools.js.map +1 -0
- package/dist/parser/extractors/_shared.d.ts +11 -0
- package/dist/parser/extractors/_shared.d.ts.map +1 -0
- package/dist/parser/extractors/_shared.js +38 -0
- package/dist/parser/extractors/_shared.js.map +1 -0
- package/dist/parser/extractors/astro.d.ts +15 -0
- package/dist/parser/extractors/astro.d.ts.map +1 -0
- package/dist/parser/extractors/astro.js +104 -0
- package/dist/parser/extractors/astro.js.map +1 -0
- package/dist/parser/extractors/conversation.d.ts +16 -0
- package/dist/parser/extractors/conversation.d.ts.map +1 -0
- package/dist/parser/extractors/conversation.js +196 -0
- package/dist/parser/extractors/conversation.js.map +1 -0
- package/dist/parser/extractors/go.d.ts.map +1 -1
- package/dist/parser/extractors/go.js +22 -45
- package/dist/parser/extractors/go.js.map +1 -1
- package/dist/parser/extractors/python.d.ts +1 -1
- package/dist/parser/extractors/python.d.ts.map +1 -1
- package/dist/parser/extractors/python.js +19 -50
- package/dist/parser/extractors/python.js.map +1 -1
- package/dist/parser/extractors/rust.d.ts +1 -1
- package/dist/parser/extractors/rust.d.ts.map +1 -1
- package/dist/parser/extractors/rust.js +7 -34
- package/dist/parser/extractors/rust.js.map +1 -1
- package/dist/parser/extractors/typescript.d.ts +1 -1
- package/dist/parser/extractors/typescript.d.ts.map +1 -1
- package/dist/parser/extractors/typescript.js +99 -68
- package/dist/parser/extractors/typescript.js.map +1 -1
- package/dist/parser/parser-manager.d.ts.map +1 -1
- package/dist/parser/parser-manager.js +12 -2
- package/dist/parser/parser-manager.js.map +1 -1
- package/dist/parser/symbol-extractor.d.ts +2 -0
- package/dist/parser/symbol-extractor.d.ts.map +1 -1
- package/dist/parser/symbol-extractor.js +2 -0
- package/dist/parser/symbol-extractor.js.map +1 -1
- package/dist/register-tools.d.ts +127 -0
- package/dist/register-tools.d.ts.map +1 -0
- package/dist/register-tools.js +1453 -0
- package/dist/register-tools.js.map +1 -0
- package/dist/retrieval/codebase-retrieval.d.ts +4 -26
- package/dist/retrieval/codebase-retrieval.d.ts.map +1 -1
- package/dist/retrieval/codebase-retrieval.js +105 -403
- package/dist/retrieval/codebase-retrieval.js.map +1 -1
- package/dist/retrieval/retrieval-constants.d.ts +27 -0
- package/dist/retrieval/retrieval-constants.d.ts.map +1 -0
- package/dist/retrieval/retrieval-constants.js +27 -0
- package/dist/retrieval/retrieval-constants.js.map +1 -0
- package/dist/retrieval/retrieval-schemas.d.ts +107 -0
- package/dist/retrieval/retrieval-schemas.d.ts.map +1 -0
- package/dist/retrieval/retrieval-schemas.js +102 -0
- package/dist/retrieval/retrieval-schemas.js.map +1 -0
- package/dist/retrieval/retrieval-utils.d.ts +40 -0
- package/dist/retrieval/retrieval-utils.d.ts.map +1 -0
- package/dist/retrieval/retrieval-utils.js +139 -0
- package/dist/retrieval/retrieval-utils.js.map +1 -0
- package/dist/retrieval/semantic-handlers.d.ts +8 -0
- package/dist/retrieval/semantic-handlers.d.ts.map +1 -0
- package/dist/retrieval/semantic-handlers.js +152 -0
- package/dist/retrieval/semantic-handlers.js.map +1 -0
- package/dist/search/bm25.d.ts +6 -1
- package/dist/search/bm25.d.ts.map +1 -1
- package/dist/search/bm25.js +95 -32
- package/dist/search/bm25.js.map +1 -1
- package/dist/search/chunker.d.ts +10 -0
- package/dist/search/chunker.d.ts.map +1 -1
- package/dist/search/chunker.js +63 -11
- package/dist/search/chunker.js.map +1 -1
- package/dist/search/reranker.d.ts +15 -0
- package/dist/search/reranker.d.ts.map +1 -0
- package/dist/search/reranker.js +126 -0
- package/dist/search/reranker.js.map +1 -0
- package/dist/search/semantic.d.ts +1 -1
- package/dist/search/semantic.d.ts.map +1 -1
- package/dist/search/semantic.js +40 -45
- package/dist/search/semantic.js.map +1 -1
- package/dist/server-helpers.d.ts +29 -0
- package/dist/server-helpers.d.ts.map +1 -0
- package/dist/server-helpers.js +312 -0
- package/dist/server-helpers.js.map +1 -0
- package/dist/server.d.ts +1 -1
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +11 -271
- package/dist/server.js.map +1 -1
- package/dist/storage/_shared.d.ts +9 -0
- package/dist/storage/_shared.d.ts.map +1 -0
- package/dist/storage/_shared.js +26 -0
- package/dist/storage/_shared.js.map +1 -0
- package/dist/storage/chunk-store.d.ts.map +1 -1
- package/dist/storage/chunk-store.js +23 -63
- package/dist/storage/chunk-store.js.map +1 -1
- package/dist/storage/embedding-store.d.ts +6 -3
- package/dist/storage/embedding-store.d.ts.map +1 -1
- package/dist/storage/embedding-store.js +54 -30
- package/dist/storage/embedding-store.js.map +1 -1
- package/dist/storage/graph-store.d.ts +48 -0
- package/dist/storage/graph-store.d.ts.map +1 -0
- package/dist/storage/graph-store.js +52 -0
- package/dist/storage/graph-store.js.map +1 -0
- package/dist/storage/index-store.d.ts +5 -0
- package/dist/storage/index-store.d.ts.map +1 -1
- package/dist/storage/index-store.js +28 -16
- package/dist/storage/index-store.js.map +1 -1
- package/dist/storage/registry.d.ts +4 -0
- package/dist/storage/registry.d.ts.map +1 -1
- package/dist/storage/registry.js +16 -16
- package/dist/storage/registry.js.map +1 -1
- package/dist/storage/usage-stats.d.ts +6 -0
- package/dist/storage/usage-stats.d.ts.map +1 -1
- package/dist/storage/usage-stats.js +59 -11
- package/dist/storage/usage-stats.js.map +1 -1
- package/dist/storage/usage-tracker.d.ts +3 -0
- package/dist/storage/usage-tracker.d.ts.map +1 -1
- package/dist/storage/usage-tracker.js +50 -132
- package/dist/storage/usage-tracker.js.map +1 -1
- package/dist/storage/watcher.d.ts +2 -1
- package/dist/storage/watcher.d.ts.map +1 -1
- package/dist/storage/watcher.js +16 -16
- package/dist/storage/watcher.js.map +1 -1
- package/dist/tools/ast-query-tools.d.ts +29 -0
- package/dist/tools/ast-query-tools.d.ts.map +1 -0
- package/dist/tools/ast-query-tools.js +110 -0
- package/dist/tools/ast-query-tools.js.map +1 -0
- package/dist/tools/boundary-tools.d.ts +31 -0
- package/dist/tools/boundary-tools.d.ts.map +1 -0
- package/dist/tools/boundary-tools.js +62 -0
- package/dist/tools/boundary-tools.js.map +1 -0
- package/dist/tools/clone-tools.d.ts +35 -0
- package/dist/tools/clone-tools.d.ts.map +1 -0
- package/dist/tools/clone-tools.js +181 -0
- package/dist/tools/clone-tools.js.map +1 -0
- package/dist/tools/community-tools.d.ts +23 -0
- package/dist/tools/community-tools.d.ts.map +1 -0
- package/dist/tools/community-tools.js +297 -0
- package/dist/tools/community-tools.js.map +1 -0
- package/dist/tools/complexity-tools.d.ts +34 -0
- package/dist/tools/complexity-tools.d.ts.map +1 -0
- package/dist/tools/complexity-tools.js +135 -0
- package/dist/tools/complexity-tools.js.map +1 -0
- package/dist/tools/context-tools.d.ts +44 -3
- package/dist/tools/context-tools.d.ts.map +1 -1
- package/dist/tools/context-tools.js +329 -99
- package/dist/tools/context-tools.js.map +1 -1
- package/dist/tools/conversation-tools.d.ts +107 -0
- package/dist/tools/conversation-tools.d.ts.map +1 -0
- package/dist/tools/conversation-tools.js +419 -0
- package/dist/tools/conversation-tools.js.map +1 -0
- package/dist/tools/coordinator-tools.d.ts +73 -0
- package/dist/tools/coordinator-tools.d.ts.map +1 -0
- package/dist/tools/coordinator-tools.js +153 -0
- package/dist/tools/coordinator-tools.js.map +1 -0
- package/dist/tools/cross-repo-tools.d.ts +43 -0
- package/dist/tools/cross-repo-tools.d.ts.map +1 -0
- package/dist/tools/cross-repo-tools.js +55 -0
- package/dist/tools/cross-repo-tools.js.map +1 -0
- package/dist/tools/diff-tools.d.ts +4 -1
- package/dist/tools/diff-tools.d.ts.map +1 -1
- package/dist/tools/diff-tools.js +23 -5
- package/dist/tools/diff-tools.js.map +1 -1
- package/dist/tools/frequency-tools.d.ts +46 -0
- package/dist/tools/frequency-tools.d.ts.map +1 -0
- package/dist/tools/frequency-tools.js +184 -0
- package/dist/tools/frequency-tools.js.map +1 -0
- package/dist/tools/generate-tools.d.ts.map +1 -1
- package/dist/tools/generate-tools.js +13 -2
- package/dist/tools/generate-tools.js.map +1 -1
- package/dist/tools/graph-tools.d.ts +44 -11
- package/dist/tools/graph-tools.d.ts.map +1 -1
- package/dist/tools/graph-tools.js +147 -104
- package/dist/tools/graph-tools.js.map +1 -1
- package/dist/tools/hotspot-tools.d.ts +24 -0
- package/dist/tools/hotspot-tools.d.ts.map +1 -0
- package/dist/tools/hotspot-tools.js +122 -0
- package/dist/tools/hotspot-tools.js.map +1 -0
- package/dist/tools/impact-tools.d.ts +13 -0
- package/dist/tools/impact-tools.d.ts.map +1 -0
- package/dist/tools/impact-tools.js +238 -0
- package/dist/tools/impact-tools.js.map +1 -0
- package/dist/tools/index-tools.d.ts +44 -3
- package/dist/tools/index-tools.d.ts.map +1 -1
- package/dist/tools/index-tools.js +530 -222
- package/dist/tools/index-tools.js.map +1 -1
- package/dist/tools/memory-tools.d.ts +35 -0
- package/dist/tools/memory-tools.d.ts.map +1 -0
- package/dist/tools/memory-tools.js +229 -0
- package/dist/tools/memory-tools.js.map +1 -0
- package/dist/tools/outline-tools.d.ts +24 -13
- package/dist/tools/outline-tools.d.ts.map +1 -1
- package/dist/tools/outline-tools.js +113 -87
- package/dist/tools/outline-tools.js.map +1 -1
- package/dist/tools/pattern-tools.d.ts +32 -0
- package/dist/tools/pattern-tools.d.ts.map +1 -0
- package/dist/tools/pattern-tools.js +116 -0
- package/dist/tools/pattern-tools.js.map +1 -0
- package/dist/tools/report-tools.d.ts +5 -0
- package/dist/tools/report-tools.d.ts.map +1 -0
- package/dist/tools/report-tools.js +167 -0
- package/dist/tools/report-tools.js.map +1 -0
- package/dist/tools/review-diff-tools.d.ts +148 -0
- package/dist/tools/review-diff-tools.d.ts.map +1 -0
- package/dist/tools/review-diff-tools.js +852 -0
- package/dist/tools/review-diff-tools.js.map +1 -0
- package/dist/tools/route-tools.d.ts +32 -0
- package/dist/tools/route-tools.d.ts.map +1 -0
- package/dist/tools/route-tools.js +276 -0
- package/dist/tools/route-tools.js.map +1 -0
- package/dist/tools/search-ranker.d.ts +5 -0
- package/dist/tools/search-ranker.d.ts.map +1 -0
- package/dist/tools/search-ranker.js +142 -0
- package/dist/tools/search-ranker.js.map +1 -0
- package/dist/tools/search-tools.d.ts +24 -1
- package/dist/tools/search-tools.d.ts.map +1 -1
- package/dist/tools/search-tools.js +459 -225
- package/dist/tools/search-tools.js.map +1 -1
- package/dist/tools/secret-tools.d.ts +104 -0
- package/dist/tools/secret-tools.d.ts.map +1 -0
- package/dist/tools/secret-tools.js +410 -0
- package/dist/tools/secret-tools.js.map +1 -0
- package/dist/tools/symbol-tools.d.ts +90 -2
- package/dist/tools/symbol-tools.d.ts.map +1 -1
- package/dist/tools/symbol-tools.js +576 -42
- package/dist/tools/symbol-tools.js.map +1 -1
- package/dist/types.d.ts +34 -1
- package/dist/types.d.ts.map +1 -1
- package/dist/utils/framework-detect.d.ts +5 -0
- package/dist/utils/framework-detect.d.ts.map +1 -0
- package/dist/utils/framework-detect.js +36 -0
- package/dist/utils/framework-detect.js.map +1 -0
- package/dist/utils/glob.d.ts +19 -0
- package/dist/utils/glob.d.ts.map +1 -0
- package/dist/utils/glob.js +74 -0
- package/dist/utils/glob.js.map +1 -0
- package/dist/utils/import-graph.d.ts +29 -0
- package/dist/utils/import-graph.d.ts.map +1 -0
- package/dist/utils/import-graph.js +125 -0
- package/dist/utils/import-graph.js.map +1 -0
- package/dist/utils/test-file.d.ts.map +1 -1
- package/dist/utils/test-file.js +1 -0
- package/dist/utils/test-file.js.map +1 -1
- package/dist/utils/walk.d.ts +45 -0
- package/dist/utils/walk.d.ts.map +1 -0
- package/dist/utils/walk.js +87 -0
- package/dist/utils/walk.js.map +1 -0
- package/package.json +10 -4
- package/rules/codesift.md +187 -0
- package/rules/codesift.mdc +192 -0
- package/rules/codex.md +187 -0
- package/rules/gemini.md +187 -0
package/LICENSE
CHANGED
|
@@ -1,21 +1,66 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
1
|
+
License text copyright (c) 2020 MariaDB Corporation Ab, All Rights Reserved.
|
|
2
|
+
"Business Source License" is a trademark of MariaDB Corporation Ab.
|
|
3
|
+
|
|
4
|
+
Parameters
|
|
5
|
+
|
|
6
|
+
Licensor: Greg Laskowski
|
|
7
|
+
Licensed Work: CodeSift MCP. The Licensed Work is (c) 2026 Greg Laskowski.
|
|
8
|
+
Additional Use Grant: You may make non-production use of the Licensed Work.
|
|
9
|
+
Personal, educational, and non-commercial use is permitted.
|
|
10
|
+
|
|
11
|
+
Any commercial use — including use within a for-profit
|
|
12
|
+
organization, by employees or contractors of a for-profit
|
|
13
|
+
entity, or in connection with revenue-generating activities —
|
|
14
|
+
requires a commercial license from the Licensor.
|
|
15
|
+
|
|
16
|
+
For commercial licensing, visit https://codesift.app/pricing
|
|
17
|
+
Change Date: Four years from the date the Licensed Work is published.
|
|
18
|
+
Change License: Apache License, Version 2.0
|
|
19
|
+
|
|
20
|
+
For information about alternative licensing arrangements for the Licensed Work,
|
|
21
|
+
please contact greg@codesift.app.
|
|
22
|
+
|
|
23
|
+
Notice
|
|
24
|
+
|
|
25
|
+
Business Source License 1.1
|
|
26
|
+
|
|
27
|
+
Terms
|
|
28
|
+
|
|
29
|
+
The Licensor hereby grants you the right to copy, modify, create derivative
|
|
30
|
+
works, redistribute, and make non-production use of the Licensed Work. The
|
|
31
|
+
Licensor may make an Additional Use Grant, above, permitting limited production use.
|
|
32
|
+
|
|
33
|
+
Effective on the Change Date, or the fourth anniversary of the first publicly
|
|
34
|
+
available distribution of a specific version of the Licensed Work under this
|
|
35
|
+
License, whichever comes first, the Licensor hereby grants you rights under
|
|
36
|
+
the terms of the Change License, and the rights granted in the paragraph
|
|
37
|
+
above terminate.
|
|
38
|
+
|
|
39
|
+
If your use of the Licensed Work does not comply with the requirements
|
|
40
|
+
currently in effect as described in this License, you must purchase a
|
|
41
|
+
commercial license from the Licensor, its affiliated entities, or authorized
|
|
42
|
+
resellers, or you must refrain from using the Licensed Work.
|
|
43
|
+
|
|
44
|
+
All copies of the original and modified Licensed Work, and derivative works
|
|
45
|
+
of the Licensed Work, are subject to this License. This License applies
|
|
46
|
+
separately for each version of the Licensed Work and the Change Date may vary
|
|
47
|
+
for each version of the Licensed Work released by Licensor.
|
|
48
|
+
|
|
49
|
+
You must conspicuously display this License on each original or modified copy
|
|
50
|
+
of the Licensed Work. If you receive the Licensed Work in original or
|
|
51
|
+
modified form from a third party, the terms and conditions set forth in this
|
|
52
|
+
License apply to your use of that work.
|
|
53
|
+
|
|
54
|
+
Any use of the Licensed Work in violation of this License will automatically
|
|
55
|
+
terminate your rights under this License for the current and all other
|
|
56
|
+
versions of the Licensed Work.
|
|
57
|
+
|
|
58
|
+
This License does not grant you any right in any trademark or logo of
|
|
59
|
+
Licensor or its affiliates (provided that you may use a trademark or logo of
|
|
60
|
+
Licensor as expressly required by this License).
|
|
61
|
+
|
|
62
|
+
TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON
|
|
63
|
+
AN "AS IS" BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS,
|
|
64
|
+
EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF
|
|
65
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND
|
|
66
|
+
TITLE.
|
package/README.md
CHANGED
|
@@ -1,13 +1,45 @@
|
|
|
1
1
|
# CodeSift -- Token-efficient code intelligence for AI agents
|
|
2
2
|
|
|
3
|
-
CodeSift indexes your codebase with tree-sitter AST parsing and gives AI agents
|
|
3
|
+
CodeSift indexes your codebase with tree-sitter AST parsing and gives AI agents 63 search, retrieval, and analysis tools via CLI or MCP server. It uses 61-95% fewer tokens than raw grep/Read workflows on typical code navigation tasks.
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
**Works with:** Claude Code, Cursor, Codex, Gemini CLI, Zed, Aider, Continue — any MCP client.
|
|
6
|
+
|
|
7
|
+
## Install
|
|
6
8
|
|
|
7
9
|
```bash
|
|
8
10
|
npm install -g codesift-mcp
|
|
9
11
|
```
|
|
10
12
|
|
|
13
|
+
Then configure your AI coding tool (pick one, or use `all`):
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
codesift setup claude # Claude Code — config + rules + hooks
|
|
17
|
+
codesift setup codex # Codex CLI — config + AGENTS.md rules
|
|
18
|
+
codesift setup cursor # Cursor IDE — config + .cursor/rules
|
|
19
|
+
codesift setup gemini # Gemini CLI — config + GEMINI.md rules
|
|
20
|
+
codesift setup all # All platforms at once
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
**What `setup` installs (all by default):**
|
|
24
|
+
|
|
25
|
+
| Component | What it does | Opt-out |
|
|
26
|
+
|-----------|-------------|---------|
|
|
27
|
+
| **MCP config** | Registers codesift-mcp server | (required) |
|
|
28
|
+
| **Rules file** | Tool mapping, hints, ALWAYS/NEVER rules for your AI agent | `--no-rules` |
|
|
29
|
+
| **Hooks** (Claude only) | Auto-index after Edit/Write, redirect large Read to CodeSift | `--no-hooks` |
|
|
30
|
+
|
|
31
|
+
Additionally, every MCP client receives ~800 tokens of compact guidance automatically via the MCP `instructions` field — zero setup needed.
|
|
32
|
+
|
|
33
|
+
## Update
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
npm update -g codesift-mcp
|
|
37
|
+
codesift setup all # Updates rules files to latest version
|
|
38
|
+
codesift setup all --force # Force-update even if you modified rules
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
If you use `npx -y codesift-mcp` (the default), each platform automatically picks up the latest published version on next session start. Re-run `setup` to update rules files to the latest version.
|
|
42
|
+
|
|
11
43
|
## Quick start
|
|
12
44
|
|
|
13
45
|
```bash
|
|
@@ -24,18 +56,54 @@ codesift retrieve local/my-project \
|
|
|
24
56
|
|
|
25
57
|
## Benchmark results
|
|
26
58
|
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
|
32
|
-
|
|
33
|
-
|
|
|
34
|
-
|
|
|
35
|
-
|
|
|
36
|
-
|
|
|
37
|
-
|
|
38
|
-
|
|
59
|
+
### Combo benchmark (real-world tool sequences)
|
|
60
|
+
|
|
61
|
+
772 real tasks replayed from `usage.jsonl` — the exact query sequences agents used across 33+ repos — comparing native tools (grep/find/read) against CodeSift.
|
|
62
|
+
|
|
63
|
+
| Sequence | Runs | Tok native | Tok Sift | Delta | Wins |
|
|
64
|
+
|----------|------|-----------|----------|-------|------|
|
|
65
|
+
| pat→st→pat→st (4-gram) | 37 | 377,258 | 36,758 | **-90%** | 28/37 |
|
|
66
|
+
| pat→st→pat | 39 | 186,436 | 20,500 | **-89%** | 31/39 |
|
|
67
|
+
| st→pat→st→pat | 35 | 307,490 | 35,905 | **-88%** | 25/35 |
|
|
68
|
+
| ss→st | 78 | 202,837 | 36,408 | **-82%** | 35/78 |
|
|
69
|
+
| st→pat→st | 40 | 250,240 | 44,424 | **-82%** | 27/40 |
|
|
70
|
+
| st→tree→st | 28 | 262,703 | 61,093 | **-77%** | 22/28 |
|
|
71
|
+
| tree→st | 57 | 380,324 | 133,578 | **-65%** | 44/57 |
|
|
72
|
+
| **AGGREGATE** | **772** | **5,130,240** | **1,994,825** | **-61%** | **542/772** |
|
|
73
|
+
|
|
74
|
+
### Per-tool (single-tool benchmark)
|
|
75
|
+
|
|
76
|
+
| Tool | Tok native | Tok Sift | Delta |
|
|
77
|
+
|------|-----------|----------|-------|
|
|
78
|
+
| search_text vs rg | 1,015,245 | 49,718 | **-95%** |
|
|
79
|
+
| search_symbols vs rg | 192,486 | 34,186 | **-82%** |
|
|
80
|
+
| get_file_outline vs Read | 91,796 | 58,229 | **-37%** |
|
|
81
|
+
|
|
82
|
+
## Performance features
|
|
83
|
+
|
|
84
|
+
| Feature | Description | Impact |
|
|
85
|
+
|---------|-------------|--------|
|
|
86
|
+
| **mtime-based incremental indexing** | Skip files with unchanged mtime on reindex | 5.6x faster reindex (57s → 10s on 778-file repo) |
|
|
87
|
+
| **index_file** | Re-index a single file without full repo walk | 9ms (unchanged) / 153ms (changed) vs 3-8s full folder |
|
|
88
|
+
| **detail_level** on search_symbols | `compact` (~15 tok/result), `standard`, `full` | compact uses 63% fewer tokens than standard |
|
|
89
|
+
| **token_budget** on search_symbols | Pack results to token limit instead of guessing top_k | Precise budget control |
|
|
90
|
+
| **Centrality bonus** in BM25 | Symbols in frequently-imported files rank higher | Core utilities surface first in search |
|
|
91
|
+
| **Response dedup cache** | Identical calls within 30s return cached result | Eliminates duplicate API calls |
|
|
92
|
+
| **In-flight dedup** | Parallel identical requests coalesce into one | Prevents race condition duplicates |
|
|
93
|
+
| **Auto-grouping** | Force group_by_file when output exceeds 80K chars | Prevents 100K+ token responses |
|
|
94
|
+
| **Relevance-gap filtering** | Cut search results below 15% of top score | 50→21 results (cleaner output) |
|
|
95
|
+
| **Semantic chunking** | Chunk by symbol boundaries, not fixed lines | Functions stay intact for semantic search |
|
|
96
|
+
| **Token savings display** | "Saved ~X tokens ($Y)" on every response | Visible ROI per call |
|
|
97
|
+
| **Framework-aware dead code** | Whitelist React hooks, NestJS lifecycle, Next.js handlers | <10% false positives (was ~40%) |
|
|
98
|
+
| **Mermaid diagrams** | `detect_communities`, `get_knowledge_map`, `trace_route` output Mermaid | Paste-ready architecture diagrams |
|
|
99
|
+
| **HTML report** | `generate_report` → standalone browser report | Complexity, dead code, hotspots, communities |
|
|
100
|
+
| **Progressive cascade** | >15K tok → compact format, >25K → counts only, >30K → truncate | Auto-adjusting response size |
|
|
101
|
+
| **Tool visibility** | Non-core tools hidden via MCP `disable()`, discoverable on demand | ~10K fewer tokens in system prompt |
|
|
102
|
+
| **MCP instructions** | ~800 tok of agent guidance sent automatically to every client | Zero-setup onboarding |
|
|
103
|
+
| **Ranked search** | `search_text(ranked=true)` classifies hits by containing symbol, deduplicates | Saves 1-3 follow-up calls |
|
|
104
|
+
| **PreToolUse hooks** | Redirect large-file Read to CodeSift outline/search | Prevents 5K+ token file dumps |
|
|
105
|
+
| **PostToolUse hooks** | Auto-reindex after Edit/Write | Always-fresh index |
|
|
106
|
+
| **Sequential hints** | Prepended hints (H1-H9) suggest batching after 3+ consecutive calls | Guides agents toward efficient usage |
|
|
39
107
|
|
|
40
108
|
## CLI commands
|
|
41
109
|
|
|
@@ -43,7 +111,7 @@ CodeSift wins 4 of 6 categories. Symbol search is at parity (verbose output, bei
|
|
|
43
111
|
|
|
44
112
|
| Command | Description |
|
|
45
113
|
|---------|-------------|
|
|
46
|
-
| `codesift index <path>` | Index a local folder |
|
|
114
|
+
| `codesift index <path>` | Index a local folder (mtime-based incremental — skips unchanged files) |
|
|
47
115
|
| `codesift index-repo <url>` | Clone and index a remote git repository |
|
|
48
116
|
| `codesift repos` | List all indexed repositories |
|
|
49
117
|
| `codesift invalidate <repo>` | Clear index cache for a repository |
|
|
@@ -53,7 +121,7 @@ CodeSift wins 4 of 6 categories. Symbol search is at parity (verbose output, bei
|
|
|
53
121
|
| Command | Description |
|
|
54
122
|
|---------|-------------|
|
|
55
123
|
| `codesift search <repo> <query>` | Full-text search across all files |
|
|
56
|
-
| `codesift symbols <repo> <query>` | Search symbols by name/signature |
|
|
124
|
+
| `codesift symbols <repo> <query>` | Search symbols by name/signature (supports `--detail compact\|standard\|full` and `--token-budget N`) |
|
|
57
125
|
|
|
58
126
|
### Outline
|
|
59
127
|
|
|
@@ -71,15 +139,35 @@ CodeSift wins 4 of 6 categories. Symbol search is at parity (verbose output, bei
|
|
|
71
139
|
| `codesift symbols-batch <repo> <ids...>` | Get multiple symbols by ID |
|
|
72
140
|
| `codesift find <repo> <query>` | Find symbol and show source |
|
|
73
141
|
| `codesift refs <repo> <name>` | Find all references to a symbol |
|
|
142
|
+
| `codesift context-bundle <repo> <name>` | Symbol + imports + siblings + types used in one call |
|
|
74
143
|
|
|
75
144
|
### Graph & analysis
|
|
76
145
|
|
|
77
146
|
| Command | Description |
|
|
78
147
|
|---------|-------------|
|
|
79
|
-
| `codesift trace <repo> <name>` | Trace call chain (callers/callees) |
|
|
80
|
-
| `codesift impact <repo> --since <ref>` | Blast radius of git changes |
|
|
81
|
-
| `codesift context <repo> <query>` | Assemble relevant code context |
|
|
82
|
-
| `codesift knowledge-map <repo>` | Module dependency map |
|
|
148
|
+
| `codesift trace <repo> <name>` | Trace call chain (callers/callees). Supports `--format mermaid` for flowchart output. |
|
|
149
|
+
| `codesift impact <repo> --since <ref>` | Blast radius of git changes + affected tests + risk scores per file |
|
|
150
|
+
| `codesift context <repo> <query>` | Assemble relevant code context. Supports `--level L0\|L1\|L2\|L3` for compression. |
|
|
151
|
+
| `codesift knowledge-map <repo>` | Module dependency map with circular dependency detection |
|
|
152
|
+
| `codesift trace-route <repo> <path>` | Trace HTTP route → handler → service → DB calls (NestJS/Next.js/Express) |
|
|
153
|
+
| `codesift communities <repo>` | Louvain community detection — discover code clusters from import graph |
|
|
154
|
+
|
|
155
|
+
### Code analysis
|
|
156
|
+
|
|
157
|
+
| Command | Description |
|
|
158
|
+
|---------|-------------|
|
|
159
|
+
| `codesift dead-code <repo>` | Find exported symbols with zero external references |
|
|
160
|
+
| `codesift complexity <repo>` | Cyclomatic complexity + nesting depth per function |
|
|
161
|
+
| `codesift clones <repo>` | Copy-paste detection (hash bucketing + line similarity) |
|
|
162
|
+
| `codesift hotspots <repo>` | Git churn × complexity = risk-ranked file list |
|
|
163
|
+
| `codesift patterns <repo> <pattern>` | Structural anti-pattern search (9 built-in + custom regex) |
|
|
164
|
+
|
|
165
|
+
### Cross-repo
|
|
166
|
+
|
|
167
|
+
| Command | Description |
|
|
168
|
+
|---------|-------------|
|
|
169
|
+
| `codesift cross-search <query>` | Search symbols across ALL indexed repositories |
|
|
170
|
+
| `codesift cross-refs <name>` | Find references across ALL indexed repositories |
|
|
83
171
|
|
|
84
172
|
### Diff
|
|
85
173
|
|
|
@@ -95,6 +183,90 @@ CodeSift wins 4 of 6 categories. Symbol search is at parity (verbose output, bei
|
|
|
95
183
|
| `codesift retrieve <repo> --queries <json>` | Batch multiple queries in one call |
|
|
96
184
|
| `codesift stats` | Show usage statistics |
|
|
97
185
|
| `codesift generate-claude-md <repo>` | Generate CLAUDE.md project summary |
|
|
186
|
+
| `codesift list-patterns` | List all built-in anti-pattern names |
|
|
187
|
+
|
|
188
|
+
## MCP tools (63 total — 13 core + 50 discoverable)
|
|
189
|
+
|
|
190
|
+
When running as an MCP server, CodeSift exposes 13 core tools directly. The remaining 50 tools are discoverable via `discover_tools` and `describe_tools` to minimize system prompt token overhead.
|
|
191
|
+
|
|
192
|
+
| Category | Tools |
|
|
193
|
+
|----------|-------|
|
|
194
|
+
| **Indexing** | `index_folder` (mtime skip, dirty propagation), `index_repo`, `index_file` (single-file reindex, 9ms), `list_repos`, `invalidate_cache` |
|
|
195
|
+
| **Search** | `search_symbols` (detail_level: compact/standard/full, token_budget), `search_text` (auto_group, group_by_file) |
|
|
196
|
+
| **Outline** | `get_file_tree`, `get_file_outline`, `get_repo_outline`, `suggest_queries` |
|
|
197
|
+
| **Symbol retrieval** | `get_symbol`, `get_symbols`, `find_and_show`, `get_context_bundle` |
|
|
198
|
+
| **References & graph** | `find_references` (LSP-enhanced), `trace_call_chain`, `impact_analysis`, `trace_route` (HTTP route → handler → DB) |
|
|
199
|
+
| **LSP bridge** | `go_to_definition` (LSP + index fallback), `get_type_info` (hover), `rename_symbol` (cross-file type-safe rename) |
|
|
200
|
+
| **Context & knowledge** | `assemble_context` (level: L0/L1/L2/L3), `get_knowledge_map`, `detect_communities` (Louvain) |
|
|
201
|
+
| **Conversation search** | `index_conversations`, `search_conversations`, `find_conversations_for_symbol` |
|
|
202
|
+
| **Diff** | `diff_outline`, `changed_symbols` |
|
|
203
|
+
| **Batch retrieval** | `codebase_retrieval` (batch multiple sub-queries with shared token budget, incl. `type: "conversation"`) |
|
|
204
|
+
| **Security** | `scan_secrets` (AST-aware secret detection, ~1,100 rules, masked output) |
|
|
205
|
+
| **Analysis** | `find_dead_code` (framework-aware), `analyze_complexity`, `find_clones`, `analyze_hotspots`, `search_patterns` (9 built-in incl. scaffolding), `list_patterns`, `frequency_analysis` (AST subtree clustering) |
|
|
206
|
+
| **Architecture** | `classify_roles` (symbol role classification via call graph), `check_boundaries` (architecture boundary enforcement), `ast_query` (structural grep via tree-sitter) |
|
|
207
|
+
| **Cross-repo** | `cross_repo_search`, `cross_repo_refs` |
|
|
208
|
+
| **Report** | `generate_report` (standalone HTML with complexity, dead code, hotspots, communities) |
|
|
209
|
+
| **Tool discovery** | `discover_tools` (keyword search across hidden tools), `describe_tools` (full schema on demand, optional `reveal`) |
|
|
210
|
+
| **Utility** | `generate_claude_md` (architecture + behavioral guidance), `usage_stats` (with token savings tracking) |
|
|
211
|
+
|
|
212
|
+
### Conversation search
|
|
213
|
+
|
|
214
|
+
Search past Claude Code conversation history — the decisions, rationale, and debugging sessions that shaped your code.
|
|
215
|
+
|
|
216
|
+
```bash
|
|
217
|
+
# Index conversations for current project (auto-detected from cwd)
|
|
218
|
+
# Also runs automatically at startup via auto-discovery
|
|
219
|
+
index_conversations()
|
|
220
|
+
|
|
221
|
+
# Index a specific project's conversations
|
|
222
|
+
index_conversations(project_path="/Users/me/.claude/projects/-Users-me-DEV-my-project")
|
|
223
|
+
|
|
224
|
+
# Search past conversations
|
|
225
|
+
search_conversations(query="auth middleware bug", limit=5)
|
|
226
|
+
|
|
227
|
+
# Find conversations that discussed a specific code symbol
|
|
228
|
+
find_conversations_for_symbol(symbol_name="processPayment", repo="local/my-project")
|
|
229
|
+
|
|
230
|
+
# In codebase_retrieval batch queries
|
|
231
|
+
codebase_retrieval(repo, queries=[
|
|
232
|
+
{"type": "semantic", "query": "how does auth work"},
|
|
233
|
+
{"type": "conversation", "query": "why we chose Redis over Postgres cache"}
|
|
234
|
+
])
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
**Features:**
|
|
238
|
+
- Auto-discovery at startup (zero config)
|
|
239
|
+
- Session-end hook for immediate re-indexing
|
|
240
|
+
- Noise filtering: tool_result dumps stripped, tool_use truncated, images → `[image]`
|
|
241
|
+
- Compaction-aware: skips summary injections, indexes last summary as meta-doc
|
|
242
|
+
- Cross-reference: link code symbols to the conversations that discussed them
|
|
243
|
+
|
|
244
|
+
### Secret scanning
|
|
245
|
+
|
|
246
|
+
Detect hardcoded secrets (API keys, JWT tokens, passwords, connection strings) in your indexed codebase. Uses ~1,100 detection rules from TruffleHog via `@sanity-labs/secret-scan`, with CodeSift's tree-sitter AST for false-positive reduction.
|
|
247
|
+
|
|
248
|
+
```bash
|
|
249
|
+
# Scan entire repo for secrets
|
|
250
|
+
scan_secrets(repo="local/my-project")
|
|
251
|
+
|
|
252
|
+
# Filter by severity
|
|
253
|
+
scan_secrets(repo="local/my-project", severity="critical")
|
|
254
|
+
|
|
255
|
+
# Only high-confidence findings, including test files
|
|
256
|
+
scan_secrets(repo="local/my-project", min_confidence="high", exclude_tests=false)
|
|
257
|
+
|
|
258
|
+
# Scope to specific directory
|
|
259
|
+
scan_secrets(repo="local/my-project", file_pattern="src/config/**")
|
|
260
|
+
```
|
|
261
|
+
|
|
262
|
+
**Features:**
|
|
263
|
+
- Eager scanning on file change — results are cached and instant on query
|
|
264
|
+
- AST-aware confidence: test files, docs, placeholder variables auto-demoted to `low`
|
|
265
|
+
- Masked output — secrets shown as `sk-p***hijk`, raw values never in cache or logs
|
|
266
|
+
- Inline allowlist — add `// codesift:allow-secret` to suppress a finding
|
|
267
|
+
- Config files indexed — `.env`, `.yaml`, `.toml`, `.json`, `.ini`, `.properties` scanned
|
|
268
|
+
- Severity mapping: cloud keys (AWS, GCP) = critical, API keys (OpenAI, GitHub) = high
|
|
269
|
+
- Inline warnings in `index_file` responses when secrets detected
|
|
98
270
|
|
|
99
271
|
## When to use CodeSift vs grep
|
|
100
272
|
|
|
@@ -104,36 +276,85 @@ CodeSift wins 4 of 6 categories. Symbol search is at parity (verbose output, bei
|
|
|
104
276
|
| Find function by name | `codesift symbols` | Returns signature + body in 1 call |
|
|
105
277
|
| File structure | `codesift tree` | 20% fewer tokens, symbol counts |
|
|
106
278
|
| "How does X work?" | `codesift retrieve` (semantic) | 20% better quality on concept queries |
|
|
279
|
+
| Call chain tracing | `codesift trace` | AST-based caller/callee graph, Mermaid output |
|
|
280
|
+
| Dead code / unused exports | `codesift dead-code` | Automated scan, no manual grep needed |
|
|
281
|
+
| Complexity hotspots | `codesift complexity` | Cyclomatic complexity + nesting depth |
|
|
282
|
+
| Copy-paste detection | `codesift clones` | Hash bucketing + line similarity scoring |
|
|
283
|
+
| Anti-pattern search | `codesift patterns` | 9 built-in CQ patterns + custom regex |
|
|
284
|
+
| Explore new codebase | `codesift suggest-queries` | Instant overview: top files, kind distribution, example queries |
|
|
285
|
+
| Re-index after edit | `index_file` | 9ms skip / 153ms reparse vs 3-8s full folder |
|
|
286
|
+
| Trace HTTP route | `trace_route` | URL → handler → service → DB calls in one call |
|
|
287
|
+
| Discover code modules | `detect_communities` | Louvain clustering finds architectural boundaries |
|
|
288
|
+
| Dense context (5-10x) | `assemble_context --level L1` | Signatures only — fits 56 symbols where L0 fits 19 |
|
|
289
|
+
| Go to definition | `go_to_definition` | LSP-precise when available, index fallback |
|
|
290
|
+
| Get type info | `get_type_info` | Return types + docs via LSP hover — no file reading |
|
|
291
|
+
| Rename across files | `rename_symbol` | LSP type-safe rename in all files at once |
|
|
292
|
+
| Detect hardcoded secrets | `scan_secrets` | ~1,100 rules, AST-aware, masked output, auto-cached |
|
|
107
293
|
| Find ALL occurrences | `grep -rn` | Exhaustive, no top_k cap |
|
|
108
294
|
| Count matches | `grep -c` | Simple exact count |
|
|
109
|
-
| Call chain tracing | `grep -rn "fn("` | CodeSift trace is being rewritten |
|
|
110
295
|
|
|
111
|
-
##
|
|
296
|
+
## Built-in anti-patterns
|
|
297
|
+
|
|
298
|
+
The `patterns` command searches for common code quality issues across your codebase:
|
|
299
|
+
|
|
300
|
+
| Pattern | What it finds |
|
|
301
|
+
|---------|---------------|
|
|
302
|
+
| `empty-catch` | `catch (e) {}` — swallowed errors |
|
|
303
|
+
| `any-type` | `: any` or `as any` — lost type safety |
|
|
304
|
+
| `console-log` | `console.log/debug/info` in production code |
|
|
305
|
+
| `await-in-loop` | Sequential `await` inside `for` loops |
|
|
306
|
+
| `useEffect-no-cleanup` | React useEffect without cleanup return |
|
|
307
|
+
| `no-error-type` | Catch without `instanceof Error` narrowing |
|
|
308
|
+
| `toctou` | Read-then-write without atomic operation |
|
|
309
|
+
| `unbounded-findmany` | Prisma `findMany` without `take` limit |
|
|
310
|
+
| `scaffolding` | TODO/FIXME/HACK markers, Phase/Step stubs, "not implemented" throws |
|
|
311
|
+
|
|
312
|
+
Custom regex is also supported: `codesift patterns local/project "Promise<.*any>"`.
|
|
313
|
+
|
|
314
|
+
## MCP server setup
|
|
315
|
+
|
|
316
|
+
CodeSift runs as an [MCP](https://modelcontextprotocol.io) server, exposing 63 tools to AI agents (13 core visible + 50 discoverable). The fastest setup method is `codesift setup <platform>`, which handles everything automatically. Manual configuration is also supported:
|
|
317
|
+
|
|
318
|
+
### OpenAI Codex
|
|
319
|
+
|
|
320
|
+
Add this to `~/.codex/config.toml`:
|
|
321
|
+
|
|
322
|
+
```toml
|
|
323
|
+
[mcp_servers.codesift]
|
|
324
|
+
command = "npx"
|
|
325
|
+
args = ["-y", "codesift-mcp"]
|
|
326
|
+
tool_timeout_sec = 120
|
|
327
|
+
```
|
|
328
|
+
|
|
329
|
+
Alternatively, add it via the Codex CLI:
|
|
112
330
|
|
|
113
|
-
|
|
331
|
+
```bash
|
|
332
|
+
codex mcp add codesift -- npx -y codesift-mcp
|
|
333
|
+
```
|
|
114
334
|
|
|
115
|
-
### Claude Code
|
|
335
|
+
### Claude Code
|
|
116
336
|
|
|
117
|
-
Add to `~/.claude.json`:
|
|
337
|
+
Add this to `~/.claude/settings.json`:
|
|
118
338
|
|
|
119
339
|
```json
|
|
120
340
|
{
|
|
121
341
|
"mcpServers": {
|
|
122
342
|
"codesift": {
|
|
123
|
-
"command": "
|
|
343
|
+
"command": "npx",
|
|
344
|
+
"args": ["-y", "codesift-mcp"]
|
|
124
345
|
}
|
|
125
346
|
}
|
|
126
347
|
}
|
|
127
348
|
```
|
|
128
349
|
|
|
129
|
-
|
|
350
|
+
With semantic search (OpenAI embeddings), add the env var manually:
|
|
130
351
|
|
|
131
352
|
```json
|
|
132
353
|
{
|
|
133
354
|
"mcpServers": {
|
|
134
355
|
"codesift": {
|
|
135
|
-
"command": "
|
|
136
|
-
"args": ["
|
|
356
|
+
"command": "/bin/sh",
|
|
357
|
+
"args": ["-c", "CODESIFT_OPENAI_API_KEY='sk-...' exec codesift-mcp"]
|
|
137
358
|
}
|
|
138
359
|
}
|
|
139
360
|
}
|
|
@@ -154,6 +375,50 @@ Add to `~/Library/Application Support/Claude/claude_desktop_config.json` (macOS)
|
|
|
154
375
|
}
|
|
155
376
|
```
|
|
156
377
|
|
|
378
|
+
### Cursor
|
|
379
|
+
|
|
380
|
+
Add this to `~/.cursor/mcp.json`, or to `.cursor/mcp.json` in your project:
|
|
381
|
+
|
|
382
|
+
```json
|
|
383
|
+
{
|
|
384
|
+
"mcpServers": {
|
|
385
|
+
"codesift": {
|
|
386
|
+
"command": "npx",
|
|
387
|
+
"args": ["-y", "codesift-mcp"]
|
|
388
|
+
}
|
|
389
|
+
}
|
|
390
|
+
}
|
|
391
|
+
```
|
|
392
|
+
|
|
393
|
+
### Gemini CLI
|
|
394
|
+
|
|
395
|
+
Add this to `~/.gemini/settings.json`, or to `.gemini/settings.json` in your project:
|
|
396
|
+
|
|
397
|
+
```json
|
|
398
|
+
{
|
|
399
|
+
"mcpServers": {
|
|
400
|
+
"codesift": {
|
|
401
|
+
"command": "npx",
|
|
402
|
+
"args": ["-y", "codesift-mcp"]
|
|
403
|
+
}
|
|
404
|
+
}
|
|
405
|
+
}
|
|
406
|
+
```
|
|
407
|
+
|
|
408
|
+
You can also use the Gemini CLI:
|
|
409
|
+
|
|
410
|
+
```bash
|
|
411
|
+
gemini mcp add codesift -s user npx -- -y codesift-mcp
|
|
412
|
+
```
|
|
413
|
+
|
|
414
|
+
### All platforms at once
|
|
415
|
+
|
|
416
|
+
```bash
|
|
417
|
+
codesift setup all
|
|
418
|
+
```
|
|
419
|
+
|
|
420
|
+
This configures Codex, Claude Code, Cursor, and Gemini CLI in one command. Safe to run multiple times — skips platforms that are already configured.
|
|
421
|
+
|
|
157
422
|
## Semantic search
|
|
158
423
|
|
|
159
424
|
Semantic search uses embeddings to answer concept queries like "how does authentication work?" that keyword search misses.
|
|
@@ -162,11 +427,11 @@ Semantic search uses embeddings to answer concept queries like "how does authent
|
|
|
162
427
|
|
|
163
428
|
Set **one** of these environment variables:
|
|
164
429
|
|
|
165
|
-
| Variable | Provider | Model |
|
|
166
|
-
|
|
167
|
-
| `CODESIFT_VOYAGE_API_KEY` | [Voyage AI](https://voyageai.com/) | `voyage-code-3` |
|
|
168
|
-
| `CODESIFT_OPENAI_API_KEY` | [OpenAI](https://openai.com/) | `text-embedding-3-small` |
|
|
169
|
-
| `CODESIFT_OLLAMA_URL` | [Ollama](https://ollama.com/) (local) | `nomic-embed-text` |
|
|
430
|
+
| Variable | Provider | Model | Cost |
|
|
431
|
+
|----------|----------|-------|------|
|
|
432
|
+
| `CODESIFT_VOYAGE_API_KEY` | [Voyage AI](https://voyageai.com/) | `voyage-code-3` | Best for code |
|
|
433
|
+
| `CODESIFT_OPENAI_API_KEY` | [OpenAI](https://openai.com/) | `text-embedding-3-small` | ~$0.02/1M tok (~$0.21 for 44 repos) |
|
|
434
|
+
| `CODESIFT_OLLAMA_URL` | [Ollama](https://ollama.com/) (local) | `nomic-embed-text` | Free (local) |
|
|
170
435
|
|
|
171
436
|
### Usage
|
|
172
437
|
|
|
@@ -182,21 +447,6 @@ codesift retrieve local/my-project \
|
|
|
182
447
|
|
|
183
448
|
Semantic and hybrid queries exclude test files by default to maximize token efficiency. To include test files, set `"exclude_tests": false` in the sub-query or pass `--exclude-tests=false` on the CLI.
|
|
184
449
|
|
|
185
|
-
### MCP example
|
|
186
|
-
|
|
187
|
-
```json
|
|
188
|
-
{
|
|
189
|
-
"mcpServers": {
|
|
190
|
-
"codesift": {
|
|
191
|
-
"command": "codesift-mcp",
|
|
192
|
-
"env": {
|
|
193
|
-
"CODESIFT_OPENAI_API_KEY": "sk-..."
|
|
194
|
-
}
|
|
195
|
-
}
|
|
196
|
-
}
|
|
197
|
-
}
|
|
198
|
-
```
|
|
199
|
-
|
|
200
450
|
## Configuration
|
|
201
451
|
|
|
202
452
|
All configuration is via environment variables.
|
|
@@ -206,13 +456,15 @@ All configuration is via environment variables.
|
|
|
206
456
|
| `CODESIFT_DATA_DIR` | Storage directory for indexes | `~/.codesift` |
|
|
207
457
|
| `CODESIFT_WATCH_DEBOUNCE_MS` | File watcher debounce interval | `500` |
|
|
208
458
|
| `CODESIFT_DEFAULT_TOKEN_BUDGET` | Default token budget for retrieval | `8000` |
|
|
209
|
-
| `CODESIFT_DEFAULT_TOP_K` | Default max results for search | `
|
|
459
|
+
| `CODESIFT_DEFAULT_TOP_K` | Default max results for search | `50` |
|
|
460
|
+
| `CODESIFT_EMBEDDING_BATCH_SIZE` | Symbols per embedding API call | `128` |
|
|
461
|
+
| `CODESIFT_SECRET_SCAN` | Enable/disable secret scanning | `true` (set `false` to disable) |
|
|
210
462
|
|
|
211
463
|
## How it works
|
|
212
464
|
|
|
213
|
-
1. **Indexing** -- Tree-sitter WASM grammars parse source files into ASTs. Symbol extraction produces functions, classes, methods, types, constants, etc. with signatures, docstrings, and source code.
|
|
465
|
+
1. **Indexing** -- Tree-sitter WASM grammars parse source files into ASTs. Symbol extraction produces functions, classes, methods, types, constants, etc. with signatures, docstrings, and source code. Filesystem mtime is stored per file for incremental skip on reindex.
|
|
214
466
|
|
|
215
|
-
2. **BM25F search** -- Symbols are tokenized (camelCase/snake_case splitting) and indexed with field-weighted BM25 scoring. Name matches rank
|
|
467
|
+
2. **BM25F search** -- Symbols are tokenized (camelCase/snake_case splitting) and indexed with field-weighted BM25 scoring. Name matches rank 5x higher than body matches. Symbols in frequently-imported files get a log-scaled centrality bonus as a tiebreaker.
|
|
216
468
|
|
|
217
469
|
3. **Semantic search** (optional) -- Source code is chunked and embedded via the configured provider. Queries are embedded at search time and ranked by cosine similarity. Multi-sub-query decomposition with Reciprocal Rank Fusion (RRF, k=60).
|
|
218
470
|
|
|
@@ -220,22 +472,60 @@ All configuration is via environment variables.
|
|
|
220
472
|
|
|
221
473
|
5. **File watcher** -- chokidar watches indexed folders for changes. Modified files are re-parsed and the index is updated incrementally.
|
|
222
474
|
|
|
475
|
+
6. **Response guards** -- Multiple layers prevent token waste: auto-grouping at 80K chars, 30K token hard cap, response dedup cache (30s), in-flight request coalescing, sequential call hints, and source truncation.
|
|
476
|
+
|
|
477
|
+
7. **LSP bridge** (optional) -- When a language server is installed (typescript-language-server, pylsp, gopls, rust-analyzer, solargraph, intelephense), CodeSift uses it for type-safe `find_references`, precise `go_to_definition`, `get_type_info` via hover, and cross-file `rename_symbol`. Falls back to tree-sitter/grep when LSP is unavailable. Lazy start + 5 min idle kill — zero overhead when not used.
|
|
478
|
+
|
|
479
|
+
## Glob pattern support
|
|
480
|
+
|
|
481
|
+
File pattern parameters (`file_pattern`) support full glob syntax via [picomatch](https://github.com/micromatch/picomatch):
|
|
482
|
+
|
|
483
|
+
- `*.ts` — match by extension at any depth
|
|
484
|
+
- `*.{ts,tsx}` — brace expansion
|
|
485
|
+
- `src/**/*.service.ts` — directory globbing
|
|
486
|
+
- `[!.]*.ts` — character classes
|
|
487
|
+
- `service` — plain substring match (no glob chars)
|
|
488
|
+
|
|
223
489
|
## Supported languages
|
|
224
490
|
|
|
225
|
-
TypeScript, JavaScript (JSX/TSX), Python, Go, Rust, Java, Ruby, PHP, Markdown, CSS, Prisma.
|
|
491
|
+
TypeScript, JavaScript (JSX/TSX), Python, Go, Rust, Java, Ruby, PHP, Markdown, CSS, Prisma, Astro.
|
|
226
492
|
|
|
227
493
|
## Development
|
|
228
494
|
|
|
229
495
|
```bash
|
|
230
|
-
git clone https://github.com/
|
|
496
|
+
git clone https://github.com/greglas75/codesift.git
|
|
231
497
|
cd codesift
|
|
232
498
|
npm install
|
|
233
499
|
npm run download-wasm # Download tree-sitter WASM grammars
|
|
234
500
|
npm run build # TypeScript compilation
|
|
235
|
-
npm test # Run tests (Vitest)
|
|
501
|
+
npm test # Run tests (Vitest, 895+ tests)
|
|
236
502
|
npm run test:coverage # Coverage report
|
|
503
|
+
npm run lint # Type check (tsc --noEmit)
|
|
237
504
|
```
|
|
238
505
|
|
|
239
506
|
## License
|
|
240
507
|
|
|
241
|
-
|
|
508
|
+
BSL-1.1
|
|
509
|
+
|
|
510
|
+
<!-- Evidence Map
|
|
511
|
+
| Section | Source file(s) |
|
|
512
|
+
|---------|---------------|
|
|
513
|
+
| Tool count (63) | src/register-tools.ts (TOOL_DEFINITIONS + discover_tools + describe_tools) |
|
|
514
|
+
| Quick install | package.json:bin (lines 8-11) |
|
|
515
|
+
| Quick start | src/cli/commands.ts |
|
|
516
|
+
| Benchmark | benchmarks/ directory, previously measured |
|
|
517
|
+
| Performance features | src/tools/index-tools.ts (mtime), src/tools/search-tools.ts (detail_level, token_budget), src/search/bm25.ts (centrality), src/server-helpers.ts (cache, dedup, guards) |
|
|
518
|
+
| CLI commands | src/cli/commands.ts:1-515 |
|
|
519
|
+
| MCP tools | src/register-tools.ts (all tool definitions) |
|
|
520
|
+
| Anti-patterns | src/tools/pattern-tools.ts |
|
|
521
|
+
| MCP setup | manual configs verified |
|
|
522
|
+
| Semantic search | src/search/semantic.ts, src/config.ts:40-47 |
|
|
523
|
+
| Configuration | src/config.ts:36-72 |
|
|
524
|
+
| How it works | src/search/bm25.ts, src/parser/, src/storage/watcher.ts, src/server-helpers.ts |
|
|
525
|
+
| Glob support | src/utils/glob.ts (picomatch) |
|
|
526
|
+
| LSP bridge | src/lsp/lsp-client.ts, src/lsp/lsp-manager.ts, src/lsp/lsp-servers.ts, src/lsp/lsp-tools.ts |
|
|
527
|
+
| Secret scanning | src/tools/secret-tools.ts, @sanity-labs/secret-scan (package.json) |
|
|
528
|
+
| Languages | src/parser/parser-manager.ts, src/parser/extractors/ |
|
|
529
|
+
| Development | package.json:scripts (lines 19-28) |
|
|
530
|
+
| Git URL | package.json:repository (lines 62-64) |
|
|
531
|
+
-->
|
package/dist/cli/args.d.ts
CHANGED
|
@@ -10,4 +10,6 @@ export declare function getNumFlag(flags: Flags, name: string): number | undefin
|
|
|
10
10
|
export declare function output(data: unknown, flags: Flags): void;
|
|
11
11
|
export declare function die(message: string): never;
|
|
12
12
|
export declare function requireArg(args: string[], index: number, name: string): string;
|
|
13
|
+
export declare function requireFlag(flags: Flags, name: string): string;
|
|
14
|
+
export declare function parseCommaSeparated(flags: Flags, name: string): string[] | undefined;
|
|
13
15
|
//# sourceMappingURL=args.d.ts.map
|
package/dist/cli/args.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"args.d.ts","sourceRoot":"","sources":["../../src/cli/args.ts"],"names":[],"mappings":"AAIA,MAAM,WAAW,UAAU;IACzB,UAAU,EAAE,MAAM,EAAE,CAAC;IACrB,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,CAAC;CACzC;AAED,MAAM,MAAM,KAAK,GAAG,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,CAAC;AAErD,wBAAgB,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,UAAU,CA4BpD;AAED,wBAAgB,OAAO,CAAC,KAAK,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS,CAItE;AAED,wBAAgB,WAAW,CAAC,KAAK,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,GAAG,OAAO,GAAG,SAAS,CAM3E;AAED,wBAAgB,UAAU,CAAC,KAAK,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS,CAQzE;AAMD,wBAAgB,MAAM,CAAC,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,GAAG,IAAI,CAIxD;AAED,wBAAgB,GAAG,CAAC,OAAO,EAAE,MAAM,GAAG,KAAK,CAG1C;AAED,wBAAgB,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,MAAM,CAM9E"}
|
|
1
|
+
{"version":3,"file":"args.d.ts","sourceRoot":"","sources":["../../src/cli/args.ts"],"names":[],"mappings":"AAIA,MAAM,WAAW,UAAU;IACzB,UAAU,EAAE,MAAM,EAAE,CAAC;IACrB,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,CAAC;CACzC;AAED,MAAM,MAAM,KAAK,GAAG,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,CAAC;AAErD,wBAAgB,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,UAAU,CA4BpD;AAED,wBAAgB,OAAO,CAAC,KAAK,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS,CAItE;AAED,wBAAgB,WAAW,CAAC,KAAK,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,GAAG,OAAO,GAAG,SAAS,CAM3E;AAED,wBAAgB,UAAU,CAAC,KAAK,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS,CAQzE;AAMD,wBAAgB,MAAM,CAAC,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,GAAG,IAAI,CAIxD;AAED,wBAAgB,GAAG,CAAC,OAAO,EAAE,MAAM,GAAG,KAAK,CAG1C;AAED,wBAAgB,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,MAAM,CAM9E;AAED,wBAAgB,WAAW,CAAC,KAAK,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,GAAG,MAAM,CAM9D;AAED,wBAAgB,mBAAmB,CAAC,KAAK,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,GAAG,SAAS,CAGpF"}
|
package/dist/cli/args.js
CHANGED
|
@@ -76,4 +76,15 @@ export function requireArg(args, index, name) {
|
|
|
76
76
|
}
|
|
77
77
|
return val;
|
|
78
78
|
}
|
|
79
|
+
export function requireFlag(flags, name) {
|
|
80
|
+
const val = getFlag(flags, name);
|
|
81
|
+
if (!val) {
|
|
82
|
+
die(`Missing required flag: --${name} <ref>`);
|
|
83
|
+
}
|
|
84
|
+
return val;
|
|
85
|
+
}
|
|
86
|
+
export function parseCommaSeparated(flags, name) {
|
|
87
|
+
const raw = getFlag(flags, name);
|
|
88
|
+
return raw ? raw.split(",").map(p => p.trim()) : undefined;
|
|
89
|
+
}
|
|
79
90
|
//# sourceMappingURL=args.js.map
|