claude-flow-novice 2.18.15 → 2.18.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/cfn-dev-team/analysts/analyst.md +9 -0
- package/.claude/agents/cfn-dev-team/analysts/root-cause-analyst.md +9 -0
- package/.claude/agents/cfn-dev-team/architecture/api-designer-persona.md +10 -1
- package/.claude/agents/cfn-dev-team/architecture/base-template-generator.md +9 -0
- package/.claude/agents/cfn-dev-team/architecture/goal-planner.md +10 -1
- package/.claude/agents/cfn-dev-team/architecture/planner.md +9 -0
- package/.claude/agents/cfn-dev-team/architecture/system-architect.md +9 -0
- package/.claude/agents/cfn-dev-team/coordinators/cfn-frontend-coordinator.md +9 -0
- package/.claude/agents/cfn-dev-team/coordinators/consensus-builder.md +10 -1
- package/.claude/agents/cfn-dev-team/coordinators/handoff-coordinator.md +9 -0
- package/.claude/agents/cfn-dev-team/coordinators/multi-sprint-coordinator.md +9 -0
- package/.claude/agents/cfn-dev-team/dev-ops/devops-engineer.md +9 -0
- package/.claude/agents/cfn-dev-team/dev-ops/docker-specialist.md +9 -0
- package/.claude/agents/cfn-dev-team/dev-ops/github-commit-agent.md +9 -0
- package/.claude/agents/cfn-dev-team/dev-ops/kubernetes-specialist.md +9 -0
- package/.claude/agents/cfn-dev-team/developers/api-gateway-specialist.md +9 -0
- package/.claude/agents/cfn-dev-team/developers/data/data-engineer.md +9 -0
- package/.claude/agents/cfn-dev-team/developers/database/database-architect.md +9 -0
- package/.claude/agents/cfn-dev-team/developers/database/supabase-specialist.md +9 -0
- package/.claude/agents/cfn-dev-team/developers/frontend/mobile-dev.md +10 -1
- package/.claude/agents/cfn-dev-team/developers/frontend/typescript-specialist.md +10 -1
- package/.claude/agents/cfn-dev-team/developers/frontend/ui-designer.md +10 -1
- package/.claude/agents/cfn-dev-team/developers/graphql-specialist.md +9 -0
- package/.claude/agents/cfn-dev-team/developers/rust-developer.md +10 -1
- package/.claude/agents/cfn-dev-team/documentation/agent-type-guidelines.md +9 -0
- package/.claude/agents/cfn-dev-team/documentation/api-documentation.md +10 -1
- package/.claude/agents/cfn-dev-team/documentation/pseudocode.md +10 -1
- package/.claude/agents/cfn-dev-team/documentation/specification-agent.md +10 -1
- package/.claude/agents/cfn-dev-team/product-owners/accessibility-advocate-persona.md +10 -1
- package/.claude/agents/cfn-dev-team/product-owners/cto-agent.md +9 -0
- package/.claude/agents/cfn-dev-team/product-owners/power-user-persona.md +10 -1
- package/.claude/agents/cfn-dev-team/product-owners/product-owner.md +9 -0
- package/.claude/agents/cfn-dev-team/reviewers/quality/code-quality-validator.md +9 -0
- package/.claude/agents/cfn-dev-team/reviewers/quality/cyclomatic-complexity-reducer.md +9 -0
- package/.claude/agents/cfn-dev-team/reviewers/quality/perf-analyzer.md +9 -0
- package/.claude/agents/cfn-dev-team/reviewers/quality/performance-benchmarker.md +10 -1
- package/.claude/agents/cfn-dev-team/reviewers/quality/quality-metrics.md +9 -0
- package/.claude/agents/cfn-dev-team/testers/api-testing-specialist.md +9 -0
- package/.claude/agents/cfn-dev-team/testers/chaos-engineering-specialist.md +9 -0
- package/.claude/agents/cfn-dev-team/testers/contract-tester.md +9 -0
- package/.claude/agents/cfn-dev-team/testers/e2e/playwright-tester.md +9 -0
- package/.claude/agents/cfn-dev-team/testers/integration-tester.md +9 -0
- package/.claude/agents/cfn-dev-team/testers/interaction-tester.md +9 -0
- package/.claude/agents/cfn-dev-team/testers/load-testing-specialist.md +9 -0
- package/.claude/agents/cfn-dev-team/testers/mutation-testing-specialist.md +9 -0
- package/.claude/agents/cfn-dev-team/testers/playwright-tester.md +10 -1
- package/.claude/agents/cfn-dev-team/testers/unit/tdd-london-unit-swarm.md +10 -1
- package/.claude/agents/cfn-dev-team/testers/validation/validation-production-validator.md +10 -1
- package/.claude/agents/cfn-dev-team/testing/test-validation-agent.md +9 -0
- package/.claude/agents/cfn-dev-team/utility/agent-builder.md +10 -1
- package/.claude/agents/cfn-dev-team/utility/context-curator.md +9 -0
- package/.claude/agents/cfn-dev-team/utility/memory-leak-specialist.md +9 -0
- package/.claude/agents/cfn-dev-team/utility/researcher.md +9 -0
- package/.claude/agents/cfn-dev-team/utility/z-ai-specialist.md +9 -0
- package/.claude/hooks/SessionStart-cfn-build-ruvector.sh +12 -0
- package/.claude/hooks/SessionStart:cfn-build-ruvector.sh +28 -0
- package/.claude/skills/bulk-add-ruvector-instructions.sh +34 -46
- package/.claude/skills/cfn-local-ruvector-accelerator/.claude/hooks/SessionStart-cfn-build-ruvector.sh +12 -0
- package/.claude/skills/cfn-local-ruvector-accelerator/SKILL.md +89 -6
- package/.claude/skills/cfn-local-ruvector-accelerator/src/cli/index.rs +250 -30
- package/.claude/skills/cfn-local-ruvector-accelerator/src/cli/index_ast.rs +4 -1
- package/.claude/skills/cfn-local-ruvector-accelerator/src/cli/stats.rs +18 -18
- package/.claude/skills/cfn-local-ruvector-accelerator/src/extractors/mod.rs +1 -1
- package/.claude/skills/cfn-local-ruvector-accelerator/src/migration_v2.rs +7 -7
- package/.claude/skills/cfn-local-ruvector-accelerator/src/query_api.rs +6 -3
- package/.claude/skills/cfn-local-ruvector-accelerator/src/schema_v2.rs +16 -1
- package/.claude/skills/cfn-local-ruvector-accelerator/src/store_v2.rs +7 -3
- package/.claude/skills/cfn-local-ruvector-accelerator/src/store_v2_tx.rs +5 -3
- package/.claude/skills/cfn-local-ruvector-accelerator/src/transaction_tests.rs +8 -2
- package/CLAUDE.md +2 -1
- package/dist/trigger.config.d.ts +15 -1
- package/dist/trigger.config.js +11 -1
- package/package.json +1 -1
|
@@ -1,8 +1,91 @@
|
|
|
1
|
-
#
|
|
2
|
-
./target/release/local-ruvector init
|
|
1
|
+
# RuVector Local Semantic Code Search
|
|
3
2
|
|
|
4
|
-
|
|
5
|
-
./target/release/local-ruvector index --path /path/to/project --types rs
|
|
3
|
+
## WHEN TO USE THIS SKILL
|
|
6
4
|
|
|
7
|
-
|
|
8
|
-
|
|
5
|
+
**USE RuVector V2 SQL for ALL indexed projects (400x FASTER than grep):**
|
|
6
|
+
```bash
|
|
7
|
+
# Exact name lookup - 0.002s vs grep's 0.8s
|
|
8
|
+
sqlite3 ~/.local/share/ruvector/index_v2.db "SELECT file_path, line_number FROM entities WHERE name = 'MyFunction';"
|
|
9
|
+
|
|
10
|
+
# Fuzzy search - 0.004s
|
|
11
|
+
sqlite3 ~/.local/share/ruvector/index_v2.db "SELECT file_path, line_number FROM entities WHERE name LIKE '%Store%' LIMIT 10;"
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
**USE grep/rg ONLY when:**
|
|
15
|
+
- Project is NOT indexed yet
|
|
16
|
+
- Searching for strings that aren't code entities (error messages, comments, config values)
|
|
17
|
+
- Quick one-off search in small directory
|
|
18
|
+
|
|
19
|
+
**USE RuVector semantic search when:**
|
|
20
|
+
- "Where is authentication implemented?" (conceptual search)
|
|
21
|
+
- Finding similar patterns you can't name exactly
|
|
22
|
+
- Discovering how a feature is built
|
|
23
|
+
|
|
24
|
+
## Quick Commands
|
|
25
|
+
|
|
26
|
+
### Semantic Search (V1 - Embeddings)
|
|
27
|
+
```bash
|
|
28
|
+
# Natural language search
|
|
29
|
+
/codebase-search "authentication middleware pattern"
|
|
30
|
+
/cfn-ruvector-search "error handling in API routes"
|
|
31
|
+
|
|
32
|
+
# CLI direct
|
|
33
|
+
./.claude/skills/cfn-local-ruvector-accelerator/target/release/local-ruvector query --pattern "user login flow"
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
### Structural Search (V2 - SQL on AST)
|
|
37
|
+
```bash
|
|
38
|
+
# Find all callers of a function
|
|
39
|
+
sqlite3 ~/.local/share/ruvector/index_v2.db \
|
|
40
|
+
"SELECT * FROM refs WHERE target_name = 'MyFunction';"
|
|
41
|
+
|
|
42
|
+
# Find all functions in a file
|
|
43
|
+
sqlite3 ~/.local/share/ruvector/index_v2.db \
|
|
44
|
+
"SELECT name, line_number FROM entities WHERE file_path LIKE '%myfile.rs' AND kind = 'function';"
|
|
45
|
+
|
|
46
|
+
# Find entities by project (multi-project isolation)
|
|
47
|
+
sqlite3 ~/.local/share/ruvector/index_v2.db \
|
|
48
|
+
"SELECT COUNT(*) FROM entities WHERE project_root = '/path/to/project';"
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
## Index Management
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
# Index a project (first time or full rebuild)
|
|
55
|
+
./target/release/local-ruvector index --path /path/to/project --types rs,ts,py
|
|
56
|
+
|
|
57
|
+
# Incremental update (after code changes)
|
|
58
|
+
/codebase-reindex
|
|
59
|
+
|
|
60
|
+
# Check index stats
|
|
61
|
+
sqlite3 ~/.local/share/ruvector/index_v2.db "SELECT project_root, COUNT(*) FROM entities GROUP BY project_root;"
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## Key Features
|
|
65
|
+
|
|
66
|
+
- **Multi-project isolation**: Index multiple projects in a single database without data collision
|
|
67
|
+
- **Non-destructive**: Indexing one project never deletes data from other projects
|
|
68
|
+
- **Centralized storage**: `~/.local/share/ruvector/index_v2.db`
|
|
69
|
+
- **Dual search**: V1 semantic (embeddings) + V2 structural (SQL on AST)
|
|
70
|
+
- **Fast**: Rust binary with SQLite backend
|
|
71
|
+
|
|
72
|
+
## Database Location
|
|
73
|
+
```
|
|
74
|
+
~/.local/share/ruvector/index_v2.db
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## For Agents
|
|
78
|
+
|
|
79
|
+
Before implementing changes, ALWAYS query RuVector first:
|
|
80
|
+
```bash
|
|
81
|
+
# Find similar patterns
|
|
82
|
+
/codebase-search "relevant search terms" --top 5
|
|
83
|
+
|
|
84
|
+
# Query past errors
|
|
85
|
+
./.claude/skills/cfn-ruvector-codebase-index/query-error-patterns.sh --task-description "description"
|
|
86
|
+
|
|
87
|
+
# Query learnings
|
|
88
|
+
./.claude/skills/cfn-ruvector-codebase-index/query-learnings.sh --task-description "description" --category PATTERN
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
This prevents duplicated work and leverages existing solutions.
|
|
@@ -1,3 +1,47 @@
|
|
|
1
|
+
//! # RuVector Index Command
|
|
2
|
+
//!
|
|
3
|
+
//! ## IMPORTANT: Run from PROJECT ROOT
|
|
4
|
+
//!
|
|
5
|
+
//! This indexer MUST be run from the project root directory to index all files correctly.
|
|
6
|
+
//! Running from a subdirectory will only index that subdirectory.
|
|
7
|
+
//!
|
|
8
|
+
//! ## Recommended Usage:
|
|
9
|
+
//! ```bash
|
|
10
|
+
//! cd /path/to/project-root
|
|
11
|
+
//! local-ruvector index --path . --types rs,ts,js,json,md,sh --force
|
|
12
|
+
//! ```
|
|
13
|
+
//!
|
|
14
|
+
//! ## Supported File Types (default):
|
|
15
|
+
//! - rs, ts, js, json, md, sh, yaml, yml, txt, config
|
|
16
|
+
//! - Use --types to specify custom extensions
|
|
17
|
+
//!
|
|
18
|
+
//! ## Excluded Directories (see EXCLUDED_DIRS constant - 57 patterns):
|
|
19
|
+
//! - Dependencies: node_modules, vendor, .pnpm, .yarn
|
|
20
|
+
//! - Build artifacts: target, dist, build, out, .next, .nuxt, .output, .turbo, .parcel-cache, .webpack
|
|
21
|
+
//! - VCS: .git, .svn, .hg
|
|
22
|
+
//! - IDE: .idea, .vscode, .vs
|
|
23
|
+
//! - Cache: .cache, __pycache__, .pytest_cache, .mypy_cache, .ruff_cache, coverage, .nyc_output, .eslintcache
|
|
24
|
+
//! - Virtual envs: .venv, venv, .env (directory), env
|
|
25
|
+
//! - IaC: .terraform, .serverless, .aws-sam
|
|
26
|
+
//! - Project-specific: .artifacts, .ruvector, .archive, archive
|
|
27
|
+
//! - Backups/temp: backups, .backups, backup, .backup, tmp, .tmp, temp, logs, .logs
|
|
28
|
+
//! - Test artifacts: __snapshots__, __mocks__, .storybook, storybook-static, playwright-report, test-results
|
|
29
|
+
//! - Doc builds: _site, .docusaurus, site
|
|
30
|
+
//! - NOTE: .claude directory IS included (contains important config)
|
|
31
|
+
//!
|
|
32
|
+
//! ## Excluded Files (see EXCLUDED_FILES constant - 47 patterns):
|
|
33
|
+
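//! - Secrets: .env, .env.local, .env.development, .env.production, .env.test, .env.example (matched by exact name or as a name suffix, not as a glob - other variants such as .env.staging are NOT excluded), credentials.json, secrets.json, secrets.yaml, service-account.json, .npmrc, .pypirc, .netrc, id_rsa, id_ed25519, *.pem, *.key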
|
|
34
|
+
//! - Lock files: package-lock.json, yarn.lock, pnpm-lock.yaml, Cargo.lock, go.sum, etc.
|
|
35
|
+
//! - Backups: *.bak, *.backup, *.orig, *.swp, *~
|
|
36
|
+
//! - Minified/generated: *.min.js, *.min.css, *.bundle.js, *.chunk.js, *.js.map, *.d.ts
|
|
37
|
+
//! - Binary/data: *.wasm, *.db, *.sqlite
|
|
38
|
+
//! - Build info: *.snap, *.eslintcache, *.tsbuildinfo
|
|
39
|
+
//!
|
|
40
|
+
//! ## Multi-Project Isolation:
|
|
41
|
+
//! - Each project root is isolated via project_root column in v2 schema
|
|
42
|
+
//! - Centralized database at ~/.local/share/ruvector/index_v2.db
|
|
43
|
+
//! - Queries are scoped to the project root passed during indexing
|
|
44
|
+
|
|
1
45
|
use anyhow::{Result, Context, anyhow};
|
|
2
46
|
use std::fs;
|
|
3
47
|
use std::path::{Path, PathBuf};
|
|
@@ -21,6 +65,152 @@ use crate::schema_v2::{EntityKind, RefKind, Visibility};
|
|
|
21
65
|
use crate::path_validator;
|
|
22
66
|
use local_ruvector::paths::{get_ruvector_dir, get_database_path, get_v1_index_dir};
|
|
23
67
|
|
|
68
|
+
/// Directories to exclude from indexing.
|
|
69
|
+
/// These are typically build artifacts, dependencies, VCS, or sensitive directories.
|
|
70
|
+
const EXCLUDED_DIRS: &[&str] = &[
|
|
71
|
+
// Package managers & dependencies
|
|
72
|
+
"node_modules", // npm/yarn/pnpm dependencies
|
|
73
|
+
"vendor", // Go/PHP vendor dependencies
|
|
74
|
+
".pnpm", // pnpm store
|
|
75
|
+
".yarn", // Yarn 2+ PnP cache
|
|
76
|
+
|
|
77
|
+
// Build artifacts
|
|
78
|
+
"target", // Rust/Maven build artifacts
|
|
79
|
+
"dist", // JS/TS build output
|
|
80
|
+
"build", // Generic build output
|
|
81
|
+
"out", // Common output directory
|
|
82
|
+
".next", // Next.js build
|
|
83
|
+
".nuxt", // Nuxt.js build
|
|
84
|
+
".output", // Nitro/Nuxt output
|
|
85
|
+
".turbo", // Turborepo cache
|
|
86
|
+
".parcel-cache", // Parcel bundler cache
|
|
87
|
+
".webpack", // Webpack cache
|
|
88
|
+
|
|
89
|
+
// Version control
|
|
90
|
+
".git", // Git repository data
|
|
91
|
+
".svn", // Subversion
|
|
92
|
+
".hg", // Mercurial
|
|
93
|
+
|
|
94
|
+
// IDE & editor
|
|
95
|
+
".idea", // JetBrains IDEs
|
|
96
|
+
".vscode", // VS Code (may contain sensitive settings)
|
|
97
|
+
".vs", // Visual Studio
|
|
98
|
+
|
|
99
|
+
// Cache & temp
|
|
100
|
+
".cache", // Generic cache directories
|
|
101
|
+
"__pycache__", // Python bytecode cache
|
|
102
|
+
".pytest_cache", // Pytest cache
|
|
103
|
+
".mypy_cache", // Mypy cache
|
|
104
|
+
".ruff_cache", // Ruff linter cache
|
|
105
|
+
"coverage", // Test coverage reports
|
|
106
|
+
".nyc_output", // NYC coverage output
|
|
107
|
+
".eslintcache", // ESLint cache (dir form)
|
|
108
|
+
|
|
109
|
+
// Virtual environments
|
|
110
|
+
".venv", // Python virtual environments
|
|
111
|
+
"venv", // Python venv (alternate)
|
|
112
|
+
".env", // dotenv directories (not files)
|
|
113
|
+
"env", // Generic env directory
|
|
114
|
+
|
|
115
|
+
// Infrastructure as Code
|
|
116
|
+
".terraform", // Terraform state/cache
|
|
117
|
+
".serverless", // Serverless framework
|
|
118
|
+
".aws-sam", // AWS SAM
|
|
119
|
+
|
|
120
|
+
// Project-specific
|
|
121
|
+
".artifacts", // CFN Loop artifacts
|
|
122
|
+
".ruvector", // RuVector local index (avoid self-indexing)
|
|
123
|
+
".archive", // Archived/deprecated code
|
|
124
|
+
"archive", // Archive directories
|
|
125
|
+
|
|
126
|
+
// Backups & generated
|
|
127
|
+
"backups", // Backup directories
|
|
128
|
+
".backups", // Hidden backup directories
|
|
129
|
+
"backup", // Singular backup directory
|
|
130
|
+
".backup", // Hidden singular backup
|
|
131
|
+
"tmp", // Temporary files
|
|
132
|
+
".tmp", // Hidden temp files
|
|
133
|
+
"temp", // Temp directory
|
|
134
|
+
"logs", // Log directories
|
|
135
|
+
".logs", // Hidden logs
|
|
136
|
+
|
|
137
|
+
// Test artifacts (not source code)
|
|
138
|
+
"__snapshots__", // Jest snapshots
|
|
139
|
+
"__mocks__", // Jest mocks (usually generated)
|
|
140
|
+
".storybook", // Storybook config (not source)
|
|
141
|
+
"storybook-static", // Storybook build output
|
|
142
|
+
"playwright-report", // Playwright test reports
|
|
143
|
+
"test-results", // Generic test results
|
|
144
|
+
|
|
145
|
+
// Documentation builds
|
|
146
|
+
"_site", // Jekyll output
|
|
147
|
+
".docusaurus", // Docusaurus cache
|
|
148
|
+
"site", // MkDocs output
|
|
149
|
+
];
|
|
150
|
+
|
|
151
|
+
/// File patterns to exclude from indexing.
|
|
152
|
+
/// These are sensitive files or files that shouldn't be semantically indexed.
|
|
153
|
+
const EXCLUDED_FILES: &[&str] = &[
|
|
154
|
+
// Sensitive/secrets
|
|
155
|
+
".env", // Environment variables (secrets!)
|
|
156
|
+
".env.local", // Local env overrides
|
|
157
|
+
".env.development", // Dev env
|
|
158
|
+
".env.production", // Prod env
|
|
159
|
+
".env.test", // Test env
|
|
160
|
+
".env.example", // Example env (may contain structure hints)
|
|
161
|
+
"credentials.json", // GCP/generic credentials
|
|
162
|
+
"secrets.json", // Generic secrets
|
|
163
|
+
"secrets.yaml", // Kubernetes secrets
|
|
164
|
+
"service-account.json", // GCP service account
|
|
165
|
+
".npmrc", // npm auth tokens
|
|
166
|
+
".pypirc", // PyPI auth
|
|
167
|
+
".netrc", // Network credentials
|
|
168
|
+
"id_rsa", // SSH private key
|
|
169
|
+
"id_ed25519", // SSH private key
|
|
170
|
+
".pem", // Certificate/key files
|
|
171
|
+
".key", // Key files
|
|
172
|
+
|
|
173
|
+
// Lock files (large, not useful for semantic search)
|
|
174
|
+
"package-lock.json", // npm lock
|
|
175
|
+
"yarn.lock", // Yarn lock
|
|
176
|
+
"pnpm-lock.yaml", // pnpm lock
|
|
177
|
+
"Cargo.lock", // Rust lock
|
|
178
|
+
"poetry.lock", // Python poetry lock
|
|
179
|
+
"Gemfile.lock", // Ruby bundler lock
|
|
180
|
+
"composer.lock", // PHP composer lock
|
|
181
|
+
"go.sum", // Go module checksums
|
|
182
|
+
"flake.lock", // Nix flake lock
|
|
183
|
+
|
|
184
|
+
// Backups
|
|
185
|
+
".bak", // Generic backup extension
|
|
186
|
+
".backup", // Backup files
|
|
187
|
+
".orig", // Original files (merge conflicts)
|
|
188
|
+
".swp", // Vim swap files
|
|
189
|
+
".swo", // Vim swap files
|
|
190
|
+
"~", // Emacs backup files
|
|
191
|
+
|
|
192
|
+
// Generated/minified (not useful for semantic search)
|
|
193
|
+
".min.js", // Minified JS
|
|
194
|
+
".min.css", // Minified CSS
|
|
195
|
+
".bundle.js", // Bundled JS
|
|
196
|
+
".chunk.js", // Webpack chunks
|
|
197
|
+
".js.map", // JavaScript source maps
|
|
198
|
+
".css.map", // CSS source maps
|
|
199
|
+
".d.ts", // TypeScript declarations (generated, verbose)
|
|
200
|
+
".d.ts.map", // TypeScript declaration maps
|
|
201
|
+
|
|
202
|
+
// Binary/data files (can't extract meaningful entities)
|
|
203
|
+
".wasm", // WebAssembly binary
|
|
204
|
+
".db", // SQLite/database files
|
|
205
|
+
".sqlite", // SQLite files
|
|
206
|
+
".sqlite3", // SQLite3 files
|
|
207
|
+
|
|
208
|
+
// Large generated files
|
|
209
|
+
".snap", // Jest snapshots
|
|
210
|
+
".eslintcache", // ESLint cache file
|
|
211
|
+
".tsbuildinfo", // TypeScript incremental build info
|
|
212
|
+
];
|
|
213
|
+
|
|
24
214
|
#[derive(Debug)]
|
|
25
215
|
pub struct IndexStats {
|
|
26
216
|
pub files_processed: usize,
|
|
@@ -148,21 +338,19 @@ impl IndexCommand {
|
|
|
148
338
|
|
|
149
339
|
fn collect_files(&self) -> Result<Vec<PathBuf>> {
|
|
150
340
|
info!("Collecting files to index from: {}", self.source_path.display());
|
|
341
|
+
info!("Excluded directories: {} patterns", EXCLUDED_DIRS.len());
|
|
342
|
+
info!("Excluded files: {} patterns", EXCLUDED_FILES.len());
|
|
151
343
|
|
|
152
344
|
let mut files = Vec::new();
|
|
153
345
|
|
|
154
346
|
let walker = WalkDir::new(&self.source_path)
|
|
155
347
|
.into_iter()
|
|
156
348
|
.filter_entry(|e| {
|
|
157
|
-
let path = e.path();
|
|
158
349
|
let name = e.file_name().to_string_lossy();
|
|
159
350
|
|
|
160
|
-
// Exclude build artifacts, dependencies, and
|
|
161
|
-
// Allow .claude and other important
|
|
162
|
-
|
|
163
|
-
"node_modules" | "target" | "dist" | "build" | ".git" | ".artifacts" => false,
|
|
164
|
-
_ => true
|
|
165
|
-
}
|
|
351
|
+
// Exclude build artifacts, dependencies, and sensitive directories
|
|
352
|
+
// Allow .claude and other important folders (not in EXCLUDED_DIRS)
|
|
353
|
+
!EXCLUDED_DIRS.contains(&name.as_ref())
|
|
166
354
|
})
|
|
167
355
|
.filter_map(|e| e.ok())
|
|
168
356
|
.filter(|e| {
|
|
@@ -174,8 +362,25 @@ impl IndexCommand {
|
|
|
174
362
|
return false;
|
|
175
363
|
}
|
|
176
364
|
|
|
177
|
-
|
|
178
|
-
|
|
365
|
+
let file_name = e.file_name().to_string_lossy();
|
|
366
|
+
|
|
367
|
+
// Exclude sensitive files by exact name match
|
|
368
|
+
if EXCLUDED_FILES.contains(&file_name.as_ref()) {
|
|
369
|
+
return false;
|
|
370
|
+
}
|
|
371
|
+
|
|
372
|
+
// Exclude files by suffix pattern (e.g., ".min.js", ".bak")
|
|
373
|
+
for pattern in EXCLUDED_FILES {
|
|
374
|
+
if pattern.starts_with('.') && file_name.ends_with(pattern) {
|
|
375
|
+
return false;
|
|
376
|
+
}
|
|
377
|
+
}
|
|
378
|
+
|
|
379
|
+
// Exclude emacs backup files ending with ~
|
|
380
|
+
if file_name.ends_with('~') {
|
|
381
|
+
return false;
|
|
382
|
+
}
|
|
383
|
+
|
|
179
384
|
true
|
|
180
385
|
});
|
|
181
386
|
|
|
@@ -187,17 +392,6 @@ impl IndexCommand {
|
|
|
187
392
|
Ok(files)
|
|
188
393
|
}
|
|
189
394
|
|
|
190
|
-
fn is_hidden(entry: &DirEntry) -> bool {
|
|
191
|
-
entry.file_name()
|
|
192
|
-
.to_str()
|
|
193
|
-
.map(|s| {
|
|
194
|
-
if s == ".claude" {
|
|
195
|
-
return false;
|
|
196
|
-
}
|
|
197
|
-
s.starts_with('.')
|
|
198
|
-
})
|
|
199
|
-
.unwrap_or(false)
|
|
200
|
-
}
|
|
201
395
|
|
|
202
396
|
fn process_files(&mut self, files: Vec<PathBuf>) -> Result<IndexStats> {
|
|
203
397
|
let stats = Arc::new(RwLock::new(IndexStats::default()));
|
|
@@ -238,14 +432,21 @@ impl IndexCommand {
|
|
|
238
432
|
) -> Result<()> {
|
|
239
433
|
let file_hash = self.calculate_file_hash(file_path)?;
|
|
240
434
|
|
|
435
|
+
// Check if file is already indexed with same hash (incremental indexing)
|
|
241
436
|
if !self.force && self.is_file_indexed(file_path, &file_hash)? {
|
|
242
|
-
debug!("Skipping already indexed file: {}", file_path.display());
|
|
437
|
+
debug!("Skipping already indexed file (unchanged): {}", file_path.display());
|
|
243
438
|
return Ok(());
|
|
244
439
|
}
|
|
245
440
|
|
|
246
|
-
//
|
|
441
|
+
// Non-destructive update: Only delete entities for THIS specific file
|
|
442
|
+
// The delete_file_entities already scopes to project_root for multi-project safety
|
|
247
443
|
let file_path_str = file_path.to_string_lossy();
|
|
248
|
-
|
|
444
|
+
|
|
445
|
+
// Only clean up if the file was previously indexed (avoid unnecessary DB operations)
|
|
446
|
+
if self.is_file_in_index(file_path)? {
|
|
447
|
+
debug!("Updating existing file entries: {}", file_path.display());
|
|
448
|
+
self.store_v2.delete_file_entities(&file_path_str, &self.project_dir)?;
|
|
449
|
+
}
|
|
249
450
|
|
|
250
451
|
let content = fs::read_to_string(file_path)
|
|
251
452
|
.with_context(|| format!("Failed to read file: {}", file_path.display()))?;
|
|
@@ -276,7 +477,7 @@ impl IndexCommand {
|
|
|
276
477
|
s.embeddings_generated += embeddings.len();
|
|
277
478
|
}
|
|
278
479
|
|
|
279
|
-
self.mark_file_indexed(file_path, &file_hash)?;
|
|
480
|
+
self.mark_file_indexed(file_path, &file_hash, extraction_result.entities.len())?;
|
|
280
481
|
|
|
281
482
|
Ok(())
|
|
282
483
|
}
|
|
@@ -345,6 +546,7 @@ impl IndexCommand {
|
|
|
345
546
|
doc_comment: None,
|
|
346
547
|
attributes: None,
|
|
347
548
|
metadata: Some(serde_json::to_string(&entity.metadata)?),
|
|
549
|
+
project_root: project_root_str.to_string(),
|
|
348
550
|
created_at: chrono::Utc::now(),
|
|
349
551
|
updated_at: chrono::Utc::now(),
|
|
350
552
|
};
|
|
@@ -460,15 +662,33 @@ impl IndexCommand {
|
|
|
460
662
|
Ok(count > 0)
|
|
461
663
|
}
|
|
462
664
|
|
|
463
|
-
|
|
665
|
+
/// Check if file exists in the index (regardless of hash)
|
|
666
|
+
fn is_file_in_index(&self, file_path: &Path) -> Result<bool> {
|
|
667
|
+
let query = "SELECT COUNT(*) FROM file_hashes WHERE file_path = ?";
|
|
668
|
+
let mut stmt = self.store_v2.conn.prepare(query)?;
|
|
669
|
+
let count: i64 = stmt.query_row(
|
|
670
|
+
params![file_path.to_string_lossy()],
|
|
671
|
+
|row| row.get(0)
|
|
672
|
+
)?;
|
|
673
|
+
Ok(count > 0)
|
|
674
|
+
}
|
|
675
|
+
|
|
676
|
+
fn mark_file_indexed(&self, file_path: &Path, file_hash: &str, patterns_count: usize) -> Result<()> {
|
|
677
|
+
let timestamp = chrono::Utc::now().timestamp();
|
|
678
|
+
let file_path_str = file_path.to_string_lossy().to_string();
|
|
679
|
+
|
|
680
|
+
// Update file_hashes table (for incremental indexing)
|
|
464
681
|
self.store_v2.conn.execute(
|
|
465
682
|
"INSERT OR REPLACE INTO file_hashes (file_path, file_hash, indexed_at) VALUES (?1, ?2, ?3)",
|
|
466
|
-
params![
|
|
467
|
-
file_path.to_string_lossy(),
|
|
468
|
-
file_hash,
|
|
469
|
-
chrono::Utc::now().timestamp()
|
|
470
|
-
]
|
|
683
|
+
params![&file_path_str, file_hash, timestamp]
|
|
471
684
|
)?;
|
|
685
|
+
|
|
686
|
+
// Also update the files table (for legacy compatibility and stats)
|
|
687
|
+
self.store_v2.conn.execute(
|
|
688
|
+
"INSERT OR REPLACE INTO files (path, hash, last_indexed, patterns_count) VALUES (?1, ?2, ?3, ?4)",
|
|
689
|
+
params![&file_path_str, file_hash, timestamp, patterns_count as i64]
|
|
690
|
+
)?;
|
|
691
|
+
|
|
472
692
|
Ok(())
|
|
473
693
|
}
|
|
474
694
|
|
|
@@ -299,6 +299,7 @@ impl AstIndexCommand {
|
|
|
299
299
|
let mut entity_map = HashMap::new();
|
|
300
300
|
let mut type_usages = Vec::new();
|
|
301
301
|
|
|
302
|
+
let project_root_str = self.project_dir.to_string_lossy().to_string();
|
|
302
303
|
for (idx, entity) in extraction_result.entities.iter().enumerate() {
|
|
303
304
|
let store_entity = StoreEntity {
|
|
304
305
|
id: 0,
|
|
@@ -313,6 +314,7 @@ impl AstIndexCommand {
|
|
|
313
314
|
doc_comment: None, // TODO: Extract doc comments
|
|
314
315
|
attributes: None, // TODO: Extract attributes
|
|
315
316
|
metadata: Some(serde_json::to_string(&entity.metadata)?),
|
|
317
|
+
project_root: project_root_str.clone(),
|
|
316
318
|
created_at: chrono::Utc::now(),
|
|
317
319
|
updated_at: chrono::Utc::now(),
|
|
318
320
|
};
|
|
@@ -613,6 +615,7 @@ impl AstIndexCommand {
|
|
|
613
615
|
Ok(entity.id)
|
|
614
616
|
} else {
|
|
615
617
|
// Create a placeholder entity for unknown references
|
|
618
|
+
let project_root_str = self.project_dir.to_string_lossy().to_string();
|
|
616
619
|
let placeholder = StoreEntity {
|
|
617
620
|
id: 0,
|
|
618
621
|
kind: EntityKind::Function,
|
|
@@ -626,11 +629,11 @@ impl AstIndexCommand {
|
|
|
626
629
|
doc_comment: None,
|
|
627
630
|
attributes: None,
|
|
628
631
|
metadata: None,
|
|
632
|
+
project_root: project_root_str.clone(),
|
|
629
633
|
created_at: chrono::Utc::now(),
|
|
630
634
|
updated_at: chrono::Utc::now(),
|
|
631
635
|
};
|
|
632
636
|
|
|
633
|
-
let project_root_str = self.project_dir.to_string_lossy();
|
|
634
637
|
Ok(self.store_v2.insert_entity(&placeholder, &project_root_str)?)
|
|
635
638
|
}
|
|
636
639
|
}
|
|
@@ -4,8 +4,8 @@ use std::collections::HashMap;
|
|
|
4
4
|
use tracing::info;
|
|
5
5
|
use serde::{Serialize, Deserialize};
|
|
6
6
|
|
|
7
|
-
use crate::
|
|
8
|
-
use crate::
|
|
7
|
+
use crate::store_v2::StoreV2;
|
|
8
|
+
use crate::paths::get_database_path;
|
|
9
9
|
|
|
10
10
|
#[derive(Debug, Clone)]
|
|
11
11
|
pub enum OutputFormat {
|
|
@@ -42,26 +42,26 @@ impl StatsCommand {
|
|
|
42
42
|
pub fn execute(&self) -> Result<()> {
|
|
43
43
|
info!("Gathering statistics");
|
|
44
44
|
|
|
45
|
-
|
|
46
|
-
let
|
|
45
|
+
// Use centralized v2 database
|
|
46
|
+
let db_path = get_database_path()?;
|
|
47
|
+
let store = StoreV2::new(&db_path)
|
|
48
|
+
.context("Failed to open centralized database")?;
|
|
47
49
|
|
|
48
|
-
//
|
|
49
|
-
let mut engine = search_engine;
|
|
50
|
-
engine.load_or_create()?;
|
|
51
|
-
|
|
52
|
-
// Get stats from search engine
|
|
53
|
-
let index_stats = engine.get_stats();
|
|
54
|
-
|
|
55
|
-
// Get stats from database
|
|
50
|
+
// Get stats from v2 database
|
|
56
51
|
let db_stats = store.get_stats()?;
|
|
57
52
|
|
|
58
|
-
//
|
|
53
|
+
// Get database file size
|
|
54
|
+
let database_size_bytes = std::fs::metadata(&db_path)
|
|
55
|
+
.map(|m| m.len())
|
|
56
|
+
.unwrap_or(0);
|
|
57
|
+
|
|
58
|
+
// Create report using v2 stats
|
|
59
59
|
let report = StatsReport {
|
|
60
|
-
total_files: db_stats.
|
|
61
|
-
total_embeddings: db_stats.
|
|
62
|
-
total_patterns:
|
|
63
|
-
index_size_bytes:
|
|
64
|
-
database_size_bytes
|
|
60
|
+
total_files: db_stats.files_count,
|
|
61
|
+
total_embeddings: db_stats.embeddings_count,
|
|
62
|
+
total_patterns: db_stats.entities_count, // entities are our "patterns" in v2
|
|
63
|
+
index_size_bytes: 0, // v2 doesn't have separate index file
|
|
64
|
+
database_size_bytes,
|
|
65
65
|
file_types: HashMap::new(), // TODO: Calculate actual file types
|
|
66
66
|
};
|
|
67
67
|
|
|
@@ -28,7 +28,7 @@ pub fn create_text_fallback_extractor() -> Result<text_fallback::TextFallbackExt
|
|
|
28
28
|
}
|
|
29
29
|
|
|
30
30
|
/// Common entity kinds across languages
|
|
31
|
-
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
|
31
|
+
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
|
32
32
|
pub enum EntityKind {
|
|
33
33
|
// Functions
|
|
34
34
|
Function,
|
|
@@ -359,21 +359,21 @@ mod tests {
|
|
|
359
359
|
|
|
360
360
|
SchemaV2::initialize(&conn)?;
|
|
361
361
|
|
|
362
|
-
// Insert test entities with
|
|
362
|
+
// Insert test entities with project_root set (schema now requires it)
|
|
363
363
|
conn.execute(
|
|
364
|
-
"INSERT INTO entities (kind, name, file_path, line_number) VALUES (?, ?, ?, ?)",
|
|
365
|
-
params!["struct", "Test1", "/home/user/project/src/main.rs", 10]
|
|
364
|
+
"INSERT INTO entities (kind, name, file_path, line_number, project_root) VALUES (?, ?, ?, ?, ?)",
|
|
365
|
+
params!["struct", "Test1", "/home/user/project/src/main.rs", 10, "/home/user/project"]
|
|
366
366
|
)?;
|
|
367
367
|
|
|
368
368
|
conn.execute(
|
|
369
|
-
"INSERT INTO entities (kind, name, file_path, line_number) VALUES (?, ?, ?, ?)",
|
|
370
|
-
params!["function", "Test2", "/var/app/lib/utils.rs", 20]
|
|
369
|
+
"INSERT INTO entities (kind, name, file_path, line_number, project_root) VALUES (?, ?, ?, ?, ?)",
|
|
370
|
+
params!["function", "Test2", "/var/app/lib/utils.rs", 20, "/var/app"]
|
|
371
371
|
)?;
|
|
372
372
|
|
|
373
|
-
// Run migration
|
|
373
|
+
// Run migration (will be skipped since schema already has project_root)
|
|
374
374
|
MigrationV2::run_v2_migration(&mut conn)?;
|
|
375
375
|
|
|
376
|
-
// Verify project_root
|
|
376
|
+
// Verify project_root values
|
|
377
377
|
let project1: String = conn.query_row(
|
|
378
378
|
"SELECT project_root FROM entities WHERE name = ?",
|
|
379
379
|
params!["Test1"],
|
|
@@ -310,14 +310,15 @@ impl QueryApi {
|
|
|
310
310
|
row.get::<_, Option<String>>(9)?, // doc_comment
|
|
311
311
|
row.get::<_, Option<String>>(10)?, // attributes
|
|
312
312
|
row.get::<_, Option<String>>(11)?, // metadata
|
|
313
|
-
row.get::<_,
|
|
314
|
-
row.get::<_, i64>(13)?, //
|
|
313
|
+
row.get::<_, String>(12)?, // project_root
|
|
314
|
+
row.get::<_, i64>(13)?, // created_at
|
|
315
|
+
row.get::<_, i64>(14)?, // updated_at
|
|
315
316
|
))
|
|
316
317
|
})?;
|
|
317
318
|
|
|
318
319
|
for row in rows {
|
|
319
320
|
let row = row?;
|
|
320
|
-
let (id, kind_str, name, signature, visibility_str, parent_id, file_path, line_number, column_number, doc_comment, attributes, metadata, created_at, updated_at) = row;
|
|
321
|
+
let (id, kind_str, name, signature, visibility_str, parent_id, file_path, line_number, column_number, doc_comment, attributes, metadata, project_root, created_at, updated_at) = row;
|
|
321
322
|
// For now, just create a simple entity - the full parsing can be done later
|
|
322
323
|
// This is just to get the IDs for reference finding
|
|
323
324
|
matching_entities.push(crate::store_v2::Entity {
|
|
@@ -333,6 +334,7 @@ impl QueryApi {
|
|
|
333
334
|
doc_comment,
|
|
334
335
|
attributes,
|
|
335
336
|
metadata,
|
|
337
|
+
project_root,
|
|
336
338
|
created_at: chrono::DateTime::from_timestamp(created_at, 0).unwrap_or_default(),
|
|
337
339
|
updated_at: chrono::DateTime::from_timestamp(updated_at, 0).unwrap_or_default(),
|
|
338
340
|
});
|
|
@@ -366,6 +368,7 @@ impl QueryApi {
|
|
|
366
368
|
doc_comment: None,
|
|
367
369
|
attributes: None,
|
|
368
370
|
metadata: None,
|
|
371
|
+
project_root: "".to_string(),
|
|
369
372
|
created_at: chrono::DateTime::from_timestamp(0, 0).unwrap_or_default(),
|
|
370
373
|
updated_at: chrono::DateTime::from_timestamp(0, 0).unwrap_or_default(),
|
|
371
374
|
};
|
|
@@ -226,9 +226,10 @@ impl SchemaV2 {
|
|
|
226
226
|
doc_comment TEXT,
|
|
227
227
|
attributes TEXT,
|
|
228
228
|
metadata TEXT,
|
|
229
|
+
project_root TEXT NOT NULL DEFAULT '',
|
|
229
230
|
created_at INTEGER NOT NULL DEFAULT (strftime('%s', 'now')),
|
|
230
231
|
updated_at INTEGER NOT NULL DEFAULT (strftime('%s', 'now')),
|
|
231
|
-
|
|
232
|
+
|
|
232
233
|
FOREIGN KEY (parent_id) REFERENCES entities(id) ON DELETE RESTRICT
|
|
233
234
|
);
|
|
234
235
|
|
|
@@ -282,6 +283,14 @@ impl SchemaV2 {
|
|
|
282
283
|
|
|
283
284
|
FOREIGN KEY (entity_id) REFERENCES entities(id) ON DELETE RESTRICT
|
|
284
285
|
);
|
|
286
|
+
|
|
287
|
+
-- Create files table for tracking indexed files (stats and legacy compatibility)
|
|
288
|
+
CREATE TABLE IF NOT EXISTS files (
|
|
289
|
+
path TEXT PRIMARY KEY,
|
|
290
|
+
hash TEXT NOT NULL,
|
|
291
|
+
last_indexed INTEGER NOT NULL,
|
|
292
|
+
patterns_count INTEGER NOT NULL DEFAULT 0
|
|
293
|
+
);
|
|
285
294
|
"#
|
|
286
295
|
)?;
|
|
287
296
|
|
|
@@ -311,6 +320,8 @@ impl SchemaV2 {
|
|
|
311
320
|
CREATE INDEX IF NOT EXISTS idx_entities_kind_name ON entities(kind, name);
|
|
312
321
|
CREATE INDEX IF NOT EXISTS idx_entities_file_kind ON entities(file_path, kind);
|
|
313
322
|
CREATE INDEX IF NOT EXISTS idx_entities_parent_kind ON entities(parent_id, kind);
|
|
323
|
+
CREATE INDEX IF NOT EXISTS idx_entities_project_root ON entities(project_root);
|
|
324
|
+
CREATE INDEX IF NOT EXISTS idx_entities_project_file ON entities(project_root, file_path);
|
|
314
325
|
|
|
315
326
|
-- Reference indexes
|
|
316
327
|
CREATE INDEX IF NOT EXISTS idx_refs_source ON refs(source_entity_id);
|
|
@@ -338,6 +349,10 @@ impl SchemaV2 {
|
|
|
338
349
|
|
|
339
350
|
-- Entity-module relationship index (via file path)
|
|
340
351
|
CREATE INDEX IF NOT EXISTS idx_entities_module_lookup ON entities(file_path);
|
|
352
|
+
|
|
353
|
+
-- Files table indexes
|
|
354
|
+
CREATE INDEX IF NOT EXISTS idx_files_hash ON files(hash);
|
|
355
|
+
CREATE INDEX IF NOT EXISTS idx_files_last_indexed ON files(last_indexed);
|
|
341
356
|
"#
|
|
342
357
|
)?;
|
|
343
358
|
|