claude-flow-novice 2.18.15 → 2.18.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. package/.claude/agents/cfn-dev-team/analysts/analyst.md +9 -0
  2. package/.claude/agents/cfn-dev-team/analysts/root-cause-analyst.md +9 -0
  3. package/.claude/agents/cfn-dev-team/architecture/api-designer-persona.md +10 -1
  4. package/.claude/agents/cfn-dev-team/architecture/base-template-generator.md +9 -0
  5. package/.claude/agents/cfn-dev-team/architecture/goal-planner.md +10 -1
  6. package/.claude/agents/cfn-dev-team/architecture/planner.md +9 -0
  7. package/.claude/agents/cfn-dev-team/architecture/system-architect.md +9 -0
  8. package/.claude/agents/cfn-dev-team/coordinators/cfn-frontend-coordinator.md +9 -0
  9. package/.claude/agents/cfn-dev-team/coordinators/consensus-builder.md +10 -1
  10. package/.claude/agents/cfn-dev-team/coordinators/handoff-coordinator.md +9 -0
  11. package/.claude/agents/cfn-dev-team/coordinators/multi-sprint-coordinator.md +9 -0
  12. package/.claude/agents/cfn-dev-team/dev-ops/devops-engineer.md +9 -0
  13. package/.claude/agents/cfn-dev-team/dev-ops/docker-specialist.md +9 -0
  14. package/.claude/agents/cfn-dev-team/dev-ops/github-commit-agent.md +9 -0
  15. package/.claude/agents/cfn-dev-team/dev-ops/kubernetes-specialist.md +9 -0
  16. package/.claude/agents/cfn-dev-team/developers/api-gateway-specialist.md +9 -0
  17. package/.claude/agents/cfn-dev-team/developers/data/data-engineer.md +9 -0
  18. package/.claude/agents/cfn-dev-team/developers/database/database-architect.md +9 -0
  19. package/.claude/agents/cfn-dev-team/developers/database/supabase-specialist.md +9 -0
  20. package/.claude/agents/cfn-dev-team/developers/frontend/mobile-dev.md +10 -1
  21. package/.claude/agents/cfn-dev-team/developers/frontend/typescript-specialist.md +10 -1
  22. package/.claude/agents/cfn-dev-team/developers/frontend/ui-designer.md +10 -1
  23. package/.claude/agents/cfn-dev-team/developers/graphql-specialist.md +9 -0
  24. package/.claude/agents/cfn-dev-team/developers/rust-developer.md +10 -1
  25. package/.claude/agents/cfn-dev-team/documentation/agent-type-guidelines.md +9 -0
  26. package/.claude/agents/cfn-dev-team/documentation/api-documentation.md +10 -1
  27. package/.claude/agents/cfn-dev-team/documentation/pseudocode.md +10 -1
  28. package/.claude/agents/cfn-dev-team/documentation/specification-agent.md +10 -1
  29. package/.claude/agents/cfn-dev-team/product-owners/accessibility-advocate-persona.md +10 -1
  30. package/.claude/agents/cfn-dev-team/product-owners/cto-agent.md +9 -0
  31. package/.claude/agents/cfn-dev-team/product-owners/power-user-persona.md +10 -1
  32. package/.claude/agents/cfn-dev-team/product-owners/product-owner.md +9 -0
  33. package/.claude/agents/cfn-dev-team/reviewers/quality/code-quality-validator.md +9 -0
  34. package/.claude/agents/cfn-dev-team/reviewers/quality/cyclomatic-complexity-reducer.md +9 -0
  35. package/.claude/agents/cfn-dev-team/reviewers/quality/perf-analyzer.md +9 -0
  36. package/.claude/agents/cfn-dev-team/reviewers/quality/performance-benchmarker.md +10 -1
  37. package/.claude/agents/cfn-dev-team/reviewers/quality/quality-metrics.md +9 -0
  38. package/.claude/agents/cfn-dev-team/testers/api-testing-specialist.md +9 -0
  39. package/.claude/agents/cfn-dev-team/testers/chaos-engineering-specialist.md +9 -0
  40. package/.claude/agents/cfn-dev-team/testers/contract-tester.md +9 -0
  41. package/.claude/agents/cfn-dev-team/testers/e2e/playwright-tester.md +9 -0
  42. package/.claude/agents/cfn-dev-team/testers/integration-tester.md +9 -0
  43. package/.claude/agents/cfn-dev-team/testers/interaction-tester.md +9 -0
  44. package/.claude/agents/cfn-dev-team/testers/load-testing-specialist.md +9 -0
  45. package/.claude/agents/cfn-dev-team/testers/mutation-testing-specialist.md +9 -0
  46. package/.claude/agents/cfn-dev-team/testers/playwright-tester.md +10 -1
  47. package/.claude/agents/cfn-dev-team/testers/unit/tdd-london-unit-swarm.md +10 -1
  48. package/.claude/agents/cfn-dev-team/testers/validation/validation-production-validator.md +10 -1
  49. package/.claude/agents/cfn-dev-team/testing/test-validation-agent.md +9 -0
  50. package/.claude/agents/cfn-dev-team/utility/agent-builder.md +10 -1
  51. package/.claude/agents/cfn-dev-team/utility/context-curator.md +9 -0
  52. package/.claude/agents/cfn-dev-team/utility/memory-leak-specialist.md +9 -0
  53. package/.claude/agents/cfn-dev-team/utility/researcher.md +9 -0
  54. package/.claude/agents/cfn-dev-team/utility/z-ai-specialist.md +9 -0
  55. package/.claude/hooks/SessionStart-cfn-build-ruvector.sh +12 -0
  56. package/.claude/hooks/SessionStart:cfn-build-ruvector.sh +28 -0
  57. package/.claude/skills/bulk-add-ruvector-instructions.sh +34 -46
  58. package/.claude/skills/cfn-local-ruvector-accelerator/.claude/hooks/SessionStart-cfn-build-ruvector.sh +12 -0
  59. package/.claude/skills/cfn-local-ruvector-accelerator/SKILL.md +89 -6
  60. package/.claude/skills/cfn-local-ruvector-accelerator/src/cli/index.rs +250 -30
  61. package/.claude/skills/cfn-local-ruvector-accelerator/src/cli/index_ast.rs +4 -1
  62. package/.claude/skills/cfn-local-ruvector-accelerator/src/cli/stats.rs +18 -18
  63. package/.claude/skills/cfn-local-ruvector-accelerator/src/extractors/mod.rs +1 -1
  64. package/.claude/skills/cfn-local-ruvector-accelerator/src/migration_v2.rs +7 -7
  65. package/.claude/skills/cfn-local-ruvector-accelerator/src/query_api.rs +6 -3
  66. package/.claude/skills/cfn-local-ruvector-accelerator/src/schema_v2.rs +16 -1
  67. package/.claude/skills/cfn-local-ruvector-accelerator/src/store_v2.rs +7 -3
  68. package/.claude/skills/cfn-local-ruvector-accelerator/src/store_v2_tx.rs +5 -3
  69. package/.claude/skills/cfn-local-ruvector-accelerator/src/transaction_tests.rs +8 -2
  70. package/CLAUDE.md +2 -1
  71. package/dist/trigger.config.d.ts +15 -1
  72. package/dist/trigger.config.js +11 -1
  73. package/package.json +1 -1
@@ -1,8 +1,91 @@
1
- # 1. Initialize local RuVector
2
- ./target/release/local-ruvector init
1
+ # RuVector Local Semantic Code Search
3
2
 
4
- # 2. Index your codebase
5
- ./target/release/local-ruvector index --path /path/to/project --types rs
3
+ ## WHEN TO USE THIS SKILL
6
4
 
7
- # 3. Query patterns instantly
8
- ./target/release/local-ruvector query --pattern "authentication rust" --limit 5
5
+ **USE RuVector V2 SQL for ALL indexed projects (400x FASTER than grep):**
6
+ ```bash
7
+ # Exact name lookup - 0.002s vs grep's 0.8s
8
+ sqlite3 ~/.local/share/ruvector/index_v2.db "SELECT file_path, line_number FROM entities WHERE name = 'MyFunction';"
9
+
10
+ # Fuzzy search - 0.004s
11
+ sqlite3 ~/.local/share/ruvector/index_v2.db "SELECT file_path, line_number FROM entities WHERE name LIKE '%Store%' LIMIT 10;"
12
+ ```
13
+
14
+ **USE grep/rg ONLY when:**
15
+ - Project is NOT indexed yet
16
+ - Searching for strings that aren't code entities (error messages, comments, config values)
17
+ - Quick one-off search in small directory
18
+
19
+ **USE RuVector semantic search when:**
20
+ - "Where is authentication implemented?" (conceptual search)
21
+ - Finding similar patterns you can't name exactly
22
+ - Discovering how a feature is built
23
+
24
+ ## Quick Commands
25
+
26
+ ### Semantic Search (V1 - Embeddings)
27
+ ```bash
28
+ # Natural language search
29
+ /codebase-search "authentication middleware pattern"
30
+ /cfn-ruvector-search "error handling in API routes"
31
+
32
+ # CLI direct
33
+ ./.claude/skills/cfn-local-ruvector-accelerator/target/release/local-ruvector query --pattern "user login flow"
34
+ ```
35
+
36
+ ### Structural Search (V2 - SQL on AST)
37
+ ```bash
38
+ # Find all callers of a function
39
+ sqlite3 ~/.local/share/ruvector/index_v2.db \
40
+ "SELECT * FROM refs WHERE target_name = 'MyFunction';"
41
+
42
+ # Find all functions in a file
43
+ sqlite3 ~/.local/share/ruvector/index_v2.db \
44
+ "SELECT name, line_number FROM entities WHERE file_path LIKE '%myfile.rs' AND kind = 'function';"
45
+
46
+ # Find entities by project (multi-project isolation)
47
+ sqlite3 ~/.local/share/ruvector/index_v2.db \
48
+ "SELECT COUNT(*) FROM entities WHERE project_root = '/path/to/project';"
49
+ ```
50
+
51
+ ## Index Management
52
+
53
+ ```bash
54
+ # Index a project (first time or full rebuild)
55
+ ./target/release/local-ruvector index --path /path/to/project --types rs,ts,py
56
+
57
+ # Incremental update (after code changes)
58
+ /codebase-reindex
59
+
60
+ # Check index stats
61
+ sqlite3 ~/.local/share/ruvector/index_v2.db "SELECT project_root, COUNT(*) FROM entities GROUP BY project_root;"
62
+ ```
63
+
64
+ ## Key Features
65
+
66
+ - **Multi-project isolation**: Index multiple projects in single database without data collision
67
+ - **Non-destructive**: Indexing one project never deletes data from other projects
68
+ - **Centralized storage**: `~/.local/share/ruvector/index_v2.db`
69
+ - **Dual search**: V1 semantic (embeddings) + V2 structural (SQL on AST)
70
+ - **Fast**: Rust binary with SQLite backend
71
+
72
+ ## Database Location
73
+ ```
74
+ ~/.local/share/ruvector/index_v2.db
75
+ ```
76
+
77
+ ## For Agents
78
+
79
+ Before implementing changes, ALWAYS query RuVector first:
80
+ ```bash
81
+ # Find similar patterns
82
+ /codebase-search "relevant search terms" --top 5
83
+
84
+ # Query past errors
85
+ ./.claude/skills/cfn-ruvector-codebase-index/query-error-patterns.sh --task-description "description"
86
+
87
+ # Query learnings
88
+ ./.claude/skills/cfn-ruvector-codebase-index/query-learnings.sh --task-description "description" --category PATTERN
89
+ ```
90
+
91
+ This prevents duplicated work and leverages existing solutions.
@@ -1,3 +1,47 @@
1
+ //! # RuVector Index Command
2
+ //!
3
+ //! ## IMPORTANT: Run from PROJECT ROOT
4
+ //!
5
+ //! This indexer MUST be run from the project root directory to index all files correctly.
6
+ //! Running from a subdirectory will only index that subdirectory.
7
+ //!
8
+ //! ## Recommended Usage:
9
+ //! ```bash
10
+ //! cd /path/to/project-root
11
+ //! local-ruvector index --path . --types rs,ts,js,json,md,sh --force
12
+ //! ```
13
+ //!
14
+ //! ## Supported File Types (default):
15
+ //! - rs, ts, js, json, md, sh, yaml, yml, txt, config
16
+ //! - Use --types to specify custom extensions
17
+ //!
18
+ //! ## Excluded Directories (see EXCLUDED_DIRS constant - 57 patterns):
19
+ //! - Dependencies: node_modules, vendor, .pnpm, .yarn
20
+ //! - Build artifacts: target, dist, build, out, .next, .nuxt, .output, .turbo, .parcel-cache
21
+ //! - VCS: .git, .svn, .hg
22
+ //! - IDE: .idea, .vscode, .vs
23
+ //! - Cache: .cache, __pycache__, .pytest_cache, .mypy_cache, .ruff_cache, coverage, .nyc_output
24
+ //! - Virtual envs: .venv, venv, env
25
+ //! - IaC: .terraform, .serverless, .aws-sam
26
+ //! - Project-specific: .artifacts, .ruvector, .archive, archive
27
+ //! - Backups/temp: backups, .backups, backup, tmp, .tmp, temp, logs
28
+ //! - Test artifacts: __snapshots__, __mocks__, playwright-report, test-results
29
+ //! - Doc builds: _site, .docusaurus, site
30
+ //! - NOTE: .claude directory IS included (contains important config)
31
+ //!
32
+ //! ## Excluded Files (see EXCLUDED_FILES constant - 47 patterns):
33
+ //! - Secrets: .env*, credentials.json, secrets.json, .npmrc, .pypirc, .netrc, id_rsa, *.pem, *.key
34
+ //! - Lock files: package-lock.json, yarn.lock, pnpm-lock.yaml, Cargo.lock, go.sum, etc.
35
+ //! - Backups: *.bak, *.backup, *.orig, *.swp, *~
36
+ //! - Minified/generated: *.min.js, *.min.css, *.bundle.js, *.chunk.js, *.js.map, *.d.ts
37
+ //! - Binary/data: *.wasm, *.db, *.sqlite
38
+ //! - Build info: *.snap, *.eslintcache, *.tsbuildinfo
39
+ //!
40
+ //! ## Multi-Project Isolation:
41
+ //! - Each project root is isolated via project_root column in v2 schema
42
+ //! - Centralized database at ~/.local/share/ruvector/index_v2.db
43
+ //! - Queries are scoped to the project root passed during indexing
44
+
1
45
  use anyhow::{Result, Context, anyhow};
2
46
  use std::fs;
3
47
  use std::path::{Path, PathBuf};
@@ -21,6 +65,152 @@ use crate::schema_v2::{EntityKind, RefKind, Visibility};
21
65
  use crate::path_validator;
22
66
  use local_ruvector::paths::{get_ruvector_dir, get_database_path, get_v1_index_dir};
23
67
 
68
+ /// Directories to exclude from indexing.
69
+ /// These are typically build artifacts, dependencies, VCS, or sensitive directories.
70
+ const EXCLUDED_DIRS: &[&str] = &[
71
+ // Package managers & dependencies
72
+ "node_modules", // npm/yarn/pnpm dependencies
73
+ "vendor", // Go/PHP vendor dependencies
74
+ ".pnpm", // pnpm store
75
+ ".yarn", // Yarn 2+ PnP cache
76
+
77
+ // Build artifacts
78
+ "target", // Rust/Maven build artifacts
79
+ "dist", // JS/TS build output
80
+ "build", // Generic build output
81
+ "out", // Common output directory
82
+ ".next", // Next.js build
83
+ ".nuxt", // Nuxt.js build
84
+ ".output", // Nitro/Nuxt output
85
+ ".turbo", // Turborepo cache
86
+ ".parcel-cache", // Parcel bundler cache
87
+ ".webpack", // Webpack cache
88
+
89
+ // Version control
90
+ ".git", // Git repository data
91
+ ".svn", // Subversion
92
+ ".hg", // Mercurial
93
+
94
+ // IDE & editor
95
+ ".idea", // JetBrains IDEs
96
+ ".vscode", // VS Code (may contain sensitive settings)
97
+ ".vs", // Visual Studio
98
+
99
+ // Cache & temp
100
+ ".cache", // Generic cache directories
101
+ "__pycache__", // Python bytecode cache
102
+ ".pytest_cache", // Pytest cache
103
+ ".mypy_cache", // Mypy cache
104
+ ".ruff_cache", // Ruff linter cache
105
+ "coverage", // Test coverage reports
106
+ ".nyc_output", // NYC coverage output
107
+ ".eslintcache", // ESLint cache (dir form)
108
+
109
+ // Virtual environments
110
+ ".venv", // Python virtual environments
111
+ "venv", // Python venv (alternate)
112
+ ".env", // dotenv directories (not files)
113
+ "env", // Generic env directory
114
+
115
+ // Infrastructure as Code
116
+ ".terraform", // Terraform state/cache
117
+ ".serverless", // Serverless framework
118
+ ".aws-sam", // AWS SAM
119
+
120
+ // Project-specific
121
+ ".artifacts", // CFN Loop artifacts
122
+ ".ruvector", // RuVector local index (avoid self-indexing)
123
+ ".archive", // Archived/deprecated code
124
+ "archive", // Archive directories
125
+
126
+ // Backups & generated
127
+ "backups", // Backup directories
128
+ ".backups", // Hidden backup directories
129
+ "backup", // Singular backup directory
130
+ ".backup", // Hidden singular backup
131
+ "tmp", // Temporary files
132
+ ".tmp", // Hidden temp files
133
+ "temp", // Temp directory
134
+ "logs", // Log directories
135
+ ".logs", // Hidden logs
136
+
137
+ // Test artifacts (not source code)
138
+ "__snapshots__", // Jest snapshots
139
+ "__mocks__", // Jest mocks (usually generated)
140
+ ".storybook", // Storybook config (not source)
141
+ "storybook-static", // Storybook build output
142
+ "playwright-report", // Playwright test reports
143
+ "test-results", // Generic test results
144
+
145
+ // Documentation builds
146
+ "_site", // Jekyll output
147
+ ".docusaurus", // Docusaurus cache
148
+ "site", // MkDocs output
149
+ ];
150
+
151
+ /// File patterns to exclude from indexing.
152
+ /// These are sensitive files or files that shouldn't be semantically indexed.
153
+ const EXCLUDED_FILES: &[&str] = &[
154
+ // Sensitive/secrets
155
+ ".env", // Environment variables (secrets!)
156
+ ".env.local", // Local env overrides
157
+ ".env.development", // Dev env
158
+ ".env.production", // Prod env
159
+ ".env.test", // Test env
160
+ ".env.example", // Example env (may contain structure hints)
161
+ "credentials.json", // GCP/generic credentials
162
+ "secrets.json", // Generic secrets
163
+ "secrets.yaml", // Kubernetes secrets
164
+ "service-account.json", // GCP service account
165
+ ".npmrc", // npm auth tokens
166
+ ".pypirc", // PyPI auth
167
+ ".netrc", // Network credentials
168
+ "id_rsa", // SSH private key
169
+ "id_ed25519", // SSH private key
170
+ ".pem", // Certificate/key files
171
+ ".key", // Key files
172
+
173
+ // Lock files (large, not useful for semantic search)
174
+ "package-lock.json", // npm lock
175
+ "yarn.lock", // Yarn lock
176
+ "pnpm-lock.yaml", // pnpm lock
177
+ "Cargo.lock", // Rust lock
178
+ "poetry.lock", // Python poetry lock
179
+ "Gemfile.lock", // Ruby bundler lock
180
+ "composer.lock", // PHP composer lock
181
+ "go.sum", // Go module checksums
182
+ "flake.lock", // Nix flake lock
183
+
184
+ // Backups
185
+ ".bak", // Generic backup extension
186
+ ".backup", // Backup files
187
+ ".orig", // Original files (merge conflicts)
188
+ ".swp", // Vim swap files
189
+ ".swo", // Vim swap files
190
+ "~", // Emacs backup files
191
+
192
+ // Generated/minified (not useful for semantic search)
193
+ ".min.js", // Minified JS
194
+ ".min.css", // Minified CSS
195
+ ".bundle.js", // Bundled JS
196
+ ".chunk.js", // Webpack chunks
197
+ ".js.map", // JavaScript source maps
198
+ ".css.map", // CSS source maps
199
+ ".d.ts", // TypeScript declarations (generated, verbose)
200
+ ".d.ts.map", // TypeScript declaration maps
201
+
202
+ // Binary/data files (can't extract meaningful entities)
203
+ ".wasm", // WebAssembly binary
204
+ ".db", // SQLite/database files
205
+ ".sqlite", // SQLite files
206
+ ".sqlite3", // SQLite3 files
207
+
208
+ // Large generated files
209
+ ".snap", // Jest snapshots
210
+ ".eslintcache", // ESLint cache file
211
+ ".tsbuildinfo", // TypeScript incremental build info
212
+ ];
213
+
24
214
  #[derive(Debug)]
25
215
  pub struct IndexStats {
26
216
  pub files_processed: usize,
@@ -148,21 +338,19 @@ impl IndexCommand {
148
338
 
149
339
  fn collect_files(&self) -> Result<Vec<PathBuf>> {
150
340
  info!("Collecting files to index from: {}", self.source_path.display());
341
+ info!("Excluded directories: {} patterns", EXCLUDED_DIRS.len());
342
+ info!("Excluded files: {} patterns", EXCLUDED_FILES.len());
151
343
 
152
344
  let mut files = Vec::new();
153
345
 
154
346
  let walker = WalkDir::new(&self.source_path)
155
347
  .into_iter()
156
348
  .filter_entry(|e| {
157
- let path = e.path();
158
349
  let name = e.file_name().to_string_lossy();
159
350
 
160
- // Exclude build artifacts, dependencies, and temporary files
161
- // Allow .claude and other important hidden folders
162
- match name.as_ref() {
163
- "node_modules" | "target" | "dist" | "build" | ".git" | ".artifacts" => false,
164
- _ => true
165
- }
351
+ // Exclude build artifacts, dependencies, and sensitive directories
352
+ // Allow .claude and other important folders (not in EXCLUDED_DIRS)
353
+ !EXCLUDED_DIRS.contains(&name.as_ref())
166
354
  })
167
355
  .filter_map(|e| e.ok())
168
356
  .filter(|e| {
@@ -174,8 +362,25 @@ impl IndexCommand {
174
362
  return false;
175
363
  }
176
364
 
177
- // Index ALL files regardless of extension
178
- // File type metadata is captured during processing
365
+ let file_name = e.file_name().to_string_lossy();
366
+
367
+ // Exclude sensitive files by exact name match
368
+ if EXCLUDED_FILES.contains(&file_name.as_ref()) {
369
+ return false;
370
+ }
371
+
372
+ // Exclude files by suffix pattern (e.g., ".min.js", ".bak")
373
+ for pattern in EXCLUDED_FILES {
374
+ if pattern.starts_with('.') && file_name.ends_with(pattern) {
375
+ return false;
376
+ }
377
+ }
378
+
379
+ // Exclude emacs backup files ending with ~
380
+ if file_name.ends_with('~') {
381
+ return false;
382
+ }
383
+
179
384
  true
180
385
  });
181
386
 
@@ -187,17 +392,6 @@ impl IndexCommand {
187
392
  Ok(files)
188
393
  }
189
394
 
190
- fn is_hidden(entry: &DirEntry) -> bool {
191
- entry.file_name()
192
- .to_str()
193
- .map(|s| {
194
- if s == ".claude" {
195
- return false;
196
- }
197
- s.starts_with('.')
198
- })
199
- .unwrap_or(false)
200
- }
201
395
 
202
396
  fn process_files(&mut self, files: Vec<PathBuf>) -> Result<IndexStats> {
203
397
  let stats = Arc::new(RwLock::new(IndexStats::default()));
@@ -238,14 +432,21 @@ impl IndexCommand {
238
432
  ) -> Result<()> {
239
433
  let file_hash = self.calculate_file_hash(file_path)?;
240
434
 
435
+ // Check if file is already indexed with same hash (incremental indexing)
241
436
  if !self.force && self.is_file_indexed(file_path, &file_hash)? {
242
- debug!("Skipping already indexed file: {}", file_path.display());
437
+ debug!("Skipping already indexed file (unchanged): {}", file_path.display());
243
438
  return Ok(());
244
439
  }
245
440
 
246
- // Clean up old entries before reindexing to prevent duplicate entities
441
+ // Non-destructive update: Only delete entities for THIS specific file
442
+ // The delete_file_entities already scopes to project_root for multi-project safety
247
443
  let file_path_str = file_path.to_string_lossy();
248
- self.store_v2.delete_file_entities(&file_path_str, &self.project_dir)?;
444
+
445
+ // Only clean up if the file was previously indexed (avoid unnecessary DB operations)
446
+ if self.is_file_in_index(file_path)? {
447
+ debug!("Updating existing file entries: {}", file_path.display());
448
+ self.store_v2.delete_file_entities(&file_path_str, &self.project_dir)?;
449
+ }
249
450
 
250
451
  let content = fs::read_to_string(file_path)
251
452
  .with_context(|| format!("Failed to read file: {}", file_path.display()))?;
@@ -276,7 +477,7 @@ impl IndexCommand {
276
477
  s.embeddings_generated += embeddings.len();
277
478
  }
278
479
 
279
- self.mark_file_indexed(file_path, &file_hash)?;
480
+ self.mark_file_indexed(file_path, &file_hash, extraction_result.entities.len())?;
280
481
 
281
482
  Ok(())
282
483
  }
@@ -345,6 +546,7 @@ impl IndexCommand {
345
546
  doc_comment: None,
346
547
  attributes: None,
347
548
  metadata: Some(serde_json::to_string(&entity.metadata)?),
549
+ project_root: project_root_str.to_string(),
348
550
  created_at: chrono::Utc::now(),
349
551
  updated_at: chrono::Utc::now(),
350
552
  };
@@ -460,15 +662,33 @@ impl IndexCommand {
460
662
  Ok(count > 0)
461
663
  }
462
664
 
463
- fn mark_file_indexed(&self, file_path: &Path, file_hash: &str) -> Result<()> {
665
+ /// Check if file exists in the index (regardless of hash)
666
+ fn is_file_in_index(&self, file_path: &Path) -> Result<bool> {
667
+ let query = "SELECT COUNT(*) FROM file_hashes WHERE file_path = ?";
668
+ let mut stmt = self.store_v2.conn.prepare(query)?;
669
+ let count: i64 = stmt.query_row(
670
+ params![file_path.to_string_lossy()],
671
+ |row| row.get(0)
672
+ )?;
673
+ Ok(count > 0)
674
+ }
675
+
676
+ fn mark_file_indexed(&self, file_path: &Path, file_hash: &str, patterns_count: usize) -> Result<()> {
677
+ let timestamp = chrono::Utc::now().timestamp();
678
+ let file_path_str = file_path.to_string_lossy().to_string();
679
+
680
+ // Update file_hashes table (for incremental indexing)
464
681
  self.store_v2.conn.execute(
465
682
  "INSERT OR REPLACE INTO file_hashes (file_path, file_hash, indexed_at) VALUES (?1, ?2, ?3)",
466
- params![
467
- file_path.to_string_lossy(),
468
- file_hash,
469
- chrono::Utc::now().timestamp()
470
- ]
683
+ params![&file_path_str, file_hash, timestamp]
471
684
  )?;
685
+
686
+ // Also update the files table (for legacy compatibility and stats)
687
+ self.store_v2.conn.execute(
688
+ "INSERT OR REPLACE INTO files (path, hash, last_indexed, patterns_count) VALUES (?1, ?2, ?3, ?4)",
689
+ params![&file_path_str, file_hash, timestamp, patterns_count as i64]
690
+ )?;
691
+
472
692
  Ok(())
473
693
  }
474
694
 
@@ -299,6 +299,7 @@ impl AstIndexCommand {
299
299
  let mut entity_map = HashMap::new();
300
300
  let mut type_usages = Vec::new();
301
301
 
302
+ let project_root_str = self.project_dir.to_string_lossy().to_string();
302
303
  for (idx, entity) in extraction_result.entities.iter().enumerate() {
303
304
  let store_entity = StoreEntity {
304
305
  id: 0,
@@ -313,6 +314,7 @@ impl AstIndexCommand {
313
314
  doc_comment: None, // TODO: Extract doc comments
314
315
  attributes: None, // TODO: Extract attributes
315
316
  metadata: Some(serde_json::to_string(&entity.metadata)?),
317
+ project_root: project_root_str.clone(),
316
318
  created_at: chrono::Utc::now(),
317
319
  updated_at: chrono::Utc::now(),
318
320
  };
@@ -613,6 +615,7 @@ impl AstIndexCommand {
613
615
  Ok(entity.id)
614
616
  } else {
615
617
  // Create a placeholder entity for unknown references
618
+ let project_root_str = self.project_dir.to_string_lossy().to_string();
616
619
  let placeholder = StoreEntity {
617
620
  id: 0,
618
621
  kind: EntityKind::Function,
@@ -626,11 +629,11 @@ impl AstIndexCommand {
626
629
  doc_comment: None,
627
630
  attributes: None,
628
631
  metadata: None,
632
+ project_root: project_root_str.clone(),
629
633
  created_at: chrono::Utc::now(),
630
634
  updated_at: chrono::Utc::now(),
631
635
  };
632
636
 
633
- let project_root_str = self.project_dir.to_string_lossy();
634
637
  Ok(self.store_v2.insert_entity(&placeholder, &project_root_str)?)
635
638
  }
636
639
  }
@@ -4,8 +4,8 @@ use std::collections::HashMap;
4
4
  use tracing::info;
5
5
  use serde::{Serialize, Deserialize};
6
6
 
7
- use crate::search_engine::SearchEngine;
8
- use crate::sqlite_store::SqliteStore;
7
+ use crate::store_v2::StoreV2;
8
+ use crate::paths::get_database_path;
9
9
 
10
10
  #[derive(Debug, Clone)]
11
11
  pub enum OutputFormat {
@@ -42,26 +42,26 @@ impl StatsCommand {
42
42
  pub fn execute(&self) -> Result<()> {
43
43
  info!("Gathering statistics");
44
44
 
45
- let search_engine = SearchEngine::new(Path::new(&self.project_dir))?;
46
- let store = SqliteStore::new(&Path::new(&self.project_dir).join(".ruvector/index.db"))?;
45
+ // Use centralized v2 database
46
+ let db_path = get_database_path()?;
47
+ let store = StoreV2::new(&db_path)
48
+ .context("Failed to open centralized database")?;
47
49
 
48
- // Load search engine
49
- let mut engine = search_engine;
50
- engine.load_or_create()?;
51
-
52
- // Get stats from search engine
53
- let index_stats = engine.get_stats();
54
-
55
- // Get stats from database
50
+ // Get stats from v2 database
56
51
  let db_stats = store.get_stats()?;
57
52
 
58
- // Create report
53
+ // Get database file size
54
+ let database_size_bytes = std::fs::metadata(&db_path)
55
+ .map(|m| m.len())
56
+ .unwrap_or(0);
57
+
58
+ // Create report using v2 stats
59
59
  let report = StatsReport {
60
- total_files: db_stats.num_files,
61
- total_embeddings: db_stats.num_embeddings,
62
- total_patterns: index_stats.metadata_count,
63
- index_size_bytes: index_stats.index_size_bytes,
64
- database_size_bytes: db_stats.database_size_bytes,
60
+ total_files: db_stats.files_count,
61
+ total_embeddings: db_stats.embeddings_count,
62
+ total_patterns: db_stats.entities_count, // entities are our "patterns" in v2
63
+ index_size_bytes: 0, // v2 doesn't have separate index file
64
+ database_size_bytes,
65
65
  file_types: HashMap::new(), // TODO: Calculate actual file types
66
66
  };
67
67
 
@@ -28,7 +28,7 @@ pub fn create_text_fallback_extractor() -> Result<text_fallback::TextFallbackExt
28
28
  }
29
29
 
30
30
  /// Common entity kinds across languages
31
- #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
31
+ #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
32
32
  pub enum EntityKind {
33
33
  // Functions
34
34
  Function,
@@ -359,21 +359,21 @@ mod tests {
359
359
 
360
360
  SchemaV2::initialize(&conn)?;
361
361
 
362
- // Insert test entities with different path patterns
362
+ // Insert test entities with project_root set (schema now requires it)
363
363
  conn.execute(
364
- "INSERT INTO entities (kind, name, file_path, line_number) VALUES (?, ?, ?, ?)",
365
- params!["struct", "Test1", "/home/user/project/src/main.rs", 10]
364
+ "INSERT INTO entities (kind, name, file_path, line_number, project_root) VALUES (?, ?, ?, ?, ?)",
365
+ params!["struct", "Test1", "/home/user/project/src/main.rs", 10, "/home/user/project"]
366
366
  )?;
367
367
 
368
368
  conn.execute(
369
- "INSERT INTO entities (kind, name, file_path, line_number) VALUES (?, ?, ?, ?)",
370
- params!["function", "Test2", "/var/app/lib/utils.rs", 20]
369
+ "INSERT INTO entities (kind, name, file_path, line_number, project_root) VALUES (?, ?, ?, ?, ?)",
370
+ params!["function", "Test2", "/var/app/lib/utils.rs", 20, "/var/app"]
371
371
  )?;
372
372
 
373
- // Run migration
373
+ // Run migration (will be skipped since schema already has project_root)
374
374
  MigrationV2::run_v2_migration(&mut conn)?;
375
375
 
376
- // Verify project_root extraction
376
+ // Verify project_root values
377
377
  let project1: String = conn.query_row(
378
378
  "SELECT project_root FROM entities WHERE name = ?",
379
379
  params!["Test1"],
@@ -310,14 +310,15 @@ impl QueryApi {
310
310
  row.get::<_, Option<String>>(9)?, // doc_comment
311
311
  row.get::<_, Option<String>>(10)?, // attributes
312
312
  row.get::<_, Option<String>>(11)?, // metadata
313
- row.get::<_, i64>(12)?, // created_at
314
- row.get::<_, i64>(13)?, // updated_at
313
+ row.get::<_, String>(12)?, // project_root
314
+ row.get::<_, i64>(13)?, // created_at
315
+ row.get::<_, i64>(14)?, // updated_at
315
316
  ))
316
317
  })?;
317
318
 
318
319
  for row in rows {
319
320
  let row = row?;
320
- let (id, kind_str, name, signature, visibility_str, parent_id, file_path, line_number, column_number, doc_comment, attributes, metadata, created_at, updated_at) = row;
321
+ let (id, kind_str, name, signature, visibility_str, parent_id, file_path, line_number, column_number, doc_comment, attributes, metadata, project_root, created_at, updated_at) = row;
321
322
  // For now, just create a simple entity - the full parsing can be done later
322
323
  // This is just to get the IDs for reference finding
323
324
  matching_entities.push(crate::store_v2::Entity {
@@ -333,6 +334,7 @@ impl QueryApi {
333
334
  doc_comment,
334
335
  attributes,
335
336
  metadata,
337
+ project_root,
336
338
  created_at: chrono::DateTime::from_timestamp(created_at, 0).unwrap_or_default(),
337
339
  updated_at: chrono::DateTime::from_timestamp(updated_at, 0).unwrap_or_default(),
338
340
  });
@@ -366,6 +368,7 @@ impl QueryApi {
366
368
  doc_comment: None,
367
369
  attributes: None,
368
370
  metadata: None,
371
+ project_root: "".to_string(),
369
372
  created_at: chrono::DateTime::from_timestamp(0, 0).unwrap_or_default(),
370
373
  updated_at: chrono::DateTime::from_timestamp(0, 0).unwrap_or_default(),
371
374
  };
@@ -226,9 +226,10 @@ impl SchemaV2 {
226
226
  doc_comment TEXT,
227
227
  attributes TEXT,
228
228
  metadata TEXT,
229
+ project_root TEXT NOT NULL DEFAULT '',
229
230
  created_at INTEGER NOT NULL DEFAULT (strftime('%s', 'now')),
230
231
  updated_at INTEGER NOT NULL DEFAULT (strftime('%s', 'now')),
231
-
232
+
232
233
  FOREIGN KEY (parent_id) REFERENCES entities(id) ON DELETE RESTRICT
233
234
  );
234
235
 
@@ -282,6 +283,14 @@ impl SchemaV2 {
282
283
 
283
284
  FOREIGN KEY (entity_id) REFERENCES entities(id) ON DELETE RESTRICT
284
285
  );
286
+
287
+ -- Create files table for tracking indexed files (stats and legacy compatibility)
288
+ CREATE TABLE IF NOT EXISTS files (
289
+ path TEXT PRIMARY KEY,
290
+ hash TEXT NOT NULL,
291
+ last_indexed INTEGER NOT NULL,
292
+ patterns_count INTEGER NOT NULL DEFAULT 0
293
+ );
285
294
  "#
286
295
  )?;
287
296
 
@@ -311,6 +320,8 @@ impl SchemaV2 {
311
320
  CREATE INDEX IF NOT EXISTS idx_entities_kind_name ON entities(kind, name);
312
321
  CREATE INDEX IF NOT EXISTS idx_entities_file_kind ON entities(file_path, kind);
313
322
  CREATE INDEX IF NOT EXISTS idx_entities_parent_kind ON entities(parent_id, kind);
323
+ CREATE INDEX IF NOT EXISTS idx_entities_project_root ON entities(project_root);
324
+ CREATE INDEX IF NOT EXISTS idx_entities_project_file ON entities(project_root, file_path);
314
325
 
315
326
  -- Reference indexes
316
327
  CREATE INDEX IF NOT EXISTS idx_refs_source ON refs(source_entity_id);
@@ -338,6 +349,10 @@ impl SchemaV2 {
338
349
 
339
350
  -- Entity-module relationship index (via file path)
340
351
  CREATE INDEX IF NOT EXISTS idx_entities_module_lookup ON entities(file_path);
352
+
353
+ -- Files table indexes
354
+ CREATE INDEX IF NOT EXISTS idx_files_hash ON files(hash);
355
+ CREATE INDEX IF NOT EXISTS idx_files_last_indexed ON files(last_indexed);
341
356
  "#
342
357
  )?;
343
358