codevira 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. codevira-1.6.0.dist-info/LICENSE +21 -0
  2. codevira-1.6.0.dist-info/METADATA +477 -0
  3. codevira-1.6.0.dist-info/RECORD +58 -0
  4. codevira-1.6.0.dist-info/WHEEL +5 -0
  5. codevira-1.6.0.dist-info/entry_points.txt +2 -0
  6. codevira-1.6.0.dist-info/top_level.txt +2 -0
  7. indexer/__init__.py +1 -0
  8. indexer/chunker.py +428 -0
  9. indexer/global_db.py +197 -0
  10. indexer/graph_generator.py +380 -0
  11. indexer/index_codebase.py +588 -0
  12. indexer/outcome_tracker.py +172 -0
  13. indexer/rule_learner.py +186 -0
  14. indexer/sqlite_graph.py +640 -0
  15. indexer/treesitter_parser.py +423 -0
  16. mcp_server/__init__.py +1 -0
  17. mcp_server/__main__.py +20 -0
  18. mcp_server/auto_init.py +257 -0
  19. mcp_server/cli.py +622 -0
  20. mcp_server/crash_logger.py +236 -0
  21. mcp_server/data/__init__.py +1 -0
  22. mcp_server/data/agents/builder.md +84 -0
  23. mcp_server/data/agents/developer.md +111 -0
  24. mcp_server/data/agents/documenter.md +138 -0
  25. mcp_server/data/agents/orchestrator.md +96 -0
  26. mcp_server/data/agents/planner.md +106 -0
  27. mcp_server/data/agents/reviewer.md +82 -0
  28. mcp_server/data/agents/tester.md +83 -0
  29. mcp_server/data/config.example.yaml +33 -0
  30. mcp_server/data/rules/coding-standards.md +48 -0
  31. mcp_server/data/rules/engineering-excellence.md +28 -0
  32. mcp_server/data/rules/git-cicd-governance.md +32 -0
  33. mcp_server/data/rules/git_commits.md +130 -0
  34. mcp_server/data/rules/incremental-updates.md +5 -0
  35. mcp_server/data/rules/master_rule.md +187 -0
  36. mcp_server/data/rules/multi-language.md +19 -0
  37. mcp_server/data/rules/persistence.md +21 -0
  38. mcp_server/data/rules/resilience-observability.md +17 -0
  39. mcp_server/data/rules/smoke-testing.md +48 -0
  40. mcp_server/data/rules/testing-standards.md +23 -0
  41. mcp_server/detect.py +284 -0
  42. mcp_server/gitignore.py +284 -0
  43. mcp_server/global_sync.py +187 -0
  44. mcp_server/http_server.py +341 -0
  45. mcp_server/ide_inject.py +444 -0
  46. mcp_server/launchd.py +156 -0
  47. mcp_server/migrate.py +215 -0
  48. mcp_server/paths.py +256 -0
  49. mcp_server/prompts.py +136 -0
  50. mcp_server/server.py +1049 -0
  51. mcp_server/tools/__init__.py +0 -0
  52. mcp_server/tools/changesets.py +223 -0
  53. mcp_server/tools/code_reader.py +335 -0
  54. mcp_server/tools/graph.py +637 -0
  55. mcp_server/tools/learning.py +238 -0
  56. mcp_server/tools/playbook.py +89 -0
  57. mcp_server/tools/roadmap.py +599 -0
  58. mcp_server/tools/search.py +145 -0
@@ -0,0 +1,187 @@
1
+ ---
2
+ trigger: always_on
3
+ ---
4
+
5
+ # MASTER RULESET FOR AI CODE AGENTS (AUTHORITATIVE, NON-DILUTING)
6
+
7
+ This document defines HOW the AI code agent must behave.
8
+ It does NOT replace or override any existing rules.
9
+ It unifies them into a single, enforceable operating contract.
10
+
11
+ ────────────────────────────────────────
12
+ 0. CORE MENTAL MODEL (NON-NEGOTIABLE)
13
+ ────────────────────────────────────────
14
+
15
+ INTENT and REALITY are different.
16
+
17
+ - INTENT is decided consciously and preserved.
18
+ - REALITY is executed factually and recorded.
19
+ - CODE is only allowed to freeze when both are aligned.
20
+
21
+ LLM reasons
22
+ → System decides
23
+ → Code enforces
24
+ → Graph / Roadmap preserves intent
25
+ → Session Logs preserve truth
26
+ → FAQ preserves explanation
27
+ → Git freezes verified state
28
+
29
+ ────────────────────────────────────────
30
+ 1. AUTOMATED MEMORY & KNOWLEDGE SYSTEM (CODEVIRA)
31
+ ────────────────────────────────────────
32
+
33
+ ### A. ARCHITECTURAL MEMORY (INTENT)
34
+ Tools: `update_node()`, `add_phase()`, `complete_phase()`, `update_next_action()`
35
+ Storage: `.codevira/graph/` and `.codevira/roadmap.yaml`
36
+
37
+ Purpose:
38
+ - Preserve architectural intent and direction via Graph Rules.
39
+ - Capture final decisions as `key_decisions` in Roadmap phases.
40
+ - Record "never do this again" invariants as `rules` on specific file nodes.
41
+
42
+ Rules:
43
+ - Graph Rules and the Roadmap represent the "Project Law."
44
+ - Any architectural change:
45
+ - MUST be explicitly approved by the user.
46
+ - MUST be persisted via `update_node` (new rules) or `complete_phase` (decisions).
47
+ - MUST be logically linked in the session log.
48
+
49
+ ---
50
+
51
+ ### B. EXECUTION MEMORY (TRUTH)
52
+ Tool: `write_session_log()`
53
+ Storage: `.codevira/logs/` (YAML)
54
+
55
+ Purpose:
56
+ - Preserve factual reality of what happened in each session.
57
+ - Prevent "token re-discovery" in future sessions.
58
+
59
+ Must record:
60
+ - The Evolution: What was suggested vs. what was actually built.
61
+ - The "Wrong" Paths: Rejected ideas and failed attempts.
62
+ - The "Why": Underlying logic and trade-offs.
63
+ - The "What": Precise technical changes.
64
+
65
+ Rules:
66
+ - Every session MUST end with a `write_session_log` call.
67
+ - If it is not logged in the codevira history → it does not exist.
68
+
69
+ ---
70
+
71
+ ### C. FAQ / USER KNOWLEDGE
72
+ File: `docs/FAQ.md`
73
+
74
+ Purpose:
75
+ - Human-readable explanation of complex behaviors and non-obvious trade-offs.
76
+
77
+ Update FAQ whenever:
78
+ - A behavior or workflow is finalized.
79
+ - A technical limitation is accepted.
80
+ - A previous decision is reversed.
81
+
82
+ ENFORCEMENT:
83
+ - If behavior changed but FAQ is not updated → WORK IS INCOMPLETE.
84
+ - Agent MUST STOP.
85
+ - No further coding, execution, or commits allowed.
86
+
87
+ ────────────────────────────────────────
88
+ 2. AGENT BEHAVIOR RULES
89
+ ────────────────────────────────────────
90
+
91
+ 1. READ BEFORE ACT
92
+ Always review project memory (roadmap, graph nodes) before doing anything.
93
+ If missing, outdated, or unclear → STOP and ask.
94
+
95
+ 2. INTENT ≠ REALITY
96
+ - Intent → memory/roadmap
97
+ - Reality → session logs
98
+ Never mix them.
99
+
100
+ 3. NO DRIFT
101
+ Do NOT introduce new tools, APIs, architectures, or patterns
102
+ unless explicitly requested.
103
+
104
+ 4. NO GUESSING
105
+ If ambiguous, unsafe, or conflicting → FAIL FAST and explain risk.
106
+
107
+ 5. SMALL & REVERSIBLE
108
+ - Minimal diffs only
109
+ - Wrap, don't rewrite
110
+ - Refactors only for correctness
111
+ - Schema, memory, persistence, telemetry changes are NEVER assumed reversible
112
+
113
+ 6. EXECUTION SAFETY
114
+ - Unsafe or unvalidated output must NEVER reach execution layers
115
+ - Compiler / validator rules override LLM output
116
+
117
+ 7. NO SILENT FALLBACKS
118
+ Any fallback, heuristic, bypass, or degraded mode MUST be:
119
+ - Explicit
120
+ - Logged
121
+ - Visible to the user
122
+
123
+ ────────────────────────────────────────
124
+ 3. CONFLICT RULE (ABSOLUTE)
125
+ ────────────────────────────────────────
126
+
127
+ If a request conflicts with documented architectural decisions:
128
+ - STOP immediately
129
+ - Explain the conflict
130
+ - Cite the violated rule
131
+ - DO NOT implement
132
+
133
+ ────────────────────────────────────────
134
+ 4. CHANGE MANAGEMENT RULE
135
+ ────────────────────────────────────────
136
+
137
+ Whenever behavior, logic, or architecture changes:
138
+
139
+ 1. Log the change in session log
140
+ 2. If intent changed → update memory ONLY after user approval
141
+ 3. Update FAQ explaining:
142
+ - What changed
143
+ - Why it changed
144
+ - Impact and trade-offs
145
+
146
+ Skipping any step = incomplete work.
147
+
148
+ ────────────────────────────────────────
149
+ 5. GIT COMMIT RULES (GATEKEEPER ONLY)
150
+ ────────────────────────────────────────
151
+
152
+ Commits DO NOT decide intent.
153
+ Commits ONLY freeze verified state.
154
+
155
+ ### A. COMMIT AUTHORITY
156
+ - NEVER commit unless the user explicitly commands it.
157
+
158
+ ### B. PRE-COMMIT CHECKS (ALL REQUIRED)
159
+ Before committing, verify:
160
+
161
+ - Session log entries exist for all work
162
+ - Memory changes (if any) were explicitly approved
163
+ - FAQ updated if decision/behavior changed
164
+ - Docs updated if applicable
165
+ - No partial or inconsistent state remains
166
+
167
+ If any check fails → DO NOT COMMIT.
168
+
169
+ ### C. COMMIT MESSAGE REQUIREMENTS
170
+ - One-line commits are NOT allowed
171
+ - Commit message MUST explain:
172
+ - Context
173
+ - What changed
174
+ - Why it changed
175
+ - Decisions reinforced or changed
176
+ - Docs updated
177
+
178
+ Commits are permanent explanation, not just history.
179
+
180
+ ────────────────────────────────────────
181
+ FINAL RULE (NEVER VIOLATE)
182
+ ────────────────────────────────────────
183
+
184
+ If future you cannot answer:
185
+ "WHAT did we do, WHY did we do it, and WHAT changed?"
186
+
187
+ Then the agent has failed — even if the code works.
@@ -0,0 +1,19 @@
1
+ # Multi-Language Project Rules
2
+
3
+ ## TypeScript / TSX
4
+ - When adding a new module, re-export it from the barrel `index.ts`.
5
+ - Prefer `export function` over `export default` for named symbols.
6
+ - Use JSDoc `/** */` comments for public APIs — they are extracted by the indexer.
7
+ - When adding React components, co-locate styles and tests in the same directory.
8
+
9
+ ## Go
10
+ - Exported symbols start with an uppercase letter; names of unexported helpers start with a lowercase letter.
11
+ - When adding a new HTTP handler, register it in the router (e.g., `routes.go`).
12
+ - Follow the `func (s *Server) handleX(w http.ResponseWriter, r *http.Request)` pattern.
13
+ - Add a `// Package <name>` doc comment at the top of each package's primary file.
14
+
15
+ ## Rust
16
+ - When adding a new module, declare it in `mod.rs` or `lib.rs` with `pub mod <name>;`.
17
+ - Use `///` doc comments for public APIs — they are extracted by the indexer.
18
+ - Prefer `pub fn` for the public API surface; keep internals private by default.
19
+ - When adding a trait, provide a default implementation where sensible.
@@ -0,0 +1,21 @@
1
+ ---
2
+ trigger: always_on
3
+ ---
4
+
5
+ # Artifact and State Persistence Rule
6
+
7
+ ## Objective
8
+ To ensure all project intelligence, planning, and task data are stored locally within the project workspace for version control and persistence, rather than the IDE's temporary internal environment.
9
+
10
+ ## Mandatory Procedures
11
+ 1. **Automated State Tracking**: Every time a decision is finalized or a phase is completed, use Codevira's roadmap tools (`complete_phase()`, `add_phase()`) to persist the data locally.
12
+ 2. **Local Repository Truth**: Use `.codevira/` instead of any IDE temporary internal environment.
13
+ 3. **Session Handover**: At the start of every session, you must call `get_roadmap()` and `get_full_roadmap()` to synchronize your internal state with the files stored on disk.
14
+ 4. **No Temporary-Only Storage**: Do not finalize a task until the corresponding documentation (FAQ, Roadmap, Logs) has been committed to the local project file system.
15
+
16
+ ## Directory Structure
17
+ Ensure the following structure is maintained:
18
+ - `[project-root]/rules/` (Architectural Rules)
19
+ - `[project-root]/.codevira/graph/` (File context and rules)
20
+ - `[project-root]/.codevira/logs/` (Session truth history)
21
+ - `[project-root]/.codevira/roadmap.yaml` (Project planning and status)
@@ -0,0 +1,17 @@
1
+ # Rule 009: Resilience and Observability
2
+
3
+ ## 1. Fault Tolerance
4
+
5
+ - **Retries**: Use exponential backoff for transient failures (git subprocess calls, file I/O).
6
+ - **Timeouts**: Every subprocess call MUST have a defined timeout (e.g., `timeout=3` for git commands).
7
+ - **Graceful Degradation**: When optional dependencies are missing (chromadb, tree-sitter), tools MUST continue with reduced functionality rather than crashing.
8
+
9
+ ## 2. Observability
10
+
11
+ - **Structured Logging**: Use Python `logging` module with context metadata rather than raw print statements.
12
+ - **Crash Logging**: All unhandled exceptions are captured to `~/.codevira/logs/crashes.log` with automatic sanitization of sensitive data (connection strings, passwords, private IPs).
13
+
14
+ ## 3. Security
15
+
16
+ - **Crash logs MUST NOT contain PII**: The crash logger sanitizes connection strings, key=value secrets, and home directory paths before writing to disk.
17
+ - **MCP tool responses MUST NOT expose raw secrets**: Connection strings, API keys, and tokens MUST be masked before returning to the AI agent.
@@ -0,0 +1,48 @@
1
+ # Rule 013: Smoke Testing & Edge Case Hardening
2
+
3
+ ## Objective
4
+ To ensure the system remains robust under extreme conditions, invalid inputs, and infrastructure failures. All major features MUST be accompanied by a comprehensive "Smoke Test" suite that covers positive, negative, and boundary scenarios.
5
+
6
+ ## 1. The Smoke Test Manifesto
7
+ Smoke tests are NOT unit tests. They verify end-to-end system health and resilience.
8
+ - **Fail Fast**: If a smoke test fails, the build/deployment MUST abort.
9
+ - **Infrastructure Aware**: Tests must gracefully handle simulation vs. production providers.
10
+ - **State Neutral**: Tests must clean up after themselves or use isolated namespaces.
11
+
12
+ ## 2. Mandatory Edge Case Coverage
13
+ Every Smoke Test suite MUST include the following scenarios:
14
+
15
+ ### A. Input Extremes
16
+ - **Empty/Whitespace**: "", " ", "\n"
17
+ - **Massive Input**: 100KB+ queries or payloads.
18
+ - **Unicode/Multilingual**: Emoji, non-Latin scripts (CJK, Cyrillic, Hindi).
19
+ - **Special Characters**: SQL injection characters (', ", ;, --), control characters.
20
+
21
+ ### B. Parameter Boundaries
22
+ - **Limits**: limit=0, limit=-1, limit=999999999
23
+ - **Thresholds**: Below 0.0, above 1.0, exactly 0 or 1.
24
+ - **Timeframes**: Distant past (1970), far future (2099), current second.
25
+
26
+ ### C. Infrastructure Resilience (Chaos)
27
+ - **Closed Circuit**: Normal operation.
28
+ - **Open Circuit**: System must return InfrastructureError or graceful fallback, NOT hang.
29
+ - **Retry Success**: System must succeed on 2nd or 3rd attempt if initial call fails.
30
+ - **Latency**: System must handle slow response times (timeouts).
31
+
32
+ ### D. Idempotency & Concurrency
33
+ - **Double-Run**: Executing the same command twice must produce identical results/state.
34
+ - **Race conditions**: Concurrent writes to the same entity/pattern must be protected by locks.
35
+
36
+ ## 3. Implementation Standard
37
+ 1. **Verification Scripts**: Use scripts/verify_<feature>.py.
38
+ 2. **Rich Feedback**: Use core.logging and rich for clear PASS/FAIL reporting.
39
+ 3. **Exit Codes**: Return non-zero exit code on any failure.
40
+
41
+ ## 4. Verification Etiquette & Safety
42
+
43
+ - **Non-Destructive by Default**: Smoke tests MUST NOT perform destructive actions (like `shutdown`) as part of a shared suite (`verify_all.py`) unless explicitly marked as a "Final/Clean-up" step.
44
+ - **Reentrant-Proof**: If a test creates data, it should use a unique `id` or clean up after itself to avoid failing on a second run.
45
+ - **Authentication Resilience**:
46
+ - Health checks should remain public.
47
+ - Data APIs should require auth but the test suite MUST handle missing secrets gracefully (warn/skip rather than fail) to allow for basic health verification.
48
+ - **Connectivity Check**: Before running complex API tests, the suite MUST verify that the target service is actually online.
@@ -0,0 +1,23 @@
1
+ # Rule 006: Testing Standards
2
+
3
+ ## 1. Test Categories
4
+
5
+ | Category | Location | Purpose | I/O |
6
+ |----------|----------|---------|-----|
7
+ | Unit | `tests/` | Isolated functions, 1:1 per source file | None |
8
+ | Integration | `tests/integration/` | Cross-module flows | Real |
9
+ | E2E | `tests/e2e/` | Full system verification | Real |
10
+
11
+ ## 2. Test Coverage Requirements
12
+ - Every new MCP tool MUST have corresponding unit tests.
13
+ - Every new module MUST have a matching `tests/test_<module>.py` file.
14
+
15
+ ## 3. Test Naming & Structure
16
+
17
+ - **Naming Pattern**: `test_<action>_<condition>_<expected_result>`.
18
+ - **Example**: `test_search_with_empty_query_returns_empty_list()`.
19
+
20
+ ## 4. Verification Policy
21
+ - "If it's not tested, it doesn't exist."
22
+ - All new features MUST include at least unit tests.
23
+ - Infrastructure changes MUST include contract tests.
mcp_server/detect.py ADDED
@@ -0,0 +1,284 @@
1
+ """
2
+ detect.py — Zero-config project auto-detection.
3
+
4
+ Detects language, source directories, file extensions, and project name
5
+ from project markers (package.json, go.mod, Cargo.toml, etc.) with zero
6
+ interactive prompts. Supports 15+ languages.
7
+ """
8
+ from __future__ import annotations
9
+
10
+ import logging
11
+ from pathlib import Path
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+ # ---------------------------------------------------------------------------
16
+ # Language marker map — checked in order, first match wins
17
+ # ---------------------------------------------------------------------------
18
+
19
# Ordered (marker filename, language) pairs. detect_language() walks this list
# top to bottom and returns the language of the first marker that exists in the
# project root, so the position of an entry is part of its meaning.
LANGUAGE_MARKERS: list[tuple[str, str]] = [
    ("Cargo.toml", "rust"),
    ("go.mod", "go"),
    ("tsconfig.json", "typescript"),
    ("pyproject.toml", "python"),
    ("setup.py", "python"),
    ("setup.cfg", "python"),
    ("requirements.txt", "python"),
    ("pom.xml", "java"),
    # The Kotlin-DSL build file is listed before plain build.gradle.
    ("build.gradle.kts", "kotlin"),
    ("build.gradle", "java"),
    ("Gemfile", "ruby"),
    ("Package.swift", "swift"),
    ("composer.json", "php"),
    ("CMakeLists.txt", "cpp"),
    # package.json last — needs disambiguation between TS and JS
    # ("_js_or_ts" is a sentinel that detect_language() hands to
    # _disambiguate_js_ts() instead of returning it directly).
    ("package.json", "_js_or_ts"),
]
37
+
38
+ # ---------------------------------------------------------------------------
39
+ # Per-language conventions
40
+ # ---------------------------------------------------------------------------
41
+
42
# Conventional source directories per language, relative to the project root.
# detect_watched_dirs() uses these as its convention fallback, keeping only the
# candidates that exist as directories. A language without an entry here yields
# an empty candidate list via LANGUAGE_DIRS.get(language, []).
LANGUAGE_DIRS: dict[str, list[str]] = {
    "python": ["src", "lib", "app"],
    "typescript": ["src", "lib", "app", "pages", "components"],
    "javascript": ["src", "lib", "app", "pages", "components"],
    "go": ["cmd", "pkg", "internal"],
    "rust": ["src"],
    "java": ["src/main/java", "src"],
    "kotlin": ["src/main/kotlin", "src"],
    "ruby": ["lib", "app"],
    "csharp": ["src"],
    "cpp": ["src", "include", "lib"],
    "c": ["src", "include", "lib"],
    "swift": ["Sources", "src"],
    "php": ["src", "app", "lib"],
}
57
+
58
# File extensions (lowercase, with leading dot) that belong to each language.
LANGUAGE_EXTENSIONS: dict[str, list[str]] = {
    "python": [".py"],
    "typescript": [".ts", ".tsx"],
    "javascript": [".js", ".jsx"],
    "go": [".go"],
    "rust": [".rs"],
    "java": [".java"],
    "kotlin": [".kt", ".kts"],
    "ruby": [".rb"],
    "csharp": [".cs"],
    "cpp": [".cpp", ".cc", ".cxx", ".h", ".hpp"],
    "c": [".c", ".h"],
    "swift": [".swift"],
    "php": [".php"],
    "solidity": [".sol"],
    "vue": [".vue"],
}

# Reverse map: extension → language (for fallback scanning).
# An extension shared by several languages (e.g. ".h") stays with the language
# that appears first in LANGUAGE_EXTENSIONS; setdefault never overwrites.
_EXT_TO_LANG: dict[str, str] = {}
for _lang, _exts in LANGUAGE_EXTENSIONS.items():
    for _ext in _exts:
        _EXT_TO_LANG.setdefault(_ext, _lang)
82
+
83
+
84
+ # ---------------------------------------------------------------------------
85
+ # Detection functions
86
+ # ---------------------------------------------------------------------------
87
+
88
def detect_language(root: Path) -> str:
    """Return the project's primary language.

    The resolved project root is probed for each entry of LANGUAGE_MARKERS in
    order; the first marker file that exists decides the language. A
    package.json hit is refined into TypeScript or JavaScript. When no marker
    is present the result comes from a file-extension scan.
    """
    project_root = root.resolve()

    matched = next(
        (
            language
            for filename, language in LANGUAGE_MARKERS
            if project_root.joinpath(filename).exists()
        ),
        None,
    )

    if matched is None:
        # Fallback: scan files 2 levels deep, count extensions
        return _scan_dominant_language(project_root)
    if matched == "_js_or_ts":
        return _disambiguate_js_ts(project_root)
    return matched
100
+
101
+
102
def _disambiguate_js_ts(root: Path) -> str:
    """Determine if a package.json project is TypeScript or JavaScript.

    Returns "typescript" when a tsconfig.json exists in *root* or when a
    .ts/.tsx file is found in the root or up to two directories below it;
    otherwise returns "javascript".

    Files under dependency or build-output directories are ignored: installed
    packages commonly ship ``index.d.ts`` files (for example
    ``node_modules/<pkg>/index.d.ts``), which would otherwise make every plain
    JavaScript project with installed dependencies look like TypeScript.
    """
    if (root / "tsconfig.json").exists():
        return "typescript"

    # Same directory names the legacy scan in _scan_dominant_language skips.
    ignored_dirs = {
        ".git", ".codevira", "node_modules", "__pycache__", ".venv",
        "venv", ".tox", "dist", "build", "target", ".next", ".nuxt",
    }

    # Check the root plus two directory levels for .ts/.tsx files.
    patterns = ["*.ts", "*.tsx", "*/*.ts", "*/*.tsx", "*/*/*.ts", "*/*/*.tsx"]
    for pattern in patterns:
        # Iterate lazily so the search stops at the first usable hit.
        for candidate in root.glob(pattern):
            parent_parts = candidate.relative_to(root).parts[:-1]
            if any(part in ignored_dirs for part in parent_parts):
                continue
            if candidate.is_file():
                return "typescript"

    return "javascript"
113
+
114
+
115
def _scan_dominant_language(root: Path, max_depth: int = 2) -> str:
    """Scan file extensions to find the dominant language.

    Uses the gitignore-aware discovery helpers from ``mcp_server.gitignore``
    first. If that import or call fails, or it reports "unknown", falls back
    to a depth-limited walk that counts known extensions.

    Args:
        root: Project root to scan.
        max_depth: How many directory levels below *root* the fallback walk
            may descend (files directly in *root* are level 0).

    Returns:
        The language with the most matching files, or "python" when no file
        with a known extension is found.
    """
    try:
        from mcp_server.gitignore import discover_source_files, infer_language_from_files
        files = discover_source_files(root)
        lang = infer_language_from_files(files)
        if lang != "unknown":
            return lang
    except Exception as exc:
        # Best-effort path: keep going with the legacy walk, but leave a trace
        # so a broken discovery helper is not completely invisible.
        logger.debug("Gitignore-aware language scan failed, using legacy walk: %s", exc)

    # Legacy fallback: depth-limited walk
    import os
    from collections import Counter

    counts: Counter[str] = Counter()
    skip_dirs = {".git", ".codevira", "node_modules", "__pycache__", ".venv",
                 "venv", ".tox", "dist", "build", "target", ".next", ".nuxt"}

    for dirpath, dirnames, filenames in os.walk(root):
        depth = len(Path(dirpath).relative_to(root).parts)
        if depth > max_depth:
            # Only reachable for a negative max_depth: nothing is in range.
            dirnames[:] = []
            continue
        # Prune in place so os.walk never descends into skipped or too-deep
        # directories (instead of walking everything and filtering afterwards).
        if depth == max_depth:
            dirnames[:] = []
        else:
            dirnames[:] = [name for name in dirnames if name not in skip_dirs]

        for filename in filenames:
            suffix = Path(filename).suffix
            if suffix in _EXT_TO_LANG and os.path.isfile(os.path.join(dirpath, filename)):
                counts[_EXT_TO_LANG[suffix]] += 1

    if counts:
        return counts.most_common(1)[0][0]

    return "python"  # ultimate fallback
153
+
154
+
155
+
156
# Top-level directories that never contain user code
# Entries are compared by exact directory name: detect_watched_dirs() and
# _dir_has_sources() both test `name in _SKIP_DIRS`.
# NOTE(review): "*.egg-info" looks like a glob, but set membership compares
# literal names, so it only matches a directory literally called "*.egg-info".
# detect_watched_dirs() separately filters names ending in "-info".
_SKIP_DIRS: set[str] = {
    "node_modules", ".git", ".codevira", "__pycache__", ".venv", "venv",
    "env", ".env", ".tox", "dist", "build", "target", ".next", ".nuxt",
    ".turbo", ".cache", "coverage", ".nyc_output", "htmlcov", ".pytest_cache",
    ".mypy_cache", ".ruff_cache", ".eggs", "*.egg-info", "vendor",
    ".idea", ".vscode", "__snapshots__", ".storybook", "storybook-static",
    "public", "static", "assets", "migrations", "fixtures",
}
165
+
166
+
167
def detect_watched_dirs(root: Path, language: str) -> list[str]:
    """
    Detect source directories by scanning the actual project.

    Strategy (the first step that yields a non-empty result wins):
      1. Use gitignore-aware discovery to find all source files and take the
         unique top-level directories that contain them.
      2. Legacy scan: check each top-level directory for files with one of
         the language's extensions (up to 6 levels deep).
      3. Convention list for the language (LANGUAGE_DIRS), limited to
         directories that exist.
      4. Ultimate fallback: ["."]

    Args:
        root: Project root directory.
        language: Language key used to look up extensions and conventions;
            unknown languages fall back to the ".py" extension.

    Returns:
        Directory names relative to *root*. Results of steps 1 and 2 are
        sorted alphabetically.
    """
    extensions = set(LANGUAGE_EXTENSIONS.get(language, [".py"]))

    # Try gitignore-aware discovery first
    try:
        from mcp_server.gitignore import discover_source_files
        files = discover_source_files(root)
        # Filter to language-appropriate files for better dir detection
        lang_files = [f for f in files if f.suffix.lower() in extensions]
        if not lang_files:
            lang_files = files  # fall through to all files if none match

        # Extract unique top-level dirs relative to project root
        top_dirs: set[str] = set()
        for f in lang_files:
            try:
                rel = f.relative_to(root)
            except ValueError:
                # File is not located under root; it cannot name a top-level dir.
                continue
            if len(rel.parts) > 1:
                top_dirs.add(rel.parts[0])

        # Filter out noise dirs
        found = sorted(
            d for d in top_dirs
            if not d.startswith(".") and d not in _SKIP_DIRS and not d.endswith("-info")
        )
        if found:
            return found
    except Exception as exc:
        # Best-effort step: continue with the legacy scan, but record why the
        # preferred path was abandoned instead of swallowing it silently.
        logger.debug("Gitignore-aware directory discovery failed, using legacy scan: %s", exc)

    # Legacy fallback: scan top-level dirs manually
    found_legacy: list[str] = []

    try:
        for entry in sorted(root.iterdir()):
            if not entry.is_dir():
                continue
            name = entry.name
            if name.startswith(".") or name in _SKIP_DIRS or name.endswith("-info"):
                continue
            if _dir_has_sources(entry, extensions, max_depth=6):
                found_legacy.append(name)
    except PermissionError:
        pass

    if found_legacy:
        return found_legacy

    # Convention fallback
    candidates = LANGUAGE_DIRS.get(language, [])
    convention = [d for d in candidates if (root / d).is_dir()]
    if convention:
        return convention

    return ["."]
233
+
234
+
235
def _dir_has_sources(path: Path, extensions: set[str], max_depth: int) -> bool:
    """Return True if path contains at least one file with a matching extension.

    Args:
        path: Directory to inspect.
        extensions: Lowercase extensions with leading dot (e.g. {".py"}).
        max_depth: Number of directory levels to inspect, counting *path*
            itself as one. A value of zero or less inspects nothing.

    Sub-directories that are hidden (leading ".") or listed in _SKIP_DIRS are
    not entered. Directories that cannot be read are treated as empty.
    """
    # `<= 0` rather than `== 0`, so a negative budget cannot disable the limit.
    if max_depth <= 0:
        return False
    try:
        for entry in path.iterdir():
            # Compare case-insensitively, matching the `.suffix.lower()` check
            # used by the gitignore-aware path in detect_watched_dirs().
            if entry.is_file() and entry.suffix.lower() in extensions:
                return True
            if entry.is_dir() and not entry.name.startswith(".") and entry.name not in _SKIP_DIRS:
                if _dir_has_sources(entry, extensions, max_depth - 1):
                    return True
    except PermissionError:
        pass
    return False
249
+
250
+
251
def language_extensions(language: str) -> list[str]:
    """Get file extensions for a language.

    Unknown languages fall back to [".py"]. A new list is returned on every
    call, so callers may modify the result without altering the shared
    LANGUAGE_EXTENSIONS table.
    """
    return list(LANGUAGE_EXTENSIONS.get(language, [".py"]))
254
+
255
+
256
def auto_detect_project(root: Path) -> dict:
    """
    Gather every setting codevira init needs without asking the user.

    The project root is resolved first; the project name is the resolved
    directory's name. Language, watched directories and file extensions come
    from the detection helpers in this module.

    Returns:
        {
            "name": str,              # directory name of the resolved root
            "language": str,
            "watched_dirs": list[str],
            "file_extensions": list[str],
            "collection_name": str,   # lowercased name with "-", " " and "." → "_"
        }
    """
    project_root = root.resolve()
    project_name = project_root.name

    detected_language = detect_language(project_root)
    source_dirs = detect_watched_dirs(project_root, detected_language)
    exts = language_extensions(detected_language)

    # One translation table replaces the three separators in a single pass.
    separators_to_underscore = str.maketrans("- .", "___")
    slug = project_name.lower().translate(separators_to_underscore)

    logger.info("Auto-detected: language=%s, dirs=%s, exts=%s", detected_language, source_dirs, exts)

    result = {
        "name": project_name,
        "language": detected_language,
        "watched_dirs": source_dirs,
        "file_extensions": exts,
        "collection_name": slug,
    }
    return result