codevira 1.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codevira-1.6.0.dist-info/LICENSE +21 -0
- codevira-1.6.0.dist-info/METADATA +477 -0
- codevira-1.6.0.dist-info/RECORD +58 -0
- codevira-1.6.0.dist-info/WHEEL +5 -0
- codevira-1.6.0.dist-info/entry_points.txt +2 -0
- codevira-1.6.0.dist-info/top_level.txt +2 -0
- indexer/__init__.py +1 -0
- indexer/chunker.py +428 -0
- indexer/global_db.py +197 -0
- indexer/graph_generator.py +380 -0
- indexer/index_codebase.py +588 -0
- indexer/outcome_tracker.py +172 -0
- indexer/rule_learner.py +186 -0
- indexer/sqlite_graph.py +640 -0
- indexer/treesitter_parser.py +423 -0
- mcp_server/__init__.py +1 -0
- mcp_server/__main__.py +20 -0
- mcp_server/auto_init.py +257 -0
- mcp_server/cli.py +622 -0
- mcp_server/crash_logger.py +236 -0
- mcp_server/data/__init__.py +1 -0
- mcp_server/data/agents/builder.md +84 -0
- mcp_server/data/agents/developer.md +111 -0
- mcp_server/data/agents/documenter.md +138 -0
- mcp_server/data/agents/orchestrator.md +96 -0
- mcp_server/data/agents/planner.md +106 -0
- mcp_server/data/agents/reviewer.md +82 -0
- mcp_server/data/agents/tester.md +83 -0
- mcp_server/data/config.example.yaml +33 -0
- mcp_server/data/rules/coding-standards.md +48 -0
- mcp_server/data/rules/engineering-excellence.md +28 -0
- mcp_server/data/rules/git-cicd-governance.md +32 -0
- mcp_server/data/rules/git_commits.md +130 -0
- mcp_server/data/rules/incremental-updates.md +5 -0
- mcp_server/data/rules/master_rule.md +187 -0
- mcp_server/data/rules/multi-language.md +19 -0
- mcp_server/data/rules/persistence.md +21 -0
- mcp_server/data/rules/resilience-observability.md +17 -0
- mcp_server/data/rules/smoke-testing.md +48 -0
- mcp_server/data/rules/testing-standards.md +23 -0
- mcp_server/detect.py +284 -0
- mcp_server/gitignore.py +284 -0
- mcp_server/global_sync.py +187 -0
- mcp_server/http_server.py +341 -0
- mcp_server/ide_inject.py +444 -0
- mcp_server/launchd.py +156 -0
- mcp_server/migrate.py +215 -0
- mcp_server/paths.py +256 -0
- mcp_server/prompts.py +136 -0
- mcp_server/server.py +1049 -0
- mcp_server/tools/__init__.py +0 -0
- mcp_server/tools/changesets.py +223 -0
- mcp_server/tools/code_reader.py +335 -0
- mcp_server/tools/graph.py +637 -0
- mcp_server/tools/learning.py +238 -0
- mcp_server/tools/playbook.py +89 -0
- mcp_server/tools/roadmap.py +599 -0
- mcp_server/tools/search.py +145 -0
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
---
|
|
2
|
+
trigger: always_on
|
|
3
|
+
---
|
|
4
|
+
|
|
5
|
+
# MASTER RULESET FOR AI CODE AGENTS (AUTHORITATIVE, NON-DILUTING)
|
|
6
|
+
|
|
7
|
+
This document defines HOW the AI code agent must behave.
|
|
8
|
+
It does NOT replace or override any existing rules.
|
|
9
|
+
It unifies them into a single, enforceable operating contract.
|
|
10
|
+
|
|
11
|
+
────────────────────────────────────────
|
|
12
|
+
0. CORE MENTAL MODEL (NON-NEGOTIABLE)
|
|
13
|
+
────────────────────────────────────────
|
|
14
|
+
|
|
15
|
+
INTENT and REALITY are different.
|
|
16
|
+
|
|
17
|
+
- INTENT is decided consciously and preserved.
|
|
18
|
+
- REALITY is executed factually and recorded.
|
|
19
|
+
- CODE is only allowed to freeze when both are aligned.
|
|
20
|
+
|
|
21
|
+
LLM reasons
|
|
22
|
+
→ System decides
|
|
23
|
+
→ Code enforces
|
|
24
|
+
→ Graph / Roadmap preserves intent
|
|
25
|
+
→ Session Logs preserve truth
|
|
26
|
+
→ FAQ preserves explanation
|
|
27
|
+
→ Git freezes verified state
|
|
28
|
+
|
|
29
|
+
────────────────────────────────────────
|
|
30
|
+
1. AUTOMATED MEMORY & KNOWLEDGE SYSTEM (CODEVIRA)
|
|
31
|
+
────────────────────────────────────────
|
|
32
|
+
|
|
33
|
+
### A. ARCHITECTURAL MEMORY (INTENT)
|
|
34
|
+
Tools: `update_node()`, `add_phase()`, `complete_phase()`, `update_next_action()`
|
|
35
|
+
Storage: `.codevira/graph/` and `.codevira/roadmap.yaml`
|
|
36
|
+
|
|
37
|
+
Purpose:
|
|
38
|
+
- Preserve architectural intent and direction via Graph Rules.
|
|
39
|
+
- Capture final decisions as `key_decisions` in Roadmap phases.
|
|
40
|
+
- Record "never do this again" invariants as `rules` on specific file nodes.
|
|
41
|
+
|
|
42
|
+
Rules:
|
|
43
|
+
- Graph Rules and the Roadmap represent the "Project Law."
|
|
44
|
+
- Any architectural change:
|
|
45
|
+
- MUST be explicitly approved by the user.
|
|
46
|
+
- MUST be persisted via `update_node` (new rules) or `complete_phase` (decisions).
|
|
47
|
+
- MUST be logically linked in the session log.
|
|
48
|
+
|
|
49
|
+
---
|
|
50
|
+
|
|
51
|
+
### B. EXECUTION MEMORY (TRUTH)
|
|
52
|
+
Tool: `write_session_log()`
|
|
53
|
+
Storage: `.codevira/logs/` (YAML)
|
|
54
|
+
|
|
55
|
+
Purpose:
|
|
56
|
+
- Preserve factual reality of what happened in each session.
|
|
57
|
+
- Prevent "token re-discovery" in future sessions.
|
|
58
|
+
|
|
59
|
+
Must record:
|
|
60
|
+
- The Evolution: What was suggested vs. what was actually built.
|
|
61
|
+
- The "Wrong" Paths: Rejected ideas and failed attempts.
|
|
62
|
+
- The "Why": Underlying logic and trade-offs.
|
|
63
|
+
- The "What": Precise technical changes.
|
|
64
|
+
|
|
65
|
+
Rules:
|
|
66
|
+
- Every session MUST end with a `write_session_log` call.
|
|
67
|
+
- If it is not logged in the codevira history → it does not exist.
|
|
68
|
+
|
|
69
|
+
---
|
|
70
|
+
|
|
71
|
+
### C. FAQ / USER KNOWLEDGE
|
|
72
|
+
File: `docs/FAQ.md`
|
|
73
|
+
|
|
74
|
+
Purpose:
|
|
75
|
+
- Human-readable explanation of complex behaviors and non-obvious trade-offs.
|
|
76
|
+
|
|
77
|
+
Update FAQ whenever:
|
|
78
|
+
- A behavior or workflow is finalized.
|
|
79
|
+
- A technical limitation is accepted.
|
|
80
|
+
- A previous decision is reversed.
|
|
81
|
+
|
|
82
|
+
ENFORCEMENT:
|
|
83
|
+
- If behavior changed but FAQ is not updated → WORK IS INCOMPLETE.
|
|
84
|
+
- Agent MUST STOP.
|
|
85
|
+
- No further coding, execution, or commits allowed.
|
|
86
|
+
|
|
87
|
+
────────────────────────────────────────
|
|
88
|
+
2. AGENT BEHAVIOR RULES
|
|
89
|
+
────────────────────────────────────────
|
|
90
|
+
|
|
91
|
+
1. READ BEFORE ACT
|
|
92
|
+
Always review project memory (roadmap, graph nodes) before doing anything.
|
|
93
|
+
If missing, outdated, or unclear → STOP and ask.
|
|
94
|
+
|
|
95
|
+
2. INTENT ≠ REALITY
|
|
96
|
+
- Intent → memory/roadmap
|
|
97
|
+
- Reality → session logs
|
|
98
|
+
Never mix them.
|
|
99
|
+
|
|
100
|
+
3. NO DRIFT
|
|
101
|
+
Do NOT introduce new tools, APIs, architectures, or patterns
|
|
102
|
+
unless explicitly requested.
|
|
103
|
+
|
|
104
|
+
4. NO GUESSING
|
|
105
|
+
If ambiguous, unsafe, or conflicting → FAIL FAST and explain risk.
|
|
106
|
+
|
|
107
|
+
5. SMALL & REVERSIBLE
|
|
108
|
+
- Minimal diffs only
|
|
109
|
+
- Wrap, don't rewrite
|
|
110
|
+
- Refactors only for correctness
|
|
111
|
+
- Schema, memory, persistence, telemetry changes are NEVER assumed reversible
|
|
112
|
+
|
|
113
|
+
6. EXECUTION SAFETY
|
|
114
|
+
- Unsafe or unvalidated output must NEVER reach execution layers
|
|
115
|
+
- Compiler / validator rules override LLM output
|
|
116
|
+
|
|
117
|
+
7. NO SILENT FALLBACKS
|
|
118
|
+
Any fallback, heuristic, bypass, or degraded mode MUST be:
|
|
119
|
+
- Explicit
|
|
120
|
+
- Logged
|
|
121
|
+
- Visible to the user
|
|
122
|
+
|
|
123
|
+
────────────────────────────────────────
|
|
124
|
+
3. CONFLICT RULE (ABSOLUTE)
|
|
125
|
+
────────────────────────────────────────
|
|
126
|
+
|
|
127
|
+
If a request conflicts with documented architectural decisions:
|
|
128
|
+
- STOP immediately
|
|
129
|
+
- Explain the conflict
|
|
130
|
+
- Cite the violated rule
|
|
131
|
+
- DO NOT implement
|
|
132
|
+
|
|
133
|
+
────────────────────────────────────────
|
|
134
|
+
4. CHANGE MANAGEMENT RULE
|
|
135
|
+
────────────────────────────────────────
|
|
136
|
+
|
|
137
|
+
Whenever behavior, logic, or architecture changes:
|
|
138
|
+
|
|
139
|
+
1. Log the change in session log
|
|
140
|
+
2. If intent changed → update memory ONLY after user approval
|
|
141
|
+
3. Update FAQ explaining:
|
|
142
|
+
- What changed
|
|
143
|
+
- Why it changed
|
|
144
|
+
- Impact and trade-offs
|
|
145
|
+
|
|
146
|
+
Skipping any step = incomplete work.
|
|
147
|
+
|
|
148
|
+
────────────────────────────────────────
|
|
149
|
+
5. GIT COMMIT RULES (GATEKEEPER ONLY)
|
|
150
|
+
────────────────────────────────────────
|
|
151
|
+
|
|
152
|
+
Commits DO NOT decide intent.
|
|
153
|
+
Commits ONLY freeze verified state.
|
|
154
|
+
|
|
155
|
+
### A. COMMIT AUTHORITY
|
|
156
|
+
- NEVER commit unless the user explicitly commands it.
|
|
157
|
+
|
|
158
|
+
### B. PRE-COMMIT CHECKS (ALL REQUIRED)
|
|
159
|
+
Before committing, verify:
|
|
160
|
+
|
|
161
|
+
- Session log entries exist for all work
|
|
162
|
+
- Memory changes (if any) were explicitly approved
|
|
163
|
+
- FAQ updated if decision/behavior changed
|
|
164
|
+
- Docs updated if applicable
|
|
165
|
+
- No partial or inconsistent state remains
|
|
166
|
+
|
|
167
|
+
If any check fails → DO NOT COMMIT.
|
|
168
|
+
|
|
169
|
+
### C. COMMIT MESSAGE REQUIREMENTS
|
|
170
|
+
- One-line commits are NOT allowed
|
|
171
|
+
- Commit message MUST explain:
|
|
172
|
+
- Context
|
|
173
|
+
- What changed
|
|
174
|
+
- Why it changed
|
|
175
|
+
- Decisions reinforced or changed
|
|
176
|
+
- Docs updated
|
|
177
|
+
|
|
178
|
+
Commits are permanent explanation, not just history.
|
|
179
|
+
|
|
180
|
+
────────────────────────────────────────
|
|
181
|
+
FINAL RULE (NEVER VIOLATE)
|
|
182
|
+
────────────────────────────────────────
|
|
183
|
+
|
|
184
|
+
If future you cannot answer:
|
|
185
|
+
"WHAT did we do, WHY did we do it, and WHAT changed?"
|
|
186
|
+
|
|
187
|
+
Then the agent has failed — even if the code works.
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# Multi-Language Project Rules
|
|
2
|
+
|
|
3
|
+
## TypeScript / TSX
|
|
4
|
+
- When adding a new module, re-export it from the barrel `index.ts`.
|
|
5
|
+
- Prefer `export function` over `export default` for named symbols.
|
|
6
|
+
- Use JSDoc `/** */` comments for public APIs — they are extracted by the indexer.
|
|
7
|
+
- When adding React components, co-locate styles and tests in the same directory.
|
|
8
|
+
|
|
9
|
+
## Go
|
|
10
|
+
- Exported symbols start with an uppercase letter. Name unexported helpers with lowercase.
|
|
11
|
+
- When adding a new HTTP handler, register it in the router (e.g., `routes.go`).
|
|
12
|
+
- Follow the `func (s *Server) handleX(w http.ResponseWriter, r *http.Request)` pattern.
|
|
13
|
+
- Add a `// Package <name>` doc comment at the top of each package's primary file.
|
|
14
|
+
|
|
15
|
+
## Rust
|
|
16
|
+
- When adding a new module, declare it in `mod.rs` or `lib.rs` with `pub mod <name>;`.
|
|
17
|
+
- Use `///` doc comments for public APIs — they are extracted by the indexer.
|
|
18
|
+
- Prefer `pub fn` for the public API surface; keep internals private by default.
|
|
19
|
+
- When adding a trait, provide a default implementation where sensible.
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
---
|
|
2
|
+
trigger: always_on
|
|
3
|
+
---
|
|
4
|
+
|
|
5
|
+
# Artifact and State Persistence Rule
|
|
6
|
+
|
|
7
|
+
## Objective
|
|
8
|
+
To ensure all project intelligence, planning, and task data are stored locally within the project workspace for version control and persistence, rather than the IDE's temporary internal environment.
|
|
9
|
+
|
|
10
|
+
## Mandatory Procedures
|
|
11
|
+
1. **Automated State Tracking**: Every time a decision is finalized or a phase is completed, use Codevira's roadmap tools (`complete_phase()`, `add_phase()`) to persist the data locally.
|
|
12
|
+
2. **Local Repository Truth**: Use `.codevira/` instead of any IDE temporary internal environment.
|
|
13
|
+
3. **Session Handover**: At the start of every session, you must call `get_roadmap()` and `get_full_roadmap()` to synchronize your internal state with the files stored on disk.
|
|
14
|
+
4. **No Temporary-Only Storage**: Do not finalize a task until the corresponding documentation (FAQ, Roadmap, Logs) has been committed to the local project file system.
|
|
15
|
+
|
|
16
|
+
## Directory Structure
|
|
17
|
+
Ensure the following structure is maintained:
|
|
18
|
+
- `[project-root]/rules/` (Architectural Rules)
|
|
19
|
+
- `[project-root]/.codevira/graph/` (File context and rules)
|
|
20
|
+
- `[project-root]/.codevira/logs/` (Session truth history)
|
|
21
|
+
- `[project-root]/.codevira/roadmap.yaml` (Project planning and status)
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# Rule 009: Resilience and Observability
|
|
2
|
+
|
|
3
|
+
## 1. Fault Tolerance
|
|
4
|
+
|
|
5
|
+
- **Retries**: Use exponential backoff for transient failures (git subprocess calls, file I/O).
|
|
6
|
+
- **Timeouts**: Every subprocess call MUST have a defined timeout (e.g., `timeout=3` for git commands).
|
|
7
|
+
- **Graceful Degradation**: When optional dependencies are missing (chromadb, tree-sitter), tools MUST continue with reduced functionality rather than crashing.
|
|
8
|
+
|
|
9
|
+
## 2. Observability
|
|
10
|
+
|
|
11
|
+
- **Structured Logging**: Use Python `logging` module with context metadata rather than raw print statements.
|
|
12
|
+
- **Crash Logging**: All unhandled exceptions are captured to `~/.codevira/logs/crashes.log` with automatic sanitization of sensitive data (connection strings, passwords, private IPs).
|
|
13
|
+
|
|
14
|
+
## 3. Security
|
|
15
|
+
|
|
16
|
+
- **Crash logs MUST NOT contain PII**: The crash logger sanitizes connection strings, key=value secrets, and home directory paths before writing to disk.
|
|
17
|
+
- **MCP tool responses MUST NOT expose raw secrets**: Connection strings, API keys, and tokens MUST be masked before returning to the AI agent.
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# Rule 013: Smoke Testing & Edge Case Hardening
|
|
2
|
+
|
|
3
|
+
## Objective
|
|
4
|
+
To ensure the system remains robust under extreme conditions, invalid inputs, and infrastructure failures. All major features MUST be accompanied by a comprehensive "Smoke Test" suite that covers positive, negative, and boundary scenarios.
|
|
5
|
+
|
|
6
|
+
## 1. The Smoke Test Manifesto
|
|
7
|
+
Smoke tests are NOT unit tests. They verify end-to-end system health and resilience.
|
|
8
|
+
- **Fail Fast**: If a smoke test fails, the build/deployment MUST abort.
|
|
9
|
+
- **Infrastructure Aware**: Tests must gracefully handle simulation vs. production providers.
|
|
10
|
+
- **State Neutral**: Tests must clean up after themselves or use isolated namespaces.
|
|
11
|
+
|
|
12
|
+
## 2. Mandatory Edge Case Coverage
|
|
13
|
+
Every Smoke Test suite MUST include the following scenarios:
|
|
14
|
+
|
|
15
|
+
### A. Input Extremes
|
|
16
|
+
- **Empty/Whitespace**: "", " ", "\n"
|
|
17
|
+
- **Massive Input**: 100KB+ queries or payloads.
|
|
18
|
+
- **Unicode/Multilingual**: Emoji, non-Latin scripts (CJK, Cyrillic, Hindi).
|
|
19
|
+
- **Special Characters**: SQL injection characters (', ", ;, --), control characters.
|
|
20
|
+
|
|
21
|
+
### B. Parameter Boundaries
|
|
22
|
+
- **Limits**: limit=0, limit=-1, limit=999999999
|
|
23
|
+
- **Thresholds**: Below 0.0, above 1.0, exactly 0 or 1.
|
|
24
|
+
- **Timeframes**: Distant past (1970), far future (2099), current second.
|
|
25
|
+
|
|
26
|
+
### C. Infrastructure Resilience (Chaos)
|
|
27
|
+
- **Closed Circuit**: Normal operation.
|
|
28
|
+
- **Open Circuit**: System must return InfrastructureError or graceful fallback, NOT hang.
|
|
29
|
+
- **Retry Success**: System must succeed on 2nd or 3rd attempt if initial call fails.
|
|
30
|
+
- **Latency**: System must handle slow response times (timeouts).
|
|
31
|
+
|
|
32
|
+
### D. Idempotency & Concurrency
|
|
33
|
+
- **Double-Run**: Executing the same command twice must produce identical results/state.
|
|
34
|
+
- **Race conditions**: Concurrent writes to the same entity/pattern must be protected by locks.
|
|
35
|
+
|
|
36
|
+
## 3. Implementation Standard
|
|
37
|
+
1. **Verification Scripts**: Use scripts/verify_<feature>.py.
|
|
38
|
+
2. **Rich Feedback**: Use core.logging and rich for clear PASS/FAIL reporting.
|
|
39
|
+
3. **Exit Codes**: Return non-zero exit code on any failure.
|
|
40
|
+
|
|
41
|
+
## 4. Verification Etiquette & Safety
|
|
42
|
+
|
|
43
|
+
- **Non-Destructive by Default**: Smoke tests MUST NOT perform destructive actions (like `shutdown`) as part of a shared suite (`verify_all.py`) unless explicitly marked as a "Final/Clean-up" step.
|
|
44
|
+
- **Reentrant-Proof**: If a test creates data, it should use a unique `id` or clean up after itself to avoid failing on a second run.
|
|
45
|
+
- **Authentication Resilience**:
|
|
46
|
+
- Health checks should remain public.
|
|
47
|
+
- Data APIs should require auth but the test suite MUST handle missing secrets gracefully (warn/skip rather than fail) to allow for basic health verification.
|
|
48
|
+
- **Connectivity Check**: Before running complex API tests, the suite MUST verify that the target service is actually online.
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# Rule 006: Testing Standards
|
|
2
|
+
|
|
3
|
+
## 1. Test Categories
|
|
4
|
+
|
|
5
|
+
| Category | Location | Purpose | I/O |
|
|
6
|
+
|----------|----------|---------|-----|
|
|
7
|
+
| Unit | `tests/` | Isolated functions, 1:1 per source file | None |
|
|
8
|
+
| Integration | `tests/integration/` | Cross-module flows | Real |
|
|
9
|
+
| E2E | `tests/e2e/` | Full system verify | Real |
|
|
10
|
+
|
|
11
|
+
## 2. Test Coverage Requirements
|
|
12
|
+
- Every new MCP tool MUST have corresponding unit tests.
|
|
13
|
+
- Every new module MUST have a matching `tests/test_<module>.py` file.
|
|
14
|
+
|
|
15
|
+
## 3. Test Naming & Structure
|
|
16
|
+
|
|
17
|
+
- **Naming Pattern**: `test_<action>_<condition>_<expected_result>`.
|
|
18
|
+
- **Example**: `test_search_with_empty_query_returns_empty_list()`.
|
|
19
|
+
|
|
20
|
+
## 4. Verification Policy
|
|
21
|
+
- "If it's not tested, it doesn't exist."
|
|
22
|
+
- All new features MUST include at least unit tests.
|
|
23
|
+
- Infrastructure changes MUST include contract tests.
|
mcp_server/detect.py
ADDED
|
@@ -0,0 +1,284 @@
|
|
|
1
|
+
"""
|
|
2
|
+
detect.py — Zero-config project auto-detection.
|
|
3
|
+
|
|
4
|
+
Detects language, source directories, file extensions, and project name
|
|
5
|
+
from project markers (package.json, go.mod, Cargo.toml, etc.) with zero
|
|
6
|
+
interactive prompts. Supports 15+ languages.
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import logging
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
# ---------------------------------------------------------------------------
|
|
16
|
+
# Language marker map — checked in order, first match wins
|
|
17
|
+
# ---------------------------------------------------------------------------
|
|
18
|
+
|
|
19
|
+
LANGUAGE_MARKERS: list[tuple[str, str]] = [
    # (marker file expected at the project root, language it implies).
    # Order matters: detect_language() returns on the first marker that exists.
    ("Cargo.toml", "rust"),
    ("go.mod", "go"),
    ("tsconfig.json", "typescript"),
    # Python packaging / dependency files
    ("pyproject.toml", "python"),
    ("setup.py", "python"),
    ("setup.cfg", "python"),
    ("requirements.txt", "python"),
    # JVM build files — the Kotlin-DSL Gradle script is listed before plain Gradle
    ("pom.xml", "java"),
    ("build.gradle.kts", "kotlin"),
    ("build.gradle", "java"),
    ("Gemfile", "ruby"),
    ("Package.swift", "swift"),
    ("composer.json", "php"),
    ("CMakeLists.txt", "cpp"),
    # package.json last — needs disambiguation between TS and JS
    ("package.json", "_js_or_ts"),
]
|
|
37
|
+
|
|
38
|
+
# ---------------------------------------------------------------------------
|
|
39
|
+
# Per-language conventions
|
|
40
|
+
# ---------------------------------------------------------------------------
|
|
41
|
+
|
|
42
|
+
# Conventional source directories per language, relative to the project root.
# detect_watched_dirs() consults this only after file-based discovery found
# nothing, keeping the entries that actually exist on disk.
LANGUAGE_DIRS: dict[str, list[str]] = {
    "python": ["src", "lib", "app"],
    "typescript": ["src", "lib", "app", "pages", "components"],
    "javascript": ["src", "lib", "app", "pages", "components"],
    "go": ["cmd", "pkg", "internal"],
    "rust": ["src"],
    "java": ["src/main/java", "src"],
    "kotlin": ["src/main/kotlin", "src"],
    "ruby": ["lib", "app"],
    "csharp": ["src"],
    "cpp": ["src", "include", "lib"],
    "c": ["src", "include", "lib"],
    "swift": ["Sources", "src"],
    "php": ["src", "app", "lib"],
}
|
|
57
|
+
|
|
58
|
+
# File suffixes (lower-case, with the leading dot) recognised for each language.
# Insertion order is significant: it decides which language owns a suffix that
# appears under more than one key (".h" is listed for both "cpp" and "c").
LANGUAGE_EXTENSIONS: dict[str, list[str]] = {
    "python": [".py"],
    "typescript": [".ts", ".tsx"],
    "javascript": [".js", ".jsx"],
    "go": [".go"],
    "rust": [".rs"],
    "java": [".java"],
    "kotlin": [".kt", ".kts"],
    "ruby": [".rb"],
    "csharp": [".cs"],
    "cpp": [".cpp", ".cc", ".cxx", ".h", ".hpp"],
    "c": [".c", ".h"],
    "swift": [".swift"],
    "php": [".php"],
    "solidity": [".sol"],
    "vue": [".vue"],
}

# Reverse map: extension → language (for fallback scanning)
_EXT_TO_LANG: dict[str, str] = {}
for _lang, _exts in LANGUAGE_EXTENSIONS.items():
    for _ext in _exts:
        # setdefault never overwrites, so the first language listed wins
        _EXT_TO_LANG.setdefault(_ext, _lang)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
# ---------------------------------------------------------------------------
|
|
85
|
+
# Detection functions
|
|
86
|
+
# ---------------------------------------------------------------------------
|
|
87
|
+
|
|
88
|
+
def detect_language(root: Path) -> str:
    """Return the project's primary language.

    The first entry of ``LANGUAGE_MARKERS`` whose marker file exists directly
    under the resolved ``root`` decides the result. The ``package.json``
    placeholder is resolved to TypeScript or JavaScript by a helper. When no
    marker exists, the answer comes from a file-extension scan.
    """
    project_root = root.resolve()

    matched = next(
        (lang for marker, lang in LANGUAGE_MARKERS if (project_root / marker).exists()),
        None,
    )

    if matched is None:
        # No marker file at all: count source-file extensions instead.
        return _scan_dominant_language(project_root)
    if matched == "_js_or_ts":
        return _disambiguate_js_ts(project_root)
    return matched
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def _disambiguate_js_ts(root: Path) -> str:
    """Determine if a package.json project is TypeScript or JavaScript.

    A ``tsconfig.json`` at the root settles it. Otherwise the project counts
    as TypeScript when a ``.ts``/``.tsx`` file exists in ``root`` or up to two
    directory levels below it. Dependency and build-output directories are
    ignored, so declaration files shipped by installed packages (for example
    ``node_modules/<pkg>/index.d.ts``) do not turn a JavaScript project into
    a TypeScript one.

    Args:
        root: Project root directory (the one holding ``package.json``).

    Returns:
        ``"typescript"`` or ``"javascript"``.
    """
    if (root / "tsconfig.json").exists():
        return "typescript"

    # Third-party or generated trees: TypeScript found here says nothing
    # about the language the project itself is written in.
    ignored = {"node_modules", ".git", ".codevira", "dist", "build", ".next", ".nuxt"}

    # Same depth for both suffixes: root, one level down, two levels down.
    patterns = ("*.ts", "*.tsx", "*/*.ts", "*/*.tsx", "*/*/*.ts", "*/*/*.tsx")
    for pattern in patterns:
        for match in root.glob(pattern):
            # Only the directory components matter, not the file name itself.
            if ignored.isdisjoint(match.relative_to(root).parts[:-1]):
                return "typescript"

    return "javascript"
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def _scan_dominant_language(root: Path, max_depth: int = 2) -> str:
    """Scan file extensions to find the dominant language.

    Uses gitignore-aware discovery when pathspec is available and returns its
    answer unless that answer is ``"unknown"``. Otherwise falls back to a
    depth-limited walk that counts files per language via ``_EXT_TO_LANG``.

    The walk never descends below ``max_depth`` directory levels and never
    enters the dependency/build directories listed in ``skip_dirs``, so large
    trees such as ``node_modules`` are not traversed at all. Entries are
    visited in sorted order, which makes tie-breaking between equally common
    languages deterministic.

    Args:
        root: Directory to scan.
        max_depth: Number of directory levels below ``root`` whose files are
            counted (``0`` counts only files directly in ``root``).

    Returns:
        The language with the most matching files, or ``"python"`` when no
        recognised source file is found.
    """
    try:
        from mcp_server.gitignore import discover_source_files, infer_language_from_files

        files = discover_source_files(root)
        lang = infer_language_from_files(files)
        if lang != "unknown":
            return lang
    except Exception as exc:
        # Best-effort path: any failure here just means we use the walk
        # below, but record why instead of swallowing it silently.
        logging.getLogger(__name__).debug(
            "Gitignore-aware language scan unavailable, using depth-limited walk: %s", exc
        )

    # Legacy fallback: depth-limited walk
    from collections import Counter

    counts: Counter[str] = Counter()
    skip_dirs = {".git", ".codevira", "node_modules", "__pycache__", ".venv",
                 "venv", ".tox", "dist", "build", "target", ".next", ".nuxt"}

    # Explicit stack of (directory, depth); pruning happens before descent.
    pending = [(root, 0)] if max_depth >= 0 else []
    while pending:
        directory, depth = pending.pop()
        try:
            entries = sorted(directory.iterdir())
        except OSError:
            # Unreadable or vanished directory: skip it, keep scanning the rest.
            continue
        for entry in entries:
            if entry.name in skip_dirs:
                continue
            if entry.is_dir():
                # Symlinked directories are not followed.
                if depth < max_depth and not entry.is_symlink():
                    pending.append((entry, depth + 1))
            elif entry.is_file() and entry.suffix in _EXT_TO_LANG:
                counts[_EXT_TO_LANG[entry.suffix]] += 1

    if counts:
        return counts.most_common(1)[0][0]

    return "python"  # ultimate fallback
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
# Directory names treated as never containing user code. Used for exact-name
# membership tests, both on top-level directories and while recursing in
# _dir_has_sources().
_SKIP_DIRS: set[str] = {
    # version control / tool state
    ".git",
    ".codevira",
    ".idea",
    ".vscode",
    # dependencies and virtual environments
    "node_modules",
    "vendor",
    ".venv",
    "venv",
    "env",
    ".env",
    # build output
    "dist",
    "build",
    "target",
    ".next",
    ".nuxt",
    ".turbo",
    ".eggs",
    # NOTE: a literal string, not a glob — a membership test never matches a
    # real "<pkg>.egg-info" directory; callers also check endswith("-info").
    "*.egg-info",
    "storybook-static",
    # caches
    "__pycache__",
    ".cache",
    ".tox",
    ".pytest_cache",
    ".mypy_cache",
    ".ruff_cache",
    # test / coverage artefacts
    "coverage",
    ".nyc_output",
    "htmlcov",
    "__snapshots__",
    ".storybook",
    "fixtures",
    # static content and generated schema history
    "public",
    "static",
    "assets",
    "migrations",
}
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def detect_watched_dirs(root: Path, language: str) -> list[str]:
    """
    Detect source directories by scanning the actual project.

    Strategy:
    1. Use gitignore-aware discovery to find all source files and take the
       unique top-level directories that contain them (preferring files
       with the language's extensions, else all discovered files).
    2. If that fails or finds nothing, scan each top-level directory
       manually (up to 6 levels deep) for files with those extensions.
    3. Fall back to the convention list for the language, keeping the
       directories that exist.
    4. Ultimate fallback: ["."]

    Hidden directories, names in ``_SKIP_DIRS`` and names ending in
    ``-info`` are never returned by steps 1 and 2.

    Args:
        root: Project root directory.
        language: Language key used to look up extensions and conventions;
            unknown languages fall back to ``[".py"]`` extensions.

    Returns:
        Directory names relative to ``root`` (sorted for steps 1 and 2).
    """
    extensions = set(LANGUAGE_EXTENSIONS.get(language, [".py"]))

    def _is_noise(name: str) -> bool:
        # One filter shared by both discovery strategies.
        return name.startswith(".") or name in _SKIP_DIRS or name.endswith("-info")

    # Try gitignore-aware discovery first
    try:
        from mcp_server.gitignore import discover_source_files

        files = discover_source_files(root)
        # Filter to language-appropriate files for better dir detection
        lang_files = [f for f in files if f.suffix.lower() in extensions]
        if not lang_files:
            lang_files = files  # fall through to all files if none match

        # Extract unique top-level dirs relative to project root
        top_dirs: set[str] = set()
        for f in lang_files:
            try:
                rel = f.relative_to(root)
            except ValueError:
                # File is not under root; it cannot name a top-level dir.
                continue
            if len(rel.parts) > 1:
                top_dirs.add(rel.parts[0])

        found = sorted(d for d in top_dirs if not _is_noise(d))
        if found:
            return found
    except Exception as exc:
        # Best-effort: fall through to the manual scan, but leave a trace.
        logger.debug(
            "Gitignore-aware directory discovery failed, scanning manually: %s", exc
        )

    # Legacy fallback: scan top-level dirs manually
    found_legacy: list[str] = []
    try:
        for entry in sorted(root.iterdir()):
            if not entry.is_dir() or _is_noise(entry.name):
                continue
            if _dir_has_sources(entry, extensions, max_depth=6):
                found_legacy.append(entry.name)
    except PermissionError as exc:
        # Keep whatever was collected before access was denied.
        logger.debug("Manual directory scan of %s stopped early: %s", root, exc)

    if found_legacy:
        return found_legacy

    # Convention fallback
    convention = [d for d in LANGUAGE_DIRS.get(language, []) if (root / d).is_dir()]
    if convention:
        return convention

    return ["."]
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def _dir_has_sources(path: Path, extensions: set[str], max_depth: int) -> bool:
    """Return True if path contains at least one file with a matching extension.

    Looks at the files directly in ``path`` first, then recurses into
    sub-directories, skipping hidden ones and names in ``_SKIP_DIRS``.
    Suffixes are also compared lower-cased, matching the case-insensitive
    filter used by ``detect_watched_dirs``.

    Args:
        path: Directory to inspect.
        extensions: Suffixes (with leading dot) that count as source files.
        max_depth: Number of directory levels to inspect, ``path`` included;
            zero or negative means nothing is inspected.

    Returns:
        True when a matching file is found within ``max_depth`` levels. False
        otherwise, including when ``path`` cannot be listed.
    """
    if max_depth <= 0:
        return False

    try:
        entries = list(path.iterdir())
    except OSError:
        # Unreadable, missing, or not a directory: treat as "no sources".
        return False

    subdirs = []
    for entry in entries:
        if entry.is_file():
            suffix = entry.suffix
            if suffix in extensions or suffix.lower() in extensions:
                return True
        elif entry.is_dir() and not entry.name.startswith(".") and entry.name not in _SKIP_DIRS:
            subdirs.append(entry)

    # Only descend once the current level is known to hold no match.
    return any(_dir_has_sources(sub, extensions, max_depth - 1) for sub in subdirs)
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def language_extensions(language: str) -> list[str]:
    """Get file extensions for a language.

    Args:
        language: Key into ``LANGUAGE_EXTENSIONS``.

    Returns:
        A new list of suffixes for ``language``, or ``[".py"]`` when the
        language is not registered. The list is a copy, so callers may
        modify it without altering the shared ``LANGUAGE_EXTENSIONS`` table.
    """
    return list(LANGUAGE_EXTENSIONS.get(language, [".py"]))
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
def auto_detect_project(root: Path) -> dict:
    """
    Gather every setting codevira init needs, without prompting.

    The project name is the resolved root directory's name. Language,
    watched directories and file extensions come from the detection helpers
    in this module. The collection name is the lower-cased project name with
    "-", " " and "." each replaced by "_".

    Returns:
        {
            "name": str,
            "language": str,
            "watched_dirs": list[str],
            "file_extensions": list[str],
            "collection_name": str,
        }
    """
    project_root = root.resolve()
    project_name = project_root.name

    detected_language = detect_language(project_root)
    source_dirs = detect_watched_dirs(project_root, detected_language)
    suffixes = language_extensions(detected_language)

    # Single pass equivalent to replacing "-", " " and "." with "_".
    separators = str.maketrans("- .", "___")
    collection = project_name.lower().translate(separators)

    logger.info("Auto-detected: language=%s, dirs=%s, exts=%s", detected_language, source_dirs, suffixes)

    result = {
        "name": project_name,
        "language": detected_language,
        "watched_dirs": source_dirs,
        "file_extensions": suffixes,
        "collection_name": collection,
    }
    return result
|