opencode-bridge 0.2.0-py3-none-any.whl → 0.4.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opencode_bridge/server.py +420 -583
- {opencode_bridge-0.2.0.dist-info → opencode_bridge-0.4.0.dist-info}/METADATA +1 -1
- opencode_bridge-0.4.0.dist-info/RECORD +7 -0
- opencode_bridge-0.2.0.dist-info/RECORD +0 -7
- {opencode_bridge-0.2.0.dist-info → opencode_bridge-0.4.0.dist-info}/WHEEL +0 -0
- {opencode_bridge-0.2.0.dist-info → opencode_bridge-0.4.0.dist-info}/entry_points.txt +0 -0
opencode_bridge/server.py
CHANGED
@@ -16,6 +16,7 @@ Configuration:
 """
 
 import os
+import re
 import json
 import asyncio
 import shutil
@@ -35,6 +36,13 @@ SMALL_FILE = 500 # lines
 MEDIUM_FILE = 1500 # lines
 LARGE_FILE = 5000 # lines
 
+# Chunked processing thresholds
+CHUNK_THRESHOLD = 2000 # lines — files above this get chunked
+CHUNK_SIZE = 800 # lines per chunk
+CHUNK_OVERLAP = 20 # overlap between adjacent chunks
+MAX_PARALLEL_CHUNKS = 6 # concurrency limit
+MAX_TOTAL_CHUNKS = 20 # safety cap
+
 # Language detection by extension
 LANG_MAP = {
     ".py": "Python", ".js": "JavaScript", ".ts": "TypeScript", ".tsx": "TypeScript/React",
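A quick sanity check on how the new constants interact; this is illustrative arithmetic only, since `chunk_file()` further down snaps cut points to code boundaries, so real counts can differ by a chunk or so:

```python
# Rough estimate of how many chunks a file yields under the defaults above.
def approx_chunk_count(total_lines: int, chunk_size: int = 800, overlap: int = 20) -> int:
    if total_lines <= chunk_size:
        return 1
    step = chunk_size - overlap          # each later chunk adds ~780 new lines
    remaining = total_lines - chunk_size
    return 1 + -(-remaining // step)     # ceiling division

# e.g. a 2,500-line file -> 1 + ceil(1700 / 780) = 4 chunks,
# well under MAX_TOTAL_CHUNKS (20), and processed by at most
# MAX_PARALLEL_CHUNKS (6) concurrent opencode runs.
```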
@@ -214,546 +222,30 @@ def build_message_prompt(message: str, file_paths: list[str]) -> str:
 
 
 # ---------------------------------------------------------------------------
-#
+# Companion System — Auto-Framing
 # ---------------------------------------------------------------------------
219
227
|
|
|
220
|
-
@dataclass
|
|
221
|
-
class DomainProfile:
|
|
222
|
-
"""Defines a domain of expertise with persona, frameworks, and approach."""
|
|
223
|
-
id: str
|
|
224
|
-
name: str
|
|
225
|
-
keywords: list[str]
|
|
226
|
-
phrases: list[str]
|
|
227
|
-
file_indicators: list[str] # file extensions or name patterns
|
|
228
|
-
expert_persona: str
|
|
229
|
-
thinking_frameworks: list[str]
|
|
230
|
-
key_questions: list[str]
|
|
231
|
-
structured_approach: list[str]
|
|
232
|
-
agent_hint: str # suggested opencode agent
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
DOMAIN_REGISTRY: dict[str, DomainProfile] = {}
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
def _register(*profiles: DomainProfile):
|
|
239
|
-
for p in profiles:
|
|
240
|
-
DOMAIN_REGISTRY[p.id] = p
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
_register(
|
|
244
|
-
DomainProfile(
|
|
245
|
-
id="architecture",
|
|
246
|
-
name="Architecture & System Design",
|
|
247
|
-
keywords=["architecture", "microservice", "monolith", "scalab", "distributed",
|
|
248
|
-
"component", "module", "layer", "decouple", "coupling", "cohesion",
|
|
249
|
-
"event", "queue", "broker", "gateway", "proxy", "load balancer"],
|
|
250
|
-
phrases=["system design", "event driven", "event sourcing", "service mesh",
|
|
251
|
-
"domain driven", "hexagonal architecture", "clean architecture",
|
|
252
|
-
"micro frontend", "message bus", "data pipeline", "cqrs"],
|
|
253
|
-
file_indicators=[".proto", ".yaml", ".yml", ".tf", ".hcl"],
|
|
254
|
-
expert_persona=(
|
|
255
|
-
"a senior distributed systems architect who has designed systems serving "
|
|
256
|
-
"millions of users. You think in terms of components, boundaries, data flow, "
|
|
257
|
-
"and failure modes. You've seen both over-engineered and under-engineered "
|
|
258
|
-
"systems and know when each approach is appropriate."
|
|
259
|
-
),
|
|
260
|
-
thinking_frameworks=["C4 model (context, containers, components, code)",
|
|
261
|
-
"CAP theorem", "DDD (bounded contexts, aggregates)",
|
|
262
|
-
"CQRS/Event Sourcing trade-offs",
|
|
263
|
-
"Twelve-Factor App principles"],
|
|
264
|
-
key_questions=["What are the key quality attributes (latency, throughput, availability)?",
|
|
265
|
-
"Where are the domain boundaries?",
|
|
266
|
-
"What data consistency model fits here?",
|
|
267
|
-
"What happens when a component fails?",
|
|
268
|
-
"How will this evolve in 6-12 months?"],
|
|
269
|
-
structured_approach=["Clarify requirements and constraints",
|
|
270
|
-
"Identify components and their responsibilities",
|
|
271
|
-
"Define interfaces and data flow",
|
|
272
|
-
"Analyze trade-offs and failure modes",
|
|
273
|
-
"Recommend with rationale"],
|
|
274
|
-
agent_hint="plan",
|
|
275
|
-
),
|
|
276
|
-
DomainProfile(
|
|
277
|
-
id="debugging",
|
|
278
|
-
name="Debugging & Troubleshooting",
|
|
279
|
-
keywords=["bug", "error", "crash", "fail", "exception", "traceback",
|
|
280
|
-
"stacktrace", "debug", "breakpoint", "segfault", "panic",
|
|
281
|
-
"hang", "freeze", "corrupt", "unexpected", "wrong"],
|
|
282
|
-
phrases=["root cause", "stack trace", "doesn't work", "stopped working",
|
|
283
|
-
"race condition", "deadlock", "memory leak", "null pointer",
|
|
284
|
-
"off by one", "regression", "flaky test", "intermittent failure"],
|
|
285
|
-
file_indicators=[".log", ".dump", ".core"],
|
|
286
|
-
expert_persona=(
|
|
287
|
-
"a seasoned debugger who has tracked down the most elusive bugs — race "
|
|
288
|
-
"conditions, heisenbugs, memory corruption, off-by-one errors hidden for "
|
|
289
|
-
"years. You are methodical, hypothesis-driven, and never jump to conclusions."
|
|
290
|
-
),
|
|
291
|
-
thinking_frameworks=["Five Whys (root cause analysis)",
|
|
292
|
-
"Scientific method (hypothesize, test, refine)",
|
|
293
|
-
"Binary search / bisection for isolating changes",
|
|
294
|
-
"Rubber duck debugging"],
|
|
295
|
-
key_questions=["When did it start happening? What changed?",
|
|
296
|
-
"Is it reproducible? Under what conditions?",
|
|
297
|
-
"What are the exact symptoms vs. expected behavior?",
|
|
298
|
-
"Have we ruled out environment differences?",
|
|
299
|
-
"What is the minimal reproduction case?"],
|
|
300
|
-
structured_approach=["Reproduce and isolate the issue",
|
|
301
|
-
"Form hypotheses ranked by likelihood",
|
|
302
|
-
"Gather evidence: logs, traces, state inspection",
|
|
303
|
-
"Narrow down via elimination",
|
|
304
|
-
"Fix, verify, and prevent regression"],
|
|
305
|
-
agent_hint="build",
|
|
306
|
-
),
|
|
307
|
-
DomainProfile(
|
|
308
|
-
id="performance",
|
|
309
|
-
name="Performance & Optimization",
|
|
310
|
-
keywords=["performance", "optimize", "bottleneck", "latency", "throughput",
|
|
311
|
-
"cache", "profil", "benchmark", "slow", "fast", "speed",
|
|
312
|
-
"memory", "cpu", "io", "bandwidth", "concurren"],
|
|
313
|
-
phrases=["cache miss", "hot path", "time complexity", "space complexity",
|
|
314
|
-
"p99 latency", "tail latency", "garbage collection", "connection pool",
|
|
315
|
-
"query plan", "flame graph", "load test"],
|
|
316
|
-
file_indicators=[".perf", ".prof", ".bench"],
|
|
317
|
-
expert_persona=(
|
|
318
|
-
"a performance engineer who obsesses over microseconds and memory allocations. "
|
|
319
|
-
"You profile before optimizing, know that premature optimization is the root of "
|
|
320
|
-
"all evil, and always ask 'what does the data say?' before recommending changes."
|
|
321
|
-
),
|
|
322
|
-
thinking_frameworks=["Amdahl's Law", "Little's Law",
|
|
323
|
-
"USE method (Utilization, Saturation, Errors)",
|
|
324
|
-
"Roofline model", "Big-O analysis with practical constants"],
|
|
325
|
-
key_questions=["What is the actual bottleneck (CPU, memory, I/O, network)?",
|
|
326
|
-
"Do we have profiling data or benchmarks?",
|
|
327
|
-
"What's the target performance? Current baseline?",
|
|
328
|
-
"What are the hot paths?",
|
|
329
|
-
"What trade-offs are acceptable (memory vs speed, complexity vs perf)?"],
|
|
330
|
-
structured_approach=["Measure current performance with profiling/benchmarks",
|
|
331
|
-
"Identify the bottleneck — do not guess",
|
|
332
|
-
"Propose targeted optimizations",
|
|
333
|
-
"Estimate impact and trade-offs",
|
|
334
|
-
"Measure again after changes"],
|
|
335
|
-
agent_hint="build",
|
|
336
|
-
),
|
|
337
|
-
DomainProfile(
|
|
338
|
-
id="security",
|
|
339
|
-
name="Security & Threat Modeling",
|
|
340
|
-
keywords=["security", "vulnerab", "auth", "token", "encrypt", "hash",
|
|
341
|
-
"ssl", "tls", "cors", "csrf", "xss", "injection", "sanitiz",
|
|
342
|
-
"permission", "privilege", "secret", "credential"],
|
|
343
|
-
phrases=["sql injection", "cross site", "threat model", "attack surface",
|
|
344
|
-
"zero trust", "defense in depth", "least privilege",
|
|
345
|
-
"owasp top 10", "security audit", "penetration test",
|
|
346
|
-
"access control", "input validation"],
|
|
347
|
-
file_indicators=[".pem", ".key", ".cert", ".env"],
|
|
348
|
-
expert_persona=(
|
|
349
|
-
"a senior application security engineer who thinks like an attacker but "
|
|
350
|
-
"builds like a defender. You know the OWASP Top 10 by heart, understand "
|
|
351
|
-
"cryptographic primitives, and always consider the full threat model."
|
|
352
|
-
),
|
|
353
|
-
thinking_frameworks=["STRIDE threat modeling",
|
|
354
|
-
"OWASP Top 10",
|
|
355
|
-
"Defense in depth",
|
|
356
|
-
"Zero trust architecture",
|
|
357
|
-
"Principle of least privilege"],
|
|
358
|
-
key_questions=["What is the threat model? Who are the adversaries?",
|
|
359
|
-
"What data is sensitive and how is it protected?",
|
|
360
|
-
"Where are the trust boundaries?",
|
|
361
|
-
"What authentication and authorization model is in use?",
|
|
362
|
-
"Are there known CVEs in dependencies?"],
|
|
363
|
-
structured_approach=["Identify assets and threat actors",
|
|
364
|
-
"Map the attack surface",
|
|
365
|
-
"Enumerate threats (STRIDE)",
|
|
366
|
-
"Assess risk (likelihood x impact)",
|
|
367
|
-
"Recommend mitigations prioritized by risk"],
|
|
368
|
-
agent_hint="plan",
|
|
369
|
-
),
|
|
370
|
-
DomainProfile(
|
|
371
|
-
id="testing",
|
|
372
|
-
name="Testing & Quality Assurance",
|
|
373
|
-
keywords=["test", "assert", "mock", "stub", "fixture", "coverage",
|
|
374
|
-
"spec", "suite", "expect", "verify", "tdd", "bdd"],
|
|
375
|
-
phrases=["unit test", "integration test", "end to end", "test coverage",
|
|
376
|
-
"test driven", "edge case", "boundary condition", "test pyramid",
|
|
377
|
-
"property based", "mutation testing", "snapshot test",
|
|
378
|
-
"regression test"],
|
|
379
|
-
file_indicators=["_test.py", "_test.go", ".test.js", ".test.ts", ".spec.js",
|
|
380
|
-
".spec.ts", "_spec.rb"],
|
|
381
|
-
expert_persona=(
|
|
382
|
-
"a testing specialist who believes tests are living documentation. You "
|
|
383
|
-
"understand the test pyramid, know when to mock and when not to, and "
|
|
384
|
-
"write tests that catch real bugs without being brittle."
|
|
385
|
-
),
|
|
386
|
-
thinking_frameworks=["Test pyramid (unit → integration → e2e)",
|
|
387
|
-
"FIRST principles (Fast, Independent, Repeatable, Self-validating, Timely)",
|
|
388
|
-
"Arrange-Act-Assert pattern",
|
|
389
|
-
"Equivalence partitioning & boundary value analysis"],
|
|
390
|
-
key_questions=["What behavior are we verifying?",
|
|
391
|
-
"What are the edge cases and boundary conditions?",
|
|
392
|
-
"Is this a unit, integration, or e2e concern?",
|
|
393
|
-
"What should we mock vs. use real implementations?",
|
|
394
|
-
"How will we know if this test is catching real bugs?"],
|
|
395
|
-
structured_approach=["Identify what behavior to test",
|
|
396
|
-
"Determine test level (unit/integration/e2e)",
|
|
397
|
-
"Design test cases covering happy path and edge cases",
|
|
398
|
-
"Write clear, maintainable assertions",
|
|
399
|
-
"Review for brittleness and false confidence"],
|
|
400
|
-
agent_hint="build",
|
|
401
|
-
),
|
|
402
|
-
DomainProfile(
|
|
403
|
-
id="devops",
|
|
404
|
-
name="DevOps & Infrastructure",
|
|
405
|
-
keywords=["deploy", "pipeline", "container", "docker", "kubernetes", "k8s",
|
|
406
|
-
"terraform", "ansible", "helm", "ci", "cd", "infra", "cloud",
|
|
407
|
-
"aws", "gcp", "azure", "monitoring", "alert", "observ"],
|
|
408
|
-
phrases=["ci/cd pipeline", "infrastructure as code", "blue green deployment",
|
|
409
|
-
"canary release", "rolling update", "auto scaling",
|
|
410
|
-
"service discovery", "container orchestration",
|
|
411
|
-
"gitops", "platform engineering"],
|
|
412
|
-
file_indicators=[".tf", ".hcl", "Dockerfile", ".yml", ".yaml",
|
|
413
|
-
"Jenkinsfile", ".github"],
|
|
414
|
-
expert_persona=(
|
|
415
|
-
"a senior DevOps/platform engineer who has managed production infrastructure "
|
|
416
|
-
"at scale. You think in terms of reliability, repeatability, and observability. "
|
|
417
|
-
"You know that every manual step is a future incident."
|
|
418
|
-
),
|
|
419
|
-
thinking_frameworks=["DORA metrics (deployment frequency, lead time, MTTR, change failure rate)",
|
|
420
|
-
"Infrastructure as Code principles",
|
|
421
|
-
"SRE golden signals (latency, traffic, errors, saturation)",
|
|
422
|
-
"GitOps workflow"],
|
|
423
|
-
key_questions=["What is the deployment target (cloud, on-prem, hybrid)?",
|
|
424
|
-
"What are the reliability requirements (SLOs)?",
|
|
425
|
-
"How do we roll back if something goes wrong?",
|
|
426
|
-
"What observability do we have?",
|
|
427
|
-
"What is the blast radius of a bad deploy?"],
|
|
428
|
-
structured_approach=["Assess current infrastructure and deployment process",
|
|
429
|
-
"Identify gaps in reliability and automation",
|
|
430
|
-
"Design pipeline and infrastructure changes",
|
|
431
|
-
"Plan rollout with rollback strategy",
|
|
432
|
-
"Define success metrics and alerts"],
|
|
433
|
-
agent_hint="plan",
|
|
434
|
-
),
|
|
435
|
-
DomainProfile(
|
|
436
|
-
id="database",
|
|
437
|
-
name="Database & Data Modeling",
|
|
438
|
-
keywords=["database", "schema", "table", "column", "index", "query",
|
|
439
|
-
"sql", "nosql", "migration", "join", "foreign key", "primary key",
|
|
440
|
-
"transaction", "acid", "normali", "partition", "shard", "replica"],
|
|
441
|
-
phrases=["query optimization", "execution plan", "database migration",
|
|
442
|
-
"data model", "schema design", "query plan", "n+1 query",
|
|
443
|
-
"connection pool", "read replica", "write ahead log",
|
|
444
|
-
"eventual consistency"],
|
|
445
|
-
file_indicators=[".sql", ".prisma", ".migration"],
|
|
446
|
-
expert_persona=(
|
|
447
|
-
"a database architect with deep expertise in both relational and NoSQL systems. "
|
|
448
|
-
"You think about data access patterns first, schema second. You've tuned queries "
|
|
449
|
-
"from minutes to milliseconds and know when denormalization is the right call."
|
|
450
|
-
),
|
|
451
|
-
thinking_frameworks=["Normal forms (1NF through BCNF) and when to denormalize",
|
|
452
|
-
"ACID vs BASE trade-offs",
|
|
453
|
-
"Index design (B-tree, hash, composite, covering)",
|
|
454
|
-
"CAP theorem applied to data stores"],
|
|
455
|
-
key_questions=["What are the primary access patterns (reads vs writes)?",
|
|
456
|
-
"What consistency guarantees are needed?",
|
|
457
|
-
"How much data and what growth rate?",
|
|
458
|
-
"What are the query performance requirements?",
|
|
459
|
-
"How will the schema evolve?"],
|
|
460
|
-
structured_approach=["Understand access patterns and data relationships",
|
|
461
|
-
"Design schema to match access patterns",
|
|
462
|
-
"Plan indexing strategy",
|
|
463
|
-
"Consider partitioning/sharding needs",
|
|
464
|
-
"Design migration path from current state"],
|
|
465
|
-
agent_hint="build",
|
|
466
|
-
),
|
|
467
|
-
DomainProfile(
|
|
468
|
-
id="api_design",
|
|
469
|
-
name="API Design",
|
|
470
|
-
keywords=["api", "endpoint", "rest", "graphql", "grpc", "webhook",
|
|
471
|
-
"pagination", "versioning", "rate limit", "openapi", "swagger",
|
|
472
|
-
"request", "response", "payload", "header", "status code"],
|
|
473
|
-
phrases=["rest api", "api design", "api versioning", "breaking change",
|
|
474
|
-
"backward compatible", "content negotiation", "hateoas",
|
|
475
|
-
"api gateway", "graphql schema", "api contract"],
|
|
476
|
-
file_indicators=[".openapi", ".swagger", ".graphql", ".gql", ".proto"],
|
|
477
|
-
expert_persona=(
|
|
478
|
-
"a senior API designer who has built APIs used by thousands of developers. "
|
|
479
|
-
"You think about developer experience, consistency, evolvability, and "
|
|
480
|
-
"backward compatibility. You know REST deeply but aren't dogmatic about it."
|
|
481
|
-
),
|
|
482
|
-
thinking_frameworks=["REST maturity model (Richardson)",
|
|
483
|
-
"API-first design",
|
|
484
|
-
"Consumer-driven contracts",
|
|
485
|
-
"Robustness principle (be liberal in what you accept)"],
|
|
486
|
-
key_questions=["Who are the API consumers (internal, external, both)?",
|
|
487
|
-
"What operations does the API need to support?",
|
|
488
|
-
"How will we handle versioning and breaking changes?",
|
|
489
|
-
"What authentication and rate limiting model?",
|
|
490
|
-
"What error format and status code conventions?"],
|
|
491
|
-
structured_approach=["Identify resources and operations",
|
|
492
|
-
"Design URL structure and HTTP methods",
|
|
493
|
-
"Define request/response schemas",
|
|
494
|
-
"Plan versioning and error handling",
|
|
495
|
-
"Document with examples"],
|
|
496
|
-
agent_hint="plan",
|
|
497
|
-
),
|
|
498
|
-
DomainProfile(
|
|
499
|
-
id="frontend",
|
|
500
|
-
name="Frontend & UI",
|
|
501
|
-
keywords=["react", "vue", "svelte", "angular", "component", "render",
|
|
502
|
-
"state", "hook", "prop", "css", "style", "dom", "browser",
|
|
503
|
-
"responsive", "animation", "accessibility", "a11y", "ssr"],
|
|
504
|
-
phrases=["server side rendering", "client side rendering", "state management",
|
|
505
|
-
"component library", "design system", "web vitals",
|
|
506
|
-
"progressive enhancement", "single page app", "hydration",
|
|
507
|
-
"code splitting", "lazy loading"],
|
|
508
|
-
file_indicators=[".tsx", ".jsx", ".vue", ".svelte", ".css", ".scss", ".less"],
|
|
509
|
-
expert_persona=(
|
|
510
|
-
"a senior frontend architect who cares deeply about user experience, "
|
|
511
|
-
"accessibility, and performance. You've built design systems and know "
|
|
512
|
-
"that the best code is the code that makes users productive and happy."
|
|
513
|
-
),
|
|
514
|
-
thinking_frameworks=["Component composition patterns",
|
|
515
|
-
"Unidirectional data flow",
|
|
516
|
-
"Web Core Vitals (LCP, FID, CLS)",
|
|
517
|
-
"Progressive enhancement",
|
|
518
|
-
"WCAG accessibility guidelines"],
|
|
519
|
-
key_questions=["What is the target user experience?",
|
|
520
|
-
"What rendering strategy fits (SSR, CSR, ISR, SSG)?",
|
|
521
|
-
"How will we manage state (local, global, server)?",
|
|
522
|
-
"What are the accessibility requirements?",
|
|
523
|
-
"What are the performance budgets?"],
|
|
524
|
-
structured_approach=["Clarify UX requirements and constraints",
|
|
525
|
-
"Choose rendering and state management strategy",
|
|
526
|
-
"Design component hierarchy",
|
|
527
|
-
"Plan for accessibility and performance",
|
|
528
|
-
"Define testing approach (visual, interaction, a11y)"],
|
|
529
|
-
agent_hint="build",
|
|
530
|
-
),
|
|
531
|
-
DomainProfile(
|
|
532
|
-
id="algorithms",
|
|
533
|
-
name="Algorithms & Data Structures",
|
|
534
|
-
keywords=["algorithm", "complexity", "sort", "search", "graph", "tree",
|
|
535
|
-
"heap", "hash", "array", "linked list", "stack", "queue",
|
|
536
|
-
"recursive", "dynamic", "greedy", "backtrack"],
|
|
537
|
-
phrases=["time complexity", "space complexity", "dynamic programming",
|
|
538
|
-
"divide and conquer", "binary search", "breadth first",
|
|
539
|
-
"depth first", "shortest path", "minimum spanning",
|
|
540
|
-
"sliding window", "two pointer"],
|
|
541
|
-
file_indicators=[],
|
|
542
|
-
expert_persona=(
|
|
543
|
-
"a computer scientist who loves elegant solutions and rigorous analysis. "
|
|
544
|
-
"You think in terms of invariants, complexity classes, and correctness proofs. "
|
|
545
|
-
"You know that the right data structure often matters more than the algorithm."
|
|
546
|
-
),
|
|
547
|
-
thinking_frameworks=["Big-O analysis (time and space)",
|
|
548
|
-
"Problem reduction (what known problem does this map to?)",
|
|
549
|
-
"Invariant-based reasoning",
|
|
550
|
-
"Amortized analysis"],
|
|
551
|
-
key_questions=["What are the input constraints (size, range, distribution)?",
|
|
552
|
-
"What are the performance requirements?",
|
|
553
|
-
"Is there a known algorithm or pattern that applies?",
|
|
554
|
-
"Can we trade space for time (or vice versa)?",
|
|
555
|
-
"What edge cases must we handle?"],
|
|
556
|
-
structured_approach=["Understand the problem and constraints",
|
|
557
|
-
"Identify applicable patterns or known algorithms",
|
|
558
|
-
"Design solution with correctness argument",
|
|
559
|
-
"Analyze time and space complexity",
|
|
560
|
-
"Consider optimizations and edge cases"],
|
|
561
|
-
agent_hint="build",
|
|
562
|
-
),
|
|
563
|
-
DomainProfile(
|
|
564
|
-
id="code_quality",
|
|
565
|
-
name="Code Quality & Refactoring",
|
|
566
|
-
keywords=["refactor", "clean", "readab", "maintainab", "solid", "dry",
|
|
567
|
-
"smell", "debt", "pattern", "antipattern", "principle",
|
|
568
|
-
"naming", "abstraction", "duplication"],
|
|
569
|
-
phrases=["code smell", "technical debt", "design pattern", "code review",
|
|
570
|
-
"clean code", "single responsibility", "dependency injection",
|
|
571
|
-
"separation of concerns", "boy scout rule",
|
|
572
|
-
"strangler fig", "legacy code"],
|
|
573
|
-
file_indicators=[],
|
|
574
|
-
expert_persona=(
|
|
575
|
-
"a pragmatic software craftsperson who values readability over cleverness. "
|
|
576
|
-
"You refactor with purpose, not for its own sake. You know that good code "
|
|
577
|
-
"is code your teammates can understand and modify with confidence."
|
|
578
|
-
),
|
|
579
|
-
thinking_frameworks=["SOLID principles (applied pragmatically)",
|
|
580
|
-
"Refactoring patterns (Fowler)",
|
|
581
|
-
"Code smells catalog",
|
|
582
|
-
"Connascence (coupling analysis)"],
|
|
583
|
-
key_questions=["What problem is the current design causing?",
|
|
584
|
-
"Is this refactoring worth the risk and effort?",
|
|
585
|
-
"What's the minimal change that improves the situation?",
|
|
586
|
-
"How do we refactor safely (tests as safety net)?",
|
|
587
|
-
"Will this be clearer to the next person reading it?"],
|
|
588
|
-
structured_approach=["Identify the pain point or code smell",
|
|
589
|
-
"Ensure adequate test coverage before refactoring",
|
|
590
|
-
"Apply incremental, safe transformations",
|
|
591
|
-
"Verify behavior preservation after each step",
|
|
592
|
-
"Review for clarity and simplicity"],
|
|
593
|
-
agent_hint="build",
|
|
594
|
-
),
|
|
595
|
-
DomainProfile(
|
|
596
|
-
id="planning",
|
|
597
|
-
name="Project Planning & Product",
|
|
598
|
-
keywords=["plan", "roadmap", "milestone", "sprint", "epic", "story",
|
|
599
|
-
"requirement", "scope", "prioriti", "estimate", "mvp",
|
|
600
|
-
"feature", "deadline", "backlog", "stakeholder"],
|
|
601
|
-
phrases=["user story", "acceptance criteria", "definition of done",
|
|
602
|
-
"minimum viable", "project plan", "technical spec",
|
|
603
|
-
"request for comments", "design doc", "product requirement",
|
|
604
|
-
"scope creep"],
|
|
605
|
-
file_indicators=[],
|
|
606
|
-
expert_persona=(
|
|
607
|
-
"a seasoned tech lead who bridges engineering and product. You break down "
|
|
608
|
-
"ambiguous problems into concrete, shippable increments. You know that the "
|
|
609
|
-
"best plan is one the team actually follows."
|
|
610
|
-
),
|
|
611
|
-
thinking_frameworks=["User story mapping",
|
|
612
|
-
"RICE prioritization (Reach, Impact, Confidence, Effort)",
|
|
613
|
-
"MoSCoW prioritization",
|
|
614
|
-
"Incremental delivery (thin vertical slices)"],
|
|
615
|
-
key_questions=["What is the user problem we're solving?",
|
|
616
|
-
"What is the smallest thing we can ship to learn?",
|
|
617
|
-
"What are the dependencies and risks?",
|
|
618
|
-
"How will we know this succeeded?",
|
|
619
|
-
"What can we defer without losing value?"],
|
|
620
|
-
structured_approach=["Define the problem and success criteria",
|
|
621
|
-
"Break down into shippable increments",
|
|
622
|
-
"Identify dependencies, risks, and unknowns",
|
|
623
|
-
"Prioritize by value and effort",
|
|
624
|
-
"Define first concrete next steps"],
|
|
625
|
-
agent_hint="plan",
|
|
626
|
-
),
|
|
627
|
-
DomainProfile(
|
|
628
|
-
id="general",
|
|
629
|
-
name="General Discussion",
|
|
630
|
-
keywords=[],
|
|
631
|
-
phrases=[],
|
|
632
|
-
file_indicators=[],
|
|
633
|
-
expert_persona=(
|
|
634
|
-
"a knowledgeable senior engineer with broad experience across the stack. "
|
|
635
|
-
"You think clearly, communicate precisely, and always consider the broader "
|
|
636
|
-
"context before diving into details."
|
|
637
|
-
),
|
|
638
|
-
thinking_frameworks=["First principles thinking",
|
|
639
|
-
"Trade-off analysis",
|
|
640
|
-
"Systems thinking"],
|
|
641
|
-
key_questions=["What are we trying to achieve?",
|
|
642
|
-
"What are the constraints?",
|
|
643
|
-
"What are the trade-offs?"],
|
|
644
|
-
structured_approach=["Understand the question and context",
|
|
645
|
-
"Consider multiple perspectives",
|
|
646
|
-
"Analyze trade-offs",
|
|
647
|
-
"Provide a clear recommendation"],
|
|
648
|
-
agent_hint="plan",
|
|
649
|
-
),
|
|
650
|
-
)
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
@dataclass
|
|
654
|
-
class DomainDetection:
|
|
655
|
-
"""Result of domain detection."""
|
|
656
|
-
primary: DomainProfile
|
|
657
|
-
confidence: int # 0-100
|
|
658
|
-
secondary: Optional[DomainProfile] = None
|
|
659
|
-
secondary_confidence: int = 0
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
def detect_domain(
|
|
663
|
-
message: str,
|
|
664
|
-
file_paths: Optional[list[str]] = None,
|
|
665
|
-
) -> DomainDetection:
|
|
666
|
-
"""Score message against all domains and return best match.
|
|
667
|
-
|
|
668
|
-
Scoring rules:
|
|
669
|
-
- keyword match: +1 per keyword found
|
|
670
|
-
- phrase match: +2 per phrase found (phrases are more specific)
|
|
671
|
-
- file indicator: +1.5 per matching file extension/pattern
|
|
672
|
-
"""
|
|
673
|
-
text = message.lower()
|
|
674
|
-
scores: dict[str, float] = {}
|
|
675
|
-
|
|
676
|
-
for domain_id, profile in DOMAIN_REGISTRY.items():
|
|
677
|
-
if domain_id == "general":
|
|
678
|
-
continue # general is the fallback
|
|
679
|
-
score = 0.0
|
|
680
|
-
|
|
681
|
-
for kw in profile.keywords:
|
|
682
|
-
if kw in text:
|
|
683
|
-
score += 1
|
|
684
|
-
|
|
685
|
-
for phrase in profile.phrases:
|
|
686
|
-
if phrase in text:
|
|
687
|
-
score += 2
|
|
688
|
-
|
|
689
|
-
if file_paths:
|
|
690
|
-
for fp in file_paths:
|
|
691
|
-
fp_lower = fp.lower()
|
|
692
|
-
name_lower = Path(fp).name.lower()
|
|
693
|
-
for indicator in profile.file_indicators:
|
|
694
|
-
ind = indicator.lower()
|
|
695
|
-
if fp_lower.endswith(ind) or ind == name_lower or ind in fp_lower:
|
|
696
|
-
score += 1.5
|
|
697
|
-
|
|
698
|
-
if score > 0:
|
|
699
|
-
scores[domain_id] = score
|
|
700
|
-
|
|
701
|
-
if not scores:
|
|
702
|
-
return DomainDetection(
|
|
703
|
-
primary=DOMAIN_REGISTRY["general"],
|
|
704
|
-
confidence=50,
|
|
705
|
-
)
|
|
706
|
-
|
|
707
|
-
ranked = sorted(scores.items(), key=lambda x: x[1], reverse=True)
|
|
708
|
-
best_id, best_score = ranked[0]
|
|
709
|
-
|
|
710
|
-
# Confidence: scale relative to number of matches.
|
|
711
|
-
# A score of 5+ is very confident; 1 is low.
|
|
712
|
-
confidence = min(99, int(40 + best_score * 12))
|
|
713
|
-
|
|
714
|
-
result = DomainDetection(
|
|
715
|
-
primary=DOMAIN_REGISTRY[best_id],
|
|
716
|
-
confidence=confidence,
|
|
717
|
-
)
|
|
718
|
-
|
|
719
|
-
# Cross-domain detection: secondary if >60% of primary
|
|
720
|
-
if len(ranked) > 1:
|
|
721
|
-
second_id, second_score = ranked[1]
|
|
722
|
-
if second_score >= best_score * 0.6:
|
|
723
|
-
result.secondary = DOMAIN_REGISTRY[second_id]
|
|
724
|
-
result.secondary_confidence = min(99, int(40 + second_score * 12))
|
|
725
|
-
|
|
726
|
-
return result
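For readers comparing against 0.2.0, a worked example of the scoring this removed detector used, following the rules stated in its docstring:

```python
# message = "intermittent failure in CI, looks like a race condition"
# debugging: keyword "fail" (+1), phrases "intermittent failure" (+2)
#            and "race condition" (+2)                -> score 5.0
# devops:    keyword "ci" (+1)                        -> score 1.0
# confidence = min(99, int(40 + 5.0 * 12)) = 99       -> primary: debugging
# a secondary domain needs >= 60% of the best score (3.0), so devops (1.0) is dropped
```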
|
|
727
|
-
|
|
728
228
|
|
|
729
229
|
def build_companion_prompt(
|
|
730
230
|
message: str,
|
|
731
231
|
files: Optional[list[str]] = None,
|
|
732
232
|
domain_override: Optional[str] = None,
|
|
733
233
|
is_followup: bool = False,
|
|
734
|
-
) ->
|
|
735
|
-
"""Assemble a
|
|
234
|
+
) -> str:
|
|
235
|
+
"""Assemble a companion prompt that auto-detects the domain.
|
|
736
236
|
|
|
737
|
-
|
|
237
|
+
The LLM identifies the domain and adopts an appropriate expert persona.
|
|
238
|
+
An optional *domain_override* hint biases the framing toward a specific field.
|
|
738
239
|
"""
|
|
739
|
-
# Detect or override domain
|
|
740
|
-
if domain_override and domain_override in DOMAIN_REGISTRY:
|
|
741
|
-
profile = DOMAIN_REGISTRY[domain_override]
|
|
742
|
-
detection = DomainDetection(primary=profile, confidence=99)
|
|
743
|
-
else:
|
|
744
|
-
detection = detect_domain(message, files)
|
|
745
|
-
profile = detection.primary
|
|
746
|
-
|
|
747
240
|
# Follow-up: lightweight prompt
|
|
748
241
|
if is_followup:
|
|
749
|
-
|
|
242
|
+
return "\n".join([
|
|
750
243
|
"## Continuing Our Discussion",
|
|
751
244
|
"",
|
|
752
245
|
message,
|
|
753
246
|
"",
|
|
754
247
|
"Remember: challenge assumptions, consider alternatives, be explicit about trade-offs.",
|
|
755
|
-
]
|
|
756
|
-
return "\n".join(parts), detection
|
|
248
|
+
])
|
|
757
249
|
|
|
758
250
|
# --- Full initial prompt ---
|
|
759
251
|
parts = []
|
|
@@ -767,59 +259,225 @@ def build_companion_prompt(
|
|
|
767
259
|
parts.append(file_context)
|
|
768
260
|
parts.append("")
|
|
769
261
|
|
|
770
|
-
#
|
|
771
|
-
|
|
772
|
-
if
|
|
773
|
-
|
|
262
|
+
# Domain hint
|
|
263
|
+
domain_hint = ""
|
|
264
|
+
if domain_override:
|
|
265
|
+
domain_hint = (
|
|
266
|
+
f"\n\nNote: the user has indicated this is about **{domain_override}** — "
|
|
267
|
+
"frame your expertise accordingly."
|
|
268
|
+
)
|
|
774
269
|
|
|
775
|
-
# Discussion setup
|
|
776
270
|
parts.append("## Discussion Setup")
|
|
777
271
|
parts.append(
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
"
|
|
272
|
+
"Determine the **specific domain of expertise** this question belongs to "
|
|
273
|
+
"(e.g., distributed systems, metagenomics, compiler design, quantitative finance, "
|
|
274
|
+
"DevOps, security, database design, or any other field).\n"
|
|
275
|
+
"\n"
|
|
276
|
+
"Then adopt the persona of a **senior practitioner with deep, hands-on "
|
|
277
|
+
"experience** in that domain. You have:\n"
|
|
278
|
+
"- Years of practical experience solving real problems in this field\n"
|
|
279
|
+
"- Deep knowledge of the key frameworks, methods, and trade-offs\n"
|
|
280
|
+
"- Strong opinions loosely held — you recommend but explain why\n"
|
|
281
|
+
"\n"
|
|
282
|
+
"Briefly state what domain you identified and what expert lens you're "
|
|
283
|
+
f"applying (one line at the top is enough).{domain_hint}"
|
|
781
284
|
)
|
|
782
285
|
parts.append("")
|
|
783
286
|
|
|
784
|
-
# Frameworks
|
|
785
|
-
parts.append(f"### Analytical Toolkit")
|
|
786
|
-
for fw in profile.thinking_frameworks:
|
|
787
|
-
parts.append(f"- {fw}")
|
|
788
|
-
parts.append("")
|
|
789
|
-
|
|
790
|
-
# Key questions
|
|
791
|
-
parts.append("### Key Questions to Consider")
|
|
792
|
-
for q in profile.key_questions:
|
|
793
|
-
parts.append(f"- {q}")
|
|
794
|
-
parts.append("")
|
|
795
|
-
|
|
796
|
-
# Collaborative ground rules
|
|
797
287
|
parts.append("## Collaborative Ground Rules")
|
|
798
|
-
parts.append("- Think out loud, share your reasoning")
|
|
288
|
+
parts.append("- Think out loud, share your reasoning step by step")
|
|
799
289
|
parts.append("- Challenge questionable assumptions — including mine")
|
|
800
290
|
parts.append("- Lay out trade-offs explicitly: what we gain, what we lose")
|
|
291
|
+
parts.append("- Name the key analytical frameworks or methods relevant to this domain")
|
|
801
292
|
parts.append("- Propose at least one alternative I haven't considered")
|
|
802
293
|
parts.append("")
|
|
803
294
|
|
|
804
|
-
|
|
805
|
-
parts.append(
|
|
806
|
-
|
|
807
|
-
|
|
295
|
+
parts.append("## Your Approach")
|
|
296
|
+
parts.append("1. Identify the domain and the core question")
|
|
297
|
+
parts.append("2. Apply domain-specific frameworks and best practices")
|
|
298
|
+
parts.append("3. Analyze trade-offs with concrete reasoning")
|
|
299
|
+
parts.append("4. Provide a clear recommendation")
|
|
808
300
|
parts.append("")
|
|
809
301
|
|
|
810
|
-
# The question
|
|
811
302
|
parts.append("## The Question")
|
|
812
303
|
parts.append(message)
|
|
813
304
|
parts.append("")
|
|
814
305
|
|
|
815
|
-
# Synthesize
|
|
816
306
|
parts.append("## Synthesize")
|
|
817
307
|
parts.append("1. Your recommendation with rationale")
|
|
818
308
|
parts.append("2. Key trade-offs")
|
|
819
309
|
parts.append("3. Risks or blind spots")
|
|
820
310
|
parts.append("4. Open questions worth exploring")
|
|
821
311
|
|
|
822
|
-
return "\n".join(parts)
|
|
312
|
+
return "\n".join(parts)
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
# ---------------------------------------------------------------------------
|
|
316
|
+
# Chunked Processing — map-reduce for large files
|
|
317
|
+
# ---------------------------------------------------------------------------
|
|
318
|
+
|
|
319
|
+
# Regex for natural code boundaries (language-agnostic)
|
|
320
|
+
_BOUNDARY_RE = re.compile(
|
|
321
|
+
r"^(?:\s*$" # blank line
|
|
322
|
+
r"|(?:def |class |function |func |fn |pub fn |impl |module |package )" # definitions
|
|
323
|
+
r"|(?:})\s*$" # closing brace on its own line
|
|
324
|
+
r"|(?://|#|/\*|\*/).{0,80}$" # comment lines
|
|
325
|
+
r")",
|
|
326
|
+
re.MULTILINE,
|
|
327
|
+
)
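A small illustration of which lines the compiled pattern treats as snap points; this is a sketch under the regex exactly as written above:

```python
# Natural boundaries: blank lines, top-level definitions, lone closing braces, comments.
for line in ("", "def process(batch):", "class Indexer:", "}", "# TODO: simplify"):
    assert _BOUNDARY_RE.match(line) is not None

# Ordinary statement lines are not boundaries, so a cut point never snaps to them.
assert _BOUNDARY_RE.match("    total += len(chunk)") is None
```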
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
def chunk_file(
|
|
331
|
+
filepath: str,
|
|
332
|
+
chunk_size: int = CHUNK_SIZE,
|
|
333
|
+
overlap: int = CHUNK_OVERLAP,
|
|
334
|
+
) -> list[dict]:
|
|
335
|
+
"""Split a file into overlapping chunks with boundary snapping.
|
|
336
|
+
|
|
337
|
+
Returns a list of dicts with keys:
|
|
338
|
+
chunk_index, total_chunks, start_line, end_line, content, filepath
|
|
339
|
+
"""
|
|
340
|
+
p = Path(filepath)
|
|
341
|
+
try:
|
|
342
|
+
lines = p.read_text(errors="replace").splitlines(keepends=True)
|
|
343
|
+
except Exception:
|
|
344
|
+
return []
|
|
345
|
+
|
|
346
|
+
total = len(lines)
|
|
347
|
+
if total == 0:
|
|
348
|
+
return []
|
|
349
|
+
if total <= chunk_size:
|
|
350
|
+
return [{
|
|
351
|
+
"chunk_index": 0,
|
|
352
|
+
"total_chunks": 1,
|
|
353
|
+
"start_line": 1,
|
|
354
|
+
"end_line": total,
|
|
355
|
+
"content": "".join(lines),
|
|
356
|
+
"filepath": str(p),
|
|
357
|
+
}]
|
|
358
|
+
|
|
359
|
+
chunks: list[dict] = []
|
|
360
|
+
pos = 0
|
|
361
|
+
while pos < total:
|
|
362
|
+
end = min(pos + chunk_size, total)
|
|
363
|
+
|
|
364
|
+
# Snap to a natural boundary within ±50 lines of the cut point
|
|
365
|
+
if end < total:
|
|
366
|
+
best = end
|
|
367
|
+
scan_start = max(end - 50, pos + chunk_size // 2)
|
|
368
|
+
scan_end = min(end + 50, total)
|
|
369
|
+
for i in range(scan_start, scan_end):
|
|
370
|
+
if _BOUNDARY_RE.match(lines[i]):
|
|
371
|
+
best = i + 1 # include the boundary line in this chunk
|
|
372
|
+
break
|
|
373
|
+
end = best
|
|
374
|
+
|
|
375
|
+
chunk_content = "".join(lines[pos:end])
|
|
376
|
+
chunks.append({
|
|
377
|
+
"chunk_index": len(chunks),
|
|
378
|
+
"total_chunks": -1, # filled in below
|
|
379
|
+
"start_line": pos + 1, # 1-indexed
|
|
380
|
+
"end_line": end,
|
|
381
|
+
"content": chunk_content,
|
|
382
|
+
"filepath": str(p),
|
|
383
|
+
})
|
|
384
|
+
|
|
385
|
+
# Advance: overlap with previous chunk, but stop if we've reached the end
|
|
386
|
+
if end >= total:
|
|
387
|
+
break
|
|
388
|
+
pos = max(end - overlap, pos + 1)
|
|
389
|
+
|
|
390
|
+
# Fill in total_chunks
|
|
391
|
+
for c in chunks:
|
|
392
|
+
c["total_chunks"] = len(chunks)
|
|
393
|
+
|
|
394
|
+
return chunks
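A minimal usage sketch of `chunk_file`; the file path and line count are hypothetical:

```python
chunks = chunk_file("src/pipeline.py")   # hypothetical 2,000-line module
for c in chunks:
    print(f'chunk {c["chunk_index"] + 1}/{c["total_chunks"]}: '
          f'lines {c["start_line"]}-{c["end_line"]} of {c["filepath"]}')
# Expected shape with the defaults (800-line chunks, 20-line overlap):
#   chunk 1/3: lines 1-800 ...
#   chunk 2/3: lines 781-1580 ...
#   chunk 3/3: lines 1561-2000 ...
# Exact boundaries shift when snapping finds a nearby def/class/blank line.
```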
|
|
395
|
+
|
|
396
|
+
|
|
397
|
+
def build_chunk_prompt(
|
|
398
|
+
user_prompt: str,
|
|
399
|
+
chunk_info: dict,
|
|
400
|
+
file_info: dict,
|
|
401
|
+
mode: str = "discuss",
|
|
402
|
+
) -> str:
|
|
403
|
+
"""Build a focused prompt for analyzing a single file chunk."""
|
|
404
|
+
name = file_info.get("name", Path(chunk_info["filepath"]).name)
|
|
405
|
+
language = file_info.get("language", "Unknown")
|
|
406
|
+
total_lines = file_info.get("lines", "?")
|
|
407
|
+
idx = chunk_info["chunk_index"] + 1
|
|
408
|
+
total = chunk_info["total_chunks"]
|
|
409
|
+
start = chunk_info["start_line"]
|
|
410
|
+
end = chunk_info["end_line"]
|
|
411
|
+
|
|
412
|
+
parts = [
|
|
413
|
+
f"You are analyzing **chunk {idx} of {total}** from `{name}` "
|
|
414
|
+
f"({language}, {total_lines} total lines).",
|
|
415
|
+
f"This chunk covers **lines {start}–{end}**.",
|
|
416
|
+
"",
|
|
417
|
+
"## Task",
|
|
418
|
+
user_prompt,
|
|
419
|
+
"",
|
|
420
|
+
"## Instructions",
|
|
421
|
+
"- Focus ONLY on the code in this chunk",
|
|
422
|
+
"- Note any references to code that might exist outside this chunk",
|
|
423
|
+
"- Be concise — your output will be combined with analyses of other chunks",
|
|
424
|
+
"- Include line numbers for any issues found",
|
|
425
|
+
]
|
|
426
|
+
|
|
427
|
+
if mode == "review":
|
|
428
|
+
parts.append("- Categorize findings as: bug, security, design, performance, or style")
|
|
429
|
+
|
|
430
|
+
return "\n".join(parts)
|
|
431
|
+
|
|
432
|
+
|
|
433
|
+
def build_synthesis_prompt(
|
|
434
|
+
user_prompt: str,
|
|
435
|
+
chunk_results: list[dict],
|
|
436
|
+
file_infos: list[dict],
|
|
437
|
+
mode: str = "discuss",
|
|
438
|
+
) -> str:
|
|
439
|
+
"""Build a prompt that merges chunk analyses into one coherent response."""
|
|
440
|
+
file_desc = ", ".join(
|
|
441
|
+
f"`{i.get('name', '?')}` ({i.get('lines', '?')} lines)"
|
|
442
|
+
for i in file_infos
|
|
443
|
+
)
|
|
444
|
+
n = len(chunk_results)
|
|
445
|
+
|
|
446
|
+
parts = [
|
|
447
|
+
f"You analyzed a large file in **{n} chunks**. "
|
|
448
|
+
"Synthesize the chunk analyses below into one coherent response.",
|
|
449
|
+
"",
|
|
450
|
+
"## Original Request",
|
|
451
|
+
user_prompt,
|
|
452
|
+
"",
|
|
453
|
+
f"## Files Analyzed",
|
|
454
|
+
file_desc,
|
|
455
|
+
"",
|
|
456
|
+
"## Chunk Analyses",
|
|
457
|
+
]
|
|
458
|
+
|
|
459
|
+
for cr in sorted(chunk_results, key=lambda c: c.get("chunk_index", 0)):
|
|
460
|
+
idx = cr.get("chunk_index", 0) + 1
|
|
461
|
+
fp = Path(cr.get("file", "")).name
|
|
462
|
+
response = cr.get("response", "[analysis failed]")
|
|
463
|
+
if cr.get("error"):
|
|
464
|
+
response = f"[analysis failed: {cr['error']}]"
|
|
465
|
+
parts.append(f"\n### Chunk {idx} — `{fp}`")
|
|
466
|
+
parts.append(response)
|
|
467
|
+
|
|
468
|
+
parts.extend([
|
|
469
|
+
"",
|
|
470
|
+
"## Instructions",
|
|
471
|
+
"- Combine findings and remove duplicates (chunks overlap slightly)",
|
|
472
|
+
"- Organize by importance, not by chunk order",
|
|
473
|
+
"- Preserve line number references from the original analyses",
|
|
474
|
+
"- Provide an overall assessment at the top",
|
|
475
|
+
])
|
|
476
|
+
|
|
477
|
+
if mode == "review":
|
|
478
|
+
parts.append("- Group findings by category: bugs, security, design, performance, style")
|
|
479
|
+
|
|
480
|
+
return "\n".join(parts)
|
|
823
481
|
|
|
824
482
|
|
|
825
483
|
# Default configuration
|
|
@@ -988,6 +646,182 @@ class OpenCodeBridge:
|
|
|
988
646
|
except Exception as e:
|
|
989
647
|
return f"Error: {e}", 1
|
|
990
648
|
|
|
649
|
+
@staticmethod
|
|
650
|
+
def _parse_opencode_response(output: str) -> tuple[str, Optional[str]]:
|
|
651
|
+
"""Parse JSON-lines output from opencode CLI.
|
|
652
|
+
|
|
653
|
+
Returns (reply_text, session_id).
|
|
654
|
+
"""
|
|
655
|
+
reply_parts: list[str] = []
|
|
656
|
+
session_id: Optional[str] = None
|
|
657
|
+
for line in output.split("\n"):
|
|
658
|
+
if not line:
|
|
659
|
+
continue
|
|
660
|
+
try:
|
|
661
|
+
event = json.loads(line)
|
|
662
|
+
if not session_id and "sessionID" in event:
|
|
663
|
+
session_id = event["sessionID"]
|
|
664
|
+
if event.get("type") == "text":
|
|
665
|
+
text = event.get("part", {}).get("text", "")
|
|
666
|
+
if text:
|
|
667
|
+
reply_parts.append(text)
|
|
668
|
+
except json.JSONDecodeError:
|
|
669
|
+
continue
|
|
670
|
+
return "".join(reply_parts), session_id
|
|
671
|
+
|
|
672
|
+
async def _run_chunk(
|
|
673
|
+
self,
|
|
674
|
+
chunk_info: dict,
|
|
675
|
+
file_info: dict,
|
|
676
|
+
user_prompt: str,
|
|
677
|
+
session: "Session",
|
|
678
|
+
mode: str = "discuss",
|
|
679
|
+
) -> dict:
|
|
680
|
+
"""Process a single file chunk through OpenCode (stateless)."""
|
|
681
|
+
result = {
|
|
682
|
+
"chunk_index": chunk_info["chunk_index"],
|
|
683
|
+
"file": chunk_info["filepath"],
|
|
684
|
+
"response": "",
|
|
685
|
+
"error": None,
|
|
686
|
+
}
|
|
687
|
+
|
|
688
|
+
# Write chunk to a temp file preserving the original extension
|
|
689
|
+
ext = Path(chunk_info["filepath"]).suffix or ".txt"
|
|
690
|
+
tmp = None
|
|
691
|
+
try:
|
|
692
|
+
tmp = tempfile.NamedTemporaryFile(
|
|
693
|
+
mode="w", suffix=ext, delete=False, prefix="opencode_chunk_"
|
|
694
|
+
)
|
|
695
|
+
tmp.write(chunk_info["content"])
|
|
696
|
+
tmp.close()
|
|
697
|
+
|
|
698
|
+
prompt = build_chunk_prompt(user_prompt, chunk_info, file_info, mode)
|
|
699
|
+
|
|
700
|
+
args = [
|
|
701
|
+
"run", prompt,
|
|
702
|
+
"--model", session.model,
|
|
703
|
+
"--agent", session.agent,
|
|
704
|
+
"--file", tmp.name,
|
|
705
|
+
"--format", "json",
|
|
706
|
+
]
|
|
707
|
+
if session.variant:
|
|
708
|
+
args.extend(["--variant", session.variant])
|
|
709
|
+
|
|
710
|
+
output, code = await self._run_opencode(*args, timeout=300)
|
|
711
|
+
|
|
712
|
+
if code != 0:
|
|
713
|
+
result["error"] = output[:500]
|
|
714
|
+
return result
|
|
715
|
+
|
|
716
|
+
reply, _ = self._parse_opencode_response(output)
|
|
717
|
+
result["response"] = reply or "[no response]"
|
|
718
|
+
|
|
719
|
+
except Exception as e:
|
|
720
|
+
result["error"] = str(e)
|
|
721
|
+
finally:
|
|
722
|
+
if tmp:
|
|
723
|
+
try:
|
|
724
|
+
os.unlink(tmp.name)
|
|
725
|
+
except OSError:
|
|
726
|
+
pass
|
|
727
|
+
return result
|
|
728
|
+
|
|
729
|
+
async def _run_chunked(
|
|
730
|
+
self,
|
|
731
|
+
user_prompt: str,
|
|
732
|
+
files: list[str],
|
|
733
|
+
session: "Session",
|
|
734
|
+
mode: str = "discuss",
|
|
735
|
+
) -> str:
|
|
736
|
+
"""Map-reduce orchestrator: chunk large files, process in parallel, synthesize."""
|
|
737
|
+
small_files: list[str] = []
|
|
738
|
+
all_chunks: list[tuple[dict, dict]] = [] # (chunk_info, file_info)
|
|
739
|
+
|
|
740
|
+
for f in files:
|
|
741
|
+
info = get_file_info(f)
|
|
742
|
+
line_count = info.get("lines", 0)
|
|
743
|
+
if line_count > CHUNK_THRESHOLD:
|
|
744
|
+
chunks = chunk_file(f, CHUNK_SIZE, CHUNK_OVERLAP)
|
|
745
|
+
for c in chunks:
|
|
746
|
+
all_chunks.append((c, info))
|
|
747
|
+
else:
|
|
748
|
+
small_files.append(f)
|
|
749
|
+
|
|
750
|
+
# Safety: if too many chunks, increase chunk size and re-chunk
|
|
751
|
+
if len(all_chunks) > MAX_TOTAL_CHUNKS:
|
|
752
|
+
all_chunks = []
|
|
753
|
+
bigger = CHUNK_SIZE * 2
|
|
754
|
+
for f in files:
|
|
755
|
+
info = get_file_info(f)
|
|
756
|
+
if info.get("lines", 0) > CHUNK_THRESHOLD:
|
|
757
|
+
chunks = chunk_file(f, bigger, CHUNK_OVERLAP)
|
|
758
|
+
for c in chunks:
|
|
759
|
+
all_chunks.append((c, info))
|
|
760
|
+
# small_files already collected above
|
|
761
|
+
|
|
762
|
+
if not all_chunks:
|
|
763
|
+
return "No chunks to process."
|
|
764
|
+
|
|
765
|
+
# --- Map phase: run chunks in parallel ---
|
|
766
|
+
sem = asyncio.Semaphore(MAX_PARALLEL_CHUNKS)
|
|
767
|
+
|
|
768
|
+
async def _limited(chunk_info: dict, file_info: dict) -> dict:
|
|
769
|
+
async with sem:
|
|
770
|
+
return await self._run_chunk(chunk_info, file_info, user_prompt, session, mode)
|
|
771
|
+
|
|
772
|
+
tasks = [_limited(ci, fi) for ci, fi in all_chunks]
|
|
773
|
+
chunk_results: list[dict] = await asyncio.gather(*tasks)
|
|
774
|
+
|
|
775
|
+
# Check failure rate
|
|
776
|
+
failed = sum(1 for cr in chunk_results if cr.get("error"))
|
|
777
|
+
if failed > len(chunk_results) / 2:
|
|
778
|
+
return (
|
|
779
|
+
f"Chunked analysis failed: {failed}/{len(chunk_results)} chunks errored. "
|
|
780
|
+
"Try with a smaller file or increase the chunk size."
|
|
781
|
+
)
|
|
782
|
+
|
|
783
|
+
# --- Reduce phase: synthesize ---
|
|
784
|
+
file_infos = []
|
|
785
|
+
seen_paths: set[str] = set()
|
|
786
|
+
for _, fi in all_chunks:
|
|
787
|
+
fp = fi.get("path", "")
|
|
788
|
+
if fp not in seen_paths:
|
|
789
|
+
seen_paths.add(fp)
|
|
790
|
+
file_infos.append(fi)
|
|
791
|
+
|
|
792
|
+
synthesis_prompt = build_synthesis_prompt(user_prompt, chunk_results, file_infos, mode)
|
|
793
|
+
|
|
794
|
+
# Attach small files for reference context (not the large ones)
|
|
795
|
+
args = [
|
|
796
|
+
"run", synthesis_prompt,
|
|
797
|
+
"--model", session.model,
|
|
798
|
+
"--agent", session.agent,
|
|
799
|
+
"--format", "json",
|
|
800
|
+
]
|
|
801
|
+
if session.variant:
|
|
802
|
+
args.extend(["--variant", session.variant])
|
|
803
|
+
for sf in small_files:
|
|
804
|
+
args.extend(["--file", sf])
|
|
805
|
+
|
|
806
|
+
# Longer timeout for synthesis
|
|
807
|
+
output, code = await self._run_opencode(*args, timeout=600)
|
|
808
|
+
|
|
809
|
+
if code != 0:
|
|
810
|
+
# Fallback: concatenate raw chunk results
|
|
811
|
+
parts = [f"*Synthesis failed — showing raw chunk analyses:*\n"]
|
|
812
|
+
for cr in sorted(chunk_results, key=lambda c: c.get("chunk_index", 0)):
|
|
813
|
+
idx = cr.get("chunk_index", 0) + 1
|
|
814
|
+
fp = Path(cr.get("file", "")).name
|
|
815
|
+
parts.append(f"\n### Chunk {idx} — `{fp}`")
|
|
816
|
+
if cr.get("error"):
|
|
817
|
+
parts.append(f"[error: {cr['error']}]")
|
|
818
|
+
else:
|
|
819
|
+
parts.append(cr.get("response", "[no response]"))
|
|
820
|
+
return "\n".join(parts)
|
|
821
|
+
|
|
822
|
+
reply, _ = self._parse_opencode_response(output)
|
|
823
|
+
return reply or "No response from synthesis."
|
|
824
|
+
|
|
991
825
|
async def list_models(self, provider: Optional[str] = None) -> str:
|
|
992
826
|
"""List available models from OpenCode."""
|
|
993
827
|
args = ["models"]
|
|
@@ -1117,19 +951,36 @@ Set via:
|
|
|
1117
951
|
temp_file.close()
|
|
1118
952
|
files = (files or []) + [temp_file.name]
|
|
1119
953
|
|
|
954
|
+
# --- Chunking gate: large user files get map-reduce processing ---
|
|
955
|
+
user_files = [f for f in files if not Path(f).name.startswith("opencode_msg_")]
|
|
956
|
+
needs_chunking = any(
|
|
957
|
+
get_file_info(f).get("lines", 0) > CHUNK_THRESHOLD
|
|
958
|
+
for f in user_files
|
|
959
|
+
)
|
|
960
|
+
|
|
961
|
+
if needs_chunking:
|
|
962
|
+
reply = await self._run_chunked(message, user_files, session, mode="discuss")
|
|
963
|
+
# Cleanup temp file
|
|
964
|
+
try:
|
|
965
|
+
os.unlink(temp_file.name)
|
|
966
|
+
except OSError:
|
|
967
|
+
pass
|
|
968
|
+
if reply:
|
|
969
|
+
session.add_message("assistant", reply)
|
|
970
|
+
session.save(self.sessions_dir / f"{sid}.json")
|
|
971
|
+
return reply or "No response received"
|
|
972
|
+
|
|
973
|
+
# --- Normal (non-chunked) path ---
|
|
974
|
+
|
|
1120
975
|
# Build prompt: companion system unless _raw is set
|
|
1121
|
-
domain_info = ""
|
|
1122
976
|
if _raw:
|
|
1123
977
|
run_prompt = build_message_prompt(message, files)
|
|
1124
978
|
else:
|
|
1125
979
|
is_followup = len(session.messages) > 1
|
|
1126
|
-
run_prompt
|
|
980
|
+
run_prompt = build_companion_prompt(
|
|
1127
981
|
message, files, domain_override=domain_override,
|
|
1128
982
|
is_followup=is_followup,
|
|
1129
983
|
)
|
|
1130
|
-
domain_info = f"[Domain: {detection.primary.name}] [Confidence: {detection.confidence}%]"
|
|
1131
|
-
if detection.secondary:
|
|
1132
|
-
domain_info += f" [Also: {detection.secondary.name} ({detection.secondary_confidence}%)]"
|
|
1133
984
|
|
|
1134
985
|
args = ["run", run_prompt]
|
|
1135
986
|
|
|
@@ -1153,7 +1004,6 @@ Set via:
|
|
|
1153
1004
|
args.extend(["--format", "json"])
|
|
1154
1005
|
|
|
1155
1006
|
# Scale timeout based on attached file size
|
|
1156
|
-
user_files = [f for f in files if not Path(f).name.startswith("opencode_msg_")]
|
|
1157
1007
|
total_lines = sum(get_file_info(f).get("lines", 0) for f in user_files)
|
|
1158
1008
|
# Base 300s, +60s per 1000 lines above threshold, capped at 900s
|
|
1159
1009
|
timeout = min(900, 300 + max(0, (total_lines - MEDIUM_FILE) * 60 // 1000))
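A worked example of that timeout scaling, using the constants defined at the top of the file:

```python
# MEDIUM_FILE = 1500. For 4,800 attached lines:
#   300 + max(0, (4800 - 1500) * 60 // 1000) = 300 + 198 = 498 seconds.
# The min(900, ...) cap only kicks in at roughly 11,500 lines and above.
```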
|
|
@@ -1171,22 +1021,10 @@ Set via:
|
|
|
1171
1021
|
return f"Error: {output}"
|
|
1172
1022
|
|
|
1173
1023
|
# Parse JSON events for session ID and text
|
|
1174
|
-
|
|
1175
|
-
|
|
1176
|
-
|
|
1177
|
-
continue
|
|
1178
|
-
try:
|
|
1179
|
-
event = json.loads(line)
|
|
1180
|
-
if not session.opencode_session_id and "sessionID" in event:
|
|
1181
|
-
session.opencode_session_id = event["sessionID"]
|
|
1182
|
-
if event.get("type") == "text":
|
|
1183
|
-
text = event.get("part", {}).get("text", "")
|
|
1184
|
-
if text:
|
|
1185
|
-
reply_parts.append(text)
|
|
1186
|
-
except json.JSONDecodeError:
|
|
1187
|
-
continue
|
|
1024
|
+
reply, new_session_id = self._parse_opencode_response(output)
|
|
1025
|
+
if new_session_id and not session.opencode_session_id:
|
|
1026
|
+
session.opencode_session_id = new_session_id
|
|
1188
1027
|
|
|
1189
|
-
reply = "".join(reply_parts)
|
|
1190
1028
|
if reply:
|
|
1191
1029
|
session.add_message("assistant", reply)
|
|
1192
1030
|
|
|
@@ -1194,10 +1032,7 @@ Set via:
|
|
|
1194
1032
|
if reply or session.opencode_session_id:
|
|
1195
1033
|
session.save(self.sessions_dir / f"{sid}.json")
|
|
1196
1034
|
|
|
1197
|
-
|
|
1198
|
-
if domain_info:
|
|
1199
|
-
response = f"{domain_info}\n\n{response}"
|
|
1200
|
-
return response
|
|
1035
|
+
return reply or "No response received"
|
|
1201
1036
|
|
|
1202
1037
|
async def plan(
|
|
1203
1038
|
self,
|
|
@@ -1262,10 +1097,16 @@ Set via:
|
|
|
1262
1097
|
files = file_paths
|
|
1263
1098
|
file_infos = [get_file_info(f) for f in file_paths]
|
|
1264
1099
|
file_infos = [i for i in file_infos if i]
|
|
1100
|
+
total_lines = sum(i.get("lines", 0) for i in file_infos)
|
|
1101
|
+
|
|
1102
|
+
# Chunking gate for large reviews
|
|
1103
|
+
if total_lines > CHUNK_THRESHOLD:
|
|
1104
|
+
prompt = build_review_prompt(file_infos, focus)
|
|
1105
|
+
return await self._run_chunked(prompt, file_paths, self.sessions[sid], mode="review")
|
|
1106
|
+
|
|
1265
1107
|
prompt = build_review_prompt(file_infos, focus)
|
|
1266
1108
|
|
|
1267
1109
|
# Increase timeout for large files
|
|
1268
|
-
total_lines = sum(i.get("lines", 0) for i in file_infos)
|
|
1269
1110
|
if total_lines > LARGE_FILE:
|
|
1270
1111
|
# Use variant=high for large reviews if not already high+
|
|
1271
1112
|
session = self.sessions[sid]
|
|
@@ -1492,11 +1333,7 @@ async def list_tools():
|
|
|
1492
1333
|
},
|
|
1493
1334
|
"domain": {
|
|
1494
1335
|
"type": "string",
|
|
1495
|
-
"description": "
|
|
1496
|
-
"enum": ["architecture", "debugging", "performance", "security",
|
|
1497
|
-
"testing", "devops", "database", "api_design",
|
|
1498
|
-
"frontend", "algorithms", "code_quality", "planning",
|
|
1499
|
-
"general"]
|
|
1336
|
+
"description": "Hint the domain of expertise (e.g., 'security', 'metagenomics', 'quantitative finance')"
|
|
1500
1337
|
}
|
|
1501
1338
|
},
|
|
1502
1339
|
"required": ["message"]
|
|
opencode_bridge-0.4.0.dist-info/RECORD
ADDED
@@ -0,0 +1,7 @@
+opencode_bridge/__init__.py,sha256=SkXVg907MuInd7UEYOjHjiiIIT46y4S2l20hE9cShKo,92
+opencode_bridge/install.py,sha256=VOJNYUPxq88g0XizkHSQ9noM3Qcd3AfZxPUZInEKErk,1796
+opencode_bridge/server.py,sha256=Nqx8s-Bz68HObVF7SwPWQCHjoRVH0TRtvpZyMP5zNFA,57249
+opencode_bridge-0.4.0.dist-info/METADATA,sha256=jveFIMox8TFoiiiFCWw6RgHDsSiXd5ortnNX1yKLois,3924
+opencode_bridge-0.4.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+opencode_bridge-0.4.0.dist-info/entry_points.txt,sha256=8elAgeI-Sk7EPoV7kUr3CCgQyIAW2VfDj5ZXQ_9slCc,184
+opencode_bridge-0.4.0.dist-info/RECORD,,
opencode_bridge-0.2.0.dist-info/RECORD
DELETED
@@ -1,7 +0,0 @@
-opencode_bridge/__init__.py,sha256=SkXVg907MuInd7UEYOjHjiiIIT46y4S2l20hE9cShKo,92
-opencode_bridge/install.py,sha256=VOJNYUPxq88g0XizkHSQ9noM3Qcd3AfZxPUZInEKErk,1796
-opencode_bridge/server.py,sha256=vNTN7IFIQi6IaEQ1VtBgoum7s6uPKyKDrC3_y1mbTPw,72490
-opencode_bridge-0.2.0.dist-info/METADATA,sha256=s810ba1WjpaJ_Bt5Ag-hiLa1PY2msk5N_crtRdxffZg,3924
-opencode_bridge-0.2.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-opencode_bridge-0.2.0.dist-info/entry_points.txt,sha256=8elAgeI-Sk7EPoV7kUr3CCgQyIAW2VfDj5ZXQ_9slCc,184
-opencode_bridge-0.2.0.dist-info/RECORD,,
{opencode_bridge-0.2.0.dist-info → opencode_bridge-0.4.0.dist-info}/WHEEL: file without changes
{opencode_bridge-0.2.0.dist-info → opencode_bridge-0.4.0.dist-info}/entry_points.txt: file without changes