opencode-bridge 0.2.0-py3-none-any.whl → 0.4.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opencode_bridge/server.py +420 -583
- {opencode_bridge-0.2.0.dist-info → opencode_bridge-0.4.0.dist-info}/METADATA +1 -1
- opencode_bridge-0.4.0.dist-info/RECORD +7 -0
- opencode_bridge-0.2.0.dist-info/RECORD +0 -7
- {opencode_bridge-0.2.0.dist-info → opencode_bridge-0.4.0.dist-info}/WHEEL +0 -0
- {opencode_bridge-0.2.0.dist-info → opencode_bridge-0.4.0.dist-info}/entry_points.txt +0 -0
opencode_bridge/server.py
CHANGED
@@ -16,6 +16,7 @@ Configuration:
 """
 
 import os
+import re
 import json
 import asyncio
 import shutil
@@ -35,6 +36,13 @@ SMALL_FILE = 500 # lines
 MEDIUM_FILE = 1500 # lines
 LARGE_FILE = 5000 # lines
 
+# Chunked processing thresholds
+CHUNK_THRESHOLD = 2000 # lines — files above this get chunked
+CHUNK_SIZE = 800 # lines per chunk
+CHUNK_OVERLAP = 20 # overlap between adjacent chunks
+MAX_PARALLEL_CHUNKS = 6 # concurrency limit
+MAX_TOTAL_CHUNKS = 20 # safety cap
+
 # Language detection by extension
 LANG_MAP = {
     ".py": "Python", ".js": "JavaScript", ".ts": "TypeScript", ".tsx": "TypeScript/React",
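A quick sanity check on how the new constants interact; this is illustrative arithmetic only, since `chunk_file()` further down snaps cut points to code boundaries, so real counts can differ by a chunk or so:

```python
# Rough estimate of how many chunks a file yields under the defaults above.
def approx_chunk_count(total_lines: int, chunk_size: int = 800, overlap: int = 20) -> int:
    if total_lines <= chunk_size:
        return 1
    step = chunk_size - overlap          # each later chunk adds ~780 new lines
    remaining = total_lines - chunk_size
    return 1 + -(-remaining // step)     # ceiling division

# e.g. a 2,500-line file -> 1 + ceil(1700 / 780) = 4 chunks,
# well under MAX_TOTAL_CHUNKS (20), and processed by at most
# MAX_PARALLEL_CHUNKS (6) concurrent opencode runs.
```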
@@ -214,546 +222,30 @@ def build_message_prompt(message: str, file_paths: list[str]) -> str:
 
 
 # ---------------------------------------------------------------------------
-#
+# Companion System — Auto-Framing
 # ---------------------------------------------------------------------------
219
227
|
|
|
220
|
-
@dataclass
|
|
221
|
-
class DomainProfile:
|
|
222
|
-
"""Defines a domain of expertise with persona, frameworks, and approach."""
|
|
223
|
-
id: str
|
|
224
|
-
name: str
|
|
225
|
-
keywords: list[str]
|
|
226
|
-
phrases: list[str]
|
|
227
|
-
file_indicators: list[str] # file extensions or name patterns
|
|
228
|
-
expert_persona: str
|
|
229
|
-
thinking_frameworks: list[str]
|
|
230
|
-
key_questions: list[str]
|
|
231
|
-
structured_approach: list[str]
|
|
232
|
-
agent_hint: str # suggested opencode agent
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
DOMAIN_REGISTRY: dict[str, DomainProfile] = {}
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
def _register(*profiles: DomainProfile):
|
|
239
|
-
for p in profiles:
|
|
240
|
-
DOMAIN_REGISTRY[p.id] = p
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
_register(
|
|
244
|
-
DomainProfile(
|
|
245
|
-
id="architecture",
|
|
246
|
-
name="Architecture & System Design",
|
|
247
|
-
keywords=["architecture", "microservice", "monolith", "scalab", "distributed",
|
|
248
|
-
"component", "module", "layer", "decouple", "coupling", "cohesion",
|
|
249
|
-
"event", "queue", "broker", "gateway", "proxy", "load balancer"],
|
|
250
|
-
phrases=["system design", "event driven", "event sourcing", "service mesh",
|
|
251
|
-
"domain driven", "hexagonal architecture", "clean architecture",
|
|
252
|
-
"micro frontend", "message bus", "data pipeline", "cqrs"],
|
|
253
|
-
file_indicators=[".proto", ".yaml", ".yml", ".tf", ".hcl"],
|
|
254
|
-
expert_persona=(
|
|
255
|
-
"a senior distributed systems architect who has designed systems serving "
|
|
256
|
-
"millions of users. You think in terms of components, boundaries, data flow, "
|
|
257
|
-
"and failure modes. You've seen both over-engineered and under-engineered "
|
|
258
|
-
"systems and know when each approach is appropriate."
|
|
259
|
-
),
|
|
260
|
-
thinking_frameworks=["C4 model (context, containers, components, code)",
|
|
261
|
-
"CAP theorem", "DDD (bounded contexts, aggregates)",
|
|
262
|
-
"CQRS/Event Sourcing trade-offs",
|
|
263
|
-
"Twelve-Factor App principles"],
|
|
264
|
-
key_questions=["What are the key quality attributes (latency, throughput, availability)?",
|
|
265
|
-
"Where are the domain boundaries?",
|
|
266
|
-
"What data consistency model fits here?",
|
|
267
|
-
"What happens when a component fails?",
|
|
268
|
-
"How will this evolve in 6-12 months?"],
|
|
269
|
-
structured_approach=["Clarify requirements and constraints",
|
|
270
|
-
"Identify components and their responsibilities",
|
|
271
|
-
"Define interfaces and data flow",
|
|
272
|
-
"Analyze trade-offs and failure modes",
|
|
273
|
-
"Recommend with rationale"],
|
|
274
|
-
agent_hint="plan",
|
|
275
|
-
),
|
|
276
|
-
DomainProfile(
|
|
277
|
-
id="debugging",
|
|
278
|
-
name="Debugging & Troubleshooting",
|
|
279
|
-
keywords=["bug", "error", "crash", "fail", "exception", "traceback",
|
|
280
|
-
"stacktrace", "debug", "breakpoint", "segfault", "panic",
|
|
281
|
-
"hang", "freeze", "corrupt", "unexpected", "wrong"],
|
|
282
|
-
phrases=["root cause", "stack trace", "doesn't work", "stopped working",
|
|
283
|
-
"race condition", "deadlock", "memory leak", "null pointer",
|
|
284
|
-
"off by one", "regression", "flaky test", "intermittent failure"],
|
|
285
|
-
file_indicators=[".log", ".dump", ".core"],
|
|
286
|
-
expert_persona=(
|
|
287
|
-
"a seasoned debugger who has tracked down the most elusive bugs — race "
|
|
288
|
-
"conditions, heisenbugs, memory corruption, off-by-one errors hidden for "
|
|
289
|
-
"years. You are methodical, hypothesis-driven, and never jump to conclusions."
|
|
290
|
-
),
|
|
291
|
-
thinking_frameworks=["Five Whys (root cause analysis)",
|
|
292
|
-
"Scientific method (hypothesize, test, refine)",
|
|
293
|
-
"Binary search / bisection for isolating changes",
|
|
294
|
-
"Rubber duck debugging"],
|
|
295
|
-
key_questions=["When did it start happening? What changed?",
|
|
296
|
-
"Is it reproducible? Under what conditions?",
|
|
297
|
-
"What are the exact symptoms vs. expected behavior?",
|
|
298
|
-
"Have we ruled out environment differences?",
|
|
299
|
-
"What is the minimal reproduction case?"],
|
|
300
|
-
structured_approach=["Reproduce and isolate the issue",
|
|
301
|
-
"Form hypotheses ranked by likelihood",
|
|
302
|
-
"Gather evidence: logs, traces, state inspection",
|
|
303
|
-
"Narrow down via elimination",
|
|
304
|
-
"Fix, verify, and prevent regression"],
|
|
305
|
-
agent_hint="build",
|
|
306
|
-
),
|
|
307
|
-
DomainProfile(
|
|
308
|
-
id="performance",
|
|
309
|
-
name="Performance & Optimization",
|
|
310
|
-
keywords=["performance", "optimize", "bottleneck", "latency", "throughput",
|
|
311
|
-
"cache", "profil", "benchmark", "slow", "fast", "speed",
|
|
312
|
-
"memory", "cpu", "io", "bandwidth", "concurren"],
|
|
313
|
-
phrases=["cache miss", "hot path", "time complexity", "space complexity",
|
|
314
|
-
"p99 latency", "tail latency", "garbage collection", "connection pool",
|
|
315
|
-
"query plan", "flame graph", "load test"],
|
|
316
|
-
file_indicators=[".perf", ".prof", ".bench"],
|
|
317
|
-
expert_persona=(
|
|
318
|
-
"a performance engineer who obsesses over microseconds and memory allocations. "
|
|
319
|
-
"You profile before optimizing, know that premature optimization is the root of "
|
|
320
|
-
"all evil, and always ask 'what does the data say?' before recommending changes."
|
|
321
|
-
),
|
|
322
|
-
thinking_frameworks=["Amdahl's Law", "Little's Law",
|
|
323
|
-
"USE method (Utilization, Saturation, Errors)",
|
|
324
|
-
"Roofline model", "Big-O analysis with practical constants"],
|
|
325
|
-
key_questions=["What is the actual bottleneck (CPU, memory, I/O, network)?",
|
|
326
|
-
"Do we have profiling data or benchmarks?",
|
|
327
|
-
"What's the target performance? Current baseline?",
|
|
328
|
-
"What are the hot paths?",
|
|
329
|
-
"What trade-offs are acceptable (memory vs speed, complexity vs perf)?"],
|
|
330
|
-
structured_approach=["Measure current performance with profiling/benchmarks",
|
|
331
|
-
"Identify the bottleneck — do not guess",
|
|
332
|
-
"Propose targeted optimizations",
|
|
333
|
-
"Estimate impact and trade-offs",
|
|
334
|
-
"Measure again after changes"],
|
|
335
|
-
agent_hint="build",
|
|
336
|
-
),
|
|
337
|
-
DomainProfile(
|
|
338
|
-
id="security",
|
|
339
|
-
name="Security & Threat Modeling",
|
|
340
|
-
keywords=["security", "vulnerab", "auth", "token", "encrypt", "hash",
|
|
341
|
-
"ssl", "tls", "cors", "csrf", "xss", "injection", "sanitiz",
|
|
342
|
-
"permission", "privilege", "secret", "credential"],
|
|
343
|
-
phrases=["sql injection", "cross site", "threat model", "attack surface",
|
|
344
|
-
"zero trust", "defense in depth", "least privilege",
|
|
345
|
-
"owasp top 10", "security audit", "penetration test",
|
|
346
|
-
"access control", "input validation"],
|
|
347
|
-
file_indicators=[".pem", ".key", ".cert", ".env"],
|
|
348
|
-
expert_persona=(
|
|
349
|
-
"a senior application security engineer who thinks like an attacker but "
|
|
350
|
-
"builds like a defender. You know the OWASP Top 10 by heart, understand "
|
|
351
|
-
"cryptographic primitives, and always consider the full threat model."
|
|
352
|
-
),
|
|
353
|
-
thinking_frameworks=["STRIDE threat modeling",
|
|
354
|
-
"OWASP Top 10",
|
|
355
|
-
"Defense in depth",
|
|
356
|
-
"Zero trust architecture",
|
|
357
|
-
"Principle of least privilege"],
|
|
358
|
-
key_questions=["What is the threat model? Who are the adversaries?",
|
|
359
|
-
"What data is sensitive and how is it protected?",
|
|
360
|
-
"Where are the trust boundaries?",
|
|
361
|
-
"What authentication and authorization model is in use?",
|
|
362
|
-
"Are there known CVEs in dependencies?"],
|
|
363
|
-
structured_approach=["Identify assets and threat actors",
|
|
364
|
-
"Map the attack surface",
|
|
365
|
-
"Enumerate threats (STRIDE)",
|
|
366
|
-
"Assess risk (likelihood x impact)",
|
|
367
|
-
"Recommend mitigations prioritized by risk"],
|
|
368
|
-
agent_hint="plan",
|
|
369
|
-
),
|
|
370
|
-
DomainProfile(
|
|
371
|
-
id="testing",
|
|
372
|
-
name="Testing & Quality Assurance",
|
|
373
|
-
keywords=["test", "assert", "mock", "stub", "fixture", "coverage",
|
|
374
|
-
"spec", "suite", "expect", "verify", "tdd", "bdd"],
|
|
375
|
-
phrases=["unit test", "integration test", "end to end", "test coverage",
|
|
376
|
-
"test driven", "edge case", "boundary condition", "test pyramid",
|
|
377
|
-
"property based", "mutation testing", "snapshot test",
|
|
378
|
-
"regression test"],
|
|
379
|
-
file_indicators=["_test.py", "_test.go", ".test.js", ".test.ts", ".spec.js",
|
|
380
|
-
".spec.ts", "_spec.rb"],
|
|
381
|
-
expert_persona=(
|
|
382
|
-
"a testing specialist who believes tests are living documentation. You "
|
|
383
|
-
"understand the test pyramid, know when to mock and when not to, and "
|
|
384
|
-
"write tests that catch real bugs without being brittle."
|
|
385
|
-
),
|
|
386
|
-
thinking_frameworks=["Test pyramid (unit → integration → e2e)",
|
|
387
|
-
"FIRST principles (Fast, Independent, Repeatable, Self-validating, Timely)",
|
|
388
|
-
"Arrange-Act-Assert pattern",
|
|
389
|
-
"Equivalence partitioning & boundary value analysis"],
|
|
390
|
-
key_questions=["What behavior are we verifying?",
|
|
391
|
-
"What are the edge cases and boundary conditions?",
|
|
392
|
-
"Is this a unit, integration, or e2e concern?",
|
|
393
|
-
"What should we mock vs. use real implementations?",
|
|
394
|
-
"How will we know if this test is catching real bugs?"],
|
|
395
|
-
structured_approach=["Identify what behavior to test",
|
|
396
|
-
"Determine test level (unit/integration/e2e)",
|
|
397
|
-
"Design test cases covering happy path and edge cases",
|
|
398
|
-
"Write clear, maintainable assertions",
|
|
399
|
-
"Review for brittleness and false confidence"],
|
|
400
|
-
agent_hint="build",
|
|
401
|
-
),
|
|
402
|
-
DomainProfile(
|
|
403
|
-
id="devops",
|
|
404
|
-
name="DevOps & Infrastructure",
|
|
405
|
-
keywords=["deploy", "pipeline", "container", "docker", "kubernetes", "k8s",
|
|
406
|
-
"terraform", "ansible", "helm", "ci", "cd", "infra", "cloud",
|
|
407
|
-
"aws", "gcp", "azure", "monitoring", "alert", "observ"],
|
|
408
|
-
phrases=["ci/cd pipeline", "infrastructure as code", "blue green deployment",
|
|
409
|
-
"canary release", "rolling update", "auto scaling",
|
|
410
|
-
"service discovery", "container orchestration",
|
|
411
|
-
"gitops", "platform engineering"],
|
|
412
|
-
file_indicators=[".tf", ".hcl", "Dockerfile", ".yml", ".yaml",
|
|
413
|
-
"Jenkinsfile", ".github"],
|
|
414
|
-
expert_persona=(
|
|
415
|
-
"a senior DevOps/platform engineer who has managed production infrastructure "
|
|
416
|
-
"at scale. You think in terms of reliability, repeatability, and observability. "
|
|
417
|
-
"You know that every manual step is a future incident."
|
|
418
|
-
),
|
|
419
|
-
thinking_frameworks=["DORA metrics (deployment frequency, lead time, MTTR, change failure rate)",
|
|
420
|
-
"Infrastructure as Code principles",
|
|
421
|
-
"SRE golden signals (latency, traffic, errors, saturation)",
|
|
422
|
-
"GitOps workflow"],
|
|
423
|
-
key_questions=["What is the deployment target (cloud, on-prem, hybrid)?",
|
|
424
|
-
"What are the reliability requirements (SLOs)?",
|
|
425
|
-
"How do we roll back if something goes wrong?",
|
|
426
|
-
"What observability do we have?",
|
|
427
|
-
"What is the blast radius of a bad deploy?"],
|
|
428
|
-
structured_approach=["Assess current infrastructure and deployment process",
|
|
429
|
-
"Identify gaps in reliability and automation",
|
|
430
|
-
"Design pipeline and infrastructure changes",
|
|
431
|
-
"Plan rollout with rollback strategy",
|
|
432
|
-
"Define success metrics and alerts"],
|
|
433
|
-
agent_hint="plan",
|
|
434
|
-
),
|
|
435
|
-
DomainProfile(
|
|
436
|
-
id="database",
|
|
437
|
-
name="Database & Data Modeling",
|
|
438
|
-
keywords=["database", "schema", "table", "column", "index", "query",
|
|
439
|
-
"sql", "nosql", "migration", "join", "foreign key", "primary key",
|
|
440
|
-
"transaction", "acid", "normali", "partition", "shard", "replica"],
|
|
441
|
-
phrases=["query optimization", "execution plan", "database migration",
|
|
442
|
-
"data model", "schema design", "query plan", "n+1 query",
|
|
443
|
-
"connection pool", "read replica", "write ahead log",
|
|
444
|
-
"eventual consistency"],
|
|
445
|
-
file_indicators=[".sql", ".prisma", ".migration"],
|
|
446
|
-
expert_persona=(
|
|
447
|
-
"a database architect with deep expertise in both relational and NoSQL systems. "
|
|
448
|
-
"You think about data access patterns first, schema second. You've tuned queries "
|
|
449
|
-
"from minutes to milliseconds and know when denormalization is the right call."
|
|
450
|
-
),
|
|
451
|
-
thinking_frameworks=["Normal forms (1NF through BCNF) and when to denormalize",
|
|
452
|
-
"ACID vs BASE trade-offs",
|
|
453
|
-
"Index design (B-tree, hash, composite, covering)",
|
|
454
|
-
"CAP theorem applied to data stores"],
|
|
455
|
-
key_questions=["What are the primary access patterns (reads vs writes)?",
|
|
456
|
-
"What consistency guarantees are needed?",
|
|
457
|
-
"How much data and what growth rate?",
|
|
458
|
-
"What are the query performance requirements?",
|
|
459
|
-
"How will the schema evolve?"],
|
|
460
|
-
structured_approach=["Understand access patterns and data relationships",
|
|
461
|
-
"Design schema to match access patterns",
|
|
462
|
-
"Plan indexing strategy",
|
|
463
|
-
"Consider partitioning/sharding needs",
|
|
464
|
-
"Design migration path from current state"],
|
|
465
|
-
agent_hint="build",
|
|
466
|
-
),
|
|
467
|
-
DomainProfile(
|
|
468
|
-
id="api_design",
|
|
469
|
-
name="API Design",
|
|
470
|
-
keywords=["api", "endpoint", "rest", "graphql", "grpc", "webhook",
|
|
471
|
-
"pagination", "versioning", "rate limit", "openapi", "swagger",
|
|
472
|
-
"request", "response", "payload", "header", "status code"],
|
|
473
|
-
phrases=["rest api", "api design", "api versioning", "breaking change",
|
|
474
|
-
"backward compatible", "content negotiation", "hateoas",
|
|
475
|
-
"api gateway", "graphql schema", "api contract"],
|
|
476
|
-
file_indicators=[".openapi", ".swagger", ".graphql", ".gql", ".proto"],
|
|
477
|
-
expert_persona=(
|
|
478
|
-
"a senior API designer who has built APIs used by thousands of developers. "
|
|
479
|
-
"You think about developer experience, consistency, evolvability, and "
|
|
480
|
-
"backward compatibility. You know REST deeply but aren't dogmatic about it."
|
|
481
|
-
),
|
|
482
|
-
thinking_frameworks=["REST maturity model (Richardson)",
|
|
483
|
-
"API-first design",
|
|
484
|
-
"Consumer-driven contracts",
|
|
485
|
-
"Robustness principle (be liberal in what you accept)"],
|
|
486
|
-
key_questions=["Who are the API consumers (internal, external, both)?",
|
|
487
|
-
"What operations does the API need to support?",
|
|
488
|
-
"How will we handle versioning and breaking changes?",
|
|
489
|
-
"What authentication and rate limiting model?",
|
|
490
|
-
"What error format and status code conventions?"],
|
|
491
|
-
structured_approach=["Identify resources and operations",
|
|
492
|
-
"Design URL structure and HTTP methods",
|
|
493
|
-
"Define request/response schemas",
|
|
494
|
-
"Plan versioning and error handling",
|
|
495
|
-
"Document with examples"],
|
|
496
|
-
agent_hint="plan",
|
|
497
|
-
),
|
|
498
|
-
DomainProfile(
|
|
499
|
-
id="frontend",
|
|
500
|
-
name="Frontend & UI",
|
|
501
|
-
keywords=["react", "vue", "svelte", "angular", "component", "render",
|
|
502
|
-
"state", "hook", "prop", "css", "style", "dom", "browser",
|
|
503
|
-
"responsive", "animation", "accessibility", "a11y", "ssr"],
|
|
504
|
-
phrases=["server side rendering", "client side rendering", "state management",
|
|
505
|
-
"component library", "design system", "web vitals",
|
|
506
|
-
"progressive enhancement", "single page app", "hydration",
|
|
507
|
-
"code splitting", "lazy loading"],
|
|
508
|
-
file_indicators=[".tsx", ".jsx", ".vue", ".svelte", ".css", ".scss", ".less"],
|
|
509
|
-
expert_persona=(
|
|
510
|
-
"a senior frontend architect who cares deeply about user experience, "
|
|
511
|
-
"accessibility, and performance. You've built design systems and know "
|
|
512
|
-
"that the best code is the code that makes users productive and happy."
|
|
513
|
-
),
|
|
514
|
-
thinking_frameworks=["Component composition patterns",
|
|
515
|
-
"Unidirectional data flow",
|
|
516
|
-
"Web Core Vitals (LCP, FID, CLS)",
|
|
517
|
-
"Progressive enhancement",
|
|
518
|
-
"WCAG accessibility guidelines"],
|
|
519
|
-
key_questions=["What is the target user experience?",
|
|
520
|
-
"What rendering strategy fits (SSR, CSR, ISR, SSG)?",
|
|
521
|
-
"How will we manage state (local, global, server)?",
|
|
522
|
-
"What are the accessibility requirements?",
|
|
523
|
-
"What are the performance budgets?"],
|
|
524
|
-
structured_approach=["Clarify UX requirements and constraints",
|
|
525
|
-
"Choose rendering and state management strategy",
|
|
526
|
-
"Design component hierarchy",
|
|
527
|
-
"Plan for accessibility and performance",
|
|
528
|
-
"Define testing approach (visual, interaction, a11y)"],
|
|
529
|
-
agent_hint="build",
|
|
530
|
-
),
|
|
531
|
-
DomainProfile(
|
|
532
|
-
id="algorithms",
|
|
533
|
-
name="Algorithms & Data Structures",
|
|
534
|
-
keywords=["algorithm", "complexity", "sort", "search", "graph", "tree",
|
|
535
|
-
"heap", "hash", "array", "linked list", "stack", "queue",
|
|
536
|
-
"recursive", "dynamic", "greedy", "backtrack"],
|
|
537
|
-
phrases=["time complexity", "space complexity", "dynamic programming",
|
|
538
|
-
"divide and conquer", "binary search", "breadth first",
|
|
539
|
-
"depth first", "shortest path", "minimum spanning",
|
|
540
|
-
"sliding window", "two pointer"],
|
|
541
|
-
file_indicators=[],
|
|
542
|
-
expert_persona=(
|
|
543
|
-
"a computer scientist who loves elegant solutions and rigorous analysis. "
|
|
544
|
-
"You think in terms of invariants, complexity classes, and correctness proofs. "
|
|
545
|
-
"You know that the right data structure often matters more than the algorithm."
|
|
546
|
-
),
|
|
547
|
-
thinking_frameworks=["Big-O analysis (time and space)",
|
|
548
|
-
"Problem reduction (what known problem does this map to?)",
|
|
549
|
-
"Invariant-based reasoning",
|
|
550
|
-
"Amortized analysis"],
|
|
551
|
-
key_questions=["What are the input constraints (size, range, distribution)?",
|
|
552
|
-
"What are the performance requirements?",
|
|
553
|
-
"Is there a known algorithm or pattern that applies?",
|
|
554
|
-
"Can we trade space for time (or vice versa)?",
|
|
555
|
-
"What edge cases must we handle?"],
|
|
556
|
-
structured_approach=["Understand the problem and constraints",
|
|
557
|
-
"Identify applicable patterns or known algorithms",
|
|
558
|
-
"Design solution with correctness argument",
|
|
559
|
-
"Analyze time and space complexity",
|
|
560
|
-
"Consider optimizations and edge cases"],
|
|
561
|
-
agent_hint="build",
|
|
562
|
-
),
|
|
563
|
-
DomainProfile(
|
|
564
|
-
id="code_quality",
|
|
565
|
-
name="Code Quality & Refactoring",
|
|
566
|
-
keywords=["refactor", "clean", "readab", "maintainab", "solid", "dry",
|
|
567
|
-
"smell", "debt", "pattern", "antipattern", "principle",
|
|
568
|
-
"naming", "abstraction", "duplication"],
|
|
569
|
-
phrases=["code smell", "technical debt", "design pattern", "code review",
|
|
570
|
-
"clean code", "single responsibility", "dependency injection",
|
|
571
|
-
"separation of concerns", "boy scout rule",
|
|
572
|
-
"strangler fig", "legacy code"],
|
|
573
|
-
file_indicators=[],
|
|
574
|
-
expert_persona=(
|
|
575
|
-
"a pragmatic software craftsperson who values readability over cleverness. "
|
|
576
|
-
"You refactor with purpose, not for its own sake. You know that good code "
|
|
577
|
-
"is code your teammates can understand and modify with confidence."
|
|
578
|
-
),
|
|
579
|
-
thinking_frameworks=["SOLID principles (applied pragmatically)",
|
|
580
|
-
"Refactoring patterns (Fowler)",
|
|
581
|
-
"Code smells catalog",
|
|
582
|
-
"Connascence (coupling analysis)"],
|
|
583
|
-
key_questions=["What problem is the current design causing?",
|
|
584
|
-
"Is this refactoring worth the risk and effort?",
|
|
585
|
-
"What's the minimal change that improves the situation?",
|
|
586
|
-
"How do we refactor safely (tests as safety net)?",
|
|
587
|
-
"Will this be clearer to the next person reading it?"],
|
|
588
|
-
structured_approach=["Identify the pain point or code smell",
|
|
589
|
-
"Ensure adequate test coverage before refactoring",
|
|
590
|
-
"Apply incremental, safe transformations",
|
|
591
|
-
"Verify behavior preservation after each step",
|
|
592
|
-
"Review for clarity and simplicity"],
|
|
593
|
-
agent_hint="build",
|
|
594
|
-
),
|
|
595
|
-
DomainProfile(
|
|
596
|
-
id="planning",
|
|
597
|
-
name="Project Planning & Product",
|
|
598
|
-
keywords=["plan", "roadmap", "milestone", "sprint", "epic", "story",
|
|
599
|
-
"requirement", "scope", "prioriti", "estimate", "mvp",
|
|
600
|
-
"feature", "deadline", "backlog", "stakeholder"],
|
|
601
|
-
phrases=["user story", "acceptance criteria", "definition of done",
|
|
602
|
-
"minimum viable", "project plan", "technical spec",
|
|
603
|
-
"request for comments", "design doc", "product requirement",
|
|
604
|
-
"scope creep"],
|
|
605
|
-
file_indicators=[],
|
|
606
|
-
expert_persona=(
|
|
607
|
-
"a seasoned tech lead who bridges engineering and product. You break down "
|
|
608
|
-
"ambiguous problems into concrete, shippable increments. You know that the "
|
|
609
|
-
"best plan is one the team actually follows."
|
|
610
|
-
),
|
|
611
|
-
thinking_frameworks=["User story mapping",
|
|
612
|
-
"RICE prioritization (Reach, Impact, Confidence, Effort)",
|
|
613
|
-
"MoSCoW prioritization",
|
|
614
|
-
"Incremental delivery (thin vertical slices)"],
|
|
615
|
-
key_questions=["What is the user problem we're solving?",
|
|
616
|
-
"What is the smallest thing we can ship to learn?",
|
|
617
|
-
"What are the dependencies and risks?",
|
|
618
|
-
"How will we know this succeeded?",
|
|
619
|
-
"What can we defer without losing value?"],
|
|
620
|
-
structured_approach=["Define the problem and success criteria",
|
|
621
|
-
"Break down into shippable increments",
|
|
622
|
-
"Identify dependencies, risks, and unknowns",
|
|
623
|
-
"Prioritize by value and effort",
|
|
624
|
-
"Define first concrete next steps"],
|
|
625
|
-
agent_hint="plan",
|
|
626
|
-
),
|
|
627
|
-
DomainProfile(
|
|
628
|
-
id="general",
|
|
629
|
-
name="General Discussion",
|
|
630
|
-
keywords=[],
|
|
631
|
-
phrases=[],
|
|
632
|
-
file_indicators=[],
|
|
633
|
-
expert_persona=(
|
|
634
|
-
"a knowledgeable senior engineer with broad experience across the stack. "
|
|
635
|
-
"You think clearly, communicate precisely, and always consider the broader "
|
|
636
|
-
"context before diving into details."
|
|
637
|
-
),
|
|
638
|
-
thinking_frameworks=["First principles thinking",
|
|
639
|
-
"Trade-off analysis",
|
|
640
|
-
"Systems thinking"],
|
|
641
|
-
key_questions=["What are we trying to achieve?",
|
|
642
|
-
"What are the constraints?",
|
|
643
|
-
"What are the trade-offs?"],
|
|
644
|
-
structured_approach=["Understand the question and context",
|
|
645
|
-
"Consider multiple perspectives",
|
|
646
|
-
"Analyze trade-offs",
|
|
647
|
-
"Provide a clear recommendation"],
|
|
648
|
-
agent_hint="plan",
|
|
649
|
-
),
|
|
650
|
-
)
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
@dataclass
|
|
654
|
-
class DomainDetection:
|
|
655
|
-
"""Result of domain detection."""
|
|
656
|
-
primary: DomainProfile
|
|
657
|
-
confidence: int # 0-100
|
|
658
|
-
secondary: Optional[DomainProfile] = None
|
|
659
|
-
secondary_confidence: int = 0
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
def detect_domain(
|
|
663
|
-
message: str,
|
|
664
|
-
file_paths: Optional[list[str]] = None,
|
|
665
|
-
) -> DomainDetection:
|
|
666
|
-
"""Score message against all domains and return best match.
|
|
667
|
-
|
|
668
|
-
Scoring rules:
|
|
669
|
-
- keyword match: +1 per keyword found
|
|
670
|
-
- phrase match: +2 per phrase found (phrases are more specific)
|
|
671
|
-
- file indicator: +1.5 per matching file extension/pattern
|
|
672
|
-
"""
|
|
673
|
-
text = message.lower()
|
|
674
|
-
scores: dict[str, float] = {}
|
|
675
|
-
|
|
676
|
-
for domain_id, profile in DOMAIN_REGISTRY.items():
|
|
677
|
-
if domain_id == "general":
|
|
678
|
-
continue # general is the fallback
|
|
679
|
-
score = 0.0
|
|
680
|
-
|
|
681
|
-
for kw in profile.keywords:
|
|
682
|
-
if kw in text:
|
|
683
|
-
score += 1
|
|
684
|
-
|
|
685
|
-
for phrase in profile.phrases:
|
|
686
|
-
if phrase in text:
|
|
687
|
-
score += 2
|
|
688
|
-
|
|
689
|
-
if file_paths:
|
|
690
|
-
for fp in file_paths:
|
|
691
|
-
fp_lower = fp.lower()
|
|
692
|
-
name_lower = Path(fp).name.lower()
|
|
693
|
-
for indicator in profile.file_indicators:
|
|
694
|
-
ind = indicator.lower()
|
|
695
|
-
if fp_lower.endswith(ind) or ind == name_lower or ind in fp_lower:
|
|
696
|
-
score += 1.5
|
|
697
|
-
|
|
698
|
-
if score > 0:
|
|
699
|
-
scores[domain_id] = score
|
|
700
|
-
|
|
701
|
-
if not scores:
|
|
702
|
-
return DomainDetection(
|
|
703
|
-
primary=DOMAIN_REGISTRY["general"],
|
|
704
|
-
confidence=50,
|
|
705
|
-
)
|
|
706
|
-
|
|
707
|
-
ranked = sorted(scores.items(), key=lambda x: x[1], reverse=True)
|
|
708
|
-
best_id, best_score = ranked[0]
|
|
709
|
-
|
|
710
|
-
# Confidence: scale relative to number of matches.
|
|
711
|
-
# A score of 5+ is very confident; 1 is low.
|
|
712
|
-
confidence = min(99, int(40 + best_score * 12))
|
|
713
|
-
|
|
714
|
-
result = DomainDetection(
|
|
715
|
-
primary=DOMAIN_REGISTRY[best_id],
|
|
716
|
-
confidence=confidence,
|
|
717
|
-
)
|
|
718
|
-
|
|
719
|
-
# Cross-domain detection: secondary if >60% of primary
|
|
720
|
-
if len(ranked) > 1:
|
|
721
|
-
second_id, second_score = ranked[1]
|
|
722
|
-
if second_score >= best_score * 0.6:
|
|
723
|
-
result.secondary = DOMAIN_REGISTRY[second_id]
|
|
724
|
-
result.secondary_confidence = min(99, int(40 + second_score * 12))
|
|
725
|
-
|
|
726
|
-
return result
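For readers comparing against 0.2.0, a worked example of the scoring this removed detector used, following the rules stated in its docstring:

```python
# message = "intermittent failure in CI, looks like a race condition"
# debugging: keyword "fail" (+1), phrases "intermittent failure" (+2)
#            and "race condition" (+2)                -> score 5.0
# devops:    keyword "ci" (+1)                        -> score 1.0
# confidence = min(99, int(40 + 5.0 * 12)) = 99       -> primary: debugging
# a secondary domain needs >= 60% of the best score (3.0), so devops (1.0) is dropped
```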
|
|
727
|
-
|
|
728
228
|
|
|
729
229
|
def build_companion_prompt(
|
|
730
230
|
message: str,
|
|
731
231
|
files: Optional[list[str]] = None,
|
|
732
232
|
domain_override: Optional[str] = None,
|
|
733
233
|
is_followup: bool = False,
|
|
734
|
-
) ->
|
|
735
|
-
"""Assemble a
|
|
234
|
+
) -> str:
|
|
235
|
+
"""Assemble a companion prompt that auto-detects the domain.
|
|
736
236
|
|
|
737
|
-
|
|
237
|
+
The LLM identifies the domain and adopts an appropriate expert persona.
|
|
238
|
+
An optional *domain_override* hint biases the framing toward a specific field.
|
|
738
239
|
"""
|
|
739
|
-
# Detect or override domain
|
|
740
|
-
if domain_override and domain_override in DOMAIN_REGISTRY:
|
|
741
|
-
profile = DOMAIN_REGISTRY[domain_override]
|
|
742
|
-
detection = DomainDetection(primary=profile, confidence=99)
|
|
743
|
-
else:
|
|
744
|
-
detection = detect_domain(message, files)
|
|
745
|
-
profile = detection.primary
|
|
746
|
-
|
|
747
240
|
# Follow-up: lightweight prompt
|
|
748
241
|
if is_followup:
|
|
749
|
-
|
|
242
|
+
return "\n".join([
|
|
750
243
|
"## Continuing Our Discussion",
|
|
751
244
|
"",
|
|
752
245
|
message,
|
|
753
246
|
"",
|
|
754
247
|
"Remember: challenge assumptions, consider alternatives, be explicit about trade-offs.",
|
|
755
|
-
]
|
|
756
|
-
return "\n".join(parts), detection
|
|
248
|
+
])
|
|
757
249
|
|
|
758
250
|
# --- Full initial prompt ---
|
|
759
251
|
parts = []
|
|
@@ -767,59 +259,225 @@ def build_companion_prompt(
|
|
|
767
259
|
parts.append(file_context)
|
|
768
260
|
parts.append("")
|
|
769
261
|
|
|
770
|
-
#
|
|
771
|
-
|
|
772
|
-
if
|
|
773
|
-
|
|
262
|
+
# Domain hint
|
|
263
|
+
domain_hint = ""
|
|
264
|
+
if domain_override:
|
|
265
|
+
domain_hint = (
|
|
266
|
+
f"\n\nNote: the user has indicated this is about **{domain_override}** — "
|
|
267
|
+
"frame your expertise accordingly."
|
|
268
|
+
)
|
|
774
269
|
|
|
775
|
-
# Discussion setup
|
|
776
270
|
parts.append("## Discussion Setup")
|
|
777
271
|
parts.append(
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
"
|
|
272
|
+
"Determine the **specific domain of expertise** this question belongs to "
|
|
273
|
+
"(e.g., distributed systems, metagenomics, compiler design, quantitative finance, "
|
|
274
|
+
"DevOps, security, database design, or any other field).\n"
|
|
275
|
+
"\n"
|
|
276
|
+
"Then adopt the persona of a **senior practitioner with deep, hands-on "
|
|
277
|
+
"experience** in that domain. You have:\n"
|
|
278
|
+
"- Years of practical experience solving real problems in this field\n"
|
|
279
|
+
"- Deep knowledge of the key frameworks, methods, and trade-offs\n"
|
|
280
|
+
"- Strong opinions loosely held — you recommend but explain why\n"
|
|
281
|
+
"\n"
|
|
282
|
+
"Briefly state what domain you identified and what expert lens you're "
|
|
283
|
+
f"applying (one line at the top is enough).{domain_hint}"
|
|
781
284
|
)
|
|
782
285
|
parts.append("")
|
|
783
286
|
|
|
784
|
-
# Frameworks
|
|
785
|
-
parts.append(f"### Analytical Toolkit")
|
|
786
|
-
for fw in profile.thinking_frameworks:
|
|
787
|
-
parts.append(f"- {fw}")
|
|
788
|
-
parts.append("")
|
|
789
|
-
|
|
790
|
-
# Key questions
|
|
791
|
-
parts.append("### Key Questions to Consider")
|
|
792
|
-
for q in profile.key_questions:
|
|
793
|
-
parts.append(f"- {q}")
|
|
794
|
-
parts.append("")
|
|
795
|
-
|
|
796
|
-
# Collaborative ground rules
|
|
797
287
|
parts.append("## Collaborative Ground Rules")
|
|
798
|
-
parts.append("- Think out loud, share your reasoning")
|
|
288
|
+
parts.append("- Think out loud, share your reasoning step by step")
|
|
799
289
|
parts.append("- Challenge questionable assumptions — including mine")
|
|
800
290
|
parts.append("- Lay out trade-offs explicitly: what we gain, what we lose")
|
|
291
|
+
parts.append("- Name the key analytical frameworks or methods relevant to this domain")
|
|
801
292
|
parts.append("- Propose at least one alternative I haven't considered")
|
|
802
293
|
parts.append("")
|
|
803
294
|
|
|
804
|
-
|
|
805
|
-
parts.append(
|
|
806
|
-
|
|
807
|
-
|
|
295
|
+
parts.append("## Your Approach")
|
|
296
|
+
parts.append("1. Identify the domain and the core question")
|
|
297
|
+
parts.append("2. Apply domain-specific frameworks and best practices")
|
|
298
|
+
parts.append("3. Analyze trade-offs with concrete reasoning")
|
|
299
|
+
parts.append("4. Provide a clear recommendation")
|
|
808
300
|
parts.append("")
|
|
809
301
|
|
|
810
|
-
# The question
|
|
811
302
|
parts.append("## The Question")
|
|
812
303
|
parts.append(message)
|
|
813
304
|
parts.append("")
|
|
814
305
|
|
|
815
|
-
# Synthesize
|
|
816
306
|
parts.append("## Synthesize")
|
|
817
307
|
parts.append("1. Your recommendation with rationale")
|
|
818
308
|
parts.append("2. Key trade-offs")
|
|
819
309
|
parts.append("3. Risks or blind spots")
|
|
820
310
|
parts.append("4. Open questions worth exploring")
|
|
821
311
|
|
|
822
|
-
return "\n".join(parts)
|
|
312
|
+
return "\n".join(parts)
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
# ---------------------------------------------------------------------------
|
|
316
|
+
# Chunked Processing — map-reduce for large files
|
|
317
|
+
# ---------------------------------------------------------------------------
|
|
318
|
+
|
|
319
|
+
# Regex for natural code boundaries (language-agnostic)
|
|
320
|
+
_BOUNDARY_RE = re.compile(
|
|
321
|
+
r"^(?:\s*$" # blank line
|
|
322
|
+
r"|(?:def |class |function |func |fn |pub fn |impl |module |package )" # definitions
|
|
323
|
+
r"|(?:})\s*$" # closing brace on its own line
|
|
324
|
+
r"|(?://|#|/\*|\*/).{0,80}$" # comment lines
|
|
325
|
+
r")",
|
|
326
|
+
re.MULTILINE,
|
|
327
|
+
)
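A small illustration of which lines the compiled pattern treats as snap points; this is a sketch under the regex exactly as written above:

```python
# Natural boundaries: blank lines, top-level definitions, lone closing braces, comments.
for line in ("", "def process(batch):", "class Indexer:", "}", "# TODO: simplify"):
    assert _BOUNDARY_RE.match(line) is not None

# Ordinary statement lines are not boundaries, so a cut point never snaps to them.
assert _BOUNDARY_RE.match("    total += len(chunk)") is None
```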
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
def chunk_file(
|
|
331
|
+
filepath: str,
|
|
332
|
+
chunk_size: int = CHUNK_SIZE,
|
|
333
|
+
overlap: int = CHUNK_OVERLAP,
|
|
334
|
+
) -> list[dict]:
|
|
335
|
+
"""Split a file into overlapping chunks with boundary snapping.
|
|
336
|
+
|
|
337
|
+
Returns a list of dicts with keys:
|
|
338
|
+
chunk_index, total_chunks, start_line, end_line, content, filepath
|
|
339
|
+
"""
|
|
340
|
+
p = Path(filepath)
|
|
341
|
+
try:
|
|
342
|
+
lines = p.read_text(errors="replace").splitlines(keepends=True)
|
|
343
|
+
except Exception:
|
|
344
|
+
return []
|
|
345
|
+
|
|
346
|
+
total = len(lines)
|
|
347
|
+
if total == 0:
|
|
348
|
+
return []
|
|
349
|
+
if total <= chunk_size:
|
|
350
|
+
return [{
|
|
351
|
+
"chunk_index": 0,
|
|
352
|
+
"total_chunks": 1,
|
|
353
|
+
"start_line": 1,
|
|
354
|
+
"end_line": total,
|
|
355
|
+
"content": "".join(lines),
|
|
356
|
+
"filepath": str(p),
|
|
357
|
+
}]
|
|
358
|
+
|
|
359
|
+
chunks: list[dict] = []
|
|
360
|
+
pos = 0
|
|
361
|
+
while pos < total:
|
|
362
|
+
end = min(pos + chunk_size, total)
|
|
363
|
+
|
|
364
|
+
# Snap to a natural boundary within ±50 lines of the cut point
|
|
365
|
+
if end < total:
|
|
366
|
+
best = end
|
|
367
|
+
scan_start = max(end - 50, pos + chunk_size // 2)
|
|
368
|
+
scan_end = min(end + 50, total)
|
|
369
|
+
for i in range(scan_start, scan_end):
|
|
370
|
+
if _BOUNDARY_RE.match(lines[i]):
|
|
371
|
+
best = i + 1 # include the boundary line in this chunk
|
|
372
|
+
break
|
|
373
|
+
end = best
|
|
374
|
+
|
|
375
|
+
chunk_content = "".join(lines[pos:end])
|
|
376
|
+
chunks.append({
|
|
377
|
+
"chunk_index": len(chunks),
|
|
378
|
+
"total_chunks": -1, # filled in below
|
|
379
|
+
"start_line": pos + 1, # 1-indexed
|
|
380
|
+
"end_line": end,
|
|
381
|
+
"content": chunk_content,
|
|
382
|
+
"filepath": str(p),
|
|
383
|
+
})
|
|
384
|
+
|
|
385
|
+
# Advance: overlap with previous chunk, but stop if we've reached the end
|
|
386
|
+
if end >= total:
|
|
387
|
+
break
|
|
388
|
+
pos = max(end - overlap, pos + 1)
|
|
389
|
+
|
|
390
|
+
# Fill in total_chunks
|
|
391
|
+
for c in chunks:
|
|
392
|
+
c["total_chunks"] = len(chunks)
|
|
393
|
+
|
|
394
|
+
return chunks
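A minimal usage sketch of `chunk_file`; the file path and line count are hypothetical:

```python
chunks = chunk_file("src/pipeline.py")   # hypothetical 2,000-line module
for c in chunks:
    print(f'chunk {c["chunk_index"] + 1}/{c["total_chunks"]}: '
          f'lines {c["start_line"]}-{c["end_line"]} of {c["filepath"]}')
# Expected shape with the defaults (800-line chunks, 20-line overlap):
#   chunk 1/3: lines 1-800 ...
#   chunk 2/3: lines 781-1580 ...
#   chunk 3/3: lines 1561-2000 ...
# Exact boundaries shift when snapping finds a nearby def/class/blank line.
```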
|
|
395
|
+
|
|
396
|
+
|
|
397
|
+
def build_chunk_prompt(
|
|
398
|
+
user_prompt: str,
|
|
399
|
+
chunk_info: dict,
|
|
400
|
+
file_info: dict,
|
|
401
|
+
mode: str = "discuss",
|
|
402
|
+
) -> str:
|
|
403
|
+
"""Build a focused prompt for analyzing a single file chunk."""
|
|
404
|
+
name = file_info.get("name", Path(chunk_info["filepath"]).name)
|
|
405
|
+
language = file_info.get("language", "Unknown")
|
|
406
|
+
total_lines = file_info.get("lines", "?")
|
|
407
|
+
idx = chunk_info["chunk_index"] + 1
|
|
408
|
+
total = chunk_info["total_chunks"]
|
|
409
|
+
start = chunk_info["start_line"]
|
|
410
|
+
end = chunk_info["end_line"]
|
|
411
|
+
|
|
412
|
+
parts = [
|
|
413
|
+
f"You are analyzing **chunk {idx} of {total}** from `{name}` "
|
|
414
|
+
f"({language}, {total_lines} total lines).",
|
|
415
|
+
f"This chunk covers **lines {start}–{end}**.",
|
|
416
|
+
"",
|
|
417
|
+
"## Task",
|
|
418
|
+
user_prompt,
|
|
419
|
+
"",
|
|
420
|
+
"## Instructions",
|
|
421
|
+
"- Focus ONLY on the code in this chunk",
|
|
422
|
+
"- Note any references to code that might exist outside this chunk",
|
|
423
|
+
"- Be concise — your output will be combined with analyses of other chunks",
|
|
424
|
+
"- Include line numbers for any issues found",
|
|
425
|
+
]
|
|
426
|
+
|
|
427
|
+
if mode == "review":
|
|
428
|
+
parts.append("- Categorize findings as: bug, security, design, performance, or style")
|
|
429
|
+
|
|
430
|
+
return "\n".join(parts)
|
|
431
|
+
|
|
432
|
+
|
|
433
|
+
def build_synthesis_prompt(
|
|
434
|
+
user_prompt: str,
|
|
435
|
+
chunk_results: list[dict],
|
|
436
|
+
file_infos: list[dict],
|
|
437
|
+
mode: str = "discuss",
|
|
438
|
+
) -> str:
|
|
439
|
+
"""Build a prompt that merges chunk analyses into one coherent response."""
|
|
440
|
+
file_desc = ", ".join(
|
|
441
|
+
f"`{i.get('name', '?')}` ({i.get('lines', '?')} lines)"
|
|
442
|
+
for i in file_infos
|
|
443
|
+
)
|
|
444
|
+
n = len(chunk_results)
|
|
445
|
+
|
|
446
|
+
parts = [
|
|
447
|
+
f"You analyzed a large file in **{n} chunks**. "
|
|
448
|
+
"Synthesize the chunk analyses below into one coherent response.",
|
|
449
|
+
"",
|
|
450
|
+
"## Original Request",
|
|
451
|
+
user_prompt,
|
|
452
|
+
"",
|
|
453
|
+
f"## Files Analyzed",
|
|
454
|
+
file_desc,
|
|
455
|
+
"",
|
|
456
|
+
"## Chunk Analyses",
|
|
457
|
+
]
|
|
458
|
+
|
|
459
|
+
for cr in sorted(chunk_results, key=lambda c: c.get("chunk_index", 0)):
|
|
460
|
+
idx = cr.get("chunk_index", 0) + 1
|
|
461
|
+
fp = Path(cr.get("file", "")).name
|
|
462
|
+
response = cr.get("response", "[analysis failed]")
|
|
463
|
+
if cr.get("error"):
|
|
464
|
+
response = f"[analysis failed: {cr['error']}]"
|
|
465
|
+
parts.append(f"\n### Chunk {idx} — `{fp}`")
|
|
466
|
+
parts.append(response)
|
|
467
|
+
|
|
468
|
+
parts.extend([
|
|
469
|
+
"",
|
|
470
|
+
"## Instructions",
|
|
471
|
+
"- Combine findings and remove duplicates (chunks overlap slightly)",
|
|
472
|
+
"- Organize by importance, not by chunk order",
|
|
473
|
+
"- Preserve line number references from the original analyses",
|
|
474
|
+
"- Provide an overall assessment at the top",
|
|
475
|
+
])
|
|
476
|
+
|
|
477
|
+
if mode == "review":
|
|
478
|
+
parts.append("- Group findings by category: bugs, security, design, performance, style")
|
|
479
|
+
|
|
480
|
+
return "\n".join(parts)
|
|
823
481
|
|
|
824
482
|
|
|
825
483
|
# Default configuration
|
|
@@ -988,6 +646,182 @@ class OpenCodeBridge:
|
|
|
988
646
|
except Exception as e:
|
|
989
647
|
return f"Error: {e}", 1
|
|
990
648
|
|
|
649
|
+
@staticmethod
|
|
650
|
+
def _parse_opencode_response(output: str) -> tuple[str, Optional[str]]:
|
|
651
|
+
"""Parse JSON-lines output from opencode CLI.
|
|
652
|
+
|
|
653
|
+
Returns (reply_text, session_id).
|
|
654
|
+
"""
|
|
655
|
+
reply_parts: list[str] = []
|
|
656
|
+
session_id: Optional[str] = None
|
|
657
|
+
for line in output.split("\n"):
|
|
658
|
+
if not line:
|
|
659
|
+
continue
|
|
660
|
+
try:
|
|
661
|
+
event = json.loads(line)
|
|
662
|
+
if not session_id and "sessionID" in event:
|
|
663
|
+
session_id = event["sessionID"]
|
|
664
|
+
if event.get("type") == "text":
|
|
665
|
+
text = event.get("part", {}).get("text", "")
|
|
666
|
+
if text:
|
|
667
|
+
reply_parts.append(text)
|
|
668
|
+
except json.JSONDecodeError:
|
|
669
|
+
continue
|
|
670
|
+
return "".join(reply_parts), session_id
|
|
671
|
+
|
|
672
|
+
async def _run_chunk(
|
|
673
|
+
self,
|
|
674
|
+
chunk_info: dict,
|
|
675
|
+
file_info: dict,
|
|
676
|
+
user_prompt: str,
|
|
677
|
+
session: "Session",
|
|
678
|
+
mode: str = "discuss",
|
|
679
|
+
) -> dict:
|
|
680
|
+
"""Process a single file chunk through OpenCode (stateless)."""
|
|
681
|
+
result = {
|
|
682
|
+
"chunk_index": chunk_info["chunk_index"],
|
|
683
|
+
"file": chunk_info["filepath"],
|
|
684
|
+
"response": "",
|
|
685
|
+
"error": None,
|
|
686
|
+
}
|
|
687
|
+
|
|
688
|
+
# Write chunk to a temp file preserving the original extension
|
|
689
|
+
ext = Path(chunk_info["filepath"]).suffix or ".txt"
|
|
690
|
+
tmp = None
|
|
691
|
+
try:
|
|
692
|
+
tmp = tempfile.NamedTemporaryFile(
|
|
693
|
+
mode="w", suffix=ext, delete=False, prefix="opencode_chunk_"
|
|
694
|
+
)
|
|
695
|
+
tmp.write(chunk_info["content"])
|
|
696
|
+
tmp.close()
|
|
697
|
+
|
|
698
|
+
prompt = build_chunk_prompt(user_prompt, chunk_info, file_info, mode)
|
|
699
|
+
|
|
700
|
+
args = [
|
|
701
|
+
"run", prompt,
|
|
702
|
+
"--model", session.model,
|
|
703
|
+
"--agent", session.agent,
|
|
704
|
+
"--file", tmp.name,
|
|
705
|
+
"--format", "json",
|
|
706
|
+
]
|
|
707
|
+
if session.variant:
|
|
708
|
+
args.extend(["--variant", session.variant])
|
|
709
|
+
|
|
710
|
+
output, code = await self._run_opencode(*args, timeout=300)
|
|
711
|
+
|
|
712
|
+
if code != 0:
|
|
713
|
+
result["error"] = output[:500]
|
|
714
|
+
return result
|
|
715
|
+
|
|
716
|
+
reply, _ = self._parse_opencode_response(output)
|
|
717
|
+
result["response"] = reply or "[no response]"
|
|
718
|
+
|
|
719
|
+
except Exception as e:
|
|
720
|
+
result["error"] = str(e)
|
|
721
|
+
finally:
|
|
722
|
+
if tmp:
|
|
723
|
+
try:
|
|
724
|
+
os.unlink(tmp.name)
|
|
725
|
+
except OSError:
|
|
726
|
+
pass
|
|
727
|
+
return result
|
|
728
|
+
|
|
729
|
+
async def _run_chunked(
|
|
730
|
+
self,
|
|
731
|
+
user_prompt: str,
|
|
732
|
+
files: list[str],
|
|
733
|
+
session: "Session",
|
|
734
|
+
mode: str = "discuss",
|
|
735
|
+
) -> str:
|
|
736
|
+
"""Map-reduce orchestrator: chunk large files, process in parallel, synthesize."""
|
|
737
|
+
small_files: list[str] = []
|
|
738
|
+
all_chunks: list[tuple[dict, dict]] = [] # (chunk_info, file_info)
|
|
739
|
+
|
|
740
|
+
for f in files:
|
|
741
|
+
info = get_file_info(f)
|
|
742
|
+
line_count = info.get("lines", 0)
|
|
743
|
+
if line_count > CHUNK_THRESHOLD:
|
|
744
|
+
chunks = chunk_file(f, CHUNK_SIZE, CHUNK_OVERLAP)
|
|
745
|
+
for c in chunks:
|
|
746
|
+
all_chunks.append((c, info))
|
|
747
|
+
else:
|
|
748
|
+
small_files.append(f)
|
|
749
|
+
|
|
750
|
+
# Safety: if too many chunks, increase chunk size and re-chunk
|
|
751
|
+
if len(all_chunks) > MAX_TOTAL_CHUNKS:
|
|
752
|
+
all_chunks = []
|
|
753
|
+
bigger = CHUNK_SIZE * 2
|
|
754
|
+
for f in files:
|
|
755
|
+
info = get_file_info(f)
|
|
756
|
+
if info.get("lines", 0) > CHUNK_THRESHOLD:
|
|
757
|
+
chunks = chunk_file(f, bigger, CHUNK_OVERLAP)
|
|
758
|
+
for c in chunks:
|
|
759
|
+
all_chunks.append((c, info))
|
|
760
|
+
# small_files already collected above
|
|
761
|
+
|
|
762
|
+
if not all_chunks:
|
|
763
|
+
return "No chunks to process."
|
|
764
|
+
|
|
765
|
+
# --- Map phase: run chunks in parallel ---
|
|
766
|
+
sem = asyncio.Semaphore(MAX_PARALLEL_CHUNKS)
|
|
767
|
+
|
|
768
|
+
async def _limited(chunk_info: dict, file_info: dict) -> dict:
|
|
769
|
+
async with sem:
|
|
770
|
+
return await self._run_chunk(chunk_info, file_info, user_prompt, session, mode)
|
|
771
|
+
|
|
772
|
+
tasks = [_limited(ci, fi) for ci, fi in all_chunks]
|
|
773
|
+
chunk_results: list[dict] = await asyncio.gather(*tasks)
|
|
774
|
+
|
|
775
|
+
# Check failure rate
|
|
776
|
+
failed = sum(1 for cr in chunk_results if cr.get("error"))
|
|
777
|
+
if failed > len(chunk_results) / 2:
|
|
778
|
+
return (
|
|
779
|
+
f"Chunked analysis failed: {failed}/{len(chunk_results)} chunks errored. "
|
|
780
|
+
"Try with a smaller file or increase the chunk size."
|
|
781
|
+
)
|
|
782
|
+
|
|
783
|
+
# --- Reduce phase: synthesize ---
|
|
784
|
+
file_infos = []
|
|
785
|
+
seen_paths: set[str] = set()
|
|
786
|
+
for _, fi in all_chunks:
|
|
787
|
+
fp = fi.get("path", "")
|
|
788
|
+
if fp not in seen_paths:
|
|
789
|
+
seen_paths.add(fp)
|
|
790
|
+
file_infos.append(fi)
|
|
791
|
+
|
|
792
|
+
synthesis_prompt = build_synthesis_prompt(user_prompt, chunk_results, file_infos, mode)
|
|
793
|
+
|
|
794
|
+
# Attach small files for reference context (not the large ones)
|
|
795
|
+
args = [
|
|
796
|
+
"run", synthesis_prompt,
|
|
797
|
+
"--model", session.model,
|
|
798
|
+
"--agent", session.agent,
|
|
799
|
+
"--format", "json",
|
|
800
|
+
]
|
|
801
|
+
if session.variant:
|
|
802
|
+
args.extend(["--variant", session.variant])
|
|
803
|
+
for sf in small_files:
|
|
804
|
+
args.extend(["--file", sf])
|
|
805
|
+
|
|
806
|
+
# Longer timeout for synthesis
|
|
807
|
+
output, code = await self._run_opencode(*args, timeout=600)
|
|
808
|
+
|
|
809
|
+
if code != 0:
|
|
810
|
+
# Fallback: concatenate raw chunk results
|
|
811
|
+
parts = [f"*Synthesis failed — showing raw chunk analyses:*\n"]
|
|
812
|
+
for cr in sorted(chunk_results, key=lambda c: c.get("chunk_index", 0)):
|
|
813
|
+
idx = cr.get("chunk_index", 0) + 1
|
|
814
|
+
fp = Path(cr.get("file", "")).name
|
|
815
|
+
parts.append(f"\n### Chunk {idx} — `{fp}`")
|
|
816
|
+
if cr.get("error"):
|
|
817
|
+
parts.append(f"[error: {cr['error']}]")
|
|
818
|
+
else:
|
|
819
|
+
parts.append(cr.get("response", "[no response]"))
|
|
820
|
+
return "\n".join(parts)
|
|
821
|
+
|
|
822
|
+
reply, _ = self._parse_opencode_response(output)
|
|
823
|
+
return reply or "No response from synthesis."
|
|
824
|
+
|
|
991
825
|
async def list_models(self, provider: Optional[str] = None) -> str:
|
|
992
826
|
"""List available models from OpenCode."""
|
|
993
827
|
args = ["models"]
|
|
@@ -1117,19 +951,36 @@ Set via:
|
|
|
1117
951
|
temp_file.close()
|
|
1118
952
|
files = (files or []) + [temp_file.name]
|
|
1119
953
|
|
|
954
|
+
# --- Chunking gate: large user files get map-reduce processing ---
|
|
955
|
+
user_files = [f for f in files if not Path(f).name.startswith("opencode_msg_")]
|
|
956
|
+
needs_chunking = any(
|
|
957
|
+
get_file_info(f).get("lines", 0) > CHUNK_THRESHOLD
|
|
958
|
+
for f in user_files
|
|
959
|
+
)
|
|
960
|
+
|
|
961
|
+
if needs_chunking:
|
|
962
|
+
reply = await self._run_chunked(message, user_files, session, mode="discuss")
|
|
963
|
+
# Cleanup temp file
|
|
964
|
+
try:
|
|
965
|
+
os.unlink(temp_file.name)
|
|
966
|
+
except OSError:
|
|
967
|
+
pass
|
|
968
|
+
if reply:
|
|
969
|
+
session.add_message("assistant", reply)
|
|
970
|
+
session.save(self.sessions_dir / f"{sid}.json")
|
|
971
|
+
return reply or "No response received"
|
|
972
|
+
|
|
973
|
+
# --- Normal (non-chunked) path ---
|
|
974
|
+
|
|
1120
975
|
# Build prompt: companion system unless _raw is set
|
|
1121
|
-
domain_info = ""
|
|
1122
976
|
if _raw:
|
|
1123
977
|
run_prompt = build_message_prompt(message, files)
|
|
1124
978
|
else:
|
|
1125
979
|
is_followup = len(session.messages) > 1
|
|
1126
|
-
run_prompt
|
|
980
|
+
run_prompt = build_companion_prompt(
|
|
1127
981
|
message, files, domain_override=domain_override,
|
|
1128
982
|
is_followup=is_followup,
|
|
1129
983
|
)
|
|
1130
|
-
domain_info = f"[Domain: {detection.primary.name}] [Confidence: {detection.confidence}%]"
|
|
1131
|
-
if detection.secondary:
|
|
1132
|
-
domain_info += f" [Also: {detection.secondary.name} ({detection.secondary_confidence}%)]"
|
|
1133
984
|
|
|
1134
985
|
args = ["run", run_prompt]
|
|
1135
986
|
|
|
@@ -1153,7 +1004,6 @@ Set via:
|
|
|
1153
1004
|
args.extend(["--format", "json"])
|
|
1154
1005
|
|
|
1155
1006
|
# Scale timeout based on attached file size
|
|
1156
|
-
user_files = [f for f in files if not Path(f).name.startswith("opencode_msg_")]
|
|
1157
1007
|
total_lines = sum(get_file_info(f).get("lines", 0) for f in user_files)
|
|
1158
1008
|
# Base 300s, +60s per 1000 lines above threshold, capped at 900s
|
|
1159
1009
|
timeout = min(900, 300 + max(0, (total_lines - MEDIUM_FILE) * 60 // 1000))
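A worked example of that timeout scaling, using the constants defined at the top of the file:

```python
# MEDIUM_FILE = 1500. For 4,800 attached lines:
#   300 + max(0, (4800 - 1500) * 60 // 1000) = 300 + 198 = 498 seconds.
# The min(900, ...) cap only kicks in at roughly 11,500 lines and above.
```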
|
|
@@ -1171,22 +1021,10 @@ Set via:
|
|
|
1171
1021
|
return f"Error: {output}"
|
|
1172
1022
|
|
|
1173
1023
|
# Parse JSON events for session ID and text
|
|
1174
|
-
|
|
1175
|
-
|
|
1176
|
-
|
|
1177
|
-
continue
|
|
1178
|
-
try:
|
|
1179
|
-
event = json.loads(line)
|
|
1180
|
-
if not session.opencode_session_id and "sessionID" in event:
|
|
1181
|
-
session.opencode_session_id = event["sessionID"]
|
|
1182
|
-
if event.get("type") == "text":
|
|
1183
|
-
text = event.get("part", {}).get("text", "")
|
|
1184
|
-
if text:
|
|
1185
|
-
reply_parts.append(text)
|
|
1186
|
-
except json.JSONDecodeError:
|
|
1187
|
-
continue
|
|
1024
|
+
reply, new_session_id = self._parse_opencode_response(output)
|
|
1025
|
+
if new_session_id and not session.opencode_session_id:
|
|
1026
|
+
session.opencode_session_id = new_session_id
|
|
1188
1027
|
|
|
1189
|
-
reply = "".join(reply_parts)
|
|
1190
1028
|
if reply:
|
|
1191
1029
|
session.add_message("assistant", reply)
|
|
1192
1030
|
|
|
@@ -1194,10 +1032,7 @@ Set via:
|
|
|
1194
1032
|
if reply or session.opencode_session_id:
|
|
1195
1033
|
session.save(self.sessions_dir / f"{sid}.json")
|
|
1196
1034
|
|
|
1197
|
-
|
|
1198
|
-
if domain_info:
|
|
1199
|
-
response = f"{domain_info}\n\n{response}"
|
|
1200
|
-
return response
|
|
1035
|
+
return reply or "No response received"
|
|
1201
1036
|
|
|
1202
1037
|
async def plan(
|
|
1203
1038
|
self,
|
|
@@ -1262,10 +1097,16 @@ Set via:
|
|
|
1262
1097
|
files = file_paths
|
|
1263
1098
|
file_infos = [get_file_info(f) for f in file_paths]
|
|
1264
1099
|
file_infos = [i for i in file_infos if i]
|
|
1100
|
+
total_lines = sum(i.get("lines", 0) for i in file_infos)
|
|
1101
|
+
|
|
1102
|
+
# Chunking gate for large reviews
|
|
1103
|
+
if total_lines > CHUNK_THRESHOLD:
|
|
1104
|
+
prompt = build_review_prompt(file_infos, focus)
|
|
1105
|
+
return await self._run_chunked(prompt, file_paths, self.sessions[sid], mode="review")
|
|
1106
|
+
|
|
1265
1107
|
prompt = build_review_prompt(file_infos, focus)
|
|
1266
1108
|
|
|
1267
1109
|
# Increase timeout for large files
|
|
1268
|
-
total_lines = sum(i.get("lines", 0) for i in file_infos)
|
|
1269
1110
|
if total_lines > LARGE_FILE:
|
|
1270
1111
|
# Use variant=high for large reviews if not already high+
|
|
1271
1112
|
session = self.sessions[sid]
|
|
@@ -1492,11 +1333,7 @@ async def list_tools():
|
|
|
1492
1333
|
},
|
|
1493
1334
|
"domain": {
|
|
1494
1335
|
"type": "string",
|
|
1495
|
-
"description": "
|
|
1496
|
-
"enum": ["architecture", "debugging", "performance", "security",
|
|
1497
|
-
"testing", "devops", "database", "api_design",
|
|
1498
|
-
"frontend", "algorithms", "code_quality", "planning",
|
|
1499
|
-
"general"]
|
|
1336
|
+
"description": "Hint the domain of expertise (e.g., 'security', 'metagenomics', 'quantitative finance')"
|
|
1500
1337
|
}
|
|
1501
1338
|
},
|
|
1502
1339
|
"required": ["message"]
|
|
opencode_bridge-0.4.0.dist-info/RECORD
ADDED
@@ -0,0 +1,7 @@
+opencode_bridge/__init__.py,sha256=SkXVg907MuInd7UEYOjHjiiIIT46y4S2l20hE9cShKo,92
+opencode_bridge/install.py,sha256=VOJNYUPxq88g0XizkHSQ9noM3Qcd3AfZxPUZInEKErk,1796
+opencode_bridge/server.py,sha256=Nqx8s-Bz68HObVF7SwPWQCHjoRVH0TRtvpZyMP5zNFA,57249
+opencode_bridge-0.4.0.dist-info/METADATA,sha256=jveFIMox8TFoiiiFCWw6RgHDsSiXd5ortnNX1yKLois,3924
+opencode_bridge-0.4.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+opencode_bridge-0.4.0.dist-info/entry_points.txt,sha256=8elAgeI-Sk7EPoV7kUr3CCgQyIAW2VfDj5ZXQ_9slCc,184
+opencode_bridge-0.4.0.dist-info/RECORD,,
opencode_bridge-0.2.0.dist-info/RECORD
DELETED
@@ -1,7 +0,0 @@
-opencode_bridge/__init__.py,sha256=SkXVg907MuInd7UEYOjHjiiIIT46y4S2l20hE9cShKo,92
-opencode_bridge/install.py,sha256=VOJNYUPxq88g0XizkHSQ9noM3Qcd3AfZxPUZInEKErk,1796
-opencode_bridge/server.py,sha256=vNTN7IFIQi6IaEQ1VtBgoum7s6uPKyKDrC3_y1mbTPw,72490
-opencode_bridge-0.2.0.dist-info/METADATA,sha256=s810ba1WjpaJ_Bt5Ag-hiLa1PY2msk5N_crtRdxffZg,3924
-opencode_bridge-0.2.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-opencode_bridge-0.2.0.dist-info/entry_points.txt,sha256=8elAgeI-Sk7EPoV7kUr3CCgQyIAW2VfDj5ZXQ_9slCc,184
-opencode_bridge-0.2.0.dist-info/RECORD,,
{opencode_bridge-0.2.0.dist-info → opencode_bridge-0.4.0.dist-info}/WHEEL: file without changes
{opencode_bridge-0.2.0.dist-info → opencode_bridge-0.4.0.dist-info}/entry_points.txt: file without changes