xtrm-tools 2.4.0 → 2.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125) hide show
  1. package/README.md +23 -9
  2. package/cli/dist/index.cjs +774 -240
  3. package/cli/dist/index.cjs.map +1 -1
  4. package/cli/package.json +1 -1
  5. package/config/hooks.json +10 -0
  6. package/config/pi/extensions/core/adapter.ts +2 -14
  7. package/config/pi/extensions/core/guard-rules.ts +70 -0
  8. package/config/pi/extensions/core/session-state.ts +59 -0
  9. package/config/pi/extensions/main-guard.ts +10 -14
  10. package/config/pi/extensions/plan-mode/README.md +65 -0
  11. package/config/pi/extensions/plan-mode/index.ts +340 -0
  12. package/config/pi/extensions/plan-mode/utils.ts +168 -0
  13. package/config/pi/extensions/service-skills.ts +51 -7
  14. package/config/pi/extensions/session-flow.ts +117 -0
  15. package/hooks/beads-claim-sync.mjs +123 -2
  16. package/hooks/beads-compact-restore.mjs +41 -9
  17. package/hooks/beads-compact-save.mjs +36 -5
  18. package/hooks/beads-gate-messages.mjs +27 -1
  19. package/hooks/beads-stop-gate.mjs +58 -8
  20. package/hooks/guard-rules.mjs +86 -0
  21. package/hooks/hooks.json +28 -18
  22. package/hooks/main-guard.mjs +3 -21
  23. package/hooks/quality-check.cjs +1286 -0
  24. package/hooks/quality-check.py +345 -0
  25. package/hooks/session-state.mjs +138 -0
  26. package/package.json +2 -1
  27. package/project-skills/quality-gates/.claude/settings.json +1 -24
  28. package/skills/creating-service-skills/SKILL.md +433 -0
  29. package/skills/creating-service-skills/references/script_quality_standards.md +425 -0
  30. package/skills/creating-service-skills/references/service_skill_system_guide.md +278 -0
  31. package/skills/creating-service-skills/scripts/bootstrap.py +326 -0
  32. package/skills/creating-service-skills/scripts/deep_dive.py +304 -0
  33. package/skills/creating-service-skills/scripts/scaffolder.py +482 -0
  34. package/skills/scoping-service-skills/SKILL.md +231 -0
  35. package/skills/scoping-service-skills/scripts/scope.py +74 -0
  36. package/skills/sync-docs/SKILL.md +235 -0
  37. package/skills/sync-docs/evals/evals.json +89 -0
  38. package/skills/sync-docs/references/doc-structure.md +104 -0
  39. package/skills/sync-docs/references/schema.md +103 -0
  40. package/skills/sync-docs/scripts/context_gatherer.py +246 -0
  41. package/skills/sync-docs/scripts/doc_structure_analyzer.py +495 -0
  42. package/skills/sync-docs/scripts/validate_doc.py +365 -0
  43. package/skills/sync-docs-workspace/iteration-1/benchmark.json +293 -0
  44. package/skills/sync-docs-workspace/iteration-1/benchmark.md +13 -0
  45. package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/eval_metadata.json +27 -0
  46. package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/with_skill/outputs/result.md +210 -0
  47. package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/with_skill/run-1/grading.json +28 -0
  48. package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/with_skill/run-1/timing.json +1 -0
  49. package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/without_skill/outputs/result.md +101 -0
  50. package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/without_skill/run-1/grading.json +28 -0
  51. package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/without_skill/run-1/timing.json +5 -0
  52. package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/without_skill/timing.json +5 -0
  53. package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/eval_metadata.json +27 -0
  54. package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/with_skill/outputs/result.md +198 -0
  55. package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/with_skill/run-1/grading.json +28 -0
  56. package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/with_skill/run-1/timing.json +1 -0
  57. package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/without_skill/outputs/result.md +94 -0
  58. package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/without_skill/run-1/grading.json +28 -0
  59. package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/without_skill/run-1/timing.json +1 -0
  60. package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/eval_metadata.json +27 -0
  61. package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/with_skill/outputs/result.md +237 -0
  62. package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/with_skill/run-1/grading.json +28 -0
  63. package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/with_skill/run-1/timing.json +1 -0
  64. package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/without_skill/outputs/result.md +134 -0
  65. package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/without_skill/run-1/grading.json +28 -0
  66. package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/without_skill/run-1/timing.json +1 -0
  67. package/skills/sync-docs-workspace/iteration-2/benchmark.json +297 -0
  68. package/skills/sync-docs-workspace/iteration-2/benchmark.md +13 -0
  69. package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/eval_metadata.json +27 -0
  70. package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/with_skill/outputs/result.md +137 -0
  71. package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/with_skill/run-1/grading.json +92 -0
  72. package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/with_skill/run-1/timing.json +1 -0
  73. package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/without_skill/outputs/result.md +134 -0
  74. package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/without_skill/run-1/grading.json +86 -0
  75. package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/without_skill/run-1/timing.json +1 -0
  76. package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/eval_metadata.json +27 -0
  77. package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/with_skill/outputs/result.md +193 -0
  78. package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/with_skill/run-1/grading.json +72 -0
  79. package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/with_skill/run-1/timing.json +1 -0
  80. package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/without_skill/outputs/result.md +211 -0
  81. package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/without_skill/run-1/grading.json +91 -0
  82. package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/without_skill/run-1/timing.json +5 -0
  83. package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/eval_metadata.json +27 -0
  84. package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/with_skill/outputs/result.md +182 -0
  85. package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/with_skill/run-1/grading.json +95 -0
  86. package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/with_skill/run-1/timing.json +1 -0
  87. package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/without_skill/outputs/result.md +222 -0
  88. package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/without_skill/run-1/grading.json +88 -0
  89. package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/without_skill/run-1/timing.json +5 -0
  90. package/skills/sync-docs-workspace/iteration-3/benchmark.json +298 -0
  91. package/skills/sync-docs-workspace/iteration-3/benchmark.md +13 -0
  92. package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/eval_metadata.json +27 -0
  93. package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/with_skill/outputs/result.md +125 -0
  94. package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/with_skill/run-1/grading.json +97 -0
  95. package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/with_skill/run-1/timing.json +5 -0
  96. package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/without_skill/outputs/result.md +144 -0
  97. package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/without_skill/run-1/grading.json +78 -0
  98. package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/without_skill/run-1/timing.json +5 -0
  99. package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/eval_metadata.json +27 -0
  100. package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/with_skill/outputs/result.md +104 -0
  101. package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/with_skill/run-1/grading.json +91 -0
  102. package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/with_skill/run-1/timing.json +5 -0
  103. package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/without_skill/outputs/result.md +79 -0
  104. package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/without_skill/run-1/grading.json +82 -0
  105. package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/without_skill/run-1/timing.json +5 -0
  106. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/eval_metadata.json +27 -0
  107. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase1_context.json +302 -0
  108. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase2_drift.txt +33 -0
  109. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase3_analysis.json +114 -0
  110. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase4_fix.txt +118 -0
  111. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase5_validate.txt +38 -0
  112. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/result.md +158 -0
  113. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/run-1/grading.json +95 -0
  114. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/run-1/timing.json +5 -0
  115. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/without_skill/outputs/result.md +71 -0
  116. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/without_skill/run-1/grading.json +90 -0
  117. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/without_skill/run-1/timing.json +5 -0
  118. package/skills/updating-service-skills/SKILL.md +136 -0
  119. package/skills/updating-service-skills/scripts/drift_detector.py +222 -0
  120. package/skills/using-quality-gates/SKILL.md +254 -0
  121. package/skills/using-service-skills/SKILL.md +108 -0
  122. package/skills/using-service-skills/scripts/cataloger.py +74 -0
  123. package/skills/using-service-skills/scripts/skill_activator.py +152 -0
  124. package/skills/using-service-skills/scripts/test_skill_activator.py +58 -0
  125. package/skills/using-xtrm/SKILL.md +34 -38
@@ -0,0 +1,326 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Bootstrap module for Service Skill Trinity.
4
+
5
+ Provides root-discovery and registry CRUD operations shared across all
6
+ service-skill workflow scripts. All scripts in the trinity import from here.
7
+
8
+ Registry location: service-registry.json (preferred) or .claude/skills/service-registry.json (legacy)
9
+ Skills location: .claude/skills/<service-id>/
10
+ """
11
+
12
+ import json
13
+ import os
14
+ import subprocess # nosec B404
15
+ import sys
16
+ from datetime import datetime, timezone
17
+ from pathlib import Path
18
+ from typing import Any
19
+
20
+
21
class BootstrapError(Exception):
    """Common ancestor of every error raised by the bootstrap helpers."""
25
+
26
+
27
class RootResolutionError(BootstrapError):
    """Signals that the project root could not be worked out."""
31
+
32
+
33
class RegistryError(BootstrapError):
    """Signals that reading or writing the service registry failed."""
37
+
38
+
39
def get_project_root() -> str:
    """
    Ask git for the top-level directory of the current repository.

    Runs ``git rev-parse --show-toplevel`` with a five second timeout and
    validates the path it prints.

    Returns:
        The path printed by git, with surrounding whitespace removed

    Raises:
        RootResolutionError: If git is not installed, exits non-zero, times
            out, prints nothing, or prints something that is not a directory
    """
    git_command = ["git", "rev-parse", "--show-toplevel"]

    try:
        completed = subprocess.run(  # nosec B603 B607
            git_command,
            capture_output=True,
            text=True,
            check=True,
            timeout=5,
        )
    except subprocess.CalledProcessError as e:
        # Prefer git's own stderr; fall back to the exception text when empty.
        detail = e.stderr.strip() if e.stderr else str(e)
        raise RootResolutionError(f"Git root resolution failed: {detail}") from e
    except subprocess.TimeoutExpired as e:
        raise RootResolutionError("Git command timed out") from e
    except FileNotFoundError as e:
        raise RootResolutionError("Git not found in PATH") from e

    toplevel = completed.stdout.strip()

    if not toplevel:
        raise RootResolutionError("Git returned empty path")

    if not os.path.isdir(toplevel):
        raise RootResolutionError(f"Resolved path is not a directory: {toplevel}")

    return toplevel
75
+
76
+
77
+ def get_skills_root(project_root: str | None = None) -> Path:
78
+ """
79
+ Get the .claude/skills/ directory path.
80
+
81
+ Args:
82
+ project_root: Optional project root (uses get_project_root() if not provided)
83
+
84
+ Returns:
85
+ Path to .claude/skills/ directory
86
+ """
87
+ if project_root is None:
88
+ project_root = get_project_root()
89
+ return Path(project_root) / ".claude" / "skills"
90
+
91
+
92
+ def get_registry_path(project_root: str | None = None) -> Path:
93
+ """
94
+ Get the service-registry.json path.
95
+
96
+ Resolution order:
97
+ 1. SERVICE_REGISTRY_PATH env var (explicit override)
98
+ 2. <project-root>/service-registry.json (preferred)
99
+ 3. <project-root>/.claude/skills/service-registry.json (legacy fallback)
100
+
101
+ Args:
102
+ project_root: Optional project root
103
+
104
+ Returns:
105
+ Path to service registry
106
+ """
107
+ env_registry = os.environ.get("SERVICE_REGISTRY_PATH")
108
+ if env_registry:
109
+ return Path(env_registry)
110
+
111
+ if project_root is None:
112
+ env_root = os.environ.get("CLAUDE_PROJECT_DIR")
113
+ project_root = env_root or get_project_root()
114
+
115
+ root = Path(project_root)
116
+ preferred = root / "service-registry.json"
117
+ legacy = root / ".claude" / "skills" / "service-registry.json"
118
+
119
+ if preferred.exists() or not legacy.exists():
120
+ return preferred
121
+
122
+ return legacy
123
+
124
+
125
def load_registry(project_root: str | None = None) -> dict[str, Any]:
    """
    Load the service registry.

    A missing registry file is not an error: an empty registry skeleton is
    returned so first-time callers can register services straight away.

    Args:
        project_root: Optional project root

    Returns:
        Registry contents as dict

    Raises:
        RegistryError: If the registry cannot be read, is not valid UTF-8,
            is not valid JSON, or does not have the expected object shape
    """
    registry_path = get_registry_path(project_root)

    if not registry_path.exists():
        return {"version": "1.0", "services": {}}

    try:
        with open(registry_path, encoding="utf-8") as f:
            data = json.load(f)
    except json.JSONDecodeError as e:
        raise RegistryError(f"Invalid JSON in registry: {e}") from e
    except UnicodeDecodeError as e:
        # UnicodeDecodeError is a ValueError, not an OSError, so it needs its
        # own handler to honour the documented RegistryError contract.
        raise RegistryError(f"Registry is not valid UTF-8: {e}") from e
    except OSError as e:
        raise RegistryError(f"Cannot read registry: {e}") from e

    # Every caller treats the registry as a mapping with a "services" mapping
    # inside; reject other JSON shapes here instead of crashing later.
    if not isinstance(data, dict):
        raise RegistryError(
            f"Registry must be a JSON object, got {type(data).__name__}"
        )
    if not isinstance(data.get("services", {}), dict):
        raise RegistryError('Registry "services" entry must be a JSON object')

    return data
150
+
151
+
152
def save_registry(data: dict[str, Any], project_root: str | None = None) -> None:
    """
    Save the service registry.

    The registry is serialized before the file is opened, so data that
    cannot be serialized never truncates an existing registry file.

    Args:
        data: Registry contents
        project_root: Optional project root

    Raises:
        RegistryError: If the data cannot be serialized to JSON, or the
            parent directory or the file itself cannot be written
    """
    registry_path = get_registry_path(project_root)

    # Serialize up front: opening with "w" truncates immediately, so a failure
    # inside json.dump would otherwise leave a half-written registry behind.
    try:
        payload = json.dumps(data, indent=2)
    except (TypeError, ValueError) as e:
        raise RegistryError(f"Cannot serialize registry: {e}") from e

    try:
        # mkdir lives inside the try so directory-creation failures surface as
        # RegistryError as well, as the contract above promises.
        registry_path.parent.mkdir(parents=True, exist_ok=True)
        with open(registry_path, "w", encoding="utf-8") as f:
            f.write(payload)
    except OSError as e:
        raise RegistryError(f"Cannot write registry: {e}") from e
171
+
172
+
173
def register_service(
    service_id: str,
    name: str,
    territory: list[str],
    skill_path: str,
    description: str = "",
    project_root: str | None = None,
) -> None:
    """
    Add a service to the registry, replacing any entry with the same ID.

    The entry is stamped with the current UTC time as ``last_sync`` and the
    registry is written back immediately.

    Args:
        service_id: Unique identifier (e.g., "db-expert")
        name: Display name (e.g., "Database Expert")
        territory: List of glob patterns for files this service owns
        skill_path: Path to SKILL.md relative to project root
        description: Optional description
        project_root: Optional project root

    Raises:
        RegistryError: If the registry cannot be loaded or saved
    """
    registry = load_registry(project_root)

    # ISO-8601 in UTC, with the "+00:00" offset shortened to "Z".
    synced_at = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
    entry = {
        "name": name,
        "territory": territory,
        "skill_path": skill_path,
        "description": description,
        "last_sync": synced_at,
    }

    registry.setdefault("services", {})[service_id] = entry
    save_registry(registry, project_root)
209
+
210
+
211
def unregister_service(service_id: str, project_root: str | None = None) -> bool:
    """
    Delete a service entry from the registry.

    The registry is rewritten only when an entry was actually removed.

    Args:
        service_id: Service identifier to remove
        project_root: Optional project root

    Returns:
        True if the entry existed and was removed, False otherwise
    """
    registry = load_registry(project_root)

    is_registered = "services" in registry and service_id in registry["services"]
    if not is_registered:
        return False

    del registry["services"][service_id]
    save_registry(registry, project_root)
    return True
230
+
231
+
232
def get_service(service_id: str, project_root: str | None = None) -> dict[str, Any] | None:
    """
    Look up one service entry by its identifier.

    Args:
        service_id: Service identifier
        project_root: Optional project root

    Returns:
        The stored service dict, or None when the ID is not registered
    """
    services = load_registry(project_root).get("services", {})
    return services.get(service_id)
245
+
246
+
247
def list_services(project_root: str | None = None) -> dict[str, dict[str, Any]]:
    """
    Return every service entry in the registry.

    Args:
        project_root: Optional project root

    Returns:
        Mapping of service_id -> service_data (empty when the registry has
        no "services" section)
    """
    registry = load_registry(project_root)
    if "services" in registry:
        return registry["services"]
    return {}
259
+
260
+
261
def find_service_for_path(file_path: str, project_root: str | None = None) -> str | None:
    """
    Find which service (if any) owns a given file path.

    Each territory pattern is tried in two ways:

    1. Glob match: the pattern is expanded under the project root and the
       results are compared with the target path. This only finds files
       that exist on disk.
    2. Prefix match: the pattern with any trailing ``/**`` or ``/**/*``
       removed is treated as a directory, and the project-relative path is
       checked for living under it. This also covers files not yet created.

    Args:
        file_path: Path to check, either relative to the project root or
            absolute
        project_root: Optional project root

    Returns:
        ID of the first service with a matching territory pattern, or None
        if nothing matches or the project root cannot be resolved
    """
    registry = load_registry(project_root)

    if project_root is None:
        try:
            project_root = get_project_root()
        except RootResolutionError:
            return None

    root = Path(project_root)
    candidate = Path(file_path)
    target = candidate if candidate.is_absolute() else root / candidate

    # Normalise to a project-relative POSIX string so that absolute paths and
    # "./"-prefixed paths inside the project take part in prefix matching.
    try:
        relative = target.relative_to(root).as_posix()
    except ValueError:
        # Outside the project root: keep the path as supplied.
        relative = candidate.as_posix()

    for service_id, service_data in registry.get("services", {}).items():
        for pattern in service_data.get("territory", []):
            # Direct glob match
            try:
                if any(match == target for match in root.glob(pattern)):
                    return service_id
            except (ValueError, NotImplementedError):
                # Path.glob rejects empty and absolute patterns; such an entry
                # cannot glob-match, but the prefix check below may still apply.
                pass

            # Prefix match for directory patterns
            base = pattern.replace("/**/*", "").replace("/**", "").rstrip("/")
            # An empty base would make every absolute path "match"; skip it.
            if base and (relative == base or relative.startswith(base + "/")):
                return service_id

    return None
303
+
304
+
305
if __name__ == "__main__":
    # Minimal CLI for inspecting the project root and the service registry.
    if len(sys.argv) < 2:
        print("Usage: python bootstrap.py <command> [args...]")
        print("Commands: root, registry, list, find <path>")
        sys.exit(1)

    command = sys.argv[1]

    try:
        if command == "root":
            print(get_project_root())
        elif command == "registry":
            print(json.dumps(load_registry(), indent=2))
        elif command == "list":
            services = list_services()
            for sid, data in services.items():
                print(f"- {sid}: {data.get('name', 'N/A')} ({data.get('description', 'N/A')})")
        elif command == "find":
            # Report the missing argument instead of calling a valid command
            # "unknown".
            if len(sys.argv) < 3:
                print("Usage: python bootstrap.py find <path>")
                sys.exit(1)
            result = find_service_for_path(sys.argv[2])
            print(result if result else "No service found")
        else:
            print(f"Unknown command: {command}")
            sys.exit(1)
    except BootstrapError as e:
        # Root-resolution and registry failures are expected operational
        # errors: show a one-line message rather than a traceback.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)
@@ -0,0 +1,304 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Phase 2 deep dive analyzer for creating-service-skills.
4
+
5
+ Generates structured research questions and classifies service types.
6
+ The agent answers every question using Serena LSP tools against the real source.
7
+
8
+ Usage:
9
+ python3 deep_dive.py classify <territory-path>
10
+ python3 deep_dive.py questions [service-type]
11
+ python3 deep_dive.py template
12
+ """
13
+
14
+ import sys
15
+ from pathlib import Path
16
+
17
# Put the directory containing this script at the front of the import path.
script_dir = Path(__file__).parent
sys.path.insert(0, str(script_dir))
19
+
20
+
21
+ # ---------------------------------------------------------------------------
22
+ # Service type classification
23
+ # ---------------------------------------------------------------------------
24
# Catalogue of recognised service types. For each type:
#   patterns   - substrings worth 2 points each when found in a source file
#   indicators - substrings worth 1 point each when found in a source file
#   script     - specialist script recommended for that type
#   health     - short description of the health-check strategy
SERVICE_TYPES: dict[str, dict] = {
    # Services that keep writing rows into a database
    "continuous_db_writer": {
        "patterns": ["insert", "update", "upsert", "execute", "copy_records"],
        "indicators": ["timescaledb", "postgres", "asyncpg", "psycopg", "sqlalchemy"],
        "script": "data_explorer.py",
        "health": "table_freshness + row_count",
    },
    # Services that expose HTTP routes
    "http_api_server": {
        "patterns": ["route", "endpoint", "handler", "router", "@app.get", "@app.post"],
        "indicators": ["fastapi", "flask", "express", "aiohttp", "uvicorn"],
        "script": "endpoint_tester.py",
        "health": "http_probe (real routes, not just /health)",
    },
    # Run-once jobs such as schema migrations, seeds and backfills
    "one_shot_migration": {
        "patterns": ["migrate", "alembic", "upgrade", "seed", "backfill", "--init"],
        "indicators": ["alembic", "prisma migrate", "flyway"],
        "script": "coverage_checker.py",
        "health": "exit_code + expected schema presence",
    },
    # Services that react to filesystem events
    "file_watcher": {
        "patterns": ["inotify", "watchdog", "watch", "chokidar", "fsevents"],
        "indicators": ["inotify", "watchdog", "notify"],
        "script": "state_inspector.py",
        "health": "mount_path_accessible + state_file_present + db_recency",
    },
    # Services that wake up on a schedule or interval
    "scheduled_poller": {
        "patterns": ["schedule", "interval", "cron", "sleep", "asyncio.sleep"],
        "indicators": ["apscheduler", "celery", "rq", "dramatiq"],
        "script": "service_specific.py",
        "health": "token_presence + last_run_recency",
    },
}
56
+
57
+
58
def classify_service(directory: Path) -> dict:
    """
    Guess a service's type from keyword hits in its source files.

    Collects ``*.py``, ``*.ts``, ``*.rs`` and ``*.go`` files under
    ``directory`` (in that order), reads at most the first 30 of them in
    lowercase, and scores every entry of SERVICE_TYPES: 2 points per pattern
    found in a file, 1 point per indicator. Unreadable or non-UTF-8 files
    are skipped.

    Returns:
        ``{"error": ...}`` when the directory is missing or holds no source
        files; an "unknown"/"low" result when nothing scored; otherwise the
        best-scoring type with its confidence ("high" from 6 points,
        "medium" from 3, else "low"), all scores, and that type's
        recommended script and health strategy.
    """
    if not directory.exists():
        return {"error": f"Directory not found: {directory}"}

    # Gather candidates one extension at a time so the scan order is stable.
    candidates = [
        found
        for extension in ("*.py", "*.ts", "*.rs", "*.go")
        for found in directory.rglob(extension)
    ]

    if not candidates:
        return {"error": "No source files found in territory"}

    tallies: dict[str, int] = {}

    for source in candidates[:30]:
        try:
            text = source.read_text(encoding="utf-8").lower()
        except (OSError, UnicodeDecodeError):
            continue

        for type_name, spec in SERVICE_TYPES.items():
            points = 2 * sum(token in text for token in spec["patterns"])
            points += sum(marker in text for marker in spec["indicators"])
            if points:
                tallies[type_name] = tallies.get(type_name, 0) + points

    if not tallies:
        return {"type": "unknown", "confidence": "low", "scores": {}}

    best = max(tallies, key=tallies.get)
    top_score = tallies[best]

    if top_score >= 6:
        level = "high"
    elif top_score >= 3:
        level = "medium"
    else:
        level = "low"

    spec = SERVICE_TYPES[best]

    return {
        "type": best,
        "confidence": level,
        "score": top_score,
        "all_scores": tallies,
        "recommended_script": spec["script"],
        "health_strategy": spec["health"],
    }
104
+
105
+
106
def print_deep_dive_questions(service_type: str) -> None:
    """
    Print the full Phase 2 research agenda for the given service type.

    The agent answers every question using Serena LSP tools — NOT raw file reads.

    Output order: the common Sections 1-2, then a type-specific Section 3
    for "continuous_db_writer", "http_api_server" or "file_watcher" (other
    types get no Section 3), then the common Sections 4-6.

    Args:
        service_type: Service type name; interpolated into the heading and
            into the specialist-script line of Section 6
    """
    print(
        f"""
=== Phase 2 Deep Dive: {service_type} ===

IMPORTANT — Use Serena LSP tools for all code exploration:

| Task | Tool |
|----------------------------|----------------------------------------------------|
| Map module structure | get_symbols_overview(depth=1) |
| Read a specific function | find_symbol(name_path, include_body=True) |
| Find log/error patterns | search_for_pattern("logger.error|raise|except") |
| Find SQL queries | search_for_pattern("SELECT|INSERT|UPDATE|COPY") |
| Trace data flow | find_referencing_symbols(name_path, relative_path) |
| Find env var usage | search_for_pattern("os.getenv|os.environ") |

Do NOT read entire files. Map first, then read only what you need.

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
SECTION 1: Container & Runtime
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
1. What is the exact entry point?
→ Read docker-compose.yml `command:` field and Dockerfile CMD.
→ Verify with: search_for_pattern("if __name__|def main|async def main")

2. Which env vars will crash the service if missing?
→ search_for_pattern("os.getenv|os.environ") and check which ones have no default.

3. What volumes does it mount (read/write)?
→ Check docker-compose.yml `volumes:` section.

4. Is this a daemon, one-shot job, or cron?
→ Check `restart:` policy in docker-compose.yml.
→ Look for `while True`, `asyncio.sleep`, `--bootstrap` flags.

5. Does it depend on another service being healthy first?
→ Check `depends_on:` with `condition: service_healthy`.

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
SECTION 2: Data Layer
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
6. Which tables does it WRITE? Which does it only READ?
→ search_for_pattern("INSERT INTO|COPY.*FROM|UPDATE.*SET|execute.*INSERT")
→ Cross-check with search_for_pattern("SELECT.*FROM") for read-only tables.

7. What is the timestamp column for each output table?
→ search_for_pattern("created_at|snapshot_ts|asof_ts|received_at|timestamp")

8. What is a realistic stale threshold per output table?
→ How often does the service write? (Check sleep intervals, cron schedule.)
→ Stale threshold = 3x the write interval minimum.

9. Does it use Redis, S3, files, or other external state?
→ search_for_pattern("redis|s3|boto|aiofiles|open(")

10. Are all SQL queries parameterized?
→ search_for_pattern("f\\".*SELECT|f'.*INSERT|%s|\\$1|bindparams")
→ Flag any f-string SQL as a security issue.
"""
    )

    if service_type == "continuous_db_writer":
        print(
            """━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
SECTION 3 (continuous_db_writer): Write Patterns
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
11. Is it bulk INSERT or row-by-row?
→ search_for_pattern("executemany|copy_records_to_table|insert_many")
12. How does it handle duplicate keys?
→ search_for_pattern("ON CONFLICT|upsert|INSERT OR REPLACE")
13. Expected row growth rate (rows/hour)?
→ Estimate from sleep intervals × data volume.
"""
        )

    elif service_type == "http_api_server":
        print(
            """━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
SECTION 3 (http_api_server): API Endpoints
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
11. List ALL real routes (not just /health).
→ get_symbols_overview on router files, then find_symbol for each route handler.
12. Which routes require authentication?
→ search_for_pattern("Depends|require_auth|Authorization|Bearer")
13. Expected response times per endpoint?
→ Check for timeouts, DB queries, external calls in each handler.
"""
        )

    elif service_type == "file_watcher":
        print(
            """━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
SECTION 3 (file_watcher): File Monitoring
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
11. What mount paths does it monitor?
→ Check docker-compose.yml `volumes:` and search_for_pattern("WATCH_PATH|MOUNT_DIR")
12. What is the state file format and location?
→ search_for_pattern("state_file|checkpoint|last_processed|cursor")
13. What happens when the mount becomes unavailable?
→ search_for_pattern("except.*OSError|mount.*error|inotify.*limit")
"""
        )

    # This template interpolates {service_type} near the end, so it must be an
    # f-string; as a plain string the placeholder was printed literally.
    print(
        f"""━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
SECTION 4: Failure Modes (required ≥5 rows)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
For each failure mode, find the exact fix command.

→ search_for_pattern("except|raise|logger.error|logger.critical|panic!")
→ Read each exception handler with find_symbol(include_body=True)
→ Build the Troubleshooting table in SKILL.md from these real cases.

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
SECTION 5: Log Patterns (for log_hunter.py)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
Find real patterns — do NOT invent generic ones like "ERROR" or "ConnectionError".

→ search_for_pattern("logger.info|logging.info") → info patterns
→ search_for_pattern("logger.warning|logger.error") → error/warning patterns
→ search_for_pattern("logger.critical|raise.*Error|panic!") → critical patterns
→ For Rust: search_for_pattern("thread '.*' panicked")

Copy the actual error message strings verbatim into log_hunter.py PATTERNS list.

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
SECTION 6: Write Phase 2 Scripts
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
Now replace ALL [PENDING RESEARCH] stubs in scripts/:

scripts/health_probe.py
- Replace STALE_CHECKS with actual table names + timestamp cols + thresholds
- Use external mapped port (e.g. 5433), NOT container-internal port (5432)
- Print exact docker/SQL fix command on failure

scripts/log_hunter.py
- Replace PATTERNS with patterns found in Section 5 above
- Use severity bucketing: critical → error → warning → info

scripts/data_explorer.py
- Replace TABLE, DB_PORT, DB_NAME with real values
- All queries must use parameterized %s — no f-strings in SQL

scripts/<specialist>.py (based on service type: {service_type})
- See references/script_quality_standards.md for the template
"""
    )
258
+
259
+
260
def generate_protected_regions() -> str:
    """Return the markdown stub for the SEMANTIC_START/SEMANTIC_END protected region."""
    template_lines = [
        "",
        "## Protected Regions",
        "",
        "<!-- SEMANTIC_START -->",
        "## Semantic Deep Dive (Human/Agent Refined)",
        "",
        "Add deep operational knowledge here after Phase 2.",
        "This section is preserved during auto-updates.",
        "",
        "<!-- SEMANTIC_END -->",
        "",
    ]
    # The empty first and last items supply the leading and trailing newline.
    return "\n".join(template_lines)
273
+
274
+
275
def main() -> None:
    """
    Command-line entry point.

    Commands (read from ``sys.argv``):
        classify <path>   — classify the service under <path> and print the result
        questions [type]  — print the Phase 2 agenda (type defaults to
                            "continuous_db_writer")
        template          — print the protected-regions template

    Exits with status 1 when no command is given, when ``classify`` is
    missing its path, or when the command is not recognised.
    """
    args = sys.argv[1:]

    if not args:
        print("Usage: python deep_dive.py <command> [args...]")
        print(" classify <path> — Classify service type from source")
        print(" questions <type> — Print Phase 2 research agenda")
        print(" template — Print protected regions template")
        sys.exit(1)

    cmd = args[0]

    if cmd == "classify":
        # Report the missing argument instead of calling a valid command
        # "unknown".
        if len(args) < 2:
            print("Usage: python deep_dive.py classify <path>")
            sys.exit(1)
        result = classify_service(Path(args[1]))
        print("Service Classification:")
        for k, v in result.items():
            print(f" {k}: {v}")

    elif cmd == "questions":
        stype = args[1] if len(args) > 1 else "continuous_db_writer"
        print_deep_dive_questions(stype)

    elif cmd == "template":
        print(generate_protected_regions())

    else:
        print(f"Unknown command: {cmd}")
        sys.exit(1)


if __name__ == "__main__":
    main()