@groupby/ai-dev 0.5.7 → 0.5.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. package/package.json +1 -1
  2. package/teams/agentic-checkout/prompts/AGENTS.md +103 -0
  3. package/teams/agentic-checkout/prompts/create-plan.md +103 -0
  4. package/teams/agentic-checkout/prompts/create-pull-request.md +157 -0
  5. package/teams/agentic-checkout/prompts/fix-pr-comments.md +170 -0
  6. package/teams/agentic-checkout/prompts/fix-review-findings.md +1 -12
  7. package/teams/agentic-checkout/prompts/implement-task.md +62 -0
  8. package/teams/agentic-checkout/prompts/new-workspace.md +12 -0
  9. package/teams/agentic-checkout/prompts/orchestrate-component-change.md +25 -0
  10. package/teams/agentic-checkout/prompts/review-change.md +8 -2
  11. package/teams/agentic-checkout/scripts/check-secrets +51 -0
  12. package/teams/agentic-checkout/scripts/install-git-hooks +15 -0
  13. package/teams/agentic-checkout/scripts/local-fast-report +5 -0
  14. package/teams/agentic-checkout/scripts/local-report +205 -0
  15. package/teams/agentic-checkout/scripts/local-summarize +47 -0
  16. package/teams/agentic-checkout/scripts/logs-deps +9 -0
  17. package/teams/agentic-checkout/scripts/setup-local-fast-model +20 -0
  18. package/teams/agentic-checkout/scripts/start-deps +15 -0
  19. package/teams/agentic-checkout/scripts/status-deps +9 -0
  20. package/teams/agentic-checkout/scripts/stop-deps +9 -0
  21. package/teams/agentic-checkout/scripts/sync-components +110 -0
  22. package/teams/agentic-checkout/skills/approval-gated-task-execution/SKILL.md +57 -0
  23. package/teams/agentic-checkout/skills/component-verification/SKILL.md +34 -0
  24. package/teams/agentic-checkout/skills/grill-me/SKILL.md +23 -0
  25. package/teams/agentic-checkout/skills/karpathy-guidelines/SKILL.md +67 -0
  26. package/teams/agentic-checkout/skills/secret-safety/SKILL.md +41 -0
  27. package/teams/agentic-checkout/skills/sync-components/SKILL.md +23 -60
  28. package/teams/agentic-checkout/skills/tdd/SKILL.md +48 -0
  29. package/teams/fhr-ai-team/github/PULL_REQUEST_TEMPLATE/full.md +31 -0
  30. package/teams/fhr-ai-team/github/PULL_REQUEST_TEMPLATE/light.md +7 -0
  31. package/teams/fhr-ai-team/github/copilot-instructions.md +24 -0
  32. package/teams/fhr-ai-team/github/instructions/python.instructions.md +23 -0
  33. package/teams/fhr-ai-team/github/pull_request_template.md +21 -0
  34. package/teams/fhr-ai-team/prompts/brainstorm.md +7 -0
  35. package/teams/fhr-ai-team/prompts/plan-algo-tests.md +7 -0
  36. package/teams/fhr-ai-team/prompts/plan.md +7 -0
  37. package/teams/fhr-ai-team/prompts/pr-description.md +7 -0
  38. package/teams/fhr-ai-team/prompts/test.md +7 -0
  39. package/teams/fhr-ai-team/resources/AGENTS.md +55 -0
  40. package/teams/fhr-ai-team/resources/CLAUDE.md +52 -0
  41. package/teams/fhr-ai-team/resources/README.md +51 -0
  42. package/teams/fhr-ai-team/resources/claude-code-setup.md +60 -0
  43. package/teams/fhr-ai-team/resources/copilot-setup.md +64 -0
  44. package/teams/fhr-ai-team/resources/onboarding.md +179 -0
  45. package/teams/fhr-ai-team/resources/opencode-install.md +29 -0
  46. package/teams/fhr-ai-team/resources/opencode-setup.md +43 -0
  47. package/teams/fhr-ai-team/skills/algo-test-planning/SKILL.md +192 -0
  48. package/teams/fhr-ai-team/skills/algo-test-planning/references/pipeline-registry.md +280 -0
  49. package/teams/fhr-ai-team/skills/brainstorming/SKILL.md +111 -0
  50. package/teams/fhr-ai-team/skills/e2e-testing/SKILL.md +163 -0
  51. package/teams/fhr-ai-team/skills/grill-me/SKILL.md +10 -0
  52. package/teams/fhr-ai-team/skills/ml-tooling-dev/SKILL.md +313 -0
  53. package/teams/fhr-ai-team/skills/ml-tooling-dev/references/kubectl-debug.md +165 -0
  54. package/teams/fhr-ai-team/skills/ml-tooling-dev/references/mongodb-config.md +218 -0
  55. package/teams/fhr-ai-team/skills/ml-tooling-dev/references/pipeline-configs.md +190 -0
  56. package/teams/fhr-ai-team/skills/ml-tooling-dev/references/pipeline-steps.md +182 -0
  57. package/teams/fhr-ai-team/skills/ml-tooling-dev/scripts/kf_logs.py +203 -0
  58. package/teams/fhr-ai-team/skills/ml-tooling-dev/scripts/kf_query.py +233 -0
  59. package/teams/fhr-ai-team/skills/ml-tooling-dev/scripts/kf_wait.py +195 -0
  60. package/teams/fhr-ai-team/skills/ml-tooling-dev/scripts/mlflow_query.py +252 -0
  61. package/teams/fhr-ai-team/skills/ml-tooling-dev/scripts/mongo_predictor.py +352 -0
  62. package/teams/fhr-ai-team/skills/naming-conventions-reviewer/SKILL.md +230 -0
  63. package/teams/fhr-ai-team/skills/naming-conventions-reviewer/references/dataset-naming.md +190 -0
  64. package/teams/fhr-ai-team/skills/naming-conventions-reviewer/references/domain-vocabulary.md +447 -0
  65. package/teams/fhr-ai-team/skills/naming-conventions-reviewer/references/repo-dependency-graph.md +264 -0
  66. package/teams/fhr-ai-team/skills/planning/SKILL.md +138 -0
  67. package/teams/fhr-ai-team/skills/pr-description/SKILL.md +94 -0
@@ -0,0 +1,252 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Query the dev MLflow tracking server for runs, metrics, and registered models.
4
+
5
+ Usage:
6
+ python3 mlflow_query.py experiments # List all experiments
7
+ python3 mlflow_query.py runs <experiment_name> # List runs in experiment
8
+ python3 mlflow_query.py run <run_id> # Show run details + metrics
9
+ python3 mlflow_query.py models # List registered models
10
+ python3 mlflow_query.py model <model_name> # Show model versions + aliases
11
+ python3 mlflow_query.py model-for-predictor <predictor_id> # Find model by predictor_id
12
+ """
13
+
14
+ import argparse
15
+ import json
16
+ import re
17
+ import sys
18
+ from datetime import datetime
19
+ from urllib.request import urlopen, Request
20
+ from urllib.parse import urlencode, quote
21
+ from urllib.error import URLError
22
+
23
+ MLFLOW_HOST = "http://10.11.96.16:5000"
24
+ API_BASE = f"{MLFLOW_HOST}/api/2.0/mlflow"
25
+
26
+ # Only allow alphanumeric, hyphens, underscores, dots, and spaces in filter values
27
+ _SAFE_FILTER_VALUE = re.compile(r"^[\w.\- ]+$")
28
+
29
+
30
def get(path: str, params: dict = None) -> dict:
    """Send a GET request to the MLflow REST API and return the decoded JSON.

    ``path`` is appended to ``API_BASE``. When ``params`` is non-empty it is
    form-encoded with ``urlencode`` and attached as the query string.

    If ``urlopen`` raises ``URLError`` the failure is reported on stderr and
    the process exits with status 1.
    """
    query = f"?{urlencode(params)}" if params else ""
    url = f"{API_BASE}{path}" + query
    try:
        with urlopen(url, timeout=15) as response:
            return json.loads(response.read())
    except URLError as e:
        print(f"[ERROR] Cannot reach MLflow at {url}: {e}", file=sys.stderr)
        sys.exit(1)
40
+
41
+
42
def get_with_name(path: str, name: str, extra_params: dict = None) -> dict:
    """GET an endpoint that takes a ``name`` query parameter.

    ``name`` is percent-encoded with ``quote(name, safe="")`` and placed first
    in the query string; ``extra_params``, when non-empty, is form-encoded
    with ``urlencode`` and appended after it.

    If ``urlopen`` raises ``URLError`` the failure is reported on stderr and
    the process exits with status 1.
    """
    query_parts = ["name=" + quote(name, safe="")]
    if extra_params:
        query_parts.append(urlencode(extra_params))
    url = f"{API_BASE}{path}?" + "&".join(query_parts)
    try:
        with urlopen(url, timeout=15) as response:
            return json.loads(response.read())
    except URLError as e:
        print(f"[ERROR] Cannot reach MLflow at {url}: {e}", file=sys.stderr)
        sys.exit(1)
54
+
55
+
56
def post(path: str, body: dict) -> dict:
    """POST ``body`` as JSON to the MLflow REST API and return the decoded reply.

    The request carries a ``Content-Type: application/json`` header. If
    ``urlopen`` raises ``URLError`` the failure is reported on stderr and the
    process exits with status 1.
    """
    url = f"{API_BASE}{path}"
    request = Request(
        url,
        data=json.dumps(body).encode(),
        headers={"Content-Type": "application/json"},
    )
    try:
        with urlopen(request, timeout=15) as response:
            return json.loads(response.read())
    except URLError as e:
        print(f"[ERROR] Cannot reach MLflow at {url}: {e}", file=sys.stderr)
        sys.exit(1)
66
+
67
+
68
def list_experiments() -> None:
    """Print a two-column table (ID, name) of up to 100 experiments.

    The request asks the server to order results by ``last_update_time DESC``.
    """
    search = {"max_results": 100, "order_by": "last_update_time DESC"}
    response = get("/experiments/search", search)

    print(f"{'Experiment ID':<15} {'Name'}")
    print("-" * 80)
    for exp in response.get("experiments", []):
        print(f"{exp['experiment_id']:<15} {exp['name']}")
75
+
76
+
77
def list_runs(experiment_name: str) -> None:
    """Print the latest runs of the first experiment whose name matches.

    Matching is a case-insensitive substring test against up to 200
    experiments returned by ``/experiments/search``; the first hit wins. Up
    to 30 runs are then requested, ordered by ``start_time DESC``, and printed
    as a table of run ID, status, start time and run name.

    When no experiment matches, an error is written to stderr, the experiment
    listing is printed as a hint, and the process exits with status 1.
    """
    data = get("/experiments/search", {"max_results": 200})
    needle = experiment_name.lower()
    for exp in data.get("experiments", []):
        if needle in exp["name"].lower():
            exp_id = exp["experiment_id"]
            print(f"Experiment: {exp['name']} (ID: {exp_id})")
            break
    else:
        # for/else: reached only when no experiment matched. The error goes to
        # stderr so it does not mix with the table printed on stdout.
        print(f"[ERROR] No experiment found matching '{experiment_name}'", file=sys.stderr)
        list_experiments()
        sys.exit(1)

    runs_data = post("/runs/search", {
        "experiment_ids": [exp_id],
        "max_results": 30,
        "order_by": ["start_time DESC"],
    })
    runs = runs_data.get("runs", [])

    print(f"\n{'Run ID':<36} {'Status':<12} {'Start':<22} Name")
    print("-" * 100)
    for run in runs:
        info = run.get("info", {})
        run_id = info.get("run_id", "")
        status = info.get("status", "")
        start = info.get("start_time", 0)
        run_name = info.get("run_name", "")
        # start_time is divided by 1000 before conversion, i.e. it is treated
        # as epoch milliseconds; a missing/zero value prints as blank.
        start_str = datetime.fromtimestamp(start / 1000).strftime("%Y-%m-%d %H:%M") if start else ""
        print(f"{run_id:<36} {status:<12} {start_str:<22} {run_name}")
107
+
108
+
109
def show_run(run_id: str) -> None:
    """Print one run's header fields, then its params, metrics and tags.

    Params and metrics are listed sorted by key. Tags whose key starts with
    ``mlflow.log`` are left out. Timestamps are divided by 1000 before being
    converted, and a missing or zero timestamp is shown as ``N/A``.
    """

    def fmt_time(millis) -> str:
        if not millis:
            return "N/A"
        return datetime.fromtimestamp(millis / 1000).strftime("%Y-%m-%d %H:%M:%S")

    run = get("/runs/get", {"run_id": run_id}).get("run", {})
    info = run.get("info", {})
    params = run.get("data", {}).get("params", [])
    metrics = run.get("data", {}).get("metrics", [])
    tags = run.get("data", {}).get("tags", [])

    start_str = fmt_time(info.get("start_time", 0))
    end_str = fmt_time(info.get("end_time", 0))

    print(f"Run ID: {info.get('run_id', '')}")
    print(f"Name: {info.get('run_name', '')}")
    print(f"Status: {info.get('status', '')}")
    print(f"Started: {start_str}")
    print(f"Finished: {end_str}")
    print(f"Artifact: {info.get('artifact_uri', '')}")
    print()

    if params:
        print("PARAMS:")
        for p in sorted(params, key=lambda x: x["key"]):
            print(f" {p['key']}: {p['value']}")
        print()

    if metrics:
        print("METRICS:")
        for m in sorted(metrics, key=lambda x: x["key"]):
            val = m['value']
            # Floats get a fixed six decimals; anything else prints as-is.
            if isinstance(val, float):
                print(f" {m['key']}: {val:.6f}")
            else:
                print(f" {m['key']}: {val}")
        print()

    relevant_tags = {}
    for t in tags:
        if t["key"].startswith("mlflow.log"):
            continue
        relevant_tags[t["key"]] = t["value"]
    if relevant_tags:
        print("TAGS:")
        for k, v in sorted(relevant_tags.items()):
            print(f" {k}: {v}")
149
+
150
+
151
def list_models() -> None:
    """Print a table of up to 100 registered models.

    For each model the version and run ID shown come from the first entry of
    its ``latest_versions`` list; both columns are blank when that list is
    missing or empty.
    """
    response = get("/registered-models/search", {"max_results": 100})

    print(f"{'Model Name':<60} {'Latest Version':<16} Latest Run ID")
    print("-" * 110)
    for m in response.get("registered_models", []):
        name = m.get("name", "")
        latest = m.get("latest_versions", [])
        head = latest[0] if latest else {}
        version = head.get("version", "")
        run_id = head.get("run_id", "")
        print(f"{name:<60} {version:<16} {run_id}")
162
+
163
+
164
def show_model(model_name: str) -> None:
    """Print a registered model's name and a table of its versions.

    The model record comes from ``/registered-models/get``; up to 20 versions
    are then requested from ``/model-versions/search``, ordered by
    ``version_number DESC``. Each row shows version, run ID, stage, last
    update time and every alias attached to that version (comma-separated).
    """
    data = get_with_name("/registered-models/get", model_name)
    model = data.get("registered_model", {})
    print(f"Model: {model.get('name', '')}")

    # Single quotes are doubled before the name is embedded in the filter.
    # NOTE(review): confirm MLflow's filter grammar accepts '' as an escaped
    # quote inside a string literal.
    sanitized_name = model_name.replace("'", "''")
    versions_data = get("/model-versions/search", {
        "filter": f"name='{sanitized_name}'",
        "max_results": 20,
        "order_by": "version_number DESC",
    })
    versions = versions_data.get("model_versions", [])

    # Aliases are read from the registered_model response. Several aliases
    # may name the same version, so collect a list per version rather than
    # letting later entries overwrite earlier ones.
    aliases_by_version = {}
    for entry in model.get("aliases", []):
        alias_name = entry.get("alias")
        if alias_name:
            aliases_by_version.setdefault(entry.get("version"), []).append(alias_name)

    print(f"\n{'Version':<10} {'Run ID':<36} {'Stage':<12} {'Updated':<22} Alias")
    print("-" * 100)
    for v in versions:
        version = v.get("version", "")
        run_id = v.get("run_id", "")
        stage = v.get("current_stage", "None")
        ts = v.get("last_updated_timestamp", 0)
        # Timestamp is divided by 1000 before conversion; zero prints blank.
        updated = datetime.fromtimestamp(ts / 1000).strftime("%Y-%m-%d %H:%M") if ts else ""
        alias = ", ".join(sorted(aliases_by_version.get(version, [])))
        print(f"{version:<10} {run_id:<36} {stage:<12} {updated:<22} {alias}")
193
+
194
+
195
def find_model_for_predictor(predictor_id: str) -> None:
    """Show every registered model whose name contains ``predictor_id``.

    The ID is first checked against ``_SAFE_FILTER_VALUE`` because it is
    embedded directly in a ``name LIKE '%...%'`` filter; a rejected value is
    reported on stderr and the process exits with status 1. Up to 20 matches
    are requested and each one is printed via ``show_model``. With no match,
    a hint pointing at the models page is printed instead.
    """
    if _SAFE_FILTER_VALUE.match(predictor_id) is None:
        print(f"[ERROR] Invalid predictor_id: {predictor_id!r}", file=sys.stderr)
        sys.exit(1)

    search = {
        "filter": f"name LIKE '%{predictor_id}%'",
        "max_results": 20,
    }
    models = get("/registered-models/search", search).get("registered_models", [])

    if models:
        for m in models:
            print(f"\n{'='*60}")
            show_model(m["name"])
    else:
        print(f"No registered models found containing: {predictor_id}")
        print(f"Browse all models: {MLFLOW_HOST}/#/models")
211
+
212
+
213
def main():
    """Parse the command line and run the selected sub-command.

    With no sub-command given, the parser's help text is printed.
    """
    parser = argparse.ArgumentParser(description=f"Query dev MLflow ({MLFLOW_HOST})")
    sub = parser.add_subparsers(dest="cmd")

    sub.add_parser("experiments", help="List all experiments")

    runs_parser = sub.add_parser("runs", help="List runs in experiment")
    runs_parser.add_argument("experiment_name", help="Experiment name (partial match)")

    run_parser = sub.add_parser("run", help="Show run details + metrics")
    run_parser.add_argument("run_id", help="MLflow run ID")

    sub.add_parser("models", help="List registered models")

    model_parser = sub.add_parser("model", help="Show model versions and aliases")
    model_parser.add_argument("model_name", help="Registered model name")

    predictor_parser = sub.add_parser("model-for-predictor", help="Find model by predictor_id")
    predictor_parser.add_argument("predictor_id", help="Predictor ID to search for")

    args = parser.parse_args()

    # Map each sub-command to a thunk so only the chosen one touches its args.
    handlers = {
        "experiments": lambda: list_experiments(),
        "runs": lambda: list_runs(args.experiment_name),
        "run": lambda: show_run(args.run_id),
        "models": lambda: list_models(),
        "model": lambda: show_model(args.model_name),
        "model-for-predictor": lambda: find_model_for_predictor(args.predictor_id),
    }
    handler = handlers.get(args.cmd)
    if handler is None:
        parser.print_help()
    else:
        handler()
249
+
250
+
251
+ if __name__ == "__main__":
252
+ main()
@@ -0,0 +1,352 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Read, update, apply, or diff predictor training config in dev MongoDB.
4
+
5
+ All operations target: mongodb://10.11.96.21:27017/earlybirds (collection: predictors)
6
+
7
+ Usage:
8
+ mongo_predictor.py read <predictor_id> [--strategy <name>]
9
+ mongo_predictor.py update <predictor_id> [--strategy <name>] --set k=v [--set k=v ...]
10
+ mongo_predictor.py apply <predictor_id> [--strategy <name>] --file <config.json>
11
+ mongo_predictor.py diff <predictor_id> [--strategy <name>] --file <config.json>
12
+
13
+ Strategy defaults to "semantic-search-learning" (the documented use case).
14
+ The strategy config lives at: config.batch.<strategy>
15
+
16
+ Examples:
17
+ # Read current config
18
+ mongo_predictor.py read 64f0a12b5856b11b7aa4e71e
19
+
20
+ # Patch a few fields (dot-notation $set under config.batch.<strategy>)
21
+ mongo_predictor.py update 64f0a12b5856b11b7aa4e71e \\
22
+ --set pipelineConfig.maxSequenceLength=64 \\
23
+ --set learningConfig.trainingArguments.perDeviceTrainBatchSize=128 \\
24
+ --set learningConfig.trainingArguments.gradientCheckpointing=true
25
+
26
+ # Replace the whole strategy config from a file (uses NumberInt for ints)
27
+ mongo_predictor.py apply 64f0a12b5856b11b7aa4e71e --file experiment-A.json
28
+
29
+ # Show what apply would change, before doing it
30
+ mongo_predictor.py diff 64f0a12b5856b11b7aa4e71e --file experiment-A.json
31
+ """
32
+
33
+ import argparse
34
+ import json
35
+ import os
36
+ import re
37
+ import shutil
38
+ import subprocess
39
+ import sys
40
+
41
+ MONGO_URI = "mongodb://10.11.96.21:27017/earlybirds"
42
+ COLLECTION = "predictors"
43
+ DEFAULT_STRATEGY = "semantic-search-learning"
44
+ MONGOSH_TIMEOUT_SECONDS = 30
45
+
46
+ OBJECTID_RE = re.compile(r"^[0-9a-fA-F]{24}$")
47
+
48
+
49
def require_mongosh() -> None:
    """Exit with status 1 and an install hint unless ``mongosh`` is on PATH."""
    if shutil.which("mongosh") is not None:
        return
    print("[ERROR] mongosh not found. Install with: brew install mongosh", file=sys.stderr)
    sys.exit(1)
53
+
54
+
55
def validate_object_id(oid: str) -> None:
    """Exit with status 1 unless ``oid`` matches ``OBJECTID_RE`` in full.

    ``fullmatch`` is used rather than ``match`` because a ``$`` anchor also
    matches just before a trailing newline, which would let ``"<24 hex>\\n"``
    through validation.
    """
    if OBJECTID_RE.fullmatch(oid) is None:
        print(
            f"[ERROR] Invalid predictor_id: {oid!r} (expected 24-char hex ObjectId)",
            file=sys.stderr,
        )
        sys.exit(1)
62
+
63
+
64
def run_mongosh(js: str, payload: dict) -> dict:
    """Execute a ``mongosh --eval`` script and return its stdout parsed as JSON.

    ``payload`` is serialised to JSON and handed to the child process in the
    ``PAYLOAD`` environment variable; the script text itself is passed
    unchanged. The js template MUST read its inputs from process.env.PAYLOAD
    and emit a single JSON object via print(JSON.stringify(...)), so that
    user-controlled values never appear in the js text itself.

    Returns ``{}`` when the script prints nothing. A timeout, a non-zero exit
    status, or unparseable output is reported on stderr and the process exits
    with status 1.
    """
    require_mongosh()
    child_env = os.environ.copy()
    child_env["PAYLOAD"] = json.dumps(payload)
    try:
        result = subprocess.run(
            ["mongosh", MONGO_URI, "--quiet", "--eval", js],
            env=child_env,
            capture_output=True,
            text=True,
            timeout=MONGOSH_TIMEOUT_SECONDS,
        )
    except subprocess.TimeoutExpired:
        print(f"[ERROR] mongosh timed out after {MONGOSH_TIMEOUT_SECONDS}s", file=sys.stderr)
        sys.exit(1)

    if result.returncode != 0:
        print(f"[ERROR] mongosh failed (exit {result.returncode}):", file=sys.stderr)
        # Relay whatever the child wrote, stderr first, skipping blank streams.
        for stream in (result.stderr, result.stdout):
            text = stream.strip()
            if text:
                print(text, file=sys.stderr)
        sys.exit(1)

    out = result.stdout.strip()
    if not out:
        return {}
    try:
        return json.loads(out)
    except json.JSONDecodeError as e:
        print(f"[ERROR] Could not parse mongosh output as JSON: {e}", file=sys.stderr)
        print(out, file=sys.stderr)
        sys.exit(1)
98
+
99
+
100
def coerce_value(raw: str):
    """Coerce a CLI string value to a typed Python value for $set.

    Rules, applied in order:

    * ``null`` / ``true`` / ``false`` become ``None`` / ``True`` / ``False``.
    * Text starting with ``{``, ``[`` or ``"`` is parsed as JSON when valid.
    * Text containing ``.`` or ``e``/``E`` is tried as a float, anything else
      as an int.
    * Everything else is returned unchanged as a string.

    Two inputs that ``int()``/``float()`` would accept are deliberately kept
    as strings: text containing ``_`` (so ``2024_01`` does not silently turn
    into ``202401``) and exponents that overflow to infinity (``json.dumps``
    would write the non-JSON token ``Infinity`` for them).
    """
    import math  # function-scope import: this block is the only user of math

    keywords = {"null": None, "true": True, "false": False}
    if raw in keywords:
        return keywords[raw]

    # JSON literal (object, array, quoted string)
    if raw and raw[0] in '{["':
        try:
            return json.loads(raw)
        except json.JSONDecodeError:
            pass

    # int()/float() accept digit-group underscores; treat such text as a string.
    if "_" in raw:
        return raw

    try:
        if "." in raw or "e" in raw.lower():
            number = float(raw)
            return number if math.isfinite(number) else raw
        return int(raw)
    except ValueError:
        return raw
122
+
123
+
124
def to_extended_json_ints(obj):
    """Recursively rewrite Python ints in MongoDB extended-JSON integer form.

    Used by `apply` so a full-strategy $set stores integer fields as integer
    types instead of Double (see mongodb-config.md gotchas).

    * Ints within the signed 32-bit range become ``{"$numberInt": "N"}``.
    * Ints outside it but within the signed 64-bit range become
      ``{"$numberLong": "N"}``, since they do not fit an Int32.
    * Ints beyond 64 bits are returned unchanged.
      NOTE(review): how the consumer handles such values is not visible here.
    * Bools, floats, strings and ``None`` pass through; dicts and lists are
      rebuilt with their values converted.
    """
    if isinstance(obj, bool):
        return obj  # bool is a subclass of int; keep as bool
    if isinstance(obj, int):
        if -(2 ** 31) <= obj < 2 ** 31:
            return {"$numberInt": str(obj)}
        if -(2 ** 63) <= obj < 2 ** 63:
            return {"$numberLong": str(obj)}
        return obj
    if isinstance(obj, dict):
        return {k: to_extended_json_ints(v) for k, v in obj.items()}
    if isinstance(obj, list):
        return [to_extended_json_ints(v) for v in obj]
    return obj
139
+
140
+
141
def fetch_strategy_config(predictor_id: str, strategy: str) -> dict:
    """Look up ``config.batch.<strategy>`` on one predictor document.

    Returns the stored config, or ``{}`` when the document exists but has no
    (or an empty) entry for ``strategy``. When no document has the given
    ``_id``, an error is written to stderr and the process exits with
    status 2.
    """
    js = """
const p = JSON.parse(process.env.PAYLOAD);
const doc = db.getCollection("predictors").findOne({_id: ObjectId(p.id)});
if (!doc) { print(JSON.stringify({found: false})); quit(0); }
const cfg = (doc.config && doc.config.batch && doc.config.batch[p.strategy]) || null;
print(JSON.stringify({found: true, config: cfg}));
"""
    result = run_mongosh(js, {"id": predictor_id, "strategy": strategy})
    if result.get("found"):
        return result.get("config") or {}
    print(f"[ERROR] No predictor matched _id={predictor_id}", file=sys.stderr)
    sys.exit(2)
156
+
157
+
158
def cmd_read(predictor_id: str, strategy: str) -> None:
    """Print the predictor's current strategy config as sorted, indented JSON.

    When the config is absent or empty, a warning goes to stderr and ``{}``
    is printed on stdout instead.
    """
    cfg = fetch_strategy_config(predictor_id, strategy)
    if cfg:
        print(json.dumps(cfg, indent=2, sort_keys=True))
        return
    print(
        f"[WARN] Predictor {predictor_id} has no config.batch.{strategy}",
        file=sys.stderr,
    )
    print("{}")
168
+
169
+
170
def cmd_update(predictor_id: str, strategy: str, sets: list) -> None:
    """Apply ``k=v`` patches to fields under ``config.batch.<strategy>``.

    Each entry of ``sets`` is split at its first ``=``; the key is stripped,
    prefixed with ``config.batch.<strategy>.`` and the value is typed with
    ``coerce_value``. All pairs go into one ``$set`` in a single
    ``updateOne``. The matched/modified counts and the fields set are printed.

    Exits with status 1 on an empty ``sets`` or a malformed entry, and with
    status 2 when no document has the given ``_id``.
    """
    if not sets:
        print("[ERROR] update requires at least one --set k=v", file=sys.stderr)
        sys.exit(1)

    prefix = f"config.batch.{strategy}."
    set_doc = {}
    for kv in sets:
        name, separator, raw = kv.partition("=")
        if not separator:
            print(f"[ERROR] --set value must be k=v, got: {kv!r}", file=sys.stderr)
            sys.exit(1)
        key = name.strip()
        if not key:
            print(f"[ERROR] empty key in --set: {kv!r}", file=sys.stderr)
            sys.exit(1)
        set_doc[prefix + key] = coerce_value(raw)

    js = """
const p = JSON.parse(process.env.PAYLOAD);
const r = db.getCollection("predictors").updateOne(
{_id: ObjectId(p.id)},
{$set: p.set}
);
print(JSON.stringify({matched: r.matchedCount, modified: r.modifiedCount}));
"""
    result = run_mongosh(js, {"id": predictor_id, "set": set_doc})
    matched = result.get("matched", 0)
    modified = result.get("modified", 0)
    print(f"matched={matched} modified={modified}")
    if matched == 0:
        print(f"[ERROR] No predictor matched _id={predictor_id}", file=sys.stderr)
        sys.exit(2)

    print("Set fields:")
    for k, v in set_doc.items():
        print(f" {k} = {json.dumps(v)}")
207
+
208
+
209
def cmd_apply(predictor_id: str, strategy: str, file_path: str) -> None:
    """Overwrite ``config.batch.<strategy>`` with the object in a JSON file.

    The file must hold a JSON object at the top level. Its ints are rewritten
    by ``to_extended_json_ints`` and the whole object is written with a single
    ``$set`` on ``config.batch.<strategy>``.

    Exits with status 1 when the file's top level is not an object, and with
    status 2 when no document has the given ``_id``.
    """
    new_cfg = load_json_file(file_path)
    if not isinstance(new_cfg, dict):
        print(f"[ERROR] {file_path} must contain a JSON object at the top level", file=sys.stderr)
        sys.exit(1)

    payload = {
        "id": predictor_id,
        "set": {f"config.batch.{strategy}": to_extended_json_ints(new_cfg)},
    }

    # The script parses the payload with EJSON.parse (not JSON.parse) so the
    # {"$numberInt": "N"} wrappers are honored and stored as Int32. Bare
    # floats remain Double, which is what we want.
    js = """
const p = EJSON.parse(process.env.PAYLOAD);
const r = db.getCollection("predictors").updateOne(
{_id: ObjectId(p.id)},
{$set: p.set}
);
print(JSON.stringify({matched: r.matchedCount, modified: r.modifiedCount}));
"""
    result = run_mongosh(js, payload)
    matched = result.get("matched", 0)
    modified = result.get("modified", 0)
    print(f"matched={matched} modified={modified}")
    if matched == 0:
        print(f"[ERROR] No predictor matched _id={predictor_id}", file=sys.stderr)
        sys.exit(2)
    print(f"Applied config.batch.{strategy} from {file_path}")
237
+
238
+
239
def cmd_diff(predictor_id: str, strategy: str, file_path: str) -> None:
    """Print the differences between the stored strategy config and a file.

    The file must hold a JSON object at the top level, otherwise the process
    exits with status 1. Each difference is one line: ``+`` for a path only
    in the file, ``-`` for a path only in the stored config, ``~`` for a
    changed value.
    """
    new_cfg = load_json_file(file_path)
    if not isinstance(new_cfg, dict):
        print(f"[ERROR] {file_path} must contain a JSON object at the top level", file=sys.stderr)
        sys.exit(1)

    current = fetch_strategy_config(predictor_id, strategy)
    diffs = compute_diff(current, new_cfg, prefix="")
    if diffs:
        print(f"Diff (current -> {file_path}):")
        for op, path, before, after in diffs:
            if op == "+":
                line = f" + {path} = {json.dumps(after)}"
            elif op == "-":
                line = f" - {path} (was {json.dumps(before)})"
            else:
                line = f" ~ {path}: {json.dumps(before)} -> {json.dumps(after)}"
            print(line)
    else:
        print(f"No differences. Current config.batch.{strategy} matches {file_path}.")
258
+
259
+
260
def load_json_file(file_path: str):
    """Read ``file_path`` as UTF-8 and return the parsed JSON value.

    The encoding is given explicitly so the result does not depend on the
    platform's locale default. A missing file, invalid JSON, bytes that are
    not valid UTF-8, or any other read error is reported on stderr and the
    process exits with status 1.
    """
    try:
        with open(file_path, encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        print(f"[ERROR] File not found: {file_path}", file=sys.stderr)
        sys.exit(1)
    except json.JSONDecodeError as e:
        print(f"[ERROR] Invalid JSON in {file_path}: {e}", file=sys.stderr)
        sys.exit(1)
    except UnicodeDecodeError as e:
        # Neither an OSError nor a JSONDecodeError, so it needs its own
        # handler to avoid a raw traceback.
        print(f"[ERROR] {file_path} is not valid UTF-8: {e}", file=sys.stderr)
        sys.exit(1)
    except OSError as e:
        print(f"[ERROR] Could not read {file_path}: {e}", file=sys.stderr)
        sys.exit(1)
273
+
274
+
275
def _same_value(a, b) -> bool:
    """Compare two JSON values, never treating a bool as equal to a number.

    Plain ``==`` considers ``True == 1`` and ``False == 0``. Ints and floats
    of equal value (``64`` and ``64.0``) still compare equal.
    """
    if isinstance(a, bool) != isinstance(b, bool):
        return False
    if isinstance(a, list) and isinstance(b, list):
        return len(a) == len(b) and all(_same_value(x, y) for x, y in zip(a, b))
    if isinstance(a, dict) and isinstance(b, dict):
        return a.keys() == b.keys() and all(_same_value(a[k], b[k]) for k in a)
    return a == b


def compute_diff(a, b, prefix: str) -> list:
    """Return [(op, path, before, after)] where op is '+', '-', or '~'.

    When both sides are dicts their keys are walked in sorted order: a key
    only in ``b`` yields ``+``, a key only in ``a`` yields ``-``, and shared
    keys are compared recursively with the dotted path extended. Any other
    pair of values yields a single ``~`` entry at ``prefix`` when they differ.
    A bool and a number always count as different.
    """
    diffs = []
    if isinstance(a, dict) and isinstance(b, dict):
        for k in sorted(set(a) | set(b)):
            sub = f"{prefix}.{k}" if prefix else k
            if k not in a:
                diffs.append(("+", sub, None, b[k]))
            elif k not in b:
                diffs.append(("-", sub, a[k], None))
            else:
                diffs.extend(compute_diff(a[k], b[k], sub))
    elif not _same_value(a, b):
        diffs.append(("~", prefix, a, b))
    return diffs
290
+
291
+
292
def main():
    """Parse the command line, validate the predictor ID, run the sub-command.

    A sub-command is required. Every sub-command takes a positional
    ``predictor_id`` and an optional ``--strategy``, defined once on a shared
    parent parser.
    """
    parser = argparse.ArgumentParser(
        description=f"Manage predictor training config in dev MongoDB ({MONGO_URI})",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="See references/mongodb-config.md for the full strategy config schema.",
    )
    sub = parser.add_subparsers(dest="cmd", required=True)

    # Arguments shared by all sub-commands; add_help=False so the parent does
    # not add a second -h to each child parser.
    shared = argparse.ArgumentParser(add_help=False)
    shared.add_argument("predictor_id", help="MongoDB ObjectId (24 hex chars)")
    shared.add_argument(
        "--strategy",
        default=DEFAULT_STRATEGY,
        help=f"Strategy ID under config.batch (default: {DEFAULT_STRATEGY})",
    )

    sub.add_parser("read", parents=[shared], help="Print current config.batch.<strategy>")

    update_parser = sub.add_parser(
        "update",
        parents=[shared],
        help="Patch fields under config.batch.<strategy> via dot-notation $set",
    )
    update_parser.add_argument(
        "--set",
        action="append",
        dest="sets",
        default=[],
        metavar="K=V",
        help="Field path under strategy + value. Repeat for multiple fields.",
    )

    apply_parser = sub.add_parser(
        "apply",
        parents=[shared],
        help="Replace entire config.batch.<strategy> from a JSON file",
    )
    apply_parser.add_argument("--file", required=True, help="Path to JSON file with new strategy config")

    diff_parser = sub.add_parser(
        "diff",
        parents=[shared],
        help="Show diff between current config.batch.<strategy> and a JSON file",
    )
    diff_parser.add_argument("--file", required=True, help="Path to JSON file to compare against")

    args = parser.parse_args()
    validate_object_id(args.predictor_id)

    # Thunks keep sub-command-specific attributes (sets, file) from being
    # read unless that sub-command was actually chosen.
    handlers = {
        "read": lambda: cmd_read(args.predictor_id, args.strategy),
        "update": lambda: cmd_update(args.predictor_id, args.strategy, args.sets),
        "apply": lambda: cmd_apply(args.predictor_id, args.strategy, args.file),
        "diff": lambda: cmd_diff(args.predictor_id, args.strategy, args.file),
    }
    handler = handlers.get(args.cmd)
    if handler is not None:
        handler()
349
+
350
+
351
+ if __name__ == "__main__":
352
+ main()