jfl 0.5.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/context-hub.d.ts +1 -0
- package/dist/commands/context-hub.d.ts.map +1 -1
- package/dist/commands/context-hub.js +246 -2
- package/dist/commands/context-hub.js.map +1 -1
- package/dist/commands/peter.d.ts +2 -0
- package/dist/commands/peter.d.ts.map +1 -1
- package/dist/commands/peter.js +242 -52
- package/dist/commands/peter.js.map +1 -1
- package/dist/commands/setup.d.ts +12 -0
- package/dist/commands/setup.d.ts.map +1 -0
- package/dist/commands/setup.js +322 -0
- package/dist/commands/setup.js.map +1 -0
- package/dist/commands/train.d.ts +33 -0
- package/dist/commands/train.d.ts.map +1 -0
- package/dist/commands/train.js +510 -0
- package/dist/commands/train.js.map +1 -0
- package/dist/commands/verify.d.ts +14 -0
- package/dist/commands/verify.d.ts.map +1 -0
- package/dist/commands/verify.js +276 -0
- package/dist/commands/verify.js.map +1 -0
- package/dist/dashboard-static/assets/index-CW9ZxqX8.css +1 -0
- package/dist/dashboard-static/assets/index-DNN__p4K.js +121 -0
- package/dist/dashboard-static/index.html +2 -2
- package/dist/index.js +99 -3
- package/dist/index.js.map +1 -1
- package/dist/lib/agent-session.d.ts.map +1 -1
- package/dist/lib/agent-session.js +12 -4
- package/dist/lib/agent-session.js.map +1 -1
- package/dist/lib/eval-snapshot.js +1 -1
- package/dist/lib/eval-snapshot.js.map +1 -1
- package/dist/lib/pi-sky/bridge.d.ts +55 -0
- package/dist/lib/pi-sky/bridge.d.ts.map +1 -0
- package/dist/lib/pi-sky/bridge.js +264 -0
- package/dist/lib/pi-sky/bridge.js.map +1 -0
- package/dist/lib/pi-sky/cost-monitor.d.ts +21 -0
- package/dist/lib/pi-sky/cost-monitor.d.ts.map +1 -0
- package/dist/lib/pi-sky/cost-monitor.js +126 -0
- package/dist/lib/pi-sky/cost-monitor.js.map +1 -0
- package/dist/lib/pi-sky/eval-sweep.d.ts +27 -0
- package/dist/lib/pi-sky/eval-sweep.d.ts.map +1 -0
- package/dist/lib/pi-sky/eval-sweep.js +141 -0
- package/dist/lib/pi-sky/eval-sweep.js.map +1 -0
- package/dist/lib/pi-sky/event-router.d.ts +32 -0
- package/dist/lib/pi-sky/event-router.d.ts.map +1 -0
- package/dist/lib/pi-sky/event-router.js +176 -0
- package/dist/lib/pi-sky/event-router.js.map +1 -0
- package/dist/lib/pi-sky/experiment.d.ts +9 -0
- package/dist/lib/pi-sky/experiment.d.ts.map +1 -0
- package/dist/lib/pi-sky/experiment.js +83 -0
- package/dist/lib/pi-sky/experiment.js.map +1 -0
- package/dist/lib/pi-sky/index.d.ts +16 -0
- package/dist/lib/pi-sky/index.d.ts.map +1 -0
- package/dist/lib/pi-sky/index.js +16 -0
- package/dist/lib/pi-sky/index.js.map +1 -0
- package/dist/lib/pi-sky/stratus-gate.d.ts +28 -0
- package/dist/lib/pi-sky/stratus-gate.d.ts.map +1 -0
- package/dist/lib/pi-sky/stratus-gate.js +61 -0
- package/dist/lib/pi-sky/stratus-gate.js.map +1 -0
- package/dist/lib/pi-sky/swarm.d.ts +28 -0
- package/dist/lib/pi-sky/swarm.d.ts.map +1 -0
- package/dist/lib/pi-sky/swarm.js +208 -0
- package/dist/lib/pi-sky/swarm.js.map +1 -0
- package/dist/lib/pi-sky/types.d.ts +139 -0
- package/dist/lib/pi-sky/types.d.ts.map +1 -0
- package/dist/lib/pi-sky/types.js +2 -0
- package/dist/lib/pi-sky/types.js.map +1 -0
- package/dist/lib/pi-sky/voice-bridge.d.ts +20 -0
- package/dist/lib/pi-sky/voice-bridge.d.ts.map +1 -0
- package/dist/lib/pi-sky/voice-bridge.js +91 -0
- package/dist/lib/pi-sky/voice-bridge.js.map +1 -0
- package/dist/lib/policy-head.d.ts +16 -1
- package/dist/lib/policy-head.d.ts.map +1 -1
- package/dist/lib/policy-head.js +117 -19
- package/dist/lib/policy-head.js.map +1 -1
- package/dist/lib/predictor.d.ts +10 -0
- package/dist/lib/predictor.d.ts.map +1 -1
- package/dist/lib/predictor.js +46 -7
- package/dist/lib/predictor.js.map +1 -1
- package/dist/lib/setup/agent-generator.d.ts +18 -0
- package/dist/lib/setup/agent-generator.d.ts.map +1 -0
- package/dist/lib/setup/agent-generator.js +114 -0
- package/dist/lib/setup/agent-generator.js.map +1 -0
- package/dist/lib/setup/context-analyzer.d.ts +16 -0
- package/dist/lib/setup/context-analyzer.d.ts.map +1 -0
- package/dist/lib/setup/context-analyzer.js +112 -0
- package/dist/lib/setup/context-analyzer.js.map +1 -0
- package/dist/lib/setup/doc-auditor.d.ts +54 -0
- package/dist/lib/setup/doc-auditor.d.ts.map +1 -0
- package/dist/lib/setup/doc-auditor.js +629 -0
- package/dist/lib/setup/doc-auditor.js.map +1 -0
- package/dist/lib/setup/domain-generator.d.ts +7 -0
- package/dist/lib/setup/domain-generator.d.ts.map +1 -0
- package/dist/lib/setup/domain-generator.js +58 -0
- package/dist/lib/setup/domain-generator.js.map +1 -0
- package/dist/lib/setup/smart-eval-generator.d.ts +38 -0
- package/dist/lib/setup/smart-eval-generator.d.ts.map +1 -0
- package/dist/lib/setup/smart-eval-generator.js +378 -0
- package/dist/lib/setup/smart-eval-generator.js.map +1 -0
- package/dist/lib/setup/smart-recommender.d.ts +63 -0
- package/dist/lib/setup/smart-recommender.d.ts.map +1 -0
- package/dist/lib/setup/smart-recommender.js +329 -0
- package/dist/lib/setup/smart-recommender.js.map +1 -0
- package/dist/lib/setup/spec-generator.d.ts +63 -0
- package/dist/lib/setup/spec-generator.d.ts.map +1 -0
- package/dist/lib/setup/spec-generator.js +310 -0
- package/dist/lib/setup/spec-generator.js.map +1 -0
- package/dist/lib/setup/violation-agent-generator.d.ts +32 -0
- package/dist/lib/setup/violation-agent-generator.d.ts.map +1 -0
- package/dist/lib/setup/violation-agent-generator.js +255 -0
- package/dist/lib/setup/violation-agent-generator.js.map +1 -0
- package/package.json +1 -1
- package/packages/pi/extensions/context.ts +88 -55
- package/packages/pi/extensions/hub-resolver.ts +63 -0
- package/packages/pi/extensions/index.ts +16 -3
- package/packages/pi/extensions/memory-tool.ts +9 -4
- package/packages/pi/extensions/session.ts +68 -16
- package/packages/pi/extensions/tool-renderers.ts +23 -8
- package/scripts/train/requirements.txt +5 -0
- package/scripts/train/train-policy-head.py +477 -0
- package/scripts/train/v2/dataset.py +81 -0
- package/scripts/train/v2/domain.json +18 -0
- package/scripts/train/v2/eval.py +196 -0
- package/scripts/train/v2/generate_data.py +219 -0
- package/scripts/train/v2/infer.py +188 -0
- package/scripts/train/v2/model.py +112 -0
- package/scripts/train/v2/precompute.py +132 -0
- package/scripts/train/v2/train.py +302 -0
- package/scripts/train/v2/transform_buffer.py +227 -0
- package/scripts/train/v2/validate_data.py +115 -0
- package/template/.claude/settings.json +2 -15
- package/template/scripts/session/session-cleanup.sh +2 -11
- package/template/scripts/session/session-end-hub.sh +72 -0
- package/template/scripts/session/session-start-hub.sh +105 -0
- package/dist/dashboard-static/assets/index-B6b867Pv.js +0 -121
- package/dist/dashboard-static/assets/index-Y4BrqxV-.css +0 -1
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Transform v1 training buffer (state, action, reward) to v2 format (current_state, goal, correct_tool).
|
|
3
|
+
|
|
4
|
+
Reads .jfl/training-buffer.jsonl and .jfl/agents/*.toml to produce v2 training data
|
|
5
|
+
with 70/15/15 train/val/test splits.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
import os
|
|
10
|
+
import sys
|
|
11
|
+
import random
|
|
12
|
+
import argparse
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
|
|
15
|
+
# Translation table from v1 buffer action types to v2 tool names.
# Action types that are absent here have no v2 equivalent.
ACTION_TYPE_MAP = dict(
    fix="fix_bug",
    refactor="refactor_code",
    feature="add_feature",
    test="add_tests",
    config="update_config",
    experiment="run_experiment",
)
|
|
23
|
+
|
|
24
|
+
def load_domain(domain_path: str) -> dict:
    """Load the domain definition from a JSON file.

    Args:
        domain_path: Path to the domain JSON file.

    Returns:
        The parsed JSON document.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Read as UTF-8 explicitly rather than relying on the platform's locale
    # encoding, which is not UTF-8 everywhere.
    with open(domain_path, encoding="utf-8") as f:
        return json.load(f)
|
|
27
|
+
|
|
28
|
+
def load_agent_goals(agents_dir: str) -> dict[str, str]:
    """Derive a natural-language goal string for every agent config.

    Each ``*.toml`` file in ``agents_dir`` is scanned line by line (this is a
    lightweight scan, not a full TOML parse) for the ``name``, ``metric``,
    ``direction`` and ``scope`` keys. When a key appears more than once in a
    file, the last occurrence wins. Files lacking a name, metric or direction
    are ignored.

    Args:
        agents_dir: Directory containing agent ``*.toml`` files.

    Returns:
        Mapping of agent name to a goal such as
        ``"Improve test pass rate for src/lib"``. Empty when the directory
        does not exist.
    """
    goals: dict[str, str] = {}
    agents_path = Path(agents_dir)
    if not agents_path.exists():
        return goals

    wanted = ("name", "metric", "direction", "scope")

    # Sort so that the result does not depend on filesystem listing order when
    # two files declare the same agent name.
    for toml_file in sorted(agents_path.glob("*.toml")):
        fields = dict.fromkeys(wanted)

        for raw_line in toml_file.read_text(encoding="utf-8").splitlines():
            key, sep, value = raw_line.partition("=")
            key = key.strip()
            # Match keys exactly: a prefix test would let e.g. "namespace" or
            # "metric_command" overwrite "name" / "metric", and a line without
            # "=" must be skipped rather than indexed into.
            if not sep or key not in fields:
                continue
            fields[key] = value.strip().strip('"')

        name = fields["name"]
        metric = fields["metric"]
        direction = fields["direction"]
        scope = fields["scope"]

        if name and metric and direction:
            verb = "Improve" if direction == "maximize" else "Reduce"
            scope_str = f" for {scope}" if scope else ""
            goals[name] = f"{verb} {metric.replace('_', ' ')}{scope_str}"

    return goals
|
|
57
|
+
|
|
58
|
+
def format_state_text(state: dict) -> str:
    """Render a v1 state record as the multi-line text used for ``current_state``.

    Missing fields fall back to defaults (``unknown`` agent, zero scores and
    counts, ``none`` for empty dimension/delta lists). Values that are present
    but not numeric, such as a JSON ``null``, are treated as missing so that a
    single incomplete buffer entry cannot abort the whole transform.

    Args:
        state: The ``state`` object of a training-buffer entry.

    Returns:
        Six newline-separated lines: agent, composite score, tests,
        trajectory length, dimension scores and recent deltas.
    """
    def is_number(value) -> bool:
        return isinstance(value, (int, float))

    dims = state.get("dimension_scores")
    dim_parts = (
        [f"{k}={v:.4f}" for k, v in dims.items() if is_number(v)]
        if isinstance(dims, dict)
        else []
    )
    dims_str = ", ".join(dim_parts) if dim_parts else "none"

    deltas = state.get("recent_deltas")
    delta_parts = (
        [f"{'+' if d >= 0 else ''}{d:.4f}" for d in deltas if is_number(d)]
        if isinstance(deltas, (list, tuple))
        else []
    )
    deltas_str = ", ".join(delta_parts) if delta_parts else "none"

    # .get(key, 0) does not cover an explicit null, so normalise here.
    composite = state.get("composite_score")
    if not is_number(composite):
        composite = 0

    return "\n".join([
        f"Agent: {state.get('agent', 'unknown')}",
        f"Composite: {composite:.4f}",
        f"Tests: {state.get('tests_passing', 0)}/{state.get('tests_total', 0)}",
        f"Trajectory: {state.get('trajectory_length', 0)}",
        f"Dimensions: {dims_str}",
        f"Recent deltas: {deltas_str}",
    ])
|
|
71
|
+
|
|
72
|
+
def map_action_type(action_type: str) -> str | None:
    """Translate a v1 action type into its v2 tool name.

    Returns ``None`` when ``ACTION_TYPE_MAP`` has no entry for the type.
    """
    if action_type in ACTION_TYPE_MAP:
        return ACTION_TYPE_MAP[action_type]
    return None
|
|
74
|
+
|
|
75
|
+
def transform_entry(entry: dict, agent_goals: dict, valid_tools: set) -> dict | None:
    """Convert one v1 buffer entry into a v2 training example.

    Args:
        entry: Parsed buffer record with ``state``, ``action`` and ``reward``
            objects and optionally a top-level ``agent``.
        agent_goals: Mapping of agent name to goal text.
        valid_tools: Tool names accepted by the domain.

    Returns:
        A dict with ``current_state``, ``goal``, ``correct_tool``, ``source``,
        ``agent``, ``improved`` and ``reward_delta``; or ``None`` when the
        action type has no mapping or maps to a tool outside ``valid_tools``.
    """
    def as_dict(value) -> dict:
        # A field that is present but null (or otherwise not an object) is
        # treated like a missing one instead of raising AttributeError.
        return value if isinstance(value, dict) else {}

    state = as_dict(entry.get("state"))
    action = as_dict(entry.get("action"))
    reward = as_dict(entry.get("reward"))

    action_type = action.get("type", "")
    if not isinstance(action_type, str):
        # Only string types can be looked up; anything else is unmapped.
        return None
    tool_name = map_action_type(action_type)
    if not tool_name or tool_name not in valid_tools:
        return None

    # Prefer the top-level agent, but fall back to the state's agent when the
    # top-level value is empty rather than only when the key is absent.
    agent_name = entry.get("agent") or state.get("agent") or ""
    if not isinstance(agent_name, str):
        agent_name = str(agent_name)

    goal = agent_goals.get(agent_name)
    if not goal:
        # No configured goal for this agent: derive one from the action text.
        metric_hint = str(action.get("description") or "")[:80]
        goal = f"Improve codebase quality: {metric_hint}" if metric_hint else "Improve overall codebase quality"

    current_state = format_state_text(state)

    return {
        "current_state": current_state,
        "goal": goal,
        "correct_tool": tool_name,
        "source": "buffer",
        "agent": agent_name,
        "improved": reward.get("improved", False),
        "reward_delta": reward.get("composite_delta", 0),
    }
|
|
102
|
+
|
|
103
|
+
def transform_buffer(
    buffer_path: str,
    agents_dir: str,
    domain_path: str,
    output_dir: str,
    improved_only: bool = True,
    seed: int = 42,
) -> dict:
    """Transform a v1 training buffer into shuffled v2 train/val/test splits.

    Reads the JSONL buffer, converts each entry with ``transform_entry``,
    optionally drops entries whose reward is not marked improved, shuffles
    the result with ``seed`` and writes ``train.jsonl``, ``val.jsonl`` and
    ``test.jsonl`` (70/15/15) into ``output_dir``. Progress and a class
    distribution report are printed to stdout.

    Args:
        buffer_path: Path to the v1 training buffer (JSONL).
        agents_dir: Directory with agent ``*.toml`` files used for goals.
        domain_path: Path to the domain JSON listing valid tools.
        output_dir: Directory to write the split files into (created if
            needed).
        improved_only: When true, keep only entries with ``improved`` set.
        seed: Seed for the shuffle that precedes the split.

    Returns:
        Mapping of split name to number of examples written. All zero, with
        nothing written, when no example survives filtering.
    """
    from collections import Counter

    domain = load_domain(domain_path)
    valid_tools = {t["name"] for t in domain["tools"]}
    agent_goals = load_agent_goals(agents_dir)

    entries = []
    malformed = 0
    with open(buffer_path, encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                record = json.loads(line)
            except json.JSONDecodeError:
                malformed += 1
                continue
            # Valid JSON that is not an object (a number, list, ...) cannot be
            # a buffer entry; count it instead of crashing later.
            if not isinstance(record, dict):
                malformed += 1
                continue
            entries.append(record)

    print(f"Loaded {len(entries)} raw training buffer entries")
    if malformed:
        # Surface dropped lines so a corrupted buffer does not go unnoticed.
        print(f"Ignored {malformed} malformed buffer lines")
    if agent_goals:
        print(f"Agent goals: {json.dumps(agent_goals, indent=2)}")

    examples = []
    skipped_type = 0
    skipped_improved = 0

    for entry in entries:
        transformed = transform_entry(entry, agent_goals, valid_tools)
        if transformed is None:
            skipped_type += 1
            continue

        if improved_only and not transformed["improved"]:
            skipped_improved += 1
            continue

        examples.append({
            "current_state": transformed["current_state"],
            "goal": transformed["goal"],
            "correct_tool": transformed["correct_tool"],
            "source": transformed["source"],
        })

    print(f"Transformed: {len(examples)} examples")
    print(f"Skipped: {skipped_type} (unmapped type), {skipped_improved} (not improved)")

    if not examples:
        print("No examples to write. Check action types and improved filter.")
        return {"train": 0, "val": 0, "test": 0}

    # A private generator yields the same permutation as seeding the module
    # RNG, without altering global random state for the rest of the process.
    random.Random(seed).shuffle(examples)

    n = len(examples)
    train_end = int(n * 0.7)
    val_end = int(n * 0.85)

    splits = {
        "train": examples[:train_end],
        "val": examples[train_end:val_end],
        "test": examples[val_end:],
    }

    os.makedirs(output_dir, exist_ok=True)

    counts = {}
    for split_name, split_data in splits.items():
        path = os.path.join(output_dir, f"{split_name}.jsonl")
        with open(path, "w", encoding="utf-8") as f:
            for ex in split_data:
                f.write(json.dumps(ex) + "\n")
        counts[split_name] = len(split_data)
        print(f" {split_name}: {len(split_data)} examples -> {path}")

    # Class balance report
    tool_counts = Counter(ex["correct_tool"] for ex in examples)
    print("\nClass distribution:")
    for tool, count in tool_counts.most_common():
        pct = count / len(examples) * 100
        print(f" {tool:25s} {count:4d} ({pct:5.1f}%)")

    return counts
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def main():
    """Command-line entry point for the v1 -> v2 buffer transform.

    Parses the CLI flags, resolves the domain file (defaulting to the
    ``domain.json`` next to this script), exits with status 1 if the buffer
    or domain file is missing, and otherwise runs ``transform_buffer``.
    """
    cli = argparse.ArgumentParser(description="Transform v1 training buffer to v2 format")
    cli.add_argument("--buffer", default=".jfl/training-buffer.jsonl", help="Path to training buffer JSONL")
    cli.add_argument("--agents", default=".jfl/agents", help="Path to agents directory")
    cli.add_argument("--domain", default=None, help="Path to domain.json")
    cli.add_argument("--output", default=".jfl/v2-data", help="Output directory for split data")
    cli.add_argument("--all", action="store_true", help="Include entries where improved=false")
    cli.add_argument("--seed", type=int, default=42, help="Random seed for split")
    opts = cli.parse_args()

    # Without --domain, use the domain.json that sits beside this script.
    if opts.domain is not None:
        domain_file = opts.domain
    else:
        script_dir = os.path.dirname(os.path.abspath(__file__))
        domain_file = os.path.join(script_dir, "domain.json")

    buffer_missing = not os.path.exists(opts.buffer)
    if buffer_missing:
        print(f"Training buffer not found: {opts.buffer}")
        sys.exit(1)

    domain_missing = not os.path.exists(domain_file)
    if domain_missing:
        print(f"Domain file not found: {domain_file}")
        sys.exit(1)

    include_unimproved = opts.all
    transform_buffer(
        buffer_path=opts.buffer,
        agents_dir=opts.agents,
        domain_path=domain_file,
        output_dir=opts.output,
        improved_only=not include_unimproved,
        seed=opts.seed,
    )
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
if __name__ == "__main__":
|
|
227
|
+
main()
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Validate v2 training data quality before training.
|
|
3
|
+
Checks class balance, duplicates, empty fields, and text lengths.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import json
|
|
7
|
+
import sys
|
|
8
|
+
import os
|
|
9
|
+
import argparse
|
|
10
|
+
from collections import Counter
|
|
11
|
+
|
|
12
|
+
def validate_dataset(path: str) -> dict:
    """Check one JSONL split for quality problems and print a report.

    Checks performed: malformed lines, an empty dataset, class balance across
    ``correct_tool`` (flagged when max/min exceeds 3.0), examples without a
    ``correct_tool``, duplicate ``(goal, correct_tool)`` pairs, and empty
    ``current_state`` / ``goal`` fields. Average text lengths are reported
    for non-empty datasets.

    Args:
        path: Path to a JSONL file of v2 examples.

    Returns:
        A dict with ``total``, ``issues`` (list of messages),
        ``class_ratio`` (``None`` when no class counts exist, so the result
        stays JSON-serialisable), ``tool_counts``, ``duplicates`` and, for
        non-empty datasets, ``avg_state_len`` and ``avg_goal_len``.

    Raises:
        OSError: If the file cannot be opened.
    """
    examples = []
    malformed = 0
    with open(path, encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            # A validator should report bad lines, not crash on them.
            try:
                record = json.loads(line)
            except json.JSONDecodeError:
                malformed += 1
                continue
            if not isinstance(record, dict):
                malformed += 1
                continue
            examples.append(record)

    results = {"total": len(examples), "issues": []}

    print(f"Total examples: {len(examples)}")

    if malformed:
        msg = f"{malformed} malformed lines skipped"
        print(f"\n ⚠️ {msg}")
        results["issues"].append(msg)

    if not examples:
        # Previously an empty split surfaced as "Class imbalance: infx" with
        # an infinite ratio, which json.dumps renders as invalid JSON.
        msg = "Dataset is empty"
        print(f"\n ⚠️ {msg}")
        results["issues"].append(msg)
        results["class_ratio"] = None
        results["tool_counts"] = {}
        results["duplicates"] = 0
        return results

    def text_of(ex: dict, field: str) -> str:
        # Missing or non-string values are treated as empty text.
        value = ex.get(field, "")
        return value if isinstance(value, str) else ""

    tool_counts = Counter(
        text_of(ex, "correct_tool") for ex in examples if text_of(ex, "correct_tool")
    )
    unlabelled = len(examples) - sum(tool_counts.values())

    print("\nClass distribution:")
    for tool, count in tool_counts.most_common():
        pct = count / len(examples) * 100
        bar = "█" * int(pct)
        print(f" {tool:30s} {count:4d} ({pct:5.1f}%) {bar}")

    if tool_counts:
        # Every Counter entry is >= 1, so the division is always defined.
        ratio = max(tool_counts.values()) / min(tool_counts.values())
        if ratio > 3.0:
            msg = f"Class imbalance: {ratio:.1f}x ratio (max/min)"
            print(f"\n ⚠️ {msg}")
            results["issues"].append(msg)
        else:
            print(f"\n ✅ Class balance OK: {ratio:.1f}x ratio")
    else:
        ratio = None

    if unlabelled:
        msg = f"{unlabelled} examples missing correct_tool"
        print(f"\n ⚠️ {msg}")
        results["issues"].append(msg)

    results["class_ratio"] = ratio
    results["tool_counts"] = dict(tool_counts)

    goal_set = set()
    dupes = 0
    for ex in examples:
        key = (text_of(ex, "goal"), text_of(ex, "correct_tool"))
        if key in goal_set:
            dupes += 1
        goal_set.add(key)
    if dupes > 0:
        msg = f"{dupes} duplicate (goal, tool) pairs"
        print(f"\n ⚠️ {msg}")
        results["issues"].append(msg)
    else:
        print("\n ✅ No duplicate (goal, tool) pairs")
    results["duplicates"] = dupes

    empty_states = sum(1 for ex in examples if not text_of(ex, "current_state").strip())
    empty_goals = sum(1 for ex in examples if not text_of(ex, "goal").strip())
    if empty_states or empty_goals:
        msg = f"Empty fields: {empty_states} states, {empty_goals} goals"
        print(f"\n ⚠️ {msg}")
        results["issues"].append(msg)
    else:
        print(" ✅ No empty fields")

    avg_state_len = sum(len(text_of(ex, "current_state")) for ex in examples) / len(examples)
    avg_goal_len = sum(len(text_of(ex, "goal")) for ex in examples) / len(examples)
    print(f"\n Avg state length: {avg_state_len:.0f} chars")
    print(f" Avg goal length: {avg_goal_len:.0f} chars")
    results["avg_state_len"] = avg_state_len
    results["avg_goal_len"] = avg_goal_len

    return results
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def main():
    """Command-line entry point: validate every split found in ``--data-dir``.

    Runs ``validate_dataset`` on each of ``train.jsonl``, ``val.jsonl`` and
    ``test.jsonl`` that exists, optionally prints the collected results as
    JSON, and exits with status 1 when any split reports issues or when no
    split file was found at all.
    """
    parser = argparse.ArgumentParser(description="Validate v2 training data quality")
    parser.add_argument("--data-dir", default=".jfl/v2-data", help="Directory with train/val/test JSONL files")
    parser.add_argument("--json", action="store_true", help="Output results as JSON")
    args = parser.parse_args()

    all_results = {}
    has_issues = False

    for split in ["train", "val", "test"]:
        path = os.path.join(args.data_dir, f"{split}.jsonl")
        if not os.path.exists(path):
            print(f"\nSkipping {split} (not found)")
            continue

        print(f"\n{'=' * 60}")
        print(f" {split.upper()} SPLIT")
        print(f"{'=' * 60}")

        results = validate_dataset(path)
        all_results[split] = results
        if results["issues"]:
            has_issues = True

    if args.json:
        print(json.dumps(all_results, indent=2))

    if not all_results:
        # Nothing was validated, so reporting success would be misleading.
        print(f"\n⚠️ No train/val/test JSONL files found in {args.data_dir}")
        sys.exit(1)

    if has_issues:
        print(f"\n⚠️ Issues found. Review above warnings.")
        sys.exit(1)
    else:
        print(f"\n✅ All validation checks passed.")
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
if __name__ == "__main__":
|
|
115
|
+
main()
|
|
@@ -6,12 +6,7 @@
|
|
|
6
6
|
"hooks": [
|
|
7
7
|
{
|
|
8
8
|
"type": "command",
|
|
9
|
-
"command": "./scripts/session/session-init.sh"
|
|
10
|
-
},
|
|
11
|
-
{
|
|
12
|
-
"type": "command",
|
|
13
|
-
"command": "jfl context-hub ensure >> .jfl/logs/context-hub.log 2>&1 &",
|
|
14
|
-
"async": true
|
|
9
|
+
"command": "./scripts/session/session-start-hub.sh || ./scripts/session/session-init.sh"
|
|
15
10
|
}
|
|
16
11
|
]
|
|
17
12
|
}
|
|
@@ -73,15 +68,7 @@
|
|
|
73
68
|
"hooks": [
|
|
74
69
|
{
|
|
75
70
|
"type": "command",
|
|
76
|
-
"command": "
|
|
77
|
-
},
|
|
78
|
-
{
|
|
79
|
-
"type": "command",
|
|
80
|
-
"command": "jfl context-hub stop >> .jfl/logs/context-hub.log 2>&1; echo '✓ Context hub stopped'; exit 0"
|
|
81
|
-
},
|
|
82
|
-
{
|
|
83
|
-
"type": "command",
|
|
84
|
-
"command": "./scripts/session/session-cleanup.sh >> .jfl/logs/session-cleanup.log 2>&1 || echo 'Cleanup skipped'; exit 0"
|
|
71
|
+
"command": "./scripts/session/session-end-hub.sh || exit 0"
|
|
85
72
|
}
|
|
86
73
|
]
|
|
87
74
|
}
|
|
@@ -48,17 +48,8 @@ if [ -f ".auto-merge.pid" ]; then
|
|
|
48
48
|
rm -f ".auto-merge.pid"
|
|
49
49
|
fi
|
|
50
50
|
|
|
51
|
-
#
|
|
52
|
-
|
|
53
|
-
PID=$(cat ".jfl/context-hub.pid")
|
|
54
|
-
if kill -0 "$PID" 2>/dev/null; then
|
|
55
|
-
echo " Stopping context-hub (PID: $PID)..."
|
|
56
|
-
kill -TERM "$PID" 2>/dev/null || true
|
|
57
|
-
sleep 1
|
|
58
|
-
kill -0 "$PID" 2>/dev/null && kill -9 "$PID" 2>/dev/null || true
|
|
59
|
-
fi
|
|
60
|
-
rm -f ".jfl/context-hub.pid"
|
|
61
|
-
fi
|
|
51
|
+
# Context Hub is a persistent daemon — do NOT kill it on session end.
|
|
52
|
+
# It serves multiple sessions and runtimes (Claude Code, Pi, etc).
|
|
62
53
|
|
|
63
54
|
# Get current session info
|
|
64
55
|
BRANCH=$(git branch --show-current 2>/dev/null || echo "")
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
#!/usr/bin/env bash
#
# session-end-hub.sh — Session end via Context Hub API
#
# Calls POST /api/session/end for journal check + cleanup.
# Falls back to session-cleanup.sh if hub is unavailable.
#
# Usage: session-end-hub.sh [runtime]   (runtime defaults to "claude-code")
# Env:   JFL_REPO_DIR  directory to operate in (defaults to the current one)
#
# Exits 0 on every path below, including when the repo directory is missing.
#
# @purpose Hub-first session cleanup with script fallback

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_DIR="${JFL_REPO_DIR:-$(pwd)}"
RUNTIME="${1:-claude-code}"

cd "$REPO_DIR" || exit 0

# Read port and token: config.json first, then the runtime port file, then
# the default port.
PORT=$(jq -r '.contextHub.port // empty' .jfl/config.json 2>/dev/null)
if [ -z "$PORT" ]; then
  PORT=$(cat .jfl/context-hub.port 2>/dev/null)
fi
if [ -z "$PORT" ]; then
  PORT="4242"
fi

TOKEN=$(cat .jfl/context-hub.token 2>/dev/null || echo "")

HUB_URL="http://localhost:${PORT}"
RESULT=""

# Short timeout on the probe so a hung hub cannot stall the session-end hook.
if curl -sf --max-time 5 "$HUB_URL/health" >/dev/null 2>&1; then
  # Let jq build the JSON body so quotes or backslashes in the runtime name
  # are escaped; fall back to plain interpolation when jq is unavailable.
  PAYLOAD=$(jq -cn --arg runtime "$RUNTIME" '{runtime: $runtime}' 2>/dev/null) || PAYLOAD=""
  if [ -z "$PAYLOAD" ]; then
    PAYLOAD="{\"runtime\":\"$RUNTIME\"}"
  fi

  # Collect arguments in an array so the optional auth header stays one word.
  CURL_ARGS=(-sf -X POST "$HUB_URL/api/session/end" -H "Content-Type: application/json")
  if [ -n "$TOKEN" ]; then
    CURL_ARGS+=(-H "Authorization: Bearer $TOKEN")
  fi
  CURL_ARGS+=(-d "$PAYLOAD" --max-time 90)

  RESULT=$(curl "${CURL_ARGS[@]}" 2>/dev/null) || RESULT=""
fi

if [ -n "$RESULT" ]; then
  HAS_JOURNAL=$(echo "$RESULT" | jq -r '.hasJournal // false' 2>/dev/null)
  CLEANUP=$(echo "$RESULT" | jq -r '.cleanupResult // "unknown"' 2>/dev/null)

  if [ "$HAS_JOURNAL" = "true" ]; then
    echo '✓ Journal exists'
  else
    echo '⚠️ No journal entry for session'
  fi

  if [ "$CLEANUP" = "ok" ]; then
    echo '✓ Session cleanup complete'
  else
    echo "Cleanup: $CLEANUP"
  fi
else
  # Fallback: run cleanup script directly
  BRANCH=$(cat .jfl/current-session-branch.txt 2>/dev/null || git branch --show-current 2>/dev/null)
  JOURNAL=".jfl/journal/${BRANCH}.jsonl"
  if [ ! -s "$JOURNAL" ] 2>/dev/null; then
    echo '⚠️ No journal entry for session'
  else
    echo '✓ Journal exists'
  fi

  if [ -x "$SCRIPT_DIR/session-cleanup.sh" ]; then
    # The log redirection below fails, and the cleanup never runs, if the
    # logs directory is absent.
    mkdir -p .jfl/logs 2>/dev/null || true
    "$SCRIPT_DIR/session-cleanup.sh" >> .jfl/logs/session-cleanup.log 2>&1 || echo 'Cleanup skipped'
  fi
fi

exit 0
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
#!/usr/bin/env bash
#
# session-start-hub.sh — Session init via Context Hub API
#
# Ensures hub is running, then calls POST /api/session/init.
# Falls back to session-init.sh if hub is unavailable.
#
# Usage: session-start-hub.sh [runtime]   (runtime defaults to "claude-code")
# Env:   JFL_REPO_DIR  directory to operate in (defaults to the current one)
#
# @purpose Hub-first session initialization with script fallback

set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_DIR="${JFL_REPO_DIR:-$(pwd)}"
RUNTIME="${1:-claude-code}"

GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'

cd "$REPO_DIR" || exit 1

# Ensure .jfl directories exist before any logging
mkdir -p .jfl/logs .jfl/journal

echo ""
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo " JFL Session Init"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

# Step 1: Ensure Context Hub is running
jfl context-hub ensure >> .jfl/logs/context-hub.log 2>&1 || true

# Step 2: Read port and token
# Each lookup is allowed to fail: under `set -e` a bare `VAR=$(cmd)` aborts
# the script when cmd fails, which would make the fallbacks unreachable.
PORT=$(jq -r '.contextHub.port // empty' .jfl/config.json 2>/dev/null) || PORT=""
if [ -z "$PORT" ]; then
  # Try runtime port file
  PORT=$(cat .jfl/context-hub.port 2>/dev/null) || PORT=""
fi
if [ -z "$PORT" ]; then
  PORT="4242"
fi

TOKEN=$(cat .jfl/context-hub.token 2>/dev/null || echo "")

# Step 3: Call Hub API
HUB_URL="http://localhost:${PORT}"
RESULT=""

# Short timeout on the probe so a hung hub cannot stall session start.
if curl -sf --max-time 5 "$HUB_URL/health" >/dev/null 2>&1; then
  # Let jq build the JSON body so quotes or backslashes in the runtime name
  # are escaped; fall back to plain interpolation when jq is unavailable.
  PAYLOAD=$(jq -cn --arg runtime "$RUNTIME" '{runtime: $runtime}' 2>/dev/null) || PAYLOAD=""
  if [ -z "$PAYLOAD" ]; then
    PAYLOAD="{\"runtime\":\"$RUNTIME\"}"
  fi

  # Collect arguments in an array so the optional auth header stays one word.
  CURL_ARGS=(-sf -X POST "$HUB_URL/api/session/init" -H "Content-Type: application/json")
  if [ -n "$TOKEN" ]; then
    CURL_ARGS+=(-H "Authorization: Bearer $TOKEN")
  fi
  CURL_ARGS+=(-d "$PAYLOAD" --max-time 90)

  RESULT=$(curl "${CURL_ARGS[@]}" 2>/dev/null) || RESULT=""
fi

if [ -n "$RESULT" ]; then
  # Parse Hub response. A jq failure falls back to the same defaults the jq
  # filters use, instead of aborting halfway through under `set -e`.
  BRANCH=$(echo "$RESULT" | jq -r '.branch // "unknown"' 2>/dev/null) || BRANCH="unknown"
  SYNC_OK=$(echo "$RESULT" | jq -r '.syncOk // false' 2>/dev/null) || SYNC_OK="false"
  DOC_ERRORS=$(echo "$RESULT" | jq -r '.doctor.errors // 0' 2>/dev/null) || DOC_ERRORS=0
  DOC_WARNINGS=$(echo "$RESULT" | jq -r '.doctor.warnings // 0' 2>/dev/null) || DOC_WARNINGS=0
  WARNINGS=$(echo "$RESULT" | jq -r '.warnings[]?' 2>/dev/null) || WARNINGS=""

  if [ "$SYNC_OK" = "true" ]; then
    echo -e "${GREEN}✓${NC} Repos synced"
  else
    echo -e "${YELLOW}⚠${NC} Sync had warnings"
  fi

  # stderr is silenced so a non-numeric count just makes the test false.
  if [ "$DOC_ERRORS" -gt 0 ] 2>/dev/null; then
    echo -e "${YELLOW}⚠${NC} Doctor: $DOC_ERRORS errors, $DOC_WARNINGS warnings"
  elif [ "$DOC_WARNINGS" -gt 0 ] 2>/dev/null; then
    echo -e "${GREEN}✓${NC} Doctor: $DOC_WARNINGS warnings"
  fi

  if [ -n "$WARNINGS" ]; then
    # printf keeps hub-supplied text literal; only the colour codes are
    # interpreted as escapes.
    printf '%s\n' "$WARNINGS" | while IFS= read -r w; do
      printf '%b⚠%b %s\n' "$YELLOW" "$NC" "$w"
    done
  fi

  printf '%b✓%b Session branch: %s\n' "$GREEN" "$NC" "$BRANCH"

  # Start auto-commit daemon (Hub doesn't do this — needs a local detached process)
  if [ -x "$SCRIPT_DIR/auto-commit.sh" ]; then
    "$SCRIPT_DIR/auto-commit.sh" start >> .jfl/logs/auto-commit.log 2>&1 &
    echo -e "${GREEN}✓${NC} Auto-commit started"
  fi

  echo -e "${GREEN}✓${NC} Session ready (via Hub)"
  echo ""
else
  # Fallback: run session-init.sh directly
  echo -e "${YELLOW}→${NC} Hub unavailable, falling back to local init"
  if [ -x "$SCRIPT_DIR/session-init.sh" ]; then
    "$SCRIPT_DIR/session-init.sh"
  else
    echo -e "${YELLOW}⚠${NC} No session-init.sh found"
  fi
fi
fi
|