nightpay 0.3.11 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nightpay might be problematic. Click here for more details.

@@ -1,1324 +0,0 @@
1
- #!/usr/bin/env bash
2
- set -euo pipefail
3
-
4
#######################################
# Print the command-line help text for the load simulator.
# Arguments: none
# Outputs:   writes the usage text to stdout (quoted heredoc, no expansion)
#######################################
usage() {
  cat <<'EOF'
Usage:
bash scripts/load-sim.sh [options]

Options:
--activity-mode Sequential activity feed mode (leaderboard-style)
--activity-agent-count <n> Logical agent pool size in activity mode (default: 100)
--activity-target-tasks <n> Stop after this many tasks in activity mode (default: 15000000)
--activity-interval-min-seconds <n> Min delay between tasks in activity mode (default: 3)
--activity-interval-max-seconds <n> Max delay between tasks in activity mode (default: 5)
--activity-report-every <n> Emit progress summary every N tasks in activity mode (default: 25)
--activity-state-file <path> Persist counters to JSON for resume/observation (default: .tmp/activity-sim-state.json)
--activity-visibility <mode> Job visibility in activity mode: public|private (default: public)
--operator-secret <secret> Operator bearer for /complete_job (default: OPERATOR_SECRET_KEY env var)
--skip-complete Activity mode only: leave jobs awaiting approval (no /complete_job)

--base-url <url> MIP-003 base URL (default: http://127.0.0.1:8090)
--jobs-per-round <n> Jobs created per round (default: 100)
--rounds <n> Number of rounds (default: 1, ignored with --continuous)
--continuous Run rounds forever until interrupted
--sleep-seconds <n> Sleep between rounds (default: 2)
--job-workers <n> Parallel workers processing jobs (default: 20)
--task-agent-count <n> Logical task-generator agents (default: 100)
--worker-agent-count <n> Logical worker agents (default: 300)
--voter-agent-count <n> Logical voter agents (default: 300)
--claim-attempts-per-job <n> Claim attempts per job (default: 12)
--max-agents-per-job <n> Contest claim cap per job (default: 5)
--min-votes-to-select <n> Minimum votes required to select winner (default: 3)
--votes-per-submission <n> Votes cast per submission (default: 3)
--amount-specks <n> Job amount for economics (default: 50000)
--timeout-seconds <n> HTTP timeout per request (default: 10)
--seed <n> Deterministic seed (default: random)
-h, --help Show this help

Examples:
bash scripts/load-sim.sh
bash scripts/load-sim.sh --continuous --sleep-seconds 1
bash scripts/load-sim.sh --jobs-per-round 200 --job-workers 40 --max-agents-per-job 5
bash scripts/load-sim.sh --activity-mode --continuous
bash scripts/load-sim.sh --activity-mode --activity-target-tasks 15000000 --activity-agent-count 100
EOF
}
47
-
48
# ---------------------------------------------------------------------------
# Configuration defaults. Each value can be overridden by the matching
# command-line flag handled in parse_args below.
# ---------------------------------------------------------------------------

# Activity (sequential feed) mode settings.
ACTIVITY_MODE=0
ACTIVITY_AGENT_COUNT=100
ACTIVITY_TARGET_TASKS=15000000
ACTIVITY_INTERVAL_MIN_SECONDS=3
ACTIVITY_INTERVAL_MAX_SECONDS=5
ACTIVITY_REPORT_EVERY=25
ACTIVITY_STATE_FILE=".tmp/activity-sim-state.json"
ACTIVITY_VISIBILITY="public"
# The operator bearer falls back to the environment when no flag is given.
OPERATOR_SECRET="${OPERATOR_SECRET_KEY:-}"
ACTIVITY_SKIP_COMPLETE=0

# Round-based (contest) mode settings.
BASE_URL="http://127.0.0.1:8090"
JOBS_PER_ROUND=100
ROUNDS=1
CONTINUOUS=0
SLEEP_SECONDS=2
JOB_WORKERS=20
TASK_AGENT_COUNT=100
WORKER_AGENT_COUNT=300
VOTER_AGENT_COUNT=300
CLAIM_ATTEMPTS_PER_JOB=12
MAX_AGENTS_PER_JOB=5
MIN_VOTES_TO_SELECT=3
VOTES_PER_SUBMISSION=3
AMOUNT_SPECKS=50000
TIMEOUT_SECONDS=10
SEED=""

#######################################
# Abort with a clear message when a value-taking option is the last argument.
# Without this guard `shift 2` fails and, under `set -e`, the script would
# exit with no explanation.
# Arguments: the remaining positional parameters ("$@"); $1 is the option
# Outputs:   error message on stderr when the value is missing
# Returns:   exits 1 when fewer than two arguments remain
#######################################
require_option_value() {
  if (( $# < 2 )); then
    echo "ERROR: $1 requires a value" >&2
    exit 1
  fi
}

#######################################
# Parse command-line options into the configuration globals above.
# Globals:   all configuration variables (written)
# Arguments: the script's positional parameters
# Outputs:   usage text on -h/--help or on an unknown argument;
#            error messages on stderr
# Returns:   exits 0 after --help, exits 1 on an unknown argument or a
#            missing option value
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --activity-mode) ACTIVITY_MODE=1; shift 1 ;;
      --activity-agent-count) require_option_value "$@"; ACTIVITY_AGENT_COUNT="$2"; shift 2 ;;
      --activity-target-tasks) require_option_value "$@"; ACTIVITY_TARGET_TASKS="$2"; shift 2 ;;
      --activity-interval-min-seconds) require_option_value "$@"; ACTIVITY_INTERVAL_MIN_SECONDS="$2"; shift 2 ;;
      --activity-interval-max-seconds) require_option_value "$@"; ACTIVITY_INTERVAL_MAX_SECONDS="$2"; shift 2 ;;
      --activity-report-every) require_option_value "$@"; ACTIVITY_REPORT_EVERY="$2"; shift 2 ;;
      --activity-state-file) require_option_value "$@"; ACTIVITY_STATE_FILE="$2"; shift 2 ;;
      --activity-visibility) require_option_value "$@"; ACTIVITY_VISIBILITY="$2"; shift 2 ;;
      --operator-secret) require_option_value "$@"; OPERATOR_SECRET="$2"; shift 2 ;;
      --skip-complete) ACTIVITY_SKIP_COMPLETE=1; shift 1 ;;

      --base-url) require_option_value "$@"; BASE_URL="$2"; shift 2 ;;
      --jobs-per-round) require_option_value "$@"; JOBS_PER_ROUND="$2"; shift 2 ;;
      --rounds) require_option_value "$@"; ROUNDS="$2"; shift 2 ;;
      --continuous) CONTINUOUS=1; shift 1 ;;
      --sleep-seconds) require_option_value "$@"; SLEEP_SECONDS="$2"; shift 2 ;;
      --job-workers) require_option_value "$@"; JOB_WORKERS="$2"; shift 2 ;;
      --task-agent-count) require_option_value "$@"; TASK_AGENT_COUNT="$2"; shift 2 ;;
      --worker-agent-count) require_option_value "$@"; WORKER_AGENT_COUNT="$2"; shift 2 ;;
      --voter-agent-count) require_option_value "$@"; VOTER_AGENT_COUNT="$2"; shift 2 ;;
      --claim-attempts-per-job) require_option_value "$@"; CLAIM_ATTEMPTS_PER_JOB="$2"; shift 2 ;;
      --max-agents-per-job) require_option_value "$@"; MAX_AGENTS_PER_JOB="$2"; shift 2 ;;
      --min-votes-to-select) require_option_value "$@"; MIN_VOTES_TO_SELECT="$2"; shift 2 ;;
      --votes-per-submission) require_option_value "$@"; VOTES_PER_SUBMISSION="$2"; shift 2 ;;
      --amount-specks) require_option_value "$@"; AMOUNT_SPECKS="$2"; shift 2 ;;
      --timeout-seconds) require_option_value "$@"; TIMEOUT_SECONDS="$2"; shift 2 ;;
      --seed) require_option_value "$@"; SEED="$2"; shift 2 ;;
      -h|--help)
        usage
        exit 0
        ;;
      *)
        echo "ERROR: unknown argument: $1" >&2
        usage
        exit 1
        ;;
    esac
  done
}

parse_args "$@"
116
-
117
#######################################
# Validate that an option value is a canonical non-negative decimal integer.
# Despite the name, zero is accepted; callers apply their own ">= 1" checks.
# Leading zeros are rejected because bash arithmetic reads "024" as octal
# (20) and fails on "08", while Python's int() reads the same text as
# decimal, so the shell range checks and the simulator would disagree.
# Arguments: $1 - value to validate
#            $2 - option name used in the error message
# Outputs:   error message on stderr when the value is invalid
# Returns:   exits 1 when the value is invalid
#######################################
require_positive_int() {
  local value="$1"
  local name="$2"
  local canonical_int='^(0|[1-9][0-9]*)$'
  if [[ ! "$value" =~ $canonical_int ]]; then
    echo "ERROR: $name must be a non-negative integer" >&2
    exit 1
  fi
}
122
-
123
# Print a configuration error on stderr and abort with status 1.
config_error() {
  echo "ERROR: $1" >&2
  exit 1
}

# Value / flag-name pairs: every numeric option must pass the integer check
# before any arithmetic comparison is attempted on it.
numeric_options=(
  "$JOBS_PER_ROUND" "jobs-per-round"
  "$ROUNDS" "rounds"
  "$SLEEP_SECONDS" "sleep-seconds"
  "$JOB_WORKERS" "job-workers"
  "$TASK_AGENT_COUNT" "task-agent-count"
  "$WORKER_AGENT_COUNT" "worker-agent-count"
  "$VOTER_AGENT_COUNT" "voter-agent-count"
  "$CLAIM_ATTEMPTS_PER_JOB" "claim-attempts-per-job"
  "$MAX_AGENTS_PER_JOB" "max-agents-per-job"
  "$MIN_VOTES_TO_SELECT" "min-votes-to-select"
  "$VOTES_PER_SUBMISSION" "votes-per-submission"
  "$AMOUNT_SPECKS" "amount-specks"
  "$TIMEOUT_SECONDS" "timeout-seconds"
  "$ACTIVITY_AGENT_COUNT" "activity-agent-count"
  "$ACTIVITY_TARGET_TASKS" "activity-target-tasks"
  "$ACTIVITY_INTERVAL_MIN_SECONDS" "activity-interval-min-seconds"
  "$ACTIVITY_INTERVAL_MAX_SECONDS" "activity-interval-max-seconds"
  "$ACTIVITY_REPORT_EVERY" "activity-report-every"
)
for (( opt_idx = 0; opt_idx < ${#numeric_options[@]}; opt_idx += 2 )); do
  require_positive_int "${numeric_options[opt_idx]}" "${numeric_options[opt_idx + 1]}"
done
unset numeric_options opt_idx

# The seed is optional; it is only validated when one was supplied.
if [[ -n "$SEED" ]]; then
  require_positive_int "$SEED" "seed"
fi

# Range and cross-option checks, in the same order as they are reported.
if (( MAX_AGENTS_PER_JOB < 1 )); then
  config_error "max-agents-per-job must be >= 1"
fi
if (( MAX_AGENTS_PER_JOB > 20 )); then
  config_error "max-agents-per-job must be <= 20 (server-side contest cap)"
fi
if (( MIN_VOTES_TO_SELECT < 1 )); then
  config_error "min-votes-to-select must be >= 1"
fi
if (( TASK_AGENT_COUNT < 1 )) || (( WORKER_AGENT_COUNT < 1 )) || (( VOTER_AGENT_COUNT < 1 )); then
  config_error "task/worker/voter agent counts must be >= 1"
fi
if (( JOB_WORKERS < 1 )); then
  config_error "job-workers must be >= 1"
fi
if (( ACTIVITY_AGENT_COUNT < 1 )); then
  config_error "activity-agent-count must be >= 1"
fi
# A zero task target only makes sense when activity mode runs continuously.
if (( ACTIVITY_TARGET_TASKS < 1 )) && (( CONTINUOUS == 0 )) && (( ACTIVITY_MODE == 1 )); then
  config_error "activity-target-tasks must be >= 1 unless --continuous is used"
fi
if (( ACTIVITY_INTERVAL_MAX_SECONDS < ACTIVITY_INTERVAL_MIN_SECONDS )); then
  config_error "activity-interval-max-seconds must be >= activity-interval-min-seconds"
fi
# Completing jobs needs the operator bearer, so it is mandatory unless
# completion is skipped.
if (( ACTIVITY_MODE == 1 )) && (( ACTIVITY_SKIP_COMPLETE == 0 )) && [[ -z "${OPERATOR_SECRET}" ]]; then
  config_error "activity mode requires --operator-secret (or OPERATOR_SECRET_KEY env var) unless --skip-complete is set"
fi
case "$ACTIVITY_VISIBILITY" in
  public|private) ;;
  *) config_error "activity-visibility must be one of: public, private" ;;
esac
185
-
186
# Locate a Python interpreter. python3 is preferred; if it is missing or
# resolves to a path containing "WindowsApps", fall back to plain `python`.
PYTHON_BIN="$(command -v python3 2>/dev/null || true)"
case "$PYTHON_BIN" in
  ""|*WindowsApps*)
    PYTHON_BIN="$(command -v python 2>/dev/null || true)"
    ;;
esac
[[ -n "$PYTHON_BIN" ]] || {
  echo "ERROR: python3/python is required" >&2
  exit 1
}
194
-
195
- exec "$PYTHON_BIN" - \
196
- "$ACTIVITY_MODE" \
197
- "$ACTIVITY_AGENT_COUNT" \
198
- "$ACTIVITY_TARGET_TASKS" \
199
- "$ACTIVITY_INTERVAL_MIN_SECONDS" \
200
- "$ACTIVITY_INTERVAL_MAX_SECONDS" \
201
- "$ACTIVITY_REPORT_EVERY" \
202
- "$ACTIVITY_STATE_FILE" \
203
- "$ACTIVITY_VISIBILITY" \
204
- "$OPERATOR_SECRET" \
205
- "$ACTIVITY_SKIP_COMPLETE" \
206
- "$BASE_URL" \
207
- "$JOBS_PER_ROUND" \
208
- "$ROUNDS" \
209
- "$CONTINUOUS" \
210
- "$SLEEP_SECONDS" \
211
- "$JOB_WORKERS" \
212
- "$TASK_AGENT_COUNT" \
213
- "$WORKER_AGENT_COUNT" \
214
- "$VOTER_AGENT_COUNT" \
215
- "$CLAIM_ATTEMPTS_PER_JOB" \
216
- "$MAX_AGENTS_PER_JOB" \
217
- "$MIN_VOTES_TO_SELECT" \
218
- "$VOTES_PER_SUBMISSION" \
219
- "$AMOUNT_SPECKS" \
220
- "$TIMEOUT_SECONDS" \
221
- "$SEED" <<'PYCODE'
222
- import concurrent.futures
223
- import json
224
- import os
225
- import random
226
- import secrets
227
- import statistics
228
- import sys
229
- import time
230
- import urllib.error
231
- import urllib.request
232
- from datetime import datetime, timezone
233
-
234
# Positional configuration handed over by the shell wrapper. The order of the
# names below must match the order of the arguments on the wrapper's `exec`
# line; every value arrives as a string.
(
    activity_mode,
    activity_agent_count,
    activity_target_tasks,
    activity_interval_min_seconds,
    activity_interval_max_seconds,
    activity_report_every,
    activity_state_file,
    activity_visibility,
    operator_secret,
    activity_skip_complete,
    base_url,
    jobs_per_round,
    rounds,
    continuous,
    sleep_seconds,
    job_workers,
    task_agent_count,
    worker_agent_count,
    voter_agent_count,
    claim_attempts_per_job,
    max_agents_per_job,
    min_votes_to_select,
    votes_per_submission,
    amount_specks,
    timeout_seconds,
    seed_raw,
) = sys.argv[1:27]

# Activity-mode settings: numeric flags become ints, text values are
# normalised (stripped; visibility is also lower-cased).
activity_mode = int(activity_mode)
activity_agent_count = int(activity_agent_count)
activity_target_tasks = int(activity_target_tasks)
activity_interval_min_seconds = int(activity_interval_min_seconds)
activity_interval_max_seconds = int(activity_interval_max_seconds)
activity_report_every = int(activity_report_every)
activity_skip_complete = int(activity_skip_complete)
operator_secret = str(operator_secret or "")
activity_state_file = str(activity_state_file or "").strip()
activity_visibility = str(activity_visibility or "public").strip().lower()

# Round-mode settings.
jobs_per_round = int(jobs_per_round)
rounds = int(rounds)
continuous = int(continuous)
sleep_seconds = int(sleep_seconds)
job_workers = int(job_workers)
task_agent_count = int(task_agent_count)
worker_agent_count = int(worker_agent_count)
voter_agent_count = int(voter_agent_count)
claim_attempts_per_job = int(claim_attempts_per_job)
max_agents_per_job = int(max_agents_per_job)
min_votes_to_select = int(min_votes_to_select)
votes_per_submission = int(votes_per_submission)
amount_specks = int(amount_specks)
timeout_seconds = int(timeout_seconds)

# An empty seed argument means "pick one": derive it from the clock, reduced
# modulo 2**31 - 1.
seed = int(seed_raw) if seed_raw else int(time.time_ns() % (2**31 - 1))
rng = random.Random(seed)

# Drop trailing slashes so request paths (which start with "/") can be
# appended directly.
base_url = base_url.rstrip("/")
293
-
294
-
295
def build_epic_agent_pool(count, local_rng):
    """Generate ``count`` unique agent names of the form ``adjective-noun-NNN``.

    Args:
        count: Number of names to produce. Zero or a negative value yields an
            empty list.
        local_rng: ``random.Random``-like object providing ``choice`` and
            ``randint``; the names drawn depend only on its state.

    Returns:
        A list of ``count`` distinct names, in the order they were drawn.

    Raises:
        ValueError: If ``count`` exceeds the number of distinct names that can
            exist (adjectives x nouns x 900 suffixes). Without this guard the
            rejection-sampling loop below could never terminate.
    """
    adjectives = [
        "amber", "brisk", "cinder", "daring", "ember", "frost", "gale",
        "helios", "ion", "jade", "kepler", "lunar", "magma", "nova", "onyx",
        "prism", "quantum", "rivet", "solstice", "turbo", "umbra", "vivid",
        "wild", "xeno", "young", "zen",
    ]
    nouns = [
        "falcon", "otter", "lynx", "orca", "tiger", "panther", "condor",
        "rocket", "pixel", "cipher", "orbit", "vortex", "matrix", "beacon",
        "ranger", "voyager", "striker", "pioneer", "nomad", "engine", "signal",
        "radar", "comet", "blaze", "atlas", "sentinel",
    ]
    suffix_low, suffix_high = 100, 999

    capacity = len(adjectives) * len(nouns) * (suffix_high - suffix_low + 1)
    if count > capacity:
        raise ValueError(
            f"cannot build {count} unique agent names; at most {capacity} are available"
        )

    names = []
    used = set()
    while len(names) < count:
        # The draw order (adjective, noun, suffix) is kept fixed so a given
        # seed always produces the same pool.
        adjective = local_rng.choice(adjectives)
        noun = local_rng.choice(nouns)
        suffix = local_rng.randint(suffix_low, suffix_high)
        candidate = f"{adjective}-{noun}-{suffix}"
        if candidate in used:
            continue
        used.add(candidate)
        names.append(candidate)
    return names
361
-
362
-
363
# Logical agent identities used by the simulation. The round-mode pools are
# plain zero-padded sequences; the activity pool gets generated names from its
# own RNG (seed + 97), separate from the module-level `rng`.
task_agents = [f"tasker-{i:04d}" for i in range(task_agent_count)]
worker_agents = [f"worker-{i:04d}" for i in range(worker_agent_count)]
voter_agents = [f"voter-{i:04d}" for i in range(voter_agent_count)]
activity_agents = build_epic_agent_pool(activity_agent_count, random.Random(seed + 97))
367
-
368
-
369
def pctl(values, percentile):
    """Return the given percentile of ``values`` as a float.

    Args:
        values: Sequence of numbers; may be empty.
        percentile: Integer percentile. Values of 0 or less return the
            minimum and values of 100 or more return the maximum, because
            ``statistics.quantiles(n=100)`` only yields the 1st-99th cut
            points and would otherwise be indexed out of range (or wrap
            around to the last cut point for 0).

    Returns:
        0.0 for an empty input, the single element for a one-item input,
        otherwise the inclusive-method percentile.
    """
    if not values:
        return 0.0
    if len(values) == 1:
        return float(values[0])
    if percentile <= 0:
        return float(min(values))
    if percentile >= 100:
        return float(max(values))
    cut_points = statistics.quantiles(values, n=100, method="inclusive")
    return float(cut_points[percentile - 1])
375
-
376
-
377
def now_iso():
    """Return the current UTC time as an ISO 8601 string."""
    current = datetime.now(tz=timezone.utc)
    return current.isoformat()
379
-
380
-
381
def _parse_json_body(raw, fallback_key):
    """Decode a response body: {} when empty, else JSON, else {fallback_key: raw}."""
    if not raw:
        return {}
    try:
        return json.loads(raw)
    except Exception:
        return {fallback_key: raw}


def _send_request(req):
    """Send a prepared request and return ``(status, data, elapsed_ms)``.

    Never raises for transport or HTTP errors:
      * HTTP error responses return their status code and parsed body
        (``{"error": <raw text>}`` when the body is not JSON);
      * any other failure returns status 0 and ``{"error": str(exc)}``.

    The elapsed time covers sending the request and reading the body; JSON
    parsing is not included.
    """
    t0 = time.perf_counter()
    try:
        with urllib.request.urlopen(req, timeout=timeout_seconds) as resp:
            raw = resp.read().decode("utf-8")
            dt_ms = (time.perf_counter() - t0) * 1000.0
            return resp.status, _parse_json_body(raw, "_raw"), dt_ms
    except urllib.error.HTTPError as exc:
        # Decode leniently: a non-UTF-8 error body must not raise from inside
        # this handler, where it would escape to the caller.
        raw = exc.read().decode("utf-8", errors="replace") if exc.fp else ""
        dt_ms = (time.perf_counter() - t0) * 1000.0
        return int(exc.code), _parse_json_body(raw, "error"), dt_ms
    except Exception as exc:
        dt_ms = (time.perf_counter() - t0) * 1000.0
        return 0, {"error": str(exc)}, dt_ms


def post_json(path, payload, headers=None):
    """POST ``payload`` as compact JSON to ``base_url + path``.

    Args:
        path: Request path appended to ``base_url``.
        payload: JSON-serialisable request body.
        headers: Optional extra headers; they override the default
            ``Content-Type: application/json`` on conflict.

    Returns:
        ``(status, data, elapsed_ms)``; see ``_send_request``.
    """
    hdrs = {"Content-Type": "application/json"}
    if headers:
        hdrs.update(headers)
    body = json.dumps(payload, separators=(",", ":")).encode("utf-8")
    req = urllib.request.Request(
        f"{base_url}{path}",
        data=body,
        headers=hdrs,
        method="POST",
    )
    return _send_request(req)


def get_json(path, headers=None):
    """GET ``base_url + path`` and decode the JSON response.

    Args:
        path: Request path appended to ``base_url``.
        headers: Optional request headers.

    Returns:
        ``(status, data, elapsed_ms)``; see ``_send_request``.
    """
    hdrs = {}
    if headers:
        hdrs.update(headers)
    req = urllib.request.Request(f"{base_url}{path}", headers=hdrs, method="GET")
    return _send_request(req)
441
-
442
-
443
def empty_metrics():
    """Return a fresh metrics record for round mode.

    All counters start at zero, every endpoint gets its own empty latency
    list, and ``errors`` is an empty list. Each call builds new containers.
    """
    counter_names = (
        "jobs_planned",
        "jobs_started",
        "jobs_start_failed",
        "jobs_flow_completed",
        "jobs_flow_failed",
        "claim_attempts",
        "claim_success",
        "claim_cap_rejections",
        "claim_other_failures",
        "submission_attempts",
        "submission_success",
        "submission_failures",
        "vote_attempts",
        "vote_success",
        "vote_failures",
        "winner_select_success",
        "winner_select_failures",
        "claim_cap_violations",
        "status_awaiting_approval",
        "status_multisig_pending",
        "status_other",
        "economics_total_amount_specks",
        "economics_total_fee_specks",
        "economics_total_net_specks",
    )
    endpoint_names = (
        "start_job",
        "claim_job",
        "provide_result",
        "vote_submission",
        "select_winner",
        "status",
        "submissions",
    )
    record = dict.fromkeys(counter_names, 0)
    record["endpoint_latencies_ms"] = {endpoint: [] for endpoint in endpoint_names}
    record["errors"] = []
    return record
480
-
481
-
482
def merge_metrics(dst, src):
    """Fold the metrics in ``src`` into ``dst`` in place.

    Latency samples and errors are appended, integer counters are added, and
    any other value type in ``src`` is ignored.
    """
    for name in src:
        incoming = src[name]
        if name == "endpoint_latencies_ms":
            target_latencies = dst["endpoint_latencies_ms"]
            for endpoint in incoming:
                target_latencies[endpoint].extend(incoming[endpoint])
            continue
        if name == "errors":
            dst["errors"].extend(incoming)
            continue
        if isinstance(incoming, int):
            dst[name] += incoming
491
-
492
-
493
def compact_error(stage, code, payload):
    """Build a short error record for the metrics ``errors`` list.

    Args:
        stage: Name of the step that failed.
        code: Status code associated with the failure.
        payload: Decoded response body. Normally a dict whose ``"error"``
            entry is reported, but a response may decode to any JSON value
            (list, string, number); such payloads are reported as a whole
            instead of raising ``AttributeError``.

    Returns:
        ``{"stage", "code", "error"}`` where ``error`` is text truncated to
        200 characters. A dict-valued error is rendered as compact JSON.
    """
    err = payload.get("error") if isinstance(payload, dict) else payload
    if isinstance(err, dict):
        err = json.dumps(err, separators=(",", ":"))
    return {"stage": stage, "code": code, "error": str(err)[:200]}
498
-
499
-
500
def vote_for_submission(job_id, submission_id, voter_id, vote_value, metrics):
    """Cast one vote on a submission and record the outcome in ``metrics``.

    Returns True when the server answered 200, otherwise False (the failure
    is also appended to ``metrics["errors"]``).
    """
    ballot = {"voter_id": voter_id, "vote": vote_value, "reason": "sim-load"}
    status, body, elapsed_ms = post_json(
        f"/vote_submission/{job_id}/{submission_id}",
        ballot,
    )
    metrics["vote_attempts"] += 1
    metrics["endpoint_latencies_ms"]["vote_submission"].append(elapsed_ms)

    accepted = status == 200
    if accepted:
        metrics["vote_success"] += 1
    else:
        metrics["vote_failures"] += 1
        metrics["errors"].append(compact_error("vote_submission", status, body))
    return accepted
513
-
514
-
515
def run_job(round_index, job_index):
    """Drive one contest job through its whole flow and return its metrics.

    Steps, in order: start the job, collect ``max_agents_per_job`` claims,
    submit one result per claimed agent, fetch the submissions, vote on them,
    select a winner (with one top-up-and-retry on a vote shortfall), and read
    the final status.

    Args:
        round_index: Round number; used for the per-job RNG seed, task-agent
            rotation and payload text.
        job_index: Index of the job within the round (same uses).

    Returns:
        A metrics dict shaped like ``empty_metrics()`` describing this job
        only. A failure at a blocking step returns early with
        ``jobs_flow_failed`` incremented.
    """
    local = empty_metrics()
    # Per-job RNG derived from the global seed, so agent and vote choices are
    # reproducible for a given (seed, round, job).
    local_rng = random.Random(seed + (round_index * 100000) + job_index)
    local["jobs_planned"] = 1

    task_agent = task_agents[(round_index + job_index) % len(task_agents)]
    # The commitment hash is drawn from `secrets`, so it is not reproducible
    # from the seed.
    commitment_hash = secrets.token_hex(32)
    payload = {
        "input_data": {
            "description": f"sim-{task_agent}-r{round_index}-j{job_index}-{commitment_hash[:12]}",
            "commitmentHash": commitment_hash,
            "network": "preprod",
        },
        "amount_specks": amount_specks,
        "contest": {
            "enabled": True,
            "min_agents": max_agents_per_job,
            "max_agents": max_agents_per_job,
            "min_votes_to_select": min_votes_to_select,
        },
    }

    # --- 1. Start the job -------------------------------------------------
    code, start_body, dt = post_json("/start_job", payload)
    local["endpoint_latencies_ms"]["start_job"].append(dt)
    if code != 200:
        local["jobs_start_failed"] += 1
        local["jobs_flow_failed"] += 1
        local["errors"].append(compact_error("start_job", code, start_body))
        return local

    # The id and token are looked up under several key spellings, including a
    # nested "legacy" object.
    legacy = start_body.get("legacy") if isinstance(start_body, dict) else {}
    if not isinstance(legacy, dict):
        legacy = {}
    job_id = str(
        start_body.get("job_id")
        or start_body.get("id")
        or legacy.get("job_id")
        or ""
    ).strip()
    job_token = str(
        start_body.get("job_token")
        or start_body.get("jobToken")
        or legacy.get("job_token")
        or ""
    ).strip()
    if not job_id or not job_token:
        local["jobs_start_failed"] += 1
        local["jobs_flow_failed"] += 1
        local["errors"].append({"stage": "start_job", "code": code, "error": "missing job_id/job_token"})
        return local

    local["jobs_started"] += 1
    claimed = []
    attempted = set()

    # --- 2. Claim phase: random worker picks ------------------------------
    # A pick that repeats an already-tried agent still uses up one loop
    # iteration without sending a request.
    max_claim_attempts = max(claim_attempts_per_job, max_agents_per_job)
    for _ in range(max_claim_attempts):
        if len(claimed) >= max_agents_per_job:
            break
        agent = local_rng.choice(worker_agents)
        if agent in attempted:
            continue
        attempted.add(agent)
        c_code, c_body, c_dt = post_json(f"/claim_job/{job_id}", {"agent_id": agent})
        local["claim_attempts"] += 1
        local["endpoint_latencies_ms"]["claim_job"].append(c_dt)
        if c_code == 200:
            local["claim_success"] += 1
            if agent not in claimed:
                claimed.append(agent)
            continue
        # A 409 mentioning "max_agents" is counted as a cap rejection rather
        # than as an error.
        if c_code == 409 and "max_agents" in str(c_body.get("error", "")):
            local["claim_cap_rejections"] += 1
            continue
        local["claim_other_failures"] += 1
        local["errors"].append(compact_error("claim_job", c_code, c_body))

    # Force at least max_agents_per_job successful claims if possible.
    if len(claimed) < max_agents_per_job:
        # Fallback: walk the worker pool in order, skipping agents already tried.
        for agent in worker_agents:
            if len(claimed) >= max_agents_per_job:
                break
            if agent in attempted:
                continue
            attempted.add(agent)
            c_code, c_body, c_dt = post_json(f"/claim_job/{job_id}", {"agent_id": agent})
            local["claim_attempts"] += 1
            local["endpoint_latencies_ms"]["claim_job"].append(c_dt)
            if c_code == 200:
                local["claim_success"] += 1
                claimed.append(agent)
                continue
            if c_code == 409 and "max_agents" in str(c_body.get("error", "")):
                # Unlike the random phase, a cap rejection ends the fallback.
                local["claim_cap_rejections"] += 1
                break
            local["claim_other_failures"] += 1
            local["errors"].append(compact_error("claim_job", c_code, c_body))

    if len(claimed) < max_agents_per_job:
        local["jobs_flow_failed"] += 1
        local["errors"].append(
            {"stage": "claim_job", "code": 409, "error": f"only {len(claimed)} claims, expected {max_agents_per_job}"}
        )
        return local

    # --- 3. Each claimed agent submits a result ---------------------------
    # Submission failures are recorded but do not abort the job.
    for agent in claimed:
        work_output = f"solution job={job_id} agent={agent} round={round_index} idx={job_index}"
        p_code, p_body, p_dt = post_json(
            f"/provide_result/{job_id}",
            {"agent_id": agent, "work_output": work_output, "artifact_file_paths": [f"/tmp/{job_id}/{agent}.json"]},
        )
        local["submission_attempts"] += 1
        local["endpoint_latencies_ms"]["provide_result"].append(p_dt)
        if p_code == 200:
            local["submission_success"] += 1
        else:
            local["submission_failures"] += 1
            local["errors"].append(compact_error("provide_result", p_code, p_body))

    # --- 4. Fetch the submissions (authenticated with the job token) ------
    s_code, subs_body, s_dt = get_json(
        f"/submissions/{job_id}",
        headers={"Authorization": f"Bearer {job_token}"},
    )
    local["endpoint_latencies_ms"]["submissions"].append(s_dt)
    if s_code != 200:
        local["jobs_flow_failed"] += 1
        local["errors"].append(compact_error("submissions", s_code, subs_body))
        return local

    submissions = subs_body.get("submissions") if isinstance(subs_body, dict) else []
    if not isinstance(submissions, list) or not submissions:
        local["jobs_flow_failed"] += 1
        local["errors"].append({"stage": "submissions", "code": s_code, "error": "no submissions found"})
        return local

    # --- 5. Voting --------------------------------------------------------
    # Vote per submission; bias first submission positive so winner selection passes.
    # Voter snapshot is claim-based when agent_voting_only=true, so prefer claimed agents here.
    used_voters = set()
    eligible_voters = [agent for agent in claimed if agent]
    if not eligible_voters:
        eligible_voters = [agent for agent in worker_agents if agent]

    def pick_voter(exclude_agent):
        # Preference order: an unused eligible voter other than the author,
        # then any eligible voter other than the author, then any eligible
        # voter, then any worker.
        pool = [v for v in eligible_voters if v != exclude_agent and v not in used_voters]
        if not pool:
            pool = [v for v in eligible_voters if v != exclude_agent]
        if not pool:
            pool = list(eligible_voters)
        if not pool:
            pool = list(worker_agents)
        return local_rng.choice(pool)

    for idx, submission in enumerate(submissions):
        sub_id = str(submission.get("submission_id") or "").strip()
        sub_agent = str(submission.get("agent_id") or "").strip()
        if not sub_id:
            continue
        for vote_idx in range(votes_per_submission):
            voter = pick_voter(sub_agent)
            used_voters.add(voter)
            # The first listed submission always gets approvals; the others
            # get a 50/50 approve/reject.
            vote_value = "approve" if idx == 0 else ("approve" if local_rng.random() >= 0.5 else "reject")
            vote_for_submission(job_id, sub_id, voter, vote_value, local)

    # --- 6. Winner selection ----------------------------------------------
    sel_code, sel_body, sel_dt = post_json(
        f"/select_winner/{job_id}",
        {},
        headers={"Authorization": f"Bearer {job_token}"},
    )
    local["endpoint_latencies_ms"]["select_winner"].append(sel_dt)
    sel_error = str(sel_body.get("error", "")).lower()
    # NOTE(review): `sel_code != 200` is implied by `sel_code == 409`.
    if sel_code != 200 and sel_code == 409 and (
        "not enough votes" in sel_error or "strict majority" in sel_error
    ):
        # Top up the best-ranked submission with additional approvals, then retry once.
        first_sub = submissions[0]
        first_sub_id = str(first_sub.get("submission_id") or "").strip()
        first_agent = str(first_sub.get("agent_id") or "").strip()
        if first_sub_id:
            top_up = max(min_votes_to_select, 1)
            for _ in range(top_up):
                voter = pick_voter(first_agent)
                used_voters.add(voter)
                vote_for_submission(job_id, first_sub_id, voter, "approve", local)
            sel_code, sel_body, sel_dt = post_json(
                f"/select_winner/{job_id}",
                {},
                headers={"Authorization": f"Bearer {job_token}"},
            )
            local["endpoint_latencies_ms"]["select_winner"].append(sel_dt)

    if sel_code == 200:
        local["winner_select_success"] += 1
        # Accumulate the economics figures reported with the selection, if any.
        econ = sel_body.get("economics") if isinstance(sel_body, dict) else {}
        if isinstance(econ, dict):
            local["economics_total_amount_specks"] += int(econ.get("amount_specks") or 0)
            local["economics_total_fee_specks"] += int(econ.get("fee") or 0)
            local["economics_total_net_specks"] += int(econ.get("net_to_agent") or 0)
    else:
        local["winner_select_failures"] += 1
        local["jobs_flow_failed"] += 1
        local["errors"].append(compact_error("select_winner", sel_code, sel_body))
        return local

    # --- 7. Final status check --------------------------------------------
    # A failed status read is recorded but the job still counts as completed.
    st_code, st_body, st_dt = get_json(
        f"/status/{job_id}",
        headers={"Authorization": f"Bearer {job_token}"},
    )
    local["endpoint_latencies_ms"]["status"].append(st_dt)
    if st_code == 200:
        internal = str(st_body.get("internal_status") or "")
        claims_count = int(st_body.get("claims_count") or 0)
        # More recorded claims than the requested cap is flagged as a violation.
        if claims_count > max_agents_per_job:
            local["claim_cap_violations"] += 1
        if internal == "awaiting_approval":
            local["status_awaiting_approval"] += 1
        elif internal == "multisig_pending":
            local["status_multisig_pending"] += 1
        else:
            local["status_other"] += 1
    else:
        local["status_other"] += 1
        local["errors"].append(compact_error("status", st_code, st_body))

    local["jobs_flow_completed"] += 1
    return local
740
-
741
-
742
def format_latency_table(latency_map):
    """Summarise per-endpoint latency samples.

    For each endpoint returns count, average, median, 95th percentile and
    maximum (milliseconds, rounded to two decimals). Endpoints with no
    samples get an all-zero row.
    """
    table = {}
    for endpoint, samples in latency_map.items():
        if samples:
            sample_count = len(samples)
            table[endpoint] = {
                "count": sample_count,
                "avg_ms": round(sum(samples) / sample_count, 2),
                "p50_ms": round(pctl(samples, 50), 2),
                "p95_ms": round(pctl(samples, 95), 2),
                "max_ms": round(max(samples), 2),
            }
        else:
            table[endpoint] = {"count": 0, "avg_ms": 0.0, "p50_ms": 0.0, "p95_ms": 0.0, "max_ms": 0.0}
    return table
756
-
757
-
758
def round_header(round_number):
    """Return the one-line banner printed above a round's summary."""
    timestamp = now_iso()
    return f"[load-sim] round={round_number} ts={timestamp} base={base_url}"
760
-
761
-
762
def run_round(round_number):
    """Run ``jobs_per_round`` jobs on a thread pool and aggregate their metrics.

    A job that raises is recorded as one planned, failed job with an
    "executor" error instead of aborting the round. The returned metrics also
    carry ``round_elapsed_seconds`` and a ``latency`` summary table.
    """
    totals = empty_metrics()
    t_start = time.perf_counter()

    with concurrent.futures.ThreadPoolExecutor(max_workers=job_workers) as executor:
        pending = []
        for job_index in range(jobs_per_round):
            pending.append(executor.submit(run_job, round_number, job_index))

        for finished in concurrent.futures.as_completed(pending):
            try:
                job_metrics = finished.result()
            except Exception as exc:
                job_metrics = empty_metrics()
                job_metrics["jobs_planned"] = 1
                job_metrics["jobs_flow_failed"] = 1
                job_metrics["errors"].append(
                    {"stage": "executor", "code": 0, "error": str(exc)[:200]}
                )
            merge_metrics(totals, job_metrics)

    totals["round_elapsed_seconds"] = round(time.perf_counter() - t_start, 2)
    totals["latency"] = format_latency_table(totals["endpoint_latencies_ms"])
    return totals
782
-
783
-
784
def printable_metrics(metrics):
    """Return a print-friendly shallow copy of ``metrics``.

    Raw latency samples are dropped, the error list is replaced by its size
    (``error_count``) plus the first eight entries (``sample_errors``), and
    ``errors`` itself is emptied. The input dict is not modified.
    """
    all_errors = metrics.get("errors", [])
    summary = {key: value for key, value in metrics.items() if key != "endpoint_latencies_ms"}
    summary["error_count"] = len(all_errors)
    summary["sample_errors"] = all_errors[:8]
    summary["errors"] = []
    return summary
792
-
793
-
794
def print_round_summary(label, metrics):
    """Print a round banner followed by two compact JSON lines.

    The first JSON line is the printable metrics, the second the latency
    table; both use sorted keys and are flushed immediately.
    """

    def compact(obj):
        return json.dumps(obj, separators=(",", ":"), sort_keys=True)

    print(round_header(label))
    print(compact(printable_metrics(metrics)), flush=True)
    print(compact({"latency": metrics.get("latency", {})}), flush=True)
812
-
813
-
814
def empty_activity_metrics():
    """Return a fresh metrics record for activity mode.

    ``tasks_target`` mirrors the configured target, all counters start at
    zero, and each endpoint gets its own empty latency list.
    """
    counter_names = (
        "tasks_attempted",
        "tasks_started",
        "tasks_completed",
        "tasks_failed",
        "claim_success",
        "submission_success",
        "completion_success",
    )
    endpoint_names = ("start_job", "claim_job", "provide_result", "complete_job", "status")

    record = {"tasks_target": int(activity_target_tasks)}
    record.update(dict.fromkeys(counter_names, 0))
    record["endpoint_latencies_ms"] = {endpoint: [] for endpoint in endpoint_names}
    record["errors"] = []
    return record
833
-
834
-
835
def load_activity_state(path):
    """Load persisted activity counters from ``path``.

    Returns None when the path is empty, the file does not exist, it cannot
    be read or parsed as JSON, or its top-level value is not an object.
    Otherwise returns a fresh activity metrics record whose counters come
    from the file; missing or unparsable counters become 0 and negative ones
    are clamped to 0.
    """
    if not path or not os.path.exists(path):
        return None
    try:
        with open(path, "r", encoding="utf-8") as handle:
            saved = json.load(handle)
    except Exception:
        return None
    if not isinstance(saved, dict):
        return None

    restored = empty_activity_metrics()
    counter_names = (
        "tasks_attempted",
        "tasks_started",
        "tasks_completed",
        "tasks_failed",
        "claim_success",
        "submission_success",
        "completion_success",
    )
    for name in counter_names:
        try:
            parsed = int(saved.get(name, 0))
        except Exception:
            parsed = 0
        restored[name] = parsed if parsed >= 0 else 0
    return restored
857
-
858
-
859
def save_activity_state(path, metrics, last_event=None):
    """Persist the activity counters to ``path`` as compact JSON.

    Does nothing when ``path`` is empty. The parent directory is created if
    needed, and the file is written to ``<path>.tmp`` first and then moved
    into place with ``os.replace``.
    """
    if not path:
        return

    counter_names = (
        "tasks_attempted",
        "tasks_started",
        "tasks_completed",
        "tasks_failed",
        "claim_success",
        "submission_success",
        "completion_success",
    )
    snapshot = {
        "mode": "activity",
        "updated_at": now_iso(),
        "seed": seed,
        "base_url": base_url,
        "tasks_target": int(activity_target_tasks),
    }
    for name in counter_names:
        snapshot[name] = int(metrics.get(name, 0))
    snapshot["last_event"] = last_event or {}

    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)

    scratch = f"{path}.tmp"
    with open(scratch, "w", encoding="utf-8") as handle:
        json.dump(snapshot, handle, separators=(",", ":"), sort_keys=True)
    os.replace(scratch, path)
884
-
885
-
886
def pick_two_distinct_agents(local_rng, agents):
    """Pick two agents at random, different from each other when possible.

    With a single-agent pool the same agent is returned twice. Otherwise the
    second agent is redrawn until it differs from the first.
    """
    first = local_rng.choice(agents)
    if len(agents) <= 1:
        return first, first
    second = local_rng.choice(agents)
    while second == first:
        second = local_rng.choice(agents)
    return first, second
893
-
894
-
895
def run_activity_task(task_seq, local_rng):
    """Drive one simulated job through the service and report what happened.

    The steps are: POST /start_job, POST /claim_job/<id> (preferred worker
    first, then up to four other agents in random order), POST
    /provide_result/<id>, optionally POST /complete_job/<id> (skipped when
    activity_skip_complete is set), and finally GET /status/<id>.

    Args:
        task_seq: Sequence number of this task; copied into the event and
            into the generated description and work output.
        local_rng: Random source used to pick the requester, the preferred
            worker, and the order of fallback claim candidates.

    Returns:
        An event dict. "ok" is True when every step succeeded and the final
        status matched the expected one. Otherwise "ok" is False and "stage"
        and "error" name the first step that failed. Per-step status codes
        and latencies are recorded for the steps that ran.
    """

    def as_dict(body):
        # Some call sites used to test isinstance(body, dict) while others
        # called .get on the body directly. Normalising here keeps a
        # non-dict response from raising AttributeError mid-task.
        return body if isinstance(body, dict) else {}

    def fail(stage, message):
        event["ok"] = False
        event["stage"] = stage
        event["error"] = message
        return event

    event = {
        "event": "activity-task",
        "ts": now_iso(),
        "task_seq": int(task_seq),
    }
    commitment_hash = secrets.token_hex(32)
    requester, preferred_worker = pick_two_distinct_agents(local_rng, activity_agents)
    payload = {
        "input_data": {
            "description": f"epic-sim-{requester}-to-{preferred_worker}-task-{task_seq}-{commitment_hash[:10]}",
            "commitmentHash": commitment_hash,
            "network": "preprod",
        },
        "amount_specks": amount_specks,
        "visibility": activity_visibility,
        # Present in strict mode, ignored in compat mode.
        "agentIdentifier": requester,
        "identifier_from_purchaser": f"buyer-{requester}",
    }
    event["requester"] = requester
    event["preferred_worker"] = preferred_worker

    # --- start_job -------------------------------------------------------
    code, start_body, start_dt = post_json("/start_job", payload)
    start_body = as_dict(start_body)
    event["start_status"] = int(code)
    event["start_ms"] = round(start_dt, 2)
    if code != 200:
        return fail("start_job", str(start_body.get("error", "start_job failed"))[:200])

    # The identifiers may sit at the top level or under a "legacy" object.
    legacy = as_dict(start_body.get("legacy"))
    job_id = str(
        start_body.get("job_id")
        or start_body.get("id")
        or legacy.get("job_id")
        or ""
    ).strip()
    job_token = str(
        start_body.get("job_token")
        or start_body.get("jobToken")
        or legacy.get("job_token")
        or ""
    ).strip()
    if not job_id or not job_token:
        # Record which keys did arrive to make the mismatch diagnosable.
        event["start_body_keys"] = sorted(start_body.keys())
        event["legacy_keys"] = sorted(legacy.keys())
        return fail("start_job", "missing job_id/job_token")

    event["job_id"] = job_id

    # --- claim_job -------------------------------------------------------
    claim_candidates = [preferred_worker]
    if len(activity_agents) > 1:
        remaining = [agent for agent in activity_agents if agent != preferred_worker]
        local_rng.shuffle(remaining)
        claim_candidates.extend(remaining[:4])

    claimed_agent = ""
    claim_errors = []
    claim_dt_last = 0.0
    for candidate in claim_candidates:
        c_code, c_body, c_dt = post_json(f"/claim_job/{job_id}", {"agent_id": candidate})
        claim_dt_last = c_dt
        if c_code == 200:
            claimed_agent = candidate
            break
        claim_errors.append(
            {"code": int(c_code), "error": str(as_dict(c_body).get("error", ""))[:120]}
        )
    # Only the latency of the last claim attempt is reported.
    event["claim_ms"] = round(claim_dt_last, 2)
    if not claimed_agent:
        event["claim_errors"] = claim_errors[:4]
        return fail("claim_job", "claim failed for all candidates")

    event["worker"] = claimed_agent

    # --- provide_result --------------------------------------------------
    work_output = (
        f"epic completion task={task_seq} requester={requester} worker={claimed_agent} "
        f"commit={commitment_hash[:12]}"
    )
    p_code, p_body, p_dt = post_json(
        f"/provide_result/{job_id}",
        {
            "agent_id": claimed_agent,
            "work_output": work_output,
            "artifact_file_paths": [f"/tmp/{job_id}/{claimed_agent}.txt"],
        },
        headers={"Authorization": f"Bearer {job_token}"},
    )
    event["provide_status"] = int(p_code)
    event["provide_ms"] = round(p_dt, 2)
    if p_code != 200:
        return fail("provide_result", str(as_dict(p_body).get("error", "provide_result failed"))[:200])

    # --- complete_job (optional) ----------------------------------------
    if activity_skip_complete == 0:
        comp_payload = {
            "receiptHash": secrets.token_hex(32),
            "outputHash": secrets.token_hex(32),
            "onChain": False,
        }
        k_code, k_body, k_dt = post_json(
            f"/complete_job/{job_id}",
            comp_payload,
            headers={"Authorization": f"Bearer {operator_secret}"},
        )
        event["complete_status"] = int(k_code)
        event["complete_ms"] = round(k_dt, 2)
        if k_code != 200:
            return fail("complete_job", str(as_dict(k_body).get("error", "complete_job failed"))[:200])
    else:
        # Completion was skipped on purpose; zeros mark "not attempted".
        event["complete_status"] = 0
        event["complete_ms"] = 0.0

    # --- status ----------------------------------------------------------
    s_code, s_body, s_dt = get_json(
        f"/status/{job_id}", headers={"Authorization": f"Bearer {job_token}"}
    )
    s_body = as_dict(s_body)
    event["status_check"] = int(s_code)
    event["status_ms"] = round(s_dt, 2)
    strict_status = ""
    if s_code == 200:
        event["internal_status"] = str(s_body.get("internal_status") or "")
        strict_status = str(s_body.get("status") or "")
        event["status"] = strict_status
    internal_status = str(event.get("internal_status") or "")
    if activity_skip_complete:
        status_ok = internal_status in ("awaiting_approval", "multisig_pending") or strict_status == "running"
    else:
        status_ok = internal_status == "completed" or strict_status == "completed"
    if s_code != 200 or not status_ok:
        expected = "awaiting_approval/running" if activity_skip_complete else "completed"
        got = internal_status or strict_status or "unknown"
        return fail("status", f"expected {expected}, got {got}")

    event["ok"] = True
    return event
1040
-
1041
-
1042
def print_activity_summary(metrics, started_at, label, baseline_started=0):
    """Print one compact JSON progress line for activity mode.

    Args:
        metrics: Activity metrics dict holding the counters plus the
            "endpoint_latencies_ms" and "errors" entries.
        started_at: time.perf_counter() reading taken when this process
            began the run.
        label: Tag copied verbatim into the "label" field.
        baseline_started: Value of tasks_started that was restored from a
            state file before this process started any task. It is
            subtracted before computing the rate, so a resumed run reports
            the throughput of the current process only. Defaults to 0,
            which reproduces the previous calculation.
    """
    elapsed = max(time.perf_counter() - started_at, 0.001)
    started = int(metrics.get("tasks_started", 0))
    completed = int(metrics.get("tasks_completed", 0))
    failed = int(metrics.get("tasks_failed", 0))
    remaining = max(int(activity_target_tasks) - started, 0)
    # elapsed only covers this process, so the numerator must too. Dividing
    # the cumulative count by it would inflate the rate after a resume and
    # make the ETA far too small.
    session_started = max(started - int(baseline_started), 0)
    rate = session_started / elapsed
    eta_seconds = int(remaining / rate) if rate > 0 and remaining > 0 else 0
    summary = {
        "event": "activity-sim-progress",
        "label": label,
        "ts": now_iso(),
        "tasks_target": int(activity_target_tasks),
        "tasks_attempted": int(metrics.get("tasks_attempted", 0)),
        "tasks_started": started,
        "tasks_completed": completed,
        "tasks_failed": failed,
        "remaining_tasks": remaining,
        "rate_tasks_per_second": round(rate, 4),
        "eta_seconds": eta_seconds,
        "latency": format_latency_table(metrics.get("endpoint_latencies_ms", {})),
        "error_count": len(metrics.get("errors", [])),
        "sample_errors": metrics.get("errors", [])[:8],
    }
    print(json.dumps(summary, separators=(",", ":"), sort_keys=True), flush=True)


def run_activity_mode():
    """Run the sequential activity simulation until the target is reached.

    Restores counters from the state file when one is readable, checks
    GET /availability, then runs tasks one at a time. After every task the
    event is printed as a JSON line and the counters are saved. A progress
    summary is printed every activity_report_every attempts.

    Returns:
        0 when the target number of started tasks is reached, 1 after 50
        consecutive failed tasks, 2 when the availability check fails, and
        130 when interrupted from the keyboard.
    """

    def emit(record):
        print(json.dumps(record, separators=(",", ":"), sort_keys=True), flush=True)

    metrics = empty_activity_metrics()
    started_at = time.perf_counter()
    state = load_activity_state(activity_state_file)
    if state:
        metrics.update({k: v for k, v in state.items() if k in metrics})
        emit(
            {
                "event": "activity-sim-resume",
                "ts": now_iso(),
                "state_file": activity_state_file,
                "tasks_attempted": metrics["tasks_attempted"],
                "tasks_started": metrics["tasks_started"],
                "tasks_completed": metrics["tasks_completed"],
                "tasks_failed": metrics["tasks_failed"],
            }
        )
    # Tasks started by earlier processes; excluded from the rate and ETA.
    resumed_started = int(metrics["tasks_started"])

    availability_code, availability_payload, availability_dt = get_json("/availability")
    if availability_code != 200:
        emit(
            {
                "event": "load-sim-preflight-failed",
                "mode": "activity",
                "base_url": base_url,
                "endpoint": "/availability",
                "status_code": availability_code,
                "latency_ms": round(availability_dt, 2),
                "error": str(availability_payload.get("error", "service unavailable"))[:300],
            }
        )
        return 2

    emit(
        {
            "event": "load-sim-preflight-ok",
            "mode": "activity",
            "base_url": base_url,
            "availability_status": availability_payload.get("status"),
            "latency_ms": round(availability_dt, 2),
        }
    )
    emit(
        {
            "event": "activity-agents",
            "count": len(activity_agents),
            "agents": activity_agents,
        }
    )

    # (metrics bucket, event field, whether the value must be positive).
    # Steps that never ran leave no field or a zero, which must not be
    # recorded as a latency sample.
    latency_fields = (
        ("start_job", "start_ms", False),
        ("claim_job", "claim_ms", True),
        ("provide_result", "provide_ms", True),
        ("complete_job", "complete_ms", True),
        ("status", "status_ms", True),
    )

    consecutive_failures = 0
    max_consecutive_failures = 50
    try:
        while True:
            if not continuous and metrics["tasks_started"] >= activity_target_tasks:
                break

            task_seq = metrics["tasks_attempted"] + 1
            task_started_t0 = time.perf_counter()
            event = run_activity_task(task_seq, rng)
            event["task_runtime_ms"] = round((time.perf_counter() - task_started_t0) * 1000.0, 2)
            metrics["tasks_attempted"] += 1

            for bucket, field, must_be_positive in latency_fields:
                sample = event.get(field)
                if not isinstance(sample, (int, float)):
                    continue
                if must_be_positive and not sample > 0:
                    continue
                metrics["endpoint_latencies_ms"][bucket].append(float(sample))

            if event.get("start_status") == 200:
                metrics["tasks_started"] += 1
            if event.get("worker"):
                metrics["claim_success"] += 1
            if event.get("provide_status") == 200:
                metrics["submission_success"] += 1
            if activity_skip_complete == 0 and event.get("complete_status") == 200:
                metrics["completion_success"] += 1
            if activity_skip_complete == 1 and event.get("status_check") == 200:
                metrics["completion_success"] += 1

            if event.get("ok"):
                metrics["tasks_completed"] += 1
                consecutive_failures = 0
            else:
                metrics["tasks_failed"] += 1
                consecutive_failures += 1
                metrics["errors"].append(
                    {
                        "task_seq": int(task_seq),
                        "stage": str(event.get("stage", "unknown")),
                        "error": str(event.get("error", ""))[:200],
                    }
                )

            next_delay = 0
            if activity_interval_max_seconds > 0:
                # randint raises ValueError when its lower bound exceeds the
                # upper one; clamp so a min above max cannot abort the run.
                low = min(activity_interval_min_seconds, activity_interval_max_seconds)
                next_delay = rng.randint(low, activity_interval_max_seconds)
            event["next_delay_seconds"] = int(next_delay)
            event["tasks_started_total"] = int(metrics["tasks_started"])
            event["tasks_completed_total"] = int(metrics["tasks_completed"])
            event["tasks_failed_total"] = int(metrics["tasks_failed"])
            emit(event)

            save_activity_state(activity_state_file, metrics, event)

            if consecutive_failures >= max_consecutive_failures:
                emit(
                    {
                        "event": "activity-sim-abort",
                        "ts": now_iso(),
                        "reason": "too_many_consecutive_failures",
                        "consecutive_failures": consecutive_failures,
                    }
                )
                print_activity_summary(metrics, started_at, "abort", baseline_started=resumed_started)
                save_activity_state(activity_state_file, metrics, {"event": "activity-sim-abort"})
                return 1

            # A non-positive interval disables periodic reports instead of
            # raising ZeroDivisionError on the modulo.
            if activity_report_every > 0 and metrics["tasks_attempted"] % activity_report_every == 0:
                print_activity_summary(metrics, started_at, "periodic", baseline_started=resumed_started)

            # Re-check before sleeping so the final task is not followed by
            # a pointless delay.
            if not continuous and metrics["tasks_started"] >= activity_target_tasks:
                break
            if next_delay > 0:
                time.sleep(next_delay)
    except KeyboardInterrupt:
        print("[load-sim] interrupted", flush=True)
        print_activity_summary(metrics, started_at, "interrupted", baseline_started=resumed_started)
        save_activity_state(activity_state_file, metrics, {"event": "interrupt"})
        return 130

    print_activity_summary(metrics, started_at, "done", baseline_started=resumed_started)
    save_activity_state(activity_state_file, metrics, {"event": "done"})
    return 0
1232
-
1233
-
1234
def main():
    """Entry point: run activity mode, or the round-based load simulation.

    In round mode the service is first probed with GET /availability. Rounds
    are then executed one after another, printing a per-round and a
    cumulative summary after each, until the configured number of rounds is
    reached (or forever when continuous is set).

    Returns:
        The result of run_activity_mode() in activity mode. Otherwise 0 on
        normal completion, 2 when the availability check fails, and 130
        when interrupted from the keyboard.
    """

    def emit(record):
        print(json.dumps(record, separators=(",", ":"), sort_keys=True), flush=True)

    if activity_mode:
        return run_activity_mode()

    totals = empty_metrics()
    current_round = 1

    probe_code, probe_body, probe_ms = get_json("/availability")
    if probe_code != 200:
        emit(
            {
                "event": "load-sim-preflight-failed",
                "base_url": base_url,
                "endpoint": "/availability",
                "status_code": probe_code,
                "latency_ms": round(probe_ms, 2),
                "error": str(probe_body.get("error", "service unavailable"))[:300],
            }
        )
        return 2
    emit(
        {
            "event": "load-sim-preflight-ok",
            "base_url": base_url,
            "availability_status": probe_body.get("status"),
            "latency_ms": round(probe_ms, 2),
        }
    )

    try:
        while True:
            round_metrics = run_round(current_round)
            merge_metrics(totals, round_metrics)
            totals["latency"] = format_latency_table(totals["endpoint_latencies_ms"])
            print_round_summary(current_round, round_metrics)
            print_round_summary("cumulative", totals)

            last_round_done = (not continuous) and current_round >= rounds
            if last_round_done:
                break
            current_round += 1
            if sleep_seconds > 0:
                time.sleep(sleep_seconds)
    except KeyboardInterrupt:
        # Still report what was gathered before the interrupt.
        totals["latency"] = format_latency_table(totals["endpoint_latencies_ms"])
        print("[load-sim] interrupted", flush=True)
        print_round_summary("cumulative", totals)
        return 130
    return 0
1284
-
1285
-
1286
if __name__ == "__main__":
    # Announce the effective configuration as a single JSON line, then hand
    # control to main() and exit with whatever status it returns.
    startup_record = {
        "event": "load-sim-start",
        "ts": now_iso(),
        "mode": "activity" if activity_mode else "round",
        "base_url": base_url,
        # Activity-mode settings.
        "activity_agent_count": activity_agent_count,
        "activity_target_tasks": activity_target_tasks,
        "activity_interval_min_seconds": activity_interval_min_seconds,
        "activity_interval_max_seconds": activity_interval_max_seconds,
        "activity_report_every": activity_report_every,
        "activity_state_file": activity_state_file,
        "activity_visibility": activity_visibility,
        "activity_skip_complete": bool(activity_skip_complete),
        # Round-mode settings.
        "jobs_per_round": jobs_per_round,
        "rounds": rounds,
        "continuous": bool(continuous),
        "job_workers": job_workers,
        "task_agent_count": task_agent_count,
        "worker_agent_count": worker_agent_count,
        "voter_agent_count": voter_agent_count,
        "claim_attempts_per_job": claim_attempts_per_job,
        "max_agents_per_job": max_agents_per_job,
        "min_votes_to_select": min_votes_to_select,
        "votes_per_submission": votes_per_submission,
        # Shared settings.
        "amount_specks": amount_specks,
        "timeout_seconds": timeout_seconds,
        "seed": seed,
    }
    print(json.dumps(startup_record, separators=(",", ":"), sort_keys=True), flush=True)
    sys.exit(main())
1324
- PYCODE