@kestrel-agents/ruhroh 0.5.0-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +114 -0
  3. package/assets/ruhroh-badge.png +0 -0
  4. package/assets/ruhroh-logo.png +0 -0
  5. package/dist/adapters.d.ts +97 -0
  6. package/dist/adapters.d.ts.map +1 -0
  7. package/dist/adapters.js +21 -0
  8. package/dist/adapters.js.map +1 -0
  9. package/dist/builtin-scenarios.d.ts +8 -0
  10. package/dist/builtin-scenarios.d.ts.map +1 -0
  11. package/dist/builtin-scenarios.js +22 -0
  12. package/dist/builtin-scenarios.js.map +1 -0
  13. package/dist/cli.d.ts +30 -0
  14. package/dist/cli.d.ts.map +1 -0
  15. package/dist/cli.js +313 -0
  16. package/dist/cli.js.map +1 -0
  17. package/dist/env.d.ts +6 -0
  18. package/dist/env.d.ts.map +1 -0
  19. package/dist/env.js +66 -0
  20. package/dist/env.js.map +1 -0
  21. package/dist/generate.d.ts +32 -0
  22. package/dist/generate.d.ts.map +1 -0
  23. package/dist/generate.js +231 -0
  24. package/dist/generate.js.map +1 -0
  25. package/dist/harbor.d.ts +28 -0
  26. package/dist/harbor.d.ts.map +1 -0
  27. package/dist/harbor.js +47 -0
  28. package/dist/harbor.js.map +1 -0
  29. package/dist/index.d.ts +8 -0
  30. package/dist/index.d.ts.map +1 -0
  31. package/dist/index.js +8 -0
  32. package/dist/index.js.map +1 -0
  33. package/dist/results.d.ts +66 -0
  34. package/dist/results.d.ts.map +1 -0
  35. package/dist/results.js +31 -0
  36. package/dist/results.js.map +1 -0
  37. package/dist/scenarios.d.ts +61 -0
  38. package/dist/scenarios.d.ts.map +1 -0
  39. package/dist/scenarios.js +69 -0
  40. package/dist/scenarios.js.map +1 -0
  41. package/package.json +66 -0
  42. package/python/ruhroh/__init__.py +5 -0
  43. package/python/ruhroh/harbor_agent.py +345 -0
  44. package/python/ruhroh/loop_controller.py +783 -0
  45. package/python/ruhroh/setup.sh +12 -0
  46. package/scenarios/grocery-budget-planner/instruction.md +1 -0
  47. package/scenarios/grocery-budget-planner/scenario.json +44 -0
  48. package/scenarios/nextjs-task-board/instruction.md +1 -0
  49. package/scenarios/nextjs-task-board/scenario.json +45 -0
  50. package/scenarios/shift-coverage-planner/assets/prompt-assets/shift-coverage/coverage-rules.json +29 -0
  51. package/scenarios/shift-coverage-planner/assets/prompt-assets/shift-coverage/employees.csv +8 -0
  52. package/scenarios/shift-coverage-planner/assets/prompt-assets/shift-coverage/existing-schedule.csv +9 -0
  53. package/scenarios/shift-coverage-planner/assets/prompt-assets/shift-coverage/shift-requirements.csv +8 -0
  54. package/scenarios/shift-coverage-planner/assets/prompt-assets/shift-coverage/time-off-requests.csv +5 -0
  55. package/scenarios/shift-coverage-planner/instruction.md +1 -0
  56. package/scenarios/shift-coverage-planner/scenario.json +47 -0
  57. package/scenarios/simple-newsletter/instruction.md +1 -0
  58. package/scenarios/simple-newsletter/scenario.json +40 -0
  59. package/scenarios/vite-csv-reconciliation/assets/prompt-assets/csv-reconciliation-people/source-a.csv +9 -0
  60. package/scenarios/vite-csv-reconciliation/assets/prompt-assets/csv-reconciliation-people/source-b.csv +9 -0
  61. package/scenarios/vite-csv-reconciliation/instruction.md +1 -0
  62. package/scenarios/vite-csv-reconciliation/scenario.json +48 -0
  63. package/scenarios/vite-sprint-planner/instruction.md +1 -0
  64. package/scenarios/vite-sprint-planner/scenario.json +45 -0
@@ -0,0 +1,61 @@
1
+ import { type RuhrohContinuityLevel, type RuhrohRunAgentAdapterCapabilities } from "./adapters.js";
2
/** Tier label carried by a scenario; `getRuhrohScenariosByTier` selects scenarios by this value. */
export type RuhrohScenarioTier =
  | "smoke"
  | "nightly"
  | "release";

/** Kind label carried by a scenario. */
export type RuhrohScenarioKind =
  | "real_user"
  | "contract_stress";

/** The only `loop.stopPolicy` value accepted by `validateRuhrohScenario`. */
export type RuhrohLoopStopPolicy = "goal_satisfied_or_max";

/** Mode value usable on `driver.mode` and `run.mode`. */
export type RuhrohDriverMode =
  | "build"
  | "plan"
  | "chat";

/** The only `evaluation.mode` value accepted by `validateRuhrohScenario`. */
export type RuhrohEvaluationMode = "agentic_goal_review";

/** Scenario schema version; `validateRuhrohScenario` rejects a `driver` block on `ruhroh_scenario_v2`. */
export type RuhrohScenarioVersion =
  | "ruhroh_scenario_v1"
  | "ruhroh_scenario_v2";
8
/**
 * Shape of a Ruhroh scenario definition.
 *
 * The per-field notes below describe the rules enforced by
 * `validateRuhrohScenario`; the type itself does not enforce them.
 */
export interface RuhrohScenario {
  /** Schema version; must be one of the two known versions. */
  version: RuhrohScenarioVersion;
  /** Identifier; must be non-blank and match `/^[a-zA-Z0-9._-]+$/`. */
  id: string;
  /** Title; must be non-blank. */
  title: string;
  tier: RuhrohScenarioTier;
  kind: RuhrohScenarioKind;
  /** Prompt text; must be non-blank. */
  userPrompt: string;
  assets?: string[] | undefined;
  /**
   * Legacy driver block: required (with a non-blank `adapter` and a positive
   * `timeoutSeconds`) unless `version` is `ruhroh_scenario_v2`, where it is rejected.
   */
  driver?: {
    adapter: string;
    profileId?: string | undefined;
    mode?: RuhrohDriverMode | undefined;
    timeoutSeconds: number;
    env?: Record<string, string> | undefined;
    command?: string | undefined;
    completionProtocol?: string | undefined;
  };
  run: {
    mode?: RuhrohDriverMode | undefined;
    /** Must be positive. */
    timeoutSeconds: number;
  };
  /** Requirements checked against adapter capabilities when those are supplied to validation. */
  requires: {
    continuity: RuhrohContinuityLevel;
    /** Entries must be non-blank. */
    tools: string[];
    network: boolean;
  };
  loop: {
    /** Must be positive. */
    defaultMaxIterations: number;
    stopPolicy: RuhrohLoopStopPolicy;
  };
  evaluation: {
    mode: RuhrohEvaluationMode;
    /** Entries must be non-blank. */
    scenarioContext: string[];
    /** Must contain at least one criterion. */
    goalRubric: string[];
    /** Entries must be non-blank. */
    evidenceGuidance: string[];
  };
}
45
/** Optional inputs accepted by `validateRuhrohScenario`. */
export interface ValidateRuhrohScenarioOptions {
  /**
   * Adapter capabilities keyed by adapter name. When the scenario's
   * `driver.adapter` has an entry here, its capabilities are checked
   * against the scenario's `requires` block.
   */
  adapters?: Record<string, RuhrohRunAgentAdapterCapabilities> | undefined;
}
48
/** File-system locations associated with a scenario. */
export interface RuhrohScenarioSource {
  /** Directory of the scenario. */
  scenarioDir: string;
  /** Path of the scenario definition file. */
  scenarioPath: string;
  /** Path of the instruction file, when one is present. */
  instructionPath?: string | undefined;
  /** Directory of scenario assets, when one is present. */
  assetsDir?: string | undefined;
}
54
/**
 * Validates a scenario definition.
 *
 * @param scenario - Scenario to check.
 * @param options - Optional adapter capabilities used for requirement checks.
 * @returns One message per problem found; an empty array when none were found.
 */
export declare function validateRuhrohScenario(
  scenario: RuhrohScenario,
  options?: ValidateRuhrohScenarioOptions,
): string[];
55
/**
 * Looks up a scenario by its `id`.
 *
 * @param scenarios - Candidates to search.
 * @param id - Identifier to match exactly.
 * @returns The first scenario whose `id` equals `id`, or `undefined` when none does.
 */
export declare function getRuhrohScenarioById<TScenario extends { id: string }>(
  scenarios: TScenario[],
  id: string,
): TScenario | undefined;
58
/**
 * Selects the scenarios that belong to a tier.
 *
 * @param scenarios - Candidates to filter.
 * @param tier - Tier to match exactly.
 * @returns A new array with the scenarios whose `tier` equals `tier`, in input order.
 */
export declare function getRuhrohScenariosByTier<TScenario extends { tier: RuhrohScenarioTier }>(
  scenarios: TScenario[],
  tier: RuhrohScenarioTier,
): TScenario[];
61
+ //# sourceMappingURL=scenarios.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"scenarios.d.ts","sourceRoot":"","sources":["../src/scenarios.ts"],"names":[],"mappings":"AAAA,OAAO,EAEL,KAAK,qBAAqB,EAC1B,KAAK,iCAAiC,EACvC,MAAM,eAAe,CAAC;AAEvB,MAAM,MAAM,kBAAkB,GAAG,OAAO,GAAG,SAAS,GAAG,SAAS,CAAC;AACjE,MAAM,MAAM,kBAAkB,GAAG,WAAW,GAAG,iBAAiB,CAAC;AACjE,MAAM,MAAM,oBAAoB,GAAG,uBAAuB,CAAC;AAC3D,MAAM,MAAM,gBAAgB,GAAG,OAAO,GAAG,MAAM,GAAG,MAAM,CAAC;AACzD,MAAM,MAAM,oBAAoB,GAAG,qBAAqB,CAAC;AACzD,MAAM,MAAM,qBAAqB,GAAG,oBAAoB,GAAG,oBAAoB,CAAC;AAEhF,MAAM,WAAW,cAAc;IAC7B,OAAO,EAAE,qBAAqB,CAAC;IAC/B,EAAE,EAAE,MAAM,CAAC;IACX,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,kBAAkB,CAAC;IACzB,IAAI,EAAE,kBAAkB,CAAC;IACzB,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,CAAC,EAAE,MAAM,EAAE,GAAG,SAAS,CAAC;IAC9B,MAAM,CAAC,EAAE;QACP,OAAO,EAAE,MAAM,CAAC;QAChB,SAAS,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;QAC/B,IAAI,CAAC,EAAE,gBAAgB,GAAG,SAAS,CAAC;QACpC,cAAc,EAAE,MAAM,CAAC;QACvB,GAAG,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,SAAS,CAAC;QACzC,OAAO,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;QAC7B,kBAAkB,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;KACzC,CAAC;IACF,GAAG,EAAE;QACH,IAAI,CAAC,EAAE,gBAAgB,GAAG,SAAS,CAAC;QACpC,cAAc,EAAE,MAAM,CAAC;KACxB,CAAC;IACF,QAAQ,EAAE;QACR,UAAU,EAAE,qBAAqB,CAAC;QAClC,KAAK,EAAE,MAAM,EAAE,CAAC;QAChB,OAAO,EAAE,OAAO,CAAC;KAClB,CAAC;IACF,IAAI,EAAE;QACJ,oBAAoB,EAAE,MAAM,CAAC;QAC7B,UAAU,EAAE,oBAAoB,CAAC;KAClC,CAAC;IACF,UAAU,EAAE;QACV,IAAI,EAAE,oBAAoB,CAAC;QAC3B,eAAe,EAAE,MAAM,EAAE,CAAC;QAC1B,UAAU,EAAE,MAAM,EAAE,CAAC;QACrB,gBAAgB,EAAE,MAAM,EAAE,CAAC;KAC5B,CAAC;CACH;AAED,MAAM,WAAW,6BAA6B;IAC5C,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,iCAAiC,CAAC,GAAG,SAAS,CAAC;CAC1E;AAED,MAAM,WAAW,oBAAoB;IACnC,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IACrC,SAAS,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;CAChC;AAED,wBAAgB,sBAAsB,CACpC,QAAQ,EAAE,cAAc,EACxB,OAAO,GAAE,6BAAkC,GAC1C,MAAM,EAAE,CA4DV;AAED,wBAAgB,qBAAqB,CAAC,SAAS,SAAS;IAAE,EAAE,EAAE,MAAM,CAAA;CAAE,EACpE,SAAS,EAAE,SAAS,EAAE,EACtB,EAAE,EAAE,MAAM,GACT,SAAS,GAAG,SAAS,CAEvB;AAED,wBAAgB,wBAAwB,CAAC,SAAS,SAAS;IAAE,IAAI,EAAE,kBAAkB,CAAA;C
AAE,EACrF,SAAS,EAAE,SAAS,EAAE,EACtB,IAAI,EAAE,kBAAkB,GACvB,SAAS,EAAE,CAEb"}
@@ -0,0 +1,69 @@
1
+ import { adapterSatisfiesRequirements, } from "./adapters.js";
2
+ export function validateRuhrohScenario(scenario, options = {}) {
3
+ const errors = [];
4
+ if (scenario.version !== "ruhroh_scenario_v1" && scenario.version !== "ruhroh_scenario_v2") {
5
+ errors.push("version must be ruhroh_scenario_v1 or ruhroh_scenario_v2");
6
+ }
7
+ if (scenario.id.trim().length === 0) {
8
+ errors.push("id is required");
9
+ }
10
+ if (!/^[a-zA-Z0-9._-]+$/u.test(scenario.id)) {
11
+ errors.push(`id contains unsafe characters: ${scenario.id}`);
12
+ }
13
+ if (scenario.title.trim().length === 0) {
14
+ errors.push("title is required");
15
+ }
16
+ if (scenario.userPrompt.trim().length === 0) {
17
+ errors.push("userPrompt is required");
18
+ }
19
+ if (scenario.version === "ruhroh_scenario_v2" && scenario.driver !== undefined) {
20
+ errors.push("driver is not allowed in ruhroh_scenario_v2; choose adapters at runtime");
21
+ }
22
+ if (scenario.version !== "ruhroh_scenario_v2") {
23
+ if (scenario.driver === undefined || scenario.driver.adapter.trim().length === 0) {
24
+ errors.push("driver.adapter is required for legacy scenarios");
25
+ }
26
+ if ((scenario.driver?.timeoutSeconds ?? 0) <= 0) {
27
+ errors.push("driver.timeoutSeconds must be positive for legacy scenarios");
28
+ }
29
+ }
30
+ if (scenario.run.timeoutSeconds <= 0) {
31
+ errors.push("run.timeoutSeconds must be positive");
32
+ }
33
+ if (!["native_session", "workspace_plus_transcript", "workspace_only"].includes(scenario.requires.continuity)) {
34
+ errors.push("requires.continuity must be native_session, workspace_plus_transcript, or workspace_only");
35
+ }
36
+ if (scenario.requires.tools.some((tool) => tool.trim().length === 0)) {
37
+ errors.push("requires.tools entries must be non-empty");
38
+ }
39
+ const capabilities = scenario.driver?.adapter === undefined ? undefined : options.adapters?.[scenario.driver.adapter];
40
+ if (capabilities !== undefined) {
41
+ errors.push(...adapterSatisfiesRequirements(capabilities, scenario.requires));
42
+ }
43
+ if (scenario.loop.defaultMaxIterations <= 0) {
44
+ errors.push("loop.defaultMaxIterations must be positive");
45
+ }
46
+ if (scenario.loop.stopPolicy !== "goal_satisfied_or_max") {
47
+ errors.push("loop.stopPolicy must be goal_satisfied_or_max");
48
+ }
49
+ if (scenario.evaluation.mode !== "agentic_goal_review") {
50
+ errors.push("evaluation.mode must be agentic_goal_review");
51
+ }
52
+ if (scenario.evaluation.goalRubric.length === 0) {
53
+ errors.push("evaluation.goalRubric must include at least one criterion");
54
+ }
55
+ if (scenario.evaluation.scenarioContext.some((item) => item.trim().length === 0)) {
56
+ errors.push("evaluation.scenarioContext entries must be non-empty");
57
+ }
58
+ if (scenario.evaluation.evidenceGuidance.some((item) => item.trim().length === 0)) {
59
+ errors.push("evaluation.evidenceGuidance entries must be non-empty");
60
+ }
61
+ return errors;
62
+ }
63
/**
 * Returns the first scenario whose `id` equals the given id.
 *
 * @param scenarios Scenarios to search.
 * @param id Identifier to match with strict equality.
 * @returns The matching scenario, or `undefined` when there is none.
 */
export function getRuhrohScenarioById(scenarios, id) {
    const position = scenarios.findIndex((candidate) => candidate.id === id);
    if (position === -1) {
        return undefined;
    }
    return scenarios[position];
}
66
/**
 * Returns the scenarios whose `tier` equals the given tier.
 *
 * @param scenarios Scenarios to filter.
 * @param tier Tier to match with strict equality.
 * @returns A new array of matching scenarios in their original order.
 */
export function getRuhrohScenariosByTier(scenarios, tier) {
    return scenarios.flatMap((candidate) => (candidate.tier === tier ? [candidate] : []));
}
69
+ //# sourceMappingURL=scenarios.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"scenarios.js","sourceRoot":"","sources":["../src/scenarios.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,4BAA4B,GAG7B,MAAM,eAAe,CAAC;AA0DvB,MAAM,UAAU,sBAAsB,CACpC,QAAwB,EACxB,UAAyC,EAAE;IAE3C,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,IAAI,QAAQ,CAAC,OAAO,KAAK,oBAAoB,IAAI,QAAQ,CAAC,OAAO,KAAK,oBAAoB,EAAE,CAAC;QAC3F,MAAM,CAAC,IAAI,CAAC,0DAA0D,CAAC,CAAC;IAC1E,CAAC;IACD,IAAI,QAAQ,CAAC,EAAE,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACpC,MAAM,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;IAChC,CAAC;IACD,IAAI,CAAC,oBAAoB,CAAC,IAAI,CAAC,QAAQ,CAAC,EAAE,CAAC,EAAE,CAAC;QAC5C,MAAM,CAAC,IAAI,CAAC,kCAAkC,QAAQ,CAAC,EAAE,EAAE,CAAC,CAAC;IAC/D,CAAC;IACD,IAAI,QAAQ,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvC,MAAM,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC;IACnC,CAAC;IACD,IAAI,QAAQ,CAAC,UAAU,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC5C,MAAM,CAAC,IAAI,CAAC,wBAAwB,CAAC,CAAC;IACxC,CAAC;IACD,IAAI,QAAQ,CAAC,OAAO,KAAK,oBAAoB,IAAI,QAAQ,CAAC,MAAM,KAAK,SAAS,EAAE,CAAC;QAC/E,MAAM,CAAC,IAAI,CAAC,yEAAyE,CAAC,CAAC;IACzF,CAAC;IACD,IAAI,QAAQ,CAAC,OAAO,KAAK,oBAAoB,EAAE,CAAC;QAC9C,IAAI,QAAQ,CAAC,MAAM,KAAK,SAAS,IAAI,QAAQ,CAAC,MAAM,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACjF,MAAM,CAAC,IAAI,CAAC,iDAAiD,CAAC,CAAC;QACjE,CAAC;QACD,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,cAAc,IAAI,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC;YAChD,MAAM,CAAC,IAAI,CAAC,6DAA6D,CAAC,CAAC;QAC7E,CAAC;IACH,CAAC;IACD,IAAI,QAAQ,CAAC,GAAG,CAAC,cAAc,IAAI,CAAC,EAAE,CAAC;QACrC,MAAM,CAAC,IAAI,CAAC,qCAAqC,CAAC,CAAC;IACrD,CAAC;IACD,IAAI,CAAC,CAAC,gBAAgB,EAAE,2BAA2B,EAAE,gBAAgB,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,QAAQ,CAAC,UAAU,CAAC,EAAE,CAAC;QAC9G,MAAM,CAAC,IAAI,CAAC,0FAA0F,CAAC,CAAC;IAC1G,CAAC;IACD,IAAI,QAAQ,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,CAAC,EAAE,CAAC;QACrE,MAAM,CAAC,IAAI,CAAC,0CAA0C,CAAC,CAAC;IAC1D,CAAC;IACD,MAAM,YAAY,GAAG,QAAQ,CAAC,MAAM,EAAE,OAAO,KAAK,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,OAAO,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IACtH,IAAI,YAAY,KAAK,SAAS,
EAAE,CAAC;QAC/B,MAAM,CAAC,IAAI,CAAC,GAAG,4BAA4B,CAAC,YAAY,EAAE,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAC;IAChF,CAAC;IACD,IAAI,QAAQ,CAAC,IAAI,CAAC,oBAAoB,IAAI,CAAC,EAAE,CAAC;QAC5C,MAAM,CAAC,IAAI,CAAC,4CAA4C,CAAC,CAAC;IAC5D,CAAC;IACD,IAAI,QAAQ,CAAC,IAAI,CAAC,UAAU,KAAK,uBAAuB,EAAE,CAAC;QACzD,MAAM,CAAC,IAAI,CAAC,+CAA+C,CAAC,CAAC;IAC/D,CAAC;IACD,IAAI,QAAQ,CAAC,UAAU,CAAC,IAAI,KAAK,qBAAqB,EAAE,CAAC;QACvD,MAAM,CAAC,IAAI,CAAC,6CAA6C,CAAC,CAAC;IAC7D,CAAC;IACD,IAAI,QAAQ,CAAC,UAAU,CAAC,UAAU,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAChD,MAAM,CAAC,IAAI,CAAC,2DAA2D,CAAC,CAAC;IAC3E,CAAC;IACD,IAAI,QAAQ,CAAC,UAAU,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,CAAC,EAAE,CAAC;QACjF,MAAM,CAAC,IAAI,CAAC,sDAAsD,CAAC,CAAC;IACtE,CAAC;IACD,IAAI,QAAQ,CAAC,UAAU,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,CAAC,EAAE,CAAC;QAClF,MAAM,CAAC,IAAI,CAAC,uDAAuD,CAAC,CAAC;IACvE,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,MAAM,UAAU,qBAAqB,CACnC,SAAsB,EACtB,EAAU;IAEV,OAAO,SAAS,CAAC,IAAI,CAAC,CAAC,QAAQ,EAAE,EAAE,CAAC,QAAQ,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC;AAC1D,CAAC;AAED,MAAM,UAAU,wBAAwB,CACtC,SAAsB,EACtB,IAAwB;IAExB,OAAO,SAAS,CAAC,MAAM,CAAC,CAAC,QAAQ,EAAE,EAAE,CAAC,QAAQ,CAAC,IAAI,KAAK,IAAI,CAAC,CAAC;AAChE,CAAC"}
package/package.json ADDED
@@ -0,0 +1,66 @@
1
+ {
2
+ "name": "@kestrel-agents/ruhroh",
3
+ "version": "0.5.0-beta.0",
4
+ "description": "Real-User Harness for Repair-Oriented Harbor",
5
+ "license": "MIT",
6
+ "repository": {
7
+ "type": "git",
8
+ "url": "git+https://github.com/LumiCorp/ruhroh.git"
9
+ },
10
+ "homepage": "https://github.com/LumiCorp/ruhroh",
11
+ "bugs": {
12
+ "url": "https://github.com/LumiCorp/ruhroh/issues"
13
+ },
14
+ "keywords": [
15
+ "ruhroh",
16
+ "harbor",
17
+ "benchmark",
18
+ "agents",
19
+ "repair",
20
+ "typescript"
21
+ ],
22
+ "type": "module",
23
+ "main": "./dist/index.js",
24
+ "types": "./dist/index.d.ts",
25
+ "exports": {
26
+ ".": {
27
+ "types": "./dist/index.d.ts",
28
+ "import": "./dist/index.js"
29
+ },
30
+ "./cli": {
31
+ "types": "./dist/cli.d.ts",
32
+ "import": "./dist/cli.js"
33
+ }
34
+ },
35
+ "bin": {
36
+ "ruhroh": "./dist/cli.js"
37
+ },
38
+ "files": [
39
+ "assets/**/*",
40
+ "dist",
41
+ "python/**/*.py",
42
+ "python/**/*.sh",
43
+ "scenarios/**/*",
44
+ "README.md",
45
+ "LICENSE"
46
+ ],
47
+ "sideEffects": false,
48
+ "publishConfig": {
49
+ "access": "public"
50
+ },
51
+ "engines": {
52
+ "node": ">=20"
53
+ },
54
+ "scripts": {
55
+ "clean": "node --input-type=module -e \"import { rmSync } from 'node:fs'; rmSync('dist', { recursive: true, force: true });\"",
56
+ "build": "pnpm run clean && tsc -p tsconfig.json",
57
+ "prepare": "pnpm run build",
58
+ "test": "node --import tsx --test tests/*.test.ts"
59
+ },
60
+ "devDependencies": {
61
+ "@types/node": "^22.13.10",
62
+ "tsx": "^4.19.3",
63
+ "typescript": "^5.8.2"
64
+ },
65
+ "packageManager": "pnpm@9.12.3"
66
+ }
@@ -0,0 +1,5 @@
1
+ """Package-owned Ruhroh Harbor runtime."""
2
+
3
+ from .harbor_agent import RuhrohHarborAgent
4
+
5
+ __all__ = ["RuhrohHarborAgent"]
@@ -0,0 +1,345 @@
1
+ from __future__ import annotations
2
+
3
+ import base64
4
+ import json
5
+ import os
6
+ import re
7
+ import shlex
8
+ import tempfile
9
+ import time
10
+ from pathlib import Path
11
+ from typing import Any
12
+
13
# Use Harbor's real base class when it is importable; otherwise define a
# stand-in so this module can still be imported without Harbor.
try:
    from harbor.agents.installed.base import BaseInstalledAgent
except ImportError:

    class BaseInstalledAgent:  # type: ignore[no-redef]
        """Stand-in base class whose exec helpers always fail with RuntimeError."""

        async def exec_as_root(self, environment: Any, command: str, **kwargs: Any) -> Any:
            """Always raises: there is no Harbor runtime to execute against."""
            raise RuntimeError("harbor is not installed")

        async def exec_as_agent(self, environment: Any, command: str, **kwargs: Any) -> Any:
            """Always raises: there is no Harbor runtime to execute against."""
            raise RuntimeError("harbor is not installed")
22
+
23
+
24
# Name returned by RuhrohHarborAgent.name().
RUHROH_AGENT_NAME = "ruhroh-harbor"
# Defaults written into the "adapter" and "dataset" fields of a result.
RUHROH_ADAPTER = "ruhroh-harbor"
RUHROH_DATASET = "ruhroh@local"
# Matches the marker carrying a base64-encoded JSON result in command output.
RUHROH_RESULT_RE = re.compile(r"RUHROH_RESULT_JSON_BASE64:(?P<payload>[A-Za-z0-9+/=]+)")
28
+
29
+
30
class RuhrohHarborAgent(BaseInstalledAgent):
    """Harbor agent that installs and runs the Ruhroh loop controller.

    ``install`` uploads the controller and setup script into
    ``/installed-agent`` inside the environment; ``run`` executes the
    controller for one instruction and writes the structured result to the
    logs directory.
    """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        """Initialise the base class and record ``logs_dir``, ``model_name`` and ``max_iterations``.

        Values passed as keyword arguments always win. When a keyword is
        absent, an instance attribute already set by the base initialiser is
        kept instead of being overwritten with ``None``; attributes nobody
        set default to ``None``.
        """
        super_init = getattr(super(), "__init__", None)
        if callable(super_init):
            try:
                super_init(*args, **kwargs)
            except TypeError:
                # The base signature rejected these arguments; retry without any.
                super_init()
        already_set = vars(self)
        for attr in ("logs_dir", "model_name", "max_iterations"):
            if attr in kwargs:
                setattr(self, attr, kwargs[attr])
            elif attr not in already_set:
                setattr(self, attr, None)

    @staticmethod
    def name() -> str:
        """Return the agent name (``RUHROH_AGENT_NAME``)."""
        return RUHROH_AGENT_NAME

    async def install(self, environment: Any) -> None:
        """Upload the runtime files into ``/installed-agent`` and run the install script as root."""
        runtime_root = Path(__file__).parent
        await self._exec_as_root(environment, "mkdir -p /installed-agent && chmod 755 /installed-agent")
        uploads = (
            (runtime_root / "loop_controller.py", "/installed-agent/ruhroh_loop_controller.py"),
            (runtime_root / "setup.sh", "/installed-agent/install-agent.sh"),
        )
        for source, destination in uploads:
            await upload_file_to_environment(environment, source, destination)
        await write_text_to_environment(self, environment, "/installed-agent/setup-env.sh", harbor_env_setup_script())
        await self._exec_as_root(
            environment,
            "chmod a+r /installed-agent/ruhroh_loop_controller.py "
            "&& chmod +x /installed-agent/install-agent.sh "
            "&& . /installed-agent/setup-env.sh "
            "&& /installed-agent/install-agent.sh",
            timeout_sec=harbor_install_timeout_sec(),
        )

    async def run(self, instruction: str, environment: Any, context: Any) -> None:
        """Run the loop controller for ``instruction`` and persist its result.

        Raises:
            RuntimeError: when neither the command output nor the result file
                in the environment yields a structured result. A ``failed``
                result artifact is written before raising.
        """
        started_at = time.monotonic()
        logs_dir = getattr(self, "logs_dir", None)
        scenario_id = harbor_task_id(context, logs_dir)
        # The instruction travels base64-encoded so arbitrary text survives the shell.
        encoded = base64.b64encode(instruction.encode("utf-8")).decode("ascii")
        max_iterations = resolve_max_iterations(getattr(self, "max_iterations", None))
        env_file = create_run_env_file(max_iterations)
        command = (
            "RUHROH_ENV_FILE=/installed-agent/ruhroh-loop-env.json "
            "python3 /installed-agent/ruhroh_loop_controller.py "
            f"--instruction-base64 {shlex.quote(encoded)} "
            f"--scenario-id {shlex.quote(scenario_id)} "
            f"--max-iterations {shlex.quote(str(max_iterations))}"
        )
        # Always exit 0 and report the controller's real status on a marker line instead.
        wrapped = (
            f"{command}; "
            "__ruhroh_status=$?; "
            "printf '\\nRUHROH_AGENT_EXIT_CODE:%s\\n' \"$__ruhroh_status\"; "
            "exit 0"
        )
        try:
            await upload_file_to_environment(environment, env_file, "/installed-agent/ruhroh-loop-env.json")
            await self._exec_as_root(environment, "chmod 600 /installed-agent/ruhroh-loop-env.json")
            result = await self._exec_as_root(environment, wrapped, timeout_sec=resolve_agent_timeout_sec(scenario_id))
        finally:
            # Remove the local temp file and collect debug artifacts even when the run failed.
            env_file.unlink(missing_ok=True)
            await persist_ruhroh_debug_artifacts(environment, logs_dir)
        write_command_result_artifact(context, result, logs_dir)
        # Prefer the marker in the command output; fall back to the result file in the environment.
        parsed = parse_ruhroh_result(command_output_text(result)) or await read_ruhroh_result(self, environment)
        if parsed is None:
            parsed = {
                "version": "ruhroh_loop_result_v1",
                "adapter": RUHROH_ADAPTER,
                "dataset": RUHROH_DATASET,
                "scenarioId": scenario_id,
                "task_id": scenario_id,
                "status": "failed",
                "failure_kind": "cli_command_failed",
                "failureBucket": "cli_command_failed",
                "duration_ms": round((time.monotonic() - started_at) * 1000),
                "stoppedReason": "missing_result_marker",
            }
            write_ruhroh_result_artifact(context, parsed, logs_dir)
            raise RuntimeError("Ruhroh run did not emit a structured result marker.")
        # Fill in identifying fields the controller did not provide.
        parsed.setdefault("adapter", RUHROH_ADAPTER)
        parsed.setdefault("dataset", RUHROH_DATASET)
        parsed.setdefault("task_id", scenario_id)
        parsed.setdefault("duration_ms", round((time.monotonic() - started_at) * 1000))
        write_ruhroh_result_artifact(context, parsed, logs_dir)

    async def _exec_as_root(self, environment: Any, command: str, **kwargs: Any) -> Any:
        """Call ``exec_as_root`` with ``None``-valued keyword arguments dropped, awaiting the result if needed."""
        return await maybe_await(self.exec_as_root(environment, command=command, **without_none_values(kwargs)))
115
+
116
+
117
def harbor_env_setup_script() -> str:
    """Return shell text exporting the result adapter and dataset names, one export per line."""
    exports = {
        "RUHROH_RESULT_ADAPTER": RUHROH_ADAPTER,
        "RUHROH_RESULT_DATASET": RUHROH_DATASET,
    }
    return "".join(f"export {name}={shlex.quote(value)}\n" for name, value in exports.items())
124
+
125
+
126
def build_run_env_values(max_iterations: int) -> dict[str, str]:
    """Build the environment mapping handed to the loop controller.

    Args:
        max_iterations: Iteration limit, stored as a string under
            ``RUHROH_MAX_ITERATIONS``.

    Returns:
        The fixed adapter/dataset/iteration entries plus every forwarded
        ``RUHROH_*`` variable that is set in this process's environment.
    """
    env = {
        "RUHROH_RESULT_ADAPTER": RUHROH_ADAPTER,
        "RUHROH_RESULT_DATASET": RUHROH_DATASET,
        "RUHROH_MAX_ITERATIONS": str(max_iterations),
    }
    # Host variables passed through verbatim when set; each name is listed once.
    forwarded_keys = (
        "RUHROH_EVAL_RESULT_FIXTURE",
        "RUHROH_EVAL_RESULT_FIXTURE_PATH",
        "RUHROH_ITERATION_TIMEOUT_SEC",
        "RUHROH_AGENT_TIMEOUT_SEC",
        "RUHROH_INSTALL_TIMEOUT_SEC",
        "RUHROH_RUN_AGENT_ADAPTER",
        "RUHROH_RUN_AGENT_COMMAND",
        "RUHROH_RUN_AGENT_COMPLETION_PROTOCOL",
        "RUHROH_EVAL_COMMAND",
    )
    for key in forwarded_keys:
        value = os.environ.get(key)
        if value is not None:
            env[key] = value
    return env
148
+
149
+
150
def create_run_env_file(max_iterations: int) -> Path:
    """Write the run environment mapping as JSON to a new temp file and return its path.

    The temp file is removed again if writing fails; the exception is re-raised.
    """
    descriptor, temp_name = tempfile.mkstemp(prefix="ruhroh-env-", suffix=".json")
    env_path = Path(temp_name)
    try:
        with os.fdopen(descriptor, "w", encoding="utf-8") as stream:
            stream.write(json.dumps(build_run_env_values(max_iterations), sort_keys=True))
            stream.write("\n")
    except Exception:
        env_path.unlink(missing_ok=True)
        raise
    return env_path
161
+
162
+
163
async def upload_file_to_environment(environment: Any, source: Path, destination: str) -> None:
    """Copy ``source`` into the environment at ``destination``.

    Tries each known upload method name on ``environment`` in turn, and for
    each one several ``str``/``Path`` argument combinations; a ``TypeError``
    moves on to the next combination.

    Raises:
        RuntimeError: when no method/argument combination succeeded.
    """
    destination_path = Path(destination)
    source_text = str(source)
    argument_shapes = (
        (source, destination),
        (source_text, destination),
        (source, destination_path),
        (source_text, destination_path),
    )
    for method_name in ("copy_to", "copy_to_container", "upload_file", "upload"):
        uploader = getattr(environment, method_name, None)
        if uploader is not None:
            for call_args in argument_shapes:
                try:
                    await maybe_await(uploader(*call_args))
                except TypeError:
                    continue
                return
    raise RuntimeError(f"Harbor environment cannot upload {source} to {destination}.")
180
+
181
+
182
async def write_text_to_environment(agent: RuhrohHarborAgent, environment: Any, path: str, content: str) -> None:
    """Create ``path``'s parent directory in the environment and write ``content`` to ``path`` as root.

    Every interpolated value is shell-quoted; ``printf %s`` writes the content verbatim.
    """
    quoted_parent = shlex.quote(str(Path(path).parent))
    quoted_content = shlex.quote(content)
    quoted_path = shlex.quote(path)
    script = f"mkdir -p {quoted_parent} && printf %s {quoted_content} > {quoted_path}"
    await agent._exec_as_root(environment, script)
188
+
189
+
190
async def read_ruhroh_result(agent: RuhrohHarborAgent, environment: Any) -> dict[str, Any] | None:
    """Read ``/installed-agent/ruhroh-loop-result.json`` from the environment.

    Returns:
        The decoded JSON object, or ``None`` when the output is blank, is not
        valid JSON, or decodes to something other than a dict.
    """
    # `|| true` keeps the command successful when the file is missing.
    outcome = await agent._exec_as_root(environment, "cat /installed-agent/ruhroh-loop-result.json 2>/dev/null || true")
    raw = command_output_text(outcome)
    if raw.strip() == "":
        return None
    try:
        document = json.loads(raw)
    except json.JSONDecodeError:
        return None
    if not isinstance(document, dict):
        return None
    return document
200
+
201
+
202
async def persist_ruhroh_debug_artifacts(environment: Any, logs_dir: Any) -> list[Path]:
    """Download the loop's debug artifacts from ``/installed-agent`` into ``logs_dir``.

    Best effort: an artifact whose download raises is skipped.

    Returns:
        Local paths of the artifacts that downloaded without error; an empty
        list when ``logs_dir`` is not a ``str`` or ``Path``.
    """
    if not isinstance(logs_dir, (str, Path)):
        return []
    target_dir = Path(logs_dir)
    target_dir.mkdir(parents=True, exist_ok=True)
    # Each artifact keeps its file name; only the directory changes.
    artifact_names = (
        "ruhroh-loop-result.json",
        "ruhroh-loop-iterations.jsonl",
        "ruhroh-loop-journey.json",
        "ruhroh-loop-eval.json",
        "ruhroh-loop-bridge.jsonl",
        "ruhroh-workspace.tar.gz",
        "ruhroh-loop-events.tar.gz",
        "ruhroh-loop-transcripts.tar.gz",
    )
    downloaded: list[Path] = []
    for artifact_name in artifact_names:
        local_path = target_dir / artifact_name
        try:
            await maybe_await(environment.download_file(f"/installed-agent/{artifact_name}", local_path))
        except Exception:
            continue
        else:
            downloaded.append(local_path)
    return downloaded
225
+
226
+
227
def write_ruhroh_result_artifact(context: Any, result: dict[str, Any], logs_dir: Any) -> Path | None:
    """Write ``result`` as indented, key-sorted JSON into the logs directory.

    The file is named ``ruhroh-<id>.json`` where the id comes from
    ``task_id``, then ``scenarioId``, then ``"unknown"``.

    Returns:
        The written path, or ``None`` when no logs directory is available.
    """
    directory = context_logs_dir(context, logs_dir)
    if directory is None:
        return None
    directory.mkdir(parents=True, exist_ok=True)
    identifier = result.get("task_id") or result.get("scenarioId") or "unknown"
    artifact = directory / f"ruhroh-{safe_artifact_name(str(identifier))}.json"
    serialized = json.dumps(result, indent=2, sort_keys=True)
    artifact.write_text(serialized + "\n", encoding="utf-8")
    return artifact
236
+
237
+
238
def write_command_result_artifact(context: Any, result: Any, logs_dir: Any) -> Path | None:
    """Save the command's combined output text as ``ruhroh-agent-command-output.log``.

    Returns:
        The written path, or ``None`` when no logs directory is available.
    """
    directory = context_logs_dir(context, logs_dir)
    if directory is None:
        return None
    directory.mkdir(parents=True, exist_ok=True)
    log_path = directory / "ruhroh-agent-command-output.log"
    output = command_output_text(result)
    log_path.write_text(output, encoding="utf-8")
    return log_path
246
+
247
+
248
def context_logs_dir(context: Any, logs_dir: Any) -> Path | None:
    """Pick the logs directory to write into.

    Preference order: ``context.agent_logs_dir``, ``context.logs_dir``, then
    the ``logs_dir`` argument. Only ``str`` and ``Path`` values are considered.

    Returns:
        The first usable candidate as a ``Path``, or ``None`` when there is none.
    """
    candidates = (
        getattr(context, "agent_logs_dir", None),
        getattr(context, "logs_dir", None),
        logs_dir,
    )
    usable = (Path(entry) for entry in candidates if isinstance(entry, (str, Path)))
    return next(usable, None)
257
+
258
+
259
def parse_ruhroh_result(text: str) -> dict[str, Any] | None:
    """Extract the structured result from the first result marker in ``text``.

    Returns:
        The decoded JSON object, or ``None`` when there is no marker, the
        payload cannot be decoded, or it decodes to something other than a dict.
    """
    marker = RUHROH_RESULT_RE.search(text)
    if marker is None:
        return None
    try:
        raw = base64.b64decode(marker.group("payload"))
        payload = json.loads(raw.decode("utf-8"))
    except Exception:
        return None
    if not isinstance(payload, dict):
        return None
    return payload
268
+
269
+
270
def command_output_text(result: Any) -> str:
    """Join the non-empty ``stdout``, ``stderr`` and ``output`` attributes of ``result`` with newlines.

    ``bytes`` values are decoded as UTF-8 with replacement characters;
    values that are neither ``str`` nor ``bytes`` are ignored.
    """

    def as_text(value: Any) -> str:
        if isinstance(value, bytes):
            return value.decode("utf-8", errors="replace")
        if isinstance(value, str):
            return value
        return ""

    chunks = [as_text(getattr(result, field, None)) for field in ("stdout", "stderr", "output")]
    return "\n".join(chunk for chunk in chunks if chunk)
279
+
280
+
281
def harbor_task_id(context: Any, logs_dir: Any) -> str:
    """Derive a sanitised task id for the current run.

    Uses the first non-blank string among ``context.task_id``,
    ``context.task_name`` and ``context.name``. Otherwise, when the name of
    ``logs_dir``'s parent directory contains ``"__"``, uses the text before
    the first ``"__"``. Falls back to ``"unknown"``.
    """
    for attribute in ("task_id", "task_name", "name"):
        candidate = getattr(context, attribute, None)
        if isinstance(candidate, str) and candidate.strip():
            return safe_task_id(candidate)
    if not isinstance(logs_dir, (str, Path)):
        return "unknown"
    parent_name = Path(logs_dir).parent.name
    prefix, separator, _rest = parent_name.partition("__")
    if not separator:
        return "unknown"
    return safe_task_id(prefix)
291
+
292
+
293
def resolve_max_iterations(value: Any) -> int:
    """Resolve the iteration limit for a run.

    Args:
        value: Explicit limit (anything ``int()`` accepts) or ``None``.

    Returns:
        ``value`` as an integer when it converts, otherwise the
        ``RUHROH_MAX_ITERATIONS`` environment variable when it converts,
        otherwise ``3``. The result is never below ``1``.
    """
    if value is not None:
        try:
            return max(1, int(value))
        except (TypeError, ValueError, OverflowError):
            # OverflowError covers float infinity; treat it like any other unusable value.
            pass
    raw = os.environ.get("RUHROH_MAX_ITERATIONS")
    if raw is not None:
        try:
            return max(1, int(raw))
        except ValueError:
            pass
    return 3
306
+
307
+
308
def resolve_agent_timeout_sec(scenario_id: str) -> int:
    """Return the agent run timeout in seconds.

    Reads ``RUHROH_AGENT_TIMEOUT_SEC`` (clamped to at least ``1``) and falls
    back to ``3600`` when it is unset or not an integer. ``scenario_id`` is
    accepted but unused.
    """
    del scenario_id
    configured = os.environ.get("RUHROH_AGENT_TIMEOUT_SEC")
    try:
        seconds = None if configured is None else int(configured)
    except ValueError:
        seconds = None
    return 3600 if seconds is None else max(1, seconds)
317
+
318
+
319
def harbor_install_timeout_sec() -> int:
    """Return the install timeout in seconds.

    Reads ``RUHROH_INSTALL_TIMEOUT_SEC`` (clamped to at least ``1``) and falls
    back to ``900`` when it is unset or not an integer.
    """
    configured = os.environ.get("RUHROH_INSTALL_TIMEOUT_SEC")
    try:
        seconds = None if configured is None else int(configured)
    except ValueError:
        seconds = None
    return 900 if seconds is None else max(1, seconds)
327
+
328
+
329
def safe_task_id(value: str) -> str:
    """Sanitise the last ``/``-separated segment of ``value``; return ``"unknown"`` when nothing is left."""
    last_segment = value.strip().rsplit("/", 1)[-1]
    cleaned = safe_artifact_name(last_segment)
    return cleaned if cleaned else "unknown"
332
+
333
+
334
def safe_artifact_name(value: str) -> str:
    """Replace every character that is not alphanumeric or one of ``-_.`` with ``-``, then trim ``-_.`` from both ends."""
    kept_punctuation = "-_."
    sanitized = []
    for char in value:
        keep = char.isalnum() or char in kept_punctuation
        sanitized.append(char if keep else "-")
    return "".join(sanitized).strip(kept_punctuation)
336
+
337
+
338
def without_none_values(value: dict[str, Any]) -> dict[str, Any]:
    """Return a new dict holding the entries of ``value`` whose value is not ``None``."""
    kept: dict[str, Any] = {}
    for key, item in value.items():
        if item is None:
            continue
        kept[key] = item
    return kept
340
+
341
+
342
async def maybe_await(value: Any) -> Any:
    """Return ``value`` itself, or its awaited result when it exposes ``__await__``."""
    if not hasattr(value, "__await__"):
        return value
    return await value