@vibeiao/sdk 0.1.35 → 0.1.36
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agentLoop.js +3 -3
- package/dist/chunk-BQVU67TR.js +63 -0
- package/dist/chunk-RUKN3KQ2.js +158 -0
- package/dist/humanAppLoop.d.ts +73 -0
- package/dist/humanAppLoop.js +316 -0
- package/dist/index.d.ts +3 -1
- package/dist/index.js +38 -548
- package/dist/outcomeBoundFlow.d.ts +38 -0
- package/dist/outcomeBoundFlow.js +12 -0
- package/dist/strictMemoryRuntime.d.ts +57 -0
- package/dist/strictMemoryRuntime.js +14 -0
- package/dist/treasuryGuardian.d.ts +3 -166
- package/package.json +1 -1
package/dist/agentLoop.js
CHANGED
|
@@ -5,6 +5,9 @@ import {
|
|
|
5
5
|
import {
|
|
6
6
|
createDurabilityProxyClient
|
|
7
7
|
} from "./chunk-PVCW4MAY.js";
|
|
8
|
+
import {
|
|
9
|
+
createSelfRelianceMonitor
|
|
10
|
+
} from "./chunk-M7DQTU5R.js";
|
|
8
11
|
import {
|
|
9
12
|
evaluateEscapeHatch,
|
|
10
13
|
formatEscapeHatchDecision
|
|
@@ -14,9 +17,6 @@ import {
|
|
|
14
17
|
formatSurvivalRecommendation,
|
|
15
18
|
getSurvivalRecommendation
|
|
16
19
|
} from "./chunk-JQE72P4C.js";
|
|
17
|
-
import {
|
|
18
|
-
createSelfRelianceMonitor
|
|
19
|
-
} from "./chunk-M7DQTU5R.js";
|
|
20
20
|
|
|
21
21
|
// src/agentLoop.ts
|
|
22
22
|
var asFiniteNumber = (value) => {
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
// src/outcomeBoundFlow.ts
|
|
2
|
+
/** Schema identifier reported in the `schema` field of an outcome-bound run status. */
const OUTCOME_BOUND_FLOW_SCHEMA = "outcome-bound-autonomous-flow/v1";

/**
 * Identifiers of the gates an outcome-bound run is scored against.
 * The length of this list is the denominator of the run score.
 */
const OUTCOME_BOUND_REQUIRED_GATES = [
  "public_deploy_url",
  "listing_updated",
  "external_smoke_check",
  "evidence_log",
  "context_pack_preflight",
];
|
|
10
|
+
/**
 * Reports whether a value is a string with at least one non-whitespace character.
 * @param {unknown} v - Value to inspect.
 * @returns {boolean} `true` only for strings that are non-empty after trimming.
 */
function isNonEmpty(v) {
  if (typeof v !== "string") return false;
  return v.trim() !== "";
}
|
|
11
|
+
/**
 * Checks a run against the five outcome-bound gates and summarizes the result.
 *
 * @param {object} input - Run description. Reads `publicDeployUrl`, `listingUpdated`,
 *   `listingId`, `externalSmokeCheck.passed`, `evidenceLogPath`,
 *   `contextPackPreflight.passed`, `runId` and `objective`.
 * @returns {object} Status with `schema`, `runId`, `objective`, `completed`,
 *   `failedGates`, `score` (passed gates / total gates, two decimals), `status`
 *   ("pass" or "fail"), `reasons` and an ISO `createdAt` timestamp.
 */
const evaluateOutcomeBoundRun = (input) => {
  // Each row: [gate id, gate satisfied?, reason recorded when it is not].
  const gateChecks = [
    ["public_deploy_url", isNonEmpty(input.publicDeployUrl), "Missing public deploy URL."],
    [
      "listing_updated",
      Boolean(input.listingUpdated) && isNonEmpty(input.listingId),
      "Listing update is incomplete (listingUpdated/listingId required).",
    ],
    ["external_smoke_check", Boolean(input.externalSmokeCheck?.passed), "External smoke check did not pass."],
    ["evidence_log", isNonEmpty(input.evidenceLogPath), "Missing evidence log path."],
    ["context_pack_preflight", Boolean(input.contextPackPreflight?.passed), "Context-pack preflight did not pass."],
  ];

  const failing = gateChecks.filter(([, satisfied]) => !satisfied);
  const failedGates = failing.map(([gate]) => gate);
  const reasons = failing.map(([, , reason]) => reason);

  const totalGates = OUTCOME_BOUND_REQUIRED_GATES.length;
  const score = Number(((totalGates - failedGates.length) / totalGates).toFixed(2));
  const completed = failedGates.length === 0;

  return {
    schema: OUTCOME_BOUND_FLOW_SCHEMA,
    runId: input.runId,
    objective: input.objective,
    completed,
    failedGates,
    score,
    status: completed ? "pass" : "fail",
    reasons,
    createdAt: new Date().toISOString(),
  };
};
|
|
49
|
+
/**
 * Evaluates a run and throws unless every outcome-bound gate passed.
 *
 * @param {object} input - Same input accepted by `evaluateOutcomeBoundRun`.
 * @returns {object} The evaluated status when the run is complete.
 * @throws {Error} Message `outcome_bound_incomplete:<failed gates>[:<reasons>]`
 *   when at least one gate failed.
 */
function assertOutcomeBoundCompleted(input) {
  const status = evaluateOutcomeBoundRun(input);
  if (status.completed) {
    return status;
  }
  const gateList = status.failedGates.join(",");
  const details = status.reasons.join(" ");
  const suffix = details ? `:${details}` : "";
  throw new Error(`outcome_bound_incomplete:${gateList}${suffix}`);
}
|
|
57
|
+
|
|
58
|
+
export {
|
|
59
|
+
OUTCOME_BOUND_FLOW_SCHEMA,
|
|
60
|
+
OUTCOME_BOUND_REQUIRED_GATES,
|
|
61
|
+
evaluateOutcomeBoundRun,
|
|
62
|
+
assertOutcomeBoundCompleted
|
|
63
|
+
};
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
// src/strictMemoryRuntime.ts
|
|
2
|
+
/** Schema identifier placed in the `schema` field of a strict-memory preset. */
const STRICT_MEMORY_RUNTIME_SCHEMA = "strict-memory-runtime/v1";

/** Default `mutationKeywords` list copied into a new preset. */
const DEFAULT_MUTATION_KEYWORDS = [
  "deploy",
  "restart",
  "install",
  "delete",
  "remove",
  "push",
  "publish",
  "release",
  "migrate",
  "config",
  "cron",
  "service",
  "production",
  "prod",
];

/** Default `complexTaskTrigger.keywords` list copied into a new preset. */
const DEFAULT_COMPLEX_KEYWORDS = [
  "multi-step",
  "cross-day",
  "production",
  "owner",
  "id",
  "listing",
  "migration",
  "incident",
  "release",
  "rollout",
  "infra",
  "security",
];
|
|
33
|
+
/**
 * Normalizes a keyword list: stringifies and trims each entry, drops blanks,
 * and removes duplicates while keeping first-seen order.
 *
 * `null` and `undefined` entries are discarded up front; stringifying them
 * would otherwise produce the literal keywords "null" and "undefined".
 *
 * @param {Array<unknown>} arr - Raw keyword entries.
 * @returns {string[]} Trimmed, de-duplicated, non-empty keywords.
 */
var uniq = (arr) => [
  ...new Set(
    arr
      .filter((entry) => entry != null)
      .map((entry) => String(entry).trim())
      .filter(Boolean)
  ),
];
|
|
34
|
+
/**
 * Builds a strict-memory runtime preset by layering `overrides` on the defaults.
 *
 * Nested `complexTaskTrigger`, `upgradePolicy` and `upgradePolicy.safeUpgrade`
 * objects are merged key by key rather than replaced wholesale, and both
 * keyword lists are normalized through `uniq`.
 *
 * Override keys whose value is `undefined` are treated as "not provided".
 * Without that, `{ enabled: undefined }` would overwrite the default `true`
 * through object spread and leave the preset with a falsy `enabled` flag.
 * A `null`/`undefined` `overrides` argument is treated as an empty object.
 *
 * @param {object} [overrides={}] - Partial preset to apply over the defaults.
 * @returns {object} Complete preset object.
 */
var createStrictMemoryRuntimePreset = (overrides = {}) => {
  const base = {
    schema: STRICT_MEMORY_RUNTIME_SCHEMA,
    enabled: true,
    requireContextPackForComplex: true,
    requireSemanticRecallForComplex: true,
    requireApprovalPreflightForMutations: true,
    maxContextPackAgeMin: 180,
    mutationKeywords: [...DEFAULT_MUTATION_KEYWORDS],
    complexTaskTrigger: {
      keywords: [...DEFAULT_COMPLEX_KEYWORDS],
      minTaskChars: 80
    },
    upgradePolicy: {
      mode: "observe",
      safeUpgrade: {
        backupBeforeEnable: true,
        requireHealthcheckPass: true,
        rollbackOnBlockRateAbove: 0.35
      }
    }
  };

  // Copy of `obj` without keys set to undefined, so spreading it cannot erase a default.
  const withoutUndefined = (obj) =>
    Object.fromEntries(Object.entries(obj ?? {}).filter(([, value]) => value !== undefined));

  const top = withoutUndefined(overrides);
  const trigger = withoutUndefined(top.complexTaskTrigger);
  const policy = withoutUndefined(top.upgradePolicy);
  const safeUpgrade = withoutUndefined(policy.safeUpgrade);

  return {
    ...base,
    ...top,
    mutationKeywords: uniq(top.mutationKeywords || base.mutationKeywords),
    complexTaskTrigger: {
      ...base.complexTaskTrigger,
      ...trigger,
      keywords: uniq(trigger.keywords || base.complexTaskTrigger.keywords)
    },
    upgradePolicy: {
      ...base.upgradePolicy,
      ...policy,
      safeUpgrade: {
        ...base.upgradePolicy.safeUpgrade,
        ...safeUpgrade
      }
    }
  };
};
|
|
75
|
+
/**
 * Case-insensitive check for whether `text` contains any of `words`.
 *
 * Matching is plain substring containment, so a short keyword also matches
 * inside longer words (for example "id" inside "provide").
 *
 * Keywords that are not strings, or that are empty or whitespace-only, are
 * skipped: an empty keyword is contained in every string and would make every
 * text match, and a non-string keyword has no `toLowerCase`.
 *
 * @param {string} text - Text to search.
 * @param {Array<unknown>} words - Candidate keywords.
 * @returns {boolean} `true` if at least one usable keyword occurs in `text`.
 */
var includesAny = (text, words) => {
  const low = String(text ?? "").toLowerCase();
  return words.some(
    (word) => typeof word === "string" && word.trim() !== "" && low.includes(word.toLowerCase())
  );
};
|
|
79
|
+
/**
 * Decides whether a task description counts as "complex" for a preset.
 *
 * A task is complex when its trimmed text is at least
 * `preset.complexTaskTrigger.minTaskChars` characters long, or when it contains
 * one of `preset.complexTaskTrigger.keywords`. Blank text is never complex.
 *
 * @param {unknown} taskText - Task description; falsy values are treated as "".
 * @param {object} [preset] - Preset to read the trigger from; defaults to a fresh default preset.
 * @returns {boolean} Whether the task is considered complex.
 */
function isComplexTask(taskText, preset = createStrictMemoryRuntimePreset()) {
  const text = String(taskText || "").trim();
  if (text === "") {
    return false;
  }
  const { minTaskChars, keywords } = preset.complexTaskTrigger;
  return text.length >= minTaskChars || includesAny(text, keywords);
}
|
|
85
|
+
/**
 * Evaluates whether a task may proceed under a strict-memory preset.
 *
 * Enforcement applies when the preset is enabled and the task is either
 * complex (per `isComplexTask`) or flagged as a mutation. When enforced, each
 * requirement switched on in the preset adds a required step, and any step not
 * confirmed by `input` is listed as missing together with a reason.
 *
 * `shouldEnforce` is always a real boolean. Previously, omitting
 * `input.isMutation` on a non-complex task leaked `undefined` into the result.
 *
 * @param {object} input - Reads `taskText`, `isMutation`, `contextPackPrepared`,
 *   `semanticRecallConfirmed` and `approvalPreflightPassed`.
 * @param {object} [preset] - Preset to evaluate against; defaults to a fresh default preset.
 * @returns {{complex: boolean, shouldEnforce: boolean, allowed: boolean,
 *   requiredSteps: string[], missingSteps: string[], reasons: string[]}}
 *   `allowed` is `true` when nothing is enforced or no required step is missing.
 */
var evaluateStrictMemoryExecution = (input, preset = createStrictMemoryRuntimePreset()) => {
  const complex = isComplexTask(input.taskText, preset);
  // Coerce: `enabled && (false || undefined)` would otherwise evaluate to undefined.
  const shouldEnforce = Boolean(preset.enabled && (complex || input.isMutation));
  const requiredSteps = [];
  const missingSteps = [];
  const reasons = [];
  if (!shouldEnforce) {
    return { complex, shouldEnforce, allowed: true, requiredSteps, missingSteps, reasons };
  }

  // Registers a required step and records it as missing when not satisfied.
  const requireStep = (step, satisfied, reason) => {
    requiredSteps.push(step);
    if (!satisfied) {
      missingSteps.push(step);
      reasons.push(reason);
    }
  };

  if (preset.requireContextPackForComplex && complex) {
    requireStep("context_pack_prepared", input.contextPackPrepared, "Missing context pack for complex task.");
  }
  if (preset.requireSemanticRecallForComplex && complex) {
    requireStep(
      "semantic_recall_confirmed",
      input.semanticRecallConfirmed,
      "Missing semantic recall confirmation for complex task."
    );
  }
  if (preset.requireApprovalPreflightForMutations && input.isMutation) {
    requireStep(
      "approval_preflight_passed",
      input.approvalPreflightPassed,
      "Mutation preflight approval did not pass."
    );
  }

  return {
    complex,
    shouldEnforce,
    allowed: missingSteps.length === 0,
    requiredSteps,
    missingSteps,
    reasons
  };
};
|
|
124
|
+
/**
 * Produces the preset for a requested upgrade mode and reports whether the
 * safe-upgrade preconditions are met.
 *
 * @param {object} [input={}] - Reads `current` (existing preset or partial),
 *   `targetMode`, `backupCreated`, `healthcheckPassed` and `recentBlockRate`.
 * @returns {{next: object, safe: boolean, reasons: string[]}} `next` is the
 *   preset with `upgradePolicy.mode` set to the target mode; `safe` is `true`
 *   when `reasons` is empty.
 */
function upgradeToStrictMemoryRuntimePreset(input = {}) {
  const current = createStrictMemoryRuntimePreset(input.current || {});
  const targetMode = input.targetMode || current.upgradePolicy.mode || "observe";
  const next = createStrictMemoryRuntimePreset({
    ...current,
    upgradePolicy: { ...current.upgradePolicy, mode: targetMode },
  });

  const { backupBeforeEnable, requireHealthcheckPass, rollbackOnBlockRateAbove } =
    next.upgradePolicy.safeUpgrade;

  // The block-rate guard only applies when moving to "enforce" with a numeric rate.
  const blockRateTooHigh =
    typeof input.recentBlockRate === "number" &&
    input.recentBlockRate > rollbackOnBlockRateAbove &&
    targetMode === "enforce";

  const findings = [
    [backupBeforeEnable && !input.backupCreated, "Backup not confirmed."],
    [requireHealthcheckPass && !input.healthcheckPassed, "Healthcheck not confirmed."],
    [blockRateTooHigh, "Recent block rate is too high for enforce mode; keep observe mode first."],
  ];
  const reasons = findings.filter(([triggered]) => triggered).map(([, message]) => message);

  return {
    next,
    safe: reasons.length === 0,
    reasons,
  };
}
|
|
151
|
+
|
|
152
|
+
export {
|
|
153
|
+
STRICT_MEMORY_RUNTIME_SCHEMA,
|
|
154
|
+
createStrictMemoryRuntimePreset,
|
|
155
|
+
isComplexTask,
|
|
156
|
+
evaluateStrictMemoryExecution,
|
|
157
|
+
upgradeToStrictMemoryRuntimePreset
|
|
158
|
+
};
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
/** Schema identifier carried in the `schema` field of a loop spec. */
declare const HUMAN_APP_LOOP_SCHEMA = "human-app-reflective-loop/v1";

/** The seven phase names of the loop. */
declare const HUMAN_APP_LOOP_PHASES: readonly ["research", "hypothesis", "build", "launch", "observe", "reflect", "iterate"];

/** Any single phase name from `HUMAN_APP_LOOP_PHASES`. */
type HumanAppLoopPhase = (typeof HUMAN_APP_LOOP_PHASES)[number];

/** Contract for one phase: what it needs, what it produces, and how it is gated. */
type HumanAppPhaseContract = {
    phase: HumanAppLoopPhase;
    requiredInputs: string[];
    requiredOutputs: string[];
    goNoGoCriteria: string[];
    stopOrRollbackConditions: string[];
    kpis: string[];
};

/** Full loop specification: phase contracts, tool-use policy, comparison notes, and rubric. */
type HumanAppLoopSpec = {
    schema: typeof HUMAN_APP_LOOP_SCHEMA;
    /** Timestamp string supplied when the spec was created. */
    createdAt: string;
    phases: HumanAppPhaseContract[];
    optionalToolUsePolicy: {
        enabled: true;
        rules: string[];
    };
    overlapWithAgentListingLoops: {
        overlap: string[];
        differences: string[];
    };
    evaluationRubric: {
        /** Per-dimension weight and minimum passing score. */
        dimensions: {
            id: "researchDepth" | "iterationQuality" | "outcomeUsefulness";
            description: string;
            weight: number;
            passThreshold: number;
        }[];
        /** Minimum weighted score required for a trial to pass. */
        overallPassThreshold: number;
    };
};

/** Evidence counts and flags describing one trial run of the loop. */
type HumanAppTrialInput = {
    id: string;
    summary?: string;
    evidence: {
        researchNotesCount: number;
        hypothesisCount: number;
        experimentsRun: number;
        measurableKpiCount: number;
        rollbackPlanPresent: boolean;
        shippedArtifactPresent: boolean;
        iterationSteps: number;
    };
};

/** Result of scoring a trial against a spec's rubric. */
type HumanAppTrialEvaluation = {
    id: string;
    scores: {
        researchDepth: number;
        iterationQuality: number;
        outcomeUsefulness: number;
    };
    weightedScore: number;
    pass: boolean;
    notes: string[];
};

/** Creates a loop spec; `createdAt` is optional. */
declare const createHumanAppLoopSpec: (createdAt?: string) => HumanAppLoopSpec;

/** Scores a trial; `spec` is optional. */
declare const evaluateHumanAppTrial: (trial: HumanAppTrialInput, spec?: HumanAppLoopSpec) => HumanAppTrialEvaluation;

/** Options for writing the template pack to disk. */
type ScaffoldHumanAppLoopPackOptions = {
    root?: string;
    outputDir?: string;
    overwrite?: boolean;
};

/** Outcome of scaffolding: resolved locations, the files listed as written, and a timestamp. */
type ScaffoldHumanAppLoopPackResult = {
    root: string;
    outputDir: string;
    files: string[];
    createdAt: string;
};

/** Writes the template pack and resolves with a summary of what was written. */
declare const scaffoldHumanAppLoopPack: (options?: ScaffoldHumanAppLoopPackOptions) => Promise<ScaffoldHumanAppLoopPackResult>;
|
|
72
|
+
|
|
73
|
+
export { HUMAN_APP_LOOP_PHASES, HUMAN_APP_LOOP_SCHEMA, type HumanAppLoopPhase, type HumanAppLoopSpec, type HumanAppPhaseContract, type HumanAppTrialEvaluation, type HumanAppTrialInput, type ScaffoldHumanAppLoopPackOptions, type ScaffoldHumanAppLoopPackResult, createHumanAppLoopSpec, evaluateHumanAppTrial, scaffoldHumanAppLoopPack };
|
|
@@ -0,0 +1,316 @@
|
|
|
1
|
+
// src/humanAppLoop.ts
|
|
2
|
+
import { promises as fs } from "fs";
|
|
3
|
+
import path from "path";
|
|
4
|
+
/** Schema identifier written into every human-app loop spec. */
const HUMAN_APP_LOOP_SCHEMA = "human-app-reflective-loop/v1";

/** Names of the loop phases. */
const HUMAN_APP_LOOP_PHASES = [
  "research",
  "hypothesis",
  "build",
  "launch",
  "observe",
  "reflect",
  "iterate",
];
|
|
14
|
+
/**
 * Current time as an ISO-8601 string.
 * @returns {string} Result of `Date.prototype.toISOString` for "now".
 */
function nowIso() {
  const now = new Date();
  return now.toISOString();
}
|
|
15
|
+
/**
 * Builds the human-app reflective loop specification.
 *
 * @param {string} [createdAt] - Timestamp stored on the spec; defaults to the current ISO time.
 * @returns {object} Spec with `schema`, `createdAt`, seven phase contracts,
 *   the optional tool-use policy, the comparison with agent-listing loops,
 *   and the evaluation rubric.
 */
var createHumanAppLoopSpec = (createdAt = nowIso()) => {
  // Builds one phase contract; property order here fixes the serialized key order.
  const contract = (phase, requiredInputs, requiredOutputs, goNoGoCriteria, stopOrRollbackConditions, kpis) => ({
    phase,
    requiredInputs,
    requiredOutputs,
    goNoGoCriteria,
    stopOrRollbackConditions,
    kpis,
  });

  // Builds one rubric dimension.
  const dimension = (id, description, weight, passThreshold) => ({
    id,
    description,
    weight,
    passThreshold,
  });

  const phases = [
    contract(
      "research",
      ["problem statement", "target users", "constraints (time/budget/risk)"],
      ["evidence log", "competitor/alternative scan", "top unresolved assumptions"],
      [">=3 independent evidence points", "clear user pain identified"],
      ["no meaningful user pain found", "constraints make task infeasible"],
      ["evidence_count", "source_diversity", "problem_clarity_score"]
    ),
    contract(
      "hypothesis",
      ["research outputs"],
      ["testable hypotheses", "success/failure thresholds", "experiment plan"],
      ["each hypothesis is falsifiable", "each has measurable KPI target"],
      ["hypothesis not measurable", "missing baseline"],
      ["hypothesis_count", "kpi_defined_ratio"]
    ),
    contract(
      "build",
      ["approved hypothesis", "scope boundary"],
      ["MVP artifact", "change log", "known limitations list"],
      ["MVP covers core user path", "critical defects resolved"],
      ["critical bug unresolved", "scope creep beyond budget"],
      ["mvp_completion", "critical_bug_count", "time_to_mvp_hours"]
    ),
    contract(
      "launch",
      ["MVP artifact", "release checklist"],
      ["launch record", "segment/channel", "baseline metrics snapshot"],
      ["launch checklist passed", "monitoring ready"],
      ["monitoring absent", "compliance/safety issue found"],
      ["launch_readiness_score", "time_to_first_user_feedback_hours"]
    ),
    contract(
      "observe",
      ["launch metrics", "feedback stream"],
      ["observation report", "anomaly list", "KPI trend summary"],
      ["minimum observation window met", "KPI deltas measurable"],
      ["harmful regressions", "rollback threshold crossed"],
      ["retention_d1", "task_success_rate", "error_rate", "feedback_signal_ratio"]
    ),
    contract(
      "reflect",
      ["observation report", "hypothesis outcomes"],
      ["lessons learned", "root-cause analysis", "decision log"],
      ["at least one actionable insight", "root cause linked to evidence"],
      ["insufficient evidence quality"],
      ["insight_actionability_score", "decision_traceability_ratio"]
    ),
    contract(
      "iterate",
      ["reflection decisions", "prioritized backlog"],
      ["next-cycle plan", "versioned experiment backlog", "risk updates"],
      ["next cycle has explicit KPI targets", "owner and timeline assigned"],
      ["no measurable improvement path"],
      ["iteration_cycle_time", "kpi_improvement_rate", "rollback_incident_count"]
    ),
  ];

  const optionalToolUsePolicy = {
    enabled: true,
    rules: [
      "Tool choice is optional, but each phase must record a one-line rationale for selected tools.",
      "Prefer the least-privilege/lowest-cost tool that can satisfy evidence quality.",
      "If external tools are skipped, document why manual reasoning is sufficient.",
    ],
  };

  const overlapWithAgentListingLoops = {
    overlap: [
      "Both loops require measurable KPI targets and decision gates.",
      "Both run observe -> reflect -> iterate compounding cycles.",
      "Both benefit from versioned changelogs and user/review feedback integration.",
    ],
    differences: [
      "Human app loop starts with product problem discovery; agent-listing loops start with listing/distribution economics.",
      "Human app loop emphasizes user value and usability outcomes; agent-listing loops emphasize market visibility, conversion, and listing operations.",
      "Human app loop gates include stop/rollback on product harm/usability regressions before growth mechanics.",
    ],
  };

  const evaluationRubric = {
    dimensions: [
      dimension("researchDepth", "Evidence quality, source diversity, and assumption clarity.", 0.35, 3),
      dimension(
        "iterationQuality",
        "Strength of hypothesis-to-build-to-reflect loop with measurable changes.",
        0.35,
        3
      ),
      dimension("outcomeUsefulness", "Practical utility of outputs for a human owner/builder.", 0.3, 3),
    ],
    overallPassThreshold: 3.2,
  };

  return {
    schema: HUMAN_APP_LOOP_SCHEMA,
    createdAt,
    phases,
    optionalToolUsePolicy,
    overlapWithAgentListingLoops,
    evaluationRubric,
  };
};
|
|
120
|
+
/**
 * Restricts a number to the inclusive range [min, max].
 * @param {number} v - Value to restrict.
 * @param {number} min - Lower bound.
 * @param {number} max - Upper bound.
 * @returns {number} `v` limited to the range.
 */
function clamp(v, min, max) {
  const capped = Math.min(max, v);
  return Math.max(min, capped);
}
|
|
121
|
+
/**
 * Scores a trial's evidence on three rubric dimensions and decides pass/fail.
 *
 * Each dimension score is a sum of evidence-based points limited to the 1..5
 * range. The weighted score multiplies each dimension by its weight in
 * `spec.evaluationRubric.dimensions` (missing weights count as 0). A trial
 * passes when every dimension meets its `passThreshold` and the unrounded
 * weighted score meets `overallPassThreshold`.
 *
 * @param {object} trial - Reads `id` and the counts/flags in `trial.evidence`.
 * @param {object} [spec] - Spec supplying the rubric; defaults to a spec
 *   created with a fixed epoch timestamp.
 * @returns {{id: *, scores: {researchDepth: number, iterationQuality: number,
 *   outcomeUsefulness: number}, weightedScore: number, pass: boolean,
 *   notes: string[]}} `weightedScore` is rounded to two decimals.
 */
var evaluateHumanAppTrial = (trial, spec = createHumanAppLoopSpec("1970-01-01T00:00:00.000Z")) => {
  const evidence = trial.evidence;
  const rubric = spec.evaluationRubric;

  // Keeps a raw point total inside the 1..5 score scale.
  const toScale = (raw) => Math.max(1, Math.min(5, raw));
  // 2 points for two or more, 1 point for exactly one, otherwise 0.
  const upToTwo = (count) => {
    if (count >= 2) return 2;
    if (count >= 1) return 1;
    return 0;
  };

  let researchPoints = 0;
  if (evidence.researchNotesCount >= 4) researchPoints += 2;
  else if (evidence.researchNotesCount >= 2) researchPoints += 1;
  if (evidence.hypothesisCount >= 2) researchPoints += 1;
  if (evidence.measurableKpiCount >= 2) researchPoints += 1;
  const researchDepth = toScale(researchPoints);

  const rollbackPoint = evidence.rollbackPlanPresent ? 1 : 0;
  const iterationQuality = toScale(
    upToTwo(evidence.experimentsRun) + upToTwo(evidence.iterationSteps) + rollbackPoint
  );
  const outcomeUsefulness = toScale(
    (evidence.shippedArtifactPresent ? 2 : 0) + upToTwo(evidence.measurableKpiCount) + rollbackPoint
  );

  // One lookup per dimension id; a later duplicate id replaces an earlier one.
  const byId = Object.fromEntries(rubric.dimensions.map((dim) => [dim.id, dim]));
  const weightOf = (id) => byId[id]?.weight || 0;
  const thresholdOf = (id) => byId[id]?.passThreshold || 0;

  const weightedScore =
    researchDepth * weightOf("researchDepth") +
    iterationQuality * weightOf("iterationQuality") +
    outcomeUsefulness * weightOf("outcomeUsefulness");

  const notes = [];
  if (!evidence.rollbackPlanPresent) {
    notes.push("Missing rollback plan evidence.");
  }
  if (evidence.measurableKpiCount < 2) {
    notes.push("Insufficient measurable KPI coverage.");
  }
  if (evidence.experimentsRun < 1) {
    notes.push("No experiment run evidence.");
  }

  const scores = { researchDepth, iterationQuality, outcomeUsefulness };
  const everyDimensionPasses = Object.keys(scores).every((id) => scores[id] >= thresholdOf(id));
  const pass = everyDimensionPasses && weightedScore >= rubric.overallPassThreshold;

  return {
    id: trial.id,
    scores,
    weightedScore: Number(weightedScore.toFixed(2)),
    pass,
    notes,
  };
};
|
|
156
|
+
/**
 * Creates a directory, including any missing parent directories.
 * @param {string} dir - Directory path to create.
 * @returns {Promise<void>} Resolves once `fs.mkdir` has completed.
 */
async function ensureDir(dir) {
  const mkdirOptions = { recursive: true };
  await fs.mkdir(dir, mkdirOptions);
}
|
|
159
|
+
/**
 * Writes `content` to `filePath`, optionally refusing to replace an existing file.
 *
 * When `overwrite` is falsy the file is opened with the exclusive-create flag
 * ("wx"), so "does it exist?" and "create it" happen in one step. This replaces
 * a separate `access` check followed by `writeFile`, which could overwrite a
 * file created in between and treated every `access` failure as "missing".
 *
 * @param {string} filePath - Destination path.
 * @param {string} content - Text to write, encoded as UTF-8.
 * @param {boolean} overwrite - When truthy, always write and replace any existing file.
 * @returns {Promise<boolean>} `true` if the file was written, `false` if it
 *   already existed and `overwrite` was falsy.
 * @throws Any file-system error other than "already exists" is propagated.
 */
var writeIfAllowed = async (filePath, content, overwrite) => {
  if (overwrite) {
    await fs.writeFile(filePath, content, "utf-8");
    return true;
  }
  try {
    await fs.writeFile(filePath, content, { encoding: "utf-8", flag: "wx" });
    return true;
  } catch (err) {
    // EEXIST is the expected "keep the existing file" outcome; anything else is a real failure.
    if (err?.code === "EEXIST") {
      return false;
    }
    throw err;
  }
};
|
|
170
|
+
/**
 * Returns the Markdown text of the loop run template.
 * @returns {string} Template with a metadata section followed by one section per phase.
 */
function buildLoopRunTemplate() {
  const lines = [
    "# Human App Reflective Loop Run Template",
    "",
    "Use this in order: research -> hypothesis -> build -> launch -> observe -> reflect -> iterate",
    "",
    "## 0) Metadata",
    "- App idea:",
    "- Target user:",
    "- Owner:",
    "- Date:",
    "- Version/cycle:",
    "",
    "## 1) Research",
    "- Inputs used:",
    "- Evidence notes (>=3):",
    "- Output summary:",
    "- Go/No-Go decision:",
    "",
    "## 2) Hypothesis",
    "- Hypothesis H1:",
    "- KPI targets + baseline:",
    "- Experiment plan:",
    "- Go/No-Go decision:",
    "",
    "## 3) Build",
    "- Scope:",
    "- Artifact shipped:",
    "- Known limitations:",
    "- Go/No-Go decision:",
    "",
    "## 4) Launch",
    "- Channel/audience:",
    "- Launch checklist:",
    "- Baseline metric snapshot:",
    "- Go/No-Go decision:",
    "",
    "## 5) Observe",
    "- Observation window:",
    "- KPI deltas:",
    "- Risks/anomalies:",
    "- Stop/Rollback triggered? why/why not:",
    "",
    "## 6) Reflect",
    "- What worked:",
    "- What failed:",
    "- Root cause evidence:",
    "- Decisions recorded:",
    "",
    "## 7) Iterate",
    "- Next-cycle plan:",
    "- Backlog prioritization:",
    "- KPI delta target:",
    "- Rollback guardrails:",
    // Final empty entry yields the trailing newline.
    "",
  ];
  return lines.join("\n");
}
|
|
223
|
+
/**
 * Returns the Markdown text of the subagent task template.
 * @returns {string} Instructions listing the required phase order and output requirements.
 */
function buildSubagentTaskTemplate() {
  const lines = [
    "# Subagent Task Template \u2014 Human App Reflective Loop",
    "",
    "You are tasked to run ONE full human-app reflective loop cycle on a small app/game idea.",
    "",
    "Required order (do not skip):",
    "1. research",
    "2. hypothesis",
    "3. build (MVP spec or mock implementation)",
    "4. launch (simulated launch plan)",
    "5. observe (expected metrics + monitoring)",
    "6. reflect",
    "7. iterate",
    "",
    "Requirements:",
    "- Include measurable KPIs and thresholds.",
    "- Include explicit go/no-go decision at each phase.",
    "- Include stop/rollback conditions.",
    "- If you use tools, add one-line justification per tool.",
    "- Output using LOOP_RUN_TEMPLATE.md headings.",
    // Final empty entry yields the trailing newline.
    "",
  ];
  return lines.join("\n");
}
|
|
243
|
+
/**
 * Returns the Markdown text of the trial scorecard template.
 * @returns {string} Scorecard with trial id, scores, evidence, and issues sections.
 */
function buildScorecardTemplate() {
  const lines = [
    "# Human App Loop Trial Scorecard",
    "",
    "## Trial ID",
    "-",
    "",
    "## Scores (1-5)",
    "- researchDepth:",
    "- iterationQuality:",
    "- outcomeUsefulness:",
    "- weightedScore:",
    "- pass/fail:",
    "",
    "## Evidence",
    "- research notes count:",
    "- hypotheses count:",
    "- experiments run:",
    "- measurable KPI count:",
    "- rollback plan present:",
    "- shipped artifact present:",
    "- iteration steps:",
    "",
    "## Issues / Fixes",
    "-",
    // Final empty entry yields the trailing newline.
    "",
  ];
  return lines.join("\n");
}
|
|
267
|
+
/**
 * Returns the Markdown text of the pack README.
 * @returns {string} README listing the pack files and quick-usage steps.
 */
function buildReadme() {
  const lines = [
    "# Human App Reflective Loop Pack",
    "",
    "This pack is first-class scaffold for human app lifecycle execution.",
    "",
    "## Files",
    "- LOOP_RUN_TEMPLATE.md",
    "- SUBAGENT_TASK_TEMPLATE.md",
    "- TRIAL_SCORECARD_TEMPLATE.md",
    "- SPEC.snapshot.json",
    "",
    "## Quick usage",
    "1) Fill LOOP_RUN_TEMPLATE.md for a cycle.",
    "2) For delegated runs, paste SUBAGENT_TASK_TEMPLATE.md into a subagent task.",
    "3) Evaluate trial outputs with TRIAL_SCORECARD_TEMPLATE.md and the SDK evaluator.",
    // Final empty entry yields the trailing newline.
    "",
  ];
  return lines.join("\n");
}
|
|
282
|
+
/**
 * Writes the human-app loop template pack to disk.
 *
 * Creates the output directory, then writes the four Markdown templates and a
 * JSON snapshot of the spec one after another. Files that already exist are
 * left alone unless `options.overwrite` is truthy.
 *
 * @param {object} [options={}] - `root` (defaults to "."), `outputDir`
 *   (resolved against `root`; defaults to "projects/vibeiao/human-app-loop-pack"),
 *   and `overwrite`.
 * @returns {Promise<{root: string, outputDir: string, files: string[], createdAt: string}>}
 *   Resolved paths, the full paths of files actually written, and the timestamp
 *   also stored in the spec snapshot.
 */
async function scaffoldHumanAppLoopPack(options = {}) {
  const root = path.resolve(options.root || ".");
  const outputDir = path.resolve(root, options.outputDir || "projects/vibeiao/human-app-loop-pack");
  const overwrite = Boolean(options.overwrite);
  const createdAt = nowIso();

  await ensureDir(outputDir);

  const spec = createHumanAppLoopSpec(createdAt);
  const pack = [
    { name: "LOOP_RUN_TEMPLATE.md", content: buildLoopRunTemplate() },
    { name: "SUBAGENT_TASK_TEMPLATE.md", content: buildSubagentTaskTemplate() },
    { name: "TRIAL_SCORECARD_TEMPLATE.md", content: buildScorecardTemplate() },
    { name: "README.md", content: buildReadme() },
    { name: "SPEC.snapshot.json", content: JSON.stringify(spec, null, 2) + "\n" },
  ];

  // Written sequentially so `files` keeps the pack order.
  const files = [];
  for (const { name, content } of pack) {
    const target = path.join(outputDir, name);
    if (await writeIfAllowed(target, content, overwrite)) {
      files.push(target);
    }
  }

  return { root, outputDir, files, createdAt };
}
|
|
310
|
+
export {
|
|
311
|
+
HUMAN_APP_LOOP_PHASES,
|
|
312
|
+
HUMAN_APP_LOOP_SCHEMA,
|
|
313
|
+
createHumanAppLoopSpec,
|
|
314
|
+
evaluateHumanAppTrial,
|
|
315
|
+
scaffoldHumanAppLoopPack
|
|
316
|
+
};
|
package/dist/index.d.ts
CHANGED
|
@@ -6,6 +6,8 @@ export { SurvivalMode, SurvivalRecommendation, classifySurvivalMode, formatSurvi
|
|
|
6
6
|
export { SurvivalIntegrationDecision, getSurvivalPlaybookDecision, getSurvivalPlaybookDecisionFromSelfReliance } from './survivalIntegration.js';
|
|
7
7
|
export { EscapeHatchDecision, EscapeHatchPolicy, EscapeHatchSnapshot, evaluateEscapeHatch, formatEscapeHatchDecision } from './survivalEscapeHatch.js';
|
|
8
8
|
export { MarketDiscoveryClient, MarketNeed, MarketSignal, deriveMarketNeeds, discoverMarketNeeds, extractMarketSignals, runMarketDiscovery } from './marketDiscovery.js';
|
|
9
|
-
export { A as AgentResourceProvidersManifest, a as AnalyticsPoint, b as ApiCreditProvider, c as ApiCreditProviderFactoryOptions, d as ApiCreditProviderPreset, e as ApiCreditProviderPresetInput, f as ApiResponse, B as BuybackEvent, C as CONTEXT_PACK_SECTION_ORDER, g as ContextPack, h as ContextPackBudget, i as ContextPackInput, j as ContextPackOptions, k as ContextPackSectionKey, l as ContextPackSections, D as DurabilityCheckpointWriteOptions, m as DurabilityProxyClientOptions, n as DurabilityRestoreDrillWriteOptions,
|
|
9
|
+
export { A as AgentResourceProvidersManifest, a as AnalyticsPoint, b as ApiCreditProvider, c as ApiCreditProviderFactoryOptions, d as ApiCreditProviderPreset, e as ApiCreditProviderPresetInput, f as ApiResponse, B as BuybackEvent, C as CONTEXT_PACK_SECTION_ORDER, g as ContextPack, h as ContextPackBudget, i as ContextPackInput, j as ContextPackOptions, k as ContextPackSectionKey, l as ContextPackSections, D as DurabilityCheckpointWriteOptions, m as DurabilityProxyClientOptions, n as DurabilityRestoreDrillWriteOptions, L as LISTING_NAME_MAX_LENGTH, o as LISTING_NAME_RECOMMENDED_MAX, p as LISTING_TAGLINE_MAX_LENGTH, q as LISTING_TAGLINE_RECOMMENDED_MAX, r as LeaderboardEntry, s as LeaderboardQuery, t as ListingNamingValidationOptions, u as ListingNamingValidationResult, v as ListingQuery, w as ListingReviewCreatePayload, x as ListingReviewResponsePayload, y as ListingVersionPayload, M as MarketingCampaign, z as MarketingLinkOptions, E as MemoryPingChallengeResponse, F as MemoryPingPayload, O as OpenRouterCredits, P as ProcurementCandidate, G as ProcurementDecision, H as ProcurementTaskProfile, I as ProcurementWeights, R as ResourceProviderManifestEntry, J as ResourceSnapshot, K as ReviewGate, N as ReviewGateRecord, Q as ReviewRequiredPayload, S as SdkAutoUpdatedRestartRequiredError, T as SdkUpdateCheckOptions, U as SdkUpdatePolicyCheckOptions, V as SdkUpdateRequiredError, W as SdkUpdateStatus, TopupDecision, TopupRequest, TreasuryLedgerEvent, TreasuryPolicy, TreasuryPolicyV1, TreasuryState, X as VIBEIAO_IDL, Y as VibeClient, Z as VibeClientOptions, _ as VibeRegistry, $ as assertSurvivalProvidersConfigured, a0 as buildBadgeMarkdown, a1 as buildClaimMessage, a2 as buildJupiterSwapUrl, a3 as buildListingVersionMessage, a4 as buildMemoryPingMessage, a5 as buildOwnerTransferMessage, a6 as buildProcurementPrompt, a7 as buildRaydiumSwapUrl, a8 as buildReviewPrompt, a9 as buildReviewRequired, aa as buildReviewResponseMessage, ab as buildSdkUpdateCommand, ac as buildShareCopy, 
ad as buildShareLink, ae as buildTradeLinks, buildTreasuryLedgerEvent, af as checkForSdkUpdate, ag as checkForSdkUpdatePolicy, ah as compareVersions, ai as createApiCreditProvider, aj as createApiCreditProviders, ak as createApiCreditProvidersFromManifest, al as createCampaign, am as createContextPack, an as createDurabilityProxyClient, createTreasuryPolicy, ao as decideProcurementForTask, ap as estimateContextPackTokens, evaluateTopupRequest, aq as getResourceSnapshot, ar as normalizeListingText, as as rankListingsForTask, at as sanitizeListingNaming, au as scoreListingForTask, treasuryStateFromSnapshot, av as validateContextPack, aw as validateListingNaming, validateTreasuryPolicy } from './treasuryGuardian.js';
|
|
10
|
+
export { OUTCOME_BOUND_FLOW_SCHEMA, OUTCOME_BOUND_REQUIRED_GATES, OutcomeBoundRequiredGate, OutcomeBoundRunInput, OutcomeBoundRunStatus, assertOutcomeBoundCompleted, evaluateOutcomeBoundRun } from './outcomeBoundFlow.js';
|
|
11
|
+
export { STRICT_MEMORY_RUNTIME_SCHEMA, StrictMemoryEvaluation, StrictMemoryEvaluationInput, StrictMemoryRuntimePreset, StrictMemoryTriggerSet, StrictMemoryUpgradeInput, StrictMemoryUpgradePolicy, StrictMemoryUpgradeResult, createStrictMemoryRuntimePreset, evaluateStrictMemoryExecution, isComplexTask, upgradeToStrictMemoryRuntimePreset } from './strictMemoryRuntime.js';
|
|
10
12
|
export { fetchSolBalance, fetchTokenBalance, fetchTokenBalances } from './solana.js';
|
|
11
13
|
import '@coral-xyz/anchor';
|