deepflow 0.1.54 → 0.1.55
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- package/bin/deepflow-auto.sh +26 -1
- package/package.json +1 -1
package/bin/deepflow-auto.sh
CHANGED
|
@@ -436,6 +436,7 @@ run_single_spike() {
|
|
|
436
436
|
local slug="$2"
|
|
437
437
|
local hypothesis="$3"
|
|
438
438
|
local method="$4"
|
|
439
|
+
local spec_file="$5"
|
|
439
440
|
|
|
440
441
|
local worktree_path="${PROJECT_ROOT}/.deepflow/worktrees/${spec_name}-${slug}"
|
|
441
442
|
local branch_name="df/${spec_name}-${slug}"
|
|
@@ -455,6 +456,12 @@ run_single_spike() {
|
|
|
455
456
|
}
|
|
456
457
|
fi
|
|
457
458
|
|
|
459
|
+
# Extract acceptance criteria from spec (the human's judgment proxy)
|
|
460
|
+
local acceptance_criteria=""
|
|
461
|
+
if [[ -f "$spec_file" ]]; then
|
|
462
|
+
acceptance_criteria="$(sed -n '/^## Acceptance Criteria/,/^## /{ /^## Acceptance Criteria/d; /^## /d; p; }' "$spec_file")"
|
|
463
|
+
fi
|
|
464
|
+
|
|
458
465
|
# Build spike prompt
|
|
459
466
|
local spike_prompt
|
|
460
467
|
spike_prompt="You are running a spike experiment to validate a hypothesis for spec '${spec_name}'.
|
|
@@ -465,13 +472,19 @@ Hypothesis: ${hypothesis}
|
|
|
465
472
|
Method: ${method}
|
|
466
473
|
--- END HYPOTHESIS ---
|
|
467
474
|
|
|
475
|
+
--- ACCEPTANCE CRITERIA (from spec — the human's judgment proxy) ---
|
|
476
|
+
${acceptance_criteria}
|
|
477
|
+
--- END ACCEPTANCE CRITERIA ---
|
|
478
|
+
|
|
468
479
|
Your tasks:
|
|
469
480
|
1. Validate this hypothesis by implementing the minimum necessary to prove or disprove it.
|
|
481
|
+
The spike must demonstrate that the approach can satisfy the acceptance criteria above.
|
|
470
482
|
2. Write an experiment file at: .deepflow/experiments/${spec_name}--${slug}--active.md
|
|
471
483
|
The experiment file should contain:
|
|
472
484
|
- ## Hypothesis: restate the hypothesis
|
|
473
485
|
- ## Method: what you did to validate
|
|
474
486
|
- ## Results: what you observed
|
|
487
|
+
- ## Criteria Check: for each acceptance criterion, can this approach satisfy it? (yes/no/unclear)
|
|
475
488
|
- ## Conclusion: PASSED or FAILED with reasoning
|
|
476
489
|
3. Write a result YAML file at: .deepflow/results/spike-${slug}.yaml
|
|
477
490
|
The YAML must contain:
|
|
@@ -573,7 +586,7 @@ run_spikes() {
|
|
|
573
586
|
auto_log "Spawning spike for ${slug} (hypothesis ${i}/${count})"
|
|
574
587
|
echo "Spawning spike: ${slug}"
|
|
575
588
|
|
|
576
|
-
run_single_spike "$spec_name" "$slug" "$hypothesis" "$method" &
|
|
589
|
+
run_single_spike "$spec_name" "$slug" "$hypothesis" "$method" "$spec_file" &
|
|
577
590
|
pids+=($!)
|
|
578
591
|
done
|
|
579
592
|
|
|
@@ -875,6 +888,13 @@ $(cat "$experiment_file")
|
|
|
875
888
|
# -----------------------------------------------------------------------
|
|
876
889
|
# 2. Build selection prompt
|
|
877
890
|
# -----------------------------------------------------------------------
|
|
891
|
+
|
|
892
|
+
# Extract acceptance criteria from spec (the human's judgment proxy)
|
|
893
|
+
local acceptance_criteria=""
|
|
894
|
+
if [[ -f "$spec_file" ]]; then
|
|
895
|
+
acceptance_criteria="$(sed -n '/^## Acceptance Criteria/,/^## /{ /^## Acceptance Criteria/d; /^## /d; p; }' "$spec_file")"
|
|
896
|
+
fi
|
|
897
|
+
|
|
878
898
|
local selection_prompt
|
|
879
899
|
selection_prompt="You are an adversarial quality judge in an autonomous development workflow.
|
|
880
900
|
Your job is to compare implementation approaches for spec '${spec_name}' and select the best one — or reject all if quality is insufficient.
|
|
@@ -883,6 +903,11 @@ IMPORTANT:
|
|
|
883
903
|
- This selection phase ALWAYS runs, even with only 1 approach. With a single approach you act as a quality gate.
|
|
884
904
|
- You CAN and SHOULD reject all approaches if the quality is insufficient. Do not rubber-stamp poor work.
|
|
885
905
|
- Base your judgment ONLY on the artifacts provided below. Do NOT read code files.
|
|
906
|
+
- Judge each approach against the ACCEPTANCE CRITERIA below — these represent the human's intent.
|
|
907
|
+
|
|
908
|
+
--- ACCEPTANCE CRITERIA (from spec) ---
|
|
909
|
+
${acceptance_criteria}
|
|
910
|
+
--- END ACCEPTANCE CRITERIA ---
|
|
886
911
|
|
|
887
912
|
There are ${#approach_slugs[@]} approach(es) to evaluate:
|
|
888
913
|
|