universal-dev-standards 5.3.2 → 5.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bundled/ai/standards/adversarial-test.ai.yaml +277 -0
- package/bundled/ai/standards/agent-communication-protocol.ai.yaml +32 -166
- package/bundled/ai/standards/agent-dispatch.ai.yaml +32 -58
- package/bundled/ai/standards/audit-trail.ai.yaml +113 -0
- package/bundled/ai/standards/branch-completion.ai.yaml +34 -70
- package/bundled/ai/standards/change-batching-standards.ai.yaml +31 -180
- package/bundled/ai/standards/chaos-injection-tests.ai.yaml +91 -0
- package/bundled/ai/standards/container-image-standards.ai.yaml +88 -0
- package/bundled/ai/standards/container-security.ai.yaml +331 -0
- package/bundled/ai/standards/cost-budget-test.ai.yaml +96 -0
- package/bundled/ai/standards/data-contract.ai.yaml +110 -0
- package/bundled/ai/standards/data-migration-testing.ai.yaml +96 -0
- package/bundled/ai/standards/data-pipeline.ai.yaml +113 -0
- package/bundled/ai/standards/disaster-recovery-drill.ai.yaml +89 -0
- package/bundled/ai/standards/execution-history.ai.yaml +30 -288
- package/bundled/ai/standards/flaky-test-management.ai.yaml +89 -0
- package/bundled/ai/standards/flow-based-testing.ai.yaml +240 -0
- package/bundled/ai/standards/iac-design-principles.ai.yaml +83 -0
- package/bundled/ai/standards/incident-response.ai.yaml +107 -0
- package/bundled/ai/standards/license-compliance.ai.yaml +106 -0
- package/bundled/ai/standards/llm-output-validation.ai.yaml +269 -0
- package/bundled/ai/standards/mock-boundary.ai.yaml +250 -0
- package/bundled/ai/standards/mutation-testing.ai.yaml +192 -0
- package/bundled/ai/standards/pii-classification.ai.yaml +109 -0
- package/bundled/ai/standards/pipeline-integration-standards.ai.yaml +28 -169
- package/bundled/ai/standards/policy-as-code-testing.ai.yaml +227 -0
- package/bundled/ai/standards/prd-standards.ai.yaml +88 -0
- package/bundled/ai/standards/product-metrics-standards.ai.yaml +111 -0
- package/bundled/ai/standards/prompt-regression.ai.yaml +94 -0
- package/bundled/ai/standards/property-based-testing.ai.yaml +105 -0
- package/bundled/ai/standards/release-quality-manifest.ai.yaml +135 -0
- package/bundled/ai/standards/replay-test.ai.yaml +111 -0
- package/bundled/ai/standards/runbook.ai.yaml +104 -0
- package/bundled/ai/standards/sast-advanced.ai.yaml +135 -0
- package/bundled/ai/standards/schema-evolution.ai.yaml +111 -0
- package/bundled/ai/standards/secret-management-standards.ai.yaml +105 -0
- package/bundled/ai/standards/secure-op.ai.yaml +365 -0
- package/bundled/ai/standards/security-testing.ai.yaml +171 -0
- package/bundled/ai/standards/server-ops-security.ai.yaml +274 -0
- package/bundled/ai/standards/slo-sli.ai.yaml +97 -0
- package/bundled/ai/standards/smoke-test.ai.yaml +87 -0
- package/bundled/ai/standards/supply-chain-attestation.ai.yaml +109 -0
- package/bundled/ai/standards/test-completeness-dimensions.ai.yaml +52 -5
- package/bundled/ai/standards/user-story-mapping.ai.yaml +108 -0
- package/bundled/ai/standards/workflow-enforcement.ai.yaml +34 -240
- package/bundled/ai/standards/workflow-state-protocol.ai.yaml +31 -107
- package/bundled/core/adversarial-test.md +212 -0
- package/bundled/core/chaos-injection-tests.md +116 -0
- package/bundled/core/container-security.md +521 -0
- package/bundled/core/cost-budget-test.md +69 -0
- package/bundled/core/data-migration-testing.md +110 -0
- package/bundled/core/disaster-recovery-drill.md +73 -0
- package/bundled/core/flaky-test-management.md +73 -0
- package/bundled/core/flow-based-testing.md +142 -0
- package/bundled/core/llm-output-validation.md +178 -0
- package/bundled/core/mock-boundary.md +100 -0
- package/bundled/core/mutation-testing.md +97 -0
- package/bundled/core/policy-as-code-testing.md +188 -0
- package/bundled/core/prompt-regression.md +72 -0
- package/bundled/core/property-based-testing.md +73 -0
- package/bundled/core/release-quality-manifest.md +147 -0
- package/bundled/core/replay-test.md +86 -0
- package/bundled/core/sast-advanced.md +300 -0
- package/bundled/core/secure-op.md +314 -0
- package/bundled/core/security-testing.md +87 -0
- package/bundled/core/server-ops-security.md +493 -0
- package/bundled/core/smoke-test.md +65 -0
- package/bundled/core/supply-chain-attestation.md +117 -0
- package/bundled/locales/zh-CN/CHANGELOG.md +3 -3
- package/bundled/locales/zh-CN/README.md +1 -1
- package/bundled/locales/zh-CN/skills/ai-instruction-standards/SKILL.md +5 -5
- package/bundled/locales/zh-TW/CHANGELOG.md +3 -3
- package/bundled/locales/zh-TW/README.md +1 -1
- package/bundled/locales/zh-TW/skills/ai-instruction-standards/SKILL.md +183 -79
- package/bundled/skills/README.md +4 -3
- package/bundled/skills/SKILL_NAMING.md +94 -0
- package/bundled/skills/ai-instruction-standards/SKILL.md +181 -88
- package/bundled/skills/atdd-assistant/SKILL.md +8 -0
- package/bundled/skills/bdd-assistant/SKILL.md +7 -0
- package/bundled/skills/checkin-assistant/SKILL.md +8 -0
- package/bundled/skills/code-review-assistant/SKILL.md +7 -0
- package/bundled/skills/journey-test-assistant/SKILL.md +203 -0
- package/bundled/skills/orchestrate/SKILL.md +167 -0
- package/bundled/skills/plan/SKILL.md +234 -0
- package/bundled/skills/pr-automation-assistant/SKILL.md +8 -0
- package/bundled/skills/push/SKILL.md +49 -2
- package/bundled/skills/{process-automation → skill-builder}/SKILL.md +1 -1
- package/bundled/skills/{forward-derivation → spec-derivation}/SKILL.md +1 -1
- package/bundled/skills/spec-driven-dev/SKILL.md +7 -0
- package/bundled/skills/sweep/SKILL.md +145 -0
- package/bundled/skills/tdd-assistant/SKILL.md +7 -0
- package/package.json +1 -1
- package/src/commands/flow.js +8 -0
- package/src/commands/start.js +14 -0
- package/src/commands/sweep.js +8 -0
- package/src/commands/workflow.js +8 -0
- package/standards-registry.json +474 -12
- package/bundled/locales/zh-CN/skills/ac-coverage-assistant/SKILL.md +0 -190
- package/bundled/locales/zh-CN/skills/forward-derivation/SKILL.md +0 -71
- package/bundled/locales/zh-CN/skills/forward-derivation/guide.md +0 -130
- package/bundled/locales/zh-CN/skills/methodology-system/SKILL.md +0 -88
- package/bundled/locales/zh-CN/skills/methodology-system/create-methodology.md +0 -350
- package/bundled/locales/zh-CN/skills/methodology-system/guide.md +0 -131
- package/bundled/locales/zh-CN/skills/methodology-system/runtime.md +0 -279
- package/bundled/locales/zh-CN/skills/process-automation/SKILL.md +0 -143
- package/bundled/locales/zh-TW/skills/ac-coverage-assistant/SKILL.md +0 -195
- package/bundled/locales/zh-TW/skills/deploy-assistant/SKILL.md +0 -178
- package/bundled/locales/zh-TW/skills/forward-derivation/SKILL.md +0 -69
- package/bundled/locales/zh-TW/skills/forward-derivation/guide.md +0 -415
- package/bundled/locales/zh-TW/skills/methodology-system/SKILL.md +0 -86
- package/bundled/locales/zh-TW/skills/methodology-system/create-methodology.md +0 -350
- package/bundled/locales/zh-TW/skills/methodology-system/guide.md +0 -131
- package/bundled/locales/zh-TW/skills/methodology-system/runtime.md +0 -279
- package/bundled/locales/zh-TW/skills/process-automation/SKILL.md +0 -144
- /package/bundled/skills/{ac-coverage-assistant → ac-coverage}/SKILL.md +0 -0
- /package/bundled/skills/{methodology-system → dev-methodology}/SKILL.md +0 -0
- /package/bundled/skills/{methodology-system → dev-methodology}/create-methodology.md +0 -0
- /package/bundled/skills/{methodology-system → dev-methodology}/guide.md +0 -0
- /package/bundled/skills/{methodology-system → dev-methodology}/integrated-flow.md +0 -0
- /package/bundled/skills/{methodology-system → dev-methodology}/prerequisite-check.md +0 -0
- /package/bundled/skills/{methodology-system → dev-methodology}/runtime.md +0 -0
- /package/bundled/skills/{forward-derivation → spec-derivation}/guide.md +0 -0
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
# Data Pipeline Standards - AI Optimized
|
|
2
|
+
# Source: XSPEC-068 Wave 3 Data Engineering Pack
|
|
3
|
+
|
|
4
|
+
id: data-pipeline
|
|
5
|
+
title: Data Pipeline Standards
|
|
6
|
+
version: "1.0.0"
|
|
7
|
+
status: Active
|
|
8
|
+
tags: [data-engineering, pipeline, etl, data-quality, orchestration, idempotency]
|
|
9
|
+
summary: |
|
|
10
|
+
Defines engineering standards for building reliable, observable, and
|
|
11
|
+
maintainable data pipelines. Covers idempotency and exactly-once semantics,
|
|
12
|
+
error handling and dead-letter queues, checkpoint and recovery patterns,
|
|
13
|
+
data lineage tracking, pipeline observability (metrics, alerting), testing
|
|
14
|
+
requirements, and deployment practices. Applicable to batch ETL, streaming
|
|
15
|
+
pipelines, and ML feature pipelines.
|
|
16
|
+
|
|
17
|
+
requirements:
|
|
18
|
+
- id: REQ-001
|
|
19
|
+
title: Idempotency and Exactly-Once Processing
|
|
20
|
+
description: |
|
|
21
|
+
Every data pipeline MUST be designed for idempotent execution:
|
|
22
|
+
re-running the same pipeline for the same time window or batch MUST
|
|
23
|
+
produce identical output without duplication or data loss. Pipelines
|
|
24
|
+
MUST use deterministic keys for deduplication. Batch pipelines MUST
|
|
25
|
+
support re-processing historical partitions cleanly. Streaming pipelines
|
|
26
|
+
MUST implement exactly-once or at-least-once with deduplication using
|
|
27
|
+
unique event IDs. Overwrites of output partitions are preferred over
|
|
28
|
+
appends for batch jobs.
|
|
29
|
+
level: MUST
|
|
30
|
+
examples:
|
|
31
|
+
- "Batch: pipeline writes to date-partitioned output and overwrites the partition on re-run"
|
|
32
|
+
- "Streaming: dedup using Kafka message key + consumer group offset tracking"
|
|
33
|
+
- "Test: running the pipeline twice for 2026-04-01 produces identical output (same rows, no duplicates) both times"
|
|
34
|
+
|
|
35
|
+
- id: REQ-002
|
|
36
|
+
title: Error Handling and Dead-Letter Queues
|
|
37
|
+
description: |
|
|
38
|
+
Data pipelines MUST implement structured error handling with
|
|
39
|
+
categorized failure modes. Transient errors (network timeout, API
|
|
40
|
+
rate limit) MUST use exponential backoff retry (max 3 attempts).
|
|
41
|
+
Permanent errors (schema violation, invalid data) MUST route records
|
|
42
|
+
to a Dead-Letter Queue (DLQ) with the original record, error type,
|
|
43
|
+
error message, and processing timestamp. DLQ records MUST be
|
|
44
|
+
monitored and addressed within the pipeline's SLA.
|
|
45
|
+
level: MUST
|
|
46
|
+
examples:
|
|
47
|
+
- "Transient retry: retry_policy: {max_attempts: 3, backoff_base: 2s, max_backoff: 30s}"
|
|
48
|
+
- "DLQ record: {original_record: {...}, error_type: 'SCHEMA_VIOLATION', error_msg: 'field amount is null', ts: '...'}"
|
|
49
|
+
- "DLQ alert: >100 DLQ messages in 1 hour → PagerDuty alert to data-oncall"
|
|
50
|
+
|
|
51
|
+
- id: REQ-003
|
|
52
|
+
title: Checkpoint and Recovery
|
|
53
|
+
description: |
|
|
54
|
+
Long-running batch pipelines and stateful streaming pipelines MUST
|
|
55
|
+
implement checkpointing to enable recovery from mid-run failures
|
|
56
|
+
without full reprocessing. Checkpoints MUST record: last successfully
|
|
57
|
+
processed partition/offset/watermark, job run ID, and timestamp.
|
|
58
|
+
Recovery MUST resume from the last checkpoint, not from the beginning.
|
|
59
|
+
Checkpoint state MUST be stored in durable external storage (not
|
|
60
|
+
local disk).
|
|
61
|
+
level: MUST
|
|
62
|
+
examples:
|
|
63
|
+
- "Batch: checkpoint stores {last_processed_date: '2026-04-28', last_id: 12345678} in S3"
|
|
64
|
+
- "Streaming: Flink checkpoint interval 5 minutes, stored in S3 with 3 checkpoints retained"
|
|
65
|
+
- "Recovery test: kill job mid-run, restart, verify output matches full run with no duplicates"
|
|
66
|
+
|
|
67
|
+
- id: REQ-004
|
|
68
|
+
title: Data Lineage Tracking
|
|
69
|
+
description: |
|
|
70
|
+
Every data pipeline MUST emit lineage metadata describing its data
|
|
71
|
+
flow: source datasets (with versions/timestamps), transformation logic
|
|
72
|
+
applied, and output datasets produced. Lineage MUST be machine-readable
|
|
73
|
+
and ingested into a central lineage store or data catalog. Lineage
|
|
74
|
+
enables root-cause analysis of data quality issues and impact assessment
|
|
75
|
+
of upstream changes.
|
|
76
|
+
level: MUST
|
|
77
|
+
examples:
|
|
78
|
+
- "Lineage emit: {job: 'orders-aggregator', inputs: ['raw_orders@2026-04-30'], outputs: ['daily_order_summary@2026-04-30'], transform_version: 'v1.3.2'}"
|
|
79
|
+
- "OpenLineage event emitted to Marquez or DataHub on job start and completion"
|
|
80
|
+
- "Lineage query: 'Which pipelines read from raw_orders?' returns 5 downstream jobs"
|
|
81
|
+
|
|
82
|
+
- id: REQ-005
|
|
83
|
+
title: Pipeline Observability and SLOs
|
|
84
|
+
description: |
|
|
85
|
+
Every production data pipeline MUST expose the following metrics:
|
|
86
|
+
records processed (counter), processing latency (histogram), error
|
|
87
|
+
rate (gauge), DLQ depth (gauge), and last successful run timestamp.
|
|
88
|
+
Pipelines MUST define SLOs for: freshness (data available within N
|
|
89
|
+
hours of source), completeness (≥ X% records successfully processed),
|
|
90
|
+
and latency (p95 processing time within threshold). SLO violations
|
|
91
|
+
MUST trigger alerts.
|
|
92
|
+
level: MUST
|
|
93
|
+
examples:
|
|
94
|
+
- "Metric: pipeline_records_processed_total{pipeline='orders-agg',status='success'}"
|
|
95
|
+
- "Freshness SLO: daily_order_summary available by 03:00 UTC — alert if missing by 04:00 UTC"
|
|
96
|
+
- "Completeness alert: processed_records / expected_records < 0.99 → P2 alert"
|
|
97
|
+
|
|
98
|
+
- id: REQ-006
|
|
99
|
+
title: Pipeline Testing Requirements
|
|
100
|
+
description: |
|
|
101
|
+
Data pipelines MUST have automated tests covering: unit tests for
|
|
102
|
+
transformation logic (test with sample input/output DataFrames),
|
|
103
|
+
integration tests validating end-to-end flow with synthetic data,
|
|
104
|
+
and schema conformance tests validating output matches declared
|
|
105
|
+
data contract. Pipelines SHOULD have regression tests for historically
|
|
106
|
+
problematic edge cases (nulls in key fields, negative amounts,
|
|
107
|
+
duplicate records). Test coverage MUST be ≥ 80% for transformation
|
|
108
|
+
logic.
|
|
109
|
+
level: MUST
|
|
110
|
+
examples:
|
|
111
|
+
- "Unit test: test_calculate_order_total() — asserts discount applied correctly on sample rows"
|
|
112
|
+
- "Integration test: runs full pipeline on 1000 synthetic orders, validates output row count and schema"
|
|
113
|
+
- "Edge case test: pipeline handles duplicate order_id gracefully, deduplication logic verified"
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
# SPDX-License-Identifier: MIT
|
|
2
|
+
name: Disaster Recovery Drill Standards
|
|
3
|
+
nameZh: 災難恢復演練標準
|
|
4
|
+
id: disaster-recovery-drill
|
|
5
|
+
version: "1.0.0"
|
|
6
|
+
category: operations
|
|
7
|
+
scope: reliability
|
|
8
|
+
summary: >
|
|
9
|
+
Structured DR drill standards: quarterly runbook execution, RTO/RPO
|
|
10
|
+
measurement, backup restore verification, and Game Day protocols.
|
|
11
|
+
Untested recovery plans fail at the worst moment.
|
|
12
|
+
|
|
13
|
+
requirements:
|
|
14
|
+
- id: REQ-01
|
|
15
|
+
title: RTO/RPO Targets Defined
|
|
16
|
+
titleZh: RTO/RPO 目標定義
|
|
17
|
+
level: MUST
|
|
18
|
+
description: >
|
|
19
|
+
Each system MUST have documented RTO (Recovery Time Objective) and RPO
|
|
20
|
+
(Recovery Point Objective) targets. These must be agreed with stakeholders
|
|
21
|
+
before any DR drill can be considered meaningful.
|
|
22
|
+
examples:
|
|
23
|
+
- "VibeOps commercial: RTO < 1 hour, RPO < 24 hours (daily backup)"
|
|
24
|
+
|
|
25
|
+
- id: REQ-02
|
|
26
|
+
title: Backup Restore Test
|
|
27
|
+
titleZh: 備份還原測試
|
|
28
|
+
level: MUST
|
|
29
|
+
description: >
|
|
30
|
+
At least quarterly, a full backup restore MUST be executed in an
|
|
31
|
+
isolated environment and verified for data integrity. The restore time
|
|
32
|
+
MUST be measured and compared to the RTO target.
|
|
33
|
+
|
|
34
|
+
- id: REQ-03
|
|
35
|
+
title: Runbook Completeness
|
|
36
|
+
titleZh: 運行手冊完整性
|
|
37
|
+
level: MUST
|
|
38
|
+
description: >
|
|
39
|
+
A DR runbook MUST exist covering: (1) detection (how do we know a disaster
|
|
40
|
+
occurred?), (2) decision (who declares DR?), (3) recovery steps
|
|
41
|
+
(step-by-step, executable commands), (4) verification (how do we confirm
|
|
42
|
+
recovery?), (5) communication plan.
|
|
43
|
+
|
|
44
|
+
- id: REQ-04
|
|
45
|
+
title: Game Day Exercise
|
|
46
|
+
titleZh: Game Day 演練
|
|
47
|
+
level: SHOULD
|
|
48
|
+
description: >
|
|
49
|
+
At least annually, a Game Day exercise SHOULD be conducted where the
|
|
50
|
+
team simulates a realistic failure scenario and executes the runbook from
|
|
51
|
+
scratch. Results SHOULD be documented and used to improve the runbook.
|
|
52
|
+
|
|
53
|
+
- id: REQ-05
|
|
54
|
+
title: Drill Record
|
|
55
|
+
titleZh: 演練記錄
|
|
56
|
+
level: MUST
|
|
57
|
+
description: >
|
|
58
|
+
Every DR drill MUST produce a written record including: date, participants,
|
|
59
|
+
scenario tested, RTO achieved, RPO achieved, issues found, remediation
|
|
60
|
+
actions. Records MUST be retained for at least 12 months.
|
|
61
|
+
|
|
62
|
+
examples:
|
|
63
|
+
- name: "DR drill record template"
|
|
64
|
+
code: |
|
|
65
|
+
date: 2026-05-05
|
|
66
|
+
participants: [alice, bob]
|
|
67
|
+
scenario: "Database total loss — restore from daily backup"
|
|
68
|
+
rto_target: "1 hour"
|
|
69
|
+
rto_achieved: "42 minutes"
|
|
70
|
+
rpo_target: "24 hours"
|
|
71
|
+
rpo_achieved: "23 hours 15 minutes"
|
|
72
|
+
issues_found:
|
|
73
|
+
- "backup script path was stale — fixed in XSPEC-170"
|
|
74
|
+
remediation:
|
|
75
|
+
- "Update backup path in backup-restore.sh"
|
|
76
|
+
status: PASS
|
|
77
|
+
|
|
78
|
+
anti_patterns:
|
|
79
|
+
- description: >
|
|
80
|
+
Only verifying that a backup file exists — always restore it and
|
|
81
|
+
verify data integrity. An untested backup is not a backup.
|
|
82
|
+
- description: >
|
|
83
|
+
Running DR drills in production — always use an isolated environment
|
|
84
|
+
to avoid turning a drill into an actual disaster.
|
|
85
|
+
|
|
86
|
+
related_standards:
|
|
87
|
+
- deployment-standards
|
|
88
|
+
- chaos-engineering-standards
|
|
89
|
+
- verification-evidence
|
|
@@ -1,302 +1,44 @@
|
|
|
1
|
-
# Execution History Repository Standards -
|
|
2
|
-
#
|
|
1
|
+
# Execution History Repository Standards - DEPRECATED STUB
|
|
2
|
+
# This file has been migrated to DevAP per DEC-049 (UDS/DevAP responsibility split).
|
|
3
|
+
# Canonical location: dev-autopilot/standards/orchestration/execution-history.ai.yaml
|
|
4
|
+
# Migration: XSPEC-086 Phase 2 (2026-04-27)
|
|
5
|
+
#
|
|
6
|
+
# Source spec: cross-project/specs/XSPEC-003-execution-history-standard-sdd.md
|
|
7
|
+
# Deprecation schedule: UDS 5.4.0 deprecated → UDS 6.0.0 removed
|
|
3
8
|
|
|
4
9
|
standard:
|
|
5
10
|
id: execution-history
|
|
6
|
-
name: "Execution History Repository Standards"
|
|
7
|
-
description: "Structured system for persisting agent execution artifacts with L1/L2/L3 tiered access model"
|
|
8
|
-
guidelines:
|
|
9
|
-
- "每次 agent 任務執行完畢後,必須產出 required artifacts"
|
|
10
|
-
- "歷史以檔案系統目錄結構組織,支援直接路徑存取"
|
|
11
|
-
- "提供 L1/L2/L3 三層存取,平衡資訊量與 token 成本"
|
|
12
|
-
- "敏感資訊在寫入時自動 redact"
|
|
13
|
-
- "跨專案僅共享 L1 層級,遵守授權隔離"
|
|
14
|
-
- "歷史保留策略確保 L1/L2 索引永久保留,L3 artifacts 依 max_runs 設定自動清理以控制儲存空間"
|
|
15
|
-
|
|
16
11
|
meta:
|
|
17
|
-
version: "1.0.
|
|
18
|
-
updated: "2026-04-
|
|
12
|
+
version: "1.0.1"
|
|
13
|
+
updated: "2026-04-27"
|
|
14
|
+
deprecated: true
|
|
15
|
+
deprecated_since: "5.4.0"
|
|
16
|
+
removal_version: "6.0.0"
|
|
17
|
+
canonical_owner: devap
|
|
18
|
+
canonical_path: "dev-autopilot/standards/orchestration/execution-history.ai.yaml"
|
|
19
19
|
source: cross-project/specs/XSPEC-003-execution-history-standard-sdd.md
|
|
20
|
-
description:
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
storage:
|
|
24
|
-
description: "執行歷史為 append-only log,支援兩種儲存後端"
|
|
25
|
-
backends:
|
|
26
|
-
local:
|
|
27
|
-
description: "儲存在 repo 內的 .execution-history/ 目錄"
|
|
28
|
-
git_policy: |
|
|
29
|
-
L3 artifacts 不追蹤(納入 .gitignore),L1 index.json 可選追蹤。
|
|
30
|
-
歷史本質是寫入後不變動的 log,不適合 git 追蹤大量變動。
|
|
31
|
-
gitignore_rules: |
|
|
32
|
-
.execution-history/*/ # L3 artifacts 不追蹤
|
|
33
|
-
!.execution-history/index.json # L1 索引可選追蹤
|
|
34
|
-
!.execution-history/index-archive.json # L1 歸檔索引可選追蹤
|
|
35
|
-
when_to_use: "個人開發、小型專案、或不需跨環境共享歷史時"
|
|
36
|
-
file_server:
|
|
37
|
-
description: "儲存在外部 FileServer(如 S3、MinIO、NAS、共享磁碟)"
|
|
38
|
-
config_file: ".execution-history/storage.json"
|
|
39
|
-
config_schema:
|
|
40
|
-
backend: { type: string, enum: [local, file_server] }
|
|
41
|
-
file_server_url: { type: string, description: "FileServer 端點 URL" }
|
|
42
|
-
auth_method: { type: string, enum: [none, api_key, oauth], default: none }
|
|
43
|
-
sync_l1_to_local: { type: boolean, default: true, description: "是否將 L1 索引同步到本地" }
|
|
44
|
-
when_to_use: "團隊協作、跨環境共享、歷史量大、或需要長期保存時"
|
|
45
|
-
rules:
|
|
46
|
-
- "L1 索引始終同步到本地(sync_l1_to_local: true),確保離線可讀"
|
|
47
|
-
- "L2/L3 按需從 FileServer 拉取"
|
|
48
|
-
- "寫入走 FileServer API,本地不留 L3 副本"
|
|
49
|
-
default_backend: local
|
|
50
|
-
|
|
51
|
-
directory_structure:
|
|
52
|
-
root: ".execution-history/"
|
|
53
|
-
layout: |
|
|
54
|
-
.execution-history/
|
|
55
|
-
├── storage.json # 儲存後端配置(可選,預設 local)
|
|
56
|
-
├── index.json # L1: 最近 50 個活躍 tasks 索引
|
|
57
|
-
├── index-archive.json # L1-ext: 歸檔 tasks 索引(> 90 天無新 run)
|
|
58
|
-
├── {task-id}/
|
|
59
|
-
│ ├── manifest.json # L2: 任務層級摘要
|
|
60
|
-
│ ├── {run-number}/ # 三位數字(001-999)
|
|
61
|
-
│ │ ├── manifest.json # L2: 單次執行摘要
|
|
62
|
-
│ │ ├── task-description.md # Required: 任務描述
|
|
63
|
-
│ │ ├── code-diff.patch # Required: 程式碼變更
|
|
64
|
-
│ │ ├── test-results.json # Required: 測試結果
|
|
65
|
-
│ │ ├── execution-log.jsonl # Required: 執行日誌
|
|
66
|
-
│ │ ├── token-usage.json # Required: Token 使用量
|
|
67
|
-
│ │ ├── final-status.json # Required: 最終狀態
|
|
68
|
-
│ │ ├── error-analysis.md # Optional: 錯誤分析
|
|
69
|
-
│ │ └── agent-reasoning.md # Optional: Agent 推理過程
|
|
70
|
-
│ └── ...
|
|
71
|
-
└── ...
|
|
72
|
-
index_strategy:
|
|
73
|
-
description: "分層索引策略,平衡即時性與歷史完整性"
|
|
74
|
-
active_index: "index.json — 最近 50 個活躍 tasks(< 200 tokens 目標)"
|
|
75
|
-
archive_index: "index-archive.json — 超過 90 天無新 run 的 tasks 自動歸檔"
|
|
76
|
-
archive_trigger: "task 最後一次 run 距今 > 90 天"
|
|
77
|
-
reactivate_trigger: "歸檔 task 有新 run 時自動移回 active index"
|
|
78
|
-
|
|
79
|
-
artifacts:
|
|
80
|
-
required:
|
|
81
|
-
- id: task-description
|
|
82
|
-
file: "task-description.md"
|
|
83
|
-
format: markdown
|
|
84
|
-
description: "任務目標、輸入、預期產出"
|
|
85
|
-
max_size: "2KB"
|
|
86
|
-
- id: code-diff
|
|
87
|
-
file: "code-diff.patch"
|
|
88
|
-
format: "unified diff"
|
|
89
|
-
description: "本次執行產生的程式碼變更"
|
|
90
|
-
max_size: "50KB"
|
|
91
|
-
- id: test-results
|
|
92
|
-
file: "test-results.json"
|
|
93
|
-
format: json
|
|
94
|
-
description: "測試執行結果(通過/失敗/跳過數量、失敗詳情)"
|
|
95
|
-
schema_ref: "#/definitions/test-results"
|
|
96
|
-
- id: execution-log
|
|
97
|
-
file: "execution-log.jsonl"
|
|
98
|
-
format: jsonl
|
|
99
|
-
description: "結構化執行日誌(每行一個事件)"
|
|
100
|
-
schema_ref: "#/definitions/log-entry"
|
|
101
|
-
- id: token-usage
|
|
102
|
-
file: "token-usage.json"
|
|
103
|
-
format: json
|
|
104
|
-
description: "Token 使用量明細(input/output/total,按步驟分)"
|
|
105
|
-
schema_ref: "#/definitions/token-usage"
|
|
106
|
-
- id: final-status
|
|
107
|
-
file: "final-status.json"
|
|
108
|
-
format: json
|
|
109
|
-
description: "最終狀態(success/failure/partial,含摘要)"
|
|
110
|
-
schema_ref: "#/definitions/final-status"
|
|
111
|
-
optional:
|
|
112
|
-
- id: error-analysis
|
|
113
|
-
file: "error-analysis.md"
|
|
114
|
-
format: markdown
|
|
115
|
-
description: "失敗時的根因分析"
|
|
116
|
-
condition: "status != success"
|
|
117
|
-
- id: agent-reasoning
|
|
118
|
-
file: "agent-reasoning.md"
|
|
119
|
-
format: markdown
|
|
120
|
-
description: "Agent 的推理過程與決策紀錄"
|
|
121
|
-
|
|
122
|
-
access_layers:
|
|
123
|
-
L1:
|
|
124
|
-
name: "索引層"
|
|
125
|
-
files:
|
|
126
|
-
active: "index.json"
|
|
127
|
-
archive: "index-archive.json"
|
|
128
|
-
target_tokens: "< 200 (active), 按需 (archive)"
|
|
129
|
-
fields:
|
|
130
|
-
- task_id
|
|
131
|
-
- task_name
|
|
132
|
-
- tags
|
|
133
|
-
- latest_run
|
|
134
|
-
- latest_status
|
|
135
|
-
- latest_date
|
|
136
|
-
- total_runs
|
|
137
|
-
cross_project_access: true
|
|
138
|
-
L2:
|
|
139
|
-
name: "摘要層"
|
|
140
|
-
file: "manifest.json"
|
|
141
|
-
target_tokens: "< 1,000 per task"
|
|
142
|
-
fields:
|
|
143
|
-
- task_description_summary
|
|
144
|
-
- run_history
|
|
145
|
-
- key_metrics
|
|
146
|
-
- artifacts_available
|
|
147
|
-
- failure_summary
|
|
148
|
-
cross_project_access: false
|
|
149
|
-
L3:
|
|
150
|
-
name: "完整紀錄層"
|
|
151
|
-
file: "各 artifact 檔案"
|
|
152
|
-
target_tokens: "不限"
|
|
153
|
-
description: "直接讀取 run 目錄下的個別 artifact 檔案"
|
|
154
|
-
cross_project_access: false
|
|
155
|
-
|
|
156
|
-
retention_policy:
|
|
157
|
-
configurable: true
|
|
158
|
-
defaults:
|
|
159
|
-
max_runs_per_task: 50
|
|
160
|
-
max_total_size_mb: 500
|
|
161
|
-
cleanup_strategy: "oldest_l3_first"
|
|
162
|
-
rules:
|
|
163
|
-
- "超過 max_runs 時,最舊的 run 的 L3 artifacts 被刪除"
|
|
164
|
-
- "L1 和 L2 索引永久保留(除非手動刪除)"
|
|
165
|
-
- "cleanup 以 task 為單位,不跨 task 清理"
|
|
166
|
-
|
|
167
|
-
sensitive_data:
|
|
168
|
-
redact_on_write: true
|
|
169
|
-
sensitive_patterns:
|
|
170
|
-
- pattern: "sk-[a-zA-Z0-9_-]{20,}"
|
|
171
|
-
label: "API_KEY"
|
|
172
|
-
- pattern: "ghp_[a-zA-Z0-9]{36}"
|
|
173
|
-
label: "GITHUB_TOKEN"
|
|
174
|
-
- pattern: "password\\s*[:=]\\s*\\S+"
|
|
175
|
-
label: "PASSWORD"
|
|
176
|
-
- pattern: "-----BEGIN .* PRIVATE KEY-----"
|
|
177
|
-
label: "PRIVATE_KEY"
|
|
178
|
-
redact_format: "[REDACTED:{label}]"
|
|
179
|
-
|
|
180
|
-
definitions:
|
|
181
|
-
test-results:
|
|
182
|
-
type: object
|
|
183
|
-
required: [timestamp, summary, details]
|
|
184
|
-
properties:
|
|
185
|
-
timestamp:
|
|
186
|
-
type: string
|
|
187
|
-
format: "ISO 8601"
|
|
188
|
-
summary:
|
|
189
|
-
type: object
|
|
190
|
-
properties:
|
|
191
|
-
total: { type: integer }
|
|
192
|
-
passed: { type: integer }
|
|
193
|
-
failed: { type: integer }
|
|
194
|
-
skipped: { type: integer }
|
|
195
|
-
details:
|
|
196
|
-
type: array
|
|
197
|
-
items:
|
|
198
|
-
type: object
|
|
199
|
-
properties:
|
|
200
|
-
test_name: { type: string }
|
|
201
|
-
status: { type: string, enum: [passed, failed, skipped] }
|
|
202
|
-
duration_ms: { type: integer }
|
|
203
|
-
error_message: { type: string }
|
|
204
|
-
|
|
205
|
-
log-entry:
|
|
206
|
-
type: object
|
|
207
|
-
required: [timestamp, level, event]
|
|
208
|
-
properties:
|
|
209
|
-
timestamp: { type: string, format: "ISO 8601" }
|
|
210
|
-
level: { type: string, enum: [debug, info, warn, error] }
|
|
211
|
-
event: { type: string }
|
|
212
|
-
details: { type: object }
|
|
213
|
-
tool_call: { type: string }
|
|
214
|
-
tokens: { type: integer }
|
|
215
|
-
|
|
216
|
-
token-usage:
|
|
217
|
-
type: object
|
|
218
|
-
required: [total, breakdown]
|
|
219
|
-
properties:
|
|
220
|
-
total:
|
|
221
|
-
type: object
|
|
222
|
-
properties:
|
|
223
|
-
input_tokens: { type: integer }
|
|
224
|
-
output_tokens: { type: integer }
|
|
225
|
-
breakdown:
|
|
226
|
-
type: array
|
|
227
|
-
items:
|
|
228
|
-
type: object
|
|
229
|
-
properties:
|
|
230
|
-
step: { type: string }
|
|
231
|
-
input_tokens: { type: integer }
|
|
232
|
-
output_tokens: { type: integer }
|
|
233
|
-
|
|
234
|
-
final-status:
|
|
235
|
-
type: object
|
|
236
|
-
required: [status, summary, timestamp]
|
|
237
|
-
properties:
|
|
238
|
-
status: { type: string, enum: [success, failure, partial] }
|
|
239
|
-
summary: { type: string }
|
|
240
|
-
timestamp: { type: string, format: "ISO 8601" }
|
|
241
|
-
duration_seconds: { type: number }
|
|
242
|
-
error: { type: string }
|
|
20
|
+
description: >
|
|
21
|
+
DEPRECATED: This standard has moved to DevAP (orchestration layer).
|
|
22
|
+
Install DevAP and load standards/orchestration/execution-history.ai.yaml instead.
|
|
243
23
|
|
|
244
24
|
rules:
|
|
245
|
-
- id:
|
|
246
|
-
trigger:
|
|
25
|
+
- id: deprecation-notice
|
|
26
|
+
trigger: any execution history operation
|
|
247
27
|
instruction: >
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
若使用 file_server 後端,寫入走 FileServer API,
|
|
252
|
-
同時將 L1 索引同步到本地。
|
|
253
|
-
priority: required
|
|
254
|
-
|
|
255
|
-
- id: use-l1-first
|
|
256
|
-
trigger: "Agent 需要參考先前執行歷史"
|
|
257
|
-
instruction: >
|
|
258
|
-
先讀取 .execution-history/index.json(L1)篩選相關任務,
|
|
259
|
-
再讀取相關任務的 manifest.json(L2)了解脈絡,
|
|
260
|
-
僅在需要深入診斷時才讀取 L3 完整 artifacts。
|
|
261
|
-
priority: required
|
|
28
|
+
This standard (execution-history.ai.yaml) has been migrated to DevAP.
|
|
29
|
+
For the canonical executable definition, load:
|
|
30
|
+
dev-autopilot/standards/orchestration/execution-history.ai.yaml
|
|
262
31
|
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
instruction: >
|
|
266
|
-
在寫入前,掃描內容中的 sensitive_patterns,
|
|
267
|
-
將匹配內容替換為 [REDACTED:{label}] 格式。
|
|
32
|
+
Note: DevAP's packages/core already implements execution history (ExecutionHistoryStorage).
|
|
33
|
+
The YAML standard is now co-located with the implementation.
|
|
268
34
|
priority: required
|
|
269
35
|
|
|
270
|
-
- id:
|
|
271
|
-
trigger: "
|
|
272
|
-
instruction: >
|
|
273
|
-
檢查該 task 的 run 數量是否超過 max_runs_per_task,
|
|
274
|
-
若超過則刪除最舊 run 的 L3 artifacts,保留 L1/L2 索引。
|
|
275
|
-
priority: recommended
|
|
276
|
-
|
|
277
|
-
- id: archive-stale-tasks
|
|
278
|
-
trigger: "index.json 更新時,檢查是否有 task 需要歸檔"
|
|
36
|
+
- id: record-on-completion
|
|
37
|
+
trigger: "Agent 任務執行完畢"
|
|
279
38
|
instruction: >
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
若歸檔 task 有新 run 寫入,自動移回 index.json。
|
|
283
|
-
index.json 最多保留 50 個活躍 tasks。
|
|
284
|
-
priority: recommended
|
|
39
|
+
DEPRECATED — load dev-autopilot/standards/orchestration/execution-history.ai.yaml
|
|
40
|
+
for the current executable history protocol.
|
|
285
41
|
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
instruction: >
|
|
289
|
-
跨專案存取僅限 L1 層級(index.json),
|
|
290
|
-
不得讀取 L2/L3 層級以遵守授權隔離。
|
|
42
|
+
Minimal fallback: Write required artifacts to .execution-history/{task-id}/{run-number}/
|
|
43
|
+
after each agent task completes.
|
|
291
44
|
priority: required
|
|
292
|
-
|
|
293
|
-
storage:
|
|
294
|
-
directory: ".execution-history/"
|
|
295
|
-
format: "JSON + Markdown + JSONL + Patch"
|
|
296
|
-
|
|
297
|
-
architecture:
|
|
298
|
-
classification: always-on-protocol
|
|
299
|
-
note: >
|
|
300
|
-
Execution History 是 Always-On Protocol,
|
|
301
|
-
與 developer-memory、project-context-memory 同層級。
|
|
302
|
-
AI 自動遵循所有 rules,不需要 slash command。
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
# SPDX-License-Identifier: MIT
|
|
2
|
+
name: Flaky Test Management Standards
|
|
3
|
+
nameZh: 不穩定測試管理標準
|
|
4
|
+
id: flaky-test-management
|
|
5
|
+
version: "1.0.0"
|
|
6
|
+
category: testing
|
|
7
|
+
scope: test-reliability
|
|
8
|
+
summary: >
|
|
9
|
+
Policies and tooling for detecting, quarantining, and eliminating flaky
|
|
10
|
+
tests. Flaky tests erode CI confidence, cause false failures, and mask
|
|
11
|
+
real bugs.
|
|
12
|
+
|
|
13
|
+
requirements:
|
|
14
|
+
- id: REQ-01
|
|
15
|
+
title: Flaky Test Definition
|
|
16
|
+
titleZh: 不穩定測試定義
|
|
17
|
+
level: MUST
|
|
18
|
+
description: >
|
|
19
|
+
A test is considered flaky if it produces different results (pass/fail)
|
|
20
|
+
on consecutive runs with the same code. Teams MUST define a flakiness
|
|
21
|
+
threshold: a test that fails ≥ 2% of runs on the main branch without code
|
|
22
|
+
changes is flaky.
|
|
23
|
+
|
|
24
|
+
- id: REQ-02
|
|
25
|
+
title: Quarantine Protocol
|
|
26
|
+
titleZh: 隔離協議
|
|
27
|
+
level: MUST
|
|
28
|
+
description: >
|
|
29
|
+
Flaky tests MUST be quarantined within 48 hours of detection by:
|
|
30
|
+
(1) adding a `.skip` or `.todo` annotation, (2) opening a tracking
|
|
31
|
+
issue, (3) adding a comment with the issue link and known failure mode.
|
|
32
|
+
Quarantined tests MUST NOT block CI merges.
|
|
33
|
+
|
|
34
|
+
- id: REQ-03
|
|
35
|
+
title: Retry Policy
|
|
36
|
+
titleZh: 重試策略
|
|
37
|
+
level: SHOULD
|
|
38
|
+
description: >
|
|
39
|
+
CI SHOULD allow a maximum of 2 retries for tests in the quarantine list.
|
|
40
|
+
Retries SHOULD be applied only to known-flaky tests, not the entire suite.
|
|
41
|
+
A test that passes after retry is still considered flaky and MUST be fixed.
|
|
42
|
+
|
|
43
|
+
- id: REQ-04
|
|
44
|
+
title: Flaky Test Elimination SLA
|
|
45
|
+
titleZh: 修復 SLA
|
|
46
|
+
level: MUST
|
|
47
|
+
description: >
|
|
48
|
+
Quarantined tests MUST be either fixed or permanently removed within
|
|
49
|
+
30 days of quarantine. Tests left quarantined for > 30 days with no
|
|
50
|
+
activity SHOULD be automatically deleted.
|
|
51
|
+
|
|
52
|
+
- id: REQ-05
|
|
53
|
+
title: Root Cause Categories
|
|
54
|
+
titleZh: 根因分類
|
|
55
|
+
level: SHOULD
|
|
56
|
+
description: >
|
|
57
|
+
When eliminating a flaky test, the root cause SHOULD be documented in
|
|
58
|
+
the fixing PR. Common root causes: timing/race conditions, test isolation
|
|
59
|
+
failures (shared state), external service dependencies, random seed
|
|
60
|
+
dependence, file system ordering.
|
|
61
|
+
|
|
62
|
+
examples:
|
|
63
|
+
- name: "Quarantine annotation (Vitest)"
|
|
64
|
+
code: |
|
|
65
|
+
// TODO: flaky test quarantined 2026-05-05 — see issue #42
|
|
66
|
+
// Root cause: race condition in WebSocket reconnection
|
|
67
|
+
it.skip("reconnects after disconnect", async () => { ... })
|
|
68
|
+
|
|
69
|
+
- name: "Vitest retry config for known flaky tests"
|
|
70
|
+
code: |
|
|
71
|
+
// vitest.config.ts
|
|
72
|
+
export default defineConfig({
|
|
73
|
+
test: {
|
|
74
|
+
retry: 2, // NOTE: retries every test; per REQ-03 prefer the per-test `retry` option on known-flaky tests only
|
|
75
|
+
}
|
|
76
|
+
})
|
|
77
|
+
|
|
78
|
+
anti_patterns:
|
|
79
|
+
- description: >
|
|
80
|
+
Allowing flaky tests to block CI without quarantine — developers learn
|
|
81
|
+
to ignore CI failures, which hides real bugs.
|
|
82
|
+
- description: >
|
|
83
|
+
Using arbitrary sleeps (setTimeout/sleep) to fix race conditions —
|
|
84
|
+
this makes tests slower and more fragile. Use proper async coordination.
|
|
85
|
+
|
|
86
|
+
related_standards:
|
|
87
|
+
- testing
|
|
88
|
+
- test-governance
|
|
89
|
+
- ci-cd-standards
|