universal-dev-standards 5.3.2 → 5.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. package/bundled/ai/standards/adversarial-test.ai.yaml +277 -0
  2. package/bundled/ai/standards/agent-communication-protocol.ai.yaml +32 -166
  3. package/bundled/ai/standards/agent-dispatch.ai.yaml +32 -58
  4. package/bundled/ai/standards/audit-trail.ai.yaml +113 -0
  5. package/bundled/ai/standards/branch-completion.ai.yaml +34 -70
  6. package/bundled/ai/standards/change-batching-standards.ai.yaml +31 -180
  7. package/bundled/ai/standards/chaos-injection-tests.ai.yaml +91 -0
  8. package/bundled/ai/standards/container-image-standards.ai.yaml +88 -0
  9. package/bundled/ai/standards/container-security.ai.yaml +331 -0
  10. package/bundled/ai/standards/cost-budget-test.ai.yaml +96 -0
  11. package/bundled/ai/standards/data-contract.ai.yaml +110 -0
  12. package/bundled/ai/standards/data-migration-testing.ai.yaml +96 -0
  13. package/bundled/ai/standards/data-pipeline.ai.yaml +113 -0
  14. package/bundled/ai/standards/disaster-recovery-drill.ai.yaml +89 -0
  15. package/bundled/ai/standards/execution-history.ai.yaml +30 -288
  16. package/bundled/ai/standards/flaky-test-management.ai.yaml +89 -0
  17. package/bundled/ai/standards/flow-based-testing.ai.yaml +240 -0
  18. package/bundled/ai/standards/iac-design-principles.ai.yaml +83 -0
  19. package/bundled/ai/standards/incident-response.ai.yaml +107 -0
  20. package/bundled/ai/standards/license-compliance.ai.yaml +106 -0
  21. package/bundled/ai/standards/llm-output-validation.ai.yaml +269 -0
  22. package/bundled/ai/standards/mock-boundary.ai.yaml +250 -0
  23. package/bundled/ai/standards/mutation-testing.ai.yaml +192 -0
  24. package/bundled/ai/standards/pii-classification.ai.yaml +109 -0
  25. package/bundled/ai/standards/pipeline-integration-standards.ai.yaml +28 -169
  26. package/bundled/ai/standards/policy-as-code-testing.ai.yaml +227 -0
  27. package/bundled/ai/standards/prd-standards.ai.yaml +88 -0
  28. package/bundled/ai/standards/product-metrics-standards.ai.yaml +111 -0
  29. package/bundled/ai/standards/prompt-regression.ai.yaml +94 -0
  30. package/bundled/ai/standards/property-based-testing.ai.yaml +105 -0
  31. package/bundled/ai/standards/release-quality-manifest.ai.yaml +135 -0
  32. package/bundled/ai/standards/replay-test.ai.yaml +111 -0
  33. package/bundled/ai/standards/runbook.ai.yaml +104 -0
  34. package/bundled/ai/standards/sast-advanced.ai.yaml +135 -0
  35. package/bundled/ai/standards/schema-evolution.ai.yaml +111 -0
  36. package/bundled/ai/standards/secret-management-standards.ai.yaml +105 -0
  37. package/bundled/ai/standards/secure-op.ai.yaml +365 -0
  38. package/bundled/ai/standards/security-testing.ai.yaml +171 -0
  39. package/bundled/ai/standards/server-ops-security.ai.yaml +274 -0
  40. package/bundled/ai/standards/slo-sli.ai.yaml +97 -0
  41. package/bundled/ai/standards/smoke-test.ai.yaml +87 -0
  42. package/bundled/ai/standards/supply-chain-attestation.ai.yaml +109 -0
  43. package/bundled/ai/standards/test-completeness-dimensions.ai.yaml +52 -5
  44. package/bundled/ai/standards/user-story-mapping.ai.yaml +108 -0
  45. package/bundled/ai/standards/workflow-enforcement.ai.yaml +34 -240
  46. package/bundled/ai/standards/workflow-state-protocol.ai.yaml +31 -107
  47. package/bundled/core/adversarial-test.md +212 -0
  48. package/bundled/core/chaos-injection-tests.md +116 -0
  49. package/bundled/core/container-security.md +521 -0
  50. package/bundled/core/cost-budget-test.md +69 -0
  51. package/bundled/core/data-migration-testing.md +110 -0
  52. package/bundled/core/disaster-recovery-drill.md +73 -0
  53. package/bundled/core/flaky-test-management.md +73 -0
  54. package/bundled/core/flow-based-testing.md +142 -0
  55. package/bundled/core/llm-output-validation.md +178 -0
  56. package/bundled/core/mock-boundary.md +100 -0
  57. package/bundled/core/mutation-testing.md +97 -0
  58. package/bundled/core/policy-as-code-testing.md +188 -0
  59. package/bundled/core/prompt-regression.md +72 -0
  60. package/bundled/core/property-based-testing.md +73 -0
  61. package/bundled/core/release-quality-manifest.md +147 -0
  62. package/bundled/core/replay-test.md +86 -0
  63. package/bundled/core/sast-advanced.md +300 -0
  64. package/bundled/core/secure-op.md +314 -0
  65. package/bundled/core/security-testing.md +87 -0
  66. package/bundled/core/server-ops-security.md +493 -0
  67. package/bundled/core/smoke-test.md +65 -0
  68. package/bundled/core/supply-chain-attestation.md +117 -0
  69. package/bundled/locales/zh-CN/CHANGELOG.md +3 -3
  70. package/bundled/locales/zh-CN/README.md +1 -1
  71. package/bundled/locales/zh-CN/skills/ai-instruction-standards/SKILL.md +5 -5
  72. package/bundled/locales/zh-TW/CHANGELOG.md +3 -3
  73. package/bundled/locales/zh-TW/README.md +1 -1
  74. package/bundled/locales/zh-TW/skills/ai-instruction-standards/SKILL.md +183 -79
  75. package/bundled/skills/README.md +4 -3
  76. package/bundled/skills/SKILL_NAMING.md +94 -0
  77. package/bundled/skills/ai-instruction-standards/SKILL.md +181 -88
  78. package/bundled/skills/atdd-assistant/SKILL.md +8 -0
  79. package/bundled/skills/bdd-assistant/SKILL.md +7 -0
  80. package/bundled/skills/checkin-assistant/SKILL.md +8 -0
  81. package/bundled/skills/code-review-assistant/SKILL.md +7 -0
  82. package/bundled/skills/journey-test-assistant/SKILL.md +203 -0
  83. package/bundled/skills/orchestrate/SKILL.md +167 -0
  84. package/bundled/skills/plan/SKILL.md +234 -0
  85. package/bundled/skills/pr-automation-assistant/SKILL.md +8 -0
  86. package/bundled/skills/push/SKILL.md +49 -2
  87. package/bundled/skills/{process-automation → skill-builder}/SKILL.md +1 -1
  88. package/bundled/skills/{forward-derivation → spec-derivation}/SKILL.md +1 -1
  89. package/bundled/skills/spec-driven-dev/SKILL.md +7 -0
  90. package/bundled/skills/sweep/SKILL.md +145 -0
  91. package/bundled/skills/tdd-assistant/SKILL.md +7 -0
  92. package/package.json +1 -1
  93. package/src/commands/flow.js +8 -0
  94. package/src/commands/start.js +14 -0
  95. package/src/commands/sweep.js +8 -0
  96. package/src/commands/workflow.js +8 -0
  97. package/standards-registry.json +474 -12
  98. package/bundled/locales/zh-CN/skills/ac-coverage-assistant/SKILL.md +0 -190
  99. package/bundled/locales/zh-CN/skills/forward-derivation/SKILL.md +0 -71
  100. package/bundled/locales/zh-CN/skills/forward-derivation/guide.md +0 -130
  101. package/bundled/locales/zh-CN/skills/methodology-system/SKILL.md +0 -88
  102. package/bundled/locales/zh-CN/skills/methodology-system/create-methodology.md +0 -350
  103. package/bundled/locales/zh-CN/skills/methodology-system/guide.md +0 -131
  104. package/bundled/locales/zh-CN/skills/methodology-system/runtime.md +0 -279
  105. package/bundled/locales/zh-CN/skills/process-automation/SKILL.md +0 -143
  106. package/bundled/locales/zh-TW/skills/ac-coverage-assistant/SKILL.md +0 -195
  107. package/bundled/locales/zh-TW/skills/deploy-assistant/SKILL.md +0 -178
  108. package/bundled/locales/zh-TW/skills/forward-derivation/SKILL.md +0 -69
  109. package/bundled/locales/zh-TW/skills/forward-derivation/guide.md +0 -415
  110. package/bundled/locales/zh-TW/skills/methodology-system/SKILL.md +0 -86
  111. package/bundled/locales/zh-TW/skills/methodology-system/create-methodology.md +0 -350
  112. package/bundled/locales/zh-TW/skills/methodology-system/guide.md +0 -131
  113. package/bundled/locales/zh-TW/skills/methodology-system/runtime.md +0 -279
  114. package/bundled/locales/zh-TW/skills/process-automation/SKILL.md +0 -144
  115. /package/bundled/skills/{ac-coverage-assistant → ac-coverage}/SKILL.md +0 -0
  116. /package/bundled/skills/{methodology-system → dev-methodology}/SKILL.md +0 -0
  117. /package/bundled/skills/{methodology-system → dev-methodology}/create-methodology.md +0 -0
  118. /package/bundled/skills/{methodology-system → dev-methodology}/guide.md +0 -0
  119. /package/bundled/skills/{methodology-system → dev-methodology}/integrated-flow.md +0 -0
  120. /package/bundled/skills/{methodology-system → dev-methodology}/prerequisite-check.md +0 -0
  121. /package/bundled/skills/{methodology-system → dev-methodology}/runtime.md +0 -0
  122. /package/bundled/skills/{forward-derivation → spec-derivation}/guide.md +0 -0
@@ -0,0 +1,113 @@
1
+ # Data Pipeline Standards - AI Optimized
2
+ # Source: XSPEC-068 Wave 3 Data Engineering Pack
3
+
4
+ id: data-pipeline
5
+ title: Data Pipeline Standards
6
+ version: "1.0.0"
7
+ status: Active
8
+ tags: [data-engineering, pipeline, etl, data-quality, orchestration, idempotency]
9
+ summary: |
10
+ Defines engineering standards for building reliable, observable, and
11
+ maintainable data pipelines. Covers idempotency and exactly-once semantics,
12
+ error handling and dead-letter queues, checkpoint and recovery patterns,
13
+ data lineage tracking, pipeline observability (metrics, alerting), testing
14
+ requirements, and deployment practices. Applicable to batch ETL, streaming
15
+ pipelines, and ML feature pipelines.
16
+
17
+ requirements:
18
+ - id: REQ-001
19
+ title: Idempotency and Exactly-Once Processing
20
+ description: |
21
+ Every data pipeline MUST be designed for idempotent execution:
22
+ re-running the same pipeline for the same time window or batch MUST
23
+ produce identical output without duplication or data loss. Pipelines
24
+ MUST use deterministic keys for deduplication. Batch pipelines MUST
25
+ support re-processing historical partitions cleanly. Streaming pipelines
26
+ MUST implement exactly-once or at-least-once with deduplication using
27
+ unique event IDs. Overwrites of output partitions are preferred over
28
+ appends for batch jobs.
29
+ level: MUST
30
+ examples:
31
+ - "Batch: pipeline writes to date-partitioned output and overwrites the partition on re-run"
32
+ - "Streaming: dedup using Kafka message key + consumer group offset tracking"
33
+ - "Test: running pipeline twice for 2026-04-01 produces same row count both times"
34
+
35
+ - id: REQ-002
36
+ title: Error Handling and Dead-Letter Queues
37
+ description: |
38
+ Data pipelines MUST implement structured error handling with
39
+ categorized failure modes. Transient errors (network timeout, API
40
+ rate limit) MUST use exponential backoff retry (max 3 attempts).
41
+ Permanent errors (schema violation, invalid data) MUST route records
42
+ to a Dead-Letter Queue (DLQ) with the original record, error type,
43
+ error message, and processing timestamp. DLQ records MUST be
44
+ monitored and addressed within the pipeline's SLA.
45
+ level: MUST
46
+ examples:
47
+ - "Transient retry: retry_policy: {max_attempts: 3, backoff_base: 2s, max_backoff: 30s}"
48
+ - "DLQ record: {original_record: {...}, error_type: 'SCHEMA_VIOLATION', error_msg: 'field amount is null', ts: '...'}"
49
+ - "DLQ alert: >100 DLQ messages in 1 hour → PagerDuty alert to data-oncall"
50
+
51
+ - id: REQ-003
52
+ title: Checkpoint and Recovery
53
+ description: |
54
+ Long-running batch pipelines and stateful streaming pipelines MUST
55
+ implement checkpointing to enable recovery from mid-run failures
56
+ without full reprocessing. Checkpoints MUST record: last successfully
57
+ processed partition/offset/watermark, job run ID, and timestamp.
58
+ Recovery MUST resume from the last checkpoint, not from the beginning.
59
+ Checkpoint state MUST be stored in durable external storage (not
60
+ local disk).
61
+ level: MUST
62
+ examples:
63
+ - "Batch: checkpoint stores {last_processed_date: '2026-04-28', last_id: 12345678} in S3"
64
+ - "Streaming: Flink checkpoint interval 5 minutes, stored in S3 with 3 checkpoints retained"
65
+ - "Recovery test: kill job mid-run, restart, verify output matches full run with no duplicates"
66
+
67
+ - id: REQ-004
68
+ title: Data Lineage Tracking
69
+ description: |
70
+ Every data pipeline MUST emit lineage metadata describing its data
71
+ flow: source datasets (with versions/timestamps), transformation logic
72
+ applied, and output datasets produced. Lineage MUST be machine-readable
73
+ and ingested into a central lineage store or data catalog. Lineage
74
+ enables root-cause analysis of data quality issues and impact assessment
75
+ of upstream changes.
76
+ level: MUST
77
+ examples:
78
+ - "Lineage emit: {job: 'orders-aggregator', inputs: ['raw_orders@2026-04-30'], outputs: ['daily_order_summary@2026-04-30'], transform_version: 'v1.3.2'}"
79
+ - "OpenLineage event emitted to Marquez or DataHub on job start and completion"
80
+ - "Lineage query: 'Which pipelines read from raw_orders?' returns 5 downstream jobs"
81
+
82
+ - id: REQ-005
83
+ title: Pipeline Observability and SLOs
84
+ description: |
85
+ Every production data pipeline MUST expose the following metrics:
86
+ records processed (counter), processing latency (histogram), error
87
+ rate (gauge), DLQ depth (gauge), and last successful run timestamp.
88
+ Pipelines MUST define SLOs for: freshness (data available within N
89
+ hours of source), completeness (≥ X% records successfully processed),
90
+ and latency (p95 processing time within threshold). SLO violations
91
+ MUST trigger alerts.
92
+ level: MUST
93
+ examples:
94
+ - "Metric: pipeline_records_processed_total{pipeline='orders-agg',status='success'}"
95
+ - "Freshness SLO: daily_order_summary available by 03:00 UTC — alert if missing by 04:00 UTC"
96
+ - "Completeness alert: processed_records / expected_records < 0.99 → P2 alert"
97
+
98
+ - id: REQ-006
99
+ title: Pipeline Testing Requirements
100
+ description: |
101
+ Data pipelines MUST have automated tests covering: unit tests for
102
+ transformation logic (test with sample input/output DataFrames),
103
+ integration tests validating end-to-end flow with synthetic data,
104
+ and schema conformance tests validating output matches declared
105
+ data contract. Pipelines SHOULD have regression tests for historically
106
+ problematic edge cases (nulls in key fields, negative amounts,
107
+ duplicate records). Test coverage MUST be ≥ 80% for transformation
108
+ logic.
109
+ level: MUST
110
+ examples:
111
+ - "Unit test: test_calculate_order_total() — asserts discount applied correctly on sample rows"
112
+ - "Integration test: runs full pipeline on 1000 synthetic orders, validates output row count and schema"
113
+ - "Edge case test: pipeline handles duplicate order_id gracefully, deduplication logic verified"
@@ -0,0 +1,89 @@
1
+ # SPDX-License-Identifier: MIT
2
+ name: Disaster Recovery Drill Standards
3
+ nameZh: 災難恢復演練標準
4
+ id: disaster-recovery-drill
5
+ version: "1.0.0"
6
+ category: operations
7
+ scope: reliability
8
+ summary: >
9
+ Structured DR drill standards: quarterly runbook execution, RTO/RPO
10
+ measurement, backup restore verification, and Game Day protocols.
11
+ Untested recovery plans fail at the worst moment.
12
+
13
+ requirements:
14
+ - id: REQ-01
15
+ title: RTO/RPO Targets Defined
16
+ titleZh: RTO/RPO 目標定義
17
+ level: MUST
18
+ description: >
19
+ Each system MUST have documented RTO (Recovery Time Objective) and RPO
20
+ (Recovery Point Objective) targets. These MUST be agreed with stakeholders
21
+ before any DR drill can be considered meaningful.
22
+ examples:
23
+ - "VibeOps commercial: RTO < 1 hour, RPO < 24 hours (daily backup)"
24
+
25
+ - id: REQ-02
26
+ title: Backup Restore Test
27
+ titleZh: 備份還原測試
28
+ level: MUST
29
+ description: >
30
+ At minimum quarterly, a full backup restore MUST be executed in an
31
+ isolated environment and verified for data integrity. The restore time
32
+ MUST be measured and compared to the RTO target.
33
+
34
+ - id: REQ-03
35
+ title: Runbook Completeness
36
+ titleZh: 運行手冊完整性
37
+ level: MUST
38
+ description: >
39
+ A DR runbook MUST exist covering: (1) detection (how do we know disaster
40
+ occurred?), (2) decision (who declares DR?), (3) recovery steps
41
+ (step-by-step, executable commands), (4) verification (how do we confirm
42
+ recovery?), (5) communication plan.
43
+
44
+ - id: REQ-04
45
+ title: Game Day Exercise
46
+ titleZh: Game Day 演練
47
+ level: SHOULD
48
+ description: >
49
+ At minimum annually, a Game Day exercise SHOULD be conducted where the
50
+ team simulates a realistic failure scenario and executes the runbook from
51
+ scratch. Results SHOULD be documented and used to improve the runbook.
52
+
53
+ - id: REQ-05
54
+ title: Drill Record
55
+ titleZh: 演練記錄
56
+ level: MUST
57
+ description: >
58
+ Every DR drill MUST produce a written record including: date, participants,
59
+ scenario tested, RTO achieved, RPO achieved, issues found, remediation
60
+ actions. Records MUST be retained for 12 months.
61
+
62
+ examples:
63
+ - name: "DR drill record template"
64
+ code: |
65
+ date: 2026-05-05
66
+ participants: [alice, bob]
67
+ scenario: "Database total loss — restore from daily backup"
68
+ rto_target: "1 hour"
69
+ rto_achieved: "42 minutes"
70
+ rpo_target: "24 hours"
71
+ rpo_achieved: "23 hours 15 minutes"
72
+ issues_found:
73
+ - "backup script path was stale — fixed in XSPEC-170"
74
+ remediation:
75
+ - "Update backup path in backup-restore.sh"
76
+ status: PASS
77
+
78
+ anti_patterns:
79
+ - description: >
80
+ Only verifying that a backup file exists — always restore it and
81
+ verify data integrity. An untested backup is not a backup.
82
+ - description: >
83
+ Running DR drills in production — always use an isolated environment
84
+ to avoid turning a drill into an actual disaster.
85
+
86
+ related_standards:
87
+ - deployment-standards
88
+ - chaos-engineering-standards
89
+ - verification-evidence
@@ -1,302 +1,44 @@
1
- # Execution History Repository Standards - AI Optimized
2
- # Source: cross-project/specs/XSPEC-003-execution-history-standard-sdd.md
1
+ # Execution History Repository Standards - DEPRECATED STUB
2
+ # This file has been migrated to DevAP per DEC-049 (UDS/DevAP responsibility split).
3
+ # Canonical location: dev-autopilot/standards/orchestration/execution-history.ai.yaml
4
+ # Migration: XSPEC-086 Phase 2 (2026-04-27)
5
+ #
6
+ # Source spec: cross-project/specs/XSPEC-003-execution-history-standard-sdd.md
7
+ # Deprecation schedule: UDS 5.4.0 deprecated → UDS 6.0.0 removed
3
8
 
4
9
  standard:
5
10
  id: execution-history
6
- name: "Execution History Repository Standards"
7
- description: "Structured system for persisting agent execution artifacts with L1/L2/L3 tiered access model"
8
- guidelines:
9
- - "每次 agent 任務執行完畢後,必須產出 required artifacts"
10
- - "歷史以檔案系統目錄結構組織,支援直接路徑存取"
11
- - "提供 L1/L2/L3 三層存取,平衡資訊量與 token 成本"
12
- - "敏感資訊在寫入時自動 redact"
13
- - "跨專案僅共享 L1 層級,遵守授權隔離"
14
- - "歷史保留策略確保 L1/L2 索引永久保留,L3 artifacts 依 max_runs 設定自動清理以控制儲存空間"
15
-
16
11
  meta:
17
- version: "1.0.0"
18
- updated: "2026-04-02"
12
+ version: "1.0.1"
13
+ updated: "2026-04-27"
14
+ deprecated: true
15
+ deprecated_since: "5.4.0"
16
+ removal_version: "6.0.0"
17
+ canonical_owner: devap
18
+ canonical_path: "dev-autopilot/standards/orchestration/execution-history.ai.yaml"
19
19
  source: cross-project/specs/XSPEC-003-execution-history-standard-sdd.md
20
- description: "基於 Meta-Harness 論文洞見,建立跨專案執行歷史標準"
21
-
22
- schema:
23
- storage:
24
- description: "執行歷史為 append-only log,支援兩種儲存後端"
25
- backends:
26
- local:
27
- description: "儲存在 repo 內的 .execution-history/ 目錄"
28
- git_policy: |
29
- L3 artifacts 不追蹤(納入 .gitignore),L1 index.json 可選追蹤。
30
- 歷史本質是寫入後不變動的 log,不適合 git 追蹤大量變動。
31
- gitignore_rules: |
32
- .execution-history/*/ # L3 artifacts 不追蹤
33
- !.execution-history/index.json # L1 索引可選追蹤
34
- !.execution-history/index-archive.json # L1 歸檔索引可選追蹤
35
- when_to_use: "個人開發、小型專案、或不需跨環境共享歷史時"
36
- file_server:
37
- description: "儲存在外部 FileServer(如 S3、MinIO、NAS、共享磁碟)"
38
- config_file: ".execution-history/storage.json"
39
- config_schema:
40
- backend: { type: string, enum: [local, file_server] }
41
- file_server_url: { type: string, description: "FileServer 端點 URL" }
42
- auth_method: { type: string, enum: [none, api_key, oauth], default: none }
43
- sync_l1_to_local: { type: boolean, default: true, description: "是否將 L1 索引同步到本地" }
44
- when_to_use: "團隊協作、跨環境共享、歷史量大、或需要長期保存時"
45
- rules:
46
- - "L1 索引始終同步到本地(sync_l1_to_local: true),確保離線可讀"
47
- - "L2/L3 按需從 FileServer 拉取"
48
- - "寫入走 FileServer API,本地不留 L3 副本"
49
- default_backend: local
50
-
51
- directory_structure:
52
- root: ".execution-history/"
53
- layout: |
54
- .execution-history/
55
- ├── storage.json # 儲存後端配置(可選,預設 local)
56
- ├── index.json # L1: 最近 50 個活躍 tasks 索引
57
- ├── index-archive.json # L1-ext: 歸檔 tasks 索引(> 90 天無新 run)
58
- ├── {task-id}/
59
- │ ├── manifest.json # L2: 任務層級摘要
60
- │ ├── {run-number}/ # 三位數字(001-999)
61
- │ │ ├── manifest.json # L2: 單次執行摘要
62
- │ │ ├── task-description.md # Required: 任務描述
63
- │ │ ├── code-diff.patch # Required: 程式碼變更
64
- │ │ ├── test-results.json # Required: 測試結果
65
- │ │ ├── execution-log.jsonl # Required: 執行日誌
66
- │ │ ├── token-usage.json # Required: Token 使用量
67
- │ │ ├── final-status.json # Required: 最終狀態
68
- │ │ ├── error-analysis.md # Optional: 錯誤分析
69
- │ │ └── agent-reasoning.md # Optional: Agent 推理過程
70
- │ └── ...
71
- └── ...
72
- index_strategy:
73
- description: "分層索引策略,平衡即時性與歷史完整性"
74
- active_index: "index.json — 最近 50 個活躍 tasks(< 200 tokens 目標)"
75
- archive_index: "index-archive.json — 超過 90 天無新 run 的 tasks 自動歸檔"
76
- archive_trigger: "task 最後一次 run 距今 > 90 天"
77
- reactivate_trigger: "歸檔 task 有新 run 時自動移回 active index"
78
-
79
- artifacts:
80
- required:
81
- - id: task-description
82
- file: "task-description.md"
83
- format: markdown
84
- description: "任務目標、輸入、預期產出"
85
- max_size: "2KB"
86
- - id: code-diff
87
- file: "code-diff.patch"
88
- format: "unified diff"
89
- description: "本次執行產生的程式碼變更"
90
- max_size: "50KB"
91
- - id: test-results
92
- file: "test-results.json"
93
- format: json
94
- description: "測試執行結果(通過/失敗/跳過數量、失敗詳情)"
95
- schema_ref: "#/definitions/test-results"
96
- - id: execution-log
97
- file: "execution-log.jsonl"
98
- format: jsonl
99
- description: "結構化執行日誌(每行一個事件)"
100
- schema_ref: "#/definitions/log-entry"
101
- - id: token-usage
102
- file: "token-usage.json"
103
- format: json
104
- description: "Token 使用量明細(input/output/total,按步驟分)"
105
- schema_ref: "#/definitions/token-usage"
106
- - id: final-status
107
- file: "final-status.json"
108
- format: json
109
- description: "最終狀態(success/failure/partial,含摘要)"
110
- schema_ref: "#/definitions/final-status"
111
- optional:
112
- - id: error-analysis
113
- file: "error-analysis.md"
114
- format: markdown
115
- description: "失敗時的根因分析"
116
- condition: "status != success"
117
- - id: agent-reasoning
118
- file: "agent-reasoning.md"
119
- format: markdown
120
- description: "Agent 的推理過程與決策紀錄"
121
-
122
- access_layers:
123
- L1:
124
- name: "索引層"
125
- files:
126
- active: "index.json"
127
- archive: "index-archive.json"
128
- target_tokens: "< 200 (active), 按需 (archive)"
129
- fields:
130
- - task_id
131
- - task_name
132
- - tags
133
- - latest_run
134
- - latest_status
135
- - latest_date
136
- - total_runs
137
- cross_project_access: true
138
- L2:
139
- name: "摘要層"
140
- file: "manifest.json"
141
- target_tokens: "< 1,000 per task"
142
- fields:
143
- - task_description_summary
144
- - run_history
145
- - key_metrics
146
- - artifacts_available
147
- - failure_summary
148
- cross_project_access: false
149
- L3:
150
- name: "完整紀錄層"
151
- file: "各 artifact 檔案"
152
- target_tokens: "不限"
153
- description: "直接讀取 run 目錄下的個別 artifact 檔案"
154
- cross_project_access: false
155
-
156
- retention_policy:
157
- configurable: true
158
- defaults:
159
- max_runs_per_task: 50
160
- max_total_size_mb: 500
161
- cleanup_strategy: "oldest_l3_first"
162
- rules:
163
- - "超過 max_runs 時,最舊的 run 的 L3 artifacts 被刪除"
164
- - "L1 和 L2 索引永久保留(除非手動刪除)"
165
- - "cleanup 以 task 為單位,不跨 task 清理"
166
-
167
- sensitive_data:
168
- redact_on_write: true
169
- sensitive_patterns:
170
- - pattern: "sk-[a-zA-Z0-9_-]{20,}"
171
- label: "API_KEY"
172
- - pattern: "ghp_[a-zA-Z0-9]{36}"
173
- label: "GITHUB_TOKEN"
174
- - pattern: "password\\s*[:=]\\s*\\S+"
175
- label: "PASSWORD"
176
- - pattern: "-----BEGIN .* PRIVATE KEY-----"
177
- label: "PRIVATE_KEY"
178
- redact_format: "[REDACTED:{label}]"
179
-
180
- definitions:
181
- test-results:
182
- type: object
183
- required: [timestamp, summary, details]
184
- properties:
185
- timestamp:
186
- type: string
187
- format: "ISO 8601"
188
- summary:
189
- type: object
190
- properties:
191
- total: { type: integer }
192
- passed: { type: integer }
193
- failed: { type: integer }
194
- skipped: { type: integer }
195
- details:
196
- type: array
197
- items:
198
- type: object
199
- properties:
200
- test_name: { type: string }
201
- status: { type: string, enum: [passed, failed, skipped] }
202
- duration_ms: { type: integer }
203
- error_message: { type: string }
204
-
205
- log-entry:
206
- type: object
207
- required: [timestamp, level, event]
208
- properties:
209
- timestamp: { type: string, format: "ISO 8601" }
210
- level: { type: string, enum: [debug, info, warn, error] }
211
- event: { type: string }
212
- details: { type: object }
213
- tool_call: { type: string }
214
- tokens: { type: integer }
215
-
216
- token-usage:
217
- type: object
218
- required: [total, breakdown]
219
- properties:
220
- total:
221
- type: object
222
- properties:
223
- input_tokens: { type: integer }
224
- output_tokens: { type: integer }
225
- breakdown:
226
- type: array
227
- items:
228
- type: object
229
- properties:
230
- step: { type: string }
231
- input_tokens: { type: integer }
232
- output_tokens: { type: integer }
233
-
234
- final-status:
235
- type: object
236
- required: [status, summary, timestamp]
237
- properties:
238
- status: { type: string, enum: [success, failure, partial] }
239
- summary: { type: string }
240
- timestamp: { type: string, format: "ISO 8601" }
241
- duration_seconds: { type: number }
242
- error: { type: string }
20
+ description: >
21
+ DEPRECATED: This standard has moved to DevAP (orchestration layer).
22
+ Install DevAP and load standards/orchestration/execution-history.ai.yaml instead.
243
23
 
244
24
  rules:
245
- - id: record-on-completion
246
- trigger: "Agent 任務執行完畢(無論成功或失敗)"
25
+ - id: deprecation-notice
26
+ trigger: any execution history operation
247
27
  instruction: >
248
- 在任務完成後,依據 storage.json 配置的後端(預設 local),
249
- 將所有 required artifacts 寫入 .execution-history/{task-id}/{run-number}/ 目錄,
250
- 並更新 task manifest.json 和根 index.json。
251
- 若使用 file_server 後端,寫入走 FileServer API,
252
- 同時將 L1 索引同步到本地。
253
- priority: required
254
-
255
- - id: use-l1-first
256
- trigger: "Agent 需要參考先前執行歷史"
257
- instruction: >
258
- 先讀取 .execution-history/index.json(L1)篩選相關任務,
259
- 再讀取相關任務的 manifest.json(L2)了解脈絡,
260
- 僅在需要深入診斷時才讀取 L3 完整 artifacts。
261
- priority: required
28
+ This standard (execution-history.ai.yaml) has been migrated to DevAP.
29
+ For the canonical executable definition, load:
30
+ dev-autopilot/standards/orchestration/execution-history.ai.yaml
262
31
 
263
- - id: redact-sensitive
264
- trigger: "寫入任何 artifact 到 .execution-history/"
265
- instruction: >
266
- 在寫入前,掃描內容中的 sensitive_patterns,
267
- 將匹配內容替換為 [REDACTED:{label}] 格式。
32
+ Note: DevAP's packages/core already implements execution history (ExecutionHistoryStorage).
33
+ The YAML standard is now co-located with the implementation.
268
34
  priority: required
269
35
 
270
- - id: respect-retention
271
- trigger: "新 run 寫入後,檢查 retention_policy"
272
- instruction: >
273
- 檢查該 task 的 run 數量是否超過 max_runs_per_task,
274
- 若超過則刪除最舊 run 的 L3 artifacts,保留 L1/L2 索引。
275
- priority: recommended
276
-
277
- - id: archive-stale-tasks
278
- trigger: "index.json 更新時,檢查是否有 task 需要歸檔"
36
+ - id: record-on-completion
37
+ trigger: "Agent 任務執行完畢"
279
38
  instruction: >
280
- index.json 中某 task 的 latest_date 距今超過 90 天,
281
- 將該 task 從 index.json 移至 index-archive.json。
282
- 若歸檔 task 有新 run 寫入,自動移回 index.json。
283
- index.json 最多保留 50 個活躍 tasks。
284
- priority: recommended
39
+ DEPRECATED: load dev-autopilot/standards/orchestration/execution-history.ai.yaml
40
+ for the current executable history protocol.
285
41
 
286
- - id: cross-project-l1-only
287
- trigger: "從其他專案讀取執行歷史"
288
- instruction: >
289
- 跨專案存取僅限 L1 層級(index.json),
290
- 不得讀取 L2/L3 層級以遵守授權隔離。
42
+ Minimal fallback: Write required artifacts to .execution-history/{task-id}/{run-number}/
43
+ after each agent task completes.
291
44
  priority: required
292
-
293
- storage:
294
- directory: ".execution-history/"
295
- format: "JSON + Markdown + JSONL + Patch"
296
-
297
- architecture:
298
- classification: always-on-protocol
299
- note: >
300
- Execution History 是 Always-On Protocol,
301
- 與 developer-memory、project-context-memory 同層級。
302
- AI 自動遵循所有 rules,不需要 slash command。
@@ -0,0 +1,89 @@
1
+ # SPDX-License-Identifier: MIT
2
+ name: Flaky Test Management Standards
3
+ nameZh: 不穩定測試管理標準
4
+ id: flaky-test-management
5
+ version: "1.0.0"
6
+ category: testing
7
+ scope: test-reliability
8
+ summary: >
9
+ Policies and tooling for detecting, quarantining, and eliminating flaky
10
+ tests. Flaky tests erode CI confidence, cause false failures, and mask
11
+ real bugs.
12
+
13
+ requirements:
14
+ - id: REQ-01
15
+ title: Flaky Test Definition
16
+ titleZh: 不穩定測試定義
17
+ level: MUST
18
+ description: >
19
+ A test is considered flaky if it produces different results (pass/fail)
20
+ on consecutive runs with the same code. Teams MUST define a flakiness
21
+ threshold: a test that fails ≥ 2% of runs on main branch without code
22
+ changes is flaky.
23
+
24
+ - id: REQ-02
25
+ title: Quarantine Protocol
26
+ titleZh: 隔離協議
27
+ level: MUST
28
+ description: >
29
+ Flaky tests MUST be quarantined within 48 hours of detection by:
30
+ (1) adding a `.skip` or `.todo` annotation, (2) opening a tracking
31
+ issue, (3) adding a comment with the issue link and known failure mode.
32
+ Quarantined tests MUST NOT block CI merges.
33
+
34
+ - id: REQ-03
35
+ title: Retry Policy
36
+ titleZh: 重試策略
37
+ level: SHOULD
38
+ description: >
39
+ CI SHOULD allow a maximum of 2 retries for tests in the quarantine list.
40
+ Retries SHOULD be applied only to known-flaky tests, not the entire suite.
41
+ A test that passes after retry is still considered flaky and MUST be fixed.
42
+
43
+ - id: REQ-04
44
+ title: Flaky Test Elimination SLA
45
+ titleZh: 修復 SLA
46
+ level: MUST
47
+ description: >
48
+ Quarantined tests MUST be either fixed or permanently removed within
49
+ 30 days of quarantine. Tests left quarantined for > 30 days with no
50
+ activity SHOULD be automatically deleted.
51
+
52
+ - id: REQ-05
53
+ title: Root Cause Categories
54
+ titleZh: 根因分類
55
+ level: SHOULD
56
+ description: >
57
+ When eliminating a flaky test, the root cause SHOULD be documented in
58
+ the fixing PR. Common root causes: timing/race conditions, test isolation
59
+ failures (shared state), external service dependencies, random seed
60
+ dependence, file system ordering.
61
+
62
+ examples:
63
+ - name: "Quarantine annotation (Vitest)"
64
+ code: |
65
+ // TODO: flaky test quarantined 2026-05-05 — see issue #42
66
+ // Root cause: race condition in WebSocket reconnection
67
+ it.skip("reconnects after disconnect", async () => { ... })
68
+
69
+ - name: "Vitest retry config for known flaky tests"
70
+ code: |
71
+ // vitest.config.ts
72
+ export default defineConfig({
73
+ test: {
74
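+ retry: 2, // NOTE: global — retries every test; per REQ-03 prefer per-test retry, e.g. it("name", { retry: 2 }, fn), for known-flaky tests only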
75
+ }
76
+ })
77
+
78
+ anti_patterns:
79
+ - description: >
80
+ Allowing flaky tests to block CI without quarantine — developers learn
81
+ to ignore CI failures, which hides real bugs.
82
+ - description: >
83
+ Using arbitrary sleeps (setTimeout/sleep) to fix race conditions —
84
+ this makes tests slower and more fragile. Use proper async coordination.
85
+
86
+ related_standards:
87
+ - testing
88
+ - test-governance
89
+ - ci-cd-standards