kc-beta 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. package/bin/kc-beta.js +16 -0
  2. package/package.json +32 -0
  3. package/src/agent/confidence-scorer.js +120 -0
  4. package/src/agent/context.js +124 -0
  5. package/src/agent/corner-case-registry.js +119 -0
  6. package/src/agent/engine.js +224 -0
  7. package/src/agent/events.js +27 -0
  8. package/src/agent/history.js +101 -0
  9. package/src/agent/llm-client.js +131 -0
  10. package/src/agent/pipelines/base.js +14 -0
  11. package/src/agent/pipelines/distillation.js +113 -0
  12. package/src/agent/pipelines/extraction.js +92 -0
  13. package/src/agent/pipelines/index.js +23 -0
  14. package/src/agent/pipelines/initializer.js +163 -0
  15. package/src/agent/pipelines/production-qc.js +99 -0
  16. package/src/agent/pipelines/skill-authoring.js +83 -0
  17. package/src/agent/pipelines/skill-testing.js +111 -0
  18. package/src/agent/tools/agent-tool.js +100 -0
  19. package/src/agent/tools/base.js +35 -0
  20. package/src/agent/tools/dashboard-render.js +146 -0
  21. package/src/agent/tools/document-parse.js +184 -0
  22. package/src/agent/tools/document-search.js +111 -0
  23. package/src/agent/tools/evolution-cycle.js +150 -0
  24. package/src/agent/tools/qc-sample.js +94 -0
  25. package/src/agent/tools/registry.js +55 -0
  26. package/src/agent/tools/rule-catalog.js +113 -0
  27. package/src/agent/tools/sandbox-exec.js +106 -0
  28. package/src/agent/tools/tier-downgrade.js +114 -0
  29. package/src/agent/tools/worker-llm-call.js +109 -0
  30. package/src/agent/tools/workflow-run.js +138 -0
  31. package/src/agent/tools/workspace-file.js +122 -0
  32. package/src/agent/version-manager.js +130 -0
  33. package/src/agent/workspace.js +82 -0
  34. package/src/cli/components.js +164 -0
  35. package/src/cli/index.js +329 -0
  36. package/src/cli/init.js +80 -0
  37. package/src/cli/onboard.js +182 -0
  38. package/src/cli/terminal.js +143 -0
  39. package/src/config.js +93 -0
  40. package/template/.env.template +31 -0
  41. package/template/CLAUDE.md +137 -0
  42. package/template/Input/.gitkeep +0 -0
  43. package/template/Output/.gitkeep +0 -0
  44. package/template/Rules/.gitkeep +0 -0
  45. package/template/Samples/.gitkeep +0 -0
  46. package/template/skills/en/meta/compliance-judgment/SKILL.md +114 -0
  47. package/template/skills/en/meta/compliance-judgment/references/output-format.md +151 -0
  48. package/template/skills/en/meta/confidence-system/SKILL.md +117 -0
  49. package/template/skills/en/meta/corner-case-management/SKILL.md +111 -0
  50. package/template/skills/en/meta/cross-document-verification/SKILL.md +131 -0
  51. package/template/skills/en/meta/cross-document-verification/references/contradiction-taxonomy.md +73 -0
  52. package/template/skills/en/meta/data-sensibility/SKILL.md +115 -0
  53. package/template/skills/en/meta/document-parsing/SKILL.md +108 -0
  54. package/template/skills/en/meta/document-parsing/references/parser-catalog.md +40 -0
  55. package/template/skills/en/meta/entity-extraction/SKILL.md +129 -0
  56. package/template/skills/en/meta/tree-processing/SKILL.md +103 -0
  57. package/template/skills/en/meta-meta/bootstrap-workspace/SKILL.md +70 -0
  58. package/template/skills/en/meta-meta/dashboard-reporting/SKILL.md +106 -0
  59. package/template/skills/en/meta-meta/dashboard-reporting/scripts/generate_dashboard.py +178 -0
  60. package/template/skills/en/meta-meta/evolution-loop/SKILL.md +210 -0
  61. package/template/skills/en/meta-meta/evolution-loop/references/convergence-guide.md +62 -0
  62. package/template/skills/en/meta-meta/quality-control/SKILL.md +138 -0
  63. package/template/skills/en/meta-meta/quality-control/references/qa-layers.md +92 -0
  64. package/template/skills/en/meta-meta/quality-control/references/sampling-strategies.md +76 -0
  65. package/template/skills/en/meta-meta/rule-extraction/SKILL.md +100 -0
  66. package/template/skills/en/meta-meta/rule-extraction/references/chunking-strategies.md +80 -0
  67. package/template/skills/en/meta-meta/rule-graph/SKILL.md +118 -0
  68. package/template/skills/en/meta-meta/skill-authoring/SKILL.md +108 -0
  69. package/template/skills/en/meta-meta/skill-authoring/references/skill-format-spec.md +78 -0
  70. package/template/skills/en/meta-meta/skill-to-workflow/SKILL.md +150 -0
  71. package/template/skills/en/meta-meta/skill-to-workflow/references/worker-llm-catalog.md +50 -0
  72. package/template/skills/en/meta-meta/task-decomposition/SKILL.md +129 -0
  73. package/template/skills/en/meta-meta/task-decomposition/references/decision-matrix.md +81 -0
  74. package/template/skills/en/meta-meta/version-control/SKILL.md +152 -0
  75. package/template/skills/en/meta-meta/version-control/references/trace-id-spec.md +79 -0
  76. package/template/skills/en/skill-creator/LICENSE.txt +202 -0
  77. package/template/skills/en/skill-creator/SKILL.md +479 -0
  78. package/template/skills/en/skill-creator/agents/analyzer.md +274 -0
  79. package/template/skills/en/skill-creator/agents/comparator.md +202 -0
  80. package/template/skills/en/skill-creator/agents/grader.md +223 -0
  81. package/template/skills/en/skill-creator/assets/eval_review.html +146 -0
  82. package/template/skills/en/skill-creator/eval-viewer/generate_review.py +471 -0
  83. package/template/skills/en/skill-creator/eval-viewer/viewer.html +1325 -0
  84. package/template/skills/en/skill-creator/references/schemas.md +430 -0
  85. package/template/skills/en/skill-creator/scripts/__init__.py +0 -0
  86. package/template/skills/en/skill-creator/scripts/aggregate_benchmark.py +401 -0
  87. package/template/skills/en/skill-creator/scripts/generate_report.py +326 -0
  88. package/template/skills/en/skill-creator/scripts/improve_description.py +248 -0
  89. package/template/skills/en/skill-creator/scripts/package_skill.py +136 -0
  90. package/template/skills/en/skill-creator/scripts/quick_validate.py +103 -0
  91. package/template/skills/en/skill-creator/scripts/run_eval.py +310 -0
  92. package/template/skills/en/skill-creator/scripts/run_loop.py +332 -0
  93. package/template/skills/en/skill-creator/scripts/utils.py +47 -0
  94. package/template/skills/zh/meta/compliance-judgment/SKILL.md +303 -0
  95. package/template/skills/zh/meta/compliance-judgment/references/output-format.md +151 -0
  96. package/template/skills/zh/meta/confidence-system/SKILL.md +228 -0
  97. package/template/skills/zh/meta/corner-case-management/SKILL.md +235 -0
  98. package/template/skills/zh/meta/cross-document-verification/SKILL.md +241 -0
  99. package/template/skills/zh/meta/cross-document-verification/references/contradiction-taxonomy.md +73 -0
  100. package/template/skills/zh/meta/data-sensibility/SKILL.md +235 -0
  101. package/template/skills/zh/meta/document-parsing/SKILL.md +168 -0
  102. package/template/skills/zh/meta/document-parsing/references/parser-catalog.md +40 -0
  103. package/template/skills/zh/meta/entity-extraction/SKILL.md +276 -0
  104. package/template/skills/zh/meta/tree-processing/SKILL.md +233 -0
  105. package/template/skills/zh/meta-meta/bootstrap-workspace/SKILL.md +147 -0
  106. package/template/skills/zh/meta-meta/dashboard-reporting/SKILL.md +281 -0
  107. package/template/skills/zh/meta-meta/dashboard-reporting/scripts/generate_dashboard.py +178 -0
  108. package/template/skills/zh/meta-meta/evolution-loop/SKILL.md +302 -0
  109. package/template/skills/zh/meta-meta/evolution-loop/references/convergence-guide.md +62 -0
  110. package/template/skills/zh/meta-meta/quality-control/SKILL.md +269 -0
  111. package/template/skills/zh/meta-meta/quality-control/references/qa-layers.md +92 -0
  112. package/template/skills/zh/meta-meta/quality-control/references/sampling-strategies.md +76 -0
  113. package/template/skills/zh/meta-meta/rule-extraction/SKILL.md +208 -0
  114. package/template/skills/zh/meta-meta/rule-extraction/references/chunking-strategies.md +80 -0
  115. package/template/skills/zh/meta-meta/rule-graph/SKILL.md +203 -0
  116. package/template/skills/zh/meta-meta/skill-authoring/SKILL.md +235 -0
  117. package/template/skills/zh/meta-meta/skill-authoring/references/skill-format-spec.md +78 -0
  118. package/template/skills/zh/meta-meta/skill-to-workflow/SKILL.md +275 -0
  119. package/template/skills/zh/meta-meta/skill-to-workflow/references/worker-llm-catalog.md +50 -0
  120. package/template/skills/zh/meta-meta/task-decomposition/SKILL.md +224 -0
  121. package/template/skills/zh/meta-meta/task-decomposition/references/decision-matrix.md +81 -0
  122. package/template/skills/zh/meta-meta/version-control/SKILL.md +284 -0
  123. package/template/skills/zh/meta-meta/version-control/references/trace-id-spec.md +79 -0
  124. package/template/skills/zh/skill-creator/LICENSE.txt +202 -0
  125. package/template/skills/zh/skill-creator/SKILL.md +479 -0
  126. package/template/skills/zh/skill-creator/agents/analyzer.md +274 -0
  127. package/template/skills/zh/skill-creator/agents/comparator.md +202 -0
  128. package/template/skills/zh/skill-creator/agents/grader.md +223 -0
  129. package/template/skills/zh/skill-creator/assets/eval_review.html +146 -0
  130. package/template/skills/zh/skill-creator/eval-viewer/generate_review.py +471 -0
  131. package/template/skills/zh/skill-creator/eval-viewer/viewer.html +1325 -0
  132. package/template/skills/zh/skill-creator/references/schemas.md +430 -0
  133. package/template/skills/zh/skill-creator/scripts/__init__.py +0 -0
  134. package/template/skills/zh/skill-creator/scripts/aggregate_benchmark.py +401 -0
  135. package/template/skills/zh/skill-creator/scripts/generate_report.py +326 -0
  136. package/template/skills/zh/skill-creator/scripts/improve_description.py +248 -0
  137. package/template/skills/zh/skill-creator/scripts/package_skill.py +136 -0
  138. package/template/skills/zh/skill-creator/scripts/quick_validate.py +103 -0
  139. package/template/skills/zh/skill-creator/scripts/run_eval.py +310 -0
  140. package/template/skills/zh/skill-creator/scripts/run_loop.py +332 -0
  141. package/template/skills/zh/skill-creator/scripts/utils.py +47 -0
@@ -0,0 +1,284 @@
1
+ ---
2
+ name: version-control
3
+ description: Manage versioning of skills, workflows, prompts, and system configuration throughout the lifecycle. Use when skills are modified, workflows are regenerated, prompts are updated, or any artifact needs rollback capability. Covers what to version, how to version with file-system conventions, maintaining a version manifest, and rollback procedures. Also use when comparing performance between versions or when production results need to trace back to the exact workflow version that produced them.
4
+ ---
5
+
6
+ # 版本控制与制品溯源
7
+
8
+ ## 设计目标
9
+
10
+ 这套版本控制机制不是为了多人协作——在这个系统中,编程智能体是唯一的执行者。版本控制的目的是:
11
+
12
+ 1. **可回退**:任何修改如果导致回归,能立即恢复到上一个正常版本
13
+ 2. **可溯源**:生产环境输出的每一条核查结果,都能追溯到产出它的工作流版本、提示词版本、技能版本
14
+ 3. **可对比**:能清晰地比较不同版本之间的性能差异,作为迭代决策的依据
15
+
16
+ 轻量是原则。不需要 Git 那样的完整版本控制系统。基于文件系统的命名约定和一份版本清单即可。
17
+
18
+ ## 需要版本管理的制品
19
+
20
+ ### 技能(Skills)
21
+
22
+ - `rule-skills/R001-xxx/SKILL.md`
23
+ - `rule-skills/R001-xxx/scripts/*.py`
24
+ - `rule-skills/R001-xxx/references/*.md`
25
+
26
+ 技能的版本通过 CHANGELOG.md 追踪,文件本身只保留最新版本。重大变更前手动备份。
27
+
28
+ ### 工作流(Workflows)
29
+
30
+ - `workflows/R001-xxx/workflow_v1.py`
31
+ - `workflows/R001-xxx/workflow_v2.py`
32
+ - ...
33
+
34
+ 工作流采用文件名版本号机制,所有历史版本的文件共存于同一目录。
35
+
36
+ ### 提示词(Prompts)
37
+
38
+ - `workflows/R001-xxx/prompts/extract_dates_v1.md`
39
+ - `workflows/R001-xxx/prompts/extract_dates_v2.md`
40
+
41
+ 提示词与工作流类似,采用文件名版本号,历史版本保留。
42
+
43
+ ### 配置(Configs)
44
+
45
+ - `workflows/R001-xxx/config.json`
46
+
47
+ 配置文件只保留当前版本,但每次修改前将旧版本记录到版本清单中。
48
+
49
+ ## 不需要版本管理的内容
50
+
51
+ - **日志文件**(`logs/`):日志本身就是时间序列数据,天然具有版本属性
52
+ - **输出结果**(`Output/`):每次产出带有时间戳和版本引用,无需单独版本化
53
+ - **测试样本**(`assets/samples.json`):只增不减,通过 `discovered_in` 字段标注来源轮次
54
+ - **规则目录**(`rule-catalog.json`):实时状态文件,不需要版本历史
55
+
56
+ ## 文件系统版本命名规范
57
+
58
+ ### 工作流与提示词
59
+
60
+ 采用 `_v{N}` 后缀:
61
+
62
+ ```
63
+ workflow_v1.py # 初始蒸馏版本
64
+ workflow_v2.py # 第一次优化
65
+ workflow_v3.py # 第二次优化
66
+
67
+ extract_dates_v1.md # 初始提示词
68
+ extract_dates_v2.md # 优化后的提示词
69
+ ```
70
+
71
+ 版本号单调递增,从不复用。即使 v3 回退到 v1 的逻辑,也不删除 v2 和 v3,而是创建 v4(内容与 v1 相同,但在版本清单中注明原因)。
72
+
73
+ ### 技能文件
74
+
75
+ 技能采用 CHANGELOG.md 记录变更,主文件始终覆盖更新:
76
+
77
+ ```markdown
78
+ ## v1.2 - 2025-04-01
79
+ - 补充框架合同展期的判定逻辑
80
+ - 来源:演化循环第3轮
81
+
82
+ ## v1.1 - 2025-03-28
83
+ - 修正边界条件:日期等于到期日视为通过
84
+ - 来源:演化循环第2轮
85
+
86
+ ## v1.0 - 2025-03-25
87
+ - 初始版本
88
+ ```
89
+
90
+ 如果需要回退技能,根据 CHANGELOG 中的描述手动恢复。对于关键技能,可在修改前备份为 `SKILL.md.bak`。
91
+
92
+ ## 版本清单:versions.json
93
+
94
+ 工作空间根目录的 `versions.json` 是全局版本清单,记录所有制品的当前版本及历史版本摘要。
95
+
96
+ ```json
97
+ {
98
+ "workspace_version": "1.0.0",
99
+ "last_updated": "2025-04-01T18:00:00Z",
100
+ "skills": {
101
+ "R001-invoice-date-validity": {
102
+ "current_version": "v1.2",
103
+ "skill_accuracy": 0.95,
104
+ "last_modified": "2025-04-01",
105
+ "status": "workflow_distilled"
106
+ },
107
+ "R002-amount-consistency": {
108
+ "current_version": "v1.0",
109
+ "skill_accuracy": 0.88,
110
+ "last_modified": "2025-03-30",
111
+ "status": "skill_testing"
112
+ }
113
+ },
114
+ "workflows": {
115
+ "R001-invoice-date-validity": {
116
+ "current_version": "v2",
117
+ "workflow_accuracy": 0.92,
118
+ "model_tier": "TIER3",
119
+ "prompt_versions": {
120
+ "extract_dates": "v2",
121
+ "judge_validity": "v1"
122
+ },
123
+ "distilled_from_skill_version": "v1.2",
124
+ "last_modified": "2025-04-01",
125
+ "status": "production"
126
+ }
127
+ },
128
+ "config": {
129
+ "env_hash": "a3b2c1d4",
130
+ "last_modified": "2025-03-25"
131
+ }
132
+ }
133
+ ```
134
+
135
+ ### 版本清单的更新时机
136
+
137
+ 每次以下操作发生时,必须同步更新 versions.json:
138
+
139
+ - 技能 CHANGELOG 新增条目
140
+ - 工作流创建新版本文件
141
+ - 提示词创建新版本文件
142
+ - 模型层级变更
143
+ - 工作流进入或退出生产状态
144
+
145
+ ## 何时创建新版本
146
+
147
+ **核心规则:在修改任何可工作的制品之前,先创建新版本。**
148
+
149
+ 具体而言:
150
+
151
+ ### 必须创建新版本的场景
152
+
153
+ - 优化工作流的提取逻辑 → 创建 `workflow_v{N+1}.py`
154
+ - 修改提示词 → 创建 `{提示词名}_v{N+1}.md`(例如 `extract_dates_v3.md`)
155
+ - 演化循环要求修改判定逻辑 → 更新技能 CHANGELOG,创建工作流新版本
156
+
157
+ ### 不需要创建新版本的场景
158
+
159
+ - 修正注释或格式(不影响逻辑的变更)
160
+ - 更新测试样本数据
161
+ - 修改日志内容
162
+
163
+ ## 回退操作
164
+
165
+ 当新版本导致回归或性能下降时,执行回退:
166
+
167
+ ### 回退步骤
168
+
169
+ 1. 在 versions.json 中将 `current_version` 修改为目标版本
170
+ 2. 在工作流 config.json 中更新版本引用
171
+ 3. 在 `logs/evolution/` 中记录回退原因
172
+ 4. **不要删除失败的版本文件**——保留用于后续分析
173
+
174
+ ### 回退日志
175
+
176
+ ```json
177
+ {
178
+ "action": "rollback",
179
+ "rule_id": "R001",
180
+ "artifact": "workflow",
181
+ "from_version": "v3",
182
+ "to_version": "v2",
183
+ "reason": "v3 在框架合同场景下准确率从 92% 下降到 78%",
184
+ "timestamp": "2025-04-01T20:00:00Z",
185
+ "failed_version_kept": true
186
+ }
187
+ ```
188
+
189
+ ## 结果溯源
190
+
191
+ 生产环境输出的每一条核查结果都必须包含版本信息:
192
+
193
+ ```json
194
+ {
195
+ "document_id": "DOC-2025-0042",
196
+ "rule_id": "R001",
197
+ "verdict": "pass",
198
+ "produced_by": {
199
+ "workflow_version": "v2",
200
+ "prompt_versions": {
201
+ "extract_dates": "v2",
202
+ "judge_validity": "v1"
203
+ },
204
+ "model_tier": "TIER3",
205
+ "skill_version": "v1.2",
206
+ "workspace_version": "1.0.0"
207
+ },
208
+ "timestamp": "2025-04-01T18:30:00Z"
209
+ }
210
+ ```
211
+
212
+ 这样,当质控发现某条结果有误时,可以精确定位到产出它的工作流版本和提示词版本,避免在排查过程中浪费时间。
213
+
214
+ ## 核查溯源标识(Trace ID)
215
+
216
+ 版本溯源解决的是「哪个版本产出了这条结果」。溯源标识解决的是更深一层的问题:**这条核查结论的证据在原文的哪个位置?**
217
+
218
+ 在每条核查结果中嵌入一个永久性溯源标识,直接链接到原始证据的精确位置。
219
+
220
+ ### 溯源标识结构
221
+
222
+ ```json
223
+ {
224
+ "trace_id": "R001-DOC042-P3-S2-C120:180",
225
+ "source_location": {
226
+ "document": "bank_annual_report_2024.pdf",
227
+ "page": 3,
228
+ "section": "3.2 资本充足率",
229
+ "char_range": [120, 180]
230
+ },
231
+ "rule_version": "v1.2",
232
+ "workflow_version": "v2",
233
+ "model_tier": "TIER3"
234
+ }
235
+ ```
236
+
237
+ ### 三个关键属性
238
+
239
+ - **嵌入式,非日志式**:溯源标识嵌入在核查结果数据内部,而非保存在独立的日志文件中。无论结果被导出、重新导入、聚合还是被下游系统消费,溯源标识始终随结果同行。
240
+ - **永久性**:溯源标识一旦生成,永不修改。标识的生成是确定性的:对同一文档重新核查会生成带有新时间戳的结果记录,但只要证据位置未变,溯源标识保持相同;文档被修改后会获得新的 `document_id`,从而产生新的溯源标识——旧标识保留在历史结果中。
241
+ - **自包含**:溯源标识本身编码了足够的信息来定位原始证据,无需查询外部索引。
242
+
243
+ ### 为什么这很重要
244
+
245
+ 例如,银保监会现场检查时,审计人员问「你们为什么判定这笔贷款符合第十五条?」——溯源标识可以直接指向原文的精确段落、判定时使用的规则版本和工作流版本。没有溯源标识,这种回溯需要手动关联日志、结果和原始文档——在监管审计中,这种手动关联是不可接受的。
246
+
247
+ 参见 `references/trace-id-spec.md` 获取完整的格式规范和生成算法。
248
+
249
+ ## 与演化循环的集成
250
+
251
+ 演化循环的每一轮迭代,都是版本控制的触发事件:
252
+
253
+ ```
254
+ 演化循环第 N 轮开始
255
+ → 记录当前版本快照
256
+ → 执行修改
257
+ → 创建新版本
258
+ → 测试
259
+ → 如果回归 → 回退版本
260
+ → 如果通过 → 更新版本清单
261
+ 演化循环第 N 轮结束
262
+ ```
263
+
264
+ 版本控制为演化循环提供安全网——无论怎么改,都可以回到上一个已知好的状态。
265
+
266
+ ## 版本对比
267
+
268
+ 当需要评估不同版本的性能差异时,利用日志中的测试结果进行对比:
269
+
270
+ ```json
271
+ {
272
+ "comparison": {
273
+ "rule_id": "R001",
274
+ "versions": ["v1", "v2"],
275
+ "metrics": {
276
+ "v1": {"accuracy": 0.85, "avg_cost": 0.005, "model_tier": "TIER2"},
277
+ "v2": {"accuracy": 0.92, "avg_cost": 0.003, "model_tier": "TIER3"}
278
+ },
279
+ "conclusion": "v2 在准确率和成本两个维度均优于 v1"
280
+ }
281
+ }
282
+ ```
283
+
284
+ 这些对比数据也是仪表盘展示的重要素材。
@@ -0,0 +1,79 @@
1
+ # Trace ID Specification
2
+
3
+ Trace IDs embed source evidence pointers directly inside verification results. This document defines the format, generation rules, and integration points.
4
+
5
+ ## Format
6
+
7
+ ```
8
+ {rule_id}-{document_id}-P{page}-S{section}-C{char_start}:{char_end}
9
+ ```
10
+
11
+ | Segment | Description | Example |
12
+ |---------|-------------|---------|
13
+ | `rule_id` | The rule that produced this result. Matches the ID in `rule-catalog.json`. | `R001` |
14
+ | `document_id` | A short identifier for the source document. Derived from filename or batch assignment. | `DOC042` |
15
+ | `P{page}` | The 1-indexed page number where the source evidence appears. | `P3` |
16
+ | `S{section}` | The section number within the page, following the document's own numbering. | `S2` |
17
+ | `C{char_start}:{char_end}` | Character offset range within the extracted text block that constitutes the evidence. | `C120:180` |
18
+
19
+ Full example: `R001-DOC042-P3-S2-C120:180`
20
+
21
+ When a rule draws evidence from multiple locations, generate one trace ID per location and store them as an array in the result.
22
+
23
+ ## Generation
24
+
25
+ Trace ID generation is **deterministic**: the same rule applied to the same document at the same location always produces the same trace ID. This is achieved by deriving every segment from stable inputs:
26
+
27
+ - `rule_id` comes from the rule catalog.
28
+ - `document_id` comes from the document's filename or a developer-user-assigned identifier.
29
+ - Page, section, and character range come from the extraction step.
30
+
31
+ Trace IDs are generated at verification time, immediately after entity extraction identifies the source location. They are never modified after creation. Re-verifying the same document produces new result records with new timestamps but identical trace IDs (because the source location has not changed). If the document is modified, the new version gets a new `document_id`, producing different trace IDs.
32
+
33
+ ## Collision Avoidance
34
+
35
+ The combination of rule ID + document ID + page + section + character range uniquely identifies a piece of evidence, so collisions do not occur as long as document IDs are unique. Two different pieces of evidence would need to match on all five segments simultaneously.
36
+
37
+ If document IDs are not guaranteed unique across batches (e.g., multiple batches contain files named `report.pdf`), prefix the document ID with the batch identifier: `B003-DOC042`. This extends the trace ID format to `R001-B003-DOC042-P3-S2-C120:180`.
38
+
39
+ Do not use random UUIDs. Deterministic trace IDs allow deduplication and comparison across verification runs.
40
+
41
+ ## Storage Overhead
42
+
43
+ A single trace ID string is approximately 30-50 bytes. The full trace ID object (including `source_location`, `rule_version`, `workflow_version`, and `model_tier`) is approximately 100-200 bytes in JSON.
44
+
45
+ For a typical batch of 1000 verification results, trace IDs add roughly 100-200 KB of storage. This is negligible relative to the result data itself and the source documents.
46
+
47
+ ## Surviving Export/Re-Import
48
+
49
+ Trace IDs are embedded in the result JSON structure, not stored in external metadata, sidecar files, or database columns that might be lost during export.
50
+
51
+ Any system that consumes the verification result JSON automatically receives the trace IDs. Specific scenarios:
52
+
53
+ - **CSV export**: The `trace_id` field becomes a column. A developer user reviewing results in a spreadsheet can copy a trace ID and paste it back to locate the source evidence.
54
+ - **Aggregation**: When results from multiple batches are merged, trace IDs remain attached to their individual results. No re-linking is needed.
55
+ - **Downstream APIs**: Systems consuming verification results via API receive trace IDs as part of the payload. They can store, index, or display them without any awareness of the trace ID format.
56
+ - **Archival**: Archived results retain full traceability years later, even if the original verification system has evolved.
57
+
58
+ ## Integration with Cross-Document Verification
59
+
60
+ When `cross-document-verification` detects a contradiction between two documents, reference trace IDs from both sides:
61
+
62
+ ```json
63
+ {
64
+ "contradiction": {
65
+ "field": "total_assets",
66
+ "document_a": {
67
+ "trace_id": "R005-DOC042-P7-S1-C200:260",
68
+ "value": "1,234,567"
69
+ },
70
+ "document_b": {
71
+ "trace_id": "R005-DOC043-P3-S2-C80:140",
72
+ "value": "1,234,590"
73
+ },
74
+ "discrepancy": "23"
75
+ }
76
+ }
77
+ ```
78
+
79
+ This creates a linked evidence chain: auditors can follow both trace IDs to the exact locations in both documents, verify the extracted values, and determine which document (if either) is correct. Without trace IDs, cross-document contradictions require manual search through both documents to find the relevant passages.
@@ -0,0 +1,202 @@
1
+
2
+ Apache License
3
+ Version 2.0, January 2004
4
+ http://www.apache.org/licenses/
5
+
6
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7
+
8
+ 1. Definitions.
9
+
10
+ "License" shall mean the terms and conditions for use, reproduction,
11
+ and distribution as defined by Sections 1 through 9 of this document.
12
+
13
+ "Licensor" shall mean the copyright owner or entity authorized by
14
+ the copyright owner that is granting the License.
15
+
16
+ "Legal Entity" shall mean the union of the acting entity and all
17
+ other entities that control, are controlled by, or are under common
18
+ control with that entity. For the purposes of this definition,
19
+ "control" means (i) the power, direct or indirect, to cause the
20
+ direction or management of such entity, whether by contract or
21
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
22
+ outstanding shares, or (iii) beneficial ownership of such entity.
23
+
24
+ "You" (or "Your") shall mean an individual or Legal Entity
25
+ exercising permissions granted by this License.
26
+
27
+ "Source" form shall mean the preferred form for making modifications,
28
+ including but not limited to software source code, documentation
29
+ source, and configuration files.
30
+
31
+ "Object" form shall mean any form resulting from mechanical
32
+ transformation or translation of a Source form, including but
33
+ not limited to compiled object code, generated documentation,
34
+ and conversions to other media types.
35
+
36
+ "Work" shall mean the work of authorship, whether in Source or
37
+ Object form, made available under the License, as indicated by a
38
+ copyright notice that is included in or attached to the work
39
+ (an example is provided in the Appendix below).
40
+
41
+ "Derivative Works" shall mean any work, whether in Source or Object
42
+ form, that is based on (or derived from) the Work and for which the
43
+ editorial revisions, annotations, elaborations, or other modifications
44
+ represent, as a whole, an original work of authorship. For the purposes
45
+ of this License, Derivative Works shall not include works that remain
46
+ separable from, or merely link (or bind by name) to the interfaces of,
47
+ the Work and Derivative Works thereof.
48
+
49
+ "Contribution" shall mean any work of authorship, including
50
+ the original version of the Work and any modifications or additions
51
+ to that Work or Derivative Works thereof, that is intentionally
52
+ submitted to Licensor for inclusion in the Work by the copyright owner
53
+ or by an individual or Legal Entity authorized to submit on behalf of
54
+ the copyright owner. For the purposes of this definition, "submitted"
55
+ means any form of electronic, verbal, or written communication sent
56
+ to the Licensor or its representatives, including but not limited to
57
+ communication on electronic mailing lists, source code control systems,
58
+ and issue tracking systems that are managed by, or on behalf of, the
59
+ Licensor for the purpose of discussing and improving the Work, but
60
+ excluding communication that is conspicuously marked or otherwise
61
+ designated in writing by the copyright owner as "Not a Contribution."
62
+
63
+ "Contributor" shall mean Licensor and any individual or Legal Entity
64
+ on behalf of whom a Contribution has been received by Licensor and
65
+ subsequently incorporated within the Work.
66
+
67
+ 2. Grant of Copyright License. Subject to the terms and conditions of
68
+ this License, each Contributor hereby grants to You a perpetual,
69
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70
+ copyright license to reproduce, prepare Derivative Works of,
71
+ publicly display, publicly perform, sublicense, and distribute the
72
+ Work and such Derivative Works in Source or Object form.
73
+
74
+ 3. Grant of Patent License. Subject to the terms and conditions of
75
+ this License, each Contributor hereby grants to You a perpetual,
76
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77
+ (except as stated in this section) patent license to make, have made,
78
+ use, offer to sell, sell, import, and otherwise transfer the Work,
79
+ where such license applies only to those patent claims licensable
80
+ by such Contributor that are necessarily infringed by their
81
+ Contribution(s) alone or by combination of their Contribution(s)
82
+ with the Work to which such Contribution(s) was submitted. If You
83
+ institute patent litigation against any entity (including a
84
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
85
+ or a Contribution incorporated within the Work constitutes direct
86
+ or contributory patent infringement, then any patent licenses
87
+ granted to You under this License for that Work shall terminate
88
+ as of the date such litigation is filed.
89
+
90
+ 4. Redistribution. You may reproduce and distribute copies of the
91
+ Work or Derivative Works thereof in any medium, with or without
92
+ modifications, and in Source or Object form, provided that You
93
+ meet the following conditions:
94
+
95
+ (a) You must give any other recipients of the Work or
96
+ Derivative Works a copy of this License; and
97
+
98
+ (b) You must cause any modified files to carry prominent notices
99
+ stating that You changed the files; and
100
+
101
+ (c) You must retain, in the Source form of any Derivative Works
102
+ that You distribute, all copyright, patent, trademark, and
103
+ attribution notices from the Source form of the Work,
104
+ excluding those notices that do not pertain to any part of
105
+ the Derivative Works; and
106
+
107
+ (d) If the Work includes a "NOTICE" text file as part of its
108
+ distribution, then any Derivative Works that You distribute must
109
+ include a readable copy of the attribution notices contained
110
+ within such NOTICE file, excluding those notices that do not
111
+ pertain to any part of the Derivative Works, in at least one
112
+ of the following places: within a NOTICE text file distributed
113
+ as part of the Derivative Works; within the Source form or
114
+ documentation, if provided along with the Derivative Works; or,
115
+ within a display generated by the Derivative Works, if and
116
+ wherever such third-party notices normally appear. The contents
117
+ of the NOTICE file are for informational purposes only and
118
+ do not modify the License. You may add Your own attribution
119
+ notices within Derivative Works that You distribute, alongside
120
+ or as an addendum to the NOTICE text from the Work, provided
121
+ that such additional attribution notices cannot be construed
122
+ as modifying the License.
123
+
124
+ You may add Your own copyright statement to Your modifications and
125
+ may provide additional or different license terms and conditions
126
+ for use, reproduction, or distribution of Your modifications, or
127
+ for any such Derivative Works as a whole, provided Your use,
128
+ reproduction, and distribution of the Work otherwise complies with
129
+ the conditions stated in this License.
130
+
131
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
132
+ any Contribution intentionally submitted for inclusion in the Work
133
+ by You to the Licensor shall be under the terms and conditions of
134
+ this License, without any additional terms or conditions.
135
+ Notwithstanding the above, nothing herein shall supersede or modify
136
+ the terms of any separate license agreement you may have executed
137
+ with Licensor regarding such Contributions.
138
+
139
+ 6. Trademarks. This License does not grant permission to use the trade
140
+ names, trademarks, service marks, or product names of the Licensor,
141
+ except as required for reasonable and customary use in describing the
142
+ origin of the Work and reproducing the content of the NOTICE file.
143
+
144
+ 7. Disclaimer of Warranty. Unless required by applicable law or
145
+ agreed to in writing, Licensor provides the Work (and each
146
+ Contributor provides its Contributions) on an "AS IS" BASIS,
147
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148
+ implied, including, without limitation, any warranties or conditions
149
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150
+ PARTICULAR PURPOSE. You are solely responsible for determining the
151
+ appropriateness of using or redistributing the Work and assume any
152
+ risks associated with Your exercise of permissions under this License.
153
+
154
+ 8. Limitation of Liability. In no event and under no legal theory,
155
+ whether in tort (including negligence), contract, or otherwise,
156
+ unless required by applicable law (such as deliberate and grossly
157
+ negligent acts) or agreed to in writing, shall any Contributor be
158
+ liable to You for damages, including any direct, indirect, special,
159
+ incidental, or consequential damages of any character arising as a
160
+ result of this License or out of the use or inability to use the
161
+ Work (including but not limited to damages for loss of goodwill,
162
+ work stoppage, computer failure or malfunction, or any and all
163
+ other commercial damages or losses), even if such Contributor
164
+ has been advised of the possibility of such damages.
165
+
166
+ 9. Accepting Warranty or Additional Liability. While redistributing
167
+ the Work or Derivative Works thereof, You may choose to offer,
168
+ and charge a fee for, acceptance of support, warranty, indemnity,
169
+ or other liability obligations and/or rights consistent with this
170
+ License. However, in accepting such obligations, You may act only
171
+ on Your own behalf and on Your sole responsibility, not on behalf
172
+ of any other Contributor, and only if You agree to indemnify,
173
+ defend, and hold each Contributor harmless for any liability
174
+ incurred by, or claims asserted against, such Contributor by reason
175
+ of your accepting any such warranty or additional liability.
176
+
177
+ END OF TERMS AND CONDITIONS
178
+
179
+ APPENDIX: How to apply the Apache License to your work.
180
+
181
+ To apply the Apache License to your work, attach the following
182
+ boilerplate notice, with the fields enclosed by brackets "[]"
183
+ replaced with your own identifying information. (Don't include
184
+ the brackets!) The text should be enclosed in the appropriate
185
+ comment syntax for the file format. We also recommend that a
186
+ file or class name and description of purpose be included on the
187
+ same "printed page" as the copyright notice for easier
188
+ identification within third-party archives.
189
+
190
+ Copyright [yyyy] [name of copyright owner]
191
+
192
+ Licensed under the Apache License, Version 2.0 (the "License");
193
+ you may not use this file except in compliance with the License.
194
+ You may obtain a copy of the License at
195
+
196
+ http://www.apache.org/licenses/LICENSE-2.0
197
+
198
+ Unless required by applicable law or agreed to in writing, software
199
+ distributed under the License is distributed on an "AS IS" BASIS,
200
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201
+ See the License for the specific language governing permissions and
202
+ limitations under the License.