ai-execution-protocol 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89):
  1. package/AGENTS.md +12 -1
  2. package/README.md +54 -2
  3. package/behavior/audit-checklist.yaml +55 -0
  4. package/behavior/contract.yaml +156 -0
  5. package/dataset/README.md +38 -0
  6. package/dataset/raw-examples.md +36 -0
  7. package/dataset/train.jsonl +3 -0
  8. package/dataset/validation.jsonl +2 -0
  9. package/dist/minimal/AGENTS.md +8 -1
  10. package/dist/minimal/README.md +3 -0
  11. package/dist/minimal/behavior/audit-checklist.yaml +15 -0
  12. package/dist/minimal/behavior/contract.yaml +29 -0
  13. package/dist/minimal/canonical-state.yaml +1 -1
  14. package/dist/minimal/capabilities/registry.yaml +48 -0
  15. package/dist/minimal/context-map.yaml +2 -1
  16. package/dist/minimal/ide-rules/instruction-block.md +23 -0
  17. package/dist/minimal/memory/INDEX.yaml +1 -1
  18. package/dist/minimal/protocol/README.yaml +11 -1
  19. package/dist/minimal/protocol/capability-gate.yaml +56 -0
  20. package/dist/minimal/protocol/capability-router.yaml +123 -0
  21. package/dist/minimal/protocol/context-rules.yaml +2 -1
  22. package/dist/minimal/protocol/fast-path.yaml +8 -1
  23. package/dist/minimal/protocol/intelligence-router.yaml +63 -0
  24. package/dist/minimal/protocol/route-packs.yaml +49 -1
  25. package/dist/minimal/protocol/router.yaml +35 -1
  26. package/docs/00-visao-geral.md +41 -0
  27. package/docs/01-modelo-de-execucao.md +25 -0
  28. package/docs/02-niveis-de-risco.md +62 -0
  29. package/docs/03-mapeamento-antes-de-alterar.md +48 -0
  30. package/docs/04-janela-de-contexto.md +56 -0
  31. package/docs/05-validacao-e-entrega.md +48 -0
  32. package/docs/06-memoria-e-continuidade.md +27 -0
  33. package/docs/07-legibilidade-para-ia.md +47 -0
  34. package/docs/08-posicionamento.md +48 -0
  35. package/docs/09-governanca-de-mudancas.md +48 -0
  36. package/docs/10-economia-de-prompt.md +79 -0
  37. package/docs/11-retencao-de-resultados.md +26 -0
  38. package/docs/12-instalacao-em-outro-projeto.md +254 -0
  39. package/docs/13-uso-em-ides.md +137 -0
  40. package/docs/14-publicacao.md +128 -0
  41. package/docs/15-contexto-persistente.md +204 -0
  42. package/docs/16-release-e-atualizacao.md +146 -0
  43. package/docs/17-documentacao-atomica.md +117 -0
  44. package/docs/18-memoria-adaptativa.md +107 -0
  45. package/docs/19-orcamento-de-contexto.md +63 -0
  46. package/docs/20-validacao-seletiva.md +46 -0
  47. package/docs/21-roteamento-de-capacidades.md +121 -0
  48. package/docs/22-roadmap-v1.md +163 -0
  49. package/docs/23-contrato-comportamental.md +116 -0
  50. package/docs/24-gate-de-capacidades-e-inteligencia.md +109 -0
  51. package/docs/README.md +58 -0
  52. package/eval/README.md +27 -0
  53. package/eval/rubric.yaml +57 -0
  54. package/eval/sample-result.yaml +28 -0
  55. package/install-manifest.json +38 -2
  56. package/package.json +9 -2
  57. package/protocol/README.yaml +11 -1
  58. package/protocol/capability-gate.yaml +56 -0
  59. package/protocol/capability-router.yaml +123 -0
  60. package/protocol/context-rules.yaml +2 -1
  61. package/protocol/fast-path.yaml +8 -1
  62. package/protocol/intelligence-router.yaml +63 -0
  63. package/protocol/route-packs.yaml +49 -1
  64. package/protocol/router.yaml +35 -1
  65. package/roadmap/v1.yaml +139 -0
  66. package/schema/README.md +26 -0
  67. package/schema/behavior-contract.schema.json +31 -0
  68. package/schema/capability-registry.schema.json +51 -0
  69. package/schema/evaluated-response.schema.json +27 -0
  70. package/schema/evaluation-result.schema.json +32 -0
  71. package/schema/memory-entry.schema.json +55 -0
  72. package/schema/protocol-rule.schema.json +16 -0
  73. package/schema/protocol-rule.schema.yaml +28 -0
  74. package/schema/test-case.schema.json +44 -0
  75. package/schema/test-case.schema.yaml +37 -0
  76. package/scripts/README.md +79 -1
  77. package/scripts/build_dist.py +3 -0
  78. package/scripts/npm_install_protocol.js +60 -1
  79. package/scripts/verify_install.py +25 -0
  80. package/templates/minimal/AGENTS.md +8 -1
  81. package/templates/minimal/behavior/audit-checklist.yaml +15 -0
  82. package/templates/minimal/behavior/contract.yaml +29 -0
  83. package/templates/minimal/canonical-state.yaml +1 -1
  84. package/templates/minimal/capabilities/registry.yaml +48 -0
  85. package/templates/minimal/context-map.yaml +2 -1
  86. package/templates/minimal/ide-rules/instruction-block.md +23 -0
  87. package/templates/minimal/memory/INDEX.yaml +1 -1
  88. package/templates/minimal/protocol/capability-gate.yaml +10 -0
  89. package/templates/minimal/protocol/intelligence-router.yaml +10 -0
@@ -0,0 +1,123 @@
1
+ id: capability_router
2
+ type: operational_rules
3
+ version: 0.4.0
4
+ purpose: select_only_necessary_skills_mcps_and_tools
5
+ principle: minimum_capability_set_must_preserve_required_quality
6
+ platform_boundary:
7
+ can_control:
8
+ - selection
9
+ - instruction_loading
10
+ - invocation
11
+ - operation_scope
12
+ cannot_guarantee:
13
+ - physical_unloading_of_host_exposed_tools
14
+ - revocation_of_platform_permissions
15
+ rule: exposed_capability_must_remain_unused_until_selected
16
+ entrypoint:
17
+ registry: capabilities/registry.yaml
18
+ selection_flow:
19
+ - classify_task_and_risk
20
+ - define_required_outcomes_and_operations
21
+ - inspect_available_capability_metadata
22
+ - prefer_existing_local_capability
23
+ - select_smallest_set_covering_required_outcomes
24
+ - add_dependency_only_when_selected_capability_requires_it
25
+ - verify_permissions_confirmation_and_validation
26
+ - stop_discovery_when_coverage_is_complete
27
+ capability_types:
28
+ - built_in_reasoning
29
+ - local_tool
30
+ - skill
31
+ - mcp
32
+ - remote_service
33
+ operations:
34
+ read:
35
+ effect: none_or_read_only
36
+ write:
37
+ effect: state_change
38
+ publish:
39
+ effect: external_release
40
+ destructive:
41
+ effect: irreversible_or_high_impact
42
+ risk_policy:
43
+ level_0:
44
+ external_capability_budget: 0
45
+ allow:
46
+ - built_in_reasoning
47
+ expand_when:
48
+ - direct_answer_requires_verified_current_data
49
+ level_1:
50
+ external_capability_budget: 1
51
+ prefer:
52
+ - local_read
53
+ - focused_skill
54
+ level_2:
55
+ external_capability_budget: 3
56
+ prefer:
57
+ - specialized_skill
58
+ - targeted_mcp
59
+ - local_validation
60
+ level_3:
61
+ external_capability_budget: 3
62
+ principle: higher_risk_means_stricter_permissions_not_more_tools
63
+ require:
64
+ - least_privilege
65
+ - explicit_operation_scope
66
+ - confirmation_before_sensitive_write_publish_or_destructive
67
+ - validation_before_and_after
68
+ cost_model:
69
+ dimensions:
70
+ - context_tokens
71
+ - latency
72
+ - remote_calls
73
+ - permission_scope
74
+ - side_effect_risk
75
+ choose_when:
76
+ - required_outcome_is_covered
77
+ - expected_quality_gain_exceeds_incremental_cost
78
+ never_trade:
79
+ - correctness
80
+ - security
81
+ - required_validation
82
+ - current_information_when_task_depends_on_it
83
+ preference_order:
84
+ - built_in_reasoning
85
+ - existing_project_context
86
+ - local_read_tool
87
+ - focused_local_skill
88
+ - targeted_remote_read
89
+ - remote_write
90
+ - publish_or_destructive
91
+ discovery:
92
+ do:
93
+ - use_known_available_capabilities_first
94
+ - search_for_tool_only_when_required_capability_is_missing
95
+ - load_skill_instructions_only_after_selection
96
+ - connect_mcp_only_for_matching_operation
97
+ avoid:
98
+ - loading_all_skills_before_selection
99
+ - listing_all_mcp_resources_without_need
100
+ - installing_adjacent_tools_not_required_by_task
101
+ - continuing_discovery_after_complete_coverage
102
+ permission_policy:
103
+ - read_permission_does_not_imply_write_permission
104
+ - write_permission_does_not_imply_publish_permission
105
+ - memory_never_authorizes_sensitive_operation
106
+ - capability_availability_does_not_authorize_use
107
+ - current_user_request_defines_allowed_scope
108
+ fallback:
109
+ when_required_coverage_is_missing:
110
+ - do_not_execute_incomplete_high_risk_workflow
111
+ - use_safe_local_partial_work_when_independently_valid
112
+ - report_missing_capability
113
+ - request_installation_or_user_action_only_when_required
114
+ delivery:
115
+ include_when_capability_used:
116
+ - selected_capabilities
117
+ - selection_reason
118
+ - operation_scope
119
+ - confirmation_status_when_required
120
+ - validation
121
+ omit:
122
+ - full_available_capability_catalog
123
+ - rejected_capabilities_without_audit_need
@@ -70,6 +70,7 @@ existing_project_files:
70
70
  - .cursorrules
71
71
  - CLAUDE.md
72
72
  - .github/copilot-instructions.md
73
+ - .cursor/rules/ai-execution-protocol.mdc
73
74
  - package_docs
74
75
  - framework_configs
75
76
  behavior:
@@ -78,7 +79,7 @@ existing_project_files:
78
79
  - treat_generated_or_old_docs_as_untrusted_until_verified
79
80
  - keep_protocol_rules_in_AGENTS_and_protocol_folder
80
81
  - use_framework_configs_as_technical_source_when_task_touches_framework
81
- - do_not_duplicate_protocol_rules_across_ide_files
82
+ - duplicate_protocol_rules_across_ide_files_only_with_marked_integration
82
83
  conflict_order:
83
84
  - current_user_request
84
85
  - AGENTS_protocol_block
@@ -1,11 +1,14 @@
1
1
  id: fast_path
2
2
  type: agent_entrypoint
3
- version: 0.3
3
+ version: 0.4.0
4
4
  purpose: minimum_rules_to_start_any_task
5
5
  read_next:
6
6
  - router.yaml
7
7
  - route-packs.yaml
8
8
  - context-budget.yaml
9
+ - capability-router.yaml
10
+ - capability-gate.yaml
11
+ - intelligence-router.yaml
9
12
  - modes.yaml
10
13
  core_rules:
11
14
  - classify_risk_before_action
@@ -24,6 +27,10 @@ core_rules:
24
27
  - use_only_matching_memory_subjects
25
28
  - check_memory_update_result_after_task
26
29
  - use_selective_validation_by_blast_radius
30
+ - select_minimum_capability_set_before_loading_skills_or_mcps
31
+ - require_capability_plan_before_skill_mcp_or_remote_tool_use
32
+ - choose_intelligence_level_proportional_to_risk_and_complexity
33
+ - follow_behavioral_execution_contract
27
34
  risk_short:
28
35
  level_0: answer_only
29
36
  level_1: small_clear_reversible_isolated_change
@@ -0,0 +1,63 @@
1
+ id: intelligence_router
2
+ type: operational_rules
3
+ version: 0.4.0
4
+ purpose: choose_model_reasoning_and_effort_proportional_to_task_need
5
+ principle: use_the_cheapest_sufficient_intelligence_without_trading_correctness
6
+ levels:
7
+ minimal:
8
+ use_when:
9
+ - level_0_direct_answer
10
+ - no_current_external_data_needed
11
+ - no_file_change
12
+ model_need: low_cost_fast
13
+ reasoning_depth: low
14
+ tools: none
15
+ standard:
16
+ use_when:
17
+ - level_1_small_change
18
+ - focused_file_read
19
+ - simple_validation
20
+ model_need: default
21
+ reasoning_depth: medium
22
+ tools: local_only
23
+ deep:
24
+ use_when:
25
+ - level_2_flow_bug
26
+ - refactor
27
+ - ambiguous_impact
28
+ - failed_first_validation
29
+ model_need: stronger_or_more_reasoning
30
+ reasoning_depth: high
31
+ tools: selected_local_or_targeted_remote
32
+ critical:
33
+ use_when:
34
+ - level_3_data_auth_security_deploy_publish_destructive
35
+ - high_blast_radius
36
+ - irreversible_or_external_side_effect
37
+ model_need: strongest_available_for_task
38
+ reasoning_depth: high_with_audit
39
+ tools: least_privilege_confirmed
40
+ escalate_when:
41
+ - risk_level_increases
42
+ - ambiguity_blocks_safe_action
43
+ - validation_fails
44
+ - context_conflict_detected
45
+ - external_current_data_is_required
46
+ - specialized_modality_is_required
47
+ deescalate_when:
48
+ - task_is_direct_answer
49
+ - no_code_or_external_state_needed
50
+ - validation_plan_is_trivial
51
+ - previous_high_risk_assumption_is_not_supported_by_evidence
52
+ never_trade:
53
+ - security
54
+ - correctness
55
+ - required_validation
56
+ - explicit_user_scope
57
+ delivery:
58
+ include_when_level_2_or_3:
59
+ - intelligence_level
60
+ - escalation_reason_if_any
61
+ - why_lower_level_was_not_enough
62
+ omit_for_level_0:
63
+ - model_discussion_unless_user_asks
@@ -1,6 +1,6 @@
1
1
  id: route_packs
2
2
  type: route_summary_index
3
- version: 0.3
3
+ version: 0.4.0
4
4
  purpose: compact_first_read_before_full_route_files
5
5
  principle: read_pack_first_expand_only_when_needed
6
6
  use:
@@ -120,10 +120,12 @@ packs:
120
120
  - run_post_deploy_check_if_executed
121
121
  evaluate_response:
122
122
  read_if_pack_insufficient:
123
+ - ../behavior/contract.yaml
123
124
  - ../eval/rubric.yaml
124
125
  - ../schema/evaluated-response.schema.json
125
126
  do:
126
127
  - score_risk_behavior_avoidance_delivery_clarity
128
+ - check_behavior_contract_alignment
127
129
  - apply_automatic_fail_rules
128
130
  create_or_edit_yaml:
129
131
  read_if_pack_insufficient:
@@ -182,3 +184,49 @@ packs:
182
184
  - infer_checks_from_changed_files
183
185
  - run_smallest_sufficient_validation
184
186
  - expand_when_shared_contract_changes
187
+ capability_selection:
188
+ risk: adaptive
189
+ read_if_pack_insufficient:
190
+ - capability-router.yaml
191
+ - capability-gate.yaml
192
+ - context-budget.yaml
193
+ do:
194
+ - define_required_outcomes_and_operations
195
+ - select_smallest_available_capability_set
196
+ - load_only_selected_skill_or_mcp
197
+ - require_confirmation_for_sensitive_remote_effect
198
+ - audit_used_capabilities_against_selected_plan
199
+ - stop_discovery_when_quality_coverage_is_complete
200
+ intelligence_selection:
201
+ risk: adaptive
202
+ read_if_pack_insufficient:
203
+ - intelligence-router.yaml
204
+ - context-budget.yaml
205
+ do:
206
+ - choose_cheapest_sufficient_intelligence_level
207
+ - escalate_for_risk_ambiguity_validation_failure_or_large_context
208
+ - deescalate_when_task_is_direct_and_low_risk
209
+ - do_not_trade_security_correctness_or_validation_for_cost
210
+ behavior_evaluation:
211
+ risk: 1
212
+ read_if_pack_insufficient:
213
+ - ../behavior/contract.yaml
214
+ - ../behavior/audit-checklist.yaml
215
+ - ../eval/rubric.yaml
216
+ do:
217
+ - compare_response_to_observable_behaviors
218
+ - verify_simple_tasks_are_not_overprocessed
219
+ - verify_critical_tasks_are_not_undercontrolled
220
+ - apply_behavior_automatic_fail_rules
221
+ dataset_preparation:
222
+ risk: 1
223
+ read_if_pack_insufficient:
224
+ - ../behavior/contract.yaml
225
+ - ../behavior/audit-checklist.yaml
226
+ - prompt-economy.yaml
227
+ - ../dataset/README.md
228
+ do:
229
+ - create_examples_from_observable_behavior
230
+ - include_good_bad_and_reason
231
+ - keep_training_examples_consistent
232
+ - avoid_rewarding_bureaucracy
@@ -1,6 +1,6 @@
1
1
  id: protocol_router
2
2
  type: read_router
3
- version: 0.3
3
+ version: 0.4.0
4
4
  purpose: choose_minimum_protocol_files_by_task
5
5
  default_read:
6
6
  - fast-path.yaml
@@ -76,8 +76,17 @@ routes:
76
76
  evaluate_response:
77
77
  read:
78
78
  - fast-path.yaml
79
+ - ../behavior/contract.yaml
79
80
  - ../eval/rubric.yaml
80
81
  - ../schema/evaluated-response.schema.json
82
+ behavior_evaluation:
83
+ risk: 1
84
+ read:
85
+ - fast-path.yaml
86
+ - ../behavior/contract.yaml
87
+ - ../behavior/audit-checklist.yaml
88
+ - ../eval/rubric.yaml
89
+ - ../dataset/README.md
81
90
  create_or_edit_yaml:
82
91
  read:
83
92
  - fast-path.yaml
@@ -113,6 +122,27 @@ routes:
113
122
  read:
114
123
  - fast-path.yaml
115
124
  - selective-validation.yaml
125
+ capability_selection:
126
+ risk: adaptive
127
+ read:
128
+ - fast-path.yaml
129
+ - capability-router.yaml
130
+ - capability-gate.yaml
131
+ - context-budget.yaml
132
+ intelligence_selection:
133
+ risk: adaptive
134
+ read:
135
+ - fast-path.yaml
136
+ - intelligence-router.yaml
137
+ - context-budget.yaml
138
+ dataset_preparation:
139
+ risk: 1
140
+ read:
141
+ - fast-path.yaml
142
+ - ../behavior/contract.yaml
143
+ - ../behavior/audit-checklist.yaml
144
+ - prompt-economy.yaml
145
+ - ../dataset/README.md
116
146
  rules:
117
147
  - start_with_default_read
118
148
  - choose_one_route_if_task_type_is_clear
@@ -120,6 +150,10 @@ rules:
120
150
  - expand_from_route_pack_only_when_needed
121
151
  - apply_context_budget_to_selected_route
122
152
  - retrieve_only_matching_memory_subjects
153
+ - select_capabilities_before_loading_skill_or_connecting_mcp
154
+ - require_capability_gate_before_invocation
155
+ - route_model_or_reasoning_effort_by_risk_and_complexity
156
+ - use_behavior_contract_when_task_is_about_adherence_dataset_or_training
123
157
  - if_route_unclear_read_risk_levels_then_choose_route
124
158
  - do_not_read_docs_unless_protocol_is_insufficient
125
159
  - do_not_read_cases_unless_testing_or_comparing_behavior
@@ -0,0 +1,139 @@
1
+ id: roadmap_v1
2
+ type: release_roadmap
3
+ version: 0.1
4
+ target_release: 1.0.0
5
+ purpose: guide_each_update_until_public_v1
6
+ status: active
7
+ principle:
8
+ - do_not_market_as_stable_before_v1
9
+ - each_release_must_close_one_maturity_gap
10
+ - prove_context_economy_quality_and_safety_with_examples
11
+ - keep_protocol_core_stable_before_broad_public_launch
12
+ current_position:
13
+ current_series: 0.4.x
14
+ maturity: behavioral_execution_layer
15
+ public_positioning: experimental_until_v1
16
+ publish_strategy: publish_packages_for_testing_not_broad_marketing
17
+ v1_success_criteria:
18
+ protocol_stability:
19
+ - risk_levels_stable
20
+ - context_budget_stable
21
+ - adaptive_memory_stable
22
+ - capability_routing_stable
23
+ - selective_validation_stable
24
+ evidence:
25
+ - real_cases_documented
26
+ - before_after_examples_available
27
+ - benchmark_report_current
28
+ - install_and_update_flow_verified
29
+ adoption:
30
+ - codex_guide_clear
31
+ - other_agent_portability_documented
32
+ - troubleshooting_documented
33
+ - contribution_rules_clear
34
+ safety:
35
+ - secret_handling_documented
36
+ - sensitive_actions_require_confirmation
37
+ - memory_deduplication_and_replacement_validated
38
+ - capability_permissions_follow_least_privilege
39
+ release_path:
40
+ - version: 0.3.x
41
+ goal: harden_current_capability_context_memory_stack
42
+ status: completed
43
+ exit_criteria:
44
+ - capability_routing_documented_and_tested
45
+ - docs_commands_and_install_flow_stay_synced
46
+ - no_known_packaging_gap_blocks_testing
47
+ - version: 0.4.0
48
+ goal: add_behavioral_execution_contract_and_economy_reports
49
+ status: current
50
+ focus:
51
+ - behavioral_contract
52
+ - trainable_behavior_units
53
+ - token_and_file_read_savings
54
+ - tools_avoided
55
+ - validation_cost_by_risk
56
+ - quality_preserved_examples
57
+ exit_criteria:
58
+ - behavior_contract_is_installed_and_validated
59
+ - benchmark_report_has_clear_before_after_numbers
60
+ - benchmark_can_be_reproduced_locally
61
+ - version: 0.5.0
62
+ goal: add_real_world_cases
63
+ focus:
64
+ - simple_task
65
+ - medium_risk_bug
66
+ - docs_update
67
+ - capability_or_mcp_task
68
+ - memory_update_task
69
+ - release_task
70
+ exit_criteria:
71
+ - at_least_6_real_or_realistic_cases
72
+ - each_case_has_context_used_validation_and_outcome
73
+ - version: 0.6.0
74
+ goal: document_portability_beyond_codex
75
+ focus:
76
+ - codex_primary_flow
77
+ - cursor_adaptation
78
+ - claude_or_generic_agent_adaptation
79
+ - mcp_capability_boundaries
80
+ exit_criteria:
81
+ - portability_limits_are_explicit
82
+ - codex_specific_rules_are_separated_from_generic_rules
83
+ - version: 0.7.0
84
+ goal: harden_schemas_validation_and_health_checks
85
+ focus:
86
+ - schema_coverage
87
+ - health_check_coverage
88
+ - install_manifest_consistency
89
+ - package_content_checks
90
+ exit_criteria:
91
+ - health_check_catches_missing_core_files
92
+ - release_checks_cover_docs_protocol_templates_and_packages
93
+ - version: 0.8.0
94
+ goal: finish_adoption_documentation
95
+ focus:
96
+ - getting_started
97
+ - install_update_verify
98
+ - project_adaptation
99
+ - troubleshooting
100
+ - contribution_guidelines
101
+ exit_criteria:
102
+ - new_user_can_install_verify_and_understand_core_flow
103
+ - docs_are_atomic_and_indexed
104
+ - version: 0.9.0
105
+ goal: release_candidate
106
+ focus:
107
+ - freeze_core_contracts
108
+ - remove_or_mark_unstable_experimental_parts
109
+ - run_full_validation
110
+ - prepare_v1_release_notes
111
+ exit_criteria:
112
+ - no_known_blocker_for_v1
113
+ - docs_and_protocol_are_consistent
114
+ - packages_install_cleanly
115
+ - version: 1.0.0
116
+ goal: stable_public_release
117
+ focus:
118
+ - stable_protocol_core
119
+ - clear_public_positioning
120
+ - reproducible_evidence
121
+ - safe_installation
122
+ exit_criteria:
123
+ - v1_success_criteria_met
124
+ update_rule:
125
+ before_each_release:
126
+ - read_this_roadmap
127
+ - choose_next_smallest_maturity_gap
128
+ - update_docs_protocol_tests_when_behavior_changes
129
+ - record_completed_and_remaining_exit_criteria
130
+ after_each_release:
131
+ - update_current_position
132
+ - mark_exit_criteria_done_or_pending
133
+ - update_docs_22_roadmap_v1
134
+ - keep_changelog_release_notes_synced
135
+ avoid:
136
+ - broad_marketing_before_v1
137
+ - claiming_security_guarantees
138
+ - changing_core_terms_without_migration_note
139
+ - adding_new_surfaces_without_tests_or_docs
@@ -0,0 +1,26 @@
1
+ # Schemas
2
+
3
+ Esta pasta descreve o formato esperado dos arquivos YAML.
4
+
5
+ Os schemas sao leves e servem como contrato de organizacao. Eles ajudam a IA a
6
+ manter arquivos parecidos entre si.
7
+
8
+ ## Arquivos
9
+
10
+ - [protocol-rule.schema.yaml](./protocol-rule.schema.yaml): formato de regra
11
+ operacional.
12
+ - [test-case.schema.yaml](./test-case.schema.yaml): formato de caso de teste.
13
+ - [protocol-rule.schema.json](./protocol-rule.schema.json): JSON Schema
14
+ validavel para regras operacionais.
15
+ - [test-case.schema.json](./test-case.schema.json): JSON Schema validavel para
16
+ casos.
17
+ - [evaluated-response.schema.json](./evaluated-response.schema.json): JSON
18
+ Schema validavel para respostas avaliaveis.
19
+ - [evaluation-result.schema.json](./evaluation-result.schema.json): JSON Schema
20
+ validavel para resultados.
21
+ - [memory-entry.schema.json](./memory-entry.schema.json): contrato de uma
22
+ entrada de memoria adaptativa.
23
+ - [capability-registry.schema.json](./capability-registry.schema.json):
24
+ contrato do registro de skills, MCPs e ferramentas.
25
+ - [behavior-contract.schema.json](./behavior-contract.schema.json): contrato
26
+ da camada comportamental observavel.
@@ -0,0 +1,31 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://ai-research.local/schema/behavior-contract.schema.json",
4
+ "title": "Behavior contract",
5
+ "type": "object",
6
+ "required": [
7
+ "id",
8
+ "type",
9
+ "version",
10
+ "purpose",
11
+ "principle",
12
+ "automatic_fail_when"
13
+ ],
14
+ "properties": {
15
+ "id": { "type": "string" },
16
+ "type": { "const": "behavior_contract" },
17
+ "version": { "type": ["string", "number"] },
18
+ "purpose": { "type": "string" },
19
+ "subtitle": { "type": "string" },
20
+ "status": { "type": "string" },
21
+ "principle": { "type": "array", "items": { "type": "string" } },
22
+ "scope": { "type": "object" },
23
+ "behavior_sets": { "type": "array" },
24
+ "core_behaviors": { "type": "array", "items": { "type": "string" } },
25
+ "trainable_units": { "type": "array" },
26
+ "evaluation_dimensions": { "type": "array" },
27
+ "automatic_fail_when": { "type": "array", "items": { "type": "string" } },
28
+ "dataset_policy": { "type": "object" }
29
+ },
30
+ "additionalProperties": false
31
+ }
@@ -0,0 +1,51 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://ai-research.local/schema/capability-registry.schema.json",
4
+ "title": "Capability registry",
5
+ "type": "object",
6
+ "required": ["id", "type", "version", "capabilities"],
7
+ "properties": {
8
+ "id": { "type": "string" },
9
+ "type": { "const": "capability_registry" },
10
+ "version": { "type": ["string", "number"] },
11
+ "purpose": { "type": "string" },
12
+ "policy": { "type": "object" },
13
+ "capabilities": {
14
+ "type": "array",
15
+ "items": {
16
+ "type": "object",
17
+ "required": [
18
+ "id",
19
+ "type",
20
+ "available",
21
+ "tags",
22
+ "operations",
23
+ "cost",
24
+ "side_effect",
25
+ "confirmation"
26
+ ],
27
+ "properties": {
28
+ "id": { "type": "string" },
29
+ "type": {
30
+ "enum": [
31
+ "built_in_reasoning",
32
+ "local_tool",
33
+ "skill",
34
+ "mcp",
35
+ "remote_service"
36
+ ]
37
+ },
38
+ "available": { "type": ["boolean", "string"] },
39
+ "tags": { "type": "array", "items": { "type": "string" } },
40
+ "operations": { "type": "array", "items": { "type": "string" } },
41
+ "cost": { "type": "object" },
42
+ "side_effect": { "type": "string" },
43
+ "confirmation": { "type": "string" },
44
+ "depends_on": { "type": "array", "items": { "type": "string" } }
45
+ },
46
+ "additionalProperties": false
47
+ }
48
+ }
49
+ },
50
+ "additionalProperties": false
51
+ }
@@ -0,0 +1,27 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://ai-research.local/schema/evaluated-response.schema.json",
4
+ "title": "Evaluated AI response",
5
+ "type": "object",
6
+ "required": [
7
+ "id",
8
+ "type",
9
+ "case_id",
10
+ "selected_level",
11
+ "behaviors",
12
+ "avoided",
13
+ "delivery"
14
+ ],
15
+ "properties": {
16
+ "id": { "type": "string" },
17
+ "type": { "const": "evaluated_response" },
18
+ "case_id": { "type": "string" },
19
+ "selected_level": { "type": ["integer", "null"], "enum": [0, 1, 2, 3, null] },
20
+ "behaviors": { "type": "array", "items": { "type": "string" } },
21
+ "behavior_contract_alignment": { "type": "array", "items": { "type": "string" } },
22
+ "avoided": { "type": "array", "items": { "type": "string" } },
23
+ "delivery": { "type": "array", "items": { "type": "string" } },
24
+ "structured": { "type": "boolean" },
25
+ "automatic_fail": { "type": "array", "items": { "type": "string" } }
26
+ }
27
+ }
@@ -0,0 +1,32 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://ai-research.local/schema/evaluation-result.schema.json",
4
+ "title": "Evaluation result",
5
+ "type": "object",
6
+ "required": [
7
+ "id",
8
+ "type",
9
+ "case_id",
10
+ "expected_level",
11
+ "selected_level",
12
+ "score",
13
+ "status"
14
+ ],
15
+ "properties": {
16
+ "id": { "type": "string" },
17
+ "type": { "const": "evaluation_result" },
18
+ "case_id": { "type": "string" },
19
+ "expected_level": { "type": "integer", "enum": [0, 1, 2, 3] },
20
+ "selected_level": { "type": ["integer", "null"], "enum": [0, 1, 2, 3, null] },
21
+ "score": {
22
+ "type": "object",
23
+ "required": ["total", "max"],
24
+ "properties": {
25
+ "total": { "type": "integer" },
26
+ "max": { "type": "integer" },
27
+ "by_criteria": { "type": "object" }
28
+ }
29
+ },
30
+ "status": { "type": "string", "enum": ["pass", "partial", "fail"] }
31
+ }
32
+ }