modekeeper 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135) hide show
  1. modekeeper-0.1.1/LICENSE +6 -0
  2. modekeeper-0.1.1/PKG-INFO +119 -0
  3. modekeeper-0.1.1/README.md +103 -0
  4. modekeeper-0.1.1/pyproject.toml +52 -0
  5. modekeeper-0.1.1/setup.cfg +4 -0
  6. modekeeper-0.1.1/src/modekeeper/__init__.py +4 -0
  7. modekeeper-0.1.1/src/modekeeper/actuators/knobs.py +49 -0
  8. modekeeper-0.1.1/src/modekeeper/adapters/kubernetes.py +33 -0
  9. modekeeper-0.1.1/src/modekeeper/adapters/lightning.py +77 -0
  10. modekeeper-0.1.1/src/modekeeper/audit/__init__.py +2 -0
  11. modekeeper-0.1.1/src/modekeeper/audit/decision_trace.py +27 -0
  12. modekeeper-0.1.1/src/modekeeper/chords/__init__.py +2 -0
  13. modekeeper-0.1.1/src/modekeeper/chords/catalog.py +157 -0
  14. modekeeper-0.1.1/src/modekeeper/chords/catalog_v1.json +138 -0
  15. modekeeper-0.1.1/src/modekeeper/chords/v1.py +18 -0
  16. modekeeper-0.1.1/src/modekeeper/cli.py +4827 -0
  17. modekeeper-0.1.1/src/modekeeper/core/analysis.py +82 -0
  18. modekeeper-0.1.1/src/modekeeper/core/cost_model.py +19 -0
  19. modekeeper-0.1.1/src/modekeeper/core/modes.py +8 -0
  20. modekeeper-0.1.1/src/modekeeper/core/opportunity.py +69 -0
  21. modekeeper-0.1.1/src/modekeeper/core/passport.py +22 -0
  22. modekeeper-0.1.1/src/modekeeper/core/state_machine.py +21 -0
  23. modekeeper-0.1.1/src/modekeeper/core/summary.py +116 -0
  24. modekeeper-0.1.1/src/modekeeper/core/value_summary.py +83 -0
  25. modekeeper-0.1.1/src/modekeeper/demo/mk068_demo.py +167 -0
  26. modekeeper-0.1.1/src/modekeeper/demo/runner.py +30 -0
  27. modekeeper-0.1.1/src/modekeeper/fleet/__init__.py +2 -0
  28. modekeeper-0.1.1/src/modekeeper/fleet/inventory.py +149 -0
  29. modekeeper-0.1.1/src/modekeeper/fleet/policy_propagation.py +184 -0
  30. modekeeper-0.1.1/src/modekeeper/governance/__init__.py +2 -0
  31. modekeeper-0.1.1/src/modekeeper/governance/approval.py +19 -0
  32. modekeeper-0.1.1/src/modekeeper/k8s/__init__.py +2 -0
  33. modekeeper-0.1.1/src/modekeeper/k8s/rbac_diagnostics.py +93 -0
  34. modekeeper-0.1.1/src/modekeeper/license/__init__.py +5 -0
  35. modekeeper-0.1.1/src/modekeeper/license/canonical.py +15 -0
  36. modekeeper-0.1.1/src/modekeeper/license/public_keys.json +4 -0
  37. modekeeper-0.1.1/src/modekeeper/license/public_keys.py +41 -0
  38. modekeeper-0.1.1/src/modekeeper/license/verify.py +247 -0
  39. modekeeper-0.1.1/src/modekeeper/passports/__init__.py +15 -0
  40. modekeeper-0.1.1/src/modekeeper/passports/observe_max.py +174 -0
  41. modekeeper-0.1.1/src/modekeeper/passports/templates/__init__.py +1 -0
  42. modekeeper-0.1.1/src/modekeeper/passports/templates/comm.json +47 -0
  43. modekeeper-0.1.1/src/modekeeper/passports/templates/cost.json +46 -0
  44. modekeeper-0.1.1/src/modekeeper/passports/templates/io.json +46 -0
  45. modekeeper-0.1.1/src/modekeeper/passports/templates/perf.json +50 -0
  46. modekeeper-0.1.1/src/modekeeper/passports/templates/pilot.json +40 -0
  47. modekeeper-0.1.1/src/modekeeper/passports/templates/recovery.json +45 -0
  48. modekeeper-0.1.1/src/modekeeper/passports/templates/safe.json +50 -0
  49. modekeeper-0.1.1/src/modekeeper/passports/v0.py +207 -0
  50. modekeeper-0.1.1/src/modekeeper/policy/actions.py +19 -0
  51. modekeeper-0.1.1/src/modekeeper/policy/bundle.py +74 -0
  52. modekeeper-0.1.1/src/modekeeper/policy/chords.py +16 -0
  53. modekeeper-0.1.1/src/modekeeper/policy/rules.py +132 -0
  54. modekeeper-0.1.1/src/modekeeper/policy/scalar.py +38 -0
  55. modekeeper-0.1.1/src/modekeeper/roi/__init__.py +2 -0
  56. modekeeper-0.1.1/src/modekeeper/roi/estimate.py +83 -0
  57. modekeeper-0.1.1/src/modekeeper/roi/mk074_before_after.py +165 -0
  58. modekeeper-0.1.1/src/modekeeper/safety/explain.py +21 -0
  59. modekeeper-0.1.1/src/modekeeper/safety/guards.py +482 -0
  60. modekeeper-0.1.1/src/modekeeper/safety/rollback.py +37 -0
  61. modekeeper-0.1.1/src/modekeeper/telemetry/collector.py +12 -0
  62. modekeeper-0.1.1/src/modekeeper/telemetry/file_source.py +243 -0
  63. modekeeper-0.1.1/src/modekeeper/telemetry/k8s_log_source.py +366 -0
  64. modekeeper-0.1.1/src/modekeeper/telemetry/models.py +18 -0
  65. modekeeper-0.1.1/src/modekeeper/telemetry/raw_recorder.py +57 -0
  66. modekeeper-0.1.1/src/modekeeper/telemetry/sources.py +56 -0
  67. modekeeper-0.1.1/src/modekeeper/trainer/__init__.py +5 -0
  68. modekeeper-0.1.1/src/modekeeper/trainer/__main__.py +89 -0
  69. modekeeper-0.1.1/src/modekeeper/trainer/knobs.py +36 -0
  70. modekeeper-0.1.1/src/modekeeper.egg-info/PKG-INFO +119 -0
  71. modekeeper-0.1.1/src/modekeeper.egg-info/SOURCES.txt +133 -0
  72. modekeeper-0.1.1/src/modekeeper.egg-info/dependency_links.txt +1 -0
  73. modekeeper-0.1.1/src/modekeeper.egg-info/entry_points.txt +2 -0
  74. modekeeper-0.1.1/src/modekeeper.egg-info/requires.txt +4 -0
  75. modekeeper-0.1.1/src/modekeeper.egg-info/top_level.txt +1 -0
  76. modekeeper-0.1.1/tests/test_chords_recover_relock.py +102 -0
  77. modekeeper-0.1.1/tests/test_cli_artifacts.py +239 -0
  78. modekeeper-0.1.1/tests/test_cli_policy_scalar.py +45 -0
  79. modekeeper-0.1.1/tests/test_cli_value_summary.py +36 -0
  80. modekeeper-0.1.1/tests/test_closed_loop_apply_pipeline.py +349 -0
  81. modekeeper-0.1.1/tests/test_closed_loop_k8s_observe.py +264 -0
  82. modekeeper-0.1.1/tests/test_closed_loop_watch.py +163 -0
  83. modekeeper-0.1.1/tests/test_demo_mk068.py +56 -0
  84. modekeeper-0.1.1/tests/test_duration_parse.py +25 -0
  85. modekeeper-0.1.1/tests/test_file_source_worker_latencies.py +54 -0
  86. modekeeper-0.1.1/tests/test_gpu_saturated.py +28 -0
  87. modekeeper-0.1.1/tests/test_k8s_apply_blocked.py +112 -0
  88. modekeeper-0.1.1/tests/test_k8s_apply_errors.py +71 -0
  89. modekeeper-0.1.1/tests/test_k8s_apply_real.py +148 -0
  90. modekeeper-0.1.1/tests/test_k8s_log_source_container_auto.py +50 -0
  91. modekeeper-0.1.1/tests/test_k8s_multi_object_plan.py +217 -0
  92. modekeeper-0.1.1/tests/test_k8s_preflight.py +189 -0
  93. modekeeper-0.1.1/tests/test_k8s_render.py +142 -0
  94. modekeeper-0.1.1/tests/test_k8s_render_errors.py +88 -0
  95. modekeeper-0.1.1/tests/test_k8s_verify.py +501 -0
  96. modekeeper-0.1.1/tests/test_k8s_verify_errors.py +59 -0
  97. modekeeper-0.1.1/tests/test_k8s_verify_helpers.py +35 -0
  98. modekeeper-0.1.1/tests/test_lightning_adapter_import.py +18 -0
  99. modekeeper-0.1.1/tests/test_mk062_chords_v1.py +55 -0
  100. modekeeper-0.1.1/tests/test_mk068_demo.py +39 -0
  101. modekeeper-0.1.1/tests/test_mk074_before_after.py +80 -0
  102. modekeeper-0.1.1/tests/test_mk075_decision_trace.py +55 -0
  103. modekeeper-0.1.1/tests/test_mk076_approval_gate.py +39 -0
  104. modekeeper-0.1.1/tests/test_mk077_inventory.py +63 -0
  105. modekeeper-0.1.1/tests/test_mk078_policy_propagation.py +86 -0
  106. modekeeper-0.1.1/tests/test_mk080_roi_estimate.py +56 -0
  107. modekeeper-0.1.1/tests/test_mk082_license_gates.py +289 -0
  108. modekeeper-0.1.1/tests/test_mk082_license_verify.py +100 -0
  109. modekeeper-0.1.1/tests/test_mk083_policy_bundle.py +93 -0
  110. modekeeper-0.1.1/tests/test_mk084_chord_catalog_validate.py +103 -0
  111. modekeeper-0.1.1/tests/test_mk084_guardrails_envelope.py +37 -0
  112. modekeeper-0.1.1/tests/test_mk085_killswitch_absolute.py +336 -0
  113. modekeeper-0.1.1/tests/test_mk086_license_kid_and_rotation.py +91 -0
  114. modekeeper-0.1.1/tests/test_mk089_telemetry_and_watch.py +163 -0
  115. modekeeper-0.1.1/tests/test_mk091_environment_fingerprint.py +127 -0
  116. modekeeper-0.1.1/tests/test_mk093_customer_eval.py +68 -0
  117. modekeeper-0.1.1/tests/test_mk096_roi_report.py +144 -0
  118. modekeeper-0.1.1/tests/test_mk097_export_bundle.py +114 -0
  119. modekeeper-0.1.1/tests/test_mk098_stdout_jsonl_ingest.py +61 -0
  120. modekeeper-0.1.1/tests/test_observe_file_source.py +61 -0
  121. modekeeper-0.1.1/tests/test_observe_k8s_source.py +61 -0
  122. modekeeper-0.1.1/tests/test_observe_summary.py +19 -0
  123. modekeeper-0.1.1/tests/test_opportunity_estimate.py +43 -0
  124. modekeeper-0.1.1/tests/test_passport_observe_max.py +75 -0
  125. modekeeper-0.1.1/tests/test_passport_observe_max_redaction.py +51 -0
  126. modekeeper-0.1.1/tests/test_passport_observe_max_report_only.py +47 -0
  127. modekeeper-0.1.1/tests/test_passports_v0.py +52 -0
  128. modekeeper-0.1.1/tests/test_policy_scalar_baseline.py +48 -0
  129. modekeeper-0.1.1/tests/test_rbac_diagnostics_parse.py +54 -0
  130. modekeeper-0.1.1/tests/test_record_replay.py +449 -0
  131. modekeeper-0.1.1/tests/test_report_contracts.py +94 -0
  132. modekeeper-0.1.1/tests/test_safety_guardrails.py +221 -0
  133. modekeeper-0.1.1/tests/test_state_machine.py +26 -0
  134. modekeeper-0.1.1/tests/test_trainer_knobs_parse.py +22 -0
  135. modekeeper-0.1.1/tests/test_value_summary.py +74 -0
@@ -0,0 +1,6 @@
1
+ ModeKeeper — Proprietary License
2
+
3
+ Copyright (c) 2026.
4
+
5
+ All rights reserved. Unauthorized copying, modification, distribution, or use of this software,
6
+ via any medium, is strictly prohibited without prior written permission.
@@ -0,0 +1,119 @@
1
+ Metadata-Version: 2.4
2
+ Name: modekeeper
3
+ Version: 0.1.1
4
+ Summary: ModeKeeper: self-serve observability and safe closed-loop tuning
5
+ Author: ModeKeeper
6
+ License-Expression: LicenseRef-Proprietary
7
+ Keywords: mlops,observability,autotuning,safety
8
+ Classifier: Programming Language :: Python :: 3
9
+ Requires-Python: >=3.10
10
+ Description-Content-Type: text/markdown
11
+ License-File: LICENSE
12
+ Provides-Extra: dev
13
+ Requires-Dist: pytest>=7.4; extra == "dev"
14
+ Requires-Dist: ruff>=0.4.8; extra == "dev"
15
+ Dynamic: license-file
16
+
17
+ # ModeKeeper
18
+ <!-- modekeeper:product-intro:start -->
19
+
20
+ ## Для кого и зачем
21
+
22
+ **ModeKeeper** — инструмент для платформенных команд (**SRE / MLOps / FinOps**), которые запускают обучение/инференс в **Kubernetes** и хотят **снижать стоимость и нестабильность** без риска и без “ручного тюнинга”.
23
+
24
+ Что делает (в двух словах):
25
+ - **наблюдает** метрики/состояние ворклоада и собирает контекст;
26
+ - строит **план изменений** (**plan-only**, ничего не применяет);
27
+ - умеет **проверить применимость** плана (**verify**, тоже без изменений);
28
+ - в **платном режиме** может **применить** план (**apply**) внутри ModeKeeper — только при активной лицензии и после успешного verify.
29
+
30
+ Режимы:
31
+ - **Free:** observe + closed-loop dry-run + k8s render/verify (**ничего не меняет**)
32
+ - **Paid:** one-shot `mk closed-loop run --apply` под `MODEKEEPER_PAID=1`; детали — `docs/WORKFLOW.md`.
33
+
34
+ См. подробное продуктовое описание: `docs/product.md`.
35
+ См. технический snapshot для продолжения: `docs/SNAPSHOT.md`.
36
+ См. workflow (plan-only + verify + paid apply skeleton): `docs/WORKFLOW.md`.
37
+
38
+ <!-- modekeeper:product-intro:end -->
39
+
40
+
41
+ ModeKeeper — self-serve слой контроля для ML-систем. Сначала работает в режиме **OBSERVE_ONLY** (бесплатная неделя): собирает телеметрию и формирует отчет «теряете ли вы деньги». Затем, по желанию, включается режим **CLOSED_LOOP** для безопасного автотюнинга с guardrails и откатами.
42
+
43
+ ## Возможности
44
+ - Два режима: `OBSERVE_ONLY` и `CLOSED_LOOP`.
45
+ - Модульная архитектура: core, telemetry, actuators, policy, safety, adapters.
46
+ - Explain-log везде: каждое решение, ограничение и действие фиксируется.
47
+ - Локальные демо-сценарии без внешних сервисов.
48
+
49
+ ## Установка
50
+ ```bash
51
+ python -m venv .venv
52
+ . .venv/bin/activate
53
+ pip install -e .
54
+ ```
55
+
56
+ ## Быстрый старт
57
+ ```bash
58
+ mk observe --duration 250ms --out report/_observe_quick
59
+ mk demo run --scenario drift
60
+ mk closed-loop run --scenario drift --dry-run --out report/_golden_dryrun
61
+ ```
62
+ Canonical paid e2e (kind) + scripts: `docs/WORKFLOW.md`.
63
+
64
+ ## Quickstart
65
+ Observe-only quickstart + RBAC verify-only replay:
66
+ `docs/QUICKSTART.md`
67
+
68
+ ## E2E (kind)
69
+ Canonical workflow: `docs/WORKFLOW.md` (paid path is one-shot `closed-loop run --apply`).
70
+ Continuation snapshot: `docs/SNAPSHOT.md`.
71
+
72
+ Canonical scripts:
73
+ - `./scripts/e2e-smoke-kind.sh` (safe: kill switch blocks apply)
74
+ - `./scripts/e2e-apply-kind.sh` (REAL kubectl patches)
75
+
76
+ Warning: `./scripts/e2e-apply-kind.sh` performs real `kubectl` patches in the kind cluster.
77
+
78
+ Observe параметры:
79
+ - `--source` synthetic|file (default: synthetic)
80
+ - `--path` обязателен для `--source file`
81
+ - `--duration` принимает `1.5s`, `250ms`, `10m`; без суффикса = секунды
82
+ - формат входа: `jsonl`/`csv` с полями `ts`, `step_time_ms`, `loss` (optional)
83
+
84
+ ## Демо
85
+ ```bash
86
+ mk demo run --scenario straggler
87
+ mk demo run --scenario burst
88
+ ```
89
+
90
+ Выходные данные (контракт v0):
91
+ - `report/` содержит JSON-отчеты и `explain.jsonl` (JSONL, `ensure_ascii=False`).
92
+ - `observe_latest.json`, `demo_latest.json`, `closed_loop_latest.json` — копии последних отчетов.
93
+ - Подробности k8s утилит/полей — см. `docs/WORKFLOW.md` и `docs/SNAPSHOT.md`.
94
+ - Все отчеты имеют поля верхнего уровня: `schema_version`, `started_at`, `finished_at`, `duration_s`, `out_dir`.
95
+ - В `OBSERVE_ONLY` добавляется `summary`:
96
+ - `money_leak_risk` (low|medium|high)
97
+ - `top_symptoms`
98
+ - `recommendations`
99
+ - В `CLOSED_LOOP` дополнительно создаётся `summary.md` (короткий человекочитаемый итог прогона).
100
+ - В `CLOSED_LOOP` также пишутся plan-only артефакты: `k8s_plan.json` и `k8s_plan.kubectl.sh` (в dry-run kubectl **не выполняется**).
101
+ - В `CLOSED_LOOP` добавляются поля:
102
+ - `decision_summary` (RU)
103
+ - `proposed` (предлагаемые действия)
104
+ - `applied` (результаты применения/блокировки)
105
+ - `status` (например, `"ok"`)
106
+ - `kill_switch_active` (true/false)
107
+ - `blocked_reasons` (агрегация причин блокировок)
108
+ - `applied_reasons` (агрегация причин применений)
109
+ - `k8s_plan_path` (путь к `k8s_plan.json`)
110
+ - `k8s_plan_items` (кол-во items в плане)
111
+ - `k8s_kubectl_plan_path` (путь к `k8s_plan.kubectl.sh`)
112
+ - `k8s_namespace` (целевой namespace для скрипта/плана)
113
+ - `k8s_deployment` (целевой deployment для скрипта/плана)
114
+
115
+ ## Примечания
116
+ - `closed-loop` по умолчанию работает в dry-run; для paid-apply используйте one-shot `closed-loop run --apply` (см. `docs/WORKFLOW.md`).
117
+ - В dry-run `closed-loop` **не выполняет** kubectl: он только генерирует `k8s_plan.kubectl.sh`.
118
+ - Для принудительной блокировки `--apply` используйте `MODEKEEPER_KILL_SWITCH=1` (в отчёте `blocked_reasons` будет `kill_switch`).
119
+ - Все safety-ограничения локальные, настраиваемые и аудируемые.
@@ -0,0 +1,103 @@
1
+ # ModeKeeper
2
+ <!-- modekeeper:product-intro:start -->
3
+
4
+ ## Для кого и зачем
5
+
6
+ **ModeKeeper** — инструмент для платформенных команд (**SRE / MLOps / FinOps**), которые запускают обучение/инференс в **Kubernetes** и хотят **снижать стоимость и нестабильность** без риска и без “ручного тюнинга”.
7
+
8
+ Что делает (в двух словах):
9
+ - **наблюдает** метрики/состояние ворклоада и собирает контекст;
10
+ - строит **план изменений** (**plan-only**, ничего не применяет);
11
+ - умеет **проверить применимость** плана (**verify**, тоже без изменений);
12
+ - в **платном режиме** может **применить** план (**apply**) внутри ModeKeeper — только при активной лицензии и после успешного verify.
13
+
14
+ Режимы:
15
+ - **Free:** observe + closed-loop dry-run + k8s render/verify (**ничего не меняет**)
16
+ - **Paid:** one-shot `mk closed-loop run --apply` под `MODEKEEPER_PAID=1`; детали — `docs/WORKFLOW.md`.
17
+
18
+ См. подробное продуктовое описание: `docs/product.md`.
19
+ См. технический snapshot для продолжения: `docs/SNAPSHOT.md`.
20
+ См. workflow (plan-only + verify + paid apply skeleton): `docs/WORKFLOW.md`.
21
+
22
+ <!-- modekeeper:product-intro:end -->
23
+
24
+
25
+ ModeKeeper — self-serve слой контроля для ML-систем. Сначала работает в режиме **OBSERVE_ONLY** (бесплатная неделя): собирает телеметрию и формирует отчет «теряете ли вы деньги». Затем, по желанию, включается режим **CLOSED_LOOP** для безопасного автотюнинга с guardrails и откатами.
26
+
27
+ ## Возможности
28
+ - Два режима: `OBSERVE_ONLY` и `CLOSED_LOOP`.
29
+ - Модульная архитектура: core, telemetry, actuators, policy, safety, adapters.
30
+ - Explain-log везде: каждое решение, ограничение и действие фиксируется.
31
+ - Локальные демо-сценарии без внешних сервисов.
32
+
33
+ ## Установка
34
+ ```bash
35
+ python -m venv .venv
36
+ . .venv/bin/activate
37
+ pip install -e .
38
+ ```
39
+
40
+ ## Быстрый старт
41
+ ```bash
42
+ mk observe --duration 250ms --out report/_observe_quick
43
+ mk demo run --scenario drift
44
+ mk closed-loop run --scenario drift --dry-run --out report/_golden_dryrun
45
+ ```
46
+ Canonical paid e2e (kind) + scripts: `docs/WORKFLOW.md`.
47
+
48
+ ## Quickstart
49
+ Observe-only quickstart + RBAC verify-only replay:
50
+ `docs/QUICKSTART.md`
51
+
52
+ ## E2E (kind)
53
+ Canonical workflow: `docs/WORKFLOW.md` (paid path is one-shot `closed-loop run --apply`).
54
+ Continuation snapshot: `docs/SNAPSHOT.md`.
55
+
56
+ Canonical scripts:
57
+ - `./scripts/e2e-smoke-kind.sh` (safe: kill switch blocks apply)
58
+ - `./scripts/e2e-apply-kind.sh` (REAL kubectl patches)
59
+
60
+ Warning: `./scripts/e2e-apply-kind.sh` performs real `kubectl` patches in the kind cluster.
61
+
62
+ Observe параметры:
63
+ - `--source` synthetic|file (default: synthetic)
64
+ - `--path` обязателен для `--source file`
65
+ - `--duration` принимает `1.5s`, `250ms`, `10m`; без суффикса = секунды
66
+ - формат входа: `jsonl`/`csv` с полями `ts`, `step_time_ms`, `loss` (optional)
67
+
68
+ ## Демо
69
+ ```bash
70
+ mk demo run --scenario straggler
71
+ mk demo run --scenario burst
72
+ ```
73
+
74
+ Выходные данные (контракт v0):
75
+ - `report/` содержит JSON-отчеты и `explain.jsonl` (JSONL, `ensure_ascii=False`).
76
+ - `observe_latest.json`, `demo_latest.json`, `closed_loop_latest.json` — копии последних отчетов.
77
+ - Подробности k8s утилит/полей — см. `docs/WORKFLOW.md` и `docs/SNAPSHOT.md`.
78
+ - Все отчеты имеют поля верхнего уровня: `schema_version`, `started_at`, `finished_at`, `duration_s`, `out_dir`.
79
+ - В `OBSERVE_ONLY` добавляется `summary`:
80
+ - `money_leak_risk` (low|medium|high)
81
+ - `top_symptoms`
82
+ - `recommendations`
83
+ - В `CLOSED_LOOP` дополнительно создаётся `summary.md` (короткий человекочитаемый итог прогона).
84
+ - В `CLOSED_LOOP` также пишутся plan-only артефакты: `k8s_plan.json` и `k8s_plan.kubectl.sh` (в dry-run kubectl **не выполняется**).
85
+ - В `CLOSED_LOOP` добавляются поля:
86
+ - `decision_summary` (RU)
87
+ - `proposed` (предлагаемые действия)
88
+ - `applied` (результаты применения/блокировки)
89
+ - `status` (например, `"ok"`)
90
+ - `kill_switch_active` (true/false)
91
+ - `blocked_reasons` (агрегация причин блокировок)
92
+ - `applied_reasons` (агрегация причин применений)
93
+ - `k8s_plan_path` (путь к `k8s_plan.json`)
94
+ - `k8s_plan_items` (кол-во items в плане)
95
+ - `k8s_kubectl_plan_path` (путь к `k8s_plan.kubectl.sh`)
96
+ - `k8s_namespace` (целевой namespace для скрипта/плана)
97
+ - `k8s_deployment` (целевой deployment для скрипта/плана)
98
+
99
+ ## Примечания
100
+ - `closed-loop` по умолчанию работает в dry-run; для paid-apply используйте one-shot `closed-loop run --apply` (см. `docs/WORKFLOW.md`).
101
+ - В dry-run `closed-loop` **не выполняет** kubectl: он только генерирует `k8s_plan.kubectl.sh`.
102
+ - Для принудительной блокировки `--apply` используйте `MODEKEEPER_KILL_SWITCH=1` (в отчёте `blocked_reasons` будет `kill_switch`).
103
+ - Все safety-ограничения локальные, настраиваемые и аудируемые.
@@ -0,0 +1,52 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "modekeeper"
7
+ version = "0.1.1"
8
+ description = "ModeKeeper: self-serve observability and safe closed-loop tuning"
9
+ readme = {file = "README.md", content-type = "text/markdown"}
10
+ requires-python = ">=3.10"
11
+ license = "LicenseRef-Proprietary"
12
+ license-files = ["LICENSE"]
13
+ authors = [{name = "ModeKeeper"}]
14
+ keywords = ["mlops", "observability", "autotuning", "safety"]
15
+ classifiers = [
16
+ "Programming Language :: Python :: 3",
17
+ ]
18
+
19
+ dependencies = []
20
+
21
+ [project.optional-dependencies]
22
+ dev = [
23
+ "pytest>=7.4",
24
+ "ruff>=0.4.8",
25
+ ]
26
+
27
+ [project.scripts]
28
+ mk = "modekeeper.cli:main"
29
+
30
+ [tool.setuptools]
31
+ package-dir = {"" = "src"}
32
+
33
+ [tool.setuptools.packages.find]
34
+ where = ["src"]
35
+
36
+ [tool.setuptools.package-data]
37
+ "modekeeper.passports.templates" = ["*.json"]
38
+ "modekeeper.chords" = ["*.json"]
39
+ "modekeeper.license" = ["*.json"]
40
+
41
+ [tool.ruff]
42
+ line-length = 100
43
+ select = ["E", "F", "I", "W"]
44
+
45
+ [tool.ruff.format]
46
+ quote-style = "double"
47
+ indent-style = "space"
48
+ line-ending = "lf"
49
+
50
+ [tool.pytest.ini_options]
51
+ addopts = "-q"
52
+ testpaths = ["tests"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,4 @@
1
+ """ModeKeeper package."""
2
+
3
+ __all__ = ["__version__"]
4
+ __version__ = "0.1.1"
@@ -0,0 +1,49 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from datetime import datetime, timezone
5
+
6
+
7
@dataclass
class Knob:
    """A named integer setting with lower and upper bounds.

    ``step`` is stored as plain data; no method of this class reads it.
    ``last_changed_at`` remains ``None`` until :meth:`apply` is called.
    """

    name: str
    min_value: int
    max_value: int
    step: int
    value: int
    last_changed_at: datetime | None = None

    def clamp(self, target: int) -> int:
        """Return ``target`` raised to ``min_value`` or lowered to ``max_value`` as needed."""
        floor, ceiling = self.min_value, self.max_value
        # The lower bound is tested first, so it wins if the bounds are inverted.
        if target < floor:
            return floor
        return ceiling if target > ceiling else target

    def apply(self, target: int) -> int:
        """Store the clamped ``target``, record the change time (UTC), return the new value."""
        self.value = self.clamp(target)
        self.last_changed_at = datetime.now(timezone.utc)
        return self.value
28
+
29
+
30
class ActuatorRegistry:
    """In-memory collection of knobs keyed by their ``name``."""

    def __init__(self) -> None:
        self._knobs: dict[str, Knob] = {}

    def register(self, knob: Knob) -> None:
        """Add ``knob``; a knob already stored under the same name is replaced."""
        self._knobs[knob.name] = knob

    def get(self, name: str) -> Knob | None:
        """Return the knob registered as ``name``, or ``None`` if there is none."""
        return self._knobs.get(name)

    def snapshot(self) -> dict[str, int]:
        """Return a new ``{name: current value}`` mapping for every registered knob."""
        captured: dict[str, int] = {}
        for knob_name, knob in self._knobs.items():
            captured[knob_name] = knob.value
        return captured

    def restore(self, snapshot: dict[str, int]) -> None:
        """Write snapshot values back onto registered knobs.

        Names that are not registered are ignored. Values are assigned
        directly to ``value``; ``Knob.apply`` is not called.
        """
        for knob_name in snapshot:
            if knob_name not in self._knobs:
                continue
            self._knobs[knob_name].value = snapshot[knob_name]

    def list_names(self) -> list[str]:
        """Return the registered knob names in registration order."""
        return list(self._knobs)
@@ -0,0 +1,33 @@
1
+ from __future__ import annotations
2
+
3
+
4
def build_k8s_plan(
    proposed_actions: list[object],
    *,
    namespace: str,
    deployment: str,
) -> list[dict]:
    """Fold proposed actions into a single Deployment annotation patch.

    Each action contributes one ``modekeeper/knob.<knob>`` annotation whose
    value is the action's ``target`` rendered as text. The annotation is
    written both to the Deployment metadata and to the pod template
    metadata. ``knob`` and ``target`` are read with ``getattr`` and default
    to ``None`` when absent. A later action for the same knob overwrites an
    earlier one.

    Returns:
        An empty list when there are no actions, otherwise a one-element
        list holding the plan item for ``namespace``/``deployment``.
    """
    # Two separate dicts: the patch carries independent annotation maps.
    deployment_annotations: dict[str, str] = {}
    pod_template_annotations: dict[str, str] = {}
    has_actions = False

    for action in proposed_actions:
        has_actions = True
        knob_name = getattr(action, "knob", None)
        target_value = getattr(action, "target", None)
        annotation_key = f"modekeeper/knob.{knob_name}"
        deployment_annotations[annotation_key] = f"{target_value}"
        pod_template_annotations[annotation_key] = f"{target_value}"

    if not has_actions:
        return []

    plan_item = {
        "apiVersion": "apps/v1",
        "kind": "Deployment",
        "namespace": namespace,
        "name": deployment,
        "reason": "coalesced",
        "patch": {
            "metadata": {"annotations": deployment_annotations},
            "spec": {"template": {"metadata": {"annotations": pod_template_annotations}}},
        },
    }
    return [plan_item]
@@ -0,0 +1,77 @@
1
+ from __future__ import annotations
2
+
3
+ import time
4
+ from pathlib import Path
5
+
6
+ from modekeeper.safety.explain import ExplainLog
7
+
8
+ try:
9
+ import pytorch_lightning as pl
10
+ except Exception:
11
+ try:
12
+ import lightning.pytorch as pl
13
+ except Exception:
14
+ pl = None
15
+
16
+ LIGHTNING_AVAILABLE = pl is not None
17
+
18
+
19
def build_lightning_callback(out_dir: Path) -> object | None:
    """Create a Lightning callback that logs training timing events.

    Events are emitted through an ``ExplainLog`` bound to
    ``out_dir / "explain.jsonl"``. Timestamps come from ``time.monotonic()``,
    so they are only meaningful relative to each other.

    Returns:
        ``None`` when no Lightning package could be imported (``pl`` is
        ``None``), otherwise a new callback instance.
    """
    if pl is None:
        return None

    explain = ExplainLog(out_dir / "explain.jsonl")

    class ExplainCallback(pl.Callback):
        """Emits ``pl_*`` events for train start, batch start/end and fit end."""

        def __init__(self) -> None:
            super().__init__()
            self._fit_start = None
            self._batch_start = None

        def on_train_start(self, trainer, pl_module) -> None:
            started = time.monotonic()
            # Only the first train start is kept as the fit reference point.
            if self._fit_start is None:
                self._fit_start = started
            explain.emit("pl_train_start", {"t": started})

        def on_train_batch_start(self, trainer, pl_module, batch, batch_idx) -> None:
            started = time.monotonic()
            self._batch_start = started
            explain.emit("pl_train_batch_start", {"t": started, "batch_idx": batch_idx})

        def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch_idx) -> None:
            finished = time.monotonic()
            payload = {
                "t": finished,
                "batch_idx": batch_idx,
                "loss": _extract_loss(outputs),
            }
            batch_started = self._batch_start
            if batch_started is not None:
                payload["batch_duration_s"] = finished - batch_started
            explain.emit("pl_train_batch_end", payload)

        def on_fit_end(self, trainer, pl_module) -> None:
            finished = time.monotonic()
            payload = {"t": finished}
            fit_started = self._fit_start
            if fit_started is not None:
                payload["fit_duration_s"] = finished - fit_started
            explain.emit("pl_fit_end", payload)

    return ExplainCallback()
58
+
59
+
60
+ def _extract_loss(outputs: object) -> float | None:
61
+ if outputs is None:
62
+ return None
63
+ if isinstance(outputs, (int, float)):
64
+ return float(outputs)
65
+ if isinstance(outputs, dict):
66
+ if "loss" in outputs:
67
+ return _extract_loss(outputs["loss"])
68
+ return None
69
+ if isinstance(outputs, (list, tuple)) and outputs:
70
+ return _extract_loss(outputs[0])
71
+ item = getattr(outputs, "item", None)
72
+ if callable(item):
73
+ try:
74
+ return float(item())
75
+ except Exception:
76
+ return None
77
+ return None
@@ -0,0 +1,2 @@
1
+ """Audit artifacts for closed-loop execution."""
2
+
@@ -0,0 +1,27 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from dataclasses import dataclass
5
+ from pathlib import Path
6
+ from typing import Any
7
+
8
+ SCHEMA_VERSION = "decision_trace_event.v0"
9
+
10
+
11
@dataclass
class DecisionTraceWriter:
    """Appends events to a JSON Lines file at ``path``."""

    path: Path

    def emit(self, event: dict[str, Any]) -> None:
        """Append ``event`` as one compact JSON line.

        The parent directory is created if missing. Keys are sorted,
        separators carry no spaces, and non-ASCII text is written as-is
        in UTF-8.
        """
        target = self.path
        target.parent.mkdir(parents=True, exist_ok=True)
        with target.open("a", encoding="utf-8") as sink:
            # Serialization stays inside the ``with`` so the file is opened
            # before any serialization error can surface.
            serialized = json.dumps(
                event,
                ensure_ascii=False,
                separators=(",", ":"),
                sort_keys=True,
            )
            sink.write(f"{serialized}\n")
27
+
@@ -0,0 +1,2 @@
1
+ """Chord ID libraries."""
2
+
@@ -0,0 +1,157 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from functools import lru_cache
5
+ from pathlib import Path
6
+
7
+ CATALOG_SCHEMA_VERSION = "chord_catalog.v1"
8
+ VALIDATE_SCHEMA_VERSION = "chords_validate.v0"
9
+ _REQUIRED_CHORD_KEYS = (
10
+ "id",
11
+ "intent",
12
+ "risk_tier",
13
+ "required_signals",
14
+ "invariants",
15
+ "knobs_touched",
16
+ )
17
+ _OPTIONAL_CHORD_KEYS = ("cooldown_ms", "budget")
18
+ _ALLOWED_CHORD_KEYS = frozenset((*_REQUIRED_CHORD_KEYS, *_OPTIONAL_CHORD_KEYS))
19
+
20
+
21
+ def _is_list_of_str(value: object) -> bool:
22
+ return isinstance(value, list) and all(isinstance(item, str) for item in value)
23
+
24
+
25
def validate_catalog_dict(catalog: dict, source: str) -> list[str]:
    """Validate a parsed chord catalog and return human-readable errors.

    Args:
        catalog: The decoded JSON document.
        source: Label used as the prefix of every error message.

    Returns:
        A list of error strings; empty when the catalog is valid. If
        ``catalog`` is not a dict, or ``chords`` is not a list, validation
        stops early because individual chords cannot be inspected.

    Checks per chord: no unknown fields, all required fields present,
    unique string ``id``, string ``intent`` and ``risk_tier``, list-of-string
    ``required_signals`` / ``invariants`` / ``knobs_touched``, and, when
    present, integer ``cooldown_ms`` and object ``budget``.
    """
    if not isinstance(catalog, dict):
        return [f"{source}: top-level JSON must be an object"]

    errors: list[str] = [
        f"{source}: unknown top-level field '{key}'"
        for key in sorted(set(catalog) - {"schema_version", "chords"})
    ]

    if catalog.get("schema_version") != CATALOG_SCHEMA_VERSION:
        errors.append(f"{source}: schema_version must be '{CATALOG_SCHEMA_VERSION}'")

    chords = catalog.get("chords")
    if not isinstance(chords, list):
        errors.append(f"{source}: chords must be an array")
        return errors

    seen_ids: set[str] = set()
    for index, item in enumerate(chords):
        path = f"{source}: chords[{index}]"
        if not isinstance(item, dict):
            errors.append(f"{path} must be an object")
            continue

        errors.extend(
            f"{path}: unknown field '{key}'" for key in sorted(set(item) - _ALLOWED_CHORD_KEYS)
        )
        errors.extend(
            f"{path}: missing required field '{key}'"
            for key in _REQUIRED_CHORD_KEYS
            if key not in item
        )

        chord_id = item.get("id")
        if not isinstance(chord_id, str):
            errors.append(f"{path}: id must be string")
        elif chord_id in seen_ids:
            errors.append(f"{path}: duplicate chord id '{chord_id}'")
        else:
            seen_ids.add(chord_id)

        for key in ("intent", "risk_tier"):
            if not isinstance(item.get(key), str):
                errors.append(f"{path}: {key} must be string")

        for key in ("required_signals", "invariants", "knobs_touched"):
            if not _is_list_of_str(item.get(key)):
                errors.append(f"{path}: {key} must be array of strings")

        if "cooldown_ms" in item:
            cooldown_ms = item["cooldown_ms"]
            # bool is a subclass of int, so JSON true/false would otherwise
            # be accepted as a cooldown; reject it explicitly.
            if isinstance(cooldown_ms, bool) or not isinstance(cooldown_ms, int):
                errors.append(f"{path}: cooldown_ms must be int")

        if "budget" in item and not isinstance(item["budget"], dict):
            errors.append(f"{path}: budget must be object")

    return errors
94
+
95
+
96
def validate_catalog_file(path: Path) -> dict:
    """Read a chord catalog file and return a validation report.

    Read and decode failures are reported in ``errors`` instead of being
    raised. This covers a missing file, any other ``OSError`` (for example a
    directory or an unreadable file), bytes that are not valid UTF-8, and
    malformed JSON.

    Returns:
        A dict with ``schema_version``, ``ok``, ``errors``, ``chord_count``
        (length of the ``chords`` array, or 0) and ``chord_ids`` (sorted
        unique string ids found in ``chords``).
    """
    source = str(path)
    errors: list[str] = []
    payload: object = {}
    try:
        payload = json.loads(path.read_text(encoding="utf-8"))
    except FileNotFoundError:
        errors.append(f"{source}: file not found")
    except json.JSONDecodeError as exc:
        errors.append(f"{source}: invalid JSON: {exc}")
    except UnicodeDecodeError as exc:
        errors.append(f"{source}: file is not valid UTF-8: {exc}")
    except OSError as exc:
        # Directory, permission problem, etc.: report rather than crash.
        errors.append(f"{source}: cannot read file: {exc}")

    chord_count = 0
    chord_ids: list[str] = []
    payload_is_object = isinstance(payload, dict)
    if payload_is_object:
        chords = payload.get("chords")
        if isinstance(chords, list):
            chord_count = len(chords)
            chord_ids = sorted(
                {
                    item["id"]
                    for item in chords
                    if isinstance(item, dict) and isinstance(item.get("id"), str)
                }
            )

    # Structural validation only makes sense when the file could be loaded.
    if not errors:
        if payload_is_object:
            errors.extend(validate_catalog_dict(payload, source=source))
        else:
            errors.append(f"{source}: top-level JSON must be an object")

    return {
        "schema_version": VALIDATE_SCHEMA_VERSION,
        "ok": not errors,
        "errors": errors,
        "chord_count": chord_count,
        "chord_ids": chord_ids,
    }
142
+
143
+
144
def load_catalog_file(path: Path) -> dict:
    """Load and validate a chord catalog, returning the parsed document.

    Raises:
        ValueError: if the top-level JSON value is not an object, or if
            validation reports errors (joined with ``"; "``). JSON decode
            errors and file errors from reading ``path`` propagate unchanged.
    """
    raw_text = path.read_text(encoding="utf-8")
    document = json.loads(raw_text)
    if isinstance(document, dict):
        problems = validate_catalog_dict(document, source=str(path))
        if not problems:
            return document
        raise ValueError("; ".join(problems))
    raise ValueError(f"{path}: top-level JSON must be an object")
152
+
153
+
154
@lru_cache(maxsize=1)
def load_default_catalog() -> dict:
    """Load the ``catalog_v1.json`` that sits next to this module.

    The result is cached, so every caller receives the same dict object.
    """
    module_dir = Path(__file__).parent
    return load_catalog_file(module_dir / "catalog_v1.json")