inferoa 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200) hide show
  1. package/LICENSE +176 -0
  2. package/README.md +154 -0
  3. package/dist/src/app.d.ts +16 -0
  4. package/dist/src/app.js +17 -0
  5. package/dist/src/app.js.map +1 -0
  6. package/dist/src/autoresearch/state.d.ts +106 -0
  7. package/dist/src/autoresearch/state.js +469 -0
  8. package/dist/src/autoresearch/state.js.map +1 -0
  9. package/dist/src/cli.d.ts +2 -0
  10. package/dist/src/cli.js +415 -0
  11. package/dist/src/cli.js.map +1 -0
  12. package/dist/src/code-intelligence/codegraph-engine.d.ts +55 -0
  13. package/dist/src/code-intelligence/codegraph-engine.js +593 -0
  14. package/dist/src/code-intelligence/codegraph-engine.js.map +1 -0
  15. package/dist/src/code-intelligence/hub.d.ts +37 -0
  16. package/dist/src/code-intelligence/hub.js +65 -0
  17. package/dist/src/code-intelligence/hub.js.map +1 -0
  18. package/dist/src/config/config.d.ts +12 -0
  19. package/dist/src/config/config.js +229 -0
  20. package/dist/src/config/config.js.map +1 -0
  21. package/dist/src/config/defaults.d.ts +2 -0
  22. package/dist/src/config/defaults.js +44 -0
  23. package/dist/src/config/defaults.js.map +1 -0
  24. package/dist/src/config/secret-vault.d.ts +3 -0
  25. package/dist/src/config/secret-vault.js +106 -0
  26. package/dist/src/config/secret-vault.js.map +1 -0
  27. package/dist/src/context/compressor.d.ts +33 -0
  28. package/dist/src/context/compressor.js +501 -0
  29. package/dist/src/context/compressor.js.map +1 -0
  30. package/dist/src/context/prompt.d.ts +26 -0
  31. package/dist/src/context/prompt.js +572 -0
  32. package/dist/src/context/prompt.js.map +1 -0
  33. package/dist/src/daemon/serve.d.ts +2 -0
  34. package/dist/src/daemon/serve.js +11 -0
  35. package/dist/src/daemon/serve.js.map +1 -0
  36. package/dist/src/daemon/supervisor.d.ts +33 -0
  37. package/dist/src/daemon/supervisor.js +252 -0
  38. package/dist/src/daemon/supervisor.js.map +1 -0
  39. package/dist/src/goals/state.d.ts +105 -0
  40. package/dist/src/goals/state.js +736 -0
  41. package/dist/src/goals/state.js.map +1 -0
  42. package/dist/src/model/endpoint-signals.d.ts +15 -0
  43. package/dist/src/model/endpoint-signals.js +186 -0
  44. package/dist/src/model/endpoint-signals.js.map +1 -0
  45. package/dist/src/model/gateway.d.ts +11 -0
  46. package/dist/src/model/gateway.js +455 -0
  47. package/dist/src/model/gateway.js.map +1 -0
  48. package/dist/src/plans/state.d.ts +28 -0
  49. package/dist/src/plans/state.js +123 -0
  50. package/dist/src/plans/state.js.map +1 -0
  51. package/dist/src/runtime.d.ts +92 -0
  52. package/dist/src/runtime.js +757 -0
  53. package/dist/src/runtime.js.map +1 -0
  54. package/dist/src/session/store.d.ts +84 -0
  55. package/dist/src/session/store.js +593 -0
  56. package/dist/src/session/store.js.map +1 -0
  57. package/dist/src/session/workspace.d.ts +2 -0
  58. package/dist/src/session/workspace.js +14 -0
  59. package/dist/src/session/workspace.js.map +1 -0
  60. package/dist/src/skills/registry.d.ts +24 -0
  61. package/dist/src/skills/registry.js +203 -0
  62. package/dist/src/skills/registry.js.map +1 -0
  63. package/dist/src/tools/autoresearch-tools.d.ts +6 -0
  64. package/dist/src/tools/autoresearch-tools.js +412 -0
  65. package/dist/src/tools/autoresearch-tools.js.map +1 -0
  66. package/dist/src/tools/clarify-tool.d.ts +3 -0
  67. package/dist/src/tools/clarify-tool.js +107 -0
  68. package/dist/src/tools/clarify-tool.js.map +1 -0
  69. package/dist/src/tools/code-intelligence.d.ts +15 -0
  70. package/dist/src/tools/code-intelligence.js +391 -0
  71. package/dist/src/tools/code-intelligence.js.map +1 -0
  72. package/dist/src/tools/context.d.ts +11 -0
  73. package/dist/src/tools/context.js +2 -0
  74. package/dist/src/tools/context.js.map +1 -0
  75. package/dist/src/tools/goal-tools.d.ts +3 -0
  76. package/dist/src/tools/goal-tools.js +279 -0
  77. package/dist/src/tools/goal-tools.js.map +1 -0
  78. package/dist/src/tools/omni-tools.d.ts +8 -0
  79. package/dist/src/tools/omni-tools.js +349 -0
  80. package/dist/src/tools/omni-tools.js.map +1 -0
  81. package/dist/src/tools/permissions.d.ts +11 -0
  82. package/dist/src/tools/permissions.js +74 -0
  83. package/dist/src/tools/permissions.js.map +1 -0
  84. package/dist/src/tools/plan-tools.d.ts +3 -0
  85. package/dist/src/tools/plan-tools.js +314 -0
  86. package/dist/src/tools/plan-tools.js.map +1 -0
  87. package/dist/src/tools/process-tools.d.ts +6 -0
  88. package/dist/src/tools/process-tools.js +199 -0
  89. package/dist/src/tools/process-tools.js.map +1 -0
  90. package/dist/src/tools/registry.d.ts +20 -0
  91. package/dist/src/tools/registry.js +187 -0
  92. package/dist/src/tools/registry.js.map +1 -0
  93. package/dist/src/tools/schemas.d.ts +3 -0
  94. package/dist/src/tools/schemas.js +500 -0
  95. package/dist/src/tools/schemas.js.map +1 -0
  96. package/dist/src/tools/skill-tools.d.ts +6 -0
  97. package/dist/src/tools/skill-tools.js +124 -0
  98. package/dist/src/tools/skill-tools.js.map +1 -0
  99. package/dist/src/tools/text-args.d.ts +5 -0
  100. package/dist/src/tools/text-args.js +22 -0
  101. package/dist/src/tools/text-args.js.map +1 -0
  102. package/dist/src/tools/web-search.d.ts +5 -0
  103. package/dist/src/tools/web-search.js +602 -0
  104. package/dist/src/tools/web-search.js.map +1 -0
  105. package/dist/src/tools/workspace-tools.d.ts +17 -0
  106. package/dist/src/tools/workspace-tools.js +561 -0
  107. package/dist/src/tools/workspace-tools.js.map +1 -0
  108. package/dist/src/tui/activity.d.ts +11 -0
  109. package/dist/src/tui/activity.js +75 -0
  110. package/dist/src/tui/activity.js.map +1 -0
  111. package/dist/src/tui/ansi.d.ts +24 -0
  112. package/dist/src/tui/ansi.js +131 -0
  113. package/dist/src/tui/ansi.js.map +1 -0
  114. package/dist/src/tui/app.d.ts +163 -0
  115. package/dist/src/tui/app.js +4204 -0
  116. package/dist/src/tui/app.js.map +1 -0
  117. package/dist/src/tui/cache-footer.d.ts +21 -0
  118. package/dist/src/tui/cache-footer.js +75 -0
  119. package/dist/src/tui/cache-footer.js.map +1 -0
  120. package/dist/src/tui/clarify.d.ts +14 -0
  121. package/dist/src/tui/clarify.js +187 -0
  122. package/dist/src/tui/clarify.js.map +1 -0
  123. package/dist/src/tui/composer.d.ts +79 -0
  124. package/dist/src/tui/composer.js +592 -0
  125. package/dist/src/tui/composer.js.map +1 -0
  126. package/dist/src/tui/event-view.d.ts +5 -0
  127. package/dist/src/tui/event-view.js +392 -0
  128. package/dist/src/tui/event-view.js.map +1 -0
  129. package/dist/src/tui/home.d.ts +7 -0
  130. package/dist/src/tui/home.js +92 -0
  131. package/dist/src/tui/home.js.map +1 -0
  132. package/dist/src/tui/markdown.d.ts +18 -0
  133. package/dist/src/tui/markdown.js +271 -0
  134. package/dist/src/tui/markdown.js.map +1 -0
  135. package/dist/src/tui/mode-footer.d.ts +9 -0
  136. package/dist/src/tui/mode-footer.js +62 -0
  137. package/dist/src/tui/mode-footer.js.map +1 -0
  138. package/dist/src/tui/plan-view.d.ts +8 -0
  139. package/dist/src/tui/plan-view.js +45 -0
  140. package/dist/src/tui/plan-view.js.map +1 -0
  141. package/dist/src/tui/prompt-queue.d.ts +18 -0
  142. package/dist/src/tui/prompt-queue.js +27 -0
  143. package/dist/src/tui/prompt-queue.js.map +1 -0
  144. package/dist/src/tui/resize.d.ts +7 -0
  145. package/dist/src/tui/resize.js +15 -0
  146. package/dist/src/tui/resize.js.map +1 -0
  147. package/dist/src/tui/session-picker.d.ts +10 -0
  148. package/dist/src/tui/session-picker.js +17 -0
  149. package/dist/src/tui/session-picker.js.map +1 -0
  150. package/dist/src/tui/session-transcript.d.ts +2 -0
  151. package/dist/src/tui/session-transcript.js +44 -0
  152. package/dist/src/tui/session-transcript.js.map +1 -0
  153. package/dist/src/tui/slash-notice.d.ts +2 -0
  154. package/dist/src/tui/slash-notice.js +9 -0
  155. package/dist/src/tui/slash-notice.js.map +1 -0
  156. package/dist/src/tui/slash.d.ts +21 -0
  157. package/dist/src/tui/slash.js +103 -0
  158. package/dist/src/tui/slash.js.map +1 -0
  159. package/dist/src/tui/splash.d.ts +4 -0
  160. package/dist/src/tui/splash.js +64 -0
  161. package/dist/src/tui/splash.js.map +1 -0
  162. package/dist/src/tui/tool-renderer.d.ts +6 -0
  163. package/dist/src/tui/tool-renderer.js +1024 -0
  164. package/dist/src/tui/tool-renderer.js.map +1 -0
  165. package/dist/src/tui/transcript-spacing.d.ts +1 -0
  166. package/dist/src/tui/transcript-spacing.js +4 -0
  167. package/dist/src/tui/transcript-spacing.js.map +1 -0
  168. package/dist/src/types.d.ts +220 -0
  169. package/dist/src/types.js +2 -0
  170. package/dist/src/types.js.map +1 -0
  171. package/dist/src/util/abort.d.ts +3 -0
  172. package/dist/src/util/abort.js +19 -0
  173. package/dist/src/util/abort.js.map +1 -0
  174. package/dist/src/util/clock.d.ts +2 -0
  175. package/dist/src/util/clock.js +7 -0
  176. package/dist/src/util/clock.js.map +1 -0
  177. package/dist/src/util/fs.d.ts +13 -0
  178. package/dist/src/util/fs.js +75 -0
  179. package/dist/src/util/fs.js.map +1 -0
  180. package/dist/src/util/hash.d.ts +6 -0
  181. package/dist/src/util/hash.js +50 -0
  182. package/dist/src/util/hash.js.map +1 -0
  183. package/dist/src/util/limit.d.ts +11 -0
  184. package/dist/src/util/limit.js +29 -0
  185. package/dist/src/util/limit.js.map +1 -0
  186. package/dist/src/util/types.d.ts +22 -0
  187. package/dist/src/util/types.js +33 -0
  188. package/dist/src/util/types.js.map +1 -0
  189. package/dist/src/validation/acceptance.d.ts +12 -0
  190. package/dist/src/validation/acceptance.js +251 -0
  191. package/dist/src/validation/acceptance.js.map +1 -0
  192. package/dist/src/validation/milestone.d.ts +2 -0
  193. package/dist/src/validation/milestone.js +141 -0
  194. package/dist/src/validation/milestone.js.map +1 -0
  195. package/docs/final-acceptance-task.md +193 -0
  196. package/docs/public-source-hygiene.md +21 -0
  197. package/docs/roadmap.md +265 -0
  198. package/docs/tui-product-design.md +270 -0
  199. package/package.json +67 -0
  200. package/skills/coding-workflow/SKILL.md +16 -0
@@ -0,0 +1,193 @@
1
+ # Final Acceptance Task
2
+
3
+ Inferoa is not considered complete until it passes a real end-to-end task
4
+ with actual model endpoints.
5
+
6
+ Unit tests, mock endpoints, and isolated smoke tests are necessary but not
7
+ sufficient. The final acceptance task must prove that the implemented agent can
8
+ use its built-in tools, manage context, and call vLLM ecosystem endpoints in one
9
+ durable coding session.
10
+
11
+ ## Required Environment
12
+
13
+ The final task should use real configured endpoints:
14
+
15
+ - direct vLLM Engine endpoint for the coding model;
16
+ - vLLM Semantic Router `auto` endpoint when available;
17
+ - vLLM-Omni endpoint for multimodal tools;
18
+ - external OpenAI-compatible provider profile for required compatibility
19
+ validation. The primary acceptance path should still prove vLLM ecosystem
20
+ endpoints unless a specific run is intentionally testing an external
21
+ provider.
22
+
23
+ The planned AMD validation hosts are:
24
+
25
+ - `165.245.131.56`
26
+ - `134.199.199.149`
27
+
28
+ One host should run or expose direct vLLM Engine. The other should run or expose
29
+ vLLM-Omni. The exact host assignment can change, but the final report must
30
+ record endpoint addresses, model names, server flags, and any unavailable
31
+ capabilities.
32
+
33
+ For project acceptance, these AMD endpoints are not assumed to already exist.
34
+ The acceptance work includes gaining SSH/deployment access, installing or
35
+ starting the required runtime stack, choosing models that fit the node GPU
36
+ memory, exposing OpenAI-compatible endpoints, and recording deployment evidence.
37
+ The Inferoa product should still treat those services as configured
38
+ endpoints rather than embedding a general deployment controller.
39
+
40
+ The AMD validation hosts are disposable for this project validation. Existing
41
+ containers, model processes, ports, and partial deployments may be stopped and
42
+ replaced during deployment. The final report should record what was cleaned,
43
+ but the implementation does not need to preserve host state for backwards
44
+ compatibility.
45
+
46
+ The external provider path is also required. At minimum, setup and validation
47
+ must prove one OpenAI-compatible external provider by entering credentials,
48
+ probing `/v1/models`, selecting a model, and running a chat request through
49
+ Inferoa. One current validation target exposes `tke/deepseek-v4-flash` from
50
+ its model list. Raw API keys must not be committed to docs, config, progress
51
+ logs, or evidence artifacts.
52
+
53
+ ## Required Task Shape
54
+
55
+ Use a real coding task against a non-trivial local repository. The task must
56
+ force the agent to use the core coding workflow:
57
+
58
+ 1. inspect the repository;
59
+ 2. use `file_search`;
60
+ 3. read files;
61
+ 4. use code-intelligence where supported;
62
+ 5. edit files;
63
+ 6. run shell commands or tests;
64
+ 7. maintain a task/evidence ledger;
65
+ 8. handle at least one long-running or background process record;
66
+ 9. produce a final explanation with evidence.
67
+
68
+ The task should be large enough to make context optimization meaningful. It must
69
+ trigger context compression either naturally or through a controlled test setup.
70
+ The final report must show:
71
+
72
+ - when compression was triggered;
73
+ - what was preserved;
74
+ - what was moved into managed resources;
75
+ - prompt/token counts when available;
76
+ - cached-token evidence when direct vLLM exposes it.
77
+
78
+ ## Required Multimodal Coverage
79
+
80
+ The same acceptance run, or a linked continuation in the same durable session,
81
+ must exercise endpoint-backed Omni tools:
82
+
83
+ - image understanding;
84
+ - image generation;
85
+ - video generation.
86
+
87
+ If the deployed Omni endpoint also supports video understanding, include it.
88
+ If it does not, record the missing capability as an endpoint limitation rather
89
+ than a passed test.
90
+
91
+ Generated media should be stored as managed resources or artifacts, with stable
92
+ references in the session log. The agent should not paste large binary or media
93
+ payloads into the prompt.
94
+
95
+ ## Required Session And Supervisor Coverage
96
+
97
+ The final acceptance task must prove long-horizon behavior:
98
+
99
+ - session creation and resume;
100
+ - durable event log replay;
101
+ - single-writer lock behavior;
102
+ - background process event records;
103
+ - context compression and continued work after compression.
104
+
105
+ After T9, the same task must also prove `inferoa daemon` behavior:
106
+
107
+ - start a supervised run;
108
+ - detach the terminal;
109
+ - keep a long-running process or agent run alive;
110
+ - reattach and inspect status/logs;
111
+ - cancel or complete the supervised run;
112
+ - suspend and resume safely if a permission prompt occurs.
113
+
114
+ Before T9, daemon behavior can be recorded as not implemented, but the
115
+ event/process schema must already be compatible with it.
116
+
117
+ ## Pass Criteria
118
+
119
+ The final task passes only if all of these are true:
120
+
121
+ - the coding task is completed by Inferoa using a real model endpoint;
122
+ - built-in tools are used successfully, not only listed or mocked;
123
+ - context compression occurs and work continues after compression;
124
+ - image understanding works through the configured multimodal endpoint;
125
+ - image generation works through the configured multimodal endpoint;
126
+ - video generation works through the configured multimodal endpoint;
127
+ - session events, resources, prompt hashes, and endpoint evidence are persisted;
128
+ - direct vLLM cached-token evidence is recorded when the endpoint exposes it;
129
+ - failures are limited to explicitly unavailable endpoint capabilities and are
130
+ recorded with concrete endpoint/model details.
131
+
132
+ The project is not complete if the final evidence only shows unit tests, mock
133
+ servers, or manual calls outside the agent.
134
+
135
+ ## Runner
136
+
137
+ The prototype includes a real-endpoint acceptance runner:
138
+
139
+ ```bash
140
+ node dist/src/cli.js debug acceptance --daemon
141
+ ```
142
+
143
+ The runner checks configuration first and refuses to pass without:
144
+
145
+ - `model_setup.base_url`;
146
+ - `model_setup.model`;
147
+ - Omni `vision`, `image_generation`, and `video_generation` endpoint
148
+ `base_url` plus `model` values.
149
+
150
+ When configured, the acceptance workflow creates a durable session, forces
151
+ compression, asks the model to complete a real repository edit using built-in
152
+ tools, runs Omni multimodal tools through the agent tool loop, records endpoint
153
+ evidence, and validates daemon attach/detach/status/cancel behavior.
154
+
155
+ The final product acceptance must be driven from the TUI. The current CLI
156
+ runner can remain as an automation scaffold, but it does not by itself satisfy
157
+ final acceptance.
158
+
159
+ The TUI acceptance run must show per-turn cache evidence after each chat turn.
160
+ For direct vLLM this means cached prompt tokens, total prompt tokens, cache hit
161
+ rate, output tokens, endpoint mode, model, and request id when the endpoint
162
+ exposes those fields.
163
+
164
+ The runner verifies persisted evidence rather than trusting the prompt alone.
165
+ It checks for:
166
+
167
+ - required tool calls by category: `file_search`, read, edit,
168
+ shell/background process, git, todo, evidence, code intelligence, and Omni;
169
+ - background process start and stop/cancel events;
170
+ - context compression followed by later model or tool work;
171
+ - session resume evidence in the same durable session;
172
+ - managed resources;
173
+ - prompt hash and tool schema hash records;
174
+ - endpoint evidence and cached-token fields when the endpoint exposes them;
175
+ - daemon job status, attach, detach, and cancel records on the same session.
176
+
177
+ ## Final Report Requirements
178
+
179
+ The final report must include:
180
+
181
+ - repository and task description;
182
+ - configured provider, direct vLLM, SR, and Omni endpoints;
183
+ - model names;
184
+ - vLLM engine flags relevant to prefix caching, prompt token details, request
185
+ ids, chunked prefill, and tool calling;
186
+ - tool calls used by category;
187
+ - files changed;
188
+ - tests or commands run;
189
+ - context compression evidence;
190
+ - multimodal artifacts and resource ids;
191
+ - session id and resume evidence;
192
+ - daemon attach/detach evidence after T9;
193
+ - remaining endpoint limitations or blockers.
@@ -0,0 +1,21 @@
1
+ # Public Source Hygiene
2
+
3
+ Public Inferoa documents and source should describe Inferoa on its own
4
+ terms. Design research can inform private implementation work, but public
5
+ materials must avoid naming unrelated products as inspirations, competitors, or
6
+ implementation sources.
7
+
8
+ Allowed in this repository:
9
+
10
+ - Independent product positioning.
11
+ - Generalized terminal-agent design pressures.
12
+ - vLLM ecosystem-specific decisions.
13
+ - Original module contracts and roadmap.
14
+
15
+ Avoid in this repository:
16
+
17
+ - Naming unrelated coding agents as sources for UX or implementation choices.
18
+ - Copying prompts, UI language, tool schemas, command names, or file names from
19
+ unrelated products.
20
+ - Recording third-party implementation details that are not necessary for
21
+ Inferoa's own design.
@@ -0,0 +1,265 @@
1
+ # Roadmap
2
+
3
+ This roadmap supersedes the earlier CLI-first M0-M8 implementation labels.
4
+ The existing backend prototype is useful as scaffolding, but it does not satisfy
5
+ the product goal until the user workflow is TUI-first end to end.
6
+
7
+ ## Product Contract
8
+
9
+ Inferoa is a branded terminal application, not a collection of CLI
10
+ subcommands with a small readline fallback.
11
+
12
+ - `inferoa` launches the TUI by default.
13
+ - `inferoa "prompt"` launches the TUI and submits the initial prompt in the
14
+ new or resumed session.
15
+ - `inferoa setup` opens the TUI setup wizard.
16
+ - `inferoa --print "prompt"` is the explicit non-interactive path.
17
+ - This is a fast-development product, so there is no compatibility requirement
18
+ for the current CLI-first scaffold. Any command, flag, output format, or
19
+ workflow that conflicts with the TUI-first product contract should be removed
20
+ instead of preserved.
21
+ - JSON and narrow debug commands may exist only when they support tests or
22
+ acceptance automation. They should not shape the user workflow.
23
+ - The chat `/` command registry is a first-class TUI surface with its own
24
+ product-specific command set. It must not be a mirror of legacy CLI
25
+ subcommands.
26
+ - The user-facing identity stays simple: current directory plus session
27
+ id/title. Internal workspace, run, client, prompt epoch, and cache salt ids
28
+ remain implementation details.
29
+
30
+ ## T0: Goal Reset And UI Direction
31
+
32
+ - mark the earlier CLI-first prototype as scaffolding, not a complete product;
33
+ - lock the TUI-first entrypoint contract;
34
+ - use `docs/tui-product-design.md` as the UI product contract;
35
+ - define the Inferoa brand language for the terminal UI: inference-native,
36
+ fast, technical, cache-aware, and visually distinct;
37
+ - define core scenes: welcome, setup, chat, sessions, tools, endpoints, daemon,
38
+ and acceptance;
39
+ - define terminal animation rules for smooth streaming, tool progress, endpoint
40
+ probing, compression, and artifact creation;
41
+ - define the canonical TUI slash command registry and delete incompatible CLI
42
+ subcommands or chat commands from the active user path;
43
+ - update public docs after validation.
44
+
45
+ Validation:
46
+
47
+ - `inferoa --help` documents TUI-first behavior;
48
+ - roadmap and README no longer present CLI-first commands as the main path;
49
+ - no legacy command is retained solely for backwards compatibility.
50
+
51
+ ## T1: TUI Application Shell
52
+
53
+ - implement a real terminal app shell with transcript, input editor, status
54
+ line, overlays, selector lists, notifications, and keyboard bindings;
55
+ - add branded Inferoa welcome screen with workspace, endpoint, model,
56
+ session, git, context, and daemon status;
57
+ - add slash command palette for setup, status, sessions, tools, endpoints,
58
+ daemon, and acceptance views;
59
+ - add responsive terminal layout that works in narrow and wide panes;
60
+ - keep rendering deterministic and testable through snapshot or ANSI output
61
+ checks.
62
+
63
+ Validation:
64
+
65
+ - `inferoa` opens the TUI;
66
+ - slash command palette opens and can select a command;
67
+ - terminal resize does not corrupt the layout.
68
+
69
+ ## T2: TUI Setup Wizard
70
+
71
+ - move provider setup into TUI scenes, not plain CLI output;
72
+ - support direct vLLM setup with endpoint URL, vault-backed API key, `/v1/models`
73
+ probing, and model selection;
74
+ - support external OpenAI-compatible setup with endpoint URL, masked vault
75
+ secret entry, `/v1/models` probing, and model selection;
76
+ - support `auto` setup through vLLM Semantic Router with `/v1/models` probing
77
+ when available;
78
+ - support Omni endpoint setup for vision, image generation, video
79
+ understanding, video generation, audio understanding, and audio generation;
80
+ - after the user enters a key or endpoint, actively probe the endpoint and show
81
+ a model picker instead of asking the user to type model ids manually;
82
+ - write config only after a final review screen;
83
+ - never persist raw pasted API keys in config; setup stores secrets in the
84
+ local encrypted vault and writes only `api_key_ref`.
85
+
86
+ Validation:
87
+
88
+ - setup can configure the provided OpenAI-compatible provider and list
89
+ `tke/deepseek-v4-flash`;
90
+ - setup can configure direct vLLM and Omni endpoints from the AMD validation
91
+ deployment;
92
+ - invalid endpoints produce actionable TUI errors.
93
+
94
+ ## T3: Chat And Tool Interaction TUI
95
+
96
+ - render streaming assistant output in the transcript;
97
+ - render tool calls as cards with status, duration, bounded output, and
98
+ expandable managed resources;
99
+ - animate pending tools with smooth but bounded redraw cadence;
100
+ - render file edits with diff previews and approval controls where policy
101
+ requires approval;
102
+ - stabilize streaming edit previews so partial removals do not jitter before
103
+ matching additions arrive;
104
+ - render line-numbered diffs with added/removed colors, indentation markers,
105
+ syntax-highlighted context, and intra-line changed-token emphasis;
106
+ - render shell and background process tools with live output, stop controls,
107
+ and bounded buffers;
108
+ - render git, todo, evidence, and code-intelligence results in compact
109
+ workflow cards;
110
+ - support image and video artifacts as first-class transcript resources.
111
+
112
+ Validation:
113
+
114
+ - a real coding task uses file search, read, edit, shell/process, git, todo,
115
+ evidence, and code intelligence through the TUI;
116
+ - permission prompts are handled inside the TUI;
117
+ - file diff and shell/process output are visually inspectable without raw JSON.
118
+
119
+ ## T4: Sessions And Workspace UX
120
+
121
+ - add TUI session picker with title, id, status, last updated time, and
122
+ workspace path;
123
+ - add resume, archive, rename, and new-session flows;
124
+ - support multiple independent sessions per workspace with a single active
125
+ writer lock per session;
126
+ - show lock conflicts and stale lock recovery in the TUI;
127
+ - keep internal workspace ids and client ids out of normal user output.
128
+
129
+ Validation:
130
+
131
+ - a session can be resumed from the TUI and continues the same event log;
132
+ - concurrent terminals show clear session ownership state.
133
+
134
+ ## T5: Endpoint Evidence And vLLM Optimization
135
+
136
+ - record direct vLLM cached-token usage when exposed;
137
+ - record prompt hashes, tool schema hashes, endpoint request ids, response ids,
138
+ model ids, and usage metadata;
139
+ - record `/tokenize` availability as optional endpoint evidence when available,
140
+ but do not require it for workflow token budgeting;
141
+ - show endpoint capability status in the TUI without assuming direct access to
142
+ serving metrics;
143
+ - after every assistant turn, render a compact cache/usage footer with prompt
144
+ tokens, cached prompt tokens, cache hit rate, output tokens, endpoint mode,
145
+ model, and request id when available;
146
+ - expose a `/cache` or equivalent TUI view for recent turns and aggregate cache
147
+ evidence;
148
+ - keep stable prompt sections and deterministic tool schema order within each
149
+ prompt epoch.
150
+
151
+ Validation:
152
+
153
+ - endpoint evidence is persisted and visible from the TUI;
154
+ - direct vLLM cache evidence is recorded when the endpoint exposes it;
155
+ - turns without provider cache fields omit cache-hit fields entirely; cache
156
+ hit rate is shown only when the endpoint exposes cached prompt tokens.
157
+
158
+ ## T6: Context Compression UX
159
+
160
+ - estimate prompt size against configured context windows;
161
+ - compact older middle context at the configured threshold;
162
+ - mechanically prune large raw tool outputs into managed resources before
163
+ model summarization;
164
+ - render compression events in the transcript with what was preserved, moved,
165
+ and summarized;
166
+ - continue work after compression in the same session.
167
+
168
+ Validation:
169
+
170
+ - a controlled long task triggers compression;
171
+ - the agent continues after compression, surfaces compression status in the
172
+ TUI transcript, and persists resume/compression evidence.
173
+
174
+ ## T7: AMD Endpoint Deployment And Validation
175
+
176
+ Product code must not become a deployment controller, but the project
177
+ acceptance environment must be deployed and validated by the project team.
178
+
179
+ Planned AMD validation hosts:
180
+
181
+ - `165.245.131.56`
182
+ - `134.199.199.149`
183
+
184
+ Required deployment shape:
185
+
186
+ - one host exposes a direct vLLM Engine OpenAI-compatible endpoint for the
187
+ coding model;
188
+ - one host exposes vLLM-Omni OpenAI-compatible endpoints for multimodal
189
+ tools;
190
+ - deployed models must fit the host GPU memory and be supported by the required node
191
+ GPU/runtime stack;
192
+ - existing containers, model processes, and occupied ports on the validation
193
+ hosts may be cleaned during deployment. The runbook should record what was
194
+ stopped or replaced, but it does not need to preserve pre-existing services;
195
+ - the final report records endpoint URLs, model names, server flags, and
196
+ unavailable capabilities.
197
+
198
+ Validation:
199
+
200
+ - direct vLLM `/v1/models`, chat, tool calling, streaming, token usage, and
201
+ cached-token evidence work when exposed;
202
+ - Omni image understanding, image generation, and video generation work through
203
+ Inferoa tools;
204
+ - SSH/deployment access to both hosts is documented before this milestone can
205
+ pass.
206
+
207
+ ## T8: Auto Mode Through vLLM Semantic Router
208
+
209
+ - configure `auto` mode through the TUI;
210
+ - connect to vLLM Semantic Router as an endpoint, not as an owned deployment;
211
+ - pass stable session identity headers;
212
+ - preserve tool-loop continuity from the agent side;
213
+ - record router-visible model selection metadata when available.
214
+
215
+ Validation:
216
+
217
+ - TUI setup can configure SR;
218
+ - a coding session runs through SR and persists router evidence.
219
+
220
+ ## T9: Long-Horizon Supervisor TUI
221
+
222
+ - add daemon job view with queued, running, detached, cancelled, failed, and
223
+ complete states;
224
+ - support attach, detach, status, logs, and cancel from the TUI;
225
+ - keep long-running processes alive after terminal detach where possible;
226
+ - suspend safely when approval is required and resume after attach;
227
+ - transfer session writer ownership to the daemon while a supervised run is
228
+ active.
229
+
230
+ Validation:
231
+
232
+ - daemon attach, detach, status, and cancel work from the TUI on the same final
233
+ acceptance task.
234
+
235
+ ## T10: Final Real-Endpoint Acceptance
236
+
237
+ The project is complete only when the TUI-driven product completes a real
238
+ end-to-end coding task with actual configured endpoints.
239
+
240
+ Required coverage:
241
+
242
+ - complete a real coding task using a real model endpoint;
243
+ - use built-in tools: file search, read, edit, shell/process, git, todo,
244
+ evidence, and code intelligence where supported;
245
+ - trigger context compression and continue after compression;
246
+ - run image understanding through the configured Omni endpoint;
247
+ - run image generation through the configured Omni endpoint;
248
+ - run video generation through the configured Omni endpoint;
249
+ - persist session events, resources, prompt hashes, endpoint evidence, and
250
+ resume evidence;
251
+ - record direct vLLM cached-token evidence when exposed;
252
+ - validate daemon attach, detach, status, and cancel behavior on the same final
253
+ task.
254
+
255
+ ## Later
256
+
257
+ - local HTTP API;
258
+ - local web dashboard;
259
+ - standalone binary packaging;
260
+ - richer codegraph index layer;
261
+ - expanded multimodal workflows beyond endpoint-backed built-ins;
262
+ - Responses API continuation support;
263
+ - richer Semantic Router replay integration;
264
+ - remote or multi-machine long-horizon supervision;
265
+ - endpoint capability discovery for scheduler hints and cache diagnostics.