@aws/ml-container-creator 1.0.3 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79):
  1. package/README.md +10 -1
  2. package/bin/cli.js +57 -0
  3. package/config/agent.json +16 -0
  4. package/infra/ci-harness/lib/ci-harness-stack.ts +43 -0
  5. package/package.json +5 -2
  6. package/pyproject.toml +3 -0
  7. package/servers/agent-knowledge/index.js +592 -0
  8. package/servers/agent-knowledge/package.json +15 -0
  9. package/servers/base-image-picker/index.js +65 -18
  10. package/servers/instance-sizer/index.js +32 -0
  11. package/servers/lib/catalogs/fleet-drivers.json +38 -0
  12. package/servers/lib/catalogs/model-arch-support.json +51 -0
  13. package/servers/lib/catalogs/model-servers.json +2842 -1730
  14. package/servers/lib/schemas/image-catalog.schema.json +12 -0
  15. package/src/agent/__init__.py +2 -0
  16. package/src/agent/__pycache__/__init__.cpython-312.pyc +0 -0
  17. package/src/agent/__pycache__/config_loader.cpython-312.pyc +0 -0
  18. package/src/agent/__pycache__/context.cpython-312.pyc +0 -0
  19. package/src/agent/__pycache__/health_check.cpython-312.pyc +0 -0
  20. package/src/agent/agent.py +513 -0
  21. package/src/agent/config_loader.py +215 -0
  22. package/src/agent/context.py +380 -0
  23. package/src/agent/data/capability-matrix.json +106 -0
  24. package/src/agent/health_check.py +341 -0
  25. package/src/agent/prompts/system.md +173 -0
  26. package/src/agent/requirements-agent.txt +3 -0
  27. package/src/app.js +6 -4
  28. package/src/lib/generated/cli-options.js +1 -1
  29. package/src/lib/generated/parameter-matrix.js +1 -1
  30. package/src/lib/generated/validation-rules.js +1 -1
  31. package/src/lib/mcp-query-runner.js +110 -3
  32. package/src/lib/prompt-runner.js +66 -22
  33. package/src/lib/template-variable-resolver.js +8 -0
  34. package/src/lib/train-config-builder.js +339 -0
  35. package/src/lib/tune-config-state.js +89 -68
  36. package/templates/do/.benchmark_writer.py +3 -0
  37. package/templates/do/.eval_helper.py +409 -0
  38. package/templates/do/.register_helper.py +185 -11
  39. package/templates/do/.train_build_request.py +102 -113
  40. package/templates/do/.train_helper.py +433 -0
  41. package/templates/do/__pycache__/.register_helper.cpython-312.pyc +0 -0
  42. package/templates/do/adapter +157 -0
  43. package/templates/do/benchmark +60 -3
  44. package/templates/do/config +6 -1
  45. package/templates/do/deploy.d/managed-inference.ejs +83 -0
  46. package/templates/do/evaluate +272 -0
  47. package/templates/do/lib/resolve-instance.sh +155 -0
  48. package/templates/do/register +5 -0
  49. package/templates/do/test +1 -0
  50. package/templates/do/train +879 -126
  51. package/templates/do/training/config.yaml +83 -11
  52. package/templates/do/training/dpo/accelerate_config.yaml +24 -0
  53. package/templates/do/training/dpo/defaults.yaml +26 -0
  54. package/templates/do/training/dpo/prompts.json +8 -0
  55. package/templates/do/training/dpo/train.py +363 -0
  56. package/templates/do/training/sft/accelerate_config.yaml +22 -0
  57. package/templates/do/training/sft/defaults.yaml +18 -0
  58. package/templates/do/training/sft/prompts.json +7 -0
  59. package/templates/do/training/sft/train.py +310 -0
  60. package/templates/do/tune +11 -2
  61. package/src/lib/auto-prompt-builder.js +0 -172
  62. package/src/lib/cli-handler.js +0 -529
  63. package/src/lib/community-reports-validator.js +0 -91
  64. package/src/lib/configuration-exporter.js +0 -204
  65. package/src/lib/dataset-slug.js +0 -152
  66. package/src/lib/docker-introspection-validator.js +0 -51
  67. package/src/lib/known-flags-validator.js +0 -200
  68. package/src/lib/schema-validator.js +0 -157
  69. package/src/lib/train-config-parser.js +0 -136
  70. package/src/lib/train-config-persistence.js +0 -143
  71. package/src/lib/train-config-validator.js +0 -112
  72. package/src/lib/train-feedback.js +0 -46
  73. package/src/lib/train-idempotency.js +0 -97
  74. package/src/lib/train-request-builder.js +0 -120
  75. package/src/lib/tune-dataset-validator.js +0 -279
  76. package/src/lib/tune-output-resolver.js +0 -66
  77. package/templates/do/.train_poll_parser.py +0 -135
  78. package/templates/do/.train_status_parser.py +0 -187
  79. /package/templates/do/training/{train.py → custom/train.py} +0 -0
@@ -0,0 +1,215 @@
1
+ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ """Agent configuration loader.
5
+
6
+ Reads config/agent.json, applies MCC_* environment variable overrides,
7
+ validates values, and returns a resolved AgentConfig dataclass.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import json
13
+ import os
14
+ import sys
15
+ from dataclasses import dataclass
16
+ from pathlib import Path
17
+ from typing import Any, Callable
18
+
19
+
20
@dataclass(frozen=True)
class AgentConfig:
    """Fully resolved agent settings.

    The dataclass is frozen, so attributes cannot be rebound after
    construction. The list-valued attributes are ordinary lists, however,
    and their *contents* remain mutable.

    Attributes:
        model_id: Identifier of the model the agent uses.
        mcp_servers: Names of the MCP servers to use.
        input_cost_per_1k: Input cost rate (presumably per 1,000 input
            tokens — confirm against the consumer of this value).
        output_cost_per_1k: Output cost rate (presumably per 1,000 output
            tokens — confirm against the consumer of this value).
        exit_commands: Command words associated with exiting
            (presumably typed by the user to end a session).
        reload_commands: Command words associated with reloading.
        mcp_server_timeout: MCP server timeout (units are not stated in
            this module — presumably seconds; confirm).
    """

    model_id: str
    mcp_servers: list[str]
    input_cost_per_1k: float
    output_cost_per_1k: float
    exit_commands: list[str]
    reload_commands: list[str]
    mcp_server_timeout: int
31
+
32
+
33
# Hardcoded fallback values, used for any field that neither an MCC_*
# environment variable nor config/agent.json supplies with a valid value.
# NOTE(review): Bedrock inference-profile IDs usually carry a "-v1:0" style
# suffix — confirm that this default model ID resolves as written.
_DEFAULTS = AgentConfig(
    model_id="us.anthropic.claude-sonnet-4-20250514",
    input_cost_per_1k=0.003,
    output_cost_per_1k=0.015,
    mcp_server_timeout=30,
    mcp_servers=[
        "instance-sizer",
        "base-image-picker",
        "model-picker",
        "workload-picker",
        "e2e-status",
        "agent-knowledge",
    ],
    exit_commands=["exit", "quit", "bye", "q"],
    reload_commands=["reload"],
)
49
+
50
+
51
+ def _warn(msg: str) -> None:
52
+ """Emit a config warning to stderr."""
53
+ print(f"[config] warning: {msg}", file=sys.stderr)
54
+
55
+
56
+ def _resolve_field(
57
+ field_name: str,
58
+ env_var: str | None,
59
+ file_value: Any | None,
60
+ default_value: Any,
61
+ parser: Callable[[str], Any],
62
+ validator: Callable[[Any], bool],
63
+ ) -> Any:
64
+ """Resolve a single config field using the precedence chain.
65
+
66
+ 1. Try env var → parse → validate
67
+ 2. Try file value → validate
68
+ 3. Return default
69
+ """
70
+ # 1. Environment override
71
+ if env_var:
72
+ raw = os.environ.get(env_var)
73
+ if raw is not None:
74
+ try:
75
+ parsed = parser(raw)
76
+ if validator(parsed):
77
+ return parsed
78
+ else:
79
+ _warn(f"{env_var}={raw!r} failed validation, skipping")
80
+ except (ValueError, TypeError) as e:
81
+ _warn(f"{env_var}={raw!r} cannot be parsed: {e}")
82
+
83
+ # 2. Config file value
84
+ if file_value is not None:
85
+ if validator(file_value):
86
+ return file_value
87
+ else:
88
+ _warn(
89
+ f"config field '{field_name}' has invalid value {file_value!r}, using default"
90
+ )
91
+
92
+ # 3. Hardcoded default
93
+ return default_value
94
+
95
+
96
def load_agent_config(config_path: Path | None = None) -> AgentConfig:
    """Load, validate, and resolve agent configuration.

    Resolution order per parameter (highest to lowest):
        1. MCC_* environment variable (if set and valid)
        2. Value from config/agent.json (if file exists and value is valid)
        3. Hardcoded default

    A config file that is missing, unreadable, not valid JSON, or whose
    top-level value is not a JSON object produces a warning and is treated
    as empty; this function does not raise for any of those conditions.

    Args:
        config_path: Override path to the JSON config file.
            Defaults to <package_root>/config/agent.json.

    Returns:
        Fully-resolved AgentConfig instance. Its list fields are fresh
        copies, so mutating them never alters the module-level defaults.
    """
    if config_path is None:
        package_root = Path(__file__).resolve().parent.parent.parent
        config_path = package_root / "config" / "agent.json"

    # Read and parse the config file; every failure mode degrades to {}.
    file_data: dict[str, Any] = {}
    if config_path.exists():
        try:
            loaded = json.loads(config_path.read_text(encoding="utf-8"))
        except OSError as e:
            # e.g. the path is a directory or is not readable.
            _warn(f"config file '{config_path}' could not be read: {e}")
        except ValueError as e:
            # JSONDecodeError and UnicodeDecodeError both derive from ValueError.
            _warn(f"config file '{config_path}' contains invalid JSON: {e}")
        else:
            if isinstance(loaded, dict):
                file_data = loaded
            else:
                # Valid JSON such as [] or "x" has no .get(); reject it here
                # instead of crashing during field resolution below.
                _warn(
                    f"config file '{config_path}' must contain a JSON object, "
                    "using defaults"
                )
    else:
        _warn(f"config file '{config_path}' not found, using defaults")

    # Parsers (environment string -> typed value)
    def _parse_str(raw: str) -> str:
        return raw

    def _parse_list(raw: str) -> list[str]:
        # Comma-separated; blank items (from "", "a,,b", trailing commas)
        # are dropped so they never become empty-string entries.
        stripped_items = (item.strip() for item in raw.split(","))
        return [item for item in stripped_items if item]

    # Validators
    def _is_number(v: Any) -> bool:
        # bool is a subclass of int; JSON true/false must not pass as numbers.
        return isinstance(v, (int, float)) and not isinstance(v, bool)

    def _valid_str(v: Any) -> bool:
        return isinstance(v, str)

    def _valid_list_str(v: Any) -> bool:
        return isinstance(v, list) and all(isinstance(s, str) for s in v)

    def _valid_float_nn(v: Any) -> bool:
        # The chained comparison rejects negatives, NaN, and infinity.
        return _is_number(v) and 0 <= v < float("inf")

    def _valid_int_nn(v: Any) -> bool:
        return isinstance(v, int) and not isinstance(v, bool) and v >= 0

    def _resolve(
        key: str,
        env_var: str,
        default: Any,
        parser: Callable[[str], Any],
        validator: Callable[[Any], bool],
    ) -> Any:
        # Thin wrapper that looks the key up in the parsed file data.
        return _resolve_field(
            key, env_var, file_data.get(key), default, parser, validator
        )

    # Resolve each field
    model_id = _resolve(
        "modelId", "MCC_MODEL_ID", _DEFAULTS.model_id, _parse_str, _valid_str
    )
    mcp_servers = _resolve(
        "mcpServers",
        "MCC_MCP_SERVERS",
        _DEFAULTS.mcp_servers,
        _parse_list,
        _valid_list_str,
    )
    input_cost_per_1k = _resolve(
        "inputCostPer1k",
        "MCC_INPUT_COST_PER_1K",
        _DEFAULTS.input_cost_per_1k,
        float,
        _valid_float_nn,
    )
    output_cost_per_1k = _resolve(
        "outputCostPer1k",
        "MCC_OUTPUT_COST_PER_1K",
        _DEFAULTS.output_cost_per_1k,
        float,
        _valid_float_nn,
    )
    exit_commands = _resolve(
        "exitCommands",
        "MCC_EXIT_COMMANDS",
        _DEFAULTS.exit_commands,
        _parse_list,
        _valid_list_str,
    )
    reload_commands = _resolve(
        "reloadCommands",
        "MCC_RELOAD_COMMANDS",
        _DEFAULTS.reload_commands,
        _parse_list,
        _valid_list_str,
    )
    mcp_server_timeout = _resolve(
        "mcpServerTimeout",
        "MCC_MCP_SERVER_TIMEOUT",
        _DEFAULTS.mcp_server_timeout,
        int,
        _valid_int_nn,
    )

    # Copy the lists: a resolved value may be the very list object held by
    # _DEFAULTS (or by file_data), and callers must not be able to mutate it.
    return AgentConfig(
        model_id=model_id,
        mcp_servers=list(mcp_servers),
        input_cost_per_1k=input_cost_per_1k,
        output_cost_per_1k=output_cost_per_1k,
        exit_commands=list(exit_commands),
        reload_commands=list(reload_commands),
        mcp_server_timeout=mcp_server_timeout,
    )
@@ -0,0 +1,380 @@
1
+ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ """Project context reader for the Strands agent.
5
+
6
+ Parses all project configuration files into a structured dict for prompt injection.
7
+ Pure file I/O and regex/YAML parsing — no subprocess calls.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import json
13
+ import os
14
+ import re
15
+ from pathlib import Path
16
+ from typing import Any
17
+
18
+ import yaml
19
+
20
+
21
+ # Regex patterns for shell export parsing
22
+ # Matches: export KEY="VALUE" or export KEY='VALUE' or export KEY=VALUE
23
+ _EXPORT_QUOTED_RE = re.compile(
24
+ r"""^export\s+([A-Za-z_][A-Za-z0-9_]*)=["'](.*)["']\s*$"""
25
+ )
26
+ # Matches: export KEY=${KEY:-DEFAULT}
27
+ _EXPORT_DEFAULT_RE = re.compile(
28
+ r"""^export\s+([A-Za-z_][A-Za-z0-9_]*)=\$\{[A-Za-z_][A-Za-z0-9_]*:-([^}]*)\}\s*$"""
29
+ )
30
+ # Matches: export KEY=VALUE (unquoted, no spaces in value)
31
+ _EXPORT_BARE_RE = re.compile(
32
+ r"""^export\s+([A-Za-z_][A-Za-z0-9_]*)=([^\s"'$][^\s]*)\s*$"""
33
+ )
34
+ # Matches: export KEY="" (empty quoted value)
35
+ _EXPORT_EMPTY_RE = re.compile(
36
+ r"""^export\s+([A-Za-z_][A-Za-z0-9_]*)=["']["']\s*$"""
37
+ )
38
+
39
+ # Dockerfile patterns
40
+ _FROM_RE = re.compile(r"^FROM\s+(.+?)(?:\s+AS\s+\S+)?\s*$", re.IGNORECASE)
41
+ _ENTRYPOINT_RE = re.compile(r"^ENTRYPOINT\s+(.+)\s*$", re.IGNORECASE)
42
+
43
+
44
class ProjectContext:
    """Reads and structures all project configuration for the agent.

    Parses do/config, do/ic/*.conf, do/training/config.yaml, Dockerfile,
    do/adapters/*.conf, the bootstrap profile, and user context files into
    a unified dict suitable for LLM prompt injection.

    Every reader tolerates files that are missing, unreadable, or not valid
    UTF-8: such files yield an empty/None result rather than an exception.
    """

    def __init__(self, project_dir: str) -> None:
        """Initialize with the project root directory.

        Args:
            project_dir: Absolute or relative path to the project root
                (the directory containing do/config).
        """
        self.project_dir = Path(project_dir).resolve()

    def load(self) -> dict[str, Any]:
        """Load all context. Returns structured dict for prompt injection.

        Gracefully handles missing files — partial context is returned with
        a ``_missing`` field listing files that could not be parsed. The
        ``_missing`` key is only present when at least one entry exists.

        Returns:
            Dict with project configuration structured for prompt injection.
        """
        missing: list[str] = []

        # Call order determines the order of entries in ``missing``.
        do_config = self._parse_do_config(missing)
        ic_env_vars = self._parse_ic_confs(missing)
        training_config = self._parse_training_config(missing)
        dockerfile_info = self._parse_dockerfile(missing)
        adapters = self._parse_adapters(missing)
        profile = self._load_profile(missing)
        user_context = self._load_user_context(missing)

        # Both boolean-like flags are compared case-insensitively so that
        # "TRUE"/"True" behave the same for ENABLE_LORA and ENDPOINT_EXTERNAL.
        lora_enabled = do_config.get("ENABLE_LORA", "").lower() == "true"
        endpoint_is_external = (
            do_config.get("ENDPOINT_EXTERNAL", "").lower() == "true"
        )

        context: dict[str, Any] = {
            "project_name": do_config.get("PROJECT_NAME"),
            "engine": do_config.get("MODEL_SERVER"),
            "deployment_target": do_config.get("DEPLOYMENT_TARGET"),
            "model": do_config.get("HF_MODEL_ID") or do_config.get("MODEL_NAME"),
            "instance_type": do_config.get("INSTANCE_TYPE"),
            "aws_region": do_config.get("AWS_REGION"),
            "lora_enabled": lora_enabled,
            "existing_endpoint": (
                do_config.get("ENDPOINT_NAME") if endpoint_is_external else None
            ),
            "do_config_vars": do_config,
            "ic_env_vars": ic_env_vars,
            "training_config": training_config,
            "base_image": dockerfile_info.get("base_image"),
            "entrypoint": dockerfile_info.get("entrypoint"),
            "adapters": adapters,
            "profile": profile,
            "user_context": user_context,
        }

        if missing:
            context["_missing"] = missing

        return context

    @staticmethod
    def _read_text(path: Path) -> str | None:
        """Return the UTF-8 text of ``path``, or None if it cannot be read.

        Catches both I/O failures (OSError) and undecodable bytes
        (UnicodeDecodeError, which is *not* an OSError) so that a single
        corrupt file cannot abort loading of the whole context.
        """
        try:
            return path.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError):
            return None

    @staticmethod
    def _strip_from_options(image_spec: str) -> str:
        """Drop leading ``--option`` tokens from a Dockerfile FROM argument.

        ``FROM --platform=linux/amd64 python:3.12`` should report
        ``python:3.12`` as the base image, not the platform flag. A spec
        that does not start with ``--`` is returned stripped but otherwise
        unchanged; a spec consisting solely of a flag is left as is.
        """
        spec = image_spec.strip()
        if not spec.startswith("--"):
            return spec
        tokens = spec.split()
        while len(tokens) > 1 and tokens[0].startswith("--"):
            tokens.pop(0)
        return " ".join(tokens)

    def _parse_do_config(self, missing: list[str]) -> dict[str, str]:
        """Parse do/config — regex for export KEY=VALUE lines.

        See ``_parse_shell_exports`` for the recognised line forms.

        Args:
            missing: Accumulator list for files that could not be found/parsed.

        Returns:
            Dict of variable names to their values ({} when do/config is
            not a regular file, in which case "do/config" is recorded).
        """
        config_path = self.project_dir / "do" / "config"
        if not config_path.is_file():
            missing.append("do/config")
            return {}

        return self._parse_shell_exports(config_path)

    def _parse_ic_confs(self, missing: list[str]) -> dict[str, dict[str, str]]:
        """Parse do/ic/*.conf — exported variables grouped by filename.

        Args:
            missing: Accumulator list for files that could not be found/parsed.

        Returns:
            Dict mapping conf filename (without .conf) to a dict of variables.
        """
        ic_dir = self.project_dir / "do" / "ic"
        if not ic_dir.is_dir():
            missing.append("do/ic/")
            return {}

        conf_files = sorted(ic_dir.glob("*.conf"))
        if not conf_files:
            missing.append("do/ic/*.conf")
            return {}

        return {
            conf_path.stem: self._parse_shell_exports(conf_path)
            for conf_path in conf_files
        }

    def _parse_training_config(self, missing: list[str]) -> dict[str, Any] | None:
        """Parse do/training/config.yaml via yaml.safe_load().

        Args:
            missing: Accumulator list for files that could not be found/parsed.

        Returns:
            Parsed YAML dict, or None if the file is missing, unreadable,
            not valid YAML, or its top-level value is not a mapping.
        """
        yaml_path = self.project_dir / "do" / "training" / "config.yaml"
        if not yaml_path.is_file():
            missing.append("do/training/config.yaml")
            return None

        text = self._read_text(yaml_path)
        if text is None:
            missing.append("do/training/config.yaml (parse error)")
            return None

        try:
            data = yaml.safe_load(text)
        except yaml.YAMLError:
            missing.append("do/training/config.yaml (parse error)")
            return None

        # An empty file loads as None and a scalar/list is not usable either.
        if not isinstance(data, dict):
            missing.append("do/training/config.yaml (invalid format)")
            return None
        return data

    def _parse_dockerfile(self, missing: list[str]) -> dict[str, str | None]:
        """Extract FROM image and ENTRYPOINT from Dockerfile.

        When an instruction occurs more than once, the last occurrence wins
        (for multi-stage builds the final stage is what runs). Leading
        ``--option`` flags on FROM (e.g. ``--platform=...``) are not part of
        the reported base image.

        Args:
            missing: Accumulator list for files that could not be found/parsed.

        Returns:
            Dict with 'base_image' and 'entrypoint' keys (values may be None).
        """
        result: dict[str, str | None] = {"base_image": None, "entrypoint": None}

        dockerfile_path = self.project_dir / "Dockerfile"
        if not dockerfile_path.is_file():
            missing.append("Dockerfile")
            return result

        text = self._read_text(dockerfile_path)
        if text is None:
            missing.append("Dockerfile (read error)")
            return result

        for line in text.splitlines():
            stripped = line.strip()

            from_match = _FROM_RE.match(stripped)
            if from_match:
                result["base_image"] = self._strip_from_options(from_match.group(1))
                continue

            entry_match = _ENTRYPOINT_RE.match(stripped)
            if entry_match:
                result["entrypoint"] = entry_match.group(1).strip()

        return result

    def _parse_adapters(self, missing: list[str]) -> list[dict[str, Any]]:
        """List do/adapters/*.conf with adapter names and key variables.

        Args:
            missing: Accumulator list for files that could not be found/parsed.

        Returns:
            List of dicts with 'name' and 'vars' for each adapter conf file,
            ordered by sorted file path.
        """
        adapters_dir = self.project_dir / "do" / "adapters"
        if not adapters_dir.is_dir():
            missing.append("do/adapters/")
            return []

        conf_files = sorted(adapters_dir.glob("*.conf"))
        if not conf_files:
            missing.append("do/adapters/*.conf")
            return []

        return [
            {"name": conf_path.stem, "vars": self._parse_shell_exports(conf_path)}
            for conf_path in conf_files
        ]

    def _load_profile(self, missing: list[str]) -> dict[str, Any] | None:
        """Load bootstrap profile from ~/.ml-container-creator/config.json.

        Reads the config file, finds the active profile, and returns its
        config object.

        Args:
            missing: Accumulator list for files that could not be found/parsed.

        Returns:
            ``{"name": ..., "config": ...}`` for the active profile; a dict
            with ``_raw`` and ``_note`` keys when no usable active profile
            can be identified; or None if the file is missing, unreadable,
            not valid JSON, or not a JSON object.
        """
        config_path = Path.home() / ".ml-container-creator" / "config.json"
        if not config_path.is_file():
            missing.append("~/.ml-container-creator/config.json")
            return None

        text = self._read_text(config_path)
        if text is None:
            missing.append("~/.ml-container-creator/config.json (parse error)")
            return None

        try:
            data = json.loads(text)
        except json.JSONDecodeError:
            missing.append("~/.ml-container-creator/config.json (parse error)")
            return None

        if not isinstance(data, dict):
            missing.append("~/.ml-container-creator/config.json (invalid format)")
            return None

        active_name = data.get("activeProfile")
        profiles = data.get("profiles")

        # JSON object keys are always strings, so a non-string activeProfile
        # can never name a profile (and an unhashable one would make the
        # lookup below raise TypeError).
        if (
            not active_name
            or not isinstance(active_name, str)
            or not isinstance(profiles, dict)
        ):
            return {"_raw": data, "_note": "no active profile set"}

        profile_config = profiles.get(active_name)
        if profile_config is None:
            return {
                "_raw": data,
                "_note": f"active profile '{active_name}' not found in profiles",
            }

        return {"name": active_name, "config": profile_config}

    def _load_user_context(self, missing: list[str]) -> str | None:
        """Read .mlcc-agent-context.md if it exists in project root.

        This file allows teams to inject custom patterns, conventions,
        and project-specific guidance into the agent's system prompt.

        Args:
            missing: Accumulator list for files that could not be found/parsed.

        Returns:
            File contents as string, or None if the file doesn't exist or
            cannot be read/decoded (only the latter is recorded in
            ``missing``, because the file itself is optional).
        """
        context_path = self.project_dir / ".mlcc-agent-context.md"
        if not context_path.is_file():
            # This is optional — do not add to missing
            return None

        text = self._read_text(context_path)
        if text is None:
            missing.append(".mlcc-agent-context.md (read error)")
        return text

    def _parse_shell_exports(self, file_path: Path) -> dict[str, str]:
        """Parse shell export statements from a file.

        Handles multiple patterns:
            - export KEY="VALUE"
            - export KEY='VALUE'
            - export KEY=${KEY:-DEFAULT}  (DEFAULT is taken as the value)
            - export KEY=BARE_VALUE
            - Multi-line values with trailing backslash continuation

        Lines starting with # are treated as comments and skipped.
        Lines that are not export statements are skipped. When a variable
        is exported more than once, the last occurrence wins.

        Args:
            file_path: Path to the shell file to parse.

        Returns:
            Dict of variable names to their string values ({} if the file
            cannot be read or decoded).
        """
        content = self._read_text(file_path)
        if content is None:
            return {}

        variables: dict[str, str] = {}
        line_iter = iter(content.splitlines())

        for raw_line in line_iter:
            stripped = raw_line.strip()

            # Skip comments and empty lines
            if not stripped or stripped.startswith("#"):
                continue

            # Join backslash-continued lines; the iterator is advanced so
            # consumed continuation lines are not re-parsed on their own.
            while stripped.endswith("\\"):
                continuation = next(line_iter, None)
                if continuation is None:
                    break
                stripped = stripped[:-1] + continuation.strip()

            # Try each pattern in order of specificity; first match wins.
            match = (
                _EXPORT_EMPTY_RE.match(stripped)
                or _EXPORT_QUOTED_RE.match(stripped)
                or _EXPORT_DEFAULT_RE.match(stripped)
                or _EXPORT_BARE_RE.match(stripped)
            )
            if match is None:
                continue

            # The empty-value pattern captures only the name (no group 2).
            is_empty_form = match.re is _EXPORT_EMPTY_RE
            variables[match.group(1)] = "" if is_empty_form else match.group(2)

        return variables