llm-logparser 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llm_logparser/cli/__init__.py +0 -0
- llm_logparser/cli/__main__.py +4 -0
- llm_logparser/cli/cli.py +289 -0
- llm_logparser/cli/common.py +72 -0
- llm_logparser/cli/config_apply.py +286 -0
- llm_logparser/cli/config_loader.py +95 -0
- llm_logparser/cli/config_model.py +599 -0
- llm_logparser/cli/handlers/__init__.py +25 -0
- llm_logparser/cli/handlers/analyze.py +149 -0
- llm_logparser/cli/handlers/chain.py +124 -0
- llm_logparser/cli/handlers/config.py +71 -0
- llm_logparser/cli/handlers/export.py +52 -0
- llm_logparser/cli/handlers/extract.py +33 -0
- llm_logparser/cli/handlers/parse.py +42 -0
- llm_logparser/cli/parser_builder.py +410 -0
- llm_logparser/cli/prompts.py +54 -0
- llm_logparser/core/analyzer_common.py +160 -0
- llm_logparser/core/analyzer_metrics.py +387 -0
- llm_logparser/core/analyzer_stats.py +226 -0
- llm_logparser/core/analyzer_timeline.py +104 -0
- llm_logparser/core/analyzer_tokens.py +326 -0
- llm_logparser/core/exporter.py +363 -0
- llm_logparser/core/i18n.py +235 -0
- llm_logparser/core/l1_derivation.py +236 -0
- llm_logparser/core/message_windows.py +111 -0
- llm_logparser/core/parser.py +546 -0
- llm_logparser/core/providers/__init__.py +15 -0
- llm_logparser/core/providers/anthropic/__init__.py +3 -0
- llm_logparser/core/providers/anthropic/adapter.py +5 -0
- llm_logparser/core/providers/anthropic/claude/__init__.py +3 -0
- llm_logparser/core/providers/anthropic/claude/adapter.py +129 -0
- llm_logparser/core/providers/openai/__init__.py +0 -0
- llm_logparser/core/providers/openai/adapter.py +5 -0
- llm_logparser/core/providers/openai/chatgpt/__init__.py +4 -0
- llm_logparser/core/providers/openai/chatgpt/adapter.py +293 -0
- llm_logparser/core/providers/openai/chatgpt/utils.py +170 -0
- llm_logparser/core/providers/openai/extractor.py +94 -0
- llm_logparser/core/providers/xai/__init__.py +0 -0
- llm_logparser/core/providers/xai/adapter.py +11 -0
- llm_logparser/core/providers/xai/grok/__init__.py +0 -0
- llm_logparser/core/providers/xai/grok/adapter.py +324 -0
- llm_logparser/core/sanitize.py +141 -0
- llm_logparser/core/schema_validation.py +386 -0
- llm_logparser/core/schemas/manifest.schema.json +41 -0
- llm_logparser/core/schemas/message.schema.json +41 -0
- llm_logparser/core/schemas/metrics.schema.json +129 -0
- llm_logparser/core/schemas/thread_stats.schema.json +62 -0
- llm_logparser/core/schemas/token_stats.schema.json +113 -0
- llm_logparser/core/utils.py +63 -0
- llm_logparser/i18n/en-US.yaml +235 -0
- llm_logparser/i18n/ja-JP.yaml +116 -0
- llm_logparser/l2_sqlite/__init__.py +3 -0
- llm_logparser/l2_sqlite/builder.py +73 -0
- llm_logparser/l2_sqlite/ingest_messages.py +72 -0
- llm_logparser/l2_sqlite/ingest_threads.py +63 -0
- llm_logparser/l2_sqlite/ingest_windows.py +85 -0
- llm_logparser/l2_sqlite/schema.py +57 -0
- llm_logparser-1.3.0.dist-info/METADATA +740 -0
- llm_logparser-1.3.0.dist-info/RECORD +63 -0
- llm_logparser-1.3.0.dist-info/WHEEL +5 -0
- llm_logparser-1.3.0.dist-info/entry_points.txt +3 -0
- llm_logparser-1.3.0.dist-info/licenses/LICENSE +21 -0
- llm_logparser-1.3.0.dist-info/top_level.txt +1 -0
|
File without changes
|
llm_logparser/cli/cli.py
ADDED
|
@@ -0,0 +1,289 @@
|
|
|
1
|
+
# src/llm_logparser/cli/cli.py
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import os
|
|
5
|
+
import sys
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from llm_logparser.cli.common import (
|
|
9
|
+
setup_logger,
|
|
10
|
+
validate_path,
|
|
11
|
+
validate_split_option,
|
|
12
|
+
)
|
|
13
|
+
from llm_logparser.cli.config_apply import (
|
|
14
|
+
apply_profile_defaults,
|
|
15
|
+
missing_required_fields,
|
|
16
|
+
parse_explicit_flags,
|
|
17
|
+
resolve_profile,
|
|
18
|
+
resolve_sanitize_policy,
|
|
19
|
+
)
|
|
20
|
+
from llm_logparser.cli.config_loader import load_config_with_discovery
|
|
21
|
+
from llm_logparser.cli.handlers import (
|
|
22
|
+
run_analyze_metrics,
|
|
23
|
+
run_analyze_sqlite_build,
|
|
24
|
+
run_analyze_stats,
|
|
25
|
+
run_analyze_tokens,
|
|
26
|
+
run_analyze_timeline,
|
|
27
|
+
run_chain,
|
|
28
|
+
run_config_command,
|
|
29
|
+
run_export,
|
|
30
|
+
run_extract,
|
|
31
|
+
run_parse,
|
|
32
|
+
)
|
|
33
|
+
from llm_logparser.cli.config_model import AppConfig, ConfigProfile
|
|
34
|
+
from llm_logparser.cli.parser_builder import build_parser
|
|
35
|
+
from llm_logparser.cli.prompts import (
|
|
36
|
+
interactive_enabled,
|
|
37
|
+
prompt_choice,
|
|
38
|
+
prompt_existing_file,
|
|
39
|
+
prompt_text,
|
|
40
|
+
)
|
|
41
|
+
from llm_logparser.core.i18n import _, set_locale
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _bootstrap_cli_locale(raw_argv: list[str]) -> str | None:
    """Return the locale requested on the command line, before argparse runs.

    Scans ``raw_argv`` left to right for ``--locale``/``--lang`` in either the
    ``--opt=value`` or the ``--opt value`` form and returns the first value
    found. Scanning stops at a bare ``--``. An empty inline value
    (``--locale=``) or an option with no following token yields ``None``.
    """
    option_names = ("--locale", "--lang")
    for position, arg in enumerate(raw_argv):
        if arg == "--":
            return None
        name, separator, inline_value = arg.partition("=")
        if separator:
            if name in option_names:
                return inline_value or None
            continue
        if arg in option_names:
            # Space-separated form: the value is the next token, if any.
            following = raw_argv[position + 1 : position + 2]
            return following[0] if following else None
    return None
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _missing_arg_message(command: str, missing: list[str]) -> str:
    """Build the multi-line error text for missing required arguments.

    The first line is the translated header for ``command``; each following
    line names one missing field with its translated hint, falling back to
    the generic hint for fields without a dedicated one.
    """
    hints = {
        "provider": _("error.missing_required_hint.provider"),
        "input": _("error.missing_required_hint.input"),
        "conversation_id": _("error.missing_required_hint.conversation_id"),
    }
    header = _("error.missing_required", command=command)
    bullets = [
        f" - {field}: {hints.get(field, _('error.missing_required_hint.generic'))}"
        for field in missing
    ]
    return "\n".join([header, *bullets])
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def _resolve_profile(
    args,
    *,
    can_prompt: bool,
) -> tuple[ConfigProfile | None, dict[str, ConfigProfile], AppConfig | None, Path | None]:
    """Load the config file and pick the profile to use for this run.

    Returns ``(profile, profiles, config, config_path)``. When no config is
    found the profile is ``None`` and the profile map is empty. If the
    resolver does not settle on a profile, prompting is allowed and more than
    one profile exists, the user is asked to choose (skipping is allowed).
    """
    config, config_path = load_config_with_discovery(args.config)
    if config is None:
        return None, {}, config, config_path

    profile, profiles = resolve_profile(config, args.profile)
    should_ask = profile is None and can_prompt and len(profiles) > 1
    if should_ask:
        picked_name = prompt_choice(
            _("runtime.profile.select"),
            list(profiles.keys()),
            allow_skip=True,
        )
        picked = profiles.get(picked_name) if picked_name is not None else None
        if picked is not None:
            profile = picked
    return profile, profiles, config, config_path
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def _apply_profile_input_defaults(
    args,
    profile: ConfigProfile | None,
    *,
    explicit_flags: set[str],
    config_path,
) -> None:
    """Apply profile defaults to ``args`` and settle an ambiguous input path.

    Does nothing without a profile. Otherwise profile defaults are applied
    (paths relative to the config file's directory when ``config_path`` is
    known). If the defaults report a non-empty ``input_candidates`` list while
    ``args.input`` is still unset for parse/export/chain/extract, the user is
    asked to pick one; in non-interactive mode the process prints an error to
    stderr and exits with status 2.
    """
    if profile is None:
        return

    base_dir = None if config_path is None else config_path.parent
    details = apply_profile_defaults(
        args,
        profile,
        explicit_flags,
        base_dir=base_dir,
    )
    candidates = details.get("input_candidates")

    can_prompt = interactive_enabled(
        non_interactive=args.non_interactive
        or os.getenv("LLM_LOGPARSER_NON_INTERACTIVE") == "1"
    )

    needs_choice = (
        args.command in ("parse", "export", "chain", "extract")
        and args.input is None
        and isinstance(candidates, list)
        and bool(candidates)
    )
    if not needs_choice:
        return

    if not can_prompt:
        print(
            _("runtime.profile.multiple_input_paths"),
            file=sys.stderr,
        )
        sys.exit(2)
    else:
        choice = prompt_choice(
            _("runtime.profile.select_input"),
            [str(candidate) for candidate in candidates],
        )
        if choice:
            args.input = Path(choice)
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def _prompt_missing_required(
    args,
    profile: ConfigProfile | None,
    *,
    can_prompt: bool,
    logger,
) -> None:
    """Fill in required arguments interactively, or exit when that is not possible.

    First handles the fields reported by ``missing_required_fields``: each
    missing one is prompted for (with the profile value as default when a
    profile exists), or, when prompting is disabled, the missing-argument
    message is logged and the process exits with status 2.

    Then, for the ``analyze`` subcommands, makes sure ``args.input`` is set
    and, for ``sqlite-build``, that ``args.provider`` is set, again either by
    prompting or by logging an error and exiting with status 2.
    """
    absent = missing_required_fields(args)
    if absent:
        if not can_prompt:
            logger.error(_missing_arg_message(args.command, absent))
            sys.exit(2)
        else:
            if "provider" in absent:
                suggested_provider = None if profile is None else profile.provider
                args.provider = prompt_text(
                    _("runtime.prompt.provider"),
                    default=suggested_provider,
                )
            if "input" in absent:
                suggested_input = (
                    (profile.input.path or profile.input.parsed)
                    if profile is not None
                    else None
                )
                args.input = prompt_existing_file(
                    _("runtime.prompt.input_file_path"),
                    default=suggested_input,
                )
            if "conversation_id" in absent:
                suggested_conversation = (
                    None if profile is None else profile.extract.conversation_id
                )
                args.conversation_id = prompt_text(
                    _("runtime.prompt.conversation_id"),
                    default=suggested_conversation,
                )

    if args.command != "analyze":
        return
    if args.analyze_command not in {"stats", "timeline", "tokens", "metrics", "sqlite-build"}:
        return

    if args.input is None:
        if not can_prompt:
            logger.error(_("runtime.analyze.missing_input", command=args.analyze_command))
            sys.exit(2)
        else:
            if args.analyze_command == "sqlite-build":
                label = _("runtime.prompt.analyze_provider_root")
            else:
                label = _("runtime.prompt.analyze_input")
            answer = prompt_text(label)
            # An empty answer leaves the input unset rather than creating Path("").
            args.input = Path(answer) if answer else None

    if args.analyze_command == "sqlite-build" and not args.provider:
        if not can_prompt:
            logger.error(_("runtime.analyze.missing_provider"))
            sys.exit(2)
        else:
            args.provider = prompt_text(_("runtime.prompt.provider_id"))
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
def _dispatch(args, logger) -> None:
    """Run the handler that matches ``args.command``.

    ``analyze`` is further routed on ``args.analyze_command``. ``viewer`` only
    logs a warning. Unknown commands or analyze subcommands do nothing.
    """
    # Each entry is a zero-argument callable so that a handler is only looked
    # up and invoked for the command that was actually selected.
    analyze_actions = {
        "stats": lambda: run_analyze_stats(args, logger),
        "metrics": lambda: run_analyze_metrics(args, logger),
        "tokens": lambda: run_analyze_tokens(args, logger),
        "timeline": lambda: run_analyze_timeline(args, logger),
        "sqlite-build": lambda: run_analyze_sqlite_build(args, logger),
    }

    def run_analyze() -> None:
        action = analyze_actions.get(args.analyze_command)
        if action is not None:
            action()

    actions = {
        "parse": lambda: run_parse(args, logger),
        "export": lambda: run_export(args, logger),
        "extract": lambda: run_extract(args, logger),
        "analyze": run_analyze,
        "chain": lambda: run_chain(args, logger),
        "viewer": lambda: logger.warning(_("runtime.viewer.todo")),
        "config": lambda: run_config_command(args, logger),
    }

    selected = actions.get(args.command)
    if selected is not None:
        selected()
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def main(argv: list[str] | None = None):
    """CLI entry point: parse arguments, resolve config/profile, run the command.

    Args:
        argv: Argument list without the program name. Defaults to
            ``sys.argv[1:]`` when ``None``.

    Exit codes (via ``sys.exit``):
        2  - missing required arguments or a path error
             (``FileNotFoundError``, ``IsADirectoryError``,
             ``NotADirectoryError``) raised while running the command.
        3  - ``PermissionError`` raised while running the command.
        99 - any other exception raised while running the command.

    The ``config`` command is dispatched directly, without the profile
    defaults, prompting and error-to-exit-code mapping used by the other
    commands.
    """
    raw_argv = list(sys.argv[1:] if argv is None else argv)
    # Pick up --locale/--lang before the parser is built; presumably so that
    # parser-level messages are already localized.
    set_locale(_bootstrap_cli_locale(raw_argv))
    parser = build_parser()
    args = parser.parse_args(raw_argv)
    explicit_flags = parse_explicit_flags(raw_argv)
    non_interactive = args.non_interactive or os.getenv("LLM_LOGPARSER_NON_INTERACTIVE") == "1"
    can_prompt = interactive_enabled(non_interactive=non_interactive)

    if args.command == "config":
        config, _config_path = load_config_with_discovery(args.config)
        config_locale = None
        if config is not None:
            profile, _profiles = resolve_profile(config, args.profile)
            if profile is not None:
                config_locale = profile.locale
        set_locale(args.locale, config_locale=config_locale)
        logger = setup_logger(args.log_level)
        _dispatch(args, logger)
        return

    profile: ConfigProfile | None = None
    config_path: Path | None = None
    config_locale: str | None = None
    if args.command in {"parse", "export", "chain", "extract", "analyze"}:
        # Keep analyze on the same locale resolution path as the other runtime
        # commands. Today it only consumes profile data for locale selection,
        # but that still needs the documented CLI -> env -> profile -> en-US
        # precedence instead of a command-specific exception.
        profile, _profiles, _config, config_path = _resolve_profile(
            args,
            can_prompt=can_prompt,
        )
        if profile is not None:
            config_locale = profile.locale
            _apply_profile_input_defaults(
                args,
                profile,
                explicit_flags=explicit_flags,
                config_path=config_path,
            )
        if args.command == "extract":
            args.sanitize_policy = resolve_sanitize_policy(profile)

    set_locale(args.locale, config_locale=config_locale)
    logger = setup_logger(args.log_level)
    _prompt_missing_required(args, profile, can_prompt=can_prompt, logger=logger)

    try:
        _dispatch(args, logger)
    except (FileNotFoundError, IsADirectoryError, NotADirectoryError) as e:
        # NotADirectoryError is a path problem too (validate_path raises it
        # for expect_dir); report it like the other path errors instead of
        # letting it fall through to the "unexpected" handler.
        logger.error(_("error.path", detail=e))
        sys.exit(2)
    except PermissionError as e:
        logger.error(_("error.permission", detail=e))
        sys.exit(3)
    except Exception as e:
        # Top-level boundary: log with traceback and exit with a distinct code.
        logger.exception(_("error.unexpected", detail=e))
        sys.exit(99)
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
# Public names of this module. setup_logger, validate_path and
# validate_split_option are imported from llm_logparser.cli.common and
# re-exported here alongside main.
__all__ = ["main", "setup_logger", "validate_path", "validate_split_option"]


# Run the CLI when this module is executed directly as a script.
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import os
|
|
5
|
+
import sys
|
|
6
|
+
from datetime import timezone as _dt_timezone
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from zoneinfo import ZoneInfo
|
|
10
|
+
|
|
11
|
+
from llm_logparser.core.i18n import _
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def setup_logger(level: str | None = None) -> logging.Logger:
    """Configure the shared CLI logger once and return it.

    A stdout handler with the ``[LEVEL] message`` format is attached only if
    the ``llm_logparser`` logger has no handlers yet, so repeated calls do not
    duplicate output. The level is re-applied on every call.

    Args:
        level: Level name (case-insensitive). When empty or ``None`` the
            ``LLM_LOGPARSER_LOGLEVEL`` environment variable is used, then
            ``"INFO"``.

    Returns:
        The configured ``llm_logparser`` logger. Unknown level names fall
        back to ``INFO``.
    """
    logger = logging.getLogger("llm_logparser")
    if not logger.handlers:
        handler = logging.StreamHandler(sys.stdout)
        handler.setFormatter(logging.Formatter("[%(levelname)s] %(message)s"))
        logger.addHandler(handler)

    raw_level = level or os.getenv("LLM_LOGPARSER_LOGLEVEL") or "INFO"
    resolved = getattr(logging, raw_level.strip().upper(), None)
    # Only integers are real levels. Some upper-case attributes of the logging
    # module (e.g. BASIC_FORMAT) are strings and would make setLevel raise.
    if not isinstance(resolved, int):
        resolved = logging.INFO
    logger.setLevel(resolved)
    return logger
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def validate_path(
    path: Path,
    *,
    must_exist: bool = True,
    expect_file: bool = False,
    expect_dir: bool = False,
) -> Path:
    """Expand ``~`` in ``path`` and check it against the requested constraints.

    Returns:
        The user-expanded path.

    Raises:
        FileNotFoundError: ``must_exist`` is set and the path does not exist.
        IsADirectoryError: ``expect_file`` is set and the path is a directory.
        NotADirectoryError: ``expect_dir`` is set and the path is not a directory.
    """
    expanded = path.expanduser()

    if must_exist:
        if not expanded.exists():
            raise FileNotFoundError(_("error.path_not_found", path=expanded))

    if expect_file:
        if expanded.is_dir():
            raise IsADirectoryError(_("error.path_expected_file", path=expanded))

    if expect_dir:
        if not expanded.is_dir():
            raise NotADirectoryError(_("error.path_expected_dir", path=expanded))

    return expanded
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def validate_split_option(raw: str | None) -> str | None:
    """Validate a ``--split`` value.

    ``None`` and blank strings yield ``None``. Otherwise the stripped value is
    returned (original casing kept) when it is ``auto`` or starts with
    ``size=`` / ``count=``, compared case-insensitively.

    Raises:
        SystemExit: for any other value.
    """
    if raw is None:
        return None

    trimmed = raw.strip()
    if trimmed == "":
        return None

    probe = trimmed.lower()
    accepted = probe == "auto" or probe.startswith(("size=", "count="))
    if not accepted:
        raise SystemExit(_("runtime.split_invalid", raw=raw))
    return trimmed
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def resolve_timezone(timezone_name: str, logger: logging.Logger):
    """Return the ``ZoneInfo`` for ``timezone_name``, or UTC if it cannot be built.

    Any failure to construct the zone is treated as best-effort: a warning is
    logged and ``datetime.timezone.utc`` is returned instead.
    """
    try:
        zone = ZoneInfo(timezone_name)
    except Exception:
        logger.warning(_("runtime.timezone_unknown", timezone_name=timezone_name))
        return _dt_timezone.utc
    else:
        return zone
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def write_or_print(rendered: str, out: Path | None) -> None:
    """Write ``rendered`` plus a trailing newline to ``out``, or print it.

    Without ``out`` the text goes to stdout via ``print``. With ``out`` the
    parent directories are created as needed and the file is written as UTF-8.

    Raises:
        IsADirectoryError: ``out`` exists and is a directory.
    """
    if not out:
        print(rendered)
        return

    if out.exists() and out.is_dir():
        raise IsADirectoryError(_("error.path_expected_file", path=out))

    out.parent.mkdir(parents=True, exist_ok=True)
    out.write_text(f"{rendered}\n", encoding="utf-8")
|
|
@@ -0,0 +1,286 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from argparse import Namespace
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from llm_logparser.cli.config_model import AppConfig, ConfigProfile
|
|
8
|
+
from llm_logparser.core.i18n import _
|
|
9
|
+
from llm_logparser.core.sanitize import SanitizePolicy
|
|
10
|
+
|
|
11
|
+
# Names of the argparse attributes that must be set for each command.
# missing_required_fields() looks commands up here; a command that is not
# listed is treated as having no required fields.
REQUIRED_FIELDS_BY_COMMAND: dict[str, list[str]] = {
    "parse": ["provider", "input"],
    "export": ["input"],
    "chain": ["provider", "input"],
    "extract": ["provider", "input", "conversation_id"],
}
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def parse_explicit_flags(argv: list[str]) -> set[str]:
    """Collect the option tokens that appear in ``argv``.

    Long options are recorded without any ``=value`` suffix; other tokens
    starting with ``-`` are recorded verbatim. Tokens that do not start with
    ``-`` are ignored, and everything from a bare ``--`` onwards is skipped.
    """
    seen: set[str] = set()
    for arg in argv:
        if arg == "--":
            break
        if arg.startswith("--"):
            name, _sep, _value = arg.partition("=")
            seen.add(name)
        elif arg.startswith("-"):
            seen.add(arg)
    return seen
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def cli_provided(explicit_flags: set[str], *flags: str) -> bool:
    """Return True if at least one of ``flags`` is present in ``explicit_flags``."""
    for candidate in flags:
        if candidate in explicit_flags:
            return True
    return False
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def resolve_profile(
    config: AppConfig,
    profile_name: str | None,
) -> tuple[ConfigProfile | None, dict[str, ConfigProfile]]:
    """Select a profile from ``config`` and return it with the full profile map.

    Precedence: the explicitly requested ``profile_name``, then the config's
    ``active_profile``, then the only profile if exactly one is defined.
    Otherwise the selected profile is ``None``.

    Raises:
        SystemExit: the requested or active profile name is not defined.
    """
    profiles = config.profiles

    # The explicit name wins; the active profile is only consulted without it.
    wanted = profile_name or config.active_profile
    if wanted:
        found = profiles.get(wanted)
        if found is None:
            raise SystemExit(_("runtime.config.profile_not_found", name=wanted))
        return found, profiles

    sole = next(iter(profiles.values())) if len(profiles) == 1 else None
    return sole, profiles
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def missing_required_fields(args: Namespace) -> list[str]:
    """List the required fields of ``args.command`` that are unset on ``args``.

    A field counts as unset when the attribute is absent, ``None``, or a
    string containing only whitespace. Order follows
    ``REQUIRED_FIELDS_BY_COMMAND``.
    """

    def is_unset(value) -> bool:
        if value is None:
            return True
        return isinstance(value, str) and not value.strip()

    return [
        name
        for name in REQUIRED_FIELDS_BY_COMMAND.get(args.command, [])
        if is_unset(getattr(args, name, None))
    ]
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def _resolve_path(value: Any, base_dir: Path | None) -> Path | None:
    """Turn a config value into a path, anchoring relative paths at ``base_dir``.

    Args:
        value: A ``Path`` or a non-blank string; anything else yields ``None``.
        base_dir: Directory that relative paths are resolved against. When
            ``None`` relative paths are returned as they are.

    Returns:
        ``None`` for unusable values; otherwise the path with a leading ``~``
        expanded. Absolute paths are returned unchanged, relative ones are
        joined to ``base_dir`` and resolved.
    """
    if isinstance(value, Path):
        p = value
    elif isinstance(value, str) and value.strip():
        p = Path(value)
    else:
        return None

    # Expand "~" before deciding absolute vs. relative; otherwise "~/logs"
    # would be joined onto base_dir and end up as "<base_dir>/~/logs".
    try:
        p = p.expanduser()
    except RuntimeError:
        # Home directory cannot be determined: keep the path as written.
        pass

    if p.is_absolute() or base_dir is None:
        return p
    return (base_dir / p).resolve()
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _input_candidates(profile: ConfigProfile, command: str, base_dir: Path | None) -> list[str]:
    """Return the profile's candidate input paths for ``command`` as strings.

    For ``export`` the profile's ``input.parsed`` is preferred when set. Next
    comes the single ``input.path``, then the ``input.paths`` list. Entries
    that cannot be resolved to a path are dropped; the result may be empty.
    """
    source = profile.input

    preferred = source.parsed if command == "export" else None
    single = preferred or source.path
    if single:
        resolved = _resolve_path(single, base_dir)
        return [] if resolved is None else [str(resolved)]

    if source.paths:
        resolved_all = (_resolve_path(item, base_dir) for item in source.paths)
        return [str(entry) for entry in resolved_all if entry is not None]

    return []
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _set_if_not_cli(
    args: Namespace,
    explicit_flags: set[str],
    attr: str,
    flags: tuple[str, ...],
    value: Any,
    *,
    transform=lambda x: x,
) -> None:
    """Assign ``transform(value)`` to ``args.attr`` unless the CLI already set it.

    Nothing happens when ``value`` is ``None`` or when any of ``flags`` was
    given explicitly on the command line.
    """
    skip = value is None or cli_provided(explicit_flags, *flags)
    if not skip:
        setattr(args, attr, transform(value))
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def apply_profile_defaults(
    args: Namespace,
    profile: ConfigProfile,
    explicit_flags: set[str],
    *,
    base_dir: Path | None = None,
) -> dict[str, Any]:
    """Copy profile settings onto ``args`` wherever the CLI did not set them.

    Every value goes through ``_set_if_not_cli``, so ``None`` profile values
    and options given explicitly on the command line are left alone. Path
    settings are resolved against ``base_dir``.

    Args:
        args: Parsed arguments; mutated in place.
        profile: Profile supplying the defaults.
        explicit_flags: Option tokens the user typed.
        base_dir: Directory that relative profile paths are anchored at.

    Returns:
        Extra information for the caller. Contains ``"input_candidates"``
        (a list of path strings) only when the profile offers more than one
        input and ``--input`` was not given; a single candidate is assigned
        to ``args.input`` directly.
    """
    command = args.command
    info: dict[str, Any] = {}

    def fill(attr: str, flags: tuple[str, ...], value: Any, **options) -> None:
        _set_if_not_cli(args, explicit_flags, attr, flags, value, **options)

    def fill_path(attr: str, flag: str, value: Any) -> None:
        fill(
            attr,
            (flag,),
            value,
            transform=lambda raw: _resolve_path(raw, base_dir) or Path(raw),
        )

    def fill_each(section: Any, pairs: tuple[tuple[str, str], ...]) -> None:
        for attr, flag in pairs:
            fill(attr, (flag,), getattr(section, attr))

    run_switches = (
        ("dry_run", "--dry-run"),
        ("fail_fast", "--fail-fast"),
        ("validate_schema", "--validate-schema"),
    )

    def fill_output_layout() -> None:
        # Formatting/splitting settings shared by export and chain.
        output = profile.output
        fill("formatting", ("--formatting",), output.formatting)
        fill("split", ("--split",), output.split)
        fill_each(
            output,
            (
                ("split_soft_overflow", "--split-soft-overflow"),
                ("tiny_tail_threshold", "--tiny-tail-threshold"),
                ("split_hard", "--split-hard"),
                ("split_preview", "--split-preview"),
            ),
        )

    # Settings shared across commands.
    fill(
        "log_level",
        ("--log-level",),
        profile.logging.level,
        transform=lambda v: str(v).upper(),
    )
    if command in ("export", "chain"):
        fill("timezone", ("--timezone", "--tz"), profile.timezone)
    if command in ("parse", "chain", "extract"):
        fill("provider", ("--provider",), profile.provider)

    if command in ("parse", "export", "chain", "extract") and not cli_provided(explicit_flags, "--input"):
        candidates = _input_candidates(profile, command, base_dir)
        if len(candidates) == 1:
            args.input = Path(candidates[0])
        elif len(candidates) > 1:
            # Ambiguous: let the caller decide how to choose.
            info["input_candidates"] = candidates

    # Command-specific settings.
    if command == "parse":
        fill_path("outdir", "--outdir", profile.parse.outdir)
        fill_each(profile.parse, run_switches)
    elif command == "export":
        fill_path("out", "--out", profile.output.path)
        fill_output_layout()
    elif command == "chain":
        fill_path("outdir", "--outdir", profile.chain.outdir)
        fill_path("export_outdir", "--export-outdir", profile.chain.export_outdir)
        fill_path("parsed_root", "--parsed-root", profile.chain.parsed_root)
        fill_output_layout()
        fill_each(profile.chain, run_switches)
    elif command == "extract":
        fill_path("outdir", "--outdir", profile.extract.outdir)
        fill("conversation_id", ("--conversation-id",), profile.extract.conversation_id)
        fill("dry_run", ("--dry-run",), profile.extract.dry_run)

    return info
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def resolve_sanitize_policy(profile: ConfigProfile | None) -> SanitizePolicy:
    """Build the sanitize policy from the profile, or the default policy.

    The defaults are used when there is no profile or the profile has no
    ``sanitize`` section; otherwise the section's settings are passed to
    ``SanitizePolicy.from_settings``.
    """
    settings = None if profile is None else profile.sanitize
    if settings is not None:
        return SanitizePolicy.from_settings(
            enabled=settings.enabled,
            replacement=settings.replacement,
            scope=settings.scope,
            extra_keywords=settings.extra_keywords,
            mask_patterns=settings.mask_patterns,
        )
    return SanitizePolicy.defaults()
|