openai-gabriel 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. gabriel/__init__.py +61 -0
  2. gabriel/_version.py +1 -0
  3. gabriel/api.py +2284 -0
  4. gabriel/cli/__main__.py +60 -0
  5. gabriel/core/__init__.py +7 -0
  6. gabriel/core/llm_client.py +34 -0
  7. gabriel/core/pipeline.py +18 -0
  8. gabriel/core/prompt_template.py +152 -0
  9. gabriel/prompts/__init__.py +1 -0
  10. gabriel/prompts/bucket_prompt.jinja2 +113 -0
  11. gabriel/prompts/classification_prompt.jinja2 +50 -0
  12. gabriel/prompts/codify_prompt.jinja2 +95 -0
  13. gabriel/prompts/comparison_prompt.jinja2 +60 -0
  14. gabriel/prompts/deduplicate_prompt.jinja2 +41 -0
  15. gabriel/prompts/deidentification_prompt.jinja2 +112 -0
  16. gabriel/prompts/extraction_prompt.jinja2 +61 -0
  17. gabriel/prompts/filter_prompt.jinja2 +31 -0
  18. gabriel/prompts/ideation_prompt.jinja2 +80 -0
  19. gabriel/prompts/merge_prompt.jinja2 +47 -0
  20. gabriel/prompts/paraphrase_prompt.jinja2 +17 -0
  21. gabriel/prompts/rankings_prompt.jinja2 +49 -0
  22. gabriel/prompts/ratings_prompt.jinja2 +50 -0
  23. gabriel/prompts/regional_analysis_prompt.jinja2 +40 -0
  24. gabriel/prompts/seed.jinja2 +43 -0
  25. gabriel/prompts/snippets.jinja2 +117 -0
  26. gabriel/tasks/__init__.py +63 -0
  27. gabriel/tasks/_attribute_utils.py +69 -0
  28. gabriel/tasks/bucket.py +432 -0
  29. gabriel/tasks/classify.py +562 -0
  30. gabriel/tasks/codify.py +1033 -0
  31. gabriel/tasks/compare.py +235 -0
  32. gabriel/tasks/debias.py +1460 -0
  33. gabriel/tasks/deduplicate.py +341 -0
  34. gabriel/tasks/deidentify.py +316 -0
  35. gabriel/tasks/discover.py +524 -0
  36. gabriel/tasks/extract.py +455 -0
  37. gabriel/tasks/filter.py +169 -0
  38. gabriel/tasks/ideate.py +782 -0
  39. gabriel/tasks/merge.py +464 -0
  40. gabriel/tasks/paraphrase.py +531 -0
  41. gabriel/tasks/rank.py +2041 -0
  42. gabriel/tasks/rate.py +347 -0
  43. gabriel/tasks/seed.py +465 -0
  44. gabriel/tasks/whatever.py +344 -0
  45. gabriel/utils/__init__.py +64 -0
  46. gabriel/utils/audio_utils.py +42 -0
  47. gabriel/utils/file_utils.py +464 -0
  48. gabriel/utils/image_utils.py +22 -0
  49. gabriel/utils/jinja.py +31 -0
  50. gabriel/utils/logging.py +86 -0
  51. gabriel/utils/mapmaker.py +304 -0
  52. gabriel/utils/media_utils.py +78 -0
  53. gabriel/utils/modality_utils.py +148 -0
  54. gabriel/utils/openai_utils.py +5470 -0
  55. gabriel/utils/parsing.py +282 -0
  56. gabriel/utils/passage_viewer.py +2557 -0
  57. gabriel/utils/pdf_utils.py +20 -0
  58. gabriel/utils/plot_utils.py +2881 -0
  59. gabriel/utils/prompt_utils.py +42 -0
  60. gabriel/utils/word_matching.py +158 -0
  61. openai_gabriel-1.0.1.dist-info/METADATA +443 -0
  62. openai_gabriel-1.0.1.dist-info/RECORD +67 -0
  63. openai_gabriel-1.0.1.dist-info/WHEEL +5 -0
  64. openai_gabriel-1.0.1.dist-info/entry_points.txt +2 -0
  65. openai_gabriel-1.0.1.dist-info/licenses/LICENSE +201 -0
  66. openai_gabriel-1.0.1.dist-info/licenses/NOTICE +13 -0
  67. openai_gabriel-1.0.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,464 @@
1
+ from __future__ import annotations
2
+
3
+ import importlib
4
+ import os
5
+ from typing import Any, Dict, Iterable, List, Optional, Set
6
+
7
+ import pandas as pd
8
+
9
+ from .logging import get_logger
10
+
11
+ logger = get_logger(__name__)
12
+
13
# Modalities whose files are read into a ``text`` column.
TEXTUAL_MODALITIES = {"text", "entity", "web"}
# Modalities for which only the file path is recorded.
PATH_MODALITIES = {"image", "audio", "pdf"}
ALL_MODALITIES = TEXTUAL_MODALITIES.union(PATH_MODALITIES)

# Extensions (with leading dot) that ``_read_tabular_file`` is dispatched on.
TABULAR_EXTENSIONS = {
    ".csv",
    ".tsv",
    ".xlsx",
    ".xls",
    ".parquet",
    ".pq",
    ".feather",
}
IMAGE_EXTENSIONS = {
    ".png", ".jpg", ".jpeg",
    ".gif", ".bmp",
    ".tiff", ".tif",
    ".webp", ".svg",
}
PDF_EXTENSIONS = {".pdf"}
# Note: ".csv" and ".tsv" appear both here and in TABULAR_EXTENSIONS.
TEXT_EXTENSIONS = {
    ".txt", ".md", ".rtf",
    ".html", ".htm", ".xml",
    ".json",
    ".csv", ".tsv",
}
AUDIO_EXTENSIONS = {
    ".mp3", ".wav", ".flac",
    ".m4a", ".aac",
    ".ogg", ".oga", ".opus",
    ".aiff", ".aif", ".aifc",
    ".wma", ".alac",
}

# Dot-less variants, used when comparing against user-supplied extensions.
IMAGE_EXTENSION_SUFFIXES = {dotted.lstrip(".") for dotted in IMAGE_EXTENSIONS}
AUDIO_EXTENSION_SUFFIXES = {dotted.lstrip(".") for dotted in AUDIO_EXTENSIONS}
PDF_EXTENSION_SUFFIXES = {dotted.lstrip(".") for dotted in PDF_EXTENSIONS}
58
+
59
+
60
def load(
    folder_path: str,
    extensions: Optional[Iterable[str]] = None,
    *,
    tag_dict: Optional[Dict[str, Any]] = None,
    save_name: str = "gabriel_aggregated_content.csv",
    save_dir: Optional[str] = None,
    reset_files: bool = False,
    modality: Optional[str] = None,
) -> pd.DataFrame:
    """Aggregate files from a folder into a single CSV.

    Parameters
    ----------
    folder_path:
        Path to a directory containing media files or to a single file.
        ``~`` and environment variables are expanded. When a tabular file
        (see ``TABULAR_EXTENSIONS``) is provided and the modality is textual,
        it is loaded directly without creating a copy.
    extensions:
        Optional iterable of file extensions (leading dots are stripped,
        matching is case-insensitive) to include. A single string is treated
        as one extension. When ``None`` all files are processed.
    tag_dict:
        Optional mapping of substrings to tag values. The first matching
        substring found in a file name determines the ``tag`` column value.
    save_name:
        Name of the output CSV written inside ``save_dir``. Defaults to
        ``"gabriel_aggregated_content.csv"``. Files with this name are skipped
        while walking ``folder_path``.
    save_dir:
        Optional directory for the aggregated CSV. When omitted, the data is
        saved inside ``folder_path`` (or the parent directory if
        ``folder_path`` points to a file).
    reset_files:
        When ``False`` (default), an existing file at the save path is reused
        instead of being regenerated. Set to ``True`` to overwrite the file.
    modality:
        Optional modality hint. ``"text"``, ``"entity"``, and ``"web"`` are
        treated as text; ``"image"``, ``"audio"``, and ``"pdf"`` collect file
        paths. When ``None`` (default) the modality is inferred from the
        candidate files: a single media kind (PDF, image, or audio) selects
        that modality, anything else falls back to ``"text"``.

    Returns
    -------
    DataFrame
        The aggregated contents or file paths of the processed files. The
        frame always carries the output columns (``name``, ``path``, any
        ``layer_N`` columns, then ``tag``/``text`` when applicable), even
        when no file matched.
    """

    folder_path = os.path.expanduser(os.path.expandvars(folder_path))
    target_dir = _resolve_save_directory(folder_path, save_dir)
    save_path = os.path.join(target_dir, save_name)

    if os.path.exists(save_path) and not reset_files:
        logger.info("Loading existing aggregated file from %s", save_path)
        df = _read_tabular_file(save_path)
        print(df.head())
        print(f"Loaded existing aggregated file from {save_path}")
        return df

    if not os.path.exists(folder_path):
        # A mistyped path would otherwise silently produce an empty CSV.
        logger.warning(
            "Input path %s does not exist; the aggregated file will be empty.",
            folder_path,
        )

    if isinstance(extensions, str):
        # A bare string would otherwise be split into single characters.
        extensions = [extensions]
    extset = {e.lower().lstrip(".") for e in extensions} if extensions else None
    modality = _resolve_modality(folder_path, extset, save_name, modality)
    is_textual = _is_textual_modality(modality)

    path_key = "path"
    rows: List[Dict[str, Any]] = []
    max_layers = 0

    # One-shot flags so each advisory message is printed at most once per call.
    warned_pdf = False
    warned_image = False
    warned_audio = False
    warned_doc = False
    has_non_pdf = False
    has_pdf = False

    doc_notice = (
        "[gabriel.load] Ignoring legacy .doc files. Please convert them "
        "to .docx or PDF before loading."
    )

    if os.path.isfile(folder_path):
        ext = os.path.splitext(folder_path)[1].lower()
        if ext == ".doc":
            # NOTE: the single-file path still records a row for a .doc file
            # (with empty text in textual mode); only the notice is printed.
            print(doc_notice)
            warned_doc = True
        if ext == ".pdf":
            has_pdf = True
        if is_textual and ext in TABULAR_EXTENSIONS:
            logger.info(
                "Input path %s is a tabular file; loading it without creating a copy.",
                folder_path,
            )
            df = _read_tabular_file(folder_path)
            print(df.head())
            print(f"Loaded existing file from {folder_path}")
            return df
        name = os.path.basename(folder_path)
        if ext != ".doc":
            warned_pdf, warned_image, warned_audio = _warn_for_media_mismatch(
                ext,
                modality,
                warned_pdf,
                warned_image,
                warned_audio,
                folder_path,
            )
        rows.append(
            _build_row(
                file_path=folder_path,
                name=name,
                layers=(),
                tag_dict=tag_dict,
                is_textual=is_textual,
            )
        )
    else:
        for root, _, files in os.walk(folder_path):
            for fname in files:
                if fname == save_name:
                    # Never ingest a previously written aggregate.
                    continue
                ext = os.path.splitext(fname)[1].lower()
                if ext == ".doc":
                    if not warned_doc:
                        print(doc_notice)
                        warned_doc = True
                    continue
                short_ext = ext.lstrip(".")
                if ext == ".pdf":
                    has_pdf = True
                if modality == "pdf" and ext != ".pdf":
                    has_non_pdf = True
                # Advisory messages are emitted before filtering, so they can
                # mention files that end up excluded below.
                warned_pdf, warned_image, warned_audio = _warn_for_media_mismatch(
                    ext,
                    modality,
                    warned_pdf,
                    warned_image,
                    warned_audio,
                    folder_path,
                )
                if not _should_include_file(short_ext, modality, extset):
                    continue
                file_path = os.path.join(root, fname)
                rel = os.path.relpath(file_path, folder_path)
                parts = rel.split(os.sep)
                name = parts[-1]
                # Sub-directory components become layer_1, layer_2, ... columns.
                layers = parts[:-1]
                max_layers = max(max_layers, len(layers))
                rows.append(
                    _build_row(
                        file_path=file_path,
                        name=name,
                        layers=layers,
                        tag_dict=tag_dict,
                        is_textual=is_textual,
                    )
                )

    if modality == "pdf" and has_non_pdf:
        print(
            "[gabriel.load] Detected non-PDF files in a PDF run. Only PDFs were "
            "ingested. Set modality='text' (or 'entity'/'web') in gabriel.load if you "
            "need to extract text from PDFs and include non-PDF files."
        )
    if modality == "pdf" and has_pdf:
        print(
            "[gabriel.load] PDF modality attaches PDFs directly (richer layout, figures, and "
            "images). Set modality='text' (or 'entity'/'web') to extract text-only "
            "versions of PDFs."
        )

    cols = ["name", path_key] + [f"layer_{i}" for i in range(1, max_layers + 1)]
    if tag_dict:
        cols.append("tag")
    if is_textual:
        cols.append("text")

    # Passing ``columns`` keeps the header even when ``rows`` is empty, so the
    # CSV written below can be read back on the next (cached) call.
    df = pd.DataFrame(rows, columns=cols)
    df.to_csv(save_path, index=False)
    print(df.head())
    print(f"Saved aggregated file to {save_path}")
    return df
249
+
250
+
251
def _build_row(
    *,
    file_path: str,
    name: str,
    layers: Iterable[str],
    tag_dict: Optional[Dict[str, Any]],
    is_textual: bool,
) -> Dict[str, Any]:
    """Assemble the record for one file.

    The record always has ``name``, ``path`` and ``tag``; ``text`` is added
    for textual runs, followed by one ``layer_N`` entry per element of
    ``layers`` (numbered from 1).
    """
    record: Dict[str, Any] = {
        "name": name,
        "path": file_path,
        "tag": _match_tag(name, tag_dict),
    }
    if is_textual:
        record["text"] = _extract_text(file_path)
    record.update(
        {f"layer_{depth}": part for depth, part in enumerate(layers, start=1)}
    )
    return record
270
+
271
+
272
def _extract_text(file_path: str) -> str:
    """Return the textual content of ``file_path``.

    PDFs are read with ``pypdf`` and ``.docx`` files with ``python-docx``
    (both imported on demand); ``.doc`` yields an empty string. Everything
    else, including files without an extension, is read as UTF-8 text with
    undecodable bytes dropped.
    """
    suffix = os.path.splitext(file_path)[1].lower()
    plain_text = (not suffix) or suffix in TEXT_EXTENSIONS

    if not plain_text:
        if suffix == ".pdf":
            pdf_module = _optional_import("pypdf", "pypdf")
            pages = pdf_module.PdfReader(file_path).pages
            return "\n".join(page.extract_text() or "" for page in pages).strip()
        if suffix == ".docx":
            docx_module = _optional_import("docx", "python-docx")
            paragraphs = docx_module.Document(file_path).paragraphs
            return "\n".join(para.text for para in paragraphs).strip()
        if suffix == ".doc":
            return ""

    with open(file_path, "r", encoding="utf-8", errors="ignore") as handle:
        return handle.read()
289
+
290
+
291
+ def _optional_import(module_name: str, package_name: str):
292
+ if importlib.util.find_spec(module_name) is None:
293
+ raise ImportError(
294
+ f"Missing optional dependency '{package_name}'. Install it to "
295
+ f"extract {module_name} documents."
296
+ )
297
+ return importlib.import_module(module_name)
298
+
299
+
300
+ def _match_tag(name: str, tag_dict: Optional[Dict[str, Any]]) -> Optional[Any]:
301
+ if not tag_dict:
302
+ return None
303
+ lower_name = name.lower()
304
+ for key, val in tag_dict.items():
305
+ if key.lower() in lower_name:
306
+ return val
307
+ return None
308
+
309
+
310
def _resolve_modality(
    folder_path: str,
    extset: Optional[Set[str]],
    save_name: str,
    requested_modality: Optional[str],
) -> str:
    """Pick the modality for a run.

    An explicit request is lower-cased and returned as-is (an unknown value
    is only logged); otherwise the modality is detected from the files.
    """
    if not requested_modality:
        inferred = _detect_modality(folder_path, extset, save_name)
        logger.info("Detected %s modality for %s", inferred, folder_path)
        return inferred

    choice = requested_modality.lower()
    if choice not in ALL_MODALITIES:
        logger.info(
            "Unknown modality '%s'; defaulting to text-style processing.",
            choice,
        )
    return choice
327
+
328
+
329
def _detect_modality(
    folder_path: str,
    extset: Optional[Set[str]],
    save_name: str,
) -> str:
    """Infer the modality from the candidate files' extensions.

    Returns ``"pdf"``, ``"image"`` or ``"audio"`` only when every candidate
    belongs to that single media kind. Any other file, a mix of media kinds,
    or no candidates at all results in ``"text"``.
    """
    media_kinds: Set[str] = set()
    for candidate in _iter_candidate_files(folder_path, extset, save_name):
        suffix = os.path.splitext(candidate)[1].lower()
        if suffix in PDF_EXTENSIONS:
            media_kinds.add("pdf")
        elif suffix in IMAGE_EXTENSIONS:
            media_kinds.add("image")
        elif suffix in AUDIO_EXTENSIONS:
            media_kinds.add("audio")
        else:
            # A single non-media file settles the answer; stop scanning.
            return "text"

    if len(media_kinds) == 1:
        return next(iter(media_kinds))
    return "text"
354
+
355
+
356
+ def _iter_candidate_files(
357
+ folder_path: str,
358
+ extset: Optional[Set[str]],
359
+ save_name: str,
360
+ ) -> Iterable[str]:
361
+ if os.path.isfile(folder_path):
362
+ yield folder_path
363
+ return
364
+ for root, _, files in os.walk(folder_path):
365
+ for fname in files:
366
+ if fname == save_name:
367
+ continue
368
+ short_ext = os.path.splitext(fname)[1].lower().lstrip(".")
369
+ if extset and short_ext not in extset:
370
+ continue
371
+ yield os.path.join(root, fname)
372
+
373
+
374
def _is_textual_modality(modality: str) -> bool:
    """Return ``True`` unless ``modality`` is a path-only modality.

    Values outside both known sets are treated as textual.
    """
    return modality in TEXTUAL_MODALITIES or modality not in PATH_MODALITIES
380
+
381
+
382
def _should_include_file(
    short_ext: str,
    modality: str,
    extset: Optional[Set[str]],
) -> bool:
    """Decide whether a file with dot-less extension ``short_ext`` is kept.

    The user filter ``extset`` is applied first. Textual modalities then
    reject image and audio files; ``image``/``audio``/``pdf`` accept only
    their own extensions; any other modality accepts everything.
    """
    if extset and short_ext not in extset:
        return False

    if modality in TEXTUAL_MODALITIES:
        is_media = (
            short_ext in IMAGE_EXTENSION_SUFFIXES
            or short_ext in AUDIO_EXTENSION_SUFFIXES
        )
        return not is_media

    accepted_by_modality = {
        "image": IMAGE_EXTENSION_SUFFIXES,
        "audio": AUDIO_EXTENSION_SUFFIXES,
        "pdf": PDF_EXTENSION_SUFFIXES,
    }
    accepted = accepted_by_modality.get(modality)
    if accepted is None:
        return True
    return short_ext in accepted
400
+
401
+
402
def _warn_for_media_mismatch(
    ext: str,
    modality: str,
    warned_pdf: bool,
    warned_image: bool,
    warned_audio: bool,
    folder_path: str,
) -> tuple[bool, bool, bool]:
    """Print a one-time hint when a media file does not match ``modality``.

    ``ext`` is the dotted, lower-cased extension. Each ``warned_*`` flag
    suppresses its message once set; the (possibly updated) flags are
    returned in the order ``(pdf, image, audio)``.
    """
    pdf_mismatch = ext in PDF_EXTENSIONS and modality != "pdf"
    if pdf_mismatch and not warned_pdf:
        warned_pdf = True
        print(
            f"[gabriel.load] Found PDF files in {folder_path} while modality='{modality}'. "
            "PDFs will be extracted into plain text. For best PDF fidelity (layout, "
            "figures, and images), set modality='pdf' here and in the downstream "
            "gabriel call."
        )

    image_mismatch = ext in IMAGE_EXTENSIONS and modality != "image"
    if image_mismatch and not warned_image:
        warned_image = True
        print(
            f"[gabriel.load] Found image files in {folder_path}. "
            "Set modality='image' to attach images directly to GPT calls."
        )

    audio_mismatch = ext in AUDIO_EXTENSIONS and modality != "audio"
    if audio_mismatch and not warned_audio:
        warned_audio = True
        print(
            f"[gabriel.load] Found audio files in {folder_path}. "
            "Set modality='audio' to attach audio directly to GPT calls."
        )

    return warned_pdf, warned_image, warned_audio
431
+
432
+
433
+ def _resolve_save_directory(folder_path: str, save_dir: Optional[str]) -> str:
434
+ if save_dir:
435
+ resolved = os.path.expanduser(os.path.expandvars(save_dir))
436
+ else:
437
+ if os.path.isdir(folder_path):
438
+ resolved = folder_path
439
+ else:
440
+ parent = os.path.dirname(folder_path)
441
+ if not parent:
442
+ parent = os.path.dirname(os.path.abspath(folder_path))
443
+ resolved = parent
444
+ if not resolved:
445
+ resolved = os.getcwd()
446
+ if os.path.isfile(resolved):
447
+ raise ValueError(f"save_dir must be a directory path, got file {resolved}")
448
+ os.makedirs(resolved, exist_ok=True)
449
+ return resolved
450
+
451
+
452
+ def _read_tabular_file(path: str) -> pd.DataFrame:
453
+ ext = os.path.splitext(path)[1].lower()
454
+ if ext == ".csv":
455
+ return pd.read_csv(path)
456
+ if ext == ".tsv":
457
+ return pd.read_csv(path, sep="\t")
458
+ if ext in {".xlsx", ".xls"}:
459
+ return pd.read_excel(path)
460
+ if ext in {".parquet", ".pq"}:
461
+ return pd.read_parquet(path)
462
+ if ext == ".feather":
463
+ return pd.read_feather(path)
464
+ return pd.read_csv(path)
@@ -0,0 +1,22 @@
1
+ import base64
2
+ from typing import Optional
3
+
4
+
5
def encode_image(image_path: str) -> Optional[str]:
    """Base64-encode the file at ``image_path``.

    Parameters
    ----------
    image_path: str
        Path to the image file to encode.

    Returns
    -------
    str or None
        The file's bytes as a base64 string, or ``None`` when any exception
        occurs while reading or encoding (best-effort by design).
    """
    try:
        with open(image_path, "rb") as handle:
            raw_bytes = handle.read()
        encoded = base64.b64encode(raw_bytes)
        return encoded.decode("utf-8")
    except Exception:
        return None
gabriel/utils/jinja.py ADDED
@@ -0,0 +1,31 @@
1
+ import os
2
+ import random
3
+ from collections import OrderedDict
4
+ import json
5
+ from jinja2 import Environment, FileSystemLoader
6
+
7
+
8
def shuffled(it, seed=None):
    """Return the elements of ``it`` as a new, shuffled list.

    With a ``seed`` a dedicated ``random.Random(seed)`` is used, making the
    order reproducible; without one the module-level generator is used.
    """
    items = list(it)
    if seed is None:
        random.shuffle(items)
    else:
        random.Random(seed).shuffle(items)
    return items
14
+
15
+
16
def shuffled_dict(d, seed=None):
    """Serialize ``d`` to indented JSON with its items in shuffled order.

    A ``seed`` makes the order reproducible; non-ASCII characters are kept
    verbatim in the output.
    """
    pairs = list(d.items())
    shuffler = random if seed is None else random.Random(seed)
    shuffler.shuffle(pairs)
    return json.dumps(OrderedDict(pairs), ensure_ascii=False, indent=2)
23
+
24
+
25
def get_env():
    """Build a Jinja2 environment rooted at the sibling ``prompts`` directory.

    The ``shuffled`` and ``shuffled_dict`` filters are registered on it.
    """
    module_dir = os.path.dirname(__file__)
    prompts_dir = os.path.abspath(os.path.join(module_dir, "..", "prompts"))
    env = Environment(loader=FileSystemLoader(prompts_dir))
    env.filters.update(shuffled=shuffled, shuffled_dict=shuffled_dict)
    return env
@@ -0,0 +1,86 @@
1
+ """Simple logging helpers with configurable verbosity.
2
+
3
+ This module centralises logging configuration for the project. Users can
4
+ control verbosity either programmatically through :func:`set_log_level` or via
5
+ the ``GABRIEL_LOG_LEVEL`` environment variable. Levels mirror typical logging
6
+ conventions and add a "silent" option which suppresses all log output.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import logging
12
+ import os
13
+ from typing import Union
14
+
15
+ # ---------------------------------------------------------------------------
16
+ # Verbosity handling
17
+ # ---------------------------------------------------------------------------
18
+
19
+ LOG_LEVELS = {
20
+ "silent": logging.CRITICAL + 1,
21
+ "error": logging.ERROR,
22
+ "warning": logging.WARNING,
23
+ "info": logging.INFO,
24
+ "debug": logging.DEBUG,
25
+ }
26
+
27
+
28
+ def _parse_level(level: Union[str, int, None]) -> int:
29
+ """Translate a human friendly level to ``logging`` constants."""
30
+
31
+ if isinstance(level, str):
32
+ return LOG_LEVELS.get(level.lower(), logging.INFO)
33
+ if isinstance(level, int):
34
+ return level
35
+ return logging.INFO
36
+
37
+
38
+ CURRENT_LEVEL = _parse_level(os.getenv("GABRIEL_LOG_LEVEL", "warning"))
39
+
40
+
41
def set_log_level(level: Union[str, int]) -> None:
    """Set the global logging level.

    Updates ``CURRENT_LEVEL``, applies it to the root logger and its
    handlers (installing a stream handler if the root has none), and then
    to every logger already registered with the logging manager, together
    with their handlers. This is not limited to GABRIEL's own loggers.
    """

    global CURRENT_LEVEL
    CURRENT_LEVEL = _parse_level(level)

    root_logger = logging.getLogger()
    root_logger.setLevel(CURRENT_LEVEL)
    if not root_logger.handlers:
        stream = logging.StreamHandler()
        stream.setFormatter(
            logging.Formatter(
                "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
            )
        )
        root_logger.addHandler(stream)
    for root_handler in root_logger.handlers:
        root_handler.setLevel(CURRENT_LEVEL)

    # Bring loggers that were created earlier in line with the new level.
    for entry in root_logger.manager.loggerDict.values():
        if not isinstance(entry, logging.Logger):
            # The registry can hold non-Logger entries; leave those alone.
            continue
        entry.setLevel(CURRENT_LEVEL)
        for attached in entry.handlers:
            attached.setLevel(CURRENT_LEVEL)
63
+
64
+
65
def get_logger(name: str) -> logging.Logger:
    """Fetch the logger called ``name`` and set it to ``CURRENT_LEVEL``."""

    named_logger = logging.getLogger(name)
    named_logger.setLevel(CURRENT_LEVEL)
    return named_logger
71
+
72
+
73
def announce_prompt_rendering(task: str, count: int) -> None:
    """Print a short notice that ``count`` prompts are about to be rendered.

    Nothing is printed when ``count`` is zero or negative. ``print`` (with an
    immediate flush) is used on purpose instead of logging, so the notice is
    visible in notebooks and terminals without any logging setup.
    """

    if count <= 0:
        return None
    notice = f"[{task}] Rendering {count} prompts…"
    print(notice, flush=True)
    return None
83
+
84
+
85
+ # Configure root logger on import according to ``GABRIEL_LOG_LEVEL``.
86
+ set_log_level(CURRENT_LEVEL)