biblicus-0.14.0-py3-none-any.whl → biblicus-0.15.0-py3-none-any.whl

@@ -13,12 +13,12 @@ from typing import Any, Dict, List, Tuple
 
 from pydantic import BaseModel
 
+from ..ai.llm import generate_completion
 from ..corpus import Corpus
 from ..models import ExtractionRunReference
 from ..retrieval import hash_text
 from ..time import utc_now_iso
 from .base import CorpusAnalysisBackend
-from .llm import generate_completion
 from .models import (
     AnalysisRecipeManifest,
     AnalysisRunInput,
@@ -45,7 +45,18 @@ from .models import (
 
 
 @dataclass
-class _TopicDocument:
+class TopicModelingDocument:
+    """
+    Text document input for topic modeling.
+
+    :ivar document_id: Stable identifier for this document in the topic modeling stage.
+    :vartype document_id: str
+    :ivar source_item_id: Corpus item identifier the text was derived from.
+    :vartype source_item_id: str
+    :ivar text: Document text content.
+    :vartype text: str
+    """
+
     document_id: str
     source_item_id: str
     text: str
@@ -190,6 +201,74 @@ def _run_topic_modeling(
     return output
 
 
+def run_topic_modeling_for_documents(
+    *,
+    documents: List[TopicModelingDocument],
+    config: TopicModelingRecipeConfig,
+) -> TopicModelingReport:
+    """
+    Run topic modeling using caller-provided documents.
+
+    :param documents: Pre-collected documents to model.
+    :type documents: list[TopicModelingDocument]
+    :param config: Topic modeling recipe configuration.
+    :type config: TopicModelingRecipeConfig
+    :return: Topic modeling report with topic assignments.
+    :rtype: TopicModelingReport
+    """
+    text_report = TopicModelingTextCollectionReport(
+        status=TopicModelingStageStatus.COMPLETE,
+        source_items=len({doc.source_item_id for doc in documents}),
+        documents=len(documents),
+        sample_size=config.text_source.sample_size,
+        min_text_characters=config.text_source.min_text_characters,
+        empty_texts=len([doc for doc in documents if not doc.text.strip()]),
+        skipped_items=0,
+        warnings=[],
+        errors=[],
+    )
+
+    llm_extraction_report, extracted_documents = _apply_llm_extraction(
+        documents=documents,
+        config=config.llm_extraction,
+    )
+
+    lexical_report, lexical_documents = _apply_lexical_processing(
+        documents=extracted_documents,
+        config=config.lexical_processing,
+    )
+
+    bertopic_report, topics = _run_bertopic(
+        documents=lexical_documents,
+        config=config.bertopic_analysis,
+    )
+
+    fine_tuning_report, labeled_topics = _apply_llm_fine_tuning(
+        topics=topics,
+        documents=lexical_documents,
+        config=config.llm_fine_tuning,
+    )
+
+    return TopicModelingReport(
+        text_collection=text_report,
+        llm_extraction=llm_extraction_report,
+        lexical_processing=lexical_report,
+        bertopic_analysis=bertopic_report,
+        llm_fine_tuning=fine_tuning_report,
+        topics=labeled_topics,
+        warnings=(
+            text_report.warnings
+            + llm_extraction_report.warnings
+            + bertopic_report.warnings
+            + fine_tuning_report.warnings
+        ),
+        errors=text_report.errors
+        + llm_extraction_report.errors
+        + bertopic_report.errors
+        + fine_tuning_report.errors,
+    )
+
+
 def _create_recipe_manifest(
     *, name: str, config: TopicModelingRecipeConfig
 ) -> AnalysisRecipeManifest:
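Note: a minimal usage sketch of the new run_topic_modeling_for_documents entry point. The document fields and keyword arguments are taken from the hunk above; constructing TopicModelingRecipeConfig with defaults is this note's assumption, not something the diff shows.

    documents = [
        TopicModelingDocument(document_id="doc-1", source_item_id="item-1", text="first text"),
        TopicModelingDocument(document_id="doc-2", source_item_id="item-2", text="second text"),
    ]
    report = run_topic_modeling_for_documents(
        documents=documents,
        config=TopicModelingRecipeConfig(),  # assumed: default config values are usable
    )
    print(report.topics, report.warnings, report.errors)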
@@ -226,14 +305,14 @@ def _collect_documents(
     corpus: Corpus,
     extraction_run: ExtractionRunReference,
     config: TopicModelingTextSourceConfig,
-) -> Tuple[List[_TopicDocument], TopicModelingTextCollectionReport]:
+) -> Tuple[List[TopicModelingDocument], TopicModelingTextCollectionReport]:
     manifest = corpus.load_extraction_run_manifest(
         extractor_id=extraction_run.extractor_id,
         run_id=extraction_run.run_id,
     )
     warnings: List[str] = []
     errors: List[str] = []
-    documents: List[_TopicDocument] = []
+    documents: List[TopicModelingDocument] = []
     skipped_items = 0
     empty_texts = 0
 
@@ -256,7 +335,7 @@ def _collect_documents(
             skipped_items += 1
             continue
         documents.append(
-            _TopicDocument(
+            TopicModelingDocument(
                 document_id=item_result.item_id,
                 source_item_id=item_result.item_id,
                 text=text_value,
@@ -286,9 +365,9 @@ def _collect_documents(
 
 def _apply_llm_extraction(
     *,
-    documents: List[_TopicDocument],
+    documents: List[TopicModelingDocument],
     config: TopicModelingLlmExtractionConfig,
-) -> Tuple[TopicModelingLlmExtractionReport, List[_TopicDocument]]:
+) -> Tuple[TopicModelingLlmExtractionReport, List[TopicModelingDocument]]:
     if not config.enabled:
         report = TopicModelingLlmExtractionReport(
             status=TopicModelingStageStatus.SKIPPED,
@@ -300,7 +379,7 @@ def _apply_llm_extraction(
         )
         return report, list(documents)
 
-    extracted_documents: List[_TopicDocument] = []
+    extracted_documents: List[TopicModelingDocument] = []
     errors: List[str] = []
 
     for document in documents:
@@ -315,7 +394,7 @@ def _apply_llm_extraction(
             errors.append(f"LLM extraction returned empty output for {document.document_id}")
             continue
         extracted_documents.append(
-            _TopicDocument(
+            TopicModelingDocument(
                 document_id=document.document_id,
                 source_item_id=document.source_item_id,
                 text=response_text,
@@ -328,7 +407,7 @@ def _apply_llm_extraction(
             continue
         for index, item_text in enumerate(items, start=1):
             extracted_documents.append(
-                _TopicDocument(
+                TopicModelingDocument(
                     document_id=f"{document.document_id}:{index}",
                     source_item_id=document.source_item_id,
                     text=item_text,
@@ -381,9 +460,9 @@ def _parse_itemized_response(response_text: str) -> List[str]:
 
 def _apply_lexical_processing(
     *,
-    documents: List[_TopicDocument],
+    documents: List[TopicModelingDocument],
     config: TopicModelingLexicalProcessingConfig,
-) -> Tuple[TopicModelingLexicalProcessingReport, List[_TopicDocument]]:
+) -> Tuple[TopicModelingLexicalProcessingReport, List[TopicModelingDocument]]:
     if not config.enabled:
         report = TopicModelingLexicalProcessingReport(
             status=TopicModelingStageStatus.SKIPPED,
@@ -395,7 +474,7 @@ def _apply_lexical_processing(
         )
         return report, list(documents)
 
-    processed: List[_TopicDocument] = []
+    processed: List[TopicModelingDocument] = []
     for document in documents:
        text_value = document.text
        if config.lowercase:
@@ -405,7 +484,7 @@ def _apply_lexical_processing(
         if config.collapse_whitespace:
             text_value = re.sub(r"\s+", " ", text_value).strip()
         processed.append(
-            _TopicDocument(
+            TopicModelingDocument(
                 document_id=document.document_id,
                 source_item_id=document.source_item_id,
                 text=text_value,
@@ -425,7 +504,7 @@ def _apply_lexical_processing(
 
 def _run_bertopic(
     *,
-    documents: List[_TopicDocument],
+    documents: List[TopicModelingDocument],
     config: TopicModelingBerTopicConfig,
 ) -> Tuple[TopicModelingBerTopicReport, List[TopicModelingTopic]]:
     try:
@@ -496,9 +575,9 @@ def _run_bertopic(
 
 
 def _group_documents_by_topic(
-    documents: List[_TopicDocument], assignments: List[int]
-) -> Dict[int, List[_TopicDocument]]:
-    grouped: Dict[int, List[_TopicDocument]] = {}
+    documents: List[TopicModelingDocument], assignments: List[int]
+) -> Dict[int, List[TopicModelingDocument]]:
+    grouped: Dict[int, List[TopicModelingDocument]] = {}
     for index, topic_id in enumerate(assignments):
         grouped.setdefault(int(topic_id), []).append(documents[index])
     return grouped
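Note: _group_documents_by_topic pairs each assignment with the document at the same index, so its behavior can be sketched directly from the body above (BERTopic conventionally labels outlier documents with topic -1):

    docs = [
        TopicModelingDocument(document_id="d1", source_item_id="i1", text="alpha"),
        TopicModelingDocument(document_id="d2", source_item_id="i2", text="beta"),
    ]
    grouped = _group_documents_by_topic(docs, [0, -1])
    # grouped == {0: [docs[0]], -1: [docs[1]]}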
@@ -514,7 +593,7 @@ def _resolve_topic_keywords(*, topic_model: Any, topic_id: int) -> List[TopicMod
 def _apply_llm_fine_tuning(
     *,
     topics: List[TopicModelingTopic],
-    documents: List[_TopicDocument],
+    documents: List[TopicModelingDocument],
     config: TopicModelingLlmFineTuningConfig,
 ) -> Tuple[TopicModelingLlmFineTuningReport, List[TopicModelingTopic]]:
     if not config.enabled:
@@ -459,7 +459,8 @@ def _create_full_text_search_schema(conn: sqlite3.Connection) -> None:
     :return: None.
     :rtype: None
     """
-    conn.execute("""
+    conn.execute(
+        """
         CREATE VIRTUAL TABLE chunks_full_text_search USING fts5(
             content,
             item_id UNINDEXED,
@@ -470,7 +471,8 @@ def _create_full_text_search_schema(conn: sqlite3.Connection) -> None:
             start_offset UNINDEXED,
             end_offset UNINDEXED
         )
-    """)
+        """
+    )
 
 
 def _build_full_text_search_index(
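Note: in this FTS5 schema only the content column is searchable; the UNINDEXED columns ride along as payload. A hedged query sketch against the table defined above, given a sqlite3 connection (the search string is illustrative):

    rows = conn.execute(
        "SELECT item_id, snippet(chunks_full_text_search, 0, '[', ']', '...', 8) "
        "FROM chunks_full_text_search WHERE chunks_full_text_search MATCH ?",
        ("search terms",),
    ).fetchall()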
biblicus/cli.py CHANGED
@@ -394,7 +394,7 @@ def cmd_extract_build(arguments: argparse.Namespace) -> int:
     :return: Exit code.
     :rtype: int
     """
-    import yaml
+    from .recipes import load_recipe_view
 
     corpus = (
         Corpus.open(arguments.corpus)
@@ -404,11 +404,11 @@ def cmd_extract_build(arguments: argparse.Namespace) -> int:
 
     # Load recipe from file if --recipe is provided
     if getattr(arguments, "recipe", None):
-        recipe_path = Path(arguments.recipe)
-        if not recipe_path.exists():
-            raise FileNotFoundError(f"Recipe file not found: {recipe_path}")
-        with open(recipe_path, "r", encoding="utf-8") as f:
-            recipe_data = yaml.safe_load(f)
+        recipe_data = load_recipe_view(
+            arguments.recipe,
+            recipe_label="Recipe file",
+            mapping_error_message="Extraction recipe must be a mapping/object",
+        )
         loaded_extractor_id = recipe_data.get("extractor_id", "pipeline")
         loaded_config = recipe_data.get("config", {})
 
@@ -713,19 +713,20 @@ def cmd_analyze_topics(arguments: argparse.Namespace) -> int:
     :return: Exit code.
     :rtype: int
     """
-    import yaml
+    from .recipes import apply_dotted_overrides, load_recipe_view, parse_dotted_overrides
 
     corpus = (
         Corpus.open(arguments.corpus)
         if getattr(arguments, "corpus", None)
         else Corpus.find(Path.cwd())
     )
-    recipe_path = Path(arguments.recipe)
-    if not recipe_path.is_file():
-        raise FileNotFoundError(f"Recipe file not found: {recipe_path}")
-    recipe_data = yaml.safe_load(recipe_path.read_text(encoding="utf-8")) or {}
-    if not isinstance(recipe_data, dict):
-        raise ValueError("Topic modeling recipe must be a mapping/object")
+    recipe_data = load_recipe_view(
+        arguments.recipe,
+        recipe_label="Recipe file",
+        mapping_error_message="Topic modeling recipe must be a mapping/object",
+    )
+    overrides = parse_dotted_overrides(arguments.config)
+    recipe_data = apply_dotted_overrides(recipe_data, overrides)
 
     if arguments.extraction_run:
         extraction_run = parse_extraction_run_reference(arguments.extraction_run)
@@ -761,7 +762,7 @@ def cmd_analyze_profile(arguments: argparse.Namespace) -> int:
     :return: Exit code.
     :rtype: int
     """
-    import yaml
+    from .recipes import apply_dotted_overrides, load_recipe_view, parse_dotted_overrides
 
     corpus = (
         Corpus.open(arguments.corpus)
@@ -771,13 +772,17 @@ def cmd_analyze_profile(arguments: argparse.Namespace) -> int:
 
     recipe_data: dict[str, object] = {}
     if arguments.recipe is not None:
-        recipe_path = Path(arguments.recipe)
-        if not recipe_path.is_file():
-            raise FileNotFoundError(f"Recipe file not found: {recipe_path}")
-        recipe_raw = yaml.safe_load(recipe_path.read_text(encoding="utf-8")) or {}
-        if not isinstance(recipe_raw, dict):
-            raise ValueError("Profiling recipe must be a mapping/object")
-        recipe_data = recipe_raw
+        recipe_data = load_recipe_view(
+            arguments.recipe,
+            recipe_label="Recipe file",
+            mapping_error_message="Profiling recipe must be a mapping/object",
+        )
+        overrides = parse_dotted_overrides(arguments.config)
+        recipe_data = apply_dotted_overrides(recipe_data, overrides)
+    else:
+        overrides = parse_dotted_overrides(arguments.config)
+        if overrides:
+            recipe_data = apply_dotted_overrides(recipe_data, overrides)
 
     if arguments.extraction_run:
         extraction_run = parse_extraction_run_reference(arguments.extraction_run)
@@ -804,6 +809,55 @@ def cmd_analyze_profile(arguments: argparse.Namespace) -> int:
     return 0
 
 
+def cmd_analyze_markov(arguments: argparse.Namespace) -> int:
+    """
+    Run Markov analysis for a corpus.
+
+    :param arguments: Parsed command-line interface arguments.
+    :type arguments: argparse.Namespace
+    :return: Exit code.
+    :rtype: int
+    """
+    from .recipes import apply_dotted_overrides, load_recipe_view, parse_dotted_overrides
+
+    corpus = (
+        Corpus.open(arguments.corpus)
+        if getattr(arguments, "corpus", None)
+        else Corpus.find(Path.cwd())
+    )
+    recipe_data = load_recipe_view(
+        arguments.recipe,
+        recipe_label="Recipe file",
+        mapping_error_message="Markov analysis recipe must be a mapping/object",
+    )
+    overrides = parse_dotted_overrides(arguments.config)
+    recipe_data = apply_dotted_overrides(recipe_data, overrides)
+
+    if arguments.extraction_run:
+        extraction_run = parse_extraction_run_reference(arguments.extraction_run)
+    else:
+        extraction_run = corpus.latest_extraction_run_reference()
+        if extraction_run is None:
+            raise ValueError("Markov analysis requires an extraction run to supply text inputs")
+        print(
+            "Warning: using latest extraction run; pass --extraction-run for reproducibility.",
+            file=sys.stderr,
+        )
+
+    backend = get_analysis_backend("markov")
+    try:
+        output = backend.run_analysis(
+            corpus,
+            recipe_name=arguments.recipe_name,
+            config=recipe_data,
+            extraction_run=extraction_run,
+        )
+    except ValidationError as exc:
+        raise ValueError(f"Invalid Markov analysis recipe: {exc}") from exc
+    print(output.model_dump_json(indent=2))
+    return 0
+
+
 def build_parser() -> argparse.ArgumentParser:
     """
     Build the command-line interface argument parser.
@@ -912,6 +966,7 @@ def build_parser() -> argparse.ArgumentParser:
     p_extract_build.add_argument(
         "--recipe",
         default=None,
+        action="append",
         help="Path to YAML recipe file. If provided, --step arguments are ignored.",
     )
     p_extract_build.add_argument(
@@ -1067,7 +1122,14 @@ def build_parser() -> argparse.ArgumentParser:
     p_analyze_topics.add_argument(
         "--recipe",
         required=True,
-        help="Path to topic modeling recipe YAML.",
+        action="append",
+        help="Path to topic modeling recipe YAML. Repeatable; later recipes override earlier recipes.",
+    )
+    p_analyze_topics.add_argument(
+        "--config",
+        action="append",
+        default=[],
+        help="Override key=value pairs applied after composing recipes (supports dotted keys).",
     )
     p_analyze_topics.add_argument(
         "--recipe-name",
@@ -1086,7 +1148,14 @@ def build_parser() -> argparse.ArgumentParser:
     p_analyze_profile.add_argument(
         "--recipe",
         default=None,
-        help="Optional profiling recipe YAML file.",
+        action="append",
+        help="Optional profiling recipe YAML file. Repeatable; later recipes override earlier recipes.",
+    )
+    p_analyze_profile.add_argument(
+        "--config",
+        action="append",
+        default=[],
+        help="Override key=value pairs applied after composing recipes (supports dotted keys).",
     )
     p_analyze_profile.add_argument(
         "--recipe-name",
@@ -1100,6 +1169,32 @@ def build_parser() -> argparse.ArgumentParser:
     )
     p_analyze_profile.set_defaults(func=cmd_analyze_profile)
 
+    p_analyze_markov = analyze_sub.add_parser("markov", help="Run Markov analysis.")
+    _add_common_corpus_arg(p_analyze_markov)
+    p_analyze_markov.add_argument(
+        "--recipe",
+        required=True,
+        action="append",
+        help="Path to Markov analysis recipe YAML. Repeatable; later recipes override earlier recipes.",
+    )
+    p_analyze_markov.add_argument(
+        "--config",
+        action="append",
+        default=[],
+        help="Override key=value pairs applied after composing recipes (supports dotted keys).",
+    )
+    p_analyze_markov.add_argument(
+        "--recipe-name",
+        default="default",
+        help="Human-readable recipe name.",
+    )
+    p_analyze_markov.add_argument(
+        "--extraction-run",
+        default=None,
+        help="Extraction run reference in the form extractor_id:run_id.",
+    )
+    p_analyze_markov.set_defaults(func=cmd_analyze_markov)
+
     return parser
 
 
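Note: a sketch of exercising the new subcommand through build_parser(). The parent command name "analyze" is inferred from the analyze_sub variable, and the recipe path and override key are illustrative, not from the diff:

    parser = build_parser()
    arguments = parser.parse_args(
        ["analyze", "markov", "--recipe", "markov.yaml", "--config", "order=2"]
    )
    raise SystemExit(arguments.func(arguments))  # func is cmd_analyze_markov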
biblicus/recipes.py ADDED
@@ -0,0 +1,136 @@
+"""
+Recipe loading utilities for Biblicus.
+"""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+from typing import Dict, Iterable, List, Mapping, MutableMapping, Optional
+
+
+def _parse_scalar(value: str) -> object:
+    lowered = value.lower()
+    if lowered == "true":
+        return True
+    if lowered == "false":
+        return False
+    if lowered in {"null", "none"}:
+        return None
+    if value.isdigit():
+        return int(value)
+    try:
+        return float(value)
+    except ValueError:
+        return value
+
+
+def parse_override_value(raw: str) -> object:
+    """
+    Parse a command-line override string into a Python value.
+
+    :param raw: Raw override string.
+    :type raw: str
+    :return: Parsed value.
+    :rtype: object
+    """
+    raw = str(raw)
+    stripped = raw.strip()
+    if not stripped:
+        return ""
+    if stripped[0] in {"{", "["}:
+        try:
+            return json.loads(stripped)
+        except json.JSONDecodeError:
+            return raw
+    return _parse_scalar(stripped)
+
+
+def parse_dotted_overrides(pairs: Optional[List[str]]) -> Dict[str, object]:
+    """
+    Parse repeated key=value pairs into a dotted override mapping.
+
+    :param pairs: Repeated command-line pairs.
+    :type pairs: list[str] or None
+    :return: Override mapping.
+    :rtype: dict[str, object]
+    :raises ValueError: If a pair is not key=value.
+    """
+    overrides: Dict[str, object] = {}
+    for item in pairs or []:
+        if "=" not in item:
+            raise ValueError(f"Config values must be key=value (got {item!r})")
+        key, raw = item.split("=", 1)
+        key = key.strip()
+        if not key:
+            raise ValueError("Config keys must be non-empty")
+        overrides[key] = parse_override_value(raw)
+    return overrides
+
+
+def _set_dotted_key(target: MutableMapping[str, object], dotted_key: str, value: object) -> None:
+    parts = [part.strip() for part in dotted_key.split(".") if part.strip()]
+    if not parts:
+        raise ValueError("Override keys must be non-empty")
+    current: MutableMapping[str, object] = target
+    for part in parts[:-1]:
+        existing = current.get(part)
+        if not isinstance(existing, dict):
+            nested: Dict[str, object] = {}
+            current[part] = nested
+            current = nested
+        else:
+            current = existing
+    current[parts[-1]] = value
+
+
+def apply_dotted_overrides(
+    config: Dict[str, object], overrides: Mapping[str, object]
+) -> Dict[str, object]:
+    """
+    Apply dotted key overrides to a nested configuration mapping.
+
+    :param config: Base configuration mapping.
+    :type config: dict[str, object]
+    :param overrides: Dotted key override mapping.
+    :type overrides: Mapping[str, object]
+    :return: New configuration mapping with overrides applied.
+    :rtype: dict[str, object]
+    """
+    updated: Dict[str, object] = json.loads(json.dumps(config))
+    for key, value in overrides.items():
+        _set_dotted_key(updated, key, value)
+    return updated
+
+
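Note: a doctest-style sketch of the override helpers above, using a dotted key that appears in the topic modeling config earlier in this diff; this is how the CLI's --config values flow through:

    overrides = parse_dotted_overrides(["text_source.sample_size=100", "lexical_processing.enabled=true"])
    # {'text_source.sample_size': 100, 'lexical_processing.enabled': True}
    merged = apply_dotted_overrides({"text_source": {"sample_size": 10}}, overrides)
    # {'text_source': {'sample_size': 100}, 'lexical_processing': {'enabled': True}}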
+def load_recipe_view(
+    recipe_paths: Iterable[str],
+    *,
+    recipe_label: str = "Recipe",
+    mapping_error_message: Optional[str] = None,
+) -> Dict[str, object]:
+    """
+    Load a composed recipe view from one or more YAML files.
+
+    :param recipe_paths: Iterable of recipe file paths in precedence order.
+    :type recipe_paths: Iterable[str]
+    :param recipe_label: Label used in error messages (for example: "Recipe file").
+    :type recipe_label: str
+    :return: Composed configuration view.
+    :rtype: dict[str, object]
+    :raises FileNotFoundError: If any recipe file is missing.
+    :raises ValueError: If any recipe file is not a mapping/object.
+    """
+    from biblicus._vendor.dotyaml import load_yaml_view
+
+    paths: List[str] = [str(path) for path in recipe_paths]
+    for raw in paths:
+        candidate = Path(raw)
+        if not candidate.is_file():
+            raise FileNotFoundError(f"{recipe_label} not found: {candidate}")
+    try:
+        view = load_yaml_view(paths)
+    except ValueError as exc:
+        message = mapping_error_message or f"{recipe_label} must be a mapping/object"
+        raise ValueError(message) from exc
+    return view
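Note: a hedged composition sketch; the later-path-wins merge semantics live in the vendored dotyaml load_yaml_view, which this diff references but does not show, and the paths are illustrative:

    recipe = load_recipe_view(
        ["base.yaml", "override.yaml"],
        recipe_label="Recipe file",
    )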
@@ -0,0 +1,43 @@
+"""
+Agentic text utilities.
+"""
+
+from .annotate import apply_text_annotate
+from .extract import apply_text_extract
+from .link import apply_text_link
+from .models import (
+    TextAnnotateRequest,
+    TextAnnotateResult,
+    TextExtractRequest,
+    TextExtractResult,
+    TextExtractSpan,
+    TextLinkRequest,
+    TextLinkResult,
+    TextRedactRequest,
+    TextRedactResult,
+    TextSliceRequest,
+    TextSliceResult,
+    TextSliceSegment,
+)
+from .redact import apply_text_redact
+from .slice import apply_text_slice
+
+__all__ = [
+    "TextAnnotateRequest",
+    "TextAnnotateResult",
+    "TextExtractRequest",
+    "TextExtractResult",
+    "TextExtractSpan",
+    "TextLinkRequest",
+    "TextLinkResult",
+    "TextRedactRequest",
+    "TextRedactResult",
+    "TextSliceRequest",
+    "TextSliceResult",
+    "TextSliceSegment",
+    "apply_text_annotate",
+    "apply_text_extract",
+    "apply_text_link",
+    "apply_text_redact",
+    "apply_text_slice",
+]