biblicus 0.11.0-py3-none-any.whl → 0.13.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
biblicus/__init__.py CHANGED
@@ -27,4 +27,4 @@ __all__ = [
27
27
  "RetrievalRun",
28
28
  ]
29
29
 
30
- __version__ = "0.11.0"
30
+ __version__ = "0.13.0"
biblicus/cli.py CHANGED
@@ -15,9 +15,11 @@ from pydantic import ValidationError
15
15
  from .analysis import get_analysis_backend
16
16
  from .backends import get_backend
17
17
  from .context import (
18
+ CharacterBudget,
18
19
  ContextPackPolicy,
19
20
  TokenBudget,
20
21
  build_context_pack,
22
+ fit_context_pack_to_character_budget,
21
23
  fit_context_pack_to_token_budget,
22
24
  )
23
25
  from .corpus import Corpus
@@ -26,6 +28,11 @@ from .errors import ExtractionRunFatalError
26
28
  from .evaluation import evaluate_run, load_dataset
27
29
  from .evidence_processing import apply_evidence_filter, apply_evidence_reranker
28
30
  from .extraction import build_extraction_run
31
+ from .extraction_evaluation import (
32
+ evaluate_extraction_run,
33
+ load_extraction_dataset,
34
+ write_extraction_evaluation_result,
35
+ )
29
36
  from .models import QueryBudget, RetrievalResult, parse_extraction_run_reference
30
37
  from .uris import corpus_ref_to_path
31
38
 
@@ -504,6 +511,54 @@ def cmd_extract_delete(arguments: argparse.Namespace) -> int:
504
511
  return 0
505
512
 
506
513
 
514
+ def cmd_extract_evaluate(arguments: argparse.Namespace) -> int:
515
+ """
516
+ Evaluate an extraction run against a dataset.
517
+
518
+ :param arguments: Parsed command-line interface arguments.
519
+ :type arguments: argparse.Namespace
520
+ :return: Exit code.
521
+ :rtype: int
522
+ """
523
+ corpus = (
524
+ Corpus.open(arguments.corpus)
525
+ if getattr(arguments, "corpus", None)
526
+ else Corpus.find(Path.cwd())
527
+ )
528
+ if arguments.run:
529
+ run_ref = parse_extraction_run_reference(arguments.run)
530
+ else:
531
+ run_ref = corpus.latest_extraction_run_reference()
532
+ if run_ref is None:
533
+ raise ValueError("Extraction evaluation requires an extraction run")
534
+ print(
535
+ "Warning: using latest extraction run; pass --run for reproducibility.",
536
+ file=sys.stderr,
537
+ )
538
+
539
+ dataset_path = Path(arguments.dataset)
540
+ if not dataset_path.is_file():
541
+ raise FileNotFoundError(f"Dataset file not found: {dataset_path}")
542
+ try:
543
+ dataset = load_extraction_dataset(dataset_path)
544
+ except ValidationError as exc:
545
+ raise ValueError(f"Invalid extraction dataset: {exc}") from exc
546
+
547
+ run = corpus.load_extraction_run_manifest(
548
+ extractor_id=run_ref.extractor_id,
549
+ run_id=run_ref.run_id,
550
+ )
551
+ result = evaluate_extraction_run(
552
+ corpus=corpus,
553
+ run=run,
554
+ extractor_id=run_ref.extractor_id,
555
+ dataset=dataset,
556
+ )
557
+ write_extraction_evaluation_result(corpus=corpus, run_id=run.run_id, result=result)
558
+ print(result.model_dump_json(indent=2))
559
+ return 0
560
+
561
+
507
562
  def cmd_query(arguments: argparse.Namespace) -> int:
508
563
  """
509
564
  Execute a retrieval query.
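The new command maps directly onto the library surface added in this release. A minimal programmatic sketch of the same flow; the corpus reference, run reference, and dataset file name are placeholders:

from pathlib import Path

from biblicus.corpus import Corpus
from biblicus.extraction_evaluation import (
    evaluate_extraction_run,
    load_extraction_dataset,
    write_extraction_evaluation_result,
)
from biblicus.models import parse_extraction_run_reference

corpus = Corpus.open("my-corpus")  # placeholder corpus reference
run_ref = parse_extraction_run_reference("my-extractor:my-run-id")  # placeholder extractor_id:run_id
dataset = load_extraction_dataset(Path("extraction-eval.json"))  # placeholder dataset file
run = corpus.load_extraction_run_manifest(
    extractor_id=run_ref.extractor_id,
    run_id=run_ref.run_id,
)
result = evaluate_extraction_run(
    corpus=corpus,
    run=run,
    extractor_id=run_ref.extractor_id,
    dataset=dataset,
)
output_path = write_extraction_evaluation_result(corpus=corpus, run_id=run.run_id, result=result)
print(result.model_dump_json(indent=2))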
@@ -568,7 +623,11 @@ def cmd_context_pack_build(arguments: argparse.Namespace) -> int:
568
623
  )
569
624
  retrieval_result = RetrievalResult.model_validate_json(input_text)
570
625
  join_with = bytes(arguments.join_with, "utf-8").decode("unicode_escape")
571
- policy = ContextPackPolicy(join_with=join_with)
626
+ policy = ContextPackPolicy(
627
+ join_with=join_with,
628
+ ordering=arguments.ordering,
629
+ include_metadata=arguments.include_metadata,
630
+ )
572
631
  context_pack = build_context_pack(retrieval_result, policy=policy)
573
632
  if arguments.max_tokens is not None:
574
633
  context_pack = fit_context_pack_to_token_budget(
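The unicode_escape round trip above is what lets callers pass literal escape sequences such as \n for --join-with. A standalone illustration of that decoding step:

separator = bytes("\\n---\\n", "utf-8").decode("unicode_escape")
assert separator == "\n---\n"  # the escaped input becomes real newlines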
@@ -576,6 +635,12 @@ def cmd_context_pack_build(arguments: argparse.Namespace) -> int:
576
635
  policy=policy,
577
636
  token_budget=TokenBudget(max_tokens=int(arguments.max_tokens)),
578
637
  )
638
+ if arguments.max_characters is not None:
639
+ context_pack = fit_context_pack_to_character_budget(
640
+ context_pack,
641
+ policy=policy,
642
+ character_budget=CharacterBudget(max_characters=int(arguments.max_characters)),
643
+ )
579
644
  print(
580
645
  json.dumps(
581
646
  {
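Outside the command-line interface, the same budgeting flow can be driven from the library. A sketch that reads retrieval output JSON from standard input; the budget value and policy settings are illustrative:

import sys

from biblicus.context import (
    CharacterBudget,
    ContextPackPolicy,
    build_context_pack,
    fit_context_pack_to_character_budget,
)
from biblicus.models import RetrievalResult

retrieval_result = RetrievalResult.model_validate_json(sys.stdin.read())
policy = ContextPackPolicy(join_with="\n\n", ordering="score", include_metadata=True)
context_pack = build_context_pack(retrieval_result, policy=policy)
context_pack = fit_context_pack_to_character_budget(
    context_pack,
    policy=policy,
    character_budget=CharacterBudget(max_characters=4000),
)
print(context_pack.text)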
@@ -889,6 +954,22 @@ def build_parser() -> argparse.ArgumentParser:
889
954
  )
890
955
  p_extract_delete.set_defaults(func=cmd_extract_delete)
891
956
 
957
+ p_extract_evaluate = extract_sub.add_parser(
958
+ "evaluate", help="Evaluate an extraction run against a dataset."
959
+ )
960
+ _add_common_corpus_arg(p_extract_evaluate)
961
+ p_extract_evaluate.add_argument(
962
+ "--run",
963
+ default=None,
964
+ help="Extraction run reference in the form extractor_id:run_id (defaults to latest run).",
965
+ )
966
+ p_extract_evaluate.add_argument(
967
+ "--dataset",
968
+ required=True,
969
+ help="Path to the extraction evaluation dataset JSON file.",
970
+ )
971
+ p_extract_evaluate.set_defaults(func=cmd_extract_evaluate)
972
+
892
973
  p_query = sub.add_parser("query", help="Run a retrieval query.")
893
974
  _add_common_corpus_arg(p_query)
894
975
  p_query.add_argument("--run", default=None, help="Run identifier (defaults to latest run).")
@@ -921,12 +1002,29 @@ def build_parser() -> argparse.ArgumentParser:
921
1002
  default="\\n\\n",
922
1003
  help="Separator between evidence blocks (escape sequences supported, default is two newlines).",
923
1004
  )
1005
+ p_context_pack_build.add_argument(
1006
+ "--ordering",
1007
+ choices=["rank", "score", "source"],
1008
+ default="rank",
1009
+ help="Evidence ordering policy (rank, score, source).",
1010
+ )
1011
+ p_context_pack_build.add_argument(
1012
+ "--include-metadata",
1013
+ action="store_true",
1014
+ help="Include evidence metadata in each context pack block.",
1015
+ )
924
1016
  p_context_pack_build.add_argument(
925
1017
  "--max-tokens",
926
1018
  default=None,
927
1019
  type=int,
928
1020
  help="Optional token budget for the final context pack using the naive-whitespace tokenizer.",
929
1021
  )
1022
+ p_context_pack_build.add_argument(
1023
+ "--max-characters",
1024
+ default=None,
1025
+ type=int,
1026
+ help="Optional character budget for the final context pack.",
1027
+ )
930
1028
  p_context_pack_build.set_defaults(func=cmd_context_pack_build)
931
1029
 
932
1030
  p_eval = sub.add_parser("eval", help="Evaluate a run against a dataset.")
biblicus/constants.py CHANGED
@@ -4,6 +4,7 @@ Shared constants for Biblicus.
4
4
 
5
5
  SCHEMA_VERSION = 2
6
6
  DATASET_SCHEMA_VERSION = 1
7
+ EXTRACTION_DATASET_SCHEMA_VERSION = 1
7
8
  ANALYSIS_SCHEMA_VERSION = 1
8
9
  CORPUS_DIR_NAME = ".biblicus"
9
10
  DEFAULT_RAW_DIR = "raw"
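The new constant pins the dataset schema version. A minimal dataset stub that satisfies it; the field names follow the ExtractionEvaluationDataset model introduced later in this diff:

from biblicus.constants import EXTRACTION_DATASET_SCHEMA_VERSION

dataset_stub = {
    "schema_version": EXTRACTION_DATASET_SCHEMA_VERSION,
    "name": "smoke-test",
    "items": [],
}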
biblicus/context.py CHANGED
@@ -8,11 +8,11 @@ stable contract while context formatting remains an explicit policy surface.
8
8
 
9
9
  from __future__ import annotations
10
10
 
11
- from typing import List, Optional
11
+ from typing import Dict, List, Literal, Optional
12
12
 
13
13
  from pydantic import BaseModel, ConfigDict, Field
14
14
 
15
- from .models import RetrievalResult
15
+ from .models import Evidence, RetrievalResult
16
16
 
17
17
 
18
18
  class ContextPackPolicy(BaseModel):
@@ -21,11 +21,17 @@ class ContextPackPolicy(BaseModel):
21
21
 
22
22
  :ivar join_with: Separator inserted between evidence text blocks.
23
23
  :vartype join_with: str
24
+ :ivar ordering: Evidence ordering policy (rank, score, or source).
25
+ :vartype ordering: str
26
+ :ivar include_metadata: Whether to include evidence metadata lines in each block.
27
+ :vartype include_metadata: bool
24
28
  """
25
29
 
26
30
  model_config = ConfigDict(extra="forbid")
27
31
 
28
32
  join_with: str = Field(default="\n\n")
33
+ ordering: Literal["rank", "score", "source"] = Field(default="rank")
34
+ include_metadata: bool = Field(default=False)
29
35
 
30
36
 
31
37
  class ContextPack(BaseModel):
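For example, a policy that orders evidence by descending score and prefixes each block with metadata lines could be configured as follows; the separator value is illustrative:

from biblicus.context import ContextPackPolicy

policy = ContextPackPolicy(join_with="\n---\n", ordering="score", include_metadata=True)
# ordering is constrained to "rank", "score", or "source", and extra="forbid"
# turns any unknown field into a validation error.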
@@ -55,12 +61,15 @@ class ContextPackBlock(BaseModel):
55
61
  :vartype evidence_item_id: str
56
62
  :ivar text: Text included in this block.
57
63
  :vartype text: str
64
+ :ivar metadata: Optional metadata included with the block.
65
+ :vartype metadata: dict[str, object] or None
58
66
  """
59
67
 
60
68
  model_config = ConfigDict(extra="forbid")
61
69
 
62
70
  evidence_item_id: str = Field(min_length=1)
63
71
  text: str = Field(min_length=1)
72
+ metadata: Optional[Dict[str, object]] = None
64
73
 
65
74
 
66
75
  class TokenCounter(BaseModel):
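With include_metadata enabled, build_context_pack stores the metadata mapping on each block and also prepends the same fields to the block text. An illustrative block; all values are placeholders:

from biblicus.context import ContextPackBlock

block = ContextPackBlock(
    evidence_item_id="item-0001",
    text=(
        "item_id: item-0001\n"
        "source_uri: none\n"
        "score: 0.87\n"
        "stage: retrieve\n"
        "Evidence text goes here."
    ),
    metadata={"item_id": "item-0001", "source_uri": "none", "score": 0.87, "stage": "retrieve"},
)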
@@ -92,6 +101,19 @@ class TokenBudget(BaseModel):
92
101
  max_tokens: int = Field(ge=1)
93
102
 
94
103
 
104
+ class CharacterBudget(BaseModel):
105
+ """
106
+ Character budget for a context pack.
107
+
108
+ :ivar max_characters: Maximum characters permitted for the final context pack text.
109
+ :vartype max_characters: int
110
+ """
111
+
112
+ model_config = ConfigDict(extra="forbid")
113
+
114
+ max_characters: int = Field(ge=1)
115
+
116
+
95
117
  def build_context_pack(result: RetrievalResult, *, policy: ContextPackPolicy) -> ContextPack:
96
118
  """
97
119
  Build a context pack from a retrieval result using an explicit policy.
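CharacterBudget mirrors the existing TokenBudget contract; budgets below one character fail validation:

from biblicus.context import CharacterBudget

budget = CharacterBudget(max_characters=2000)
# CharacterBudget(max_characters=0) raises a validation error because of Field(ge=1).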
@@ -104,14 +126,20 @@ def build_context_pack(result: RetrievalResult, *, policy: ContextPackPolicy) ->
104
126
  :rtype: ContextPack
105
127
  """
106
128
  selected_blocks: List[ContextPackBlock] = []
107
- for evidence in result.evidence:
129
+ for evidence in _order_evidence(result.evidence, policy=policy):
108
130
  if not isinstance(evidence.text, str):
109
131
  continue
110
132
  trimmed_text = evidence.text.strip()
111
133
  if not trimmed_text:
112
134
  continue
135
+ metadata = _metadata_for_evidence(evidence) if policy.include_metadata else None
136
+ block_text = _format_block_text(trimmed_text, metadata=metadata)
113
137
  selected_blocks.append(
114
- ContextPackBlock(evidence_item_id=evidence.item_id, text=trimmed_text)
138
+ ContextPackBlock(
139
+ evidence_item_id=evidence.item_id,
140
+ text=block_text,
141
+ metadata=metadata,
142
+ )
115
143
  )
116
144
 
117
145
  return ContextPack(
@@ -181,3 +209,109 @@ def fit_context_pack_to_token_budget(
181
209
  remaining_blocks = remaining_blocks[:-1]
182
210
 
183
211
  return ContextPack(text="", evidence_count=0, blocks=[])
212
+
213
+
214
+ def fit_context_pack_to_character_budget(
215
+ context_pack: ContextPack,
216
+ *,
217
+ policy: ContextPackPolicy,
218
+ character_budget: CharacterBudget,
219
+ ) -> ContextPack:
220
+ """
221
+ Fit a context pack to a character budget by dropping trailing blocks.
222
+
223
+ :param context_pack: Context pack to fit.
224
+ :type context_pack: ContextPack
225
+ :param policy: Policy controlling how blocks are joined into text.
226
+ :type policy: ContextPackPolicy
227
+ :param character_budget: Character budget to enforce.
228
+ :type character_budget: CharacterBudget
229
+ :return: Fitted context pack.
230
+ :rtype: ContextPack
231
+ """
232
+ remaining_blocks: List[ContextPackBlock] = list(context_pack.blocks)
233
+ max_characters = character_budget.max_characters
234
+
235
+ while remaining_blocks:
236
+ candidate_text = policy.join_with.join([block.text for block in remaining_blocks])
237
+ if len(candidate_text) <= max_characters:
238
+ return ContextPack(
239
+ text=candidate_text,
240
+ evidence_count=len(remaining_blocks),
241
+ blocks=remaining_blocks,
242
+ )
243
+ remaining_blocks = remaining_blocks[:-1]
244
+
245
+ return ContextPack(text="", evidence_count=0, blocks=[])
246
+
247
+
248
+ def _order_evidence(
249
+ evidence: List[Evidence],
250
+ *,
251
+ policy: ContextPackPolicy,
252
+ ) -> List[Evidence]:
253
+ """
254
+ Order evidence items according to the context pack policy.
255
+
256
+ :param evidence: Evidence list to order.
257
+ :type evidence: list[Evidence]
258
+ :param policy: Context pack policy.
259
+ :type policy: ContextPackPolicy
260
+ :return: Ordered evidence list.
261
+ :rtype: list[Evidence]
262
+ """
263
+ if policy.ordering == "rank":
264
+ return sorted(evidence, key=lambda item: (item.rank, item.item_id))
265
+ if policy.ordering == "score":
266
+ return sorted(evidence, key=lambda item: (-item.score, item.item_id))
267
+ if policy.ordering == "source":
268
+ return sorted(
269
+ evidence,
270
+ key=lambda item: (
271
+ item.source_uri or item.item_id,
272
+ -item.score,
273
+ item.item_id,
274
+ ),
275
+ )
276
+ raise ValueError(f"Unknown context pack ordering: {policy.ordering}")
277
+
278
+
279
+ def _metadata_for_evidence(evidence: Evidence) -> Dict[str, object]:
280
+ """
281
+ Build metadata for a context pack block.
282
+
283
+ :param evidence: Evidence item to describe.
284
+ :type evidence: Evidence
285
+ :return: Metadata mapping.
286
+ :rtype: dict[str, object]
287
+ """
288
+ return {
289
+ "item_id": evidence.item_id,
290
+ "source_uri": evidence.source_uri or "none",
291
+ "score": evidence.score,
292
+ "stage": evidence.stage,
293
+ }
294
+
295
+
296
+ def _format_block_text(text: str, *, metadata: Optional[Dict[str, object]]) -> str:
297
+ """
298
+ Format a context pack block text with optional metadata.
299
+
300
+ :param text: Evidence text.
301
+ :type text: str
302
+ :param metadata: Optional metadata mapping.
303
+ :type metadata: dict[str, object] or None
304
+ :return: Formatted block text.
305
+ :rtype: str
306
+ """
307
+ if not metadata:
308
+ return text
309
+ metadata_lines = "\n".join(
310
+ [
311
+ f"item_id: {metadata['item_id']}",
312
+ f"source_uri: {metadata['source_uri']}",
313
+ f"score: {metadata['score']}",
314
+ f"stage: {metadata['stage']}",
315
+ ]
316
+ )
317
+ return f"{metadata_lines}\n{text}"
biblicus/extraction_evaluation.py ADDED
@@ -0,0 +1,312 @@
1
+ """
2
+ Extraction evaluation utilities for Biblicus.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ import json
8
+ from difflib import SequenceMatcher
9
+ from pathlib import Path
10
+ from typing import Dict, List, Optional
11
+
12
+ from pydantic import BaseModel, ConfigDict, Field, model_validator
13
+
14
+ from .constants import EXTRACTION_DATASET_SCHEMA_VERSION
15
+ from .corpus import Corpus
16
+ from .extraction import ExtractionRunManifest
17
+ from .models import CatalogItem
18
+ from .time import utc_now_iso
19
+
20
+
21
+ class ExtractionEvaluationItem(BaseModel):
22
+ """
23
+ Dataset item for extraction evaluation.
24
+
25
+ :ivar item_id: Optional item identifier.
26
+ :vartype item_id: str or None
27
+ :ivar source_uri: Optional source uniform resource identifier.
28
+ :vartype source_uri: str or None
29
+ :ivar expected_text: Expected extracted text.
30
+ :vartype expected_text: str
31
+ :ivar kind: Label kind (gold or synthetic).
32
+ :vartype kind: str
33
+ """
34
+
35
+ model_config = ConfigDict(extra="forbid")
36
+
37
+ item_id: Optional[str] = None
38
+ source_uri: Optional[str] = None
39
+ expected_text: str
40
+ kind: str = Field(default="gold")
41
+
42
+ @model_validator(mode="after")
43
+ def _require_locator(self) -> "ExtractionEvaluationItem":
44
+ if not self.item_id and not self.source_uri:
45
+ raise ValueError("Evaluation items must include item_id or source_uri")
46
+ return self
47
+
48
+
49
+ class ExtractionEvaluationDataset(BaseModel):
50
+ """
51
+ Dataset for extraction evaluation.
52
+
53
+ :ivar schema_version: Dataset schema version.
54
+ :vartype schema_version: int
55
+ :ivar name: Dataset name.
56
+ :vartype name: str
57
+ :ivar description: Optional description.
58
+ :vartype description: str or None
59
+ :ivar items: Labeled evaluation items.
60
+ :vartype items: list[ExtractionEvaluationItem]
61
+ """
62
+
63
+ model_config = ConfigDict(extra="forbid")
64
+
65
+ schema_version: int = Field(ge=1)
66
+ name: str
67
+ description: Optional[str] = None
68
+ items: List[ExtractionEvaluationItem] = Field(default_factory=list)
69
+
70
+ @model_validator(mode="after")
71
+ def _enforce_schema_version(self) -> "ExtractionEvaluationDataset":
72
+ if self.schema_version != EXTRACTION_DATASET_SCHEMA_VERSION:
73
+ raise ValueError(
74
+ f"Unsupported extraction dataset schema version: {self.schema_version}"
75
+ )
76
+ return self
77
+
78
+
79
+ class ExtractionEvaluationItemReport(BaseModel):
80
+ """
81
+ Per-item report for extraction evaluation.
82
+
83
+ :ivar item_id: Item identifier.
84
+ :vartype item_id: str
85
+ :ivar source_uri: Source uniform resource identifier.
86
+ :vartype source_uri: str
87
+ :ivar expected_text: Expected text from the dataset.
88
+ :vartype expected_text: str
89
+ :ivar extracted_text: Extracted text when available.
90
+ :vartype extracted_text: str or None
91
+ :ivar coverage_status: Coverage status (present, empty, missing).
92
+ :vartype coverage_status: str
93
+ :ivar extraction_status: Extraction status from the run (extracted, skipped, errored, missing).
94
+ :vartype extraction_status: str
95
+ :ivar similarity_score: Similarity score between expected and extracted text.
96
+ :vartype similarity_score: float
97
+ :ivar kind: Label kind from the dataset.
98
+ :vartype kind: str
99
+ """
100
+
101
+ model_config = ConfigDict(extra="forbid")
102
+
103
+ item_id: str
104
+ source_uri: str
105
+ expected_text: str
106
+ extracted_text: Optional[str] = None
107
+ coverage_status: str
108
+ extraction_status: str
109
+ similarity_score: float
110
+ kind: str
111
+
112
+
113
+ class ExtractionEvaluationResult(BaseModel):
114
+ """
115
+ Result bundle for an extraction evaluation.
116
+
117
+ :ivar dataset: Dataset metadata.
118
+ :vartype dataset: dict[str, object]
119
+ :ivar extractor_id: Extractor identifier.
120
+ :vartype extractor_id: str
121
+ :ivar run_id: Extraction run identifier.
122
+ :vartype run_id: str
123
+ :ivar recipe_id: Extraction recipe identifier.
124
+ :vartype recipe_id: str
125
+ :ivar recipe_name: Extraction recipe name.
126
+ :vartype recipe_name: str
127
+ :ivar evaluated_at: International Organization for Standardization 8601 timestamp.
128
+ :vartype evaluated_at: str
129
+ :ivar metrics: Evaluation metrics for coverage and accuracy.
130
+ :vartype metrics: dict[str, float]
131
+ :ivar items: Per-item evaluation reports.
132
+ :vartype items: list[ExtractionEvaluationItemReport]
133
+ """
134
+
135
+ model_config = ConfigDict(extra="forbid")
136
+
137
+ dataset: Dict[str, object]
138
+ extractor_id: str
139
+ run_id: str
140
+ recipe_id: str
141
+ recipe_name: str
142
+ evaluated_at: str
143
+ metrics: Dict[str, float]
144
+ items: List[ExtractionEvaluationItemReport]
145
+
146
+
147
+ def load_extraction_dataset(path: Path) -> ExtractionEvaluationDataset:
148
+ """
149
+ Load an extraction evaluation dataset from JavaScript Object Notation.
150
+
151
+ :param path: Path to the dataset file.
152
+ :type path: Path
153
+ :return: Parsed extraction evaluation dataset.
154
+ :rtype: ExtractionEvaluationDataset
155
+ """
156
+ try:
157
+ data = json.loads(path.read_text(encoding="utf-8"))
158
+ except json.JSONDecodeError as exc:
159
+ raise ValueError("Invalid extraction dataset") from exc
160
+ return ExtractionEvaluationDataset.model_validate(data)
161
+
162
+
163
+ def evaluate_extraction_run(
164
+ *,
165
+ corpus: Corpus,
166
+ run: ExtractionRunManifest,
167
+ extractor_id: str,
168
+ dataset: ExtractionEvaluationDataset,
169
+ ) -> ExtractionEvaluationResult:
170
+ """
171
+ Evaluate an extraction run against a dataset.
172
+
173
+ :param corpus: Corpus associated with the run.
174
+ :type corpus: Corpus
175
+ :param run: Extraction run manifest.
176
+ :type run: ExtractionRunManifest
177
+ :param extractor_id: Extractor identifier for the run.
178
+ :type extractor_id: str
179
+ :param dataset: Extraction evaluation dataset.
180
+ :type dataset: ExtractionEvaluationDataset
181
+ :return: Extraction evaluation result bundle.
182
+ :rtype: ExtractionEvaluationResult
183
+ """
184
+ catalog = corpus.load_catalog()
185
+ item_index = {item.item_id: item for item in run.items}
186
+ coverage_present = 0
187
+ coverage_empty = 0
188
+ coverage_missing = 0
189
+ processable = 0
190
+ similarity_scores: List[float] = []
191
+ item_reports: List[ExtractionEvaluationItemReport] = []
192
+
193
+ for dataset_item in dataset.items:
194
+ item_id = _resolve_item_id(dataset_item, catalog_items=catalog.items)
195
+ catalog_item = catalog.items.get(item_id)
196
+ if catalog_item is None:
197
+ raise ValueError(f"Unknown item identifier: {item_id}")
198
+ extraction_item = item_index.get(item_id)
199
+ extraction_status = extraction_item.status if extraction_item else "missing"
200
+ if extraction_status != "errored" and extraction_status != "missing":
201
+ processable += 1
202
+
203
+ extracted_text = corpus.read_extracted_text(
204
+ extractor_id=extractor_id, run_id=run.run_id, item_id=item_id
205
+ )
206
+ coverage_status = _coverage_status(extracted_text)
207
+ if coverage_status == "present":
208
+ coverage_present += 1
209
+ elif coverage_status == "empty":
210
+ coverage_empty += 1
211
+ else:
212
+ coverage_missing += 1
213
+
214
+ similarity_score = _similarity_score(
215
+ expected_text=dataset_item.expected_text, extracted_text=extracted_text
216
+ )
217
+ similarity_scores.append(similarity_score)
218
+ item_reports.append(
219
+ ExtractionEvaluationItemReport(
220
+ item_id=item_id,
221
+ source_uri=catalog_item.source_uri,
222
+ expected_text=dataset_item.expected_text,
223
+ extracted_text=extracted_text,
224
+ coverage_status=coverage_status,
225
+ extraction_status=extraction_status,
226
+ similarity_score=similarity_score,
227
+ kind=dataset_item.kind,
228
+ )
229
+ )
230
+
231
+ total_items = max(len(dataset.items), 1)
232
+ average_similarity = sum(similarity_scores) / total_items if similarity_scores else 0.0
233
+ metrics = {
234
+ "coverage_present": float(coverage_present),
235
+ "coverage_empty": float(coverage_empty),
236
+ "coverage_missing": float(coverage_missing),
237
+ "processable_fraction": processable / total_items,
238
+ "average_similarity": average_similarity,
239
+ }
240
+ dataset_meta = {
241
+ "name": dataset.name,
242
+ "description": dataset.description,
243
+ "items": len(dataset.items),
244
+ }
245
+ return ExtractionEvaluationResult(
246
+ dataset=dataset_meta,
247
+ extractor_id=extractor_id,
248
+ run_id=run.run_id,
249
+ recipe_id=run.recipe.recipe_id,
250
+ recipe_name=run.recipe.name,
251
+ evaluated_at=utc_now_iso(),
252
+ metrics=metrics,
253
+ items=item_reports,
254
+ )
255
+
256
+
257
+ def write_extraction_evaluation_result(
258
+ *, corpus: Corpus, run_id: str, result: ExtractionEvaluationResult
259
+ ) -> Path:
260
+ """
261
+ Persist extraction evaluation output under the corpus.
262
+
263
+ :param corpus: Corpus associated with the evaluation.
264
+ :type corpus: Corpus
265
+ :param run_id: Extraction run identifier.
266
+ :type run_id: str
267
+ :param result: Evaluation result to write.
268
+ :type result: ExtractionEvaluationResult
269
+ :return: Output path.
270
+ :rtype: Path
271
+ """
272
+ output_dir = corpus.runs_dir / "evaluation" / "extraction" / run_id
273
+ output_dir.mkdir(parents=True, exist_ok=True)
274
+ output_path = output_dir / "output.json"
275
+ output_path.write_text(result.model_dump_json(indent=2) + "\n", encoding="utf-8")
276
+ return output_path
277
+
278
+
279
+ def _resolve_item_id(
280
+ dataset_item: ExtractionEvaluationItem, *, catalog_items: Dict[str, CatalogItem]
281
+ ) -> str:
282
+ if dataset_item.item_id:
283
+ return dataset_item.item_id
284
+ source_uri = dataset_item.source_uri
285
+ if not source_uri:
286
+ raise ValueError("Evaluation item is missing item_id and source_uri")
287
+ for item_id, catalog_item in catalog_items.items():
288
+ if getattr(catalog_item, "source_uri", None) == source_uri:
289
+ return item_id
290
+ raise ValueError(f"Unknown source uniform resource identifier: {source_uri}")
291
+
292
+
293
+ def _coverage_status(extracted_text: Optional[str]) -> str:
294
+ if extracted_text is None:
295
+ return "missing"
296
+ if extracted_text.strip():
297
+ return "present"
298
+ return "empty"
299
+
300
+
301
+ def _normalize_text(text: str) -> str:
302
+ return " ".join(text.lower().split())
303
+
304
+
305
+ def _similarity_score(*, expected_text: str, extracted_text: Optional[str]) -> float:
306
+ if extracted_text is None:
307
+ return 0.0
308
+ expected = _normalize_text(expected_text)
309
+ actual = _normalize_text(extracted_text)
310
+ if not expected and not actual:
311
+ return 1.0
312
+ return SequenceMatcher(None, expected, actual).ratio()
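A self-contained sketch of the dataset format and loader; the file name, item identifiers, and expected texts are placeholders:

import json
from pathlib import Path

from biblicus.extraction_evaluation import load_extraction_dataset

dataset_path = Path("extraction-eval.json")
dataset_path.write_text(
    json.dumps(
        {
            "schema_version": 1,
            "name": "smoke-test",
            "description": "Two hand-labelled items.",
            "items": [
                {"item_id": "item-0001", "expected_text": "Alpha beta gamma.", "kind": "gold"},
                {
                    "source_uri": "https://example.org/doc.html",
                    "expected_text": "Delta epsilon.",
                    "kind": "synthetic",
                },
            ],
        }
    ),
    encoding="utf-8",
)

dataset = load_extraction_dataset(dataset_path)
# Each item must provide item_id or source_uri, and schema_version must equal
# EXTRACTION_DATASET_SCHEMA_VERSION (currently 1), or validation fails.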
biblicus-0.11.0.dist-info/METADATA → biblicus-0.13.0.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: biblicus
3
- Version: 0.11.0
3
+ Version: 0.13.0
4
4
  Summary: Command line interface and Python library for corpus ingestion, retrieval, and evaluation.
5
5
  License: MIT
6
6
  Requires-Python: >=3.9
@@ -486,10 +486,11 @@ corpus/
486
486
 
487
487
  ## Retrieval backends
488
488
 
489
- Two backends are included.
489
+ Three backends are included.
490
490
 
491
491
  - `scan` is a minimal baseline that scans raw items directly.
492
492
  - `sqlite-full-text-search` is a practical baseline that builds a full text search index in SQLite.
493
+ - `vector` is a deterministic term-frequency vector baseline with cosine similarity scoring.
493
494
 
494
495
  For detailed documentation including configuration options, performance characteristics, and usage examples, see the [Backend Reference][backend-reference].
495
496
 
@@ -535,6 +536,9 @@ These extractors are built in. Optional ones require extra dependencies. See [te
535
536
 
536
537
  For detailed documentation on all extractors, see the [Extractor Reference][extractor-reference].
537
538
 
539
+ For extraction evaluation workflows, dataset formats, and report interpretation, see
540
+ `docs/EXTRACTION_EVALUATION.md`.
541
+
538
542
  ## Topic modeling analysis
539
543
 
540
544
  Biblicus can run analysis pipelines on extracted text without changing the raw corpus. Profiling and topic modeling
biblicus-0.11.0.dist-info/RECORD → biblicus-0.13.0.dist-info/RECORD RENAMED
@@ -1,14 +1,15 @@
1
- biblicus/__init__.py,sha256=sT0PFc3DRGFRcN7Zx4Yooc8OzmLvaj1-ZjbvFHce8lU,496
1
+ biblicus/__init__.py,sha256=pD55sYei6AGGLcN1AWnpUY6-zPIPq1WxOp-sexOOlT0,496
2
2
  biblicus/__main__.py,sha256=ipfkUoTlocVnrQDM69C7TeBqQxmHVeiWMRaT3G9rtnk,117
3
- biblicus/cli.py,sha256=aH3plnednnYgcPnSoYQf200nboKc6N-tuc3FuLPQEcU,35132
4
- biblicus/constants.py,sha256=-JaHI3Dngte2drawx93cGWxFVobbgIuaVhmjUJpf4GI,333
5
- biblicus/context.py,sha256=qnT9CH7_ldoPcg-rxnUOtRhheOmpDAbF8uqhf8OdjC4,5832
3
+ biblicus/cli.py,sha256=cMoirLFPhTwftNuqaadajCcRUEz_FBaLkupjVxpAxO8,38403
4
+ biblicus/constants.py,sha256=gAlEVJhxdFj-eWWJrlYbP7H1X3c5gwhrIBq9NQ1Vq_E,371
5
+ biblicus/context.py,sha256=U7qkOwMdqNgYnqaC9hgQY0kv0R-6qcjV6bhXQl2WUkE,10215
6
6
  biblicus/corpus.py,sha256=qSDnYJXhWlF2p_BbFLl6xtI53lIIPxwyKLLGLC432Sg,55612
7
7
  biblicus/crawl.py,sha256=n8rXBMnziBK9vtKQQCXYOpBzqsPCswj2PzVJUb370KY,6250
8
8
  biblicus/errors.py,sha256=uMajd5DvgnJ_-jq5sbeom1GV8DPUc-kojBaECFi6CsY,467
9
9
  biblicus/evaluation.py,sha256=5xWpb-8f49Osh9aHzo1ab3AXOmls3Imc5rdnEC0pN-8,8143
10
10
  biblicus/evidence_processing.py,sha256=sJe6T1nLxvU0xs9yMH8JZZS19zHXMR-Fpr5lWi5ndUM,6120
11
11
  biblicus/extraction.py,sha256=qvrsq6zSz2Kg-cap-18HPHC9pQlqEGo7pyID2uKCyBo,19760
12
+ biblicus/extraction_evaluation.py,sha256=cBC2B1nQCtXmOcVWUhHyO2NJRX8QSDuqhVjEc8PXrOA,10400
12
13
  biblicus/frontmatter.py,sha256=JOGjIDzbbOkebQw2RzA-3WDVMAMtJta2INjS4e7-LMg,2463
13
14
  biblicus/hook_logging.py,sha256=IMvde-JhVWrx9tNz3eDJ1CY_rr5Sj7DZ2YNomYCZbz0,5366
14
15
  biblicus/hook_manager.py,sha256=ZCAkE5wLvn4lnQz8jho_o0HGEC9KdQd9qitkAEUQRcw,6997
@@ -57,9 +58,9 @@ biblicus/extractors/select_override.py,sha256=gSpffFmn1ux9pGtFvHD5Uu_LO8TmmJC4L_
57
58
  biblicus/extractors/select_smart_override.py,sha256=-sLMnNoeXbCB3dO9zflQq324eHuLbd6hpveSwduXP-U,6763
58
59
  biblicus/extractors/select_text.py,sha256=w0ATmDy3tWWbOObzW87jGZuHbgXllUhotX5XyySLs-o,3395
59
60
  biblicus/extractors/unstructured_text.py,sha256=l2S_wD_htu7ZHoJQNQtP-kGlEgOeKV_w2IzAC93lePE,3564
60
- biblicus-0.11.0.dist-info/licenses/LICENSE,sha256=lw44GXFG_Q0fS8m5VoEvv_xtdBXK26pBcbSPUCXee_Q,1078
61
- biblicus-0.11.0.dist-info/METADATA,sha256=zrJESYGfGLu7Iq1I--GPIkEY9gXDb9szBIuenlWor7I,27765
62
- biblicus-0.11.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
63
- biblicus-0.11.0.dist-info/entry_points.txt,sha256=BZmO4H8Uz00fyi1RAFryOCGfZgX7eHWkY2NE-G54U5A,47
64
- biblicus-0.11.0.dist-info/top_level.txt,sha256=sUD_XVZwDxZ29-FBv1MknTGh4mgDXznGuP28KJY_WKc,9
65
- biblicus-0.11.0.dist-info/RECORD,,
61
+ biblicus-0.13.0.dist-info/licenses/LICENSE,sha256=lw44GXFG_Q0fS8m5VoEvv_xtdBXK26pBcbSPUCXee_Q,1078
62
+ biblicus-0.13.0.dist-info/METADATA,sha256=Ae0gttdvOggyE1vQVab4IOSmbx-JklxzvBZJ_3UyxIA,27979
63
+ biblicus-0.13.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
64
+ biblicus-0.13.0.dist-info/entry_points.txt,sha256=BZmO4H8Uz00fyi1RAFryOCGfZgX7eHWkY2NE-G54U5A,47
65
+ biblicus-0.13.0.dist-info/top_level.txt,sha256=sUD_XVZwDxZ29-FBv1MknTGh4mgDXznGuP28KJY_WKc,9
66
+ biblicus-0.13.0.dist-info/RECORD,,