biblicus 0.2.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. biblicus/__init__.py +2 -2
  2. biblicus/_vendor/dotyaml/__init__.py +14 -0
  3. biblicus/_vendor/dotyaml/interpolation.py +63 -0
  4. biblicus/_vendor/dotyaml/loader.py +181 -0
  5. biblicus/_vendor/dotyaml/transformer.py +135 -0
  6. biblicus/backends/__init__.py +0 -2
  7. biblicus/backends/base.py +3 -3
  8. biblicus/backends/scan.py +21 -15
  9. biblicus/backends/sqlite_full_text_search.py +14 -15
  10. biblicus/cli.py +177 -53
  11. biblicus/corpus.py +209 -59
  12. biblicus/crawl.py +186 -0
  13. biblicus/errors.py +15 -0
  14. biblicus/evaluation.py +4 -8
  15. biblicus/extraction.py +280 -79
  16. biblicus/extractors/__init__.py +14 -3
  17. biblicus/extractors/base.py +12 -5
  18. biblicus/extractors/metadata_text.py +13 -5
  19. biblicus/extractors/openai_stt.py +180 -0
  20. biblicus/extractors/pass_through_text.py +16 -6
  21. biblicus/extractors/pdf_text.py +100 -0
  22. biblicus/extractors/pipeline.py +105 -0
  23. biblicus/extractors/rapidocr_text.py +129 -0
  24. biblicus/extractors/select_longest_text.py +105 -0
  25. biblicus/extractors/select_text.py +100 -0
  26. biblicus/extractors/unstructured_text.py +100 -0
  27. biblicus/frontmatter.py +0 -3
  28. biblicus/hook_logging.py +0 -5
  29. biblicus/hook_manager.py +3 -5
  30. biblicus/hooks.py +3 -7
  31. biblicus/ignore.py +0 -3
  32. biblicus/models.py +118 -0
  33. biblicus/retrieval.py +0 -4
  34. biblicus/sources.py +44 -9
  35. biblicus/time.py +1 -2
  36. biblicus/uris.py +3 -4
  37. biblicus/user_config.py +138 -0
  38. {biblicus-0.2.0.dist-info → biblicus-0.4.0.dist-info}/METADATA +96 -18
  39. biblicus-0.4.0.dist-info/RECORD +45 -0
  40. biblicus/extractors/cascade.py +0 -101
  41. biblicus-0.2.0.dist-info/RECORD +0 -32
  42. {biblicus-0.2.0.dist-info → biblicus-0.4.0.dist-info}/WHEEL +0 -0
  43. {biblicus-0.2.0.dist-info → biblicus-0.4.0.dist-info}/entry_points.txt +0 -0
  44. {biblicus-0.2.0.dist-info → biblicus-0.4.0.dist-info}/licenses/LICENSE +0 -0
  45. {biblicus-0.2.0.dist-info → biblicus-0.4.0.dist-info}/top_level.txt +0 -0
biblicus/__init__.py CHANGED
@@ -2,6 +2,7 @@
2
2
  Biblicus public package interface.
3
3
  """
4
4
 
5
+ from .corpus import Corpus
5
6
  from .models import (
6
7
  CorpusConfig,
7
8
  Evidence,
@@ -11,7 +12,6 @@ from .models import (
11
12
  RetrievalResult,
12
13
  RetrievalRun,
13
14
  )
14
- from .corpus import Corpus
15
15
 
16
16
  __all__ = [
17
17
  "__version__",
@@ -25,4 +25,4 @@ __all__ = [
25
25
  "RetrievalRun",
26
26
  ]
27
27
 
28
- __version__ = "0.2.0"
28
+ __version__ = "0.4.0"
@@ -0,0 +1,14 @@
1
+ """
2
+ Vendored dotyaml utilities.
3
+
4
+ This package vendors the minimal pieces of the `dotyaml` project that Biblicus uses for
5
+ loading and interpolating YAML configuration files.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from .interpolation import interpolate_env_vars
11
+ from .loader import ConfigLoader, load_config
12
+
13
+ __all__ = ["ConfigLoader", "interpolate_env_vars", "load_config"]
14
+
@@ -0,0 +1,63 @@
1
+ """
2
+ Environment variable interpolation functionality for dotyaml.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ import os
8
+ import re
9
+ from typing import Any, Dict, Union
10
+
11
+
12
def interpolate_env_vars(data: Union[str, Dict[str, Any], Any]) -> Union[str, Dict[str, Any], Any]:
    """
    Expand environment variable placeholders throughout a parsed YAML structure.

    Placeholders use a Jinja-like form: ``{{ ENV_VAR_NAME }}`` or
    ``{{ ENV_VAR_NAME|default_value }}``.

    Dictionaries are rebuilt with their values (not their keys) processed recursively, lists are
    rebuilt item by item, strings are handed to ``_interpolate_string``, and every other value is
    returned unchanged.

    :param data: Data structure to interpolate (string, dict, list, etc).
    :type data: str or dict[str, Any] or Any
    :return: Data structure with environment variables interpolated.
    :rtype: str or dict[str, Any] or Any
    """
    if isinstance(data, dict):
        expanded: Dict[str, Any] = {}
        for name, member in data.items():
            expanded[name] = interpolate_env_vars(member)
        return expanded
    if isinstance(data, list):
        return list(map(interpolate_env_vars, data))
    if isinstance(data, str):
        return _interpolate_string(data)
    # Numbers, booleans, None, tuples, ... carry no placeholders to expand.
    return data
31
+
32
+
33
def _interpolate_string(text: str) -> str:
    """
    Replace environment variable placeholders inside a single string.

    Recognized forms (variable names are upper-case letters, digits and underscores, not starting
    with a digit):

    - ``{{ ENV_VAR }}`` required environment variable.
    - ``{{ ENV_VAR|default_value }}`` environment variable with default; the default is stripped
      of surrounding whitespace before use.

    Text that does not match the placeholder pattern is left untouched.

    :param text: String to interpolate.
    :type text: str
    :return: String with environment variables interpolated.
    :rtype: str
    :raises ValueError: If a required environment variable is not found.
    """
    pattern = r"\{\{\s*([A-Z_][A-Z0-9_]*)\s*(?:\|\s*([^}]*?))?\s*\}\}"

    def resolve(found):  # type: ignore[no-untyped-def]
        name, fallback = found.group(1), found.group(2)
        current = os.environ.get(name)
        if current is None and fallback is None:
            raise ValueError(f"Required environment variable '{name}' not found")
        # A value present in the environment always beats the inline default.
        return fallback.strip() if current is None else current

    return re.compile(pattern).sub(resolve, text)
63
+
@@ -0,0 +1,181 @@
1
+ """
2
+ Core loading functionality for dotyaml.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ import os
8
+ from pathlib import Path
9
+ from typing import Any, Dict, Optional, Union
10
+
11
+ import yaml
12
+
13
+ try:
14
+ from dotenv import load_dotenv
15
+
16
+ DOTENV_AVAILABLE = True
17
+ except ImportError:
18
+ DOTENV_AVAILABLE = False
19
+
20
+ from .interpolation import interpolate_env_vars
21
+ from .transformer import flatten_dict, unflatten_env_vars
22
+
23
+
24
def load_config(
    yaml_path: Optional[Union[str, Path]] = None,
    prefix: str = "",
    override: bool = False,
    dotenv_path: Optional[Union[str, Path]] = ".env",
    load_dotenv_first: bool = True,
) -> Dict[str, str]:
    """
    Load configuration from a YAML file and export it as environment variables.

    When enabled, a ``.env`` file is loaded first. A relative ``dotenv_path`` is looked up in the
    current working directory and then next to the YAML file; only the first existing candidate is
    loaded. The YAML document is then interpolated, flattened with ``prefix`` and written to
    ``os.environ``.

    :param yaml_path: Path to YAML configuration file. When None or missing, nothing is exported.
    :type yaml_path: str or Path or None
    :param prefix: Prefix for environment variable names (for example, ``APP``).
    :type prefix: str
    :param override: Whether to override existing environment variables.
    :type override: bool
    :param dotenv_path: Optional ``.env`` file path to load first.
    :type dotenv_path: str or Path or None
    :param load_dotenv_first: Whether to load ``.env`` before YAML.
    :type load_dotenv_first: bool
    :return: Mapping of each flattened key to its effective value: the pre-existing environment
        value when the variable was already set and ``override`` is false, otherwise the YAML value.
    :rtype: dict[str, str]
    """
    if load_dotenv_first and DOTENV_AVAILABLE and dotenv_path:
        dotenv_file = Path(dotenv_path)
        if dotenv_file.is_absolute():
            candidates = [dotenv_file]
        else:
            candidates = [Path.cwd() / dotenv_path]
            if yaml_path:
                candidates.append(Path(yaml_path).parent / dotenv_path)

        first_existing = next((place for place in candidates if place.exists()), None)
        if first_existing is not None:
            load_dotenv(first_existing)

    applied: Dict[str, str] = {}
    if not yaml_path or not Path(yaml_path).exists():
        return applied

    with open(yaml_path, "r", encoding="utf-8") as handle:
        document = yaml.safe_load(handle)
    if not document:
        return applied

    flattened = flatten_dict(interpolate_env_vars(document), prefix)
    for name, text in flattened.items():
        if override or name not in os.environ:
            os.environ[name] = text
            applied[name] = text
        else:
            # Keep (and report) the value that was already present in the environment.
            applied[name] = os.environ[name]

    return applied
83
+
84
+
85
class ConfigLoader:
    """
    Configuration loader that can read YAML files or environment variables.

    A ``.env`` file is loaded on construction and again before each YAML load, provided
    ``load_dotenv_first`` is true, the ``dotenv`` import succeeded (``DOTENV_AVAILABLE``) and
    ``dotenv_path`` is set.

    :param prefix: Prefix for environment variable names.
    :type prefix: str
    :param schema: Optional schema mapping. It is stored on the instance; none of the methods in
        this class read it.
    :type schema: dict[str, Any] or None
    :param dotenv_path: Optional ``.env`` file path.
    :type dotenv_path: str or Path or None
    :param load_dotenv_first: Whether to load ``.env`` before other sources.
    :type load_dotenv_first: bool
    """

    def __init__(
        self,
        prefix: str = "",
        schema: Optional[Dict[str, Any]] = None,
        dotenv_path: Optional[Union[str, Path]] = ".env",
        load_dotenv_first: bool = True,
    ) -> None:
        self.prefix = prefix
        self.schema = schema
        self.dotenv_path = dotenv_path
        self.load_dotenv_first = load_dotenv_first

        # No YAML file is known yet, so only the working directory is searched.
        self._load_first_dotenv()

    def _load_first_dotenv(self, yaml_path: Optional[Union[str, Path]] = None) -> None:
        """
        Load the first existing ``.env`` candidate, if dotenv loading is enabled.

        An absolute ``dotenv_path`` is the only candidate. A relative one is resolved against the
        current working directory and, when ``yaml_path`` is given, against the YAML file's
        directory, in that order.

        :param yaml_path: Optional YAML file whose directory is also searched.
        :type yaml_path: str or Path or None
        :return: None.
        :rtype: None
        """

        if not (self.load_dotenv_first and DOTENV_AVAILABLE and self.dotenv_path):
            return

        env_file = Path(self.dotenv_path)
        if env_file.is_absolute():
            env_locations = [env_file]
        else:
            env_locations = [Path.cwd() / env_file]
            if yaml_path is not None:
                env_locations.append(Path(yaml_path).parent / env_file)

        for env_path in env_locations:
            if env_path.exists():
                load_dotenv(env_path)
                break

    def load_from_yaml(self, yaml_path: Union[str, Path]) -> Dict[str, Any]:
        """
        Load configuration from a YAML file with environment variable interpolation.

        :param yaml_path: YAML configuration file path.
        :type yaml_path: str or Path
        :return: Parsed YAML data, or an empty dict when the file is missing or empty.
        :rtype: dict[str, Any]
        """

        if not Path(yaml_path).exists():
            return {}

        # Load .env before parsing so placeholders in the YAML can see its variables.
        self._load_first_dotenv(yaml_path)

        with open(yaml_path, "r", encoding="utf-8") as file:
            yaml_data = yaml.safe_load(file)

        if yaml_data:
            yaml_data = interpolate_env_vars(yaml_data)

        return yaml_data or {}

    def load_from_env(self) -> Dict[str, Any]:
        """
        Load configuration from environment variables.

        :return: Nested configuration dictionary.
        :rtype: dict[str, Any]
        """

        env_vars = dict(os.environ)
        return unflatten_env_vars(env_vars, self.prefix)

    def set_env_vars(self, config: Dict[str, Any], override: bool = False) -> None:
        """
        Set environment variables from a configuration dictionary.

        :param config: Configuration mapping.
        :type config: dict[str, Any]
        :param override: Whether to override existing environment variables.
        :type override: bool
        :return: None.
        :rtype: None
        """

        flat_config = flatten_dict(config, self.prefix)

        for key, value in flat_config.items():
            if override or key not in os.environ:
                os.environ[key] = value
181
+
@@ -0,0 +1,135 @@
1
+ """
2
+ YAML to environment variable transformation utilities for dotyaml.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ import json
8
+ from typing import Any, Dict
9
+
10
+
11
def flatten_dict(data: Dict[str, Any], prefix: str = "", separator: str = "_") -> Dict[str, str]:
    """
    Flatten a nested dictionary into environment-variable style keys.

    Each key is upper-cased, joined to ``prefix`` with ``separator``, and has ``-`` and ``.``
    replaced by ``_``. Nested dictionaries are flattened recursively using the cleaned key as the
    new prefix; every other value is converted with ``convert_value_to_string``.

    :param data: Nested dictionary to flatten.
    :type data: dict[str, Any]
    :param prefix: Prefix to add to all keys.
    :type prefix: str
    :param separator: Separator between key parts.
    :type separator: str
    :return: Flattened mapping with string values.
    :rtype: dict[str, str]
    """

    result: Dict[str, str] = {}

    for key, value in data.items():
        # YAML mappings may use non-string keys (for example integers); coerce them so that
        # upper-casing cannot fail with AttributeError.
        key_part = str(key).upper()
        full_key = f"{prefix}{separator}{key_part}" if prefix else key_part

        clean_key = full_key.replace("-", "_").replace(".", "_")

        if isinstance(value, dict):
            result.update(flatten_dict(value, clean_key, separator))
        else:
            result[clean_key] = convert_value_to_string(value)

    return result
41
+
42
+
43
def convert_value_to_string(value: Any) -> str:
    """
    Render a Python value as the text stored in an environment variable.

    ``None`` becomes the empty string, booleans become ``true``/``false``, strings are returned
    as-is, lists and tuples become their converted items joined by commas, dictionaries are dumped
    as JSON, and anything else (including numbers) goes through ``str``.

    :param value: Value to convert.
    :type value: Any
    :return: String representation suitable for environment variables.
    :rtype: str
    """
    if value is None:
        return ""
    # bool must be handled before the generic str() fallback: str(True) would give "True".
    if isinstance(value, bool):
        return str(value).lower()
    if isinstance(value, str):
        return value
    if isinstance(value, dict):
        return json.dumps(value)
    if isinstance(value, (list, tuple)):
        rendered = [convert_value_to_string(member) for member in value]
        return ",".join(rendered)
    return str(value)
66
+
67
+
68
def unflatten_env_vars(env_vars: Dict[str, str], prefix: str = "") -> Dict[str, Any]:
    """
    Convert flat environment variables back to nested dictionary structure.

    Keys are lower-cased and split on ``_``; every part but the last names a nested dictionary and
    the last part receives the value converted by ``convert_string_to_value``. When ``prefix`` is
    given, only keys starting with ``<prefix>_`` are used and that leading part is removed.

    When two variables collide on a path (for example ``A`` and ``A_B``), the entry processed later
    wins: a non-dictionary value sitting where a nested dictionary is needed is replaced by a new
    dictionary, and a later scalar replaces an earlier nested dictionary.

    :param env_vars: Mapping of environment variables.
    :type env_vars: dict[str, str]
    :param prefix: Optional prefix to filter by.
    :type prefix: str
    :return: Nested dictionary structure.
    :rtype: dict[str, Any]
    """

    result: Dict[str, Any] = {}
    marker = f"{prefix}_" if prefix else ""

    for key, value in env_vars.items():
        if not key.startswith(marker):
            continue

        parts = key[len(marker) :].lower().split("_")

        current: Dict[str, Any] = result
        for part in parts[:-1]:
            child = current.get(part)
            if not isinstance(child, dict):
                # Either missing or a previously stored scalar/list; descending into a
                # non-dict would raise TypeError on the assignment below.
                child = {}
                current[part] = child
            current = child

        current[parts[-1]] = convert_string_to_value(value)

    return result
102
+
103
+
104
def convert_string_to_value(value: str) -> Any:
    """
    Convert a string environment variable back to an appropriate Python type.

    Conversions are tried in this order:

    1. The empty string becomes ``None``.
    2. ``true`` / ``false`` (any case) become booleans.
    3. Optionally negative digit strings become ``int``.
    4. Strings made only of digits, ``.`` and ``-`` become ``float`` when parseable, otherwise
       they are returned unchanged (for example ``1.2.3``).
    5. Strings that start with ``[`` or ``{`` and are valid JSON are decoded as JSON.
    6. Strings containing a comma become a list of recursively converted, stripped items.
    7. Anything else is decoded as JSON when valid, otherwise returned unchanged.

    :param value: String value from an environment variable.
    :type value: str
    :return: Converted Python value.
    :rtype: Any
    """

    if value == "":
        return None
    lowered = value.lower()
    if lowered == "true":
        return True
    if lowered == "false":
        return False

    unsigned = value[1:] if value.startswith("-") else value
    if unsigned.isdigit():
        try:
            return int(value)
        except ValueError:
            # str.isdigit() accepts characters such as superscript digits that int() rejects;
            # fall through to the remaining conversions instead of crashing.
            pass

    if value.replace(".", "").replace("-", "").isdigit():
        try:
            return float(value)
        except ValueError:
            return value

    # JSON containers must be tried before comma splitting, otherwise a multi-element object or
    # array would be cut into meaningless fragments.
    if value.lstrip()[:1] in ("[", "{"):
        try:
            return json.loads(value)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            pass

    if "," in value:
        items = [item.strip() for item in value.split(",")]
        return [convert_string_to_value(item) for item in items]
    try:
        return json.loads(value)
    except (json.JSONDecodeError, ValueError):
        return value
135
+
@@ -18,7 +18,6 @@ def available_backends() -> Dict[str, Type[RetrievalBackend]]:
18
18
  :return: Mapping of backend identifiers to backend classes.
19
19
  :rtype: dict[str, Type[RetrievalBackend]]
20
20
  """
21
-
22
21
  return {
23
22
  ScanBackend.backend_id: ScanBackend,
24
23
  SqliteFullTextSearchBackend.backend_id: SqliteFullTextSearchBackend,
@@ -35,7 +34,6 @@ def get_backend(backend_id: str) -> RetrievalBackend:
35
34
  :rtype: RetrievalBackend
36
35
  :raises KeyError: If the backend identifier is unknown.
37
36
  """
38
-
39
37
  registry = available_backends()
40
38
  backend_class = registry.get(backend_id)
41
39
  if backend_class is None:
biblicus/backends/base.py CHANGED
@@ -22,7 +22,9 @@ class RetrievalBackend(ABC):
22
22
  backend_id: str
23
23
 
24
24
  @abstractmethod
25
- def build_run(self, corpus: Corpus, *, recipe_name: str, config: Dict[str, object]) -> RetrievalRun:
25
+ def build_run(
26
+ self, corpus: Corpus, *, recipe_name: str, config: Dict[str, object]
27
+ ) -> RetrievalRun:
26
28
  """
27
29
  Build or register a retrieval run for the backend.
28
30
 
@@ -35,7 +37,6 @@ class RetrievalBackend(ABC):
35
37
  :return: Run manifest describing the build.
36
38
  :rtype: RetrievalRun
37
39
  """
38
-
39
40
  raise NotImplementedError
40
41
 
41
42
  @abstractmethod
@@ -61,5 +62,4 @@ class RetrievalBackend(ABC):
61
62
  :return: Retrieval results containing evidence.
62
63
  :rtype: RetrievalResult
63
64
  """
64
-
65
65
  raise NotImplementedError
biblicus/backends/scan.py CHANGED
@@ -9,9 +9,15 @@ from typing import Dict, Iterable, List, Optional, Tuple
9
9
  from pydantic import BaseModel, ConfigDict, Field
10
10
 
11
11
  from ..corpus import Corpus
12
- from ..extraction import ExtractionRunReference, parse_extraction_run_reference
13
12
  from ..frontmatter import parse_front_matter
14
- from ..models import Evidence, QueryBudget, RetrievalResult, RetrievalRun
13
+ from ..models import (
14
+ Evidence,
15
+ ExtractionRunReference,
16
+ QueryBudget,
17
+ RetrievalResult,
18
+ RetrievalRun,
19
+ parse_extraction_run_reference,
20
+ )
15
21
  from ..retrieval import apply_budget, create_recipe_manifest, create_run_manifest, hash_text
16
22
  from ..time import utc_now_iso
17
23
 
@@ -42,7 +48,9 @@ class ScanBackend:
42
48
 
43
49
  backend_id = "scan"
44
50
 
45
- def build_run(self, corpus: Corpus, *, recipe_name: str, config: Dict[str, object]) -> RetrievalRun:
51
+ def build_run(
52
+ self, corpus: Corpus, *, recipe_name: str, config: Dict[str, object]
53
+ ) -> RetrievalRun:
46
54
  """
47
55
  Register a scan backend run (no materialization).
48
56
 
@@ -55,7 +63,6 @@ class ScanBackend:
55
63
  :return: Run manifest describing the build.
56
64
  :rtype: RetrievalRun
57
65
  """
58
-
59
66
  recipe_config = ScanRecipeConfig.model_validate(config)
60
67
  catalog = corpus.load_catalog()
61
68
  recipe = create_recipe_manifest(
@@ -63,7 +70,10 @@ class ScanBackend:
63
70
  name=recipe_name,
64
71
  config=recipe_config.model_dump(),
65
72
  )
66
- stats = {"items": len(catalog.items), "text_items": _count_text_items(corpus, catalog.items.values(), recipe_config)}
73
+ stats = {
74
+ "items": len(catalog.items),
75
+ "text_items": _count_text_items(corpus, catalog.items.values(), recipe_config),
76
+ }
67
77
  run = create_run_manifest(corpus, recipe=recipe, stats=stats, artifact_paths=[])
68
78
  corpus.write_run(run)
69
79
  return run
@@ -90,7 +100,6 @@ class ScanBackend:
90
100
  :return: Retrieval results containing evidence.
91
101
  :rtype: RetrievalResult
92
102
  """
93
-
94
103
  recipe_config = ScanRecipeConfig.model_validate(run.recipe.config)
95
104
  catalog = corpus.load_catalog()
96
105
  extraction_reference = _resolve_extraction_reference(corpus, recipe_config)
@@ -130,7 +139,9 @@ class ScanBackend:
130
139
  )
131
140
 
132
141
 
133
- def _resolve_extraction_reference(corpus: Corpus, recipe_config: ScanRecipeConfig) -> Optional[ExtractionRunReference]:
142
+ def _resolve_extraction_reference(
143
+ corpus: Corpus, recipe_config: ScanRecipeConfig
144
+ ) -> Optional[ExtractionRunReference]:
134
145
  """
135
146
  Resolve an extraction run reference from a recipe config.
136
147
 
@@ -142,7 +153,6 @@ def _resolve_extraction_reference(corpus: Corpus, recipe_config: ScanRecipeConfi
142
153
  :rtype: ExtractionRunReference or None
143
154
  :raises FileNotFoundError: If an extraction run is referenced but not present.
144
155
  """
145
-
146
156
  if not recipe_config.extraction_run:
147
157
  return None
148
158
  extraction_reference = parse_extraction_run_reference(recipe_config.extraction_run)
@@ -155,7 +165,9 @@ def _resolve_extraction_reference(corpus: Corpus, recipe_config: ScanRecipeConfi
155
165
  return extraction_reference
156
166
 
157
167
 
158
- def _count_text_items(corpus: Corpus, items: Iterable[object], recipe_config: ScanRecipeConfig) -> int:
168
+ def _count_text_items(
169
+ corpus: Corpus, items: Iterable[object], recipe_config: ScanRecipeConfig
170
+ ) -> int:
159
171
  """
160
172
  Count catalog items that represent text content.
161
173
 
@@ -170,7 +182,6 @@ def _count_text_items(corpus: Corpus, items: Iterable[object], recipe_config: Sc
170
182
  :return: Number of text items.
171
183
  :rtype: int
172
184
  """
173
-
174
185
  text_item_count = 0
175
186
  extraction_reference = _resolve_extraction_reference(corpus, recipe_config)
176
187
  for catalog_item in items:
@@ -199,7 +210,6 @@ def _tokenize_query(query_text: str) -> List[str]:
199
210
  :return: Lowercased non-empty tokens.
200
211
  :rtype: list[str]
201
212
  """
202
-
203
213
  return [token for token in query_text.lower().split() if token]
204
214
 
205
215
 
@@ -227,7 +237,6 @@ def _load_text_from_item(
227
237
  :return: Text payload or None if not decodable as text.
228
238
  :rtype: str or None
229
239
  """
230
-
231
240
  if extraction_reference:
232
241
  extracted_text = corpus.read_extracted_text(
233
242
  extractor_id=extraction_reference.extractor_id,
@@ -259,7 +268,6 @@ def _find_first_match(text: str, tokens: List[str]) -> Optional[Tuple[int, int]]
259
268
  :return: Start/end span for the earliest match, or None if no matches.
260
269
  :rtype: tuple[int, int] or None
261
270
  """
262
-
263
271
  lower_text = text.lower()
264
272
  best_start: Optional[int] = None
265
273
  best_end: Optional[int] = None
@@ -291,7 +299,6 @@ def _build_snippet(text: str, span: Optional[Tuple[int, int]], *, max_chars: int
291
299
  :return: Snippet text.
292
300
  :rtype: str
293
301
  """
294
-
295
302
  if not text:
296
303
  return ""
297
304
  if span is None:
@@ -325,7 +332,6 @@ def _score_items(
325
332
  :return: Evidence candidates with provisional ranks.
326
333
  :rtype: list[Evidence]
327
334
  """
328
-
329
335
  evidence_items: List[Evidence] = []
330
336
  for catalog_item in items:
331
337
  media_type = getattr(catalog_item, "media_type", "")