biblicus 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. biblicus/__init__.py +30 -0
  2. biblicus/__main__.py +8 -0
  3. biblicus/_vendor/dotyaml/__init__.py +14 -0
  4. biblicus/_vendor/dotyaml/interpolation.py +63 -0
  5. biblicus/_vendor/dotyaml/loader.py +181 -0
  6. biblicus/_vendor/dotyaml/transformer.py +135 -0
  7. biblicus/backends/__init__.py +42 -0
  8. biblicus/backends/base.py +65 -0
  9. biblicus/backends/scan.py +375 -0
  10. biblicus/backends/sqlite_full_text_search.py +487 -0
  11. biblicus/cli.py +804 -0
  12. biblicus/constants.py +12 -0
  13. biblicus/context.py +183 -0
  14. biblicus/corpus.py +1531 -0
  15. biblicus/crawl.py +186 -0
  16. biblicus/errors.py +15 -0
  17. biblicus/evaluation.py +257 -0
  18. biblicus/evidence_processing.py +201 -0
  19. biblicus/extraction.py +531 -0
  20. biblicus/extractors/__init__.py +44 -0
  21. biblicus/extractors/base.py +68 -0
  22. biblicus/extractors/metadata_text.py +106 -0
  23. biblicus/extractors/openai_stt.py +180 -0
  24. biblicus/extractors/pass_through_text.py +84 -0
  25. biblicus/extractors/pdf_text.py +100 -0
  26. biblicus/extractors/pipeline.py +105 -0
  27. biblicus/extractors/rapidocr_text.py +129 -0
  28. biblicus/extractors/select_longest_text.py +105 -0
  29. biblicus/extractors/select_text.py +100 -0
  30. biblicus/extractors/unstructured_text.py +100 -0
  31. biblicus/frontmatter.py +89 -0
  32. biblicus/hook_logging.py +180 -0
  33. biblicus/hook_manager.py +203 -0
  34. biblicus/hooks.py +261 -0
  35. biblicus/ignore.py +64 -0
  36. biblicus/knowledge_base.py +191 -0
  37. biblicus/models.py +445 -0
  38. biblicus/retrieval.py +133 -0
  39. biblicus/sources.py +212 -0
  40. biblicus/time.py +17 -0
  41. biblicus/uris.py +63 -0
  42. biblicus/user_config.py +138 -0
  43. biblicus-0.6.0.dist-info/METADATA +533 -0
  44. biblicus-0.6.0.dist-info/RECORD +48 -0
  45. biblicus-0.6.0.dist-info/WHEEL +5 -0
  46. biblicus-0.6.0.dist-info/entry_points.txt +2 -0
  47. biblicus-0.6.0.dist-info/licenses/LICENSE +21 -0
  48. biblicus-0.6.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,106 @@
1
+ """
2
+ Metadata-based text extractor plugin.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ from typing import Any, Dict, List, Optional
8
+
9
+ from pydantic import BaseModel, ConfigDict, Field
10
+
11
+ from ..models import CatalogItem, ExtractedText, ExtractionStepOutput
12
+ from .base import TextExtractor
13
+
14
+
15
class MetadataTextExtractorConfig(BaseModel):
    """
    Options selecting which catalog metadata fields the metadata text extractor emits.

    The extractor is deliberately small and deterministic: its output is built
    solely from an item's catalog metadata. Unknown configuration keys are rejected.

    :ivar include_title: Emit the item title as the first line when one is present.
    :vartype include_title: bool
    :ivar include_tags: Emit a ``tags: ...`` line when the item carries tags.
    :vartype include_tags: bool
    """

    model_config = ConfigDict(extra="forbid")

    include_title: bool = True
    include_tags: bool = True
32
+
33
+
34
class MetadataTextExtractor(TextExtractor):
    """
    Extractor plugin producing a compact, searchable text rendering of item metadata.

    The rendering is meant to be stable and easy to read:

    - When the item has a title, the title is the first line.
    - When the item has tags, the following line is ``tags: <comma separated tags>``.

    Typical uses:

    - Retrieval over non-text items whose metadata is meaningful.
    - Comparing retrieval backends while keeping extraction constant.

    :ivar extractor_id: Extractor identifier.
    :vartype extractor_id: str
    """

    extractor_id = "metadata-text"

    def validate_config(self, config: Dict[str, Any]) -> BaseModel:
        """
        Parse and validate the extractor configuration mapping.

        :param config: Configuration mapping.
        :type config: dict[str, Any]
        :return: Parsed config.
        :rtype: MetadataTextExtractorConfig
        """
        validated = MetadataTextExtractorConfig.model_validate(config)
        return validated

    def extract_text(
        self,
        *,
        corpus,
        item: CatalogItem,
        config: BaseModel,
        previous_extractions: List[ExtractionStepOutput],
    ) -> Optional[ExtractedText]:
        """
        Build the metadata-derived text payload for an item.

        :param corpus: Corpus containing the item bytes (unused here).
        :type corpus: Corpus
        :param item: Catalog item being processed.
        :type item: CatalogItem
        :param config: Parsed configuration model.
        :type config: MetadataTextExtractorConfig
        :param previous_extractions: Prior step outputs for this item within the pipeline (unused here).
        :type previous_extractions: list[biblicus.models.ExtractionStepOutput]
        :return: Extracted text payload, or ``None`` if no metadata is available.
        :rtype: ExtractedText or None
        """
        # Accept either an already-parsed model or anything the model can validate.
        if isinstance(config, MetadataTextExtractorConfig):
            options = config
        else:
            options = MetadataTextExtractorConfig.model_validate(config)

        # Only catalog metadata is consulted; these inputs are intentionally ignored.
        del corpus, previous_extractions

        output_lines: list[str] = []

        if options.include_title and isinstance(item.title, str):
            cleaned_title = item.title.strip()
            if cleaned_title:
                output_lines.append(cleaned_title)

        # Keep only non-blank string tags, trimmed of surrounding whitespace.
        cleaned_tags: list[str] = []
        for raw_tag in item.tags:
            if not isinstance(raw_tag, str):
                continue
            trimmed_tag = raw_tag.strip()
            if trimmed_tag:
                cleaned_tags.append(trimmed_tag)

        if options.include_tags and cleaned_tags:
            joined_tags = ", ".join(cleaned_tags)
            output_lines.append(f"tags: {joined_tags}")

        if output_lines:
            return ExtractedText(
                text="\n".join(output_lines), producer_extractor_id=self.extractor_id
            )
        return None
@@ -0,0 +1,180 @@
1
+ """
2
+ OpenAI-backed speech to text extractor plugin.
3
+
4
+ This extractor is implemented as an optional dependency so the core installation stays small.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from typing import Any, Dict, List, Optional
10
+
11
+ from pydantic import BaseModel, ConfigDict, Field, model_validator
12
+
13
+ from ..corpus import Corpus
14
+ from ..errors import ExtractionRunFatalError
15
+ from ..models import CatalogItem, ExtractedText, ExtractionStepOutput
16
+ from ..user_config import resolve_openai_api_key
17
+ from .base import TextExtractor
18
+
19
+
20
class OpenAiSpeechToTextExtractorConfig(BaseModel):
    """
    Settings for speech to text extraction through the OpenAI API.

    Unknown configuration keys are rejected.

    :ivar model: OpenAI transcription model identifier.
    :vartype model: str
    :ivar response_format: OpenAI transcription response format.
    :vartype response_format: str
    :ivar language: Optional language code hint for transcription.
    :vartype language: str or None
    :ivar prompt: Optional prompt text to guide transcription.
    :vartype prompt: str or None
    :ivar no_speech_probability_threshold: Optional threshold, between 0.0 and 1.0, for
        suppressing hallucinated transcripts. Only valid with ``response_format='verbose_json'``.
    :vartype no_speech_probability_threshold: float or None
    """

    model_config = ConfigDict(extra="forbid")

    model: str = Field(default="whisper-1", min_length=1)
    response_format: str = Field(default="json", min_length=1)
    language: Optional[str] = Field(default=None, min_length=1)
    prompt: Optional[str] = Field(default=None, min_length=1)
    no_speech_probability_threshold: Optional[float] = Field(default=None, ge=0.0, le=1.0)

    @model_validator(mode="after")
    def _validate_no_speech_threshold(self) -> "OpenAiSpeechToTextExtractorConfig":
        # A threshold is only meaningful when the response carries per-segment data.
        threshold_requested = self.no_speech_probability_threshold is not None
        if threshold_requested and self.response_format != "verbose_json":
            raise ValueError(
                "no_speech_probability_threshold requires response_format='verbose_json' "
                "so the transcription API returns per-segment no-speech probabilities"
            )
        return self
54
+
55
+
56
class OpenAiSpeechToTextExtractor(TextExtractor):
    """
    Extractor plugin that transcribes audio items using the OpenAI API.

    This extractor is intended as a practical, hosted speech to text implementation.
    It skips non-audio items.

    The transcription result is accepted in three shapes: a mapping, an
    attribute-style response object, or a plain string (returned by the API client
    for text-only response formats).

    :ivar extractor_id: Extractor identifier.
    :vartype extractor_id: str
    """

    extractor_id = "stt-openai"

    def validate_config(self, config: Dict[str, Any]) -> BaseModel:
        """
        Validate extractor configuration and ensure prerequisites are available.

        :param config: Configuration mapping.
        :type config: dict[str, Any]
        :return: Parsed configuration model.
        :rtype: OpenAiSpeechToTextExtractorConfig
        :raises ExtractionRunFatalError: If the optional dependency or required environment is missing.
        """
        # Import is only a presence check so a missing extra fails before any item is processed.
        try:
            from openai import OpenAI  # noqa: F401
        except ImportError as import_error:
            raise ExtractionRunFatalError(
                "OpenAI speech to text extractor requires an optional dependency. "
                'Install it with pip install "biblicus[openai]".'
            ) from import_error

        api_key = resolve_openai_api_key()
        if api_key is None:
            raise ExtractionRunFatalError(
                "OpenAI speech to text extractor requires an OpenAI API key. "
                "Set OPENAI_API_KEY or configure it in ~/.biblicus/config.yml or ./.biblicus/config.yml under "
                "openai.api_key."
            )

        return OpenAiSpeechToTextExtractorConfig.model_validate(config)

    @staticmethod
    def _read_field(container: Any, *names: str) -> Any:
        """
        Return the first field among ``names`` present on a mapping or attribute-style object.

        :param container: Mapping or object to read from.
        :type container: Any
        :param names: Candidate field names, in priority order.
        :type names: str
        :return: The value of the first present field, or ``None`` when none is present.
        :rtype: Any
        """
        for name in names:
            if isinstance(container, dict):
                if name in container:
                    return container[name]
            elif hasattr(container, name):
                return getattr(container, name)
        return None

    @staticmethod
    def _transcript_from_result(result: Any, no_speech_probability_threshold: Optional[float]) -> str:
        """
        Derive the transcript text from a transcription result of any supported shape.

        :param result: Transcription result: a string, a mapping, or an attribute-style object.
        :type result: Any
        :param no_speech_probability_threshold: Optional threshold; when any segment reports a
            no-speech probability at or above it, the transcript is suppressed.
        :type no_speech_probability_threshold: float or None
        :return: Transcript text (not yet stripped), or an empty string when suppressed.
        :rtype: str
        """
        read_field = OpenAiSpeechToTextExtractor._read_field

        # Text-only response formats come back as a bare string with no segment data.
        if isinstance(result, str):
            return result

        transcript_text = str(read_field(result, "text") or "")
        if no_speech_probability_threshold is None:
            return transcript_text

        segments = read_field(result, "segments")
        if not isinstance(segments, list) or not segments:
            return transcript_text

        probabilities: list[float] = []
        for entry in segments:
            # Both field spellings are accepted; the first one present wins.
            value = read_field(entry, "no_speech_prob", "no_speech_probability")
            if isinstance(value, (int, float)):
                probabilities.append(float(value))

        if probabilities and max(probabilities) >= no_speech_probability_threshold:
            return ""
        return transcript_text

    def extract_text(
        self,
        *,
        corpus: Corpus,
        item: CatalogItem,
        config: BaseModel,
        previous_extractions: List[ExtractionStepOutput],
    ) -> Optional[ExtractedText]:
        """
        Transcribe an audio item.

        :param corpus: Corpus containing the item bytes.
        :type corpus: Corpus
        :param item: Catalog item being processed.
        :type item: CatalogItem
        :param config: Parsed configuration model.
        :type config: OpenAiSpeechToTextExtractorConfig
        :param previous_extractions: Prior step outputs for this item within the pipeline.
        :type previous_extractions: list[biblicus.models.ExtractionStepOutput]
        :return: Extracted text payload, or None when the item is not audio.
        :rtype: ExtractedText or None
        :raises ExtractionRunFatalError: If the optional dependency or required configuration is missing.
        """
        _ = previous_extractions
        if not item.media_type.startswith("audio/"):
            return None

        parsed_config = (
            config
            if isinstance(config, OpenAiSpeechToTextExtractorConfig)
            else OpenAiSpeechToTextExtractorConfig.model_validate(config)
        )

        api_key = resolve_openai_api_key()
        if api_key is None:
            raise ExtractionRunFatalError(
                "OpenAI speech to text extractor requires an OpenAI API key. "
                "Set OPENAI_API_KEY or configure it in ~/.biblicus/config.yml or ./.biblicus/config.yml under "
                "openai.api_key."
            )

        try:
            from openai import OpenAI
        except ImportError as import_error:
            raise ExtractionRunFatalError(
                "OpenAI speech to text extractor requires an optional dependency. "
                'Install it with pip install "biblicus[openai]".'
            ) from import_error

        client = OpenAI(api_key=api_key)
        source_path = corpus.root / item.relpath
        with source_path.open("rb") as audio_handle:
            result = client.audio.transcriptions.create(
                file=audio_handle,
                model=parsed_config.model,
                response_format=parsed_config.response_format,
                language=parsed_config.language,
                prompt=parsed_config.prompt,
            )

        transcript_text = self._transcript_from_result(
            result, parsed_config.no_speech_probability_threshold
        )
        return ExtractedText(text=transcript_text.strip(), producer_extractor_id=self.extractor_id)
@@ -0,0 +1,84 @@
1
+ """
2
+ Pass-through extractor for text items.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ from typing import Any, Dict, List, Optional
8
+
9
+ from pydantic import BaseModel, ConfigDict
10
+
11
+ from ..corpus import Corpus
12
+ from ..frontmatter import parse_front_matter
13
+ from ..models import CatalogItem, ExtractedText, ExtractionStepOutput
14
+ from .base import TextExtractor
15
+
16
+
17
class PassThroughTextExtractorConfig(BaseModel):
    """
    Empty configuration model for the pass-through text extractor.

    The extractor takes no options; any supplied configuration key is rejected.
    """

    model_config = ConfigDict(extra="forbid")
25
+
26
+
27
class PassThroughTextExtractor(TextExtractor):
    """
    Extractor plugin returning the text content of text items as stored in the corpus.

    Items whose media type is not ``text/*`` are skipped. Markdown items have their
    front matter parsed off so only the body is returned.

    :ivar extractor_id: Extractor identifier.
    :vartype extractor_id: str
    """

    extractor_id = "pass-through-text"

    def validate_config(self, config: Dict[str, Any]) -> BaseModel:
        """
        Parse and validate the extractor configuration mapping.

        :param config: Configuration mapping.
        :type config: dict[str, Any]
        :return: Parsed config.
        :rtype: PassThroughTextExtractorConfig
        """
        validated = PassThroughTextExtractorConfig.model_validate(config)
        return validated

    def extract_text(
        self,
        *,
        corpus: Corpus,
        item: CatalogItem,
        config: BaseModel,
        previous_extractions: List[ExtractionStepOutput],
    ) -> Optional[ExtractedText]:
        """
        Read the item's raw bytes from the corpus and return them as text.

        :param corpus: Corpus containing the item bytes.
        :type corpus: Corpus
        :param item: Catalog item being processed.
        :type item: CatalogItem
        :param config: Parsed configuration model (unused here).
        :type config: PassThroughTextExtractorConfig
        :param previous_extractions: Prior step outputs for this item within the pipeline (unused here).
        :type previous_extractions: list[biblicus.models.ExtractionStepOutput]
        :return: Extracted text payload, or None if the item is not text.
        :rtype: ExtractedText or None
        """
        del config, previous_extractions

        media_type = item.media_type
        # "text/markdown" is itself a "text/" type, so one prefix test covers both cases.
        if not media_type.startswith("text/"):
            return None

        item_path = corpus.root / item.relpath
        decoded_text = item_path.read_bytes().decode("utf-8")

        if media_type == "text/markdown":
            # Markdown items may carry front matter; only the body is extracted text.
            decoded_text = parse_front_matter(decoded_text).body

        return ExtractedText(text=decoded_text, producer_extractor_id=self.extractor_id)
@@ -0,0 +1,100 @@
1
+ """
2
+ Portable Document Format text extractor plugin.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ from io import BytesIO
8
+ from typing import Any, Dict, List, Optional
9
+
10
+ from pydantic import BaseModel, ConfigDict, Field
11
+ from pypdf import PdfReader
12
+
13
+ from ..models import CatalogItem, ExtractedText, ExtractionStepOutput
14
+ from .base import TextExtractor
15
+
16
+
17
class PortableDocumentFormatTextExtractorConfig(BaseModel):
    """
    Settings for Portable Document Format text extraction.

    Unknown configuration keys are rejected.

    :ivar max_pages: Optional cap on the number of pages to process; at least 1 when given.
    :vartype max_pages: int or None
    """

    model_config = ConfigDict(extra="forbid")

    max_pages: Optional[int] = Field(None, ge=1)
28
+
29
+
30
class PortableDocumentFormatTextExtractor(TextExtractor):
    """
    Extractor plugin that attempts to extract text from Portable Document Format items.

    This extractor only handles items whose media type is `application/pdf`.
    Items of other media types are skipped.

    :ivar extractor_id: Extractor identifier.
    :vartype extractor_id: str
    """

    extractor_id = "pdf-text"

    def validate_config(self, config: Dict[str, Any]) -> BaseModel:
        """
        Validate extractor configuration.

        :param config: Configuration mapping.
        :type config: dict[str, Any]
        :return: Parsed configuration.
        :rtype: PortableDocumentFormatTextExtractorConfig
        """
        return PortableDocumentFormatTextExtractorConfig.model_validate(config)

    def extract_text(
        self,
        *,
        corpus,
        item: CatalogItem,
        config: BaseModel,
        previous_extractions: List[ExtractionStepOutput],
    ) -> Optional[ExtractedText]:
        """
        Extract text for a Portable Document Format item.

        Page texts are joined with newlines and the combined result is stripped.
        Pages that yield no text contribute an empty string.

        :param corpus: Corpus containing the item bytes.
        :type corpus: Corpus
        :param item: Catalog item being processed.
        :type item: CatalogItem
        :param config: Parsed configuration model.
        :type config: PortableDocumentFormatTextExtractorConfig
        :param previous_extractions: Prior step outputs for this item within the pipeline.
        :type previous_extractions: list[biblicus.models.ExtractionStepOutput]
        :return: Extracted text payload, or None when the item is not a Portable Document Format item.
        :rtype: ExtractedText or None
        """
        from itertools import islice

        if item.media_type != "application/pdf":
            return None

        _ = previous_extractions
        parsed_config = (
            config
            if isinstance(config, PortableDocumentFormatTextExtractorConfig)
            else PortableDocumentFormatTextExtractorConfig.model_validate(config)
        )

        pdf_path = corpus.root / item.relpath
        pdf_bytes = pdf_path.read_bytes()
        reader = PdfReader(BytesIO(pdf_bytes))

        # islice stops after max_pages without building a list of every page first;
        # a None limit iterates the whole document.
        selected_pages = islice(reader.pages, parsed_config.max_pages)
        texts = [page.extract_text() or "" for page in selected_pages]

        combined_text = "\n".join(texts).strip()
        return ExtractedText(text=combined_text, producer_extractor_id=self.extractor_id)
@@ -0,0 +1,105 @@
1
+ """
2
+ Pipeline extractor configuration and validation.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ from typing import Any, Dict, List, Optional
8
+
9
+ from pydantic import BaseModel, ConfigDict, Field, model_validator
10
+
11
+ from ..corpus import Corpus
12
+ from ..errors import ExtractionRunFatalError
13
+ from ..models import CatalogItem, ExtractedText, ExtractionStepOutput
14
+ from .base import TextExtractor
15
+
16
+
17
class PipelineStepSpec(BaseModel):
    """
    Description of one extractor step inside a pipeline.

    Unknown keys are rejected.

    :ivar extractor_id: Non-empty extractor plugin identifier.
    :vartype extractor_id: str
    :ivar config: Extractor configuration mapping; defaults to an empty mapping.
    :vartype config: dict[str, Any]
    """

    model_config = ConfigDict(extra="forbid")

    extractor_id: str = Field(min_length=1)
    config: Dict[str, Any] = Field(default_factory=dict)
31
+
32
+
33
class PipelineExtractorConfig(BaseModel):
    """
    Settings for the pipeline extractor.

    Unknown keys are rejected, at least one step is required, and no step may
    name the pipeline extractor itself.

    :ivar steps: Ordered list of extractor steps to run.
    :vartype steps: list[PipelineStepSpec]
    """

    model_config = ConfigDict(extra="forbid")

    steps: List[PipelineStepSpec] = Field(min_length=1)

    @model_validator(mode="after")
    def _forbid_pipeline_step(self) -> "PipelineExtractorConfig":
        # Nesting a pipeline inside a pipeline is not allowed.
        for step_spec in self.steps:
            if step_spec.extractor_id == "pipeline":
                raise ValueError("Pipeline steps cannot include the pipeline extractor itself")
        return self
50
+
51
+
52
class PipelineExtractor(TextExtractor):
    """
    Configuration-only stand-in for the pipeline extractor.

    The extraction engine runs pipelines itself so that it can persist per-step
    artifacts. This class therefore validates configuration and refuses to be
    executed directly.

    :ivar extractor_id: Extractor identifier.
    :vartype extractor_id: str
    """

    extractor_id = "pipeline"

    def validate_config(self, config: Dict[str, Any]) -> BaseModel:
        """
        Parse and validate the pipeline configuration mapping.

        :param config: Configuration mapping.
        :type config: dict[str, Any]
        :return: Parsed configuration.
        :rtype: PipelineExtractorConfig
        """
        validated = PipelineExtractorConfig.model_validate(config)
        return validated

    def extract_text(
        self,
        *,
        corpus: Corpus,
        item: CatalogItem,
        config: BaseModel,
        previous_extractions: List[ExtractionStepOutput],
    ) -> Optional[ExtractedText]:
        """
        Refuse direct execution; pipelines are run by the extraction runner.

        :param corpus: Corpus containing the item bytes (unused).
        :type corpus: Corpus
        :param item: Catalog item being processed (unused).
        :type item: CatalogItem
        :param config: Parsed configuration model (unused).
        :type config: PipelineExtractorConfig
        :param previous_extractions: Prior step outputs for this item within the pipeline (unused).
        :type previous_extractions: list[biblicus.models.ExtractionStepOutput]
        :raises ExtractionRunFatalError: Always, because the pipeline is executed by the runner.
        :return: None.
        :rtype: None
        """
        # All arguments exist only to satisfy the extractor interface.
        del corpus, item, config, previous_extractions
        message = "Pipeline extractor must be executed by the extraction runner."
        raise ExtractionRunFatalError(message)