kiln-ai 0.20.1__py3-none-any.whl → 0.22.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kiln-ai might be problematic.
- kiln_ai/adapters/__init__.py +6 -0
- kiln_ai/adapters/adapter_registry.py +43 -226
- kiln_ai/adapters/chunkers/__init__.py +13 -0
- kiln_ai/adapters/chunkers/base_chunker.py +42 -0
- kiln_ai/adapters/chunkers/chunker_registry.py +16 -0
- kiln_ai/adapters/chunkers/fixed_window_chunker.py +39 -0
- kiln_ai/adapters/chunkers/helpers.py +23 -0
- kiln_ai/adapters/chunkers/test_base_chunker.py +63 -0
- kiln_ai/adapters/chunkers/test_chunker_registry.py +28 -0
- kiln_ai/adapters/chunkers/test_fixed_window_chunker.py +346 -0
- kiln_ai/adapters/chunkers/test_helpers.py +75 -0
- kiln_ai/adapters/data_gen/test_data_gen_task.py +9 -3
- kiln_ai/adapters/embedding/__init__.py +0 -0
- kiln_ai/adapters/embedding/base_embedding_adapter.py +44 -0
- kiln_ai/adapters/embedding/embedding_registry.py +32 -0
- kiln_ai/adapters/embedding/litellm_embedding_adapter.py +199 -0
- kiln_ai/adapters/embedding/test_base_embedding_adapter.py +283 -0
- kiln_ai/adapters/embedding/test_embedding_registry.py +166 -0
- kiln_ai/adapters/embedding/test_litellm_embedding_adapter.py +1149 -0
- kiln_ai/adapters/eval/eval_runner.py +6 -2
- kiln_ai/adapters/eval/test_base_eval.py +1 -3
- kiln_ai/adapters/eval/test_g_eval.py +1 -1
- kiln_ai/adapters/extractors/__init__.py +18 -0
- kiln_ai/adapters/extractors/base_extractor.py +72 -0
- kiln_ai/adapters/extractors/encoding.py +20 -0
- kiln_ai/adapters/extractors/extractor_registry.py +44 -0
- kiln_ai/adapters/extractors/extractor_runner.py +112 -0
- kiln_ai/adapters/extractors/litellm_extractor.py +406 -0
- kiln_ai/adapters/extractors/test_base_extractor.py +244 -0
- kiln_ai/adapters/extractors/test_encoding.py +54 -0
- kiln_ai/adapters/extractors/test_extractor_registry.py +181 -0
- kiln_ai/adapters/extractors/test_extractor_runner.py +181 -0
- kiln_ai/adapters/extractors/test_litellm_extractor.py +1290 -0
- kiln_ai/adapters/fine_tune/test_dataset_formatter.py +2 -2
- kiln_ai/adapters/fine_tune/test_fireworks_finetune.py +2 -6
- kiln_ai/adapters/fine_tune/test_together_finetune.py +2 -6
- kiln_ai/adapters/ml_embedding_model_list.py +494 -0
- kiln_ai/adapters/ml_model_list.py +876 -18
- kiln_ai/adapters/model_adapters/litellm_adapter.py +40 -75
- kiln_ai/adapters/model_adapters/test_litellm_adapter.py +79 -1
- kiln_ai/adapters/model_adapters/test_litellm_adapter_tools.py +119 -5
- kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +9 -3
- kiln_ai/adapters/model_adapters/test_structured_output.py +9 -10
- kiln_ai/adapters/ollama_tools.py +69 -12
- kiln_ai/adapters/provider_tools.py +190 -46
- kiln_ai/adapters/rag/deduplication.py +49 -0
- kiln_ai/adapters/rag/progress.py +252 -0
- kiln_ai/adapters/rag/rag_runners.py +844 -0
- kiln_ai/adapters/rag/test_deduplication.py +195 -0
- kiln_ai/adapters/rag/test_progress.py +785 -0
- kiln_ai/adapters/rag/test_rag_runners.py +2376 -0
- kiln_ai/adapters/remote_config.py +80 -8
- kiln_ai/adapters/test_adapter_registry.py +579 -86
- kiln_ai/adapters/test_ml_embedding_model_list.py +239 -0
- kiln_ai/adapters/test_ml_model_list.py +202 -0
- kiln_ai/adapters/test_ollama_tools.py +340 -1
- kiln_ai/adapters/test_prompt_builders.py +1 -1
- kiln_ai/adapters/test_provider_tools.py +199 -8
- kiln_ai/adapters/test_remote_config.py +551 -56
- kiln_ai/adapters/vector_store/__init__.py +1 -0
- kiln_ai/adapters/vector_store/base_vector_store_adapter.py +83 -0
- kiln_ai/adapters/vector_store/lancedb_adapter.py +389 -0
- kiln_ai/adapters/vector_store/test_base_vector_store.py +160 -0
- kiln_ai/adapters/vector_store/test_lancedb_adapter.py +1841 -0
- kiln_ai/adapters/vector_store/test_vector_store_registry.py +199 -0
- kiln_ai/adapters/vector_store/vector_store_registry.py +33 -0
- kiln_ai/datamodel/__init__.py +16 -13
- kiln_ai/datamodel/basemodel.py +201 -4
- kiln_ai/datamodel/chunk.py +158 -0
- kiln_ai/datamodel/datamodel_enums.py +27 -0
- kiln_ai/datamodel/embedding.py +64 -0
- kiln_ai/datamodel/external_tool_server.py +206 -54
- kiln_ai/datamodel/extraction.py +317 -0
- kiln_ai/datamodel/project.py +33 -1
- kiln_ai/datamodel/rag.py +79 -0
- kiln_ai/datamodel/task.py +5 -0
- kiln_ai/datamodel/task_output.py +41 -11
- kiln_ai/datamodel/test_attachment.py +649 -0
- kiln_ai/datamodel/test_basemodel.py +270 -14
- kiln_ai/datamodel/test_chunk_models.py +317 -0
- kiln_ai/datamodel/test_dataset_split.py +1 -1
- kiln_ai/datamodel/test_datasource.py +50 -0
- kiln_ai/datamodel/test_embedding_models.py +448 -0
- kiln_ai/datamodel/test_eval_model.py +6 -6
- kiln_ai/datamodel/test_external_tool_server.py +534 -152
- kiln_ai/datamodel/test_extraction_chunk.py +206 -0
- kiln_ai/datamodel/test_extraction_model.py +501 -0
- kiln_ai/datamodel/test_rag.py +641 -0
- kiln_ai/datamodel/test_task.py +35 -1
- kiln_ai/datamodel/test_tool_id.py +187 -1
- kiln_ai/datamodel/test_vector_store.py +320 -0
- kiln_ai/datamodel/tool_id.py +58 -0
- kiln_ai/datamodel/vector_store.py +141 -0
- kiln_ai/tools/base_tool.py +12 -3
- kiln_ai/tools/built_in_tools/math_tools.py +12 -4
- kiln_ai/tools/kiln_task_tool.py +158 -0
- kiln_ai/tools/mcp_server_tool.py +2 -2
- kiln_ai/tools/mcp_session_manager.py +51 -22
- kiln_ai/tools/rag_tools.py +164 -0
- kiln_ai/tools/test_kiln_task_tool.py +527 -0
- kiln_ai/tools/test_mcp_server_tool.py +4 -15
- kiln_ai/tools/test_mcp_session_manager.py +187 -227
- kiln_ai/tools/test_rag_tools.py +929 -0
- kiln_ai/tools/test_tool_registry.py +290 -7
- kiln_ai/tools/tool_registry.py +69 -16
- kiln_ai/utils/__init__.py +3 -0
- kiln_ai/utils/async_job_runner.py +62 -17
- kiln_ai/utils/config.py +2 -2
- kiln_ai/utils/env.py +15 -0
- kiln_ai/utils/filesystem.py +14 -0
- kiln_ai/utils/filesystem_cache.py +60 -0
- kiln_ai/utils/litellm.py +94 -0
- kiln_ai/utils/lock.py +100 -0
- kiln_ai/utils/mime_type.py +38 -0
- kiln_ai/utils/open_ai_types.py +19 -2
- kiln_ai/utils/pdf_utils.py +59 -0
- kiln_ai/utils/test_async_job_runner.py +151 -35
- kiln_ai/utils/test_env.py +142 -0
- kiln_ai/utils/test_filesystem_cache.py +316 -0
- kiln_ai/utils/test_litellm.py +206 -0
- kiln_ai/utils/test_lock.py +185 -0
- kiln_ai/utils/test_mime_type.py +66 -0
- kiln_ai/utils/test_open_ai_types.py +88 -12
- kiln_ai/utils/test_pdf_utils.py +86 -0
- kiln_ai/utils/test_uuid.py +111 -0
- kiln_ai/utils/test_validation.py +524 -0
- kiln_ai/utils/uuid.py +9 -0
- kiln_ai/utils/validation.py +90 -0
- {kiln_ai-0.20.1.dist-info → kiln_ai-0.22.0.dist-info}/METADATA +9 -1
- kiln_ai-0.22.0.dist-info/RECORD +213 -0
- kiln_ai-0.20.1.dist-info/RECORD +0 -138
- {kiln_ai-0.20.1.dist-info → kiln_ai-0.22.0.dist-info}/WHEEL +0 -0
- {kiln_ai-0.20.1.dist-info → kiln_ai-0.22.0.dist-info}/licenses/LICENSE.txt +0 -0
kiln_ai/adapters/eval/eval_runner.py
@@ -160,8 +160,12 @@ class EvalRunner:
         """
         jobs = self.collect_tasks()
 
-        runner = AsyncJobRunner(
-
+        runner = AsyncJobRunner(
+            concurrency=concurrency,
+            jobs=jobs,
+            run_job_fn=self.run_job,
+        )
+        async for progress in runner.run():
             yield progress
 
     async def run_job(self, job: EvalJob) -> bool:

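This hunk reflects the AsyncJobRunner refactor that also appears in kiln_ai/utils/async_job_runner.py (+62 -17): the job list and the per-job callback now go to the constructor instead of to run(). Below is a minimal sketch of the new calling pattern using only the keyword names visible in the hunk; the concrete job type and the fields on the yielded progress objects are assumptions, not part of the diff.

from kiln_ai.utils.async_job_runner import AsyncJobRunner


async def run_all(jobs, run_job):
    # concurrency, jobs and run_job_fn are supplied up front; run() takes no
    # arguments and is consumed as an async generator of progress updates.
    runner = AsyncJobRunner(
        concurrency=25,
        jobs=jobs,
        run_job_fn=run_job,  # async callable: takes one job, returns bool
    )
    async for progress in runner.run():
        print(progress)
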
kiln_ai/adapters/eval/test_base_eval.py
@@ -307,9 +307,7 @@ async def test_run_method():
     evaluator = EvalTester(eval_config, run_config.run_config())
 
     # Run the evaluation
-    task_run, eval_scores,
-        "test input"
-    )
+    task_run, eval_scores, _ = await evaluator.run_task_and_eval("test input")
 
     # Verify task run was created
     assert task_run.input == "test input"

kiln_ai/adapters/eval/test_g_eval.py
@@ -188,7 +188,7 @@ async def test_run_g_eval_e2e(
     g_eval = GEval(test_eval_config, test_run_config)
 
     # Run the evaluation
-
+    _, scores, intermediate_outputs = await g_eval.run_task_and_eval("chickens")
 
     # Verify the evaluation results
    assert isinstance(scores, dict)

kiln_ai/adapters/extractors/__init__.py (new file)
@@ -0,0 +1,18 @@
+"""
+File extractors for processing different document types.
+
+This package provides a framework for extracting content from files
+using different extraction methods.
+"""
+
+from . import base_extractor, extractor_registry, extractor_runner, litellm_extractor
+from .base_extractor import ExtractionInput, ExtractionOutput
+
+__all__ = [
+    "ExtractionInput",
+    "ExtractionOutput",
+    "base_extractor",
+    "extractor_registry",
+    "extractor_runner",
+    "litellm_extractor",
+]

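Because the package re-exports the request/response models in __all__, callers can import them from the package root rather than the submodule. A small sketch (the file path and mime type below are illustrative values, not from the diff):

from kiln_ai.adapters.extractors import ExtractionInput

extraction_input = ExtractionInput(
    path="/tmp/report.pdf",     # absolute path to the file to extract
    mime_type="application/pdf",
)
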
kiln_ai/adapters/extractors/base_extractor.py (new file)
@@ -0,0 +1,72 @@
+import logging
+from abc import ABC, abstractmethod
+from pathlib import Path
+
+from pydantic import BaseModel, Field
+
+from kiln_ai.datamodel.extraction import ExtractorConfig, OutputFormat
+
+logger = logging.getLogger(__name__)
+
+
+class ExtractionInput(BaseModel):
+    path: Path | str = Field(description="The absolute path to the file to extract.")
+    mime_type: str = Field(description="The mime type of the file.")
+
+
+class ExtractionOutput(BaseModel):
+    """
+    The output of an extraction. This is the data that will be saved to the data store.
+    """
+
+    is_passthrough: bool = Field(
+        default=False, description="Whether the extractor returned the file as is."
+    )
+    content_format: OutputFormat = Field(
+        description="The format of the extracted data."
+    )
+    content: str = Field(description="The extracted data.")
+
+
+class BaseExtractor(ABC):
+    """
+    Base class for all extractors.
+
+    Should be subclassed by each extractor.
+    """
+
+    def __init__(self, extractor_config: ExtractorConfig):
+        self.extractor_config = extractor_config
+
+    @abstractmethod
+    async def _extract(self, extraction_input: ExtractionInput) -> ExtractionOutput:
+        pass
+
+    async def extract(
+        self,
+        extraction_input: ExtractionInput,
+    ) -> ExtractionOutput:
+        """
+        Extracts content from a file by delegating to the concrete extractor implementation.
+        """
+        try:
+            if self._should_passthrough(extraction_input.mime_type):
+                return ExtractionOutput(
+                    is_passthrough=True,
+                    content=Path(extraction_input.path).read_text(encoding="utf-8"),
+                    content_format=self.extractor_config.output_format,
+                )
+
+            return await self._extract(
+                extraction_input,
+            )
+        except Exception as e:
+            raise ValueError(f"Error extracting {extraction_input.path}: {e}") from e
+
+    def _should_passthrough(self, mime_type: str) -> bool:
+        return mime_type.lower() in {
+            mt.lower() for mt in self.extractor_config.passthrough_mimetypes
+        }
+
+    def output_format(self) -> OutputFormat:
+        return self.extractor_config.output_format

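BaseExtractor owns the passthrough short-circuit and error wrapping, so a concrete extractor only has to implement _extract. A minimal subclass sketch based solely on the abstract API above; PlainTextExtractor is a hypothetical name for illustration, not part of the package:

from pathlib import Path

from kiln_ai.adapters.extractors.base_extractor import (
    BaseExtractor,
    ExtractionInput,
    ExtractionOutput,
)


class PlainTextExtractor(BaseExtractor):
    async def _extract(self, extraction_input: ExtractionInput) -> ExtractionOutput:
        # Read the file and return it in the configured output format.
        text = Path(extraction_input.path).read_text(encoding="utf-8")
        return ExtractionOutput(
            content=text,
            content_format=self.output_format(),
        )

Callers go through extract(), which returns the file unmodified for any mime type listed in extractor_config.passthrough_mimetypes and otherwise delegates to the subclass, wrapping any failure in a ValueError.
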
kiln_ai/adapters/extractors/encoding.py (new file)
@@ -0,0 +1,20 @@
+import base64
+
+
+def to_base64_url(mime_type: str, bytes: bytes) -> str:
+    base64_url = f"data:{mime_type};base64,{base64.b64encode(bytes).decode('utf-8')}"
+    return base64_url
+
+
+def from_base64_url(base64_url: str) -> bytes:
+    if not base64_url.startswith("data:") or "," not in base64_url:
+        raise ValueError("Invalid base64 URL format")
+
+    parts = base64_url.split(",")
+    if len(parts) != 2:
+        raise ValueError("Invalid base64 URL format")
+
+    try:
+        return base64.b64decode(parts[1])
+    except Exception as e:
+        raise ValueError(f"Failed to decode base64 data: {e}")

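The two helpers round-trip raw bytes through a data URL. A quick sketch of that round trip (the payload is an illustrative value):

from kiln_ai.adapters.extractors.encoding import from_base64_url, to_base64_url

payload = b"%PDF-1.7 ..."  # any raw bytes
url = to_base64_url("application/pdf", payload)
# url == "data:application/pdf;base64,JVBERi0xLjcgLi4u"
assert from_base64_url(url) == payload
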
kiln_ai/adapters/extractors/extractor_registry.py (new file)
@@ -0,0 +1,44 @@
+from kiln_ai.adapters.extractors.base_extractor import BaseExtractor
+from kiln_ai.adapters.extractors.litellm_extractor import LitellmExtractor
+from kiln_ai.adapters.ml_model_list import ModelProviderName
+from kiln_ai.adapters.provider_tools import (
+    core_provider,
+    lite_llm_core_config_for_provider,
+)
+from kiln_ai.datamodel.extraction import ExtractorConfig, ExtractorType
+from kiln_ai.utils.exhaustive_error import raise_exhaustive_enum_error
+from kiln_ai.utils.filesystem_cache import FilesystemCache
+
+
+def extractor_adapter_from_type(
+    extractor_type: ExtractorType,
+    extractor_config: ExtractorConfig,
+    filesystem_cache: FilesystemCache | None = None,
+) -> BaseExtractor:
+    match extractor_type:
+        case ExtractorType.LITELLM:
+            try:
+                provider_enum = ModelProviderName(extractor_config.model_provider_name)
+            except ValueError:
+                raise ValueError(
+                    f"Unsupported model provider name: {extractor_config.model_provider_name}. "
+                )
+
+            core_provider_name = core_provider(
+                extractor_config.model_name, provider_enum
+            )
+
+            provider_config = lite_llm_core_config_for_provider(core_provider_name)
+            if provider_config is None:
+                raise ValueError(
+                    f"No configuration found for core provider: {core_provider_name.value}. "
+                )
+
+            return LitellmExtractor(
+                extractor_config,
+                provider_config,
+                filesystem_cache,
+            )
+        case _:
+            # type checking will catch missing cases
+            raise_exhaustive_enum_error(extractor_type)

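The registry is the single place that maps an ExtractorType onto a concrete adapter, resolving the core provider and its LiteLLM configuration along the way. A hedged sketch of how it is called; the only assumption is that the config carries the extractor_type, model_name and model_provider_name fields referenced above:

from kiln_ai.adapters.extractors.extractor_registry import extractor_adapter_from_type
from kiln_ai.datamodel.extraction import ExtractorConfig


def build_extractor(extractor_config: ExtractorConfig):
    # Raises ValueError for an unknown provider name or a provider with no
    # LiteLLM configuration; unhandled extractor types fail the exhaustive match.
    return extractor_adapter_from_type(
        extractor_config.extractor_type,
        extractor_config,
    )
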
kiln_ai/adapters/extractors/extractor_runner.py (new file)
@@ -0,0 +1,112 @@
+import logging
+from collections import defaultdict
+from dataclasses import dataclass
+from pathlib import Path
+from typing import AsyncGenerator, Dict, List, Set
+
+from kiln_ai.adapters.extractors.base_extractor import BaseExtractor, ExtractionInput
+from kiln_ai.adapters.extractors.extractor_registry import extractor_adapter_from_type
+from kiln_ai.datamodel.basemodel import ID_TYPE, KilnAttachmentModel
+from kiln_ai.datamodel.extraction import (
+    Document,
+    Extraction,
+    ExtractionSource,
+    ExtractorConfig,
+)
+from kiln_ai.utils.async_job_runner import AsyncJobRunner, Progress
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class ExtractorJob:
+    doc: Document
+    extractor_config: ExtractorConfig
+
+
+class ExtractorRunner:
+    def __init__(
+        self,
+        documents: List[Document],
+        extractor_configs: List[ExtractorConfig],
+    ):
+        if len(extractor_configs) == 0:
+            raise ValueError("Extractor runner requires at least one extractor config")
+
+        self.documents = documents
+        self.extractor_configs = extractor_configs
+
+    def collect_jobs(self) -> List[ExtractorJob]:
+        jobs = []
+
+        # we want to avoid re-running the same document for the same extractor config
+        already_extracted: Dict[ID_TYPE, Set[ID_TYPE]] = defaultdict(set)
+        for document in self.documents:
+            for extraction in document.extractions():
+                already_extracted[extraction.extractor_config_id].add(document.id)
+
+        for extractor_config in self.extractor_configs:
+            for document in self.documents:
+                if document.id not in already_extracted[extractor_config.id]:
+                    jobs.append(
+                        ExtractorJob(
+                            doc=document,
+                            extractor_config=extractor_config,
+                        )
+                    )
+
+        return jobs
+
+    async def run(self, concurrency: int = 25) -> AsyncGenerator[Progress, None]:
+        jobs = self.collect_jobs()
+
+        runner = AsyncJobRunner(
+            concurrency=concurrency,
+            jobs=jobs,
+            run_job_fn=self.run_job,
+        )
+        async for progress in runner.run():
+            yield progress
+
+    async def run_job(self, job: ExtractorJob) -> bool:
+        try:
+            extractor = extractor_adapter_from_type(
+                job.extractor_config.extractor_type,
+                job.extractor_config,
+            )
+            if not isinstance(extractor, BaseExtractor):
+                raise ValueError("Not able to create extractor from extractor config")
+
+            if job.doc.path is None:
+                raise ValueError("Document path is not set")
+
+            output = await extractor.extract(
+                extraction_input=ExtractionInput(
+                    path=Path(
+                        job.doc.original_file.attachment.resolve_path(
+                            job.doc.path.parent
+                        )
+                    ),
+                    mime_type=job.doc.original_file.mime_type,
+                )
+            )
+
+            extraction = Extraction(
+                parent=job.doc,
+                extractor_config_id=job.extractor_config.id,
+                output=KilnAttachmentModel.from_data(
+                    data=output.content,
+                    mime_type=output.content_format,
+                ),
+                source=ExtractionSource.PASSTHROUGH
+                if output.is_passthrough
+                else ExtractionSource.PROCESSED,
+            )
+            extraction.save_to_file()
+
+            return True
+        except Exception as e:
+            logger.error(
+                f"Error running extraction job for dataset item {job.doc.id}: {e}"
+            )
+            return False

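ExtractorRunner pairs every config with every document that does not yet have an extraction for that config, then fans the jobs out through the same AsyncJobRunner used by the eval runner above. A minimal sketch of driving it, assuming documents and configs are already-loaded Document and ExtractorConfig lists; printing the raw progress object avoids guessing at its fields:

import asyncio

from kiln_ai.adapters.extractors.extractor_runner import ExtractorRunner


async def extract_all(documents, configs):
    runner = ExtractorRunner(documents=documents, extractor_configs=configs)
    # collect_jobs() skips (document, config) pairs that already have an extraction,
    # so re-running after a partial failure only does the remaining work.
    async for progress in runner.run(concurrency=10):
        print(progress)


# asyncio.run(extract_all(documents, configs))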