extract-python 0.4.2__tar.gz → 0.5.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {extract_python-0.4.2 → extract_python-0.5.4}/PKG-INFO +3 -1
- {extract_python-0.4.2 → extract_python-0.5.4}/benches/compare.ipynb +2 -2
- {extract_python-0.4.2 → extract_python-0.5.4}/benches/compare.py +7 -5
- extract_python-0.5.4/extract_python/__init__.py +23 -0
- extract_python-0.5.4/extract_python/constants.py +2 -0
- extract_python-0.5.4/extract_python/docling_.py +130 -0
- {extract_python-0.4.2 → extract_python-0.5.4}/extract_python/marker_.py +7 -7
- {extract_python-0.4.2 → extract_python-0.5.4}/extract_python/miner_u.py +10 -74
- {extract_python-0.4.2 → extract_python-0.5.4}/extract_python/utils.py +4 -10
- {extract_python-0.4.2 → extract_python-0.5.4}/pyproject.toml +9 -1
- {extract_python-0.4.2 → extract_python-0.5.4}/uv.lock +169 -311
- extract_python-0.4.2/.dockerignore +0 -6
- extract_python-0.4.2/.github/workflows/publish.yml +0 -45
- extract_python-0.4.2/.github/workflows/tests.yml +0 -79
- extract_python-0.4.2/Dockerfile +0 -76
- extract_python-0.4.2/docker-compose.yml +0 -107
- extract_python-0.4.2/extract +0 -42
- extract_python-0.4.2/extract_python/__init__.py +0 -41
- extract_python-0.4.2/extract_python/constants.py +0 -6
- extract_python-0.4.2/extract_python/docling_.py +0 -278
- extract_python-0.4.2/extract_python/objects.py +0 -323
- extract_python-0.4.2/extract_python/pipeline.py +0 -38
- extract_python-0.4.2/qa/ruff.toml +0 -58
- {extract_python-0.4.2 → extract_python-0.5.4}/.gitignore +0 -0
- {extract_python-0.4.2 → extract_python-0.5.4}/.python-version +0 -0
- {extract_python-0.4.2 → extract_python-0.5.4}/README.md +0 -0
- {extract_python-0.4.2 → extract_python-0.5.4}/benches/__init__.py +0 -0
- {extract_python-0.4.2 → extract_python-0.5.4}/benches/constants.py +0 -0
- {extract_python-0.4.2 → extract_python-0.5.4}/data/.gitignore +0 -0
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: extract-python
|
|
3
|
-
Version: 0.4
|
|
3
|
+
Version: 0.5.4
|
|
4
4
|
Summary: Structured content extraction
|
|
5
5
|
Project-URL: Homepage, https://github.com/ICIJ/extract-python
|
|
6
6
|
Project-URL: Repository, https://github.com/ICIJ/extract-python
|
|
7
7
|
Project-URL: Issues, https://github.com/ICIJ/extract-python/issues
|
|
8
8
|
Author-email: Clément Doumouro <cdoumouro@icij.org>
|
|
9
9
|
Requires-Python: <3.14,>=3.11
|
|
10
|
+
Requires-Dist: extract-core~=0.1
|
|
10
11
|
Requires-Dist: icij-common~=0.8.2
|
|
11
12
|
Provides-Extra: benches
|
|
12
13
|
Requires-Dist: html2image~=2.0.7; extra == 'benches'
|
|
@@ -21,4 +22,5 @@ Provides-Extra: mineru
|
|
|
21
22
|
Requires-Dist: mineru[mlx]~=3.2; (sys_platform == 'darwin') and extra == 'mineru'
|
|
22
23
|
Requires-Dist: mineru[pipeline,vlm]~=3.2; extra == 'mineru'
|
|
23
24
|
Requires-Dist: pydantic-extra-types[pycountry]~=2.11; extra == 'mineru'
|
|
25
|
+
Requires-Dist: python-pptx~=1.0; extra == 'mineru'
|
|
24
26
|
Requires-Dist: six~=1.17; extra == 'mineru'
|
|
@@ -13,8 +13,8 @@
|
|
|
13
13
|
"from extract_python.benches.compare import (\n",
|
|
14
14
|
" compare,\n",
|
|
15
15
|
")\n",
|
|
16
|
-
"from extract_python.
|
|
17
|
-
"from extract_python.
|
|
16
|
+
"from extract_python.objects import InputDoc, OutputFormat\n",
|
|
17
|
+
"from extract_python.pipelines import DoclingPipeline, MarkerPipeline"
|
|
18
18
|
]
|
|
19
19
|
},
|
|
20
20
|
{
|
|
@@ -3,12 +3,11 @@ from tempfile import TemporaryDirectory
|
|
|
3
3
|
|
|
4
4
|
import markdown2
|
|
5
5
|
import pypdfium2
|
|
6
|
-
from
|
|
6
|
+
from extract_core import BaseModel, OutputFormat, PageIndexes
|
|
7
|
+
from extract_python.utils import chdir
|
|
7
8
|
from html2image import Html2Image
|
|
8
9
|
from PIL import Image, ImageDraw
|
|
9
10
|
|
|
10
|
-
from extract_python.objects import BaseModel, OutputFormat, PageIndexes
|
|
11
|
-
|
|
12
11
|
_WHITE_BACKGROUND_CSS = "body {background: white;}"
|
|
13
12
|
|
|
14
13
|
|
|
@@ -146,7 +145,10 @@ def _scan_pages(
|
|
|
146
145
|
).root
|
|
147
146
|
for compared in comparison.compared
|
|
148
147
|
]
|
|
149
|
-
all_pages = zip(*all_pages)
|
|
148
|
+
all_pages = zip(*all_pages, strict=True)
|
|
150
149
|
compared_names = (p.parent.name for p in comparison.compared)
|
|
151
|
-
pages = [
|
|
150
|
+
pages = [
|
|
151
|
+
dict(zip(compared_names, page_comp_ixs, strict=True))
|
|
152
|
+
for page_comp_ixs in all_pages
|
|
153
|
+
]
|
|
152
154
|
return pages
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# Every pipeline backend is imported defensively: when importing a backend
# module raises ImportError (presumably because its optional dependencies are
# not installed), the corresponding public names are bound to None so that
# importing this package never fails and every name listed in __all__ exists.
try:
    from .docling_ import DOCLING_DEFAULT_ARTIFACTS_PATH, DoclingPipeline
except ImportError:
    # The fallback must bind the very same names as the import above. It used
    # to bind a misspelled "DOCKING_..." name, leaving
    # DOCLING_DEFAULT_ARTIFACTS_PATH undefined although __all__ exports it.
    DOCLING_DEFAULT_ARTIFACTS_PATH, DoclingPipeline = None, None

try:
    from .marker_ import MarkerPipeline
except ImportError:
    MarkerPipeline = None


try:
    from .miner_u import MinerUPipeline
except ImportError:
    MinerUPipeline = None


__all__ = [
    "DoclingPipeline",
    "DOCLING_DEFAULT_ARTIFACTS_PATH",
    "MarkerPipeline",
    "MinerUPipeline",
]
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
import shutil
|
|
2
|
+
import tempfile
|
|
3
|
+
from collections.abc import AsyncGenerator, Iterable, Iterator
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from docling.datamodel.base_models import InputFormat
|
|
7
|
+
from docling.datamodel.document import ConversionResult
|
|
8
|
+
from docling.document_converter import DocumentConverter
|
|
9
|
+
|
|
10
|
+
# TODO: this is slow to load, improve it
|
|
11
|
+
from docling_core.types.doc import ImageRefMode
|
|
12
|
+
from docling_core.types.io import DocumentStream
|
|
13
|
+
from extract_core import (
|
|
14
|
+
DoclingFormatOption,
|
|
15
|
+
DoclingPipelineConfig,
|
|
16
|
+
Error,
|
|
17
|
+
InputDoc,
|
|
18
|
+
MarkdownDoc,
|
|
19
|
+
OutputFormat,
|
|
20
|
+
PageIndexes,
|
|
21
|
+
Pipeline,
|
|
22
|
+
PipelineType,
|
|
23
|
+
Result,
|
|
24
|
+
Status,
|
|
25
|
+
)
|
|
26
|
+
from icij_common.registrable import FromConfig
|
|
27
|
+
|
|
28
|
+
from .constants import ARTIFACTS, DEFAULT_MD_PAGE_SEP
|
|
29
|
+
from .utils import chdir, map_and_preserve, path_to_artifacts_dirname
|
|
30
|
+
|
|
31
|
+
# Default docling artifacts path: <home>/.cache/docling/models
DOCLING_DEFAULT_ARTIFACTS_PATH = Path.home() / ".cache" / "docling" / "models"
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@Pipeline.register(PipelineType.DOCLING)
class DoclingPipeline(Pipeline):
    """Pipeline registered as ``PipelineType.DOCLING``.

    Documents are converted with a docling ``DocumentConverter`` and each
    conversion is turned into a ``Result``.
    """

    def __init__(
        self, format_options: dict["InputFormat", DoclingFormatOption] | None = None
    ):
        """Build the underlying docling converter.

        Args:
            format_options: optional per-input-format options; each value is
                converted with its ``to_docling()`` method before being handed
                to the converter. ``None`` (the default) forwards ``None`` to
                the converter so that it applies its own defaults.
        """
        # The default used to crash with AttributeError because `.items()` was
        # called on None; only translate the options when some were provided.
        docling_options = None
        if format_options is not None:
            docling_options = {k: v.to_docling() for k, v in format_options.items()}
        allowed_format = [
            f.to_docling() for f in DoclingPipelineConfig.supported_exts()
        ]
        self._converter = DocumentConverter(
            allowed_formats=allowed_format, format_options=docling_options
        )

    async def extract_content(
        self, docs: Iterable[InputDoc], output_format: OutputFormat, output_path: Path
    ) -> AsyncGenerator[Result, None]:
        """Convert ``docs`` and yield one ``Result`` per input document.

        Args:
            docs: documents to convert.
            output_format: format of the produced outputs.
            output_path: directory under which outputs are written.

        Yields:
            The ``Result`` built from each (input document, conversion) pair.
        """
        # map_and_preserve hands back the inputs next to their docling
        # counterparts so each conversion can be paired with its source doc
        docs, path_or_streams = map_and_preserve(_to_docling, docs)
        # raises_on_error=False: failures are reported through each result
        # (status / errors) rather than raised by the converter
        outputs = self._converter.convert_all(path_or_streams, raises_on_error=False)
        for doc, res in zip(docs, outputs, strict=True):
            yield _to_result(res, doc, output_format, output_path=output_path)

    @classmethod
    def _from_config(cls, config: DoclingPipelineConfig) -> FromConfig:
        """Instantiate the pipeline from the ``format_options`` of ``config``."""
        return cls(config.format_options)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _to_docling(docs: Iterable[InputDoc]) -> Iterator["Path | DocumentStream"]:
    """Lazily yield the ``to_docling()`` conversion of every document in ``docs``."""
    yield from (doc.to_docling() for doc in docs)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _to_result(
    res: ConversionResult,
    input_document: InputDoc,
    output_format: OutputFormat,
    output_path: Path,
    **kwargs,
) -> Result:
    """Build the ``Result`` describing the conversion of ``input_document``.

    Args:
        res: docling conversion result.
        input_document: document the conversion was run on.
        output_format: requested output format.
        output_path: directory receiving the outputs, created when missing.
        **kwargs: forwarded to the markdown export.

    Returns:
        A ``Result`` holding the content-less input, the status, the errors
        and the output (``None`` when the status does not allow conversion).

    Raises:
        NotImplementedError: when conversion is allowed but ``output_format``
            is not markdown.
    """
    output_path.mkdir(parents=True, exist_ok=True)
    status = Status.from_docling(res.status)
    converted = None
    if status.allows_conversion:
        if output_format == OutputFormat.MARKDOWN:
            converted = _to_markdown_doc(res, output_path, **kwargs)
        else:
            raise NotImplementedError(f"unsupported output format {output_format}")
    conversion_errors = [Error.from_docling(err) for err in res.errors]
    # The result references the input without carrying its content
    stripped_input = input_document.without_content()
    return Result(
        input=stripped_input,
        status=status,
        errors=conversion_errors,
        output=converted,
    )
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _to_markdown_doc(
    res: ConversionResult,
    output_path: Path,
    page_sep: str = DEFAULT_MD_PAGE_SEP,
    **kwargs,
) -> MarkdownDoc:
    """Export a docling conversion as one markdown file and record page offsets.

    Pages are exported one at a time and concatenated into a single markdown
    file; the cumulative character count after each page is recorded so that
    page boundaries can be recovered from the final document.

    Args:
        res: docling conversion result to export.
        output_path: directory in which the output directory is created.
        page_sep: text appended after every page but the last one.
        **kwargs: forwarded to ``res.document.save_as_markdown``.

    Returns:
        A ``MarkdownDoc`` whose path is the output directory name (relative to
        ``output_path``) and whose pages are built from the recorded offsets.

    Raises:
        FileExistsError: when the output directory already exists.
    """
    # TODO: Should we add a hash to avoid collisions between files with the
    #  same name nested at different levels of the tree structure?
    md_dir_name = path_to_artifacts_dirname(res.input.file)
    md_dir = output_path / md_dir_name
    if md_dir.exists():
        raise FileExistsError(f"directory {md_dir} already exists")
    # The markdown file is named after its directory to avoid issues with
    # duplicated input file names once flattened at the top level
    md_filename = md_dir_name + OutputFormat.MARKDOWN
    total_length = 0
    n_pages = len(res.pages)
    end_indices = []

    # ignore_cleanup_errors: the directory is moved away before the context
    # exits, so there is nothing left to clean up at that point
    with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as td:
        tmp_dir = Path(td)
        # Relative on purpose, it is resolved against tmp_dir thanks to chdir
        page_path = Path("page.md")
        # We do a chdir to bypass a Docling bug which only allows to maintain
        # relative image refs when saving the markdown to a relative path.
        # The encoding is explicit so the output does not depend on the locale
        # (NOTE(review): assumes docling writes its markdown as UTF-8 — confirm)
        with (tmp_dir / md_filename).open("w", encoding="utf-8") as f, chdir(
            tmp_dir
        ):
            for page_i in range(n_pages):
                res.document.save_as_markdown(
                    page_path,
                    page_no=page_i + 1,
                    image_mode=ImageRefMode.REFERENCED,
                    artifacts_dir=Path(ARTIFACTS),
                    **kwargs,
                )
                content = page_path.read_text(encoding="utf-8")
                if page_i > 0:
                    content += "\n"
                if page_i < n_pages - 1:
                    content += page_sep
                total_length += len(content)
                end_indices.append(total_length)
                # No explicit flush: the file is closed, hence flushed, when
                # the context exits, which happens before the move below
                f.write(content)
            # The scratch page file must not end up in the final directory;
            # missing_ok covers results for which no page was ever exported
            page_path.unlink(missing_ok=True)
        # Move once the file is closed and the original working directory is
        # restored, so that a relative output path resolves as the caller expects
        shutil.move(tmp_dir, md_dir)
    pages = PageIndexes.from_page_end_indices(end_indices)
    return MarkdownDoc(path=Path(md_dir_name), pages=pages)
|
|
@@ -5,10 +5,8 @@ from functools import cache
|
|
|
5
5
|
from pathlib import Path
|
|
6
6
|
from typing import TYPE_CHECKING, Any, ClassVar, Self
|
|
7
7
|
|
|
8
|
-
from
|
|
9
|
-
|
|
10
|
-
from .constants import ARTIFACTS
|
|
11
|
-
from .objects import (
|
|
8
|
+
from extract_core import BasePipelineConfig, Pipeline, PipelineType
|
|
9
|
+
from extract_core.objects import (
|
|
12
10
|
InputDoc,
|
|
13
11
|
MarkdownDoc,
|
|
14
12
|
OutputFormat,
|
|
@@ -17,7 +15,9 @@ from .objects import (
|
|
|
17
15
|
Status,
|
|
18
16
|
SupportedExt,
|
|
19
17
|
)
|
|
20
|
-
from
|
|
18
|
+
from pydantic import Field
|
|
19
|
+
|
|
20
|
+
from .constants import ARTIFACTS
|
|
21
21
|
from .utils import path_to_artifacts_dirname, report_recoverable_errors
|
|
22
22
|
|
|
23
23
|
if TYPE_CHECKING:
|
|
@@ -25,10 +25,10 @@ if TYPE_CHECKING:
|
|
|
25
25
|
from PIL import Image
|
|
26
26
|
|
|
27
27
|
|
|
28
|
-
class MarkerPipelineConfig(
|
|
28
|
+
class MarkerPipelineConfig(BasePipelineConfig):
|
|
29
29
|
pipeline: ClassVar[PipelineType] = Field(frozen=True, default=PipelineType.MARKER)
|
|
30
30
|
|
|
31
|
-
config: dict[str, Any] = dict
|
|
31
|
+
config: dict[str, Any] = Field(default_factory=dict)
|
|
32
32
|
|
|
33
33
|
@classmethod
|
|
34
34
|
@cache
|
|
@@ -1,96 +1,32 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import shutil
|
|
3
3
|
from collections.abc import AsyncGenerator, Callable, Iterable
|
|
4
|
-
from
|
|
5
|
-
from enum import StrEnum
|
|
6
|
-
from functools import cache, partial
|
|
4
|
+
from functools import partial
|
|
7
5
|
from pathlib import Path
|
|
8
6
|
from tempfile import TemporaryDirectory
|
|
9
|
-
from typing import
|
|
7
|
+
from typing import Self
|
|
10
8
|
|
|
11
|
-
from
|
|
12
|
-
from pydantic_extra_types.language_code import LanguageAlpha2
|
|
13
|
-
|
|
14
|
-
from .constants import ARTIFACTS, DEFAULT_MD_PAGE_SEP
|
|
15
|
-
from .objects import (
|
|
16
|
-
BaseModel,
|
|
9
|
+
from extract_core import (
|
|
17
10
|
ConversionOutput,
|
|
18
11
|
InputDoc,
|
|
12
|
+
MinerUBackend,
|
|
13
|
+
MinerUConfig,
|
|
14
|
+
MinerUPipelineConfig,
|
|
19
15
|
OutputFormat,
|
|
20
16
|
PageIndexes,
|
|
17
|
+
Pipeline,
|
|
18
|
+
PipelineType,
|
|
21
19
|
Result,
|
|
22
20
|
Status,
|
|
23
|
-
SupportedExt,
|
|
24
21
|
)
|
|
25
|
-
|
|
22
|
+
|
|
23
|
+
from .constants import ARTIFACTS, DEFAULT_MD_PAGE_SEP
|
|
26
24
|
from .utils import path_to_artifacts_dirname
|
|
27
25
|
|
|
28
26
|
_MINER_U_CONVERSION_ERRORS = tuple()
|
|
29
27
|
MDMakeFunction = Callable[[list, str, str], str | None]
|
|
30
28
|
|
|
31
29
|
|
|
32
|
-
class MinerUBackend(StrEnum):
|
|
33
|
-
PIPELINE = "pipeline"
|
|
34
|
-
VLM = "vlm"
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
class MinerUConfig(BaseModel):
|
|
38
|
-
backend: MinerUBackend = MinerUBackend.PIPELINE
|
|
39
|
-
enable_formula_extraction: bool = True
|
|
40
|
-
enable_table_extraction: bool = True
|
|
41
|
-
# TODO: use enum or literal here
|
|
42
|
-
parse_method: str = "auto"
|
|
43
|
-
|
|
44
|
-
def as_parse_kwargs(self) -> dict[str, Any]:
|
|
45
|
-
kwargs = copy(self._get_default_kwargs())
|
|
46
|
-
kwargs["backend"] = self.backend
|
|
47
|
-
kwargs["parse_method"] = self.parse_method
|
|
48
|
-
kwargs["formula_enable"] = self.enable_formula_extraction
|
|
49
|
-
kwargs["table_enable"] = self.enable_table_extraction
|
|
50
|
-
return kwargs
|
|
51
|
-
|
|
52
|
-
@classmethod
|
|
53
|
-
@cache
|
|
54
|
-
def _get_default_kwargs(cls) -> dict[str, Any]:
|
|
55
|
-
from mineru.utils.enum_class import MakeMode # noqa: PLC0415
|
|
56
|
-
|
|
57
|
-
return {
|
|
58
|
-
"server_url": None,
|
|
59
|
-
# We don't dump md directly we process, we dump the middle json in order
|
|
60
|
-
# to be able to get page indexes
|
|
61
|
-
"parse_method": "auto",
|
|
62
|
-
"dump_md": False,
|
|
63
|
-
"dump_middle_json": True,
|
|
64
|
-
"f_draw_layout_bbox": False,
|
|
65
|
-
"f_draw_span_bbox": False,
|
|
66
|
-
"f_dump_model_output": False, # might be useful for debug though
|
|
67
|
-
"f_dump_orig_pdf": False,
|
|
68
|
-
"f_dump_content_list": False, # might be useful for debug though
|
|
69
|
-
"start_page_id": 0,
|
|
70
|
-
"f_make_md_mode": MakeMode.MM_MD,
|
|
71
|
-
"image_analysis": True,
|
|
72
|
-
"end_page_id": None,
|
|
73
|
-
"client_side_output_generation": False,
|
|
74
|
-
}
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
class MinerUPipelineConfig(PipelineConfig): # noqa: F821
|
|
78
|
-
pipeline: ClassVar[PipelineType] = Field(frozen=True, default=PipelineType.MINER_U)
|
|
79
|
-
|
|
80
|
-
config: MinerUConfig = Field(frozen=True, default=MinerUConfig())
|
|
81
|
-
language: LanguageAlpha2 = Field(frozen=True, default="en")
|
|
82
|
-
|
|
83
|
-
@classmethod
|
|
84
|
-
@cache
|
|
85
|
-
def supported_exts(cls) -> set[SupportedExt]:
|
|
86
|
-
return {
|
|
87
|
-
SupportedExt.PDF,
|
|
88
|
-
SupportedExt.DOCX,
|
|
89
|
-
SupportedExt.PPTX,
|
|
90
|
-
SupportedExt.XLSX,
|
|
91
|
-
}
|
|
92
|
-
|
|
93
|
-
|
|
94
30
|
@Pipeline.register(PipelineType.MINER_U)
|
|
95
31
|
class MinerUPipeline(Pipeline):
|
|
96
32
|
def __init__(self, config: MinerUConfig, language: str):
|
|
@@ -6,26 +6,20 @@ from itertools import tee
|
|
|
6
6
|
from pathlib import Path, PurePath
|
|
7
7
|
from typing import Protocol, TypeVar
|
|
8
8
|
|
|
9
|
-
from
|
|
9
|
+
from extract_core import Error, InputDoc, Result, Status
|
|
10
10
|
|
|
11
11
|
R = TypeVar("R")
|
|
12
|
-
|
|
12
|
+
In = TypeVar("In")
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
def map_and_preserve(
|
|
16
|
-
fn: Callable[[Iterable[
|
|
17
|
-
) -> tuple[Iterable[
|
|
16
|
+
fn: Callable[[Iterable[In]], Iterator[R]], inputs: Iterable[In]
|
|
17
|
+
) -> tuple[Iterable[In], Iterator[R]]:
|
|
18
18
|
save_inputs, function_inputs = tee(inputs)
|
|
19
19
|
outputs = iter(fn(function_inputs))
|
|
20
20
|
return save_inputs, outputs
|
|
21
21
|
|
|
22
22
|
|
|
23
|
-
def all_subclasses(cls: type[T]) -> set[type[T]]:
|
|
24
|
-
return set(cls.__subclasses__()).union(
|
|
25
|
-
[s for c in cls.__subclasses__() for s in all_subclasses(c)]
|
|
26
|
-
)
|
|
27
|
-
|
|
28
|
-
|
|
29
23
|
def path_to_artifacts_dirname(path: PurePath, sep: str = "_") -> str:
|
|
30
24
|
dirname = f"{path.name[: -len(path.suffix)]}"
|
|
31
25
|
ext = path.suffix
|
|
@@ -9,6 +9,7 @@ readme = "README.md"
|
|
|
9
9
|
requires-python = ">=3.11,<3.14"
|
|
10
10
|
dependencies = [
|
|
11
11
|
"icij-common~=0.8.2",
|
|
12
|
+
"extract-core~=0.1",
|
|
12
13
|
]
|
|
13
14
|
|
|
14
15
|
[project.optional-dependencies]
|
|
@@ -31,6 +32,7 @@ mineru = [
|
|
|
31
32
|
"mineru[pipeline,vlm]~=3.2",
|
|
32
33
|
"mineru[mlx]~=3.2; sys_platform == 'darwin'",
|
|
33
34
|
"pydantic-extra-types[pycountry]~=2.11",
|
|
35
|
+
"python-pptx~=1.0",
|
|
34
36
|
"six~=1.17",
|
|
35
37
|
]
|
|
36
38
|
|
|
@@ -45,7 +47,12 @@ required-environments = [
|
|
|
45
47
|
"sys_platform == 'darwin' and platform_machine == 'arm64'",
|
|
46
48
|
"sys_platform == 'linux'",
|
|
47
49
|
]
|
|
50
|
+
override-dependencies = [
|
|
51
|
+
"pillow==11.3.0",
|
|
52
|
+
]
|
|
48
53
|
|
|
54
|
+
[tool.uv.sources]
|
|
55
|
+
extract-core = { path = "../extract-core", editable = true }
|
|
49
56
|
|
|
50
57
|
[dependency-groups]
|
|
51
58
|
dev = [
|
|
@@ -87,4 +94,5 @@ exclude = [
|
|
|
87
94
|
]
|
|
88
95
|
[tool.uv-dynamic-versioning]
|
|
89
96
|
fallback-version = "0.0.0"
|
|
90
|
-
pattern = "
|
|
97
|
+
pattern-prefix = "extract-python-"
|
|
98
|
+
pattern = "default-unprefixed"
|