doctr-synth-generator 0.2.0__tar.gz → 0.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {doctr_synth_generator-0.2.0 → doctr_synth_generator-0.2.1}/PKG-INFO +2 -2
- {doctr_synth_generator-0.2.0 → doctr_synth_generator-0.2.1}/README.md +1 -1
- {doctr_synth_generator-0.2.0 → doctr_synth_generator-0.2.1}/doctr_synth_generator.egg-info/PKG-INFO +2 -2
- {doctr_synth_generator-0.2.0 → doctr_synth_generator-0.2.1}/generator/components/corpus_downloader.py +1 -1
- {doctr_synth_generator-0.2.0 → doctr_synth_generator-0.2.1}/generator/doctr_dataset.py +26 -4
- doctr_synth_generator-0.2.1/generator/version.py +1 -0
- {doctr_synth_generator-0.2.0 → doctr_synth_generator-0.2.1}/setup.py +1 -1
- doctr_synth_generator-0.2.0/generator/version.py +0 -1
- {doctr_synth_generator-0.2.0 → doctr_synth_generator-0.2.1}/LICENSE +0 -0
- {doctr_synth_generator-0.2.0 → doctr_synth_generator-0.2.1}/doctr_synth_generator.egg-info/SOURCES.txt +0 -0
- {doctr_synth_generator-0.2.0 → doctr_synth_generator-0.2.1}/doctr_synth_generator.egg-info/dependency_links.txt +0 -0
- {doctr_synth_generator-0.2.0 → doctr_synth_generator-0.2.1}/doctr_synth_generator.egg-info/requires.txt +0 -0
- {doctr_synth_generator-0.2.0 → doctr_synth_generator-0.2.1}/doctr_synth_generator.egg-info/top_level.txt +0 -0
- {doctr_synth_generator-0.2.0 → doctr_synth_generator-0.2.1}/doctr_synth_generator.egg-info/zip-safe +0 -0
- {doctr_synth_generator-0.2.0 → doctr_synth_generator-0.2.1}/generator/__init__.py +0 -0
- {doctr_synth_generator-0.2.0 → doctr_synth_generator-0.2.1}/generator/augmentations/__init__.py +0 -0
- {doctr_synth_generator-0.2.0 → doctr_synth_generator-0.2.1}/generator/augmentations/augmentation_pipeline.py +0 -0
- {doctr_synth_generator-0.2.0 → doctr_synth_generator-0.2.1}/generator/augmentations/random_blur.py +0 -0
- {doctr_synth_generator-0.2.0 → doctr_synth_generator-0.2.1}/generator/augmentations/random_gaussian_noise.py +0 -0
- {doctr_synth_generator-0.2.0 → doctr_synth_generator-0.2.1}/generator/augmentations/random_jpeg_compression.py +0 -0
- {doctr_synth_generator-0.2.0 → doctr_synth_generator-0.2.1}/generator/augmentations/random_perspective.py +0 -0
- {doctr_synth_generator-0.2.0 → doctr_synth_generator-0.2.1}/generator/augmentations/random_pixel_dropout.py +0 -0
- {doctr_synth_generator-0.2.0 → doctr_synth_generator-0.2.1}/generator/augmentations/random_rotate.py +0 -0
- {doctr_synth_generator-0.2.0 → doctr_synth_generator-0.2.1}/generator/components/__init__.py +0 -0
- {doctr_synth_generator-0.2.0 → doctr_synth_generator-0.2.1}/generator/components/background_downloader.py +0 -0
- {doctr_synth_generator-0.2.0 → doctr_synth_generator-0.2.1}/generator/components/background_manager.py +0 -0
- {doctr_synth_generator-0.2.0 → doctr_synth_generator-0.2.1}/generator/components/config.py +0 -0
- {doctr_synth_generator-0.2.0 → doctr_synth_generator-0.2.1}/generator/components/dataset_balancer.py +0 -0
- {doctr_synth_generator-0.2.0 → doctr_synth_generator-0.2.1}/generator/components/dataset_splitter.py +0 -0
- {doctr_synth_generator-0.2.0 → doctr_synth_generator-0.2.1}/generator/components/font_downloader.py +0 -0
- {doctr_synth_generator-0.2.0 → doctr_synth_generator-0.2.1}/generator/components/font_selector.py +0 -0
- {doctr_synth_generator-0.2.0 → doctr_synth_generator-0.2.1}/generator/components/generator.py +0 -0
- {doctr_synth_generator-0.2.0 → doctr_synth_generator-0.2.1}/generator/components/page_generator.py +0 -0
- {doctr_synth_generator-0.2.0 → doctr_synth_generator-0.2.1}/generator/components/text_renderer.py +0 -0
- {doctr_synth_generator-0.2.0 → doctr_synth_generator-0.2.1}/generator/components/text_styling.py +0 -0
- {doctr_synth_generator-0.2.0 → doctr_synth_generator-0.2.1}/generator/components/vocab_coverage.py +0 -0
- {doctr_synth_generator-0.2.0 → doctr_synth_generator-0.2.1}/generator/components/vocabs.py +0 -0
- {doctr_synth_generator-0.2.0 → doctr_synth_generator-0.2.1}/generator/dataset_generator.py +0 -0
- {doctr_synth_generator-0.2.0 → doctr_synth_generator-0.2.1}/pyproject.toml +0 -0
- {doctr_synth_generator-0.2.0 → doctr_synth_generator-0.2.1}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: doctr-synth-generator
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.2.1
|
|
4
4
|
Summary: A synthetic data generator for training OCR models
|
|
5
5
|
Author-email: Felix Dittrich <felixdittrich92@gmail.com>
|
|
6
6
|
Maintainer: Felix Dittrich
|
|
@@ -250,7 +250,7 @@ Dynamic: license-file
|
|
|
250
250
|

|
|
251
251
|
[](https://codecov.io/gh/felixdittrich92/docTR-Synth-Generator)
|
|
252
252
|
[](https://www.codefactor.io/repository/github/felixdittrich92/doctr-synth-generator)
|
|
253
|
-
[](https://pypi.org/project/docTR-Synth-Generator/)
|
|
254
254
|
|
|
255
255
|
# docTR-Synth-Generator
|
|
256
256
|
A tool to generate synthetic OCR datasets - made for docTR
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|

|
|
3
3
|
[](https://codecov.io/gh/felixdittrich92/docTR-Synth-Generator)
|
|
4
4
|
[](https://www.codefactor.io/repository/github/felixdittrich92/doctr-synth-generator)
|
|
5
|
-
[](https://pypi.org/project/docTR-Synth-Generator/)
|
|
6
6
|
|
|
7
7
|
# docTR-Synth-Generator
|
|
8
8
|
A tool to generate synthetic OCR datasets - made for docTR
|
{doctr_synth_generator-0.2.0 → doctr_synth_generator-0.2.1}/doctr_synth_generator.egg-info/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: doctr-synth-generator
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.2.1
|
|
4
4
|
Summary: A synthetic data generator for training OCR models
|
|
5
5
|
Author-email: Felix Dittrich <felixdittrich92@gmail.com>
|
|
6
6
|
Maintainer: Felix Dittrich
|
|
@@ -250,7 +250,7 @@ Dynamic: license-file
|
|
|
250
250
|

|
|
251
251
|
[](https://codecov.io/gh/felixdittrich92/docTR-Synth-Generator)
|
|
252
252
|
[](https://www.codefactor.io/repository/github/felixdittrich92/doctr-synth-generator)
|
|
253
|
-
[](https://pypi.org/project/docTR-Synth-Generator/)
|
|
254
254
|
|
|
255
255
|
# docTR-Synth-Generator
|
|
256
256
|
A tool to generate synthetic OCR datasets - made for docTR
|
|
@@ -263,5 +263,5 @@ def generate_numeric_tokens(n: int, seed: int | None = None) -> list[str]:
|
|
|
263
263
|
elif kind == "percent":
|
|
264
264
|
tokens.append(f"{rng.randint(0, 100)}%")
|
|
265
265
|
else: # phone
|
|
266
|
-
tokens.append(f"+{rng.randint(1, 99)}
|
|
266
|
+
tokens.append(f"+{rng.randint(1, 99)}{rng.randint(100, 999)}{rng.randint(100000, 9999999)}")
|
|
267
267
|
return tokens
|
|
@@ -20,11 +20,31 @@ from .dataset_generator import SyntheticDatasetGenerator
|
|
|
20
20
|
|
|
21
21
|
try: # docTR's single-class name ("words"); fall back to the same literal if absent.
|
|
22
22
|
from doctr.file_utils import CLASS_NAME # type: ignore[import-not-found]
|
|
23
|
+
from doctr.utils import Sample # type: ignore[import-not-found]
|
|
23
24
|
except Exception: # pragma: no cover - docTR not installed
|
|
25
|
+
from dataclasses import dataclass
|
|
26
|
+
|
|
24
27
|
CLASS_NAME = "words"
|
|
25
28
|
|
|
29
|
+
@dataclass
|
|
30
|
+
class Sample: # type: ignore[no-redef]
|
|
31
|
+
"""Canonical data container for all transforms."""
|
|
32
|
+
|
|
33
|
+
image: Any
|
|
34
|
+
mask: Any | None = None
|
|
35
|
+
target: np.ndarray | dict[str, np.ndarray] | None = None
|
|
36
|
+
|
|
37
|
+
def replace(self, **kwargs) -> "Sample":
|
|
38
|
+
return Sample(
|
|
39
|
+
image=kwargs.get("image", self.image),
|
|
40
|
+
mask=kwargs.get("mask", self.mask),
|
|
41
|
+
target=kwargs.get("target", self.target),
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
|
|
26
45
|
__all__ = [
|
|
27
46
|
"CLASS_NAME",
|
|
47
|
+
"Sample",
|
|
28
48
|
"polygons_to_target",
|
|
29
49
|
"render_recognition_sample",
|
|
30
50
|
"render_detection_sample",
|
|
@@ -118,7 +138,7 @@ def _torch():
|
|
|
118
138
|
def _pil_to_tensor(img: Image.Image):
|
|
119
139
|
"""PIL RGB -> ``CxHxW`` float32 tensor in ``[0, 1]`` (matches docTR's reader)."""
|
|
120
140
|
torch = _torch()
|
|
121
|
-
arr = np.asarray(img.convert("RGB"), dtype=np.uint8)
|
|
141
|
+
arr = np.asarray(img.convert("RGB"), dtype=np.uint8, copy=True)
|
|
122
142
|
return torch.from_numpy(arr).permute(2, 0, 1).contiguous().float().div_(255.0)
|
|
123
143
|
|
|
124
144
|
|
|
@@ -175,11 +195,13 @@ class _BaseSynthDataset:
|
|
|
175
195
|
self._seed_sample(index)
|
|
176
196
|
img, target = self._render(index)
|
|
177
197
|
tensor = _pil_to_tensor(img)
|
|
198
|
+
sample = Sample(image=tensor, target=target)
|
|
178
199
|
if self.img_transforms is not None:
|
|
179
|
-
|
|
200
|
+
sample = self.img_transforms(sample)
|
|
180
201
|
if self.sample_transforms is not None:
|
|
181
|
-
|
|
182
|
-
|
|
202
|
+
sample = self.sample_transforms(sample)
|
|
203
|
+
# Keep compatibility with the existing collate_fn
|
|
204
|
+
return sample.image, sample.target
|
|
183
205
|
|
|
184
206
|
@staticmethod
|
|
185
207
|
def collate_fn(samples):
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = 'v0.2.1'
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = 'v0.2.0'
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{doctr_synth_generator-0.2.0 → doctr_synth_generator-0.2.1}/doctr_synth_generator.egg-info/zip-safe
RENAMED
|
File without changes
|
|
File without changes
|
{doctr_synth_generator-0.2.0 → doctr_synth_generator-0.2.1}/generator/augmentations/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{doctr_synth_generator-0.2.0 → doctr_synth_generator-0.2.1}/generator/augmentations/random_blur.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{doctr_synth_generator-0.2.0 → doctr_synth_generator-0.2.1}/generator/augmentations/random_rotate.py
RENAMED
|
File without changes
|
{doctr_synth_generator-0.2.0 → doctr_synth_generator-0.2.1}/generator/components/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{doctr_synth_generator-0.2.0 → doctr_synth_generator-0.2.1}/generator/components/dataset_balancer.py
RENAMED
|
File without changes
|
{doctr_synth_generator-0.2.0 → doctr_synth_generator-0.2.1}/generator/components/dataset_splitter.py
RENAMED
|
File without changes
|
{doctr_synth_generator-0.2.0 → doctr_synth_generator-0.2.1}/generator/components/font_downloader.py
RENAMED
|
File without changes
|
{doctr_synth_generator-0.2.0 → doctr_synth_generator-0.2.1}/generator/components/font_selector.py
RENAMED
|
File without changes
|
{doctr_synth_generator-0.2.0 → doctr_synth_generator-0.2.1}/generator/components/generator.py
RENAMED
|
File without changes
|
{doctr_synth_generator-0.2.0 → doctr_synth_generator-0.2.1}/generator/components/page_generator.py
RENAMED
|
File without changes
|
{doctr_synth_generator-0.2.0 → doctr_synth_generator-0.2.1}/generator/components/text_renderer.py
RENAMED
|
File without changes
|
{doctr_synth_generator-0.2.0 → doctr_synth_generator-0.2.1}/generator/components/text_styling.py
RENAMED
|
File without changes
|
{doctr_synth_generator-0.2.0 → doctr_synth_generator-0.2.1}/generator/components/vocab_coverage.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|