dtlpy 1.114.17__py3-none-any.whl → 1.116.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dtlpy/__init__.py +491 -491
- dtlpy/__version__.py +1 -1
- dtlpy/assets/__init__.py +26 -26
- dtlpy/assets/code_server/config.yaml +2 -2
- dtlpy/assets/code_server/installation.sh +24 -24
- dtlpy/assets/code_server/launch.json +13 -13
- dtlpy/assets/code_server/settings.json +2 -2
- dtlpy/assets/main.py +53 -53
- dtlpy/assets/main_partial.py +18 -18
- dtlpy/assets/mock.json +11 -11
- dtlpy/assets/model_adapter.py +83 -83
- dtlpy/assets/package.json +61 -61
- dtlpy/assets/package_catalog.json +29 -29
- dtlpy/assets/package_gitignore +307 -307
- dtlpy/assets/service_runners/__init__.py +33 -33
- dtlpy/assets/service_runners/converter.py +96 -96
- dtlpy/assets/service_runners/multi_method.py +49 -49
- dtlpy/assets/service_runners/multi_method_annotation.py +54 -54
- dtlpy/assets/service_runners/multi_method_dataset.py +55 -55
- dtlpy/assets/service_runners/multi_method_item.py +52 -52
- dtlpy/assets/service_runners/multi_method_json.py +52 -52
- dtlpy/assets/service_runners/single_method.py +37 -37
- dtlpy/assets/service_runners/single_method_annotation.py +43 -43
- dtlpy/assets/service_runners/single_method_dataset.py +43 -43
- dtlpy/assets/service_runners/single_method_item.py +41 -41
- dtlpy/assets/service_runners/single_method_json.py +42 -42
- dtlpy/assets/service_runners/single_method_multi_input.py +45 -45
- dtlpy/assets/voc_annotation_template.xml +23 -23
- dtlpy/caches/base_cache.py +32 -32
- dtlpy/caches/cache.py +473 -473
- dtlpy/caches/dl_cache.py +201 -201
- dtlpy/caches/filesystem_cache.py +89 -89
- dtlpy/caches/redis_cache.py +84 -84
- dtlpy/dlp/__init__.py +20 -20
- dtlpy/dlp/cli_utilities.py +367 -367
- dtlpy/dlp/command_executor.py +764 -764
- dtlpy/dlp/dlp +1 -1
- dtlpy/dlp/dlp.bat +1 -1
- dtlpy/dlp/dlp.py +128 -128
- dtlpy/dlp/parser.py +651 -651
- dtlpy/entities/__init__.py +83 -83
- dtlpy/entities/analytic.py +347 -311
- dtlpy/entities/annotation.py +1879 -1879
- dtlpy/entities/annotation_collection.py +699 -699
- dtlpy/entities/annotation_definitions/__init__.py +20 -20
- dtlpy/entities/annotation_definitions/base_annotation_definition.py +100 -100
- dtlpy/entities/annotation_definitions/box.py +195 -195
- dtlpy/entities/annotation_definitions/classification.py +67 -67
- dtlpy/entities/annotation_definitions/comparison.py +72 -72
- dtlpy/entities/annotation_definitions/cube.py +204 -204
- dtlpy/entities/annotation_definitions/cube_3d.py +149 -149
- dtlpy/entities/annotation_definitions/description.py +32 -32
- dtlpy/entities/annotation_definitions/ellipse.py +124 -124
- dtlpy/entities/annotation_definitions/free_text.py +62 -62
- dtlpy/entities/annotation_definitions/gis.py +69 -69
- dtlpy/entities/annotation_definitions/note.py +139 -139
- dtlpy/entities/annotation_definitions/point.py +117 -117
- dtlpy/entities/annotation_definitions/polygon.py +182 -182
- dtlpy/entities/annotation_definitions/polyline.py +111 -111
- dtlpy/entities/annotation_definitions/pose.py +92 -92
- dtlpy/entities/annotation_definitions/ref_image.py +86 -86
- dtlpy/entities/annotation_definitions/segmentation.py +240 -240
- dtlpy/entities/annotation_definitions/subtitle.py +34 -34
- dtlpy/entities/annotation_definitions/text.py +85 -85
- dtlpy/entities/annotation_definitions/undefined_annotation.py +74 -74
- dtlpy/entities/app.py +220 -220
- dtlpy/entities/app_module.py +107 -107
- dtlpy/entities/artifact.py +174 -174
- dtlpy/entities/assignment.py +399 -399
- dtlpy/entities/base_entity.py +214 -214
- dtlpy/entities/bot.py +113 -113
- dtlpy/entities/codebase.py +292 -296
- dtlpy/entities/collection.py +38 -38
- dtlpy/entities/command.py +169 -169
- dtlpy/entities/compute.py +449 -442
- dtlpy/entities/dataset.py +1299 -1285
- dtlpy/entities/directory_tree.py +44 -44
- dtlpy/entities/dpk.py +470 -470
- dtlpy/entities/driver.py +235 -223
- dtlpy/entities/execution.py +397 -397
- dtlpy/entities/feature.py +124 -124
- dtlpy/entities/feature_set.py +145 -145
- dtlpy/entities/filters.py +798 -645
- dtlpy/entities/gis_item.py +107 -107
- dtlpy/entities/integration.py +184 -184
- dtlpy/entities/item.py +959 -953
- dtlpy/entities/label.py +123 -123
- dtlpy/entities/links.py +85 -85
- dtlpy/entities/message.py +175 -175
- dtlpy/entities/model.py +684 -684
- dtlpy/entities/node.py +1005 -1005
- dtlpy/entities/ontology.py +810 -803
- dtlpy/entities/organization.py +287 -287
- dtlpy/entities/package.py +657 -657
- dtlpy/entities/package_defaults.py +5 -5
- dtlpy/entities/package_function.py +185 -185
- dtlpy/entities/package_module.py +113 -113
- dtlpy/entities/package_slot.py +118 -118
- dtlpy/entities/paged_entities.py +299 -299
- dtlpy/entities/pipeline.py +624 -624
- dtlpy/entities/pipeline_execution.py +279 -279
- dtlpy/entities/project.py +394 -394
- dtlpy/entities/prompt_item.py +505 -499
- dtlpy/entities/recipe.py +301 -301
- dtlpy/entities/reflect_dict.py +102 -102
- dtlpy/entities/resource_execution.py +138 -138
- dtlpy/entities/service.py +963 -958
- dtlpy/entities/service_driver.py +117 -117
- dtlpy/entities/setting.py +294 -294
- dtlpy/entities/task.py +495 -495
- dtlpy/entities/time_series.py +143 -143
- dtlpy/entities/trigger.py +426 -426
- dtlpy/entities/user.py +118 -118
- dtlpy/entities/webhook.py +124 -124
- dtlpy/examples/__init__.py +19 -19
- dtlpy/examples/add_labels.py +135 -135
- dtlpy/examples/add_metadata_to_item.py +21 -21
- dtlpy/examples/annotate_items_using_model.py +65 -65
- dtlpy/examples/annotate_video_using_model_and_tracker.py +75 -75
- dtlpy/examples/annotations_convert_to_voc.py +9 -9
- dtlpy/examples/annotations_convert_to_yolo.py +9 -9
- dtlpy/examples/convert_annotation_types.py +51 -51
- dtlpy/examples/converter.py +143 -143
- dtlpy/examples/copy_annotations.py +22 -22
- dtlpy/examples/copy_folder.py +31 -31
- dtlpy/examples/create_annotations.py +51 -51
- dtlpy/examples/create_video_annotations.py +83 -83
- dtlpy/examples/delete_annotations.py +26 -26
- dtlpy/examples/filters.py +113 -113
- dtlpy/examples/move_item.py +23 -23
- dtlpy/examples/play_video_annotation.py +13 -13
- dtlpy/examples/show_item_and_mask.py +53 -53
- dtlpy/examples/triggers.py +49 -49
- dtlpy/examples/upload_batch_of_items.py +20 -20
- dtlpy/examples/upload_items_and_custom_format_annotations.py +55 -55
- dtlpy/examples/upload_items_with_modalities.py +43 -43
- dtlpy/examples/upload_segmentation_annotations_from_mask_image.py +44 -44
- dtlpy/examples/upload_yolo_format_annotations.py +70 -70
- dtlpy/exceptions.py +125 -125
- dtlpy/miscellaneous/__init__.py +20 -20
- dtlpy/miscellaneous/dict_differ.py +95 -95
- dtlpy/miscellaneous/git_utils.py +217 -217
- dtlpy/miscellaneous/json_utils.py +14 -14
- dtlpy/miscellaneous/list_print.py +105 -105
- dtlpy/miscellaneous/zipping.py +130 -130
- dtlpy/ml/__init__.py +20 -20
- dtlpy/ml/base_feature_extractor_adapter.py +27 -27
- dtlpy/ml/base_model_adapter.py +1257 -1086
- dtlpy/ml/metrics.py +461 -461
- dtlpy/ml/predictions_utils.py +274 -274
- dtlpy/ml/summary_writer.py +57 -57
- dtlpy/ml/train_utils.py +60 -60
- dtlpy/new_instance.py +252 -252
- dtlpy/repositories/__init__.py +56 -56
- dtlpy/repositories/analytics.py +85 -85
- dtlpy/repositories/annotations.py +916 -916
- dtlpy/repositories/apps.py +383 -383
- dtlpy/repositories/artifacts.py +452 -452
- dtlpy/repositories/assignments.py +599 -599
- dtlpy/repositories/bots.py +213 -213
- dtlpy/repositories/codebases.py +559 -559
- dtlpy/repositories/collections.py +332 -332
- dtlpy/repositories/commands.py +152 -158
- dtlpy/repositories/compositions.py +61 -61
- dtlpy/repositories/computes.py +439 -435
- dtlpy/repositories/datasets.py +1504 -1291
- dtlpy/repositories/downloader.py +976 -903
- dtlpy/repositories/dpks.py +433 -433
- dtlpy/repositories/drivers.py +482 -470
- dtlpy/repositories/executions.py +815 -817
- dtlpy/repositories/feature_sets.py +226 -226
- dtlpy/repositories/features.py +255 -238
- dtlpy/repositories/integrations.py +484 -484
- dtlpy/repositories/items.py +912 -909
- dtlpy/repositories/messages.py +94 -94
- dtlpy/repositories/models.py +1000 -988
- dtlpy/repositories/nodes.py +80 -80
- dtlpy/repositories/ontologies.py +511 -511
- dtlpy/repositories/organizations.py +525 -525
- dtlpy/repositories/packages.py +1941 -1941
- dtlpy/repositories/pipeline_executions.py +451 -451
- dtlpy/repositories/pipelines.py +640 -640
- dtlpy/repositories/projects.py +539 -539
- dtlpy/repositories/recipes.py +419 -399
- dtlpy/repositories/resource_executions.py +137 -137
- dtlpy/repositories/schema.py +120 -120
- dtlpy/repositories/service_drivers.py +213 -213
- dtlpy/repositories/services.py +1704 -1704
- dtlpy/repositories/settings.py +339 -339
- dtlpy/repositories/tasks.py +1477 -1477
- dtlpy/repositories/times_series.py +278 -278
- dtlpy/repositories/triggers.py +536 -536
- dtlpy/repositories/upload_element.py +257 -257
- dtlpy/repositories/uploader.py +661 -651
- dtlpy/repositories/webhooks.py +249 -249
- dtlpy/services/__init__.py +22 -22
- dtlpy/services/aihttp_retry.py +131 -131
- dtlpy/services/api_client.py +1785 -1782
- dtlpy/services/api_reference.py +40 -40
- dtlpy/services/async_utils.py +133 -133
- dtlpy/services/calls_counter.py +44 -44
- dtlpy/services/check_sdk.py +68 -68
- dtlpy/services/cookie.py +115 -115
- dtlpy/services/create_logger.py +156 -156
- dtlpy/services/events.py +84 -84
- dtlpy/services/logins.py +235 -235
- dtlpy/services/reporter.py +256 -256
- dtlpy/services/service_defaults.py +91 -91
- dtlpy/utilities/__init__.py +20 -20
- dtlpy/utilities/annotations/__init__.py +16 -16
- dtlpy/utilities/annotations/annotation_converters.py +269 -269
- dtlpy/utilities/base_package_runner.py +285 -264
- dtlpy/utilities/converter.py +1650 -1650
- dtlpy/utilities/dataset_generators/__init__.py +1 -1
- dtlpy/utilities/dataset_generators/dataset_generator.py +670 -670
- dtlpy/utilities/dataset_generators/dataset_generator_tensorflow.py +23 -23
- dtlpy/utilities/dataset_generators/dataset_generator_torch.py +21 -21
- dtlpy/utilities/local_development/__init__.py +1 -1
- dtlpy/utilities/local_development/local_session.py +179 -179
- dtlpy/utilities/reports/__init__.py +2 -2
- dtlpy/utilities/reports/figures.py +343 -343
- dtlpy/utilities/reports/report.py +71 -71
- dtlpy/utilities/videos/__init__.py +17 -17
- dtlpy/utilities/videos/video_player.py +598 -598
- dtlpy/utilities/videos/videos.py +470 -470
- {dtlpy-1.114.17.data → dtlpy-1.116.6.data}/scripts/dlp +1 -1
- dtlpy-1.116.6.data/scripts/dlp.bat +2 -0
- {dtlpy-1.114.17.data → dtlpy-1.116.6.data}/scripts/dlp.py +128 -128
- {dtlpy-1.114.17.dist-info → dtlpy-1.116.6.dist-info}/METADATA +186 -183
- dtlpy-1.116.6.dist-info/RECORD +239 -0
- {dtlpy-1.114.17.dist-info → dtlpy-1.116.6.dist-info}/WHEEL +1 -1
- {dtlpy-1.114.17.dist-info → dtlpy-1.116.6.dist-info}/licenses/LICENSE +200 -200
- tests/features/environment.py +551 -551
- dtlpy/assets/__pycache__/__init__.cpython-310.pyc +0 -0
- dtlpy-1.114.17.data/scripts/dlp.bat +0 -2
- dtlpy-1.114.17.dist-info/RECORD +0 -240
- {dtlpy-1.114.17.dist-info → dtlpy-1.116.6.dist-info}/entry_points.txt +0 -0
- {dtlpy-1.114.17.dist-info → dtlpy-1.116.6.dist-info}/top_level.txt +0 -0
@@ -1,670 +1,670 @@

This hunk is from `dtlpy/utilities/dataset_generators/dataset_generator.py` (the only entry above with a 670-line change). Every line of the file is removed and re-added with identical text — the re-added side, which the capture shows up to line 372 before breaking off, matches the removed side verbatim, consistent with the matching +670/−670 counts — so the whole-file rewrite carries no textual change and the content is listed once below, with indentation restored:

```python
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path
from PIL import Image
import collections.abc
import numpy as np
import collections
import logging
import shutil
import json
import copy
import tqdm
import sys
import os
import re
from ... import entities

logger = logging.getLogger(name='dtlpy')


class DataItem(dict):
    def __init__(self, *args, **kwargs):
        super(DataItem, self).__init__(*args, **kwargs)

    @property
    def image_filepath(self):
        return self['image_filepath']

    @image_filepath.setter
    def image_filepath(self, val):
        self['image_filepath'] = val


class DatasetGenerator:

    def __init__(self,
                 dataset_entity: entities.Dataset,
                 annotation_type: entities.AnnotationType,
                 item_type: list = None,
                 filters: entities.Filters = None,
                 data_path=None,
                 overwrite=False,
                 id_to_label_map=None,
                 label_to_id_map=None,
                 transforms=None,
                 transforms_callback=None,
                 num_workers=0,
                 batch_size=None,
                 collate_fn=None,
                 shuffle=True,
                 seed=None,
                 to_categorical=False,
                 to_mask=False,
                 class_balancing=False,
                 # debug flags
                 return_originals=False,
                 ignore_empty=True
                 ) -> None:
        """
        Base Dataset Generator to build and iterate over images and annotations

        * Mapping Labels *
        To set a label mapping from labels to id you can use the `label_to_id_map` or `id_to_label_map`.
        NOTE: if they are not i.i.d you'll need to input both.
        In semantic, a `$default` label should be added so that the background (and all unlabeled pixels) will be
        mapped to the model's inputs

        label_to_id_map = {'cat': 1,
                           'dog': 1,
                           '$default': 0}
        id_to_label_map = {1: 'cats_and_dogs',
                           0: 'background'}

        :param dataset_entity: dl.Dataset entity
        :param annotation_type: dl.AnnotationType - type of annotation to load from the annotated dataset
        :param item_type: list of file extension to load. default: ['jpg', 'jpeg', 'png', 'bmp']
        :param filters: dl.Filters - filtering entity to filter the dataset items
        :param data_path: Path to Dataloop annotations (root to "item" and "json").
        :param overwrite:
        :param dict id_to_label_map: Optional, {id: label_string} dictionary, default taken from dataset
        :param dict label_to_id_map: Optional, {label_string: id} dictionary
        :param transforms: Optional transform to be applied on a sample. list, imgaug.Sequence or torchvision.transforms.Compose
        :param transforms_callback: Optional function to handle the callback of each batch.
            look at default_transforms_callback for more information. available: imgaug_transforms_callback, torchvision_transforms_callback
        :param num_workers: Optional - number of separate threads to load the images
        :param batch_size: (int, optional): how many samples per batch to load, if not none - items will always be a list
        :param collate_fn: Optional - merges a list of samples to form a mini-batch of Tensor(s).
        :param shuffle: Whether to shuffle the data (default: True) If set to False, sorts the data in alphanumeric order.
        :param seed: Optional random seed for shuffling and transformations.
        :param to_categorical: convert label id to categorical format
        :param to_mask: convert annotations to an instance mask (will be true for SEGMENTATION)
        :param class_balancing: if True - performing random over-sample with class ids as the target to balance training data
        :param return_originals: bool - If True, return ALSO images and annotations before transformations (for debug)
        :param ignore_empty: bool - If True, generator will NOT collect items without annotations
        """
        self._dataset_entity = dataset_entity

        # default item types (extension for now)
        if item_type is None:
            item_type = ['jpg', 'jpeg', 'png', 'bmp']
        if not isinstance(item_type, list):
            item_type = [item_type]
        self.item_type = item_type

        # id labels mapping
        if label_to_id_map is None and id_to_label_map is None:
            # if both are None - take from dataset
            label_to_id_map = dataset_entity.instance_map
            id_to_label_map = {int(v): k for k, v in label_to_id_map.items()}
        else:
            # one or both is NOT None
            if label_to_id_map is None:
                # set label_to_id_map from the other
                label_to_id_map = {v: int(k) for k, v in id_to_label_map.items()}
            if id_to_label_map is None:
                # set id_to_label_map from the other
                id_to_label_map = {int(v): k for k, v in label_to_id_map.items()}
        # put it on the local ontology for the annotations download
        dataset_entity._get_ontology().instance_map = label_to_id_map
        self.id_to_label_map = id_to_label_map
        self.label_to_id_map = label_to_id_map

        # if annotation type is segmentation - to_mask must be True
        if annotation_type == entities.AnnotationType.SEGMENTATION:
            to_mask = True

        if data_path is None:
            data_path = os.path.join(os.path.expanduser('~'),
                                     '.dataloop',
                                     'datasets',
                                     "{}_{}".format(dataset_entity.name,
                                                    dataset_entity.id))
        download = False
        if os.path.isdir(data_path):
            if overwrite:
                logger.warning('overwrite flag is True! deleting and overwriting')
                shutil.rmtree(data_path)
                download = True
        else:
            download = True
        if download:
            annotation_options = [entities.ViewAnnotationOptions.JSON]
            if to_mask is True:
                annotation_options.append(entities.ViewAnnotationOptions.INSTANCE)
            _ = dataset_entity.items.download(filters=filters,
                                              local_path=data_path,
                                              thickness=-1,
                                              annotation_options=annotation_options)
        self.root_dir = data_path
        self._items_path = Path(self.root_dir).joinpath('items')
        self._json_path = Path(self.root_dir).joinpath('json')
        self._mask_path = Path(self.root_dir).joinpath('instance')
        self._transforms = transforms
        self._transforms_callback = transforms_callback
        if self._transforms is not None and self._transforms_callback is None:
            # use default callback
            self._transforms_callback = default_transforms_callback

        self.annotation_type = annotation_type
        self.num_workers = num_workers
        self.to_categorical = to_categorical
        self.num_classes = len(label_to_id_map)
        self.shuffle = shuffle
        self.seed = seed
        self.to_mask = to_mask
        self.batch_size = batch_size
        self.collate_fn = collate_fn
        self.class_balancing = class_balancing
        # inits
        self.data_items = list()
        # flags
        self.return_originals = return_originals
        self.ignore_empty = ignore_empty

        ####################
        # Load annotations #
        ####################
        self.load_annotations()

    @property
    def dataset_entity(self):
        assert isinstance(self._dataset_entity, entities.Dataset)
        return self._dataset_entity

    @dataset_entity.setter
    def dataset_entity(self, val):
        assert isinstance(val, entities.Dataset)
        self._dataset_entity = val

    @property
    def n_items(self):
        return len(self.data_items)

    def _load_single(self, image_filepath, pbar=None):
        try:
            is_empty = False
            item_info = DataItem()
            # add image path
            item_info.image_filepath = str(image_filepath)
            if os.stat(image_filepath).st_size < 5:
                logger.warning('IGNORING corrupted image: {!r}'.format(image_filepath))
                return None, True
            # get "platform" path
            rel_path = image_filepath.relative_to(self._items_path)
            # replace suffix to JSON
            rel_path_wo_png_ext = rel_path.with_suffix('.json')
            # create local path
            annotation_filepath = Path(self._json_path, rel_path_wo_png_ext)

            if os.path.isfile(annotation_filepath):
                with open(annotation_filepath, 'r') as f:
                    data = json.load(f)
                if 'id' in data:
                    item_id = data.get('id')
                elif '_id' in data:
                    item_id = data.get('_id')
                annotations = entities.AnnotationCollection.from_json(data)
            else:
                item_id = ''
                annotations = None
            item_info.update(item_id=item_id)
            if self.annotation_type is not None:
                # add item id from json
                polygon_coordinates = list()
                box_coordinates = list()
                classes_ids = list()
                labels = list()
                if annotations is not None:
                    for annotation in annotations:
                        if 'user' in annotation.metadata and \
                                'model' in annotation.metadata['user']:
                            # and 'name' in annotation.metadata['user']['model']:
                            # Do not use prediction annotations in the data generator
                            continue
                        if annotation.type == self.annotation_type:
                            if annotation.label not in self.label_to_id_map:
                                logger.warning(
                                    'Missing label {!r} in label_to_id_map. Skipping.. Use label_to_id_map for other behaviour'.format(
                                        annotation.label))
                            else:
                                classes_ids.append(self.label_to_id_map[annotation.label])
                                labels.append(annotation.label)
                                box_coordinates.append(np.asarray([annotation.left,
                                                                   annotation.top,
                                                                   annotation.right,
                                                                   annotation.bottom]))
                                if self.annotation_type == entities.AnnotationType.POLYGON:
                                    polygon_coordinates.append(annotation.geo)
                        if annotation.type not in [entities.AnnotationType.CLASSIFICATION,
                                                   entities.AnnotationType.SEGMENTATION,
                                                   entities.AnnotationType.BOX,
                                                   entities.AnnotationType.POLYGON]:
                            raise ValueError('unsupported annotation type: {}'.format(annotation.type))
                dtype = object if self.annotation_type == entities.AnnotationType.POLYGON else None
                # reorder for output
                item_info.update({entities.AnnotationType.BOX.value: np.asarray(box_coordinates).astype(float),
                                  entities.AnnotationType.CLASSIFICATION.value: np.asarray(classes_ids),
                                  entities.AnnotationType.POLYGON.value: np.asarray(polygon_coordinates, dtype=dtype),
                                  'labels': labels})
                if len(item_info[entities.AnnotationType.CLASSIFICATION.value]) == 0:
                    logger.debug('Empty annotation (nothing matched label_to_id_map) for image filename: {}'.format(
                        image_filepath))
                    is_empty = True
            if self.to_mask:
                # get "platform" path
                rel_path = image_filepath.relative_to(self._items_path)
                # replace suffix to PNG
                rel_path_wo_png_ext = rel_path.with_suffix('.png')
                # create local path
                mask_filepath = Path(self._mask_path, rel_path_wo_png_ext)
                if not os.path.isfile(mask_filepath):
                    logger.debug('Empty annotation for image filename: {}'.format(image_filepath))
                    is_empty = True
                item_info.update({entities.AnnotationType.SEGMENTATION.value: str(mask_filepath)})
            item_info.update(annotation_filepath=str(annotation_filepath))
            return item_info, is_empty
        except Exception:
            logger.exception('failed loading item in generator! {!r}'.format(image_filepath))
            return None, True
        finally:
            if pbar is not None:
                pbar.update()

    def load_annotations(self):
        logger.info(f"Collecting items with the following extensions: {self.item_type}")
        files = list()
        for ext in self.item_type:
            # build regex to ignore extension case
            regex = '*.{}'.format(''.join(['[{}{}]'.format(letter.lower(), letter.upper()) for letter in ext]))
            files.extend(self._items_path.rglob(regex))

        pool = ThreadPoolExecutor(max_workers=32)
        jobs = list()
        pbar = tqdm.tqdm(total=len(files),
                         desc='Loading Data Generator',
                         disable=self.dataset_entity._client_api.verbose.disable_progress_bar,
                         file=sys.stdout)
        for image_filepath in files:
            jobs.append(pool.submit(self._load_single,
                                    image_filepath=image_filepath,
                                    pbar=pbar))
        outputs = [job.result() for job in jobs]
        pbar.close()

        n_items = len(outputs)
        n_empty_items = sum([1 for _, is_empty in outputs if is_empty is True])

        output_msg = 'Done loading items. Total items loaded: {}.'.format(n_items)
        if n_empty_items > 0:
            output_msg += '{action} {n_empty_items} items without annotations'.format(
                action='IGNORING' if self.ignore_empty else 'INCLUDING',
                n_empty_items=n_empty_items)

        if self.ignore_empty:
            # take ONLY non-empty
            data_items = [data_item for data_item, is_empty in outputs if is_empty is False]
        else:
            # take all
            data_items = [data_item for data_item, is_empty in outputs]

        self.data_items = data_items
        if len(self.data_items) == 0:
            logger.warning(output_msg)
        else:
            logger.info(output_msg)
        ###################
        # class balancing #
        ###################
        labels = [label for item in self.data_items for label in item.get('labels', list())]
        logger.info(f"Data Generator labels balance statistics: {collections.Counter(labels)}")
        if self.class_balancing:
            try:
                from imblearn.over_sampling import RandomOverSampler
            except Exception:
                logger.error(
                    'Class balancing is ON but missing "imbalanced-learn". run "pip install -U imbalanced-learn" and try again')
                raise
            logger.info('Class balance is on!')
            class_ids = [class_id for item in self.data_items for class_id in item['class']]
            dummy_inds = [i_item for i_item, item in enumerate(self.data_items) for _ in item['class']]
            over_sampler = RandomOverSampler(random_state=42)
            X_res, y_res = over_sampler.fit_resample(np.asarray(dummy_inds).reshape(-1, 1), np.asarray(class_ids))
            over_sampled_data_items = [self.data_items[i] for i in X_res.flatten()]
            oversampled_labels = [label for item in over_sampled_data_items for label in item['labels']]
            logger.info(f"Data Generator labels after oversampling: {collections.Counter(oversampled_labels)}")
            self.data_items = over_sampled_data_items

        if self.shuffle:
            if self.seed is None:
                self.seed = 256
            np.random.seed(self.seed)
            np.random.shuffle(self.data_items)

    def transform(self, image, target=None):
        if self._transforms is not None:
            image, target = self._transforms_callback(transforms=self._transforms,
                                                      image=image,
                                                      target=target,
                                                      annotation_type=self.annotation_type)
        return image, target

    def _to_dtlpy(self, targets, labels=None):
        annotations = entities.AnnotationCollection(item=None)
        annotations._dataset = self._dataset_entity
        if labels is None:
            labels = [None] * len(targets)
        if self.to_mask is True:
            for label, label_ind in self.label_to_id_map.items():
                target = targets == label_ind
                if np.any(target):
                    annotations.add(annotation_definition=entities.Segmentation(geo=target,
                                                                                label=label))
        elif self.annotation_type == entities.AnnotationType.BOX:
            for target, label in zip(targets, labels):
                annotations.add(annotation_definition=entities.Box(left=target[0],
                                                                   top=target[1],
                                                                   right=target[2],
                                                                   bottom=target[3],
                                                                   label=label))
        elif self.annotation_type == entities.AnnotationType.CLASSIFICATION:
            for target, label in zip(targets, labels):
                annotations.add(annotation_definition=entities.Classification(label=label))
        elif self.annotation_type == entities.AnnotationType.POLYGON:
            for target, label in zip(targets, labels):
                annotations.add(annotation_definition=entities.Polygon(label=label,
                                                                       geo=target.astype(float)))
        else:
            raise ValueError('unsupported annotation type: {}'.format(self.annotation_type))
        # set dataset for color
        for annotation in annotations:
            annotation._dataset = self._dataset_entity
        return annotations

    def visualize(self, idx=None, return_output=False, plot=True):
        if not self.__len__():
            raise ValueError('no items selected, cannot preform visualization')
        import matplotlib.pyplot as plt
        if idx is None:
            idx = np.random.randint(self.__len__())
        if self.batch_size is not None:
            raise ValueError('can visualize only of batch_size in None')
        data_item = self.__getitem__(idx)
        image = Image.fromarray(data_item.get('image'))
        labels = data_item.get('labels')
        targets = data_item.get('annotations')
        annotations = self._to_dtlpy(targets=targets, labels=labels)
        mask = Image.fromarray(annotations.show(height=image.size[1],
                                                width=image.size[0],
                                                alpha=0.8))
        image.paste(mask, (0, 0), mask)
        marked_image = np.asarray(image)
        if plot:
            plt.figure()
            plt.imshow(marked_image)
        if return_output:
            return marked_image, annotations

    def __getsingleitem__(self, idx):
        data_item = copy.deepcopy(self.data_items[idx])

        image_filename = data_item.get('image_filepath')
        image = np.asarray(Image.open(image_filename))
        data_item.update({'image': image})

        annotations = data_item.get(self.annotation_type)
        if self.to_mask is True:
            # if segmentation - read from file
            mask_filepath = data_item.get(entities.AnnotationType.SEGMENTATION)
            annotations = np.asarray(Image.open(mask_filepath).convert('L'))
            if self.to_categorical:
                onehot = np.zeros((annotations.size, self.num_classes + 1))
                onehot[np.arange(annotations.size), annotations] = 1
                annotations = onehot
        data_item.update({'annotations': annotations})

        if self.return_originals is True:
            annotations = []
            if self.annotation_type is not None:
                annotations = data_item.get('annotations')
            data_item.update({'orig_image': image.copy(),
                              'orig_annotations': annotations.copy()})

        ###########################
        # perform transformations #
        ###########################
        if self._transforms is not None:
            annotations = data_item.get('annotations')
            image, annotations = self.transform(image, annotations)
            data_item.update({'image': image,
                              'annotations': annotations})
        return data_item

    def __iter__(self):
        """Create a generator that iterate over the Sequence."""
        for item in (self[i] for i in range(len(self))):
            yield item

    def __len__(self):
        factor = self.batch_size
        if factor is None:
            factor = 1
        return int(np.ceil(self.n_items / factor))

    def __getitem__(self, idx):
        """
        Support single index or a slice.
        Uses ThreadPoolExecutor is num_workers != 0
        """
        to_return = None
        if isinstance(idx, int):
            if self.batch_size is None:
                to_return = self.__getsingleitem__(idx)
            else:
                # if batch_size is define, convert idx to batches
                idx = slice(idx * self.batch_size, min((idx + 1) * self.batch_size, len(self.data_items)))

        if isinstance(idx, slice):
            to_return = list()
            idxs = list(range(idx.start, idx.stop,
                              idx.step if idx.step else 1))
            if self.num_workers == 0:
                for dx in idxs:
                    to_return.append(self.__getsingleitem__(dx))
            else:
                with ThreadPoolExecutor(max_workers=self.num_workers) as executor:
                    for sample in executor.map(lambda i: self.__getsingleitem__(i), idxs):
                        to_return.append(sample)

        if to_return is None:
            raise TypeError('unsupported indexing: list indices must be integers or slices, not {}'.format(type(idx)))

        if self.collate_fn is not None:
            to_return = self.collate_fn(to_return)
        return to_return


np_str_obj_array_pattern = re.compile(r'[SaUO]')

default_collate_err_msg_format = (
    "default_collate: batch must contain tensors, numpy arrays, numbers, "
    "dicts or lists; found {}")


def default_transforms_callback(transforms, image, target, annotation_type):
    """
    Recursive call to perform the augmentations in "transforms"

    :param transforms:
    :param image:
    :param target:
    :param annotation_type:
    :return:
    """
    # get the type string without importing any other package
    transforms_type = type(transforms)

    ############
    # Handle compositions and lists of augmentations with a recursive call
    if transforms_type.__module__ == 'torchvision.transforms.transforms' and transforms_type.__name__ == 'Compose':
        # torchvision compose - convert to list
        image, target = default_transforms_callback(transforms.transforms, image, target, annotation_type)
        return image, target

    if transforms_type.__module__ == 'imgaug.augmenters.meta' and transforms_type.__name__ == 'Sequential':
        # imgaug sequential - convert to list
        image, target = default_transforms_callback(list(transforms), image, target, annotation_type)
        return image, target

    if isinstance(transforms, list):
        for t in transforms:
            image, target = default_transforms_callback(t, image, target, annotation_type)
        return image, target

    ##############
    # Handle single annotations
    if 'imgaug.augmenters' in transforms_type.__module__:
        # handle single imgaug augmentation
        if target is not None and annotation_type is not None:
            # works for batch but running on a single image
            if annotation_type == entities.AnnotationType.BOX:
                image, target = transforms(images=[image], bounding_boxes=[target])
                target = target[0]
            elif annotation_type == entities.AnnotationType.SEGMENTATION:
                # expending to HxWx1 for the imgaug function to work
                target = target[..., None]
                image, target = transforms(images=[image], segmentation_maps=[target])
                target = target[0][:, :, 0]
            elif annotation_type == entities.AnnotationType.POLYGON:
                image, target = transforms(images=[image], polygons=[target])
                target = target[0]
            elif annotation_type == entities.AnnotationType.CLASSIFICATION:
                image = transforms(images=[image])
            else:
                raise ValueError('unsupported annotations type for image augmentations: {}'.format(annotation_type))
            image = image[0]
        else:
            image = transforms(images=[image])
            image = image[0]
    else:
        image = transforms(image)

    return image, target


def collate_default(batch):
    r"""Puts each data field into a tensor with outer dimension batch size"""
    elem = batch[0]
    elem_type = type(elem)
    if isinstance(elem, np.ndarray):
        return np.stack(batch, axis=0)
    elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' and elem_type.__name__ != 'string_':
        if elem_type.__name__ == 'ndarray' or elem_type.__name__ == 'memmap':
            # array of string classes and object
            if np_str_obj_array_pattern.search(elem.dtype.str) is not None:
                raise TypeError(default_collate_err_msg_format.format(elem.dtype))
            return batch
            # return [tf.convert_to_tensor(b) for b in batch]
        elif elem.shape == ():  # scalars
            return batch
    elif isinstance(elem, float):
        return batch
    elif isinstance(elem, int):
        return batch
    elif isinstance(elem, str) or isinstance(elem, bytes) or elem is None:
        return batch
    elif isinstance(elem, collections.abc.Mapping):
        return {key: collate_default([d[key] for d in batch]) for key in elem}
    elif isinstance(elem, tuple) and hasattr(elem, '_fields'):  # namedtuple
        return elem_type(*(collate_default(samples) for samples in zip(*batch)))
    elif isinstance(elem, collections.abc.Sequence):
        transposed = zip(*batch)
        return transposed
    raise TypeError(default_collate_err_msg_format.format(elem_type))


def collate_torch(batch):
    r"""Puts each data field into a tensor with outer dimension batch size"""
    import torch
    elem = batch[0]
    elem_type = type(elem)
    if isinstance(elem, torch.Tensor):
        out = None
        if torch.utils.data.get_worker_info() is not None:
            # If we're in a background process, concatenate directly into a
            # shared memory tensor to avoid an extra copy
            numel = sum(x.numel() for x in batch)
            storage = elem.storage()._new_shared(numel)
            out = elem.new(storage)
        return torch.stack(batch, 0, out=out)
    elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' and elem_type.__name__ != 'string_':
        if elem_type.__name__ == 'ndarray' or elem_type.__name__ == 'memmap':
            # array of string classes and object
            if np_str_obj_array_pattern.search(elem.dtype.str) is not None:
                raise TypeError(default_collate_err_msg_format.format(elem.dtype))
            try:
                return torch.stack([torch.as_tensor(b) for b in batch])
            except RuntimeError:
                return batch
        elif elem.shape == ():  # scalars
            return torch.as_tensor(batch)
    elif isinstance(elem, float):
        return torch.tensor(batch, dtype=torch.float64)
    elif isinstance(elem, int):
        return torch.tensor(batch)
    elif isinstance(elem, str) or isinstance(elem, bytes) or elem is None:
        return batch
    elif isinstance(elem, collections.abc.Mapping):
        return {key: collate_torch([d[key] for d in batch]) for key in elem}
    elif isinstance(elem, tuple) and hasattr(elem, '_fields'):  # namedtuple
        return elem_type(*(collate_torch(samples) for samples in zip(*batch)))
    elif isinstance(elem, collections.abc.Sequence):
        transposed = zip(*batch)
        return transposed

    raise TypeError(default_collate_err_msg_format.format(elem_type))


def collate_tf(batch):
    r"""Puts each data field into a tensor with outer dimension batch size"""
    import tensorflow as tf
    elem = batch[0]
    elem_type = type(elem)
    if isinstance(elem, tf.Tensor):
        return tf.stack(batch, axis=0)
    elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' and elem_type.__name__ != 'string_':
        if elem_type.__name__ == 'ndarray' or elem_type.__name__ == 'memmap':
            # array of string classes and object
            if np_str_obj_array_pattern.search(elem.dtype.str) is not None:
                raise TypeError(default_collate_err_msg_format.format(elem.dtype))
            try:
                return tf.convert_to_tensor(batch)
            except ValueError:
                # failed on orig_image because of a mismatch in the shape (not resizing all the images so cannot stack)
                return batch
            # return [tf.convert_to_tensor(b) for b in batch]
        elif elem.shape == ():  # scalars
            return tf.convert_to_tensor(batch)
    elif isinstance(elem, float):
        return tf.convert_to_tensor(batch, dtype=tf.float64)
    elif isinstance(elem, int):
        return tf.convert_to_tensor(batch)
    elif isinstance(elem, str) or isinstance(elem, bytes) or elem is None:
        return batch
    elif isinstance(elem, collections.abc.Mapping):
        return {key: collate_tf([d[key] for d in batch]) for key in elem}
    elif isinstance(elem, tuple) and hasattr(elem, '_fields'):  # namedtuple
        return elem_type(*(collate_tf(samples) for samples in zip(*batch)))
    elif isinstance(elem, collections.abc.Sequence):
        transposed = zip(*batch)
        return transposed
    raise TypeError(default_collate_err_msg_format.format(elem_type))
```
1
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from PIL import Image
|
|
4
|
+
import collections.abc
|
|
5
|
+
import numpy as np
|
|
6
|
+
import collections
|
|
7
|
+
import logging
|
|
8
|
+
import shutil
|
|
9
|
+
import json
|
|
10
|
+
import copy
|
|
11
|
+
import tqdm
|
|
12
|
+
import sys
|
|
13
|
+
import os
|
|
14
|
+
import re
|
|
15
|
+
from ... import entities
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger(name='dtlpy')
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class DataItem(dict):
|
|
21
|
+
def __init__(self, *args, **kwargs):
|
|
22
|
+
super(DataItem, self).__init__(*args, **kwargs)
|
|
23
|
+
|
|
24
|
+
@property
|
|
25
|
+
def image_filepath(self):
|
|
26
|
+
return self['image_filepath']
|
|
27
|
+
|
|
28
|
+
@image_filepath.setter
|
|
29
|
+
def image_filepath(self, val):
|
|
30
|
+
self['image_filepath'] = val
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class DatasetGenerator:
|
|
34
|
+
|
|
35
|
+
def __init__(self,
|
|
36
|
+
dataset_entity: entities.Dataset,
|
|
37
|
+
annotation_type: entities.AnnotationType,
|
|
38
|
+
item_type: list = None,
|
|
39
|
+
filters: entities.Filters = None,
|
|
40
|
+
data_path=None,
|
|
41
|
+
overwrite=False,
|
|
42
|
+
id_to_label_map=None,
|
|
43
|
+
label_to_id_map=None,
|
|
44
|
+
transforms=None,
|
|
45
|
+
transforms_callback=None,
|
|
46
|
+
num_workers=0,
|
|
47
|
+
batch_size=None,
|
|
48
|
+
collate_fn=None,
|
|
49
|
+
shuffle=True,
|
|
50
|
+
seed=None,
|
|
51
|
+
to_categorical=False,
|
|
52
|
+
to_mask=False,
|
|
53
|
+
class_balancing=False,
|
|
54
|
+
# debug flags
|
|
55
|
+
return_originals=False,
|
|
56
|
+
ignore_empty=True
|
|
57
|
+
) -> None:
|
|
58
|
+
"""
|
|
59
|
+
Base Dataset Generator to build and iterate over images and annotations
|
|
60
|
+
|
|
61
|
+
* Mapping Labels *
|
|
62
|
+
To set a label mapping from labels to id you can use the `label_to_id_map` or `id_to_label_map`.
|
|
63
|
+
NOTE: if they are not i.i.d you'll need to input both.
|
|
64
|
+
In semantic, a `$default` label should be added so that the background (and all unlabeled pixels) will be
|
|
65
|
+
mapped to the model's inputs
|
|
66
|
+
|
|
67
|
+
label_to_id_map = {'cat': 1,
|
|
68
|
+
'dog': 1,
|
|
69
|
+
'$default': 0}
|
|
70
|
+
id_to_label_map = {1: 'cats_and_dogs',
|
|
71
|
+
0: 'background'}
|
|
72
|
+
|
|
73
|
+
:param dataset_entity: dl.Dataset entity
|
|
74
|
+
:param annotation_type: dl.AnnotationType - type of annotation to load from the annotated dataset
|
|
75
|
+
:param item_type: list of file extension to load. default: ['jpg', 'jpeg', 'png', 'bmp']
|
|
76
|
+
:param filters: dl.Filters - filtering entity to filter the dataset items
|
|
77
|
+
:param data_path: Path to Dataloop annotations (root to "item" and "json").
|
|
78
|
+
:param overwrite:
|
|
79
|
+
:param dict id_to_label_map: Optional, {id: label_string} dictionary, default taken from dataset
|
|
80
|
+
:param dict label_to_id_map: Optional, {label_string: id} dictionary
|
|
81
|
+
:param transforms: Optional transform to be applied on a sample. list, imgaug.Sequence or torchvision.transforms.Compose
|
|
82
|
+
:param transforms_callback: Optional function to handle the callback of each batch.
|
|
83
|
+
look at default_transforms_callback for more information. available: imgaug_transforms_callback, torchvision_transforms_callback
|
|
84
|
+
:param num_workers: Optional - number of separate threads to load the images
|
|
85
|
+
:param batch_size: (int, optional): how many samples per batch to load, if not none - items will always be a list
|
|
86
|
+
:param collate_fn: Optional - merges a list of samples to form a mini-batch of Tensor(s).
|
|
87
|
+
:param shuffle: Whether to shuffle the data (default: True) If set to False, sorts the data in alphanumeric order.
|
|
88
|
+
:param seed: Optional random seed for shuffling and transformations.
|
|
89
|
+
:param to_categorical: convert label id to categorical format
|
|
90
|
+
:param to_mask: convert annotations to an instance mask (will be true for SEGMENTATION)
|
|
91
|
+
:param class_balancing: if True - performing random over-sample with class ids as the target to balance training data
|
|
92
|
+
:param return_originals: bool - If True, return ALSO images and annotations before transformations (for debug)
|
|
93
|
+
:param ignore_empty: bool - If True, generator will NOT collect items without annotations
|
|
94
|
+
"""
|
|
95
|
+
self._dataset_entity = dataset_entity
|
|
96
|
+
|
|
97
|
+
# default item types (extension for now)
|
|
98
|
+
if item_type is None:
|
|
99
|
+
item_type = ['jpg', 'jpeg', 'png', 'bmp']
|
|
100
|
+
if not isinstance(item_type, list):
|
|
101
|
+
item_type = [item_type]
|
|
102
|
+
self.item_type = item_type
|
|
103
|
+
|
|
104
|
+
# id labels mapping
|
|
105
|
+
if label_to_id_map is None and id_to_label_map is None:
|
|
106
|
+
# if both are None - take from dataset
|
|
107
|
+
label_to_id_map = dataset_entity.instance_map
|
|
108
|
+
id_to_label_map = {int(v): k for k, v in label_to_id_map.items()}
|
|
109
|
+
else:
|
|
110
|
+
# one or both is NOT None
|
|
111
|
+
if label_to_id_map is None:
|
|
112
|
+
# set label_to_id_map from the other
|
|
113
|
+
label_to_id_map = {v: int(k) for k, v in id_to_label_map.items()}
|
|
114
|
+
if id_to_label_map is None:
|
|
115
|
+
# set id_to_label_map from the other
|
|
116
|
+
id_to_label_map = {int(v): k for k, v in label_to_id_map.items()}
|
|
117
|
+
# put it on the local ontology for the annotations download
|
|
118
|
+
dataset_entity._get_ontology().instance_map = label_to_id_map
|
|
119
|
+
self.id_to_label_map = id_to_label_map
|
|
120
|
+
self.label_to_id_map = label_to_id_map
|
|
121
|
+
|
|
122
|
+
# if annotation type is segmentation - to_mask must be True
|
|
123
|
+
if annotation_type == entities.AnnotationType.SEGMENTATION:
|
|
124
|
+
to_mask = True
|
|
125
|
+
|
|
126
|
+
if data_path is None:
|
|
127
|
+
data_path = os.path.join(os.path.expanduser('~'),
|
|
128
|
+
'.dataloop',
|
|
129
|
+
'datasets',
|
|
130
|
+
"{}_{}".format(dataset_entity.name,
|
|
131
|
+
dataset_entity.id))
|
|
132
|
+
download = False
|
|
133
|
+
if os.path.isdir(data_path):
|
|
134
|
+
if overwrite:
|
|
135
|
+
logger.warning('overwrite flag is True! deleting and overwriting')
|
|
136
|
+
shutil.rmtree(data_path)
|
|
137
|
+
download = True
|
|
138
|
+
else:
|
|
139
|
+
download = True
|
|
140
|
+
if download:
|
|
141
|
+
annotation_options = [entities.ViewAnnotationOptions.JSON]
|
|
142
|
+
if to_mask is True:
|
|
143
|
+
annotation_options.append(entities.ViewAnnotationOptions.INSTANCE)
|
|
144
|
+
_ = dataset_entity.items.download(filters=filters,
|
|
145
|
+
local_path=data_path,
|
|
146
|
+
thickness=-1,
|
|
147
|
+
annotation_options=annotation_options)
|
|
148
|
+
self.root_dir = data_path
|
|
149
|
+
self._items_path = Path(self.root_dir).joinpath('items')
|
|
150
|
+
self._json_path = Path(self.root_dir).joinpath('json')
|
|
151
|
+
self._mask_path = Path(self.root_dir).joinpath('instance')
|
|
152
|
+
self._transforms = transforms
|
|
153
|
+
self._transforms_callback = transforms_callback
|
|
154
|
+
if self._transforms is not None and self._transforms_callback is None:
|
|
155
|
+
# use default callback
|
|
156
|
+
self._transforms_callback = default_transforms_callback
|
|
157
|
+
|
|
158
|
+
self.annotation_type = annotation_type
|
|
159
|
+
self.num_workers = num_workers
|
|
160
|
+
self.to_categorical = to_categorical
|
|
161
|
+
self.num_classes = len(label_to_id_map)
|
|
162
|
+
self.shuffle = shuffle
|
|
163
|
+
self.seed = seed
|
|
164
|
+
self.to_mask = to_mask
|
|
165
|
+
self.batch_size = batch_size
|
|
166
|
+
self.collate_fn = collate_fn
|
|
167
|
+
self.class_balancing = class_balancing
|
|
168
|
+
# inits
|
|
169
|
+
self.data_items = list()
|
|
170
|
+
# flags
|
|
171
|
+
self.return_originals = return_originals
|
|
172
|
+
self.ignore_empty = ignore_empty
|
|
173
|
+
|
|
174
|
+
####################
|
|
175
|
+
# Load annotations #
|
|
176
|
+
####################
|
|
177
|
+
self.load_annotations()
|
|
178
|
+
|
|
179
|
+
@property
|
|
180
|
+
def dataset_entity(self):
|
|
181
|
+
assert isinstance(self._dataset_entity, entities.Dataset)
|
|
182
|
+
return self._dataset_entity
|
|
183
|
+
|
|
184
|
+
@dataset_entity.setter
|
|
185
|
+
def dataset_entity(self, val):
|
|
186
|
+
assert isinstance(val, entities.Dataset)
|
|
187
|
+
self._dataset_entity = val
|
|
188
|
+
|
|
189
|
+
@property
|
|
190
|
+
def n_items(self):
|
|
191
|
+
return len(self.data_items)
|
|
192
|
+
|
|
193
|
+
def _load_single(self, image_filepath, pbar=None):
|
|
194
|
+
try:
|
|
195
|
+
is_empty = False
|
|
196
|
+
item_info = DataItem()
|
|
197
|
+
# add image path
|
|
198
|
+
item_info.image_filepath = str(image_filepath)
|
|
199
|
+
if os.stat(image_filepath).st_size < 5:
|
|
200
|
+
logger.warning('IGNORING corrupted image: {!r}'.format(image_filepath))
|
|
201
|
+
return None, True
|
|
202
|
+
# get "platform" path
|
|
203
|
+
rel_path = image_filepath.relative_to(self._items_path)
|
|
204
|
+
# replace suffix to JSON
|
|
205
|
+
rel_path_wo_png_ext = rel_path.with_suffix('.json')
|
|
206
|
+
# create local path
|
|
207
|
+
annotation_filepath = Path(self._json_path, rel_path_wo_png_ext)
|
|
208
|
+
|
|
209
|
+
if os.path.isfile(annotation_filepath):
|
|
210
|
+
with open(annotation_filepath, 'r') as f:
|
|
211
|
+
data = json.load(f)
|
|
212
|
+
if 'id' in data:
|
|
213
|
+
item_id = data.get('id')
|
|
214
|
+
elif '_id' in data:
|
|
215
|
+
item_id = data.get('_id')
|
|
216
|
+
annotations = entities.AnnotationCollection.from_json(data)
|
|
217
|
+
else:
|
|
218
|
+
item_id = ''
|
|
219
|
+
annotations = None
|
|
220
|
+
item_info.update(item_id=item_id)
|
|
221
|
+
if self.annotation_type is not None:
|
|
222
|
+
# add item id from json
|
|
223
|
+
polygon_coordinates = list()
|
|
224
|
+
box_coordinates = list()
|
|
225
|
+
classes_ids = list()
|
|
226
|
+
labels = list()
|
|
227
|
+
if annotations is not None:
|
|
228
|
+
for annotation in annotations:
|
|
229
|
+
if 'user' in annotation.metadata and \
|
|
230
|
+
'model' in annotation.metadata['user']:
|
|
231
|
+
# and 'name' in annotation.metadata['user']['model']:
|
|
232
|
+
# Do not use prediction annotations in the data generator
|
|
233
|
+
continue
|
|
234
|
+
if annotation.type == self.annotation_type:
|
|
235
|
+
if annotation.label not in self.label_to_id_map:
|
|
236
|
+
logger.warning(
|
|
237
|
+
'Missing label {!r} in label_to_id_map. Skipping.. Use label_to_id_map for other behaviour'.format(
|
|
238
|
+
annotation.label))
|
|
239
|
+
else:
|
|
240
|
+
classes_ids.append(self.label_to_id_map[annotation.label])
|
|
241
|
+
labels.append(annotation.label)
|
|
242
|
+
box_coordinates.append(np.asarray([annotation.left,
|
|
243
|
+
annotation.top,
|
|
244
|
+
annotation.right,
|
|
245
|
+
annotation.bottom]))
|
|
246
|
+
if self.annotation_type == entities.AnnotationType.POLYGON:
|
|
247
|
+
polygon_coordinates.append(annotation.geo)
|
|
248
|
+
if annotation.type not in [entities.AnnotationType.CLASSIFICATION,
|
|
249
|
+
entities.AnnotationType.SEGMENTATION,
|
|
250
|
+
entities.AnnotationType.BOX,
|
|
251
|
+
entities.AnnotationType.POLYGON]:
|
|
252
|
+
raise ValueError('unsupported annotation type: {}'.format(annotation.type))
|
|
253
|
+
dtype = object if self.annotation_type == entities.AnnotationType.POLYGON else None
|
|
254
|
+
# reorder for output
|
|
255
|
+
item_info.update({entities.AnnotationType.BOX.value: np.asarray(box_coordinates).astype(float),
|
|
256
|
+
entities.AnnotationType.CLASSIFICATION.value: np.asarray(classes_ids),
|
|
257
|
+
entities.AnnotationType.POLYGON.value: np.asarray(polygon_coordinates, dtype=dtype),
|
|
258
|
+
'labels': labels})
|
|
259
|
+
if len(item_info[entities.AnnotationType.CLASSIFICATION.value]) == 0:
|
|
260
|
+
logger.debug('Empty annotation (nothing matched label_to_id_map) for image filename: {}'.format(
|
|
261
|
+
image_filepath))
|
|
262
|
+
is_empty = True
|
|
263
|
+
if self.to_mask:
|
|
264
|
+
# get "platform" path
|
|
265
|
+
rel_path = image_filepath.relative_to(self._items_path)
|
|
266
|
+
# replace suffix to PNG
|
|
267
|
+
rel_path_wo_png_ext = rel_path.with_suffix('.png')
|
|
268
|
+
# create local path
|
|
269
|
+
mask_filepath = Path(self._mask_path, rel_path_wo_png_ext)
|
|
270
|
+
if not os.path.isfile(mask_filepath):
|
|
271
|
+
logger.debug('Empty annotation for image filename: {}'.format(image_filepath))
|
|
272
|
+
is_empty = True
|
|
273
|
+
item_info.update({entities.AnnotationType.SEGMENTATION.value: str(mask_filepath)})
|
|
274
|
+
item_info.update(annotation_filepath=str(annotation_filepath))
|
|
275
|
+
return item_info, is_empty
|
|
276
|
+
except Exception:
|
|
277
|
+
logger.exception('failed loading item in generator! {!r}'.format(image_filepath))
|
|
278
|
+
return None, True
|
|
279
|
+
finally:
|
|
280
|
+
if pbar is not None:
|
|
281
|
+
pbar.update()
|
|
282
|
+
|
|
283
|
+
def load_annotations(self):
|
|
284
|
+
logger.info(f"Collecting items with the following extensions: {self.item_type}")
|
|
285
|
+
files = list()
|
|
286
|
+
for ext in self.item_type:
|
|
287
|
+
# build regex to ignore extension case
|
|
288
|
+
regex = '*.{}'.format(''.join(['[{}{}]'.format(letter.lower(), letter.upper()) for letter in ext]))
|
|
289
|
+
files.extend(self._items_path.rglob(regex))
|
|
290
|
+
|
+        pool = ThreadPoolExecutor(max_workers=32)
+        jobs = list()
+        pbar = tqdm.tqdm(total=len(files),
+                         desc='Loading Data Generator',
+                         disable=self.dataset_entity._client_api.verbose.disable_progress_bar,
+                         file=sys.stdout)
+        for image_filepath in files:
+            jobs.append(pool.submit(self._load_single,
+                                    image_filepath=image_filepath,
+                                    pbar=pbar))
+        outputs = [job.result() for job in jobs]
+        pbar.close()
+
+        n_items = len(outputs)
+        n_empty_items = sum([1 for _, is_empty in outputs if is_empty is True])
+
+        output_msg = 'Done loading items. Total items loaded: {}.'.format(n_items)
+        if n_empty_items > 0:
+            output_msg += ' {action} {n_empty_items} items without annotations'.format(
+                action='IGNORING' if self.ignore_empty else 'INCLUDING',
+                n_empty_items=n_empty_items)
+
+        if self.ignore_empty:
+            # take ONLY non-empty items
+            data_items = [data_item for data_item, is_empty in outputs if is_empty is False]
+        else:
+            # take all items
+            data_items = [data_item for data_item, is_empty in outputs]
+
+        self.data_items = data_items
+        if len(self.data_items) == 0:
+            logger.warning(output_msg)
+        else:
+            logger.info(output_msg)
+        ###################
+        # class balancing #
+        ###################
+        labels = [label for item in self.data_items for label in item.get('labels', list())]
+        logger.info(f"Data Generator labels balance statistics: {collections.Counter(labels)}")
+        if self.class_balancing:
+            try:
+                from imblearn.over_sampling import RandomOverSampler
+            except Exception:
+                logger.error(
+                    'Class balancing is ON but "imbalanced-learn" is missing. Run "pip install -U imbalanced-learn" and try again')
+                raise
+            logger.info('Class balancing is on!')
+            class_ids = [class_id for item in self.data_items for class_id in item['class']]
+            dummy_inds = [i_item for i_item, item in enumerate(self.data_items) for _ in item['class']]
+            over_sampler = RandomOverSampler(random_state=42)
+            X_res, y_res = over_sampler.fit_resample(np.asarray(dummy_inds).reshape(-1, 1), np.asarray(class_ids))
+            over_sampled_data_items = [self.data_items[i] for i in X_res.flatten()]
+            oversampled_labels = [label for item in over_sampled_data_items for label in item['labels']]
+            logger.info(f"Data Generator labels after oversampling: {collections.Counter(oversampled_labels)}")
+            self.data_items = over_sampled_data_items
+
+        if self.shuffle:
+            if self.seed is None:
+                self.seed = 256
+            np.random.seed(self.seed)
+            np.random.shuffle(self.data_items)
+
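The balancing step uses a dummy-index trick: each item index is repeated once per class id it contains, and `RandomOverSampler` duplicates the indices of under-represented classes. A toy sketch of the same mechanics (requires `pip install imbalanced-learn`; the data is made up):

```python
import numpy as np
from imblearn.over_sampling import RandomOverSampler

items = [{'class': [0, 0]}, {'class': [1]}, {'class': [0]}]              # toy data_items
class_ids = [c for item in items for c in item['class']]                 # [0, 0, 1, 0]
dummy_inds = [i for i, item in enumerate(items) for _ in item['class']]  # [0, 0, 1, 2]

X_res, y_res = RandomOverSampler(random_state=42).fit_resample(
    np.asarray(dummy_inds).reshape(-1, 1), np.asarray(class_ids))
balanced = [items[i] for i in X_res.flatten()]
# class 1 appeared once against three 0s, so its item is duplicated until counts match
```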
+    def transform(self, image, target=None):
+        if self._transforms is not None:
+            image, target = self._transforms_callback(transforms=self._transforms,
+                                                      image=image,
+                                                      target=target,
+                                                      annotation_type=self.annotation_type)
+        return image, target
+
+    def _to_dtlpy(self, targets, labels=None):
+        annotations = entities.AnnotationCollection(item=None)
+        annotations._dataset = self._dataset_entity
+        if labels is None:
+            labels = [None] * len(targets)
+        if self.to_mask is True:
+            for label, label_ind in self.label_to_id_map.items():
+                target = targets == label_ind
+                if np.any(target):
+                    annotations.add(annotation_definition=entities.Segmentation(geo=target,
+                                                                                label=label))
+        elif self.annotation_type == entities.AnnotationType.BOX:
+            for target, label in zip(targets, labels):
+                annotations.add(annotation_definition=entities.Box(left=target[0],
+                                                                   top=target[1],
+                                                                   right=target[2],
+                                                                   bottom=target[3],
+                                                                   label=label))
+        elif self.annotation_type == entities.AnnotationType.CLASSIFICATION:
+            for target, label in zip(targets, labels):
+                annotations.add(annotation_definition=entities.Classification(label=label))
+        elif self.annotation_type == entities.AnnotationType.POLYGON:
+            for target, label in zip(targets, labels):
+                annotations.add(annotation_definition=entities.Polygon(label=label,
+                                                                       geo=target.astype(float)))
+        else:
+            raise ValueError('unsupported annotation type: {}'.format(self.annotation_type))
+        # set dataset for color
+        for annotation in annotations:
+            annotation._dataset = self._dataset_entity
+        return annotations
+
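As a rough illustration of the BOX branch above, converting raw box targets and labels back into dtlpy annotation definitions looks like this (the coordinate values and label are made up):

```python
import dtlpy as dl

targets = [[10, 20, 110, 220]]  # [left, top, right, bottom] per box
labels = ['cat']
collection = dl.AnnotationCollection(item=None)
for target, label in zip(targets, labels):
    collection.add(annotation_definition=dl.Box(left=target[0],
                                                top=target[1],
                                                right=target[2],
                                                bottom=target[3],
                                                label=label))
```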
+    def visualize(self, idx=None, return_output=False, plot=True):
+        if not self.__len__():
+            raise ValueError('no items selected, cannot perform visualization')
+        import matplotlib.pyplot as plt
+        if idx is None:
+            idx = np.random.randint(self.__len__())
+        if self.batch_size is not None:
+            raise ValueError('can only visualize when batch_size is None')
+        data_item = self.__getitem__(idx)
+        image = Image.fromarray(data_item.get('image'))
+        labels = data_item.get('labels')
+        targets = data_item.get('annotations')
+        annotations = self._to_dtlpy(targets=targets, labels=labels)
+        mask = Image.fromarray(annotations.show(height=image.size[1],
+                                                width=image.size[0],
+                                                alpha=0.8))
+        image.paste(mask, (0, 0), mask)
+        marked_image = np.asarray(image)
+        if plot:
+            plt.figure()
+            plt.imshow(marked_image)
+        if return_output:
+            return marked_image, annotations
+
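A possible usage sketch, assuming `gen` is an instance of this generator constructed with `batch_size=None` (the constructor itself is outside this hunk, so the variable is hypothetical):

```python
import matplotlib.pyplot as plt

# overlays the item's annotations on the image at alpha=0.8
marked, annotations = gen.visualize(idx=0, return_output=True, plot=True)
plt.show()
```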
+    def __getsingleitem__(self, idx):
+        data_item = copy.deepcopy(self.data_items[idx])
+
+        image_filename = data_item.get('image_filepath')
+        image = np.asarray(Image.open(image_filename))
+        data_item.update({'image': image})
+
+        annotations = data_item.get(self.annotation_type)
+        if self.to_mask is True:
+            # if segmentation - read the mask from file
+            mask_filepath = data_item.get(entities.AnnotationType.SEGMENTATION)
+            annotations = np.asarray(Image.open(mask_filepath).convert('L'))
+            if self.to_categorical:
+                onehot = np.zeros((annotations.size, self.num_classes + 1))
+                # flatten the mask so each pixel selects its one-hot row
+                onehot[np.arange(annotations.size), annotations.ravel()] = 1
+                annotations = onehot
+        data_item.update({'annotations': annotations})
+
+        if self.return_originals is True:
+            annotations = []
+            if self.annotation_type is not None:
+                annotations = data_item.get('annotations')
+            data_item.update({'orig_image': image.copy(),
+                              'orig_annotations': annotations.copy()})
+
+        ###########################
+        # perform transformations #
+        ###########################
+        if self._transforms is not None:
+            annotations = data_item.get('annotations')
+            image, annotations = self.transform(image, annotations)
+            data_item.update({'image': image,
+                              'annotations': annotations})
+        return data_item
+
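A minimal sketch of the `to_categorical` step above: a label mask of integer class ids is flattened and one-hot encoded over `num_classes + 1` columns (the extra column leaves room for a background/zero id). The toy mask is made up:

```python
import numpy as np

num_classes = 2
mask = np.array([[0, 1], [2, 1]])               # toy 2x2 label mask
onehot = np.zeros((mask.size, num_classes + 1))
onehot[np.arange(mask.size), mask.ravel()] = 1
# onehot rows: [1,0,0], [0,1,0], [0,0,1], [0,1,0]
```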
+    def __iter__(self):
+        """Create a generator that iterates over the Sequence."""
+        for item in (self[i] for i in range(len(self))):
+            yield item
+
+    def __len__(self):
+        factor = self.batch_size
+        if factor is None:
+            factor = 1
+        return int(np.ceil(self.n_items / factor))
+
+    def __getitem__(self, idx):
+        """
+        Support a single index or a slice.
+        Uses a ThreadPoolExecutor if num_workers != 0
+        """
+        to_return = None
+        if isinstance(idx, int):
+            if self.batch_size is None:
+                to_return = self.__getsingleitem__(idx)
+            else:
+                # if batch_size is defined, convert the index to a batch slice
+                idx = slice(idx * self.batch_size, min((idx + 1) * self.batch_size, len(self.data_items)))
+
+        if isinstance(idx, slice):
+            to_return = list()
+            idxs = list(range(idx.start, idx.stop,
+                              idx.step if idx.step else 1))
+            if self.num_workers == 0:
+                for dx in idxs:
+                    to_return.append(self.__getsingleitem__(dx))
+            else:
+                with ThreadPoolExecutor(max_workers=self.num_workers) as executor:
+                    for sample in executor.map(lambda i: self.__getsingleitem__(i), idxs):
+                        to_return.append(sample)
+
+        if to_return is None:
+            raise TypeError('unsupported indexing: list indices must be integers or slices, not {}'.format(type(idx)))
+
+        if self.collate_fn is not None:
+            to_return = self.collate_fn(to_return)
+        return to_return
+
+
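A quick walk-through of the batching arithmetic above: with `batch_size=4` and 10 items, integer index 2 becomes `slice(8, 10)`, i.e. the last, shorter batch, and the generator length is `ceil(10 / 4) == 3`. The numbers here are purely illustrative:

```python
import numpy as np

n_items, batch_size, idx = 10, 4, 2
batch_slice = slice(idx * batch_size, min((idx + 1) * batch_size, n_items))
assert (batch_slice.start, batch_slice.stop) == (8, 10)
assert int(np.ceil(n_items / batch_size)) == 3
```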
+
+np_str_obj_array_pattern = re.compile(r'[SaUO]')
+
+default_collate_err_msg_format = (
+    "default_collate: batch must contain tensors, numpy arrays, numbers, "
+    "dicts or lists; found {}")
+
+
+def default_transforms_callback(transforms, image, target, annotation_type):
+    """
+    Recursive call to perform the augmentations in "transforms"
+
+    :param transforms: a single augmentation, a list of augmentations, or a composition (torchvision Compose / imgaug Sequential)
+    :param image: the input image (numpy array)
+    :param target: annotations matching `annotation_type`, or None
+    :param annotation_type: the entities.AnnotationType of `target`
+    :return: the transformed (image, target) pair
+    """
+    # get the type string without importing any other package
+    transforms_type = type(transforms)
+
+    ############
+    # Handle compositions and lists of augmentations with a recursive call
+    if transforms_type.__module__ == 'torchvision.transforms.transforms' and transforms_type.__name__ == 'Compose':
+        # torchvision compose - convert to list
+        image, target = default_transforms_callback(transforms.transforms, image, target, annotation_type)
+        return image, target
+
+    if transforms_type.__module__ == 'imgaug.augmenters.meta' and transforms_type.__name__ == 'Sequential':
+        # imgaug sequential - convert to list
+        image, target = default_transforms_callback(list(transforms), image, target, annotation_type)
+        return image, target
+
+    if isinstance(transforms, list):
+        for t in transforms:
+            image, target = default_transforms_callback(t, image, target, annotation_type)
+        return image, target
+
+    ##############
+    # Handle a single augmentation
+    if 'imgaug.augmenters' in transforms_type.__module__:
+        # handle a single imgaug augmentation
+        if target is not None and annotation_type is not None:
+            # the imgaug API expects batches, so wrap and unwrap a single image
+            if annotation_type == entities.AnnotationType.BOX:
+                image, target = transforms(images=[image], bounding_boxes=[target])
+                target = target[0]
+            elif annotation_type == entities.AnnotationType.SEGMENTATION:
+                # expanding to HxWx1 for the imgaug function to work
+                target = target[..., None]
+                image, target = transforms(images=[image], segmentation_maps=[target])
+                target = target[0][:, :, 0]
+            elif annotation_type == entities.AnnotationType.POLYGON:
+                image, target = transforms(images=[image], polygons=[target])
+                target = target[0]
+            elif annotation_type == entities.AnnotationType.CLASSIFICATION:
+                image = transforms(images=[image])
+            else:
+                raise ValueError('unsupported annotations type for image augmentations: {}'.format(annotation_type))
+            image = image[0]
+        else:
+            image = transforms(images=[image])
+            image = image[0]
+    else:
+        image = transforms(image)
+
+    return image, target
+
+
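A hedged sketch of what this callback would be fed in the BOX case (requires `pip install imgaug`; the image, box, and augmenter are made up). The callback unwraps the `Sequential` into a list and ends up making exactly the batched call shown on the last line:

```python
import numpy as np
import imgaug.augmenters as iaa
from imgaug.augmentables.bbs import BoundingBox, BoundingBoxesOnImage

image = np.zeros((128, 128, 3), dtype=np.uint8)
boxes = BoundingBoxesOnImage([BoundingBox(x1=10, y1=10, x2=50, y2=60)], shape=image.shape)
seq = iaa.Sequential([iaa.Fliplr(1.0)])  # always flip horizontally

# equivalent to the AnnotationType.BOX branch above
images_aug, boxes_aug = seq(images=[image], bounding_boxes=[boxes])
```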
+def collate_default(batch):
+    r"""Puts each data field into a tensor with outer dimension batch size"""
+    elem = batch[0]
+    elem_type = type(elem)
+    if isinstance(elem, np.ndarray):
+        return np.stack(batch, axis=0)
+    elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' and elem_type.__name__ != 'string_':
+        if elem_type.__name__ == 'ndarray' or elem_type.__name__ == 'memmap':
+            # array of string classes and object
+            if np_str_obj_array_pattern.search(elem.dtype.str) is not None:
+                raise TypeError(default_collate_err_msg_format.format(elem.dtype))
+            return batch
+            # return [tf.convert_to_tensor(b) for b in batch]
+        elif elem.shape == ():  # scalars
+            return batch
+    elif isinstance(elem, float):
+        return batch
+    elif isinstance(elem, int):
+        return batch
+    elif isinstance(elem, str) or isinstance(elem, bytes) or elem is None:
+        return batch
+    elif isinstance(elem, collections.abc.Mapping):
+        return {key: collate_default([d[key] for d in batch]) for key in elem}
+    elif isinstance(elem, tuple) and hasattr(elem, '_fields'):  # namedtuple
+        return elem_type(*(collate_default(samples) for samples in zip(*batch)))
+    elif isinstance(elem, collections.abc.Sequence):
+        transposed = zip(*batch)
+        return transposed
+    raise TypeError(default_collate_err_msg_format.format(elem_type))
+
+
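A small usage sketch of `collate_default`, assuming it is importable from this module: a list of per-item dicts becomes a dict of batched fields, with numpy images stacked along a new leading axis. The keys and shapes are illustrative:

```python
import numpy as np

batch = [{'image': np.zeros((64, 64, 3))},
         {'image': np.ones((64, 64, 3))}]
collated = collate_default(batch)
assert collated['image'].shape == (2, 64, 64, 3)  # images stacked on a new batch axis
```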
+def collate_torch(batch):
+    r"""Puts each data field into a tensor with outer dimension batch size"""
+    import torch
+    elem = batch[0]
+    elem_type = type(elem)
+    if isinstance(elem, torch.Tensor):
+        out = None
+        if torch.utils.data.get_worker_info() is not None:
+            # If we're in a background process, concatenate directly into a
+            # shared memory tensor to avoid an extra copy
+            numel = sum(x.numel() for x in batch)
+            storage = elem.storage()._new_shared(numel)
+            out = elem.new(storage)
+        return torch.stack(batch, 0, out=out)
+    elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' and elem_type.__name__ != 'string_':
+        if elem_type.__name__ == 'ndarray' or elem_type.__name__ == 'memmap':
+            # array of string classes and object
+            if np_str_obj_array_pattern.search(elem.dtype.str) is not None:
+                raise TypeError(default_collate_err_msg_format.format(elem.dtype))
+            try:
+                return torch.stack([torch.as_tensor(b) for b in batch])
+            except RuntimeError:
+                return batch
+        elif elem.shape == ():  # scalars
+            return torch.as_tensor(batch)
+    elif isinstance(elem, float):
+        return torch.tensor(batch, dtype=torch.float64)
+    elif isinstance(elem, int):
+        return torch.tensor(batch)
+    elif isinstance(elem, str) or isinstance(elem, bytes) or elem is None:
+        return batch
+    elif isinstance(elem, collections.abc.Mapping):
+        return {key: collate_torch([d[key] for d in batch]) for key in elem}
+    elif isinstance(elem, tuple) and hasattr(elem, '_fields'):  # namedtuple
+        return elem_type(*(collate_torch(samples) for samples in zip(*batch)))
+    elif isinstance(elem, collections.abc.Sequence):
+        transposed = zip(*batch)
+        return transposed
+
+    raise TypeError(default_collate_err_msg_format.format(elem_type))
+
+
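The torch variant mirrors PyTorch's own `default_collate`: equal-shaped numpy arrays are stacked into a single tensor. A hedged sketch, assuming `collate_torch` is importable from this module and torch is installed:

```python
import numpy as np
import torch

batch = [{'image': np.zeros((3, 64, 64), dtype=np.float32)},
         {'image': np.ones((3, 64, 64), dtype=np.float32)}]
collated = collate_torch(batch)
assert isinstance(collated['image'], torch.Tensor)
assert collated['image'].shape == (2, 3, 64, 64)
```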
+def collate_tf(batch):
+    r"""Puts each data field into a tensor with outer dimension batch size"""
+    import tensorflow as tf
+    elem = batch[0]
+    elem_type = type(elem)
+    if isinstance(elem, tf.Tensor):
+        return tf.stack(batch, axis=0)
+    elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' and elem_type.__name__ != 'string_':
+        if elem_type.__name__ == 'ndarray' or elem_type.__name__ == 'memmap':
+            # array of string classes and object
+            if np_str_obj_array_pattern.search(elem.dtype.str) is not None:
+                raise TypeError(default_collate_err_msg_format.format(elem.dtype))
+            try:
+                return tf.convert_to_tensor(batch)
+            except ValueError:
+                # fails on orig_image when shapes mismatch (images are not all resized, so they cannot be stacked)
+                return batch
+                # return [tf.convert_to_tensor(b) for b in batch]
+        elif elem.shape == ():  # scalars
+            return tf.convert_to_tensor(batch)
+    elif isinstance(elem, float):
+        return tf.convert_to_tensor(batch, dtype=tf.float64)
+    elif isinstance(elem, int):
+        return tf.convert_to_tensor(batch)
+    elif isinstance(elem, str) or isinstance(elem, bytes) or elem is None:
+        return batch
+    elif isinstance(elem, collections.abc.Mapping):
+        return {key: collate_tf([d[key] for d in batch]) for key in elem}
+    elif isinstance(elem, tuple) and hasattr(elem, '_fields'):  # namedtuple
+        return elem_type(*(collate_tf(samples) for samples in zip(*batch)))
+    elif isinstance(elem, collections.abc.Sequence):
+        transposed = zip(*batch)
+        return transposed
+    raise TypeError(default_collate_err_msg_format.format(elem_type))
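Putting the pieces together, an end-to-end usage sketch might look like the following. The generator class name and its constructor arguments are assumptions for illustration only (they are not shown in this hunk); only `visualize()`, the `__iter__`/`__getitem__` protocol, and the collate hooks appear above:

```python
import dtlpy as dl

dataset = dl.datasets.get(dataset_id='my-dataset-id')   # hypothetical id
gen = DatasetGenerator(dataset_entity=dataset,          # hypothetical class/args
                       annotation_type=dl.AnnotationType.BOX,
                       batch_size=4,
                       collate_fn=collate_default)
for batch in gen:                                       # driven by __iter__/__getitem__
    images = batch['image']                             # stacked by the collate function
```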