lost_ds 1.2.0a4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lost_ds/__init__.py +63 -0
- lost_ds/compat.py +104 -0
- lost_ds/copy.py +117 -0
- lost_ds/core.py +999 -0
- lost_ds/cropping/__init__.py +0 -0
- lost_ds/cropping/api.py +3 -0
- lost_ds/cropping/cropping.py +424 -0
- lost_ds/cropping/ds_cropper.py +153 -0
- lost_ds/detection/__init__.py +0 -0
- lost_ds/detection/api.py +1 -0
- lost_ds/detection/bbox_merge.py +91 -0
- lost_ds/detection/detection.py +297 -0
- lost_ds/experimental/__init__.py +0 -0
- lost_ds/functional/__init__.py +0 -0
- lost_ds/functional/api.py +30 -0
- lost_ds/functional/filter.py +109 -0
- lost_ds/functional/mapping.py +60 -0
- lost_ds/functional/split.py +271 -0
- lost_ds/functional/transform.py +454 -0
- lost_ds/functional/validation.py +120 -0
- lost_ds/geometry/__init__.py +0 -0
- lost_ds/geometry/api.py +6 -0
- lost_ds/geometry/bbox.py +163 -0
- lost_ds/geometry/geometry.py +109 -0
- lost_ds/geometry/line.py +56 -0
- lost_ds/geometry/lost_geom.py +270 -0
- lost_ds/geometry/point.py +56 -0
- lost_ds/geometry/polygon.py +59 -0
- lost_ds/im_util.py +43 -0
- lost_ds/io/__init__.py +0 -0
- lost_ds/io/file_man.py +172 -0
- lost_ds/masking/__init__.py +0 -0
- lost_ds/masking/api.py +1 -0
- lost_ds/masking/masking.py +83 -0
- lost_ds/reporting/__init__.py +0 -0
- lost_ds/scheduler.py +15 -0
- lost_ds/segmentation/__init__.py +0 -0
- lost_ds/segmentation/anno_from_seg.py +138 -0
- lost_ds/segmentation/api.py +4 -0
- lost_ds/segmentation/instance_seg.py +75 -0
- lost_ds/segmentation/panoptic_seg.py +119 -0
- lost_ds/segmentation/semantic_seg.py +141 -0
- lost_ds/util.py +75 -0
- lost_ds/vis/__init__.py +0 -0
- lost_ds/vis/api.py +10 -0
- lost_ds/vis/geometries.py +202 -0
- lost_ds/vis/vis.py +162 -0
- lost_ds-1.2.0a4.dist-info/METADATA +51 -0
- lost_ds-1.2.0a4.dist-info/RECORD +51 -0
- lost_ds-1.2.0a4.dist-info/WHEEL +4 -0
- lost_ds-1.2.0a4.dist-info/licenses/LICENSE +21 -0
lost_ds/__init__.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
from lost_ds.geometry.api import LOSTGeometries
|
|
2
|
+
|
|
3
|
+
from lost_ds.functional.api import(remove_empty,
|
|
4
|
+
split_by_empty,
|
|
5
|
+
split_by_img_path,
|
|
6
|
+
split_train_test,
|
|
7
|
+
split_multilabels,
|
|
8
|
+
remap_labels,
|
|
9
|
+
remap_img_path,
|
|
10
|
+
ignore_labels,
|
|
11
|
+
img_selection,
|
|
12
|
+
is_multilabel,
|
|
13
|
+
label_selection,
|
|
14
|
+
polygon_to_bbox,
|
|
15
|
+
selection_mask,
|
|
16
|
+
to_abs,
|
|
17
|
+
to_rel,
|
|
18
|
+
transform_bbox_style,
|
|
19
|
+
to_coco,
|
|
20
|
+
unique_labels,
|
|
21
|
+
validate_empty_images,
|
|
22
|
+
validate_geometries,
|
|
23
|
+
validate_img_paths,
|
|
24
|
+
validate_unique_annos,
|
|
25
|
+
validate_single_labels)
|
|
26
|
+
|
|
27
|
+
from lost_ds.cropping.api import (DSCropper,
|
|
28
|
+
crop_anno,
|
|
29
|
+
crop_img,
|
|
30
|
+
crop_dataset,
|
|
31
|
+
crop_components)
|
|
32
|
+
|
|
33
|
+
from lost_ds.copy import (copy_imgs,
|
|
34
|
+
pack_ds)
|
|
35
|
+
|
|
36
|
+
from lost_ds.im_util import (get_imagesize,
|
|
37
|
+
pad_image)
|
|
38
|
+
|
|
39
|
+
from lost_ds.vis.api import (vis_sample,
|
|
40
|
+
vis_and_store,
|
|
41
|
+
vis_semantic_segmentation,
|
|
42
|
+
draw_polygons,
|
|
43
|
+
draw_boxes,
|
|
44
|
+
draw_lines,
|
|
45
|
+
draw_points,
|
|
46
|
+
draw_text)
|
|
47
|
+
|
|
48
|
+
from lost_ds.segmentation.api import (semantic_segmentation,
|
|
49
|
+
segmentation_to_lost)
|
|
50
|
+
|
|
51
|
+
from lost_ds.detection.api import (detection_dataset, bbox_nms, coco_eval,
|
|
52
|
+
voc_eval, voc_score_iou_multiplex)
|
|
53
|
+
|
|
54
|
+
from lost_ds.masking.api import mask_dataset
|
|
55
|
+
|
|
56
|
+
from lost_ds.util import (get_fs,
|
|
57
|
+
to_parquet)
|
|
58
|
+
|
|
59
|
+
from lost_ds.core import LOSTDataset
|
|
60
|
+
|
|
61
|
+
from lost_ds.compat import old_lds_to_new_lds
|
|
62
|
+
|
|
63
|
+
__version__='1.2.0-alpha.4'
|
lost_ds/compat.py
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
|
|
2
|
+
from ast import literal_eval
|
|
3
|
+
import json
|
|
4
|
+
import pandas as pd
|
|
5
|
+
from lost_ds.core import LOSTDataset
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def old_lds_to_new_lds(df):
    '''Convert a dataframe in the old LOST annotation format to a LOSTDataset.

    Args:
        df (pd.DataFrame): dataframe using the old dot-separated column names
            (e.g. 'anno.data', 'img.img_path') where annotation data may be
            stringified python literals or json.

    Returns:
        LOSTDataset: dataset with the new underscore column names
            ('anno_data', 'img_path', ...), parsed annotation data and the
            additional 'anno_style' / 'anno_format' columns.
    '''
    # load data (unstring)
    def literal_eval_parse(entry):
        # Parse a stringified python literal; 'nan' inside the string is
        # replaced by 'None' so literal_eval can handle it.
        if not pd.isnull(entry):
            if isinstance(entry, str):
                return literal_eval(entry.replace('nan', 'None'))
            else:
                return literal_eval(entry)
        else:
            return None

    def parse_col(col):
        # Try python-literal parsing first, fall back to json, and leave
        # the column untouched if neither format applies.
        try:
            return col.map(literal_eval_parse)
        except Exception:
            try:
                return col.map(json.loads)
            except Exception:
                return col

    df = df.apply(parse_col, axis=0)

    # map old dot-separated keys to new underscore keys
    mapping = {'anno.idx': 'anno_uid',
               'anno.timestamp': 'anno_timestamp',
               'anno.state': 'anno_state',
               'anno.dtype': 'anno_dtype',
               'anno.sim_class': 'anno_sim_class',
               'anno.iteration': 'anno_iteration',
               'anno.user_id': 'anno_user_id',
               'anno.user': 'anno_user',
               'anno.confidence': 'anno_confidence',
               'anno.anno_time': 'anno_anno_time',
               'anno.data': 'anno_data',
               'anno.lbl.name': 'anno_lbl',
               'img.idx': 'img_uid',
               'img.timestamp': 'img_timestamp',
               'img.state': 'img_state',
               'img.sim_class': 'img_sim_class',
               'img.frame_n': 'img_frame_n',
               'img.img_path': 'img_path',
               'img.iteration': 'img_iteration',
               'img.user_id': 'img_user_id',
               'img.anno_time': 'img_anno_time',
               'img.lbl.name': 'img_lbl',
               'img.annotator': 'img_user',
               'img.is_junk': 'img_is_junk'
               }
    df_map = df.rename(columns=mapping)
    new_keys = ['anno_style', 'anno_format']
    df_map[new_keys] = None

    # transform anno data
    def parse_data(row):
        # Convert old dict-based anno data to the new list representation
        # and derive the matching style / format descriptors.
        dtype = row['anno_dtype']
        data = None
        style = None
        frmt = 'rel'
        if dtype == 'bbox':
            data = list(row['anno_data'].values())
            style = 'xcycwh'
        elif dtype in ['polygon', 'line']:
            data = [list(p.values()) for p in row['anno_data']]
            style = 'xy'
        elif dtype == 'point':
            data = list(row['anno_data'].values())
            style = 'xy'
        return data, style, frmt

    new_dat = df_map.apply(parse_data, axis=1)
    dic = {'anno_data': [],
           'anno_style': [],
           'anno_format': []}

    indexes = []
    # FIX: Series.iteritems() was removed in pandas 2.0 -> use items()
    for i, dat in new_dat.items():
        indexes.append(i)
        dic['anno_data'].append(dat[0])
        dic['anno_style'].append(dat[1])
        dic['anno_format'].append(dat[2])
    df_dat = pd.DataFrame(dic, index=indexes)
    df_map[['anno_data', 'anno_style', 'anno_format']] = df_dat[['anno_data', 'anno_style', 'anno_format']]

    # drop all remaining old-style (dot-separated) columns
    drop_keys = [k for k in list(df_map.keys()) if '.' in k]
    df_map.drop(labels=drop_keys, axis='columns', inplace=True)

    # labels are lowercased in the new format; non-string entries pass through
    def _lbl_parser(lbl):
        if isinstance(lbl, str): return lbl.lower()
        else: return lbl
    df_map['anno_lbl'] = df_map['anno_lbl'].map(lambda x: [_lbl_parser(lbl) for lbl in x] if isinstance(x, list) else x)

    return LOSTDataset(df_map)
|
|
104
|
+
|
lost_ds/copy.py
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
from tqdm import tqdm
|
|
2
|
+
import fsspec
|
|
3
|
+
import os
|
|
4
|
+
from joblib import Parallel, delayed, cpu_count
|
|
5
|
+
from zipfile import ZipFile
|
|
6
|
+
|
|
7
|
+
from lost_ds.functional.mapping import remap_img_path
|
|
8
|
+
from lost_ds.util import get_fs
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def copy_imgs(df, out_dir, col='img_path', force_overwrite=False,
              filesystem=None, parallel=-1):
    '''Copy all images of dataset into out_dir

    Args:
        df (pd.DataFrame): dataframe to copy
        out_dir (str): Destination folder to store images
        col (str): column containing paths to files
        force_overwrite (bool): overwrite files already present at the
            destination when True
        filesystem (fsspec.filesystem, FileMan): filesystem to use. Use local
            if not initialized
        parallel (int): n_jobs passed to joblib.Parallel; a falsy value
            (0/None) copies sequentially
    '''
    fs = get_fs(filesystem)

    def _transfer(src):
        # Skip files already at the destination unless overwriting is forced.
        target = os.path.join(out_dir, os.path.basename(src))
        if fs.exists(target) and not force_overwrite:
            return
        fs.copy(src, target)

    fs.makedirs(out_dir, exist_ok=True)
    sources = list(df[col].unique())
    if parallel:
        Parallel(n_jobs=parallel)(delayed(_transfer)(src)
                                  for src in tqdm(sources, desc='copy imgs'))
    else:
        for src in tqdm(sources, desc='copy imgs'):
            _transfer(src)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def copy_to_zip(zip_file, df, zip_dir, col='img_path',
                filesystem=None, progress_callback=None):
    '''Copy all images of dataset into zip archive

    Args:
        zip_file (zipfile.ZipFile): open zip archive to write the files into
        df (pd.DataFrame): dataframe to copy
        zip_dir (str): root path inside the zip archive
        col (str): column containing paths to files
        filesystem (fsspec.filesystem, FileMan): filesystem to use. Use local
            if not initialized
        progress_callback (function): Will be called on progress
            callback definition -> progress_callback(progress), where progress
            value is between 0...100
    '''

    fs = get_fs(filesystem).fs

    def copy_file_to_zip(src_path, zip_file):
        dst_path = os.path.join(zip_dir, os.path.basename(src_path))
        try:
            # NOTE(review): best-effort fs poke, presumably to refresh the
            # connection/listing cache before reading -- confirm still needed
            fs.ls('')
        except Exception:
            # FIX: narrowed from bare except so KeyboardInterrupt/SystemExit
            # are not swallowed
            pass
        with fs.open(src_path, 'rb') as f:
            zip_file.writestr(dst_path, f.read())

    img_paths = list(df[col].unique())
    total = len(img_paths)
    next_pg = 0
    for idx, path in enumerate(img_paths):
        copy_file_to_zip(path, zip_file)
        if progress_callback is not None:
            pg = (idx + 1) * 100 / total
            if pg == 100:
                progress_callback(pg)
            elif pg >= next_pg:
                progress_callback(pg)
                # FIX: advance the threshold past the current progress so a
                # jump of more than 5% does not trigger the callback on every
                # following iteration
                while next_pg <= pg:
                    next_pg += 5
|
|
76
|
+
|
|
77
|
+
def pack_ds(df, out_dir, cols=('img_path', 'mask_path', 'crop_path'),
            dirs=('imgs', 'masks', 'crops'), filesystem=None, zip_file=None,
            progress_callback=None):
    '''Copy all images from dataset to a new place and update the dataframe

    Args:
        df (pd.DataFrame): Dataframe to copy
        out_dir (str): Name of the directory to store the information
        cols (iterable of string): column names containing file-paths
        dirs (iterable of string): name of new directories according to cols.
            The dirs will contain the copied data
        filesystem (fsspec.filesystem, FileMan): filesystem to use. Use local
            if not initialized
        zip_file (zipfile.ZipFile or None): If not None, a ZipFile object will
            be used to pack dataset to zip archive
        progress_callback (function): Will be called on progress
            callback definition -> progress_callback(progress), where progress
            value is between 0...100

    Returns:
        pd.DataFrame with new image paths
    '''
    # FIX: defaults are tuples instead of lists to avoid the shared
    # mutable-default-argument pitfall; any iterable of names still works.
    fs = get_fs(filesystem)
    for col, _dir in zip(cols, dirs):
        if col in df.keys():
            dout = os.path.join(out_dir, _dir)
            if zip_file is None:
                if progress_callback is not None:
                    raise Exception('progress_callback is only implemented '
                                    'for packing to zip files yet!')
                fs.makedirs(dout, exist_ok=True)
                copy_imgs(df=df, out_dir=dout, col=col, filesystem=fs)
            else:
                out_base = os.path.basename(out_dir)
                out_base = os.path.splitext(out_base)[0]
                zip_dir = os.path.join(out_base, _dir)
                copy_to_zip(zip_file, df, zip_dir=zip_dir, col=col,
                            filesystem=fs,
                            progress_callback=progress_callback)
            # NOTE(review): in the zip case the dataframe paths are remapped
            # to <out_dir>/<_dir> while the files are written under <zip_dir>
            # inside the archive -- confirm this is the intended layout
            df = remap_img_path(df, dout, col)
    return df
|