lost_ds 1.2.0a4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. lost_ds/__init__.py +63 -0
  2. lost_ds/compat.py +104 -0
  3. lost_ds/copy.py +117 -0
  4. lost_ds/core.py +999 -0
  5. lost_ds/cropping/__init__.py +0 -0
  6. lost_ds/cropping/api.py +3 -0
  7. lost_ds/cropping/cropping.py +424 -0
  8. lost_ds/cropping/ds_cropper.py +153 -0
  9. lost_ds/detection/__init__.py +0 -0
  10. lost_ds/detection/api.py +1 -0
  11. lost_ds/detection/bbox_merge.py +91 -0
  12. lost_ds/detection/detection.py +297 -0
  13. lost_ds/experimental/__init__.py +0 -0
  14. lost_ds/functional/__init__.py +0 -0
  15. lost_ds/functional/api.py +30 -0
  16. lost_ds/functional/filter.py +109 -0
  17. lost_ds/functional/mapping.py +60 -0
  18. lost_ds/functional/split.py +271 -0
  19. lost_ds/functional/transform.py +454 -0
  20. lost_ds/functional/validation.py +120 -0
  21. lost_ds/geometry/__init__.py +0 -0
  22. lost_ds/geometry/api.py +6 -0
  23. lost_ds/geometry/bbox.py +163 -0
  24. lost_ds/geometry/geometry.py +109 -0
  25. lost_ds/geometry/line.py +56 -0
  26. lost_ds/geometry/lost_geom.py +270 -0
  27. lost_ds/geometry/point.py +56 -0
  28. lost_ds/geometry/polygon.py +59 -0
  29. lost_ds/im_util.py +43 -0
  30. lost_ds/io/__init__.py +0 -0
  31. lost_ds/io/file_man.py +172 -0
  32. lost_ds/masking/__init__.py +0 -0
  33. lost_ds/masking/api.py +1 -0
  34. lost_ds/masking/masking.py +83 -0
  35. lost_ds/reporting/__init__.py +0 -0
  36. lost_ds/scheduler.py +15 -0
  37. lost_ds/segmentation/__init__.py +0 -0
  38. lost_ds/segmentation/anno_from_seg.py +138 -0
  39. lost_ds/segmentation/api.py +4 -0
  40. lost_ds/segmentation/instance_seg.py +75 -0
  41. lost_ds/segmentation/panoptic_seg.py +119 -0
  42. lost_ds/segmentation/semantic_seg.py +141 -0
  43. lost_ds/util.py +75 -0
  44. lost_ds/vis/__init__.py +0 -0
  45. lost_ds/vis/api.py +10 -0
  46. lost_ds/vis/geometries.py +202 -0
  47. lost_ds/vis/vis.py +162 -0
  48. lost_ds-1.2.0a4.dist-info/METADATA +51 -0
  49. lost_ds-1.2.0a4.dist-info/RECORD +51 -0
  50. lost_ds-1.2.0a4.dist-info/WHEEL +4 -0
  51. lost_ds-1.2.0a4.dist-info/licenses/LICENSE +21 -0
lost_ds/__init__.py ADDED
@@ -0,0 +1,63 @@
1
+ from lost_ds.geometry.api import LOSTGeometries
2
+
3
+ from lost_ds.functional.api import(remove_empty,
4
+ split_by_empty,
5
+ split_by_img_path,
6
+ split_train_test,
7
+ split_multilabels,
8
+ remap_labels,
9
+ remap_img_path,
10
+ ignore_labels,
11
+ img_selection,
12
+ is_multilabel,
13
+ label_selection,
14
+ polygon_to_bbox,
15
+ selection_mask,
16
+ to_abs,
17
+ to_rel,
18
+ transform_bbox_style,
19
+ to_coco,
20
+ unique_labels,
21
+ validate_empty_images,
22
+ validate_geometries,
23
+ validate_img_paths,
24
+ validate_unique_annos,
25
+ validate_single_labels)
26
+
27
+ from lost_ds.cropping.api import (DSCropper,
28
+ crop_anno,
29
+ crop_img,
30
+ crop_dataset,
31
+ crop_components)
32
+
33
+ from lost_ds.copy import (copy_imgs,
34
+ pack_ds)
35
+
36
+ from lost_ds.im_util import (get_imagesize,
37
+ pad_image)
38
+
39
+ from lost_ds.vis.api import (vis_sample,
40
+ vis_and_store,
41
+ vis_semantic_segmentation,
42
+ draw_polygons,
43
+ draw_boxes,
44
+ draw_lines,
45
+ draw_points,
46
+ draw_text)
47
+
48
+ from lost_ds.segmentation.api import (semantic_segmentation,
49
+ segmentation_to_lost)
50
+
51
+ from lost_ds.detection.api import (detection_dataset, bbox_nms, coco_eval,
52
+ voc_eval, voc_score_iou_multiplex)
53
+
54
+ from lost_ds.masking.api import mask_dataset
55
+
56
+ from lost_ds.util import (get_fs,
57
+ to_parquet)
58
+
59
+ from lost_ds.core import LOSTDataset
60
+
61
+ from lost_ds.compat import old_lds_to_new_lds
62
+
63
+ __version__='1.2.0-alpha.4'
lost_ds/compat.py ADDED
@@ -0,0 +1,104 @@
1
+
2
+ from ast import literal_eval
3
+ import json
4
+ import pandas as pd
5
+ from lost_ds.core import LOSTDataset
6
+
7
+
8
def old_lds_to_new_lds(df):
    '''Convert a dataframe in the old LOST format to a new-style LOSTDataset.

    Args:
        df (pd.DataFrame): dataframe using the old dotted column names
            (e.g. 'anno.data', 'img.img_path'), possibly with stringified
            (python-literal or json) entries

    Returns:
        LOSTDataset: dataset with the new underscore column names and parsed
            anno_data / anno_style / anno_format columns
    '''
    # load data (unstring)
    def literal_eval_parse(entry):
        if not pd.isnull(entry):
            if isinstance(entry, str):
                # old exports serialized NaN as the bare token 'nan', which
                # literal_eval cannot parse -> map it to None first
                return literal_eval(entry.replace('nan', 'None'))
            else:
                return literal_eval(entry)
        else:
            return None

    def parse_col(col):
        # try python-literal parsing first, fall back to json, then keep raw
        try:
            return col.map(literal_eval_parse)
        except Exception:
            try:
                return col.map(json.loads)
            except Exception:
                return col

    df = df.apply(parse_col, axis=0)

    # map old dotted keys to new underscore keys
    mapping = { 'anno.idx': 'anno_uid',
                'anno.timestamp': 'anno_timestamp',
                'anno.state': 'anno_state',
                'anno.dtype':'anno_dtype',
                'anno.sim_class':'anno_sim_class',
                'anno.iteration':'anno_iteration',
                'anno.user_id':'anno_user_id',
                'anno.user':'anno_user',
                'anno.confidence':'anno_confidence',
                'anno.anno_time':'anno_anno_time',
                'anno.data': 'anno_data',
                'anno.lbl.name':'anno_lbl',
                'img.idx': 'img_uid',
                'img.timestamp': 'img_timestamp',
                'img.state': 'img_state',
                'img.sim_class': 'img_sim_class',
                'img.frame_n': 'img_frame_n',
                'img.img_path': 'img_path',
                'img.iteration': 'img_iteration',
                'img.user_id': 'img_user_id',
                'img.anno_time': 'img_anno_time',
                'img.lbl.name': 'img_lbl',
                'img.annotator': 'img_user',
                'img.is_junk': 'img_is_junk'
                }
    df_map = df.rename(columns=mapping)
    new_keys = ['anno_style', 'anno_format']
    df_map[new_keys] = None

    # transform anno data: the old format stored geometry as dicts of
    # coordinates; the new format uses flat lists plus explicit style/format
    def parse_data(row):
        dtype = row['anno_dtype']
        data = None
        style = None
        frmt = 'rel'
        if dtype == 'bbox':
            data = list(row['anno_data'].values())
            style = 'xcycwh'
        elif dtype in ['polygon', 'line']:
            data = [list(p.values()) for p in row['anno_data']]
            style = 'xy'
        elif dtype == 'point':
            data = list(row['anno_data'].values())
            style = 'xy'
        return data, style, frmt

    new_dat = df_map.apply(parse_data, axis=1)
    # FIX: Series.iteritems() was removed in pandas 2.0 -> build the frame
    # directly from the list of (data, style, format) tuples instead of
    # iterating item by item
    df_dat = pd.DataFrame(new_dat.tolist(),
                          columns=['anno_data', 'anno_style', 'anno_format'],
                          index=new_dat.index)
    df_map[['anno_data', 'anno_style', 'anno_format']] = df_dat

    # drop all remaining old-style (dotted) columns
    drop_keys = [k for k in list(df_map.keys()) if '.' in k]
    df_map.drop(labels=drop_keys, axis='columns', inplace=True)

    # normalize labels to lower case where they are strings
    def _lbl_parser(lbl):
        if isinstance(lbl, str):
            return lbl.lower()
        return lbl
    df_map['anno_lbl'] = df_map['anno_lbl'].map(
        lambda x: [_lbl_parser(lbl) for lbl in x] if isinstance(x, list) else x)

    return LOSTDataset(df_map)
104
+
lost_ds/copy.py ADDED
@@ -0,0 +1,117 @@
1
+ from tqdm import tqdm
2
+ import fsspec
3
+ import os
4
+ from joblib import Parallel, delayed, cpu_count
5
+ from zipfile import ZipFile
6
+
7
+ from lost_ds.functional.mapping import remap_img_path
8
+ from lost_ds.util import get_fs
9
+
10
+
11
def copy_imgs(df, out_dir, col='img_path', force_overwrite=False,
              filesystem=None, parallel=-1):
    '''Copy all images of dataset into out_dir

    Args:
        df (pd.DataFrame): dataframe to copy
        out_dir (str): Destination folder to store images
        col (str): column containing paths to files
        force_overwrite (bool): copy even if the destination file exists
        filesystem (fsspec.filesystem, FileMan): filesystem to use. Use local
            if not initialized
        parallel (int): joblib n_jobs for parallel copying; a falsy value
            copies sequentially
    '''
    fs = get_fs(filesystem)

    def _copy_one(src):
        dst = os.path.join(out_dir, os.path.basename(src))
        if fs.exists(dst) and not force_overwrite:
            return
        fs.copy(src, dst)

    fs.makedirs(out_dir, exist_ok=True)
    unique_paths = list(df[col].unique())
    progress = tqdm(unique_paths, desc='copy imgs')
    if parallel:
        jobs = (delayed(_copy_one)(path) for path in progress)
        Parallel(n_jobs=parallel)(jobs)
    else:
        for path in progress:
            _copy_one(path)
37
+
38
+
39
def copy_to_zip(zip_file, df, zip_dir, col='img_path',
                filesystem=None, progress_callback=None):
    '''Copy all images of dataset into zip archive

    Args:
        zip_file (zipfile.ZipFile): open zip archive to write the files into
        df (pd.DataFrame): dataframe to copy
        zip_dir (str): root path inside the zip archive
        col (str): column containing paths to files
        filesystem (fsspec.filesystem, FileMan): filesystem to use. Use local
            if not initialized
        progress_callback (function): Will be called on progress
            callback definition -> progress_callback(progress), where progress
            value is between 0...100
    '''

    fs = get_fs(filesystem).fs

    def copy_file_to_zip(src_path, zip_file):
        dst_path = os.path.join(zip_dir, os.path.basename(src_path))
        # best-effort refresh of the filesystem's directory listing cache;
        # FIX: narrowed from a bare `except:` which also swallowed
        # KeyboardInterrupt/SystemExit
        try:
            fs.ls('')
        except Exception:
            pass
        with fs.open(src_path, 'rb') as f:
            zip_file.writestr(dst_path, f.read())

    img_paths = list(df[col].unique())
    total = len(img_paths)
    next_pg = 0
    for idx, path in enumerate(img_paths):
        copy_file_to_zip(path, zip_file)
        if progress_callback is not None:
            pg = (idx+1) * 100 / total
            if pg == 100:
                # always report completion
                progress_callback(pg)
            elif pg >= next_pg:
                # report in ~5% steps to avoid flooding the callback
                progress_callback(pg)
                next_pg += 5
76
+
77
def pack_ds(df, out_dir, cols=('img_path', 'mask_path', 'crop_path'),
            dirs=('imgs', 'masks', 'crops'), filesystem=None, zip_file=None,
            progress_callback=None):
    '''Copy all images from dataset to a new place and update the dataframe

    Args:
        df (pd.DataFrame): Dataframe to copy
        out_dir (str): Name of the directory to store the information
        cols (sequence of str): column names containing file-paths
        dirs (sequence of str): name of new directories according to cols. The
            dirs will contain the copied data
        filesystem (fsspec.filesystem, FileMan): filesystem to use. Use local
            if not initialized
        zip_file (zipfile.ZipFile or None): If not None, a ZipFile object will
            be used to pack dataset to zip archive
        progress_callback (function): Will be called on progress
            callback definition -> progress_callback(progress), where progress
            value is between 0...100

    Returns:
        pd.DataFrame with new image paths
    '''
    # FIX: defaults for `cols`/`dirs` were mutable lists; tuples avoid the
    # shared-mutable-default pitfall and are backward compatible (both are
    # only iterated via zip below).
    fs = get_fs(filesystem)
    for col, _dir in zip(cols, dirs):
        if col in df.keys():
            dout = os.path.join(out_dir, _dir)
            if zip_file is None:
                if progress_callback is not None:
                    raise Exception('progress_callback is only implement for packing to zip files yet!')
                fs.makedirs(dout, exist_ok=True)
                copy_imgs(df=df, out_dir=dout, col=col, filesystem=fs)
            else:
                out_base = os.path.basename(out_dir)
                out_base = os.path.splitext(out_base)[0]
                zip_dir = os.path.join(out_base, _dir)
                copy_to_zip(zip_file, df, zip_dir=zip_dir, col=col, filesystem=fs,
                            progress_callback=progress_callback)
            # NOTE(review): in the zip case the dataframe is remapped to
            # out_dir/_dir (dout), not to the in-archive path zip_dir —
            # confirm this is the intended post-extraction layout
            df = remap_img_path(df, dout, col)
    return df